User Tools

Site Tools


wiki:powercli_script_to_check_cpu_utilization_per_vm_and_change_required

This script analyzes VM CPU usage over a given period and recommends whether each VM's vCPU count should be increased, decreased, or kept.

Checked information:
- Current vCPU
- Average Usage %
- P95 Usage % (captures sustained high load without being overly influenced by single spikes)
- Average Ready %
- P95 Ready %

Usage: Save the script as vCPU-Recommendations.ps1, then connect to the vCenter:

Connect-VIServer vcenter.company.local

Run the script (the lookback window defaults to 7 days; replace XX with the number of days to analyze):

C:\temp\vCPU-Recommendations.ps1 -Days XX

Script to save:

<#
.SYNOPSIS
  vCPU Right-Sizer (Recommendations Only) for VMware vSphere (PowerCLI)

.DESCRIPTION
  Analyzes VM CPU metrics (usage, ready, co-stop) over the last N days and recommends
  Increase/Decrease/Keep for vCPU, with a target value and reason. Outputs tables and
  optional CSV exports. Makes NO changes to any VM.

.NOTES
  Author: M365 Copilot
  Version: 1.2
  Requires: VMware PowerCLI
  Usage: Connect-VIServer first
#>

# Script parameters. All thresholds are compared against statistics computed per VM
# over the lookback window; the script only reports and never reconfigures a VM.
param(
  [string]$ClusterName = '*',                 # Cluster filter passed to Get-Cluster -Name (supports wildcards)
  [int]$Days = 7,                             # Lookback window for metrics, in days counted back from now
  [int]$LowAvgUsagePct = 10,                  # Below this avg usage, candidate for decrease; below half of it (and >= 4 vCPU) the target drops by 2
  [int]$LowP95UsagePct = 40,                  # P95 usage must also be below this for a decrease to be recommended
  [int]$HighP95UsagePct = 80,                 # If 95th percentile usage is at or above this, candidate for increase
  [int]$HighReadyPct = 5,                     # If 95th percentile CPU Ready % is at or above this, candidate for increase; a decrease requires P95 ready below (this - 2)
  [int]$HighCoStopMsPerMin = 1500,            # High co-stop indicates SMP scheduling issues; only used to block a decrease
  [int]$MaxVcpuPerVM = 16,                    # Cap for recommendations (no increase is suggested at or above it)
  [int]$MinVcpuPerVM = 1,                     # Floor for recommendations (no decrease is suggested at or below it)
  [string]$TagToExclude = 'NoRightsize',      # Exclude VMs with this tag (reported as 'ExcludedByTag')
  [string]$NameExcludeRegex = '(?i)(sql|oracle|sap|domain|controller|dc|exchange)', # Exclude critical by name; case-insensitive substring match, so 'dc' matches any name containing "dc"
  [string]$ReportPath = ".\vCPU-Recommendations-$(Get-Date -Format yyyyMMdd-HHmm).csv", # CSV path for VM results; the cluster CSV path is derived from it
  [switch]$ExportCsv                          # Export CSV if specified
)

# --- Helpers ---
function Get-Percentile {
  <#
  .SYNOPSIS
    Returns the requested percentile of a set of numbers (lower nearest-rank method).
  .PARAMETER Values
    Samples to evaluate; they do not need to be sorted.
  .PARAMETER Percentile
    Percentile to pick, expected in the range 0-100. Values outside that range are
    clamped to the smallest / largest sample.
  .OUTPUTS
    The sample at index floor(Percentile/100 * (N-1)) of the ascending-sorted values,
    or $null when there are no samples.
  #>
  param([double[]]$Values, [double]$Percentile)

  # Test for "no samples" explicitly: `-not $Values` is also true for a one-element
  # array holding 0, which would turn a legitimate single zero sample into $null.
  if ($null -eq $Values -or $Values.Count -eq 0) { return $null }

  # @() keeps the sorted result an array even when there is only one sample.
  $sorted = @($Values | Sort-Object)

  $index = [int][math]::Floor(($Percentile / 100.0) * ($sorted.Count - 1))
  # Clamp so an out-of-range percentile cannot index past either end of the array.
  $index = [math]::Max(0, [math]::Min($index, $sorted.Count - 1))

  return $sorted[$index]
}

function Convert-ReadySummationToPercent {
  <#
  .SYNOPSIS
    Expresses a summation value in milliseconds as a percentage of its sampling interval.
  .PARAMETER SummationMs
    Summation value in milliseconds for one sample.
  .PARAMETER IntervalSeconds
    Length of the sampling interval in seconds.
  .OUTPUTS
    (SummationMs / (IntervalSeconds * 1000)) * 100 rounded to two decimals,
    or $null when IntervalSeconds is zero or negative.
  #>
  param([double]$SummationMs, [int]$IntervalSeconds)

  if ($IntervalSeconds -gt 0) {
    $intervalMs    = $IntervalSeconds * 1000
    $readyFraction = $SummationMs / $intervalMs
    return [math]::Round($readyFraction * 100, 2)
  }

  # A non-positive interval cannot be converted.
  return $null
}

function Convert-CoStopSummationToMsPerMinute {
  <#
  .SYNOPSIS
    Normalizes a summation value in milliseconds to milliseconds per minute.
  .PARAMETER SummationMs
    Summation value in milliseconds for one sample.
  .PARAMETER IntervalSeconds
    Length of the sampling interval in seconds.
  .OUTPUTS
    SummationMs divided by the interval length in minutes, rounded to two decimals,
    or $null when IntervalSeconds is zero or negative.
  #>
  param([double]$SummationMs, [int]$IntervalSeconds)

  if ($IntervalSeconds -gt 0) {
    $intervalMinutes = $IntervalSeconds / 60.0
    $msPerMinute     = $SummationMs / $intervalMinutes
    return [math]::Round($msPerMinute, 2)
  }

  # A non-positive interval cannot be converted.
  return $null
}

function Get-ClusterCpuOversubscription {
  <#
  .SYNOPSIS
    Summarizes how many vCPUs are allocated to powered-on VMs relative to the CPUs
    of the connected hosts in one cluster.
  .PARAMETER Cluster
    Cluster object to summarize.
  .OUTPUTS
    PSCustomObject with Cluster, Hosts, LogicalCPUs, PoweredOnVMs, TotalvCPUs and
    vCPUtoPCPU (rounded to two decimals; $null when the host CPU total is not positive).
  #>
  param([VMware.VimAutomation.ViCore.Impl.V1.Inventory.ClusterImpl]$Cluster)

  # Only connected hosts and powered-on VMs take part in the ratio.
  $connectedHosts = Get-VMHost -Location $Cluster | Where-Object { $_.ConnectionState -eq 'Connected' }
  $runningVMs     = Get-VM -Location $Cluster | Where-Object { $_.PowerState -eq 'PoweredOn' }

  # NOTE(review): the host total is the sum of VMHost.NumCpu and is reported as
  # "LogicalCPUs" - confirm whether NumCpu counts cores or hardware threads.
  $hostCpuTotal = ($connectedHosts | Measure-Object -Property NumCpu -Sum).Sum
  $vCpuTotal    = ($runningVMs | Measure-Object -Property NumCpu -Sum).Sum

  $ratio = $null
  if ($hostCpuTotal -gt 0) {
    $ratio = [math]::Round($vCpuTotal / $hostCpuTotal, 2)
  }

  [PSCustomObject]@{
    Cluster      = $Cluster.Name
    Hosts        = $connectedHosts.Count
    LogicalCPUs  = $hostCpuTotal
    PoweredOnVMs = $runningVMs.Count
    TotalvCPUs   = $vCpuTotal
    vCPUtoPCPU   = $ratio
  }
}

# --- Input clusters ---
# Resolve the clusters to analyze; stop the script when the filter matches nothing.
$clusters = Get-Cluster -Name $ClusterName -ErrorAction Stop
$noClusterMatched = (-not $clusters) -or ($clusters.Count -eq 0)
if ($noClusterMatched) {
  Write-Error "No clusters match '$ClusterName'."
  exit 1
}

# Accumulators filled by the main loop: one row per VM and one row per cluster.
$results       = New-Object -TypeName 'System.Collections.Generic.List[object]'
$clusterRatios = New-Object -TypeName 'System.Collections.Generic.List[object]'

# Tag exclusion support
# Best effort: when the tag cannot be looked up, the ID list stays empty and the
# per-VM tag check is skipped.
$excludeTagIds = @()
try {
  $excludeTag = Get-Tag -Name $TagToExclude -ErrorAction SilentlyContinue
  if ($excludeTag) {
    $excludeTagIds = @($excludeTag.Id)
  }
}
catch {
  # ignore tag errors
}

# Keeps only VM-level aggregate samples (empty Instance) from a Get-Stat result.
# Realtime queries can also return one series per vCPU; mixing those with the
# aggregate series would skew the averages and percentiles computed below.
function Select-AggregateSample {
  param([object[]]$Samples)
  foreach ($sample in $Samples) {
    if ($null -ne $sample -and [string]::IsNullOrEmpty([string]$sample.Instance)) { $sample }
  }
}

# Resolves the sampling interval of one sample: the sample's own IntervalSecs when
# it is positive, otherwise the supplied fallback.
function Get-SampleIntervalSeconds {
  param($Sample, [int]$Fallback)
  $secs = 0
  if ($null -ne $Sample -and $null -ne $Sample.IntervalSecs) { $secs = [int]$Sample.IntervalSecs }
  if ($secs -gt 0) { return $secs }
  return $Fallback
}

# Lookback start, computed once so every VM is measured over the same window.
$start = (Get-Date).AddDays(-$Days)

# Main analysis: for every cluster record the vCPU:pCPU ratio, then evaluate each
# candidate VM and append exactly one row to $results (ExcludedByTag, NoData, or a
# Keep/Increase/Decrease recommendation). No VM is modified.
foreach ($cluster in $clusters) {

  # Cluster-level ratio
  $clusterRatios.Add((Get-ClusterCpuOversubscription -Cluster $cluster))

  # Candidate VMs: powered on, not a template, name not matching the exclusion regex.
  # NOTE(review): the SnapshotNum test is kept as written; confirm that the VM object
  # exposes this property - if it does not, the comparison is always false and the
  # clause filters nothing.
  $vms = Get-VM -Location $cluster | Where-Object {
    $_.PowerState -eq 'PoweredOn' -and
    -not $_.ExtensionData.Config.Template -and
    -not ($_.Name -match $NameExcludeRegex) -and
    -not ($_.SnapshotNum -gt 0)
  }

  foreach ($vm in $vms) {

    # Skip if tagged to exclude
    $exclude = $false
    if ($excludeTagIds.Count -gt 0) {
      try {
        $tags = Get-TagAssignment -Entity $vm -ErrorAction SilentlyContinue
        if ($tags -and ($tags | Where-Object { $excludeTagIds -contains $_.Tag.Id })) { $exclude = $true }
      } catch {
        # ignore tag assignment errors (best effort: the VM is then analyzed normally)
      }
    }
    if ($exclude) {
      $results.Add([PSCustomObject]@{
        Cluster=$cluster.Name; VM=$vm.Name; CurrentvCPU=$vm.NumCpu;
        AvgUsagePct=$null; P95UsagePct=$null; AvgReadyPct=$null; P95ReadyPct=$null; AvgCoStopMsPerMin=$null; P95CoStopMsPerMin=$null;
        Recommendation='ExcludedByTag'; Reason=("Tag '{0}'" -f $TagToExclude); TargetvCPU=$vm.NumCpu
      })
      continue
    }

    # --- Collect stats (primary: 60-min interval) ---
    $usageStats  = @(Select-AggregateSample -Samples (Get-Stat -Entity $vm -Stat 'cpu.usage.average'    -Start $start -IntervalMins 60 -ErrorAction SilentlyContinue))
    $readyStats  = @(Select-AggregateSample -Samples (Get-Stat -Entity $vm -Stat 'cpu.ready.summation'  -Start $start -IntervalMins 60 -ErrorAction SilentlyContinue))
    $coStopStats = @(Select-AggregateSample -Samples (Get-Stat -Entity $vm -Stat 'cpu.coStop.summation' -Start $start -IntervalMins 60 -ErrorAction SilentlyContinue))

    # Fallback to realtime if no 60-min samples
    if ($usageStats.Count -eq 0) {
      $usageStats  = @(Select-AggregateSample -Samples (Get-Stat -Entity $vm -Stat 'cpu.usage.average'    -Realtime -MaxSamples 120 -ErrorAction SilentlyContinue))
      $readyStats  = @(Select-AggregateSample -Samples (Get-Stat -Entity $vm -Stat 'cpu.ready.summation'  -Realtime -MaxSamples 120 -ErrorAction SilentlyContinue))
      $coStopStats = @(Select-AggregateSample -Samples (Get-Stat -Entity $vm -Stat 'cpu.coStop.summation' -Realtime -MaxSamples 120 -ErrorAction SilentlyContinue))
    }

    if ($usageStats.Count -eq 0) {
      $results.Add([PSCustomObject]@{
        Cluster=$cluster.Name; VM=$vm.Name; CurrentvCPU=$vm.NumCpu;
        AvgUsagePct=$null; P95UsagePct=$null; AvgReadyPct=$null; P95ReadyPct=$null; AvgCoStopMsPerMin=$null; P95CoStopMsPerMin=$null;
        Recommendation='NoData'; Reason='No stats returned'; TargetvCPU=$vm.NumCpu
      })
      continue
    }

    $current = [int]$vm.NumCpu
    # Divisor used to express ready time per vCPU; never below 1.
    $vcpuDivisor = [math]::Max(1, $current)

    # --- Aggregations ---
    $usageValues = @($usageStats | Select-Object -ExpandProperty Value)
    $avgUsage = [math]::Round(($usageValues | Measure-Object -Average).Average, 2)
    $p95Usage = [math]::Round((Get-Percentile -Values $usageValues -Percentile 95), 2)

    # Fallback interval, used only for samples that carry no IntervalSecs of their own.
    # The absolute difference is taken because the samples may be ordered newest-first,
    # in which case a plain subtraction is negative and the real interval would be lost.
    $intervalSeconds = 3600
    if ($usageStats.Count -ge 2) {
      $delta = [math]::Abs(($usageStats[1].Timestamp - $usageStats[0].Timestamp).TotalSeconds)
      if ($delta -ge 1) { $intervalSeconds = [int][math]::Round($delta) }
    }

    # Convert Ready to % per vCPU. The VM-level summation adds up the ready time of all
    # vCPUs, so it is divided by the vCPU count before being compared with $HighReadyPct.
    $readyPctList = @(
      foreach ($s in $readyStats) {
        $secs = Get-SampleIntervalSeconds -Sample $s -Fallback $intervalSeconds
        $pct  = Convert-ReadySummationToPercent -SummationMs $s.Value -IntervalSeconds $secs
        if ($null -ne $pct) { [math]::Round($pct / $vcpuDivisor, 2) }
      }
    )
    $avgReadyPct = if ($readyPctList.Count -gt 0) { [math]::Round(($readyPctList | Measure-Object -Average).Average, 2) } else { $null }
    $p95ReadyPct = if ($readyPctList.Count -gt 0) { [math]::Round((Get-Percentile -Values $readyPctList -Percentile 95), 2) } else { $null }

    # Convert Co-Stop to ms/min
    $coStopPerMinList = @(
      foreach ($c in $coStopStats) {
        $secs     = Get-SampleIntervalSeconds -Sample $c -Fallback $intervalSeconds
        $perMin   = Convert-CoStopSummationToMsPerMinute -SummationMs $c.Value -IntervalSeconds $secs
        if ($null -ne $perMin) { $perMin }
      }
    )
    $avgCoStopPerMin = if ($coStopPerMinList.Count -gt 0) { [math]::Round(($coStopPerMinList | Measure-Object -Average).Average, 2) } else { $null }
    $p95CoStopPerMin = if ($coStopPerMinList.Count -gt 0) { [math]::Round((Get-Percentile -Values $coStopPerMinList -Percentile 95), 2) } else { $null }

    # --- Recommendation logic ---
    $recommendation = 'Keep'
    $reason = 'Usage/ready within thresholds'
    $target = $current

    # Underprovisioned signals (increase): high P95 ready or high P95 usage, below the cap
    $needsIncrease =
      (
        ($null -ne $p95ReadyPct -and $p95ReadyPct -ge $HighReadyPct) -or
        ($p95Usage -ge $HighP95UsagePct)
      ) -and
      ($current -lt $MaxVcpuPerVM)

    # Overprovisioned signals (decrease): low avg AND low P95 usage, with ready and
    # co-stop (when available) comfortably below their thresholds, above the floor
    $needsDecrease =
      (
        ($avgUsage -lt $LowAvgUsagePct) -and
        ($p95Usage -lt $LowP95UsagePct) -and
        ($null -eq $p95ReadyPct -or $p95ReadyPct -lt ($HighReadyPct - 2)) -and
        ($null -eq $p95CoStopPerMin -or $p95CoStopPerMin -lt $HighCoStopMsPerMin)
      ) -and
      ($current -gt $MinVcpuPerVM)

    # Increase wins when both signals are present.
    if ($needsIncrease) {
      $target = [math]::Min($current + 1, $MaxVcpuPerVM)
      $recommendation = 'Increase'
      $reason = ("High P95 usage ({0}%) or ready ({1}%)" -f $p95Usage, $p95ReadyPct)
    } elseif ($needsDecrease) {
      # Step down by two only for clearly idle VMs that have at least 4 vCPUs.
      if ($avgUsage -lt ($LowAvgUsagePct / 2) -and $current -ge 4) {
        $target = [math]::Max($current - 2, $MinVcpuPerVM)
        $reason = ("Very low avg usage ({0}%) and low contention" -f $avgUsage)
      } else {
        $target = [math]::Max($current - 1, $MinVcpuPerVM)
        $reason = ("Low avg ({0}%), low P95 ({1}%), low ready" -f $avgUsage, $p95Usage)
      }
      $recommendation = 'Decrease'
    }

    # --- Append to results ---
    $results.Add([PSCustomObject]@{
      Cluster=$cluster.Name
      VM=$vm.Name
      CurrentvCPU=$current
      TargetvCPU=$target
      Recommendation=$recommendation
      Reason=$reason
      AvgUsagePct=$avgUsage
      P95UsagePct=$p95Usage
      AvgReadyPct=$avgReadyPct
      P95ReadyPct=$p95ReadyPct
      AvgCoStopMsPerMin=$avgCoStopPerMin
      P95CoStopMsPerMin=$p95CoStopPerMin
    })

  } # end foreach VM

} # end foreach Cluster

# --- Output: Cluster ratio table ---
# One row per analyzed cluster, ordered by cluster name.
$clusterColumns = 'Cluster', 'Hosts', 'LogicalCPUs', 'PoweredOnVMs', 'TotalvCPUs', 'vCPUtoPCPU'
Write-Host "`n=== Cluster vCPU : pCPU ratios ==="
$clusterRatios | Sort-Object -Property Cluster | Format-Table -Property $clusterColumns -AutoSize

# --- Output: VM recommendations table ---
# One row per VM (including excluded and no-data VMs), ordered by cluster then VM name.
$vmColumns = 'Cluster', 'VM', 'CurrentvCPU', 'TargetvCPU', 'Recommendation',
             'AvgUsagePct', 'P95UsagePct', 'AvgReadyPct', 'P95ReadyPct', 'P95CoStopMsPerMin', 'Reason'
Write-Host "`n=== VM vCPU Recommendations ==="
$results | Sort-Object -Property Cluster, VM | Format-Table -Property $vmColumns -AutoSize

# --- Optional CSV export ---
# The cluster file name is derived from $ReportPath by swapping its extension
# for '.clusters.csv'.
if ($ExportCsv) {
  $clusterReportPath = [System.IO.Path]::ChangeExtension($ReportPath, '.clusters.csv')

  $results       | Export-Csv -Path $ReportPath -NoTypeInformation
  $clusterRatios | Export-Csv -Path $clusterReportPath -NoTypeInformation

  Write-Host "`nCSV exported:`n  VM recommendations: $ReportPath`n  Cluster ratios: $clusterReportPath"
}

Write-Host "`nDone."
wiki/powercli_script_to_check_cpu_utilization_per_vm_and_change_required.txt · Last modified: by wagner.jer

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki