From aa81ff1ffdd540449567b25cfa2f669f475d8ea0 Mon Sep 17 00:00:00 2001 From: Jared Burns Date: Thu, 15 Feb 2024 11:51:18 -0500 Subject: [PATCH] feat: Added support for multiple clusters in mgmt domain Added support to `PowerManagement-ManagementDomain.ps1` for more than one cluster in the management domain for shut down and start up. Signed-off-by: Jared Burns --- .../PowerManagement-ManagementDomain.ps1 | 114 ++++++++---------- 1 file changed, 47 insertions(+), 67 deletions(-) diff --git a/SampleScripts/PowerManagement-ManagementDomain.ps1 b/SampleScripts/PowerManagement-ManagementDomain.ps1 index f1389bb..65b496f 100644 --- a/SampleScripts/PowerManagement-ManagementDomain.ps1 +++ b/SampleScripts/PowerManagement-ManagementDomain.ps1 @@ -189,16 +189,15 @@ if ($PsBoundParameters.ContainsKey("shutdown") -or $PsBoundParameters.ContainsKe $vcfVersion = Get-VCFManager | Select-Object version | Select-String -Pattern '\d+\.\d+' -AllMatches | ForEach-Object { $_.matches.groups[0].value } if ($workloadDomain.clusters.id.count -gt 1) { + Write-PowerManagementLogMessage -Type INFO -Message "More than one cluster exists in te management domain." $mgmtClusterIds = @() $mgmtClusterIds = (Get-VCFWorkloadDomain | Select-Object Type -ExpandProperty clusters | Where-Object { $_.type -eq "MANAGEMENT" }).id foreach ($clusterid in $mgmtClusterIds) { $clusterid = (Get-VCFCluster | Select-Object name, id, isdefault | Where-Object { $_.id -eq $clusterid }) $clustername = $Clusterid.name - $isDefault = $clusterid.isDefault - if ($isDefault -ne "True") { + if (!$isDefault) { $answer = Read-Host -Prompt "This will shutdown cluster $clustername, Do you want to continue with shutdown? Y/N" - if ( $answer -eq 'N') { - Write-PowerManagementLogMessage -Type WARNING "Cancelled Shutdown of $clustername...." + Write-PowerManagementLogMessage -Type WARNING "Cancelling shutdown of $clustername. Exiting..." 
} else { Write-PowerManagementLogMessage -Type INFO "Will Move Forward with Shutdown of $clustername" } @@ -217,19 +216,18 @@ if ($PsBoundParameters.ContainsKey("shutdown") -or $PsBoundParameters.ContainsKe if ($tools[$vmnames.IndexOf($vmname)] -eq "toolsOK") { Stop-CloudComponent -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass -nodes $vmname -timeout 300 } else { - Write-Output "Unable to shutdown Virtual Machines $vmname, because VMware Tools is not installed, Please Shutdown Server before moving forward" + Write-Error "Unable to shutdown virtual machines $vmname. VMware Tools is not running, Please shutdown the virtual machines before retrying. Exiting..." Exit } } - # Shut Down the vSphere Cluster Services Virtual Machines + + # Shut Down the vSphere Cluster Services Virtual Machines Set-Retreatmode -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass -cluster $clustername -mode enable - # Waiting for vCLS VMs to be stopped for ($retries*10) seconds - Write-PowerManagementLogMessage -Type INFO -Message "vCLS retreat mode has been set. vCLS shutdown will take time. Please wait!" $counter = 0 $retries = 10 - $sleepTime = 30 + Write-PowerManagementLogMessage -Type INFO -Message "vCLS retreat mode has been set. vCLS shutdown will take time. Please wait..." while ($counter -ne $retries) { if (Test-vSphereAuthentication -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass) { @@ -238,7 +236,7 @@ if ($PsBoundParameters.ContainsKey("shutdown") -or $PsBoundParameters.ContainsKe Write-PowerManagementLogMessage -Type INFO -Message "Some vCLS VMs are still running. Sleeping for $sleepTime seconds until the next check..." Start-Sleep -s $sleepTime $counter += 1 - } else { + Write-PowerManagementLogMessage -Type INFO -Message "Some vCLS virtual machines are still running. 
Waiting for $sleepTime seconds until the next check..." Break } } @@ -247,14 +245,13 @@ if ($PsBoundParameters.ContainsKey("shutdown") -or $PsBoundParameters.ContainsKe Write-PowerManagementLogMessage -Type ERROR -Message "The vCLS VMs were not shut down within the expected time. Stopping the script execution... " Exit } - + # Stop vSphere HA to avoid "orphaned" VMs during vSAN shutdown if (Test-vSphereAuthentication -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass) { if (!$(Set-VsphereHA -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass -cluster $clustername -disableHA)) { Write-PowerManagementLogMessage -Type ERROR -Message "Could not disable vSphere High Availability for cluster '$clustername'. Exiting!" } } - # Check if there are VMs running on a vSAN HCI Mesh if (Test-vSphereAuthentication -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass) { $RemoteVMs = @() $RemoteVMs = Get-poweronVMsOnRemoteDS $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass -clustertocheck $clustername @@ -262,16 +259,14 @@ if ($PsBoundParameters.ContainsKey("shutdown") -or $PsBoundParameters.ContainsKe Write-PowerManagementLogMessage -Type INFO -Message "All remote VMs are powered off." } else { Write-PowerManagementLogMessage -Type ERROR -Message "Some remote VMs are still powered-on : $($RemoteVMs.Name). Cannot proceed until the powered-on VMs are shut down. Check your environment." - } + Write-PowerManagementLogMessage -Type INFO -Message "All remote virtual machines are powered off." } - + Write-PowerManagementLogMessage -Type ERROR -Message "Some remote virtual machiness are still powered on: $($RemoteVMs.Name). Unable to proceed until these are are shutdown. Exiting..." 
#Testing VSAN health if ( (Test-VsanHealth -cluster $clustername -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass) -eq 0) { Write-PowerManagementLogMessage -Type INFO -Message "vSAN cluster health is good." } else { - Write-PowerManagementLogMessage -Type WARNING -Message "The vSAN cluster isn't in a healthy state. Check the vSAN status in Cluster '$($clustername)'. After you resolve the vSAN issues, run the script again." - Write-PowerManagementLogMessage -Type WARNING -Message "If the script has reached ESXi vSAN shutdown previously, this error is expected. Continue by following the documentation of VMware Cloud Foundation. " - Write-PowerManagementLogMessage -Type ERROR -Message "The vSAN cluster isn't in a healthy state. Check the messages above for a solution." + Write-PowerManagementLogMessage -Type ERROR -Message "vSAN cluster is in an unhealthy state. Check the vSAN status in cluster '$($clustername)'. Retry after resolving the vSAN health state. Exiting..." Exit } if ((Test-VsanObjectResync -cluster $clustername -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass) -eq 0) { @@ -290,28 +285,26 @@ if ($PsBoundParameters.ContainsKey("shutdown") -or $PsBoundParameters.ContainsKe $esxihostpassword = $password.password[1] $status = Get-SSHEnabledStatus -server $esxi -user root -pass $esxihostpassword if (!$status) { - if (Test-vSphereAuthentication -server $esxi -user root -pass $esxihostpassword) { Get-VmHostService -VMHost $esxi | Where-Object { $_.key -eq "TSM-SSH" } | Start-VMHostService - Write-Output "Set hosts to ignoreClusterMemberListUpdates" + Write-Output "Setting ESXi host $esxi to ignoreClusterMemberListUpdates..." 
Invoke-EsxCommand -server $esxi -user root -pass $esxihostpassword -expected "Value of IgnoreClusterMemberListUpdates is 1" -cmd "esxcfg-advcfg -s 1 /VSAN/IgnoreClusterMemberListUpdates" } } else { if (Test-vSphereAuthentication -server $esxi -user root -pass $esxihostpassword) { - Write-Output "Set hosts to ignoreClusterMemberListUpdates" + Write-Output "Setting ESXi host $esxi to ignoreClusterMemberListUpdates..." Invoke-EsxCommand -server $esxi -user root -pass $esxihostpassword -expected "Value of IgnoreClusterMemberListUpdates is 1" -cmd "esxcfg-advcfg -s 1 /VSAN/IgnoreClusterMemberListUpdates" } } } - # Run vSAN cluster preparation - should be done on one host per cluster - # Sleeping 1 min before starting the preparation - Write-PowerManagementLogMessage -Type INFO -Message "Sleeping for 60 seconds before preparing hosts for vSAN shutdown..." + # Run vSAN cluster preparation on one ESXi host per cluster. + Write-PowerManagementLogMessage -Type INFO -Message "Pausing for 60 seconds before preparing ESXi hosts for vSAN shutdown..." Start-Sleep -s 60 $password = (Get-VCFCredential -resourceName $esxihosts[0] | Select-Object password) $esxihostpassword = $password.password[1] Invoke-EsxCommand -server $esxihosts[0] -user root -pass $esxihostpassword -expected "Cluster preparation is done" -cmd "python /usr/lib/vmware/vsan/bin/reboot_helper.py prepare" # Putting hosts in maintenance mode - Write-PowerManagementLogMessage -Type INFO -Message "Sleeping for 30 seconds before putting hosts in maintenance mode..." + Write-PowerManagementLogMessage -Type INFO -Message "Pausing for 30 seconds before putting ESXi hosts in maintenance mode..." 
Start-Sleep -s 30 foreach ($esxiNode in $esxihosts) { $password = (Get-VCFCredential -resourceName $esxi | Select-Object password) @@ -320,32 +313,28 @@ if ($PsBoundParameters.ContainsKey("shutdown") -or $PsBoundParameters.ContainsKe } # End of shutdown Write-PowerManagementLogMessage -Type INFO -Message "End of the shutdown sequence!" - Write-PowerManagementLogMessage -Type INFO -Message "Shut down the ESXi hosts!" + Write-PowerManagementLogMessage -Type INFO -Message "You can now shut down the ESXi hosts." } else { - # vSAN shutdown wizard automation. Set-VsanClusterPowerStatus -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass -cluster $clustername -PowerStatus clusterPoweredOff -mgmt - - Write-PowerManagementLogMessage -Type INFO -Message "Sleeping for 60 seconds before checking ESXi hosts' shutdown status..." + Write-PowerManagementLogMessage -Type INFO -Message "Pausing for 60 seconds before checking ESXi hosts' shutdown status..." Start-Sleep -s 60 - $counter = 0 $sleepTime = 60 # in seconds - while ($counter -lt 1800) { $successcount = 0 - #Verify if all ESXi hosts are down in here to conclude End of Shutdown sequence + # Verify all ESXi hosts are shut down to conclude the sequence foreach ($esxiNode in $esxihosts) { if (Test-VsphereConnection -server $esxiNode) { - Write-PowerManagementLogMessage -Type WARNING -Message "Some hosts are still up. Sleeping for 60 seconds before next check..." - break + Write-PowerManagementLogMessage -Type WARNING -Message "Some ESXi hosts are still up. Pausing for 60 seconds before next check..." + Break } else { $successcount++ } } if ($successcount -eq $esxiWorkloadDomain.count) { - Write-PowerManagementLogMessage -Type INFO -Message "All hosts have been shut down successfully!" - Write-PowerManagementLogMessage -Type INFO -Message "End of the shutdown sequence!" + Write-PowerManagementLogMessage -Type INFO -Message "All ESXi hosts have been shut down successfully!" 
+ Write-PowerManagementLogMessage -Type INFO -Message "Successfully completed the shutdown sequence!" Exit } else { Start-Sleep -s $sleepTime @@ -361,7 +350,7 @@ if ($PsBoundParameters.ContainsKey("shutdown") -or $PsBoundParameters.ContainsKe } } } else { - Write-PowerManagementLogMessage -Type INFO -Message "There is only one Cluster in Management Domain...." + Write-PowerManagementLogMessage -Type INFO -Message "A single cluister exists in the management domain." } $cluster = Get-VCFCluster | Where-Object { $_.id -eq ($workloadDomain.clusters.id) } @@ -1218,19 +1207,18 @@ if ($PsBoundParameters.ContainsKey("startup")) { $clustername = $Clusterid.name $isDefault = $clusterid.isDefault if (!$isDefault) { - $answer = Read-Host -Prompt "This will start up cluster $clustername, Do you want to continue with Start up? Y/N" + $answer = Read-Host -Prompt "Start up cluster $clustername, Do you want to continue? Y/N" if ( $answer -eq 'N') { - Write-PowerManagementLogMessage -Type WARNING "Cancelled start up of $clustername...." - } - else { + Write-PowerManagementLogMessage -Type WARNING "Cancelled start up of $clustername. Exiting..." + Exit + } else { Write-PowerManagementLogMessage -Type INFO "Will Move Forward with start up of $clustername" } $esxihosts = (Get-VCFHost | Select-Object fqdn -ExpandProperty cluster | Where-Object { $_.id -eq $clusterid.id }).fqdn foreach ($esxiNode in $esxihosts) { if (-Not (Test-VsphereConnection -server $esxiNode)) { Write-PowerManagementLogMessage -Type WARNING "ESXi host $esxiNode is not powered on...." 
- } - else { + } else { $password = (Get-VCFCredential -resourceName $esxiNode| Select-Object password) $esxihostpassword = $password.password[1] $status = Get-SSHEnabledStatus -server $esxiNode-user root -pass $esxihostpassword @@ -1239,25 +1227,24 @@ if ($PsBoundParameters.ContainsKey("startup")) { Get-VmHostService -VMHost $esxiNode | Where-Object { $_.key -eq "TSM-SSH" } | Start-VMHostService Set-MaintenanceMode -server $esxiNode -user root -pass $esxihostpassword -state DISABLE } - } - else { + } else { if (Test-vSphereAuthentication -server $esxiNode -user root -pass $esxihostpassword) { Set-MaintenanceMode -server $esxiNode -user root -pass $esxihostpassword -state DISABLE } } } } - Write-PowerManagementLogMessage -Type INFO -Message "Sleeping for 30 seconds while hosts come out of maintenance mode..." + Write-PowerManagementLogMessage -Type INFO -Message "Pausing for 30 seconds while hosts come out of maintenance mode..." Start-Sleep -s 30 if ([float]$vcfVersion -lt [float]4.5) { # Prepare the vSAN cluster for startup - Performed on a single host only # We need some time before this step, setting hard sleep 30 sec - Write-PowerManagementLogMessage -Type INFO -Message "Sleeping for 30 seconds before starting vSAN..." + Write-PowerManagementLogMessage -Type INFO -Message "Pausing for 30 seconds before starting vSAN..." $password = (Get-VCFCredential -resourceName $esxihosts[0] | Select-Object password) $esxihostpassword = $password.password[1] Invoke-EsxCommand -server $esxihosts[0] -user root -pass $esxihostpassword -expected "Cluster reboot/poweron is completed successfully!" -cmd "python /usr/lib/vmware/vsan/bin/reboot_helper.py recover" # We need some time before this step, setting hard sleep 30 sec - Write-PowerManagementLogMessage -Type INFO -Message "Sleeping for 30 seconds before enabling vSAN updates..." + Write-PowerManagementLogMessage -Type INFO -Message "Pausing for 30 seconds before enabling vSAN updates..." 
Start-Sleep -s 30 foreach ($esxi in $esxihosts) { $password = (Get-VCFCredential -resourceName $esxi | Select-Object password) @@ -1268,23 +1255,22 @@ if ($PsBoundParameters.ContainsKey("startup")) { } } - Write-PowerManagementLogMessage -Type INFO -Message "Checking vSAN status of the ESXi hosts." + Write-PowerManagementLogMessage -Type INFO -Message "Checking vSAN status of the ESXi hosts..." foreach ($esxiNode in $esxihosts) { $password = (Get-VCFCredential -resourceName $esxi | Select-Object password) $esxihostpassword = $password.password[1] Invoke-EsxCommand -server $esxiNode.fqdn -user $esxiNode.username -pass $esxiNode.password -expected "Local Node Health State: HEALTHY" -cmd "esxcli vsan cluster get" } - } $domain = Get-VCFWorkloadDomain | Select-Object name, type | Where-Object { $_.type -eq "MANAGEMENT" } if (($vcfVcenterDetails = Get-vCenterServerDetail -server $server -user $user -pass $pass -domain $domain.name)) { if (Test-vSphereAuthentication -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass) { - #Restart Cluster Via Wizard + # Start the vSAN cluster wizard. if ([float]$vcfVersion -gt [float]4.5) { # Lockdown mode check Test-LockdownMode -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass -cluster $clustername - # Start VSAN Cluster wizard + # Restart cluster using wizard Set-VsanClusterPowerStatus -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass -cluster $clustername -PowerStatus clusterPoweredOn } # Check vSAN Status @@ -1294,12 +1280,11 @@ if ($PsBoundParameters.ContainsKey("startup")) { } # Check vSAN Status if ( (Test-VsanObjectResync -cluster $clustername -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass) -ne 0) { - Write-PowerManagementLogMessage -Type ERROR -Message "vSAN object resynchronization is in progress. 
Check your environment and run the script again." + Write-PowerManagementLogMessage -Type ERROR -Message "vSAN object resynchronization failed. Check your environment and run the script again." Exit } - - #Start workflow for VCF prior version 4.5 - if ([float]$vcfVersion -lt [float]4.5) { + + # Start workflow for VCF prior version 4.5 # Start vSphere HA if (!$(Set-VsphereHA -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass -cluster $clustername -enableHA)) { Write-PowerManagementLogMessage -Type ERROR -Message "Could not enable vSphere High Availability for cluster '$cluster'." @@ -1307,44 +1292,39 @@ if ($PsBoundParameters.ContainsKey("startup")) { # Restore the DRS Automation Level to the mode backed up for Management Domain Cluster during shutdown if ([string]::IsNullOrEmpty($DrsAutomationLevel)) { - Write-PowerManagementLogMessage -Type ERROR -Message "The DrsAutomationLevel value in the JSON file is empty. Exiting!" + Write-PowerManagementLogMessage -Type ERROR -Message "Unable to enable Drs Automation Level for cluster '$cluster'. Exiting..." Exit - } - else { + } else { Set-DrsAutomationLevel -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass -cluster $clustername -level $DrsAutomationLevel - } - } + } # Startup the vSphere Cluster Services Virtual Machines in the Management Workload Domain Set-Retreatmode -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass -cluster $clustername -mode disable # Waiting for vCLS VMs to be started for ($retries*10) seconds - Write-PowerManagementLogMessage -Type INFO -Message "vCLS retreat mode has been set. vCLS virtual machines startup will take some time. Please wait." 
$counter = 0 $retries = 10 $sleepTime = 30 while ($counter -ne $retries) { $powerOnVMcount = (Get-VMsWithPowerStatus -powerstate "poweredon" -server $vcfVcenterDetails.fqdn -user $vcfVcenterDetails.ssoAdmin -pass $vcfVcenterDetails.ssoAdminPass -pattern "(^vCLS-\w{8}-\w{4}-\w{4}-\w{4}-\w{12})|(^vCLS\s*\(\d+\))|(^vCLS\s*$)" -silence).count if ( $powerOnVMcount -lt 3 ) { - Write-PowerManagementLogMessage -Type INFO -Message "There are $powerOnVMcount vCLS virtual machines running. Sleeping for $sleepTime seconds until the next check." + Write-PowerManagementLogMessage -Type INFO -Message "vCLS retreat mode has been set. vCLS virtual machines startup will take some time. Please wait..." Start-Sleep -s $sleepTime $counter += 1 - } - else { + } else { Break } } if ($counter -eq $retries) { - Write-PowerManagementLogMessage -Type ERROR -Message "The vCLS virtual machines did not start within the expected time. Stopping script execution..." + Write-PowerManagementLogMessage -Type ERROR -Message "vCLS virtual machines did not start within the expected time. Exiting..." Exit } - } } } } } } - # End of startup + Write-PowerManagementLogMessage -Type INFO -Message "##################################################################################" if ([float]$vcfVersion -lt [float]4.5) { Write-PowerManagementLogMessage -Type INFO -Message "vSphere vSphere High Availability has been enabled by the script. Please disable it according to your environment's design."