diff --git a/apis/v1alpha3/conversion_test.go b/apis/v1alpha3/conversion_test.go index e956f34da2..732fae6f8b 100644 --- a/apis/v1alpha3/conversion_test.go +++ b/apis/v1alpha3/conversion_test.go @@ -120,7 +120,6 @@ func CustomSpecNewFieldFuzzer(in *infrav1.VirtualMachineCloneSpec, c fuzz.Contin c.FuzzNoCustom(in) in.PciDevices = nil - in.VGPUDevices = nil in.AdditionalDisksGiB = nil in.OS = "" in.HardwareVersion = "" diff --git a/apis/v1alpha3/zz_generated.conversion.go b/apis/v1alpha3/zz_generated.conversion.go index 0c8f7f2cb0..966195ccbb 100644 --- a/apis/v1alpha3/zz_generated.conversion.go +++ b/apis/v1alpha3/zz_generated.conversion.go @@ -1760,7 +1760,6 @@ func autoConvert_v1beta1_VirtualMachineCloneSpec_To_v1alpha3_VirtualMachineClone out.CustomVMXKeys = *(*map[string]string)(unsafe.Pointer(&in.CustomVMXKeys)) // WARNING: in.TagIDs requires manual conversion: does not exist in peer-type // WARNING: in.PciDevices requires manual conversion: does not exist in peer-type - // WARNING: in.VGPUDevices requires manual conversion: does not exist in peer-type // WARNING: in.OS requires manual conversion: does not exist in peer-type // WARNING: in.HardwareVersion requires manual conversion: does not exist in peer-type return nil diff --git a/apis/v1alpha4/zz_generated.conversion.go b/apis/v1alpha4/zz_generated.conversion.go index a18d2ecd3d..147c1a9894 100644 --- a/apis/v1alpha4/zz_generated.conversion.go +++ b/apis/v1alpha4/zz_generated.conversion.go @@ -1914,7 +1914,6 @@ func autoConvert_v1beta1_VirtualMachineCloneSpec_To_v1alpha4_VirtualMachineClone out.CustomVMXKeys = *(*map[string]string)(unsafe.Pointer(&in.CustomVMXKeys)) // WARNING: in.TagIDs requires manual conversion: does not exist in peer-type // WARNING: in.PciDevices requires manual conversion: does not exist in peer-type - // WARNING: in.VGPUDevices requires manual conversion: does not exist in peer-type // WARNING: in.OS requires manual conversion: does not exist in peer-type // WARNING: 
in.HardwareVersion requires manual conversion: does not exist in peer-type return nil diff --git a/apis/v1beta1/types.go b/apis/v1beta1/types.go index 81c5882279..374084ce5d 100644 --- a/apis/v1beta1/types.go +++ b/apis/v1beta1/types.go @@ -193,9 +193,6 @@ type VirtualMachineCloneSpec struct { // PciDevices is the list of pci devices used by the virtual machine. // +optional PciDevices []PCIDeviceSpec `json:"pciDevices,omitempty"` - // VGPUDevices is the list of vGPUs used by the virtual machine. - // +optional - VGPUDevices []VGPUSpec `json:"vgpuDevices,omitempty"` // OS is the Operating System of the virtual machine // Defaults to Linux // +optional @@ -261,6 +258,11 @@ type PCIDeviceSpec struct { // virtual machine is cloned. // +kubebuilder:validation:Required VendorID *int32 `json:"vendorId,omitempty"` + // VGPUProfile is the profile name of a virtual machine's vGPU, in string. + // Defaults to the eponymous property value in the template from which the + // virtual machine is cloned. + // +kubebuilder:validation:Required + VGPUProfile string `json:"vgpuProfile,omitempty"` // CustomLabel is the hardware label of a virtual machine's PCI device. // Defaults to the eponymous property value in the template from which the // virtual machine is cloned. @@ -268,15 +270,6 @@ type PCIDeviceSpec struct { CustomLabel string `json:"customLabel,omitempty"` } -// VGPUSpec defines virtual machine's VGPU configuration -type VGPUSpec struct { - // ProfileName is the ProfileName of a virtual machine's vGPU, in string. - // Defaults to the eponymous property value in the template from which the - // virtual machine is cloned. - // +kubebuilder:validation:Required - ProfileName string `json:"profileName,omitempty"` -} - // NetworkSpec defines the virtual machine's network configuration. type NetworkSpec struct { // Devices is the list of network devices used by the virtual machine. 
diff --git a/apis/v1beta1/zz_generated.deepcopy.go b/apis/v1beta1/zz_generated.deepcopy.go index c6b26cfb39..44d12a65fe 100644 --- a/apis/v1beta1/zz_generated.deepcopy.go +++ b/apis/v1beta1/zz_generated.deepcopy.go @@ -403,21 +403,6 @@ func (in *Topology) DeepCopy() *Topology { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *VGPUSpec) DeepCopyInto(out *VGPUSpec) { - *out = *in -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VGPUSpec. -func (in *VGPUSpec) DeepCopy() *VGPUSpec { - if in == nil { - return nil - } - out := new(VGPUSpec) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VSphereCluster) DeepCopyInto(out *VSphereCluster) { *out = *in @@ -1336,11 +1321,6 @@ func (in *VirtualMachineCloneSpec) DeepCopyInto(out *VirtualMachineCloneSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } - if in.VGPUDevices != nil { - in, out := &in.VGPUDevices, &out.VGPUDevices - *out = make([]VGPUSpec, len(*in)) - copy(*out, *in) - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualMachineCloneSpec. diff --git a/config/default/crd/bases/infrastructure.cluster.x-k8s.io_vspheremachines.yaml b/config/default/crd/bases/infrastructure.cluster.x-k8s.io_vspheremachines.yaml index 2e81ee9152..16ecb9001d 100644 --- a/config/default/crd/bases/infrastructure.cluster.x-k8s.io_vspheremachines.yaml +++ b/config/default/crd/bases/infrastructure.cluster.x-k8s.io_vspheremachines.yaml @@ -1229,6 +1229,11 @@ spec: in the template from which the virtual machine is cloned. format: int32 type: integer + vgpuProfile: + description: VGPUProfile is the profile name of a virtual machine's + vGPU, in string. Defaults to the eponymous property value + in the template from which the virtual machine is cloned.
+ type: string type: object type: array powerOffMode: @@ -1286,19 +1291,6 @@ spec: of the communication between Cluster API Provider vSphere and the VMware vCenter server. type: string - vgpuDevices: - description: VGPUDevices is the list of vGPUs used by the virtual - machine. - items: - description: VGPUSpec defines virtual machine's VGPU configuration - properties: - profileName: - description: ProfileName is the ProfileName of a virtual machine's - vGPU, in string. Defaults to the eponymous property value - in the template from which the virtual machine is cloned. - type: string - type: object - type: array required: - network - template diff --git a/config/default/crd/bases/infrastructure.cluster.x-k8s.io_vspheremachinetemplates.yaml b/config/default/crd/bases/infrastructure.cluster.x-k8s.io_vspheremachinetemplates.yaml index 8b4c849693..2f9f0c43cc 100644 --- a/config/default/crd/bases/infrastructure.cluster.x-k8s.io_vspheremachinetemplates.yaml +++ b/config/default/crd/bases/infrastructure.cluster.x-k8s.io_vspheremachinetemplates.yaml @@ -1143,6 +1143,12 @@ spec: machine is cloned. format: int32 type: integer + vgpuProfile: + description: VGPUProfile is the profile name of a virtual + machine's vGPU, in string. Defaults to the eponymous + property value in the template from which the virtual + machine is cloned. + type: string type: object type: array powerOffMode: @@ -1202,20 +1208,6 @@ spec: TLS certificate validation of the communication between Cluster API Provider vSphere and the VMware vCenter server. type: string - vgpuDevices: - description: VGPUDevices is the list of vGPUs used by the - virtual machine. - items: - description: VGPUSpec defines virtual machine's VGPU configuration - properties: - profileName: - description: ProfileName is the ProfileName of a virtual - machine's vGPU, in string. Defaults to the eponymous - property value in the template from which the virtual - machine is cloned.
- type: string - type: object - type: array required: - network - template diff --git a/config/default/crd/bases/infrastructure.cluster.x-k8s.io_vspherevms.yaml b/config/default/crd/bases/infrastructure.cluster.x-k8s.io_vspherevms.yaml index f50a142af6..02d994806e 100644 --- a/config/default/crd/bases/infrastructure.cluster.x-k8s.io_vspherevms.yaml +++ b/config/default/crd/bases/infrastructure.cluster.x-k8s.io_vspherevms.yaml @@ -1275,6 +1275,11 @@ spec: in the template from which the virtual machine is cloned. format: int32 type: integer + vgpuProfile: + description: VGPUProfile is the profile name of a virtual machine's + vGPU, in string. Defaults to the eponymous property value + in the template from which the virtual machine is cloned. + type: string type: object type: array powerOffMode: @@ -1328,19 +1333,6 @@ spec: of the communication between Cluster API Provider vSphere and the VMware vCenter server. type: string - vgpuDevices: - description: VGPUDevices is the list of vGPUs used by the virtual - machine. - items: - description: VGPUSpec defines virtual machine's VGPU configuration - properties: - profileName: - description: ProfileName is the ProfileName of a virtual machine's - vGPU, in string. Defaults to the eponymous property value - in the template from which the virtual machine is cloned. - type: string - type: object - type: array required: - network - template diff --git a/docs/gpu-vgpu.md b/docs/gpu-vgpu.md index af6901d049..268aab1075 100644 --- a/docs/gpu-vgpu.md +++ b/docs/gpu-vgpu.md @@ -59,7 +59,7 @@ $ make dev-flavors go run ./packaging/flavorgen --output-dir /home/user/.cluster-api/overrides/infrastructure-vsphere/v0.0.0 ``` -Edit the generated Cluster template (`cluster-template.yaml`) to set the values for the `vgpuDevices` array. Here we are editing the VSphereMachineTemplate object for the worker nodes. This will create a worker node with a single NVIDIA 16GB vGPU device attached to the VM.
+Edit the generated Cluster template (`cluster-template.yaml`) to set the values for the `pciDevices` array. Here we are editing the VSphereMachineTemplate object for the worker nodes. This will create a worker node with a single NVIDIA 16GB vGPU device attached to the VM. ```yaml --- @@ -89,8 +89,8 @@ spec: storagePolicyName: '${VSPHERE_STORAGE_POLICY}' template: '${VSPHERE_TEMPLATE}' thumbprint: '${VSPHERE_TLS_THUMBPRINT}' - vgpuDevices: - - profileName: "grid_v100d-4c" # value from above + pciDevices: + - vgpuProfile: "grid_t4-1a" # value from above ``` Set the required values for the other fields and the cluster template is ready for use. The similar changes can be made to a template generated using clusterctl generate cluster command as well. diff --git a/pkg/services/govmomi/pci/device.go b/pkg/services/govmomi/pci/device.go index f92670bf9d..cc75f670bc 100644 --- a/pkg/services/govmomi/pci/device.go +++ b/pkg/services/govmomi/pci/device.go @@ -76,18 +76,28 @@ func ConstructDeviceSpecs(pciDeviceSpecs []infrav1.PCIDeviceSpec) []types.BaseVi return pciDevices } -func createBackingInfo(spec infrav1.PCIDeviceSpec) *types.VirtualPCIPassthroughDynamicBackingInfo { - return &types.VirtualPCIPassthroughDynamicBackingInfo{ - AllowedDevice: []types.VirtualPCIPassthroughAllowedDevice{ - { - VendorId: *spec.VendorID, - DeviceId: *spec.DeviceID, +func createBackingInfo(spec infrav1.PCIDeviceSpec) types.BaseVirtualDeviceBackingInfo { + if spec.VGPUProfile == "" { + return &types.VirtualPCIPassthroughDynamicBackingInfo{ + AllowedDevice: []types.VirtualPCIPassthroughAllowedDevice{ + { + VendorId: *spec.VendorID, + DeviceId: *spec.DeviceID, + }, }, - }, - CustomLabel: spec.CustomLabel, + CustomLabel: spec.CustomLabel, + } + } + + return &types.VirtualPCIPassthroughVmiopBackingInfo{ + Vgpu: spec.VGPUProfile, } } func constructKey(pciDeviceSpec infrav1.PCIDeviceSpec) string { - return fmt.Sprintf("%d-%d", *pciDeviceSpec.DeviceID, *pciDeviceSpec.VendorID) + if 
pciDeviceSpec.VGPUProfile == "" { + return fmt.Sprintf("%d-%d", *pciDeviceSpec.DeviceID, *pciDeviceSpec.VendorID) + } + + return pciDeviceSpec.VGPUProfile } diff --git a/pkg/services/govmomi/pci/device_test.go b/pkg/services/govmomi/pci/device_test.go index 74f57245c8..5f62089552 100644 --- a/pkg/services/govmomi/pci/device_test.go +++ b/pkg/services/govmomi/pci/device_test.go @@ -72,30 +72,36 @@ func Test_CalculateDevicesToBeAdded(t *testing.T) { inputs := []input{ { name: "when adding a single PCI device of each type", - expectedLen: 2, + expectedLen: 3, pciDeviceSpecs: []infrav1.PCIDeviceSpec{ {DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)}, {DeviceID: ptr.To[int32](4321), VendorID: ptr.To[int32](8765)}, + {VGPUProfile: "grid_t4-1a"}, }, assertFunc: func(g *gomega.WithT, actual []infrav1.PCIDeviceSpec) { g.Expect(*actual[0].DeviceID).To(gomega.Equal(int32(1234))) g.Expect(*actual[0].VendorID).To(gomega.Equal(int32(5678))) g.Expect(*actual[1].DeviceID).To(gomega.Equal(int32(4321))) g.Expect(*actual[1].VendorID).To(gomega.Equal(int32(8765))) + g.Expect(actual[2].VGPUProfile).To(gomega.Equal("grid_t4-1a")) }, }, { name: "when adding multiple PCI devices of a type", - expectedLen: 2, + expectedLen: 4, pciDeviceSpecs: []infrav1.PCIDeviceSpec{ {DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)}, {DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)}, + {VGPUProfile: "grid_t4-1a"}, + {VGPUProfile: "grid_t4-1a"}, }, assertFunc: func(g *gomega.WithT, actual []infrav1.PCIDeviceSpec) { g.Expect(*actual[0].DeviceID).To(gomega.Equal(int32(1234))) g.Expect(*actual[0].VendorID).To(gomega.Equal(int32(5678))) g.Expect(*actual[1].DeviceID).To(gomega.Equal(int32(1234))) g.Expect(*actual[1].VendorID).To(gomega.Equal(int32(5678))) + g.Expect(actual[2].VGPUProfile).To(gomega.Equal("grid_t4-1a")) + g.Expect(actual[3].VGPUProfile).To(gomega.Equal("grid_t4-1a")) }, }, } @@ -112,8 +118,9 @@ func Test_CalculateDevicesToBeAdded(t *testing.T) { 
pciDeviceSpecs: []infrav1.PCIDeviceSpec{ {DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)}, {DeviceID: ptr.To[int32](4321), VendorID: ptr.To[int32](8765)}, + {VGPUProfile: "grid_t4-1a"}, }, - existingDeviceSpecIndexes: []int{0, 1}, + existingDeviceSpecIndexes: []int{0, 1, 2}, }, { name: "when adding multiple PCI devices of a type", @@ -121,8 +128,10 @@ func Test_CalculateDevicesToBeAdded(t *testing.T) { pciDeviceSpecs: []infrav1.PCIDeviceSpec{ {DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)}, {DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)}, + {VGPUProfile: "grid_t4-1a"}, + {VGPUProfile: "grid_t4-1a"}, }, - existingDeviceSpecIndexes: []int{0, 1}, + existingDeviceSpecIndexes: []int{0, 1, 2, 3}, }, } for _, tt := range inputs { @@ -134,24 +143,27 @@ func Test_CalculateDevicesToBeAdded(t *testing.T) { inputs := []input{ { name: "when adding a single PCI device of each type", - expectedLen: 1, + expectedLen: 2, pciDeviceSpecs: []infrav1.PCIDeviceSpec{ {DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)}, {DeviceID: ptr.To[int32](4321), VendorID: ptr.To[int32](8765)}, + {VGPUProfile: "grid_t4-1a"}, }, existingDeviceSpecIndexes: []int{0}, assertFunc: func(g *gomega.WithT, actual []infrav1.PCIDeviceSpec) { g.Expect(*actual[0].DeviceID).To(gomega.Equal(int32(4321))) g.Expect(*actual[0].VendorID).To(gomega.Equal(int32(8765))) + g.Expect(actual[1].VGPUProfile).To(gomega.Equal("grid_t4-1a")) }, }, { name: "when adding multiple PCI devices of a type", - expectedLen: 2, + expectedLen: 3, pciDeviceSpecs: []infrav1.PCIDeviceSpec{ {DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)}, {DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)}, {DeviceID: ptr.To[int32](4321), VendorID: ptr.To[int32](8765)}, + {VGPUProfile: "grid_t4-1a"}, }, existingDeviceSpecIndexes: []int{0}, assertFunc: func(g *gomega.WithT, actual []infrav1.PCIDeviceSpec) { @@ -159,6 +171,7 @@ func Test_CalculateDevicesToBeAdded(t *testing.T) { 
g.Expect(*actual[0].VendorID).To(gomega.Equal(int32(5678))) g.Expect(*actual[1].DeviceID).To(gomega.Equal(int32(4321))) g.Expect(*actual[1].VendorID).To(gomega.Equal(int32(8765))) + g.Expect(actual[2].VGPUProfile).To(gomega.Equal("grid_t4-1a")) }, }, } diff --git a/pkg/services/govmomi/pci/vgpu.go b/pkg/services/govmomi/pci/vgpu.go deleted file mode 100644 index e4053b54ff..0000000000 --- a/pkg/services/govmomi/pci/vgpu.go +++ /dev/null @@ -1,81 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package pci - -import ( - "context" - - "github.com/vmware/govmomi/object" - "github.com/vmware/govmomi/vim25/types" - - infrav1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/v1beta1" -) - -// CalculateVGPUsToBeAdded calculates the vGPU devices which should be added to the VM. 
-func CalculateVGPUsToBeAdded(ctx context.Context, vm *object.VirtualMachine, deviceSpecs []infrav1.VGPUSpec) ([]infrav1.VGPUSpec, error) { - // store the number of expected devices for each deviceID + vendorID combo - deviceVendorIDComboMap := map[string]int{} - for _, spec := range deviceSpecs { - key := spec.ProfileName - if _, ok := deviceVendorIDComboMap[key]; !ok { - deviceVendorIDComboMap[key] = 1 - } else { - deviceVendorIDComboMap[key]++ - } - } - - devices, err := vm.Device(ctx) - if err != nil { - return nil, err - } - - specsToBeAdded := []infrav1.VGPUSpec{} - for _, spec := range deviceSpecs { - key := spec.ProfileName - pciDeviceList := devices.SelectByBackingInfo(createBackingInfoVGPU(spec)) - expectedDeviceLen := deviceVendorIDComboMap[key] - if expectedDeviceLen-len(pciDeviceList) > 0 { - specsToBeAdded = append(specsToBeAdded, spec) - deviceVendorIDComboMap[key]-- - } - } - return specsToBeAdded, nil -} - -// ConstructDeviceSpecsVGPU transforms a list of VGPUSpec into a list of BaseVirutalDevices used by govmomi. -func ConstructDeviceSpecsVGPU(vGPUDeviceSpecs []infrav1.VGPUSpec) []types.BaseVirtualDevice { - vGPUDevices := []types.BaseVirtualDevice{} - deviceKey := int32(-200) - - for _, pciDevice := range vGPUDeviceSpecs { - backingInfo := createBackingInfoVGPU(pciDevice) - vGPUDevices = append(vGPUDevices, &types.VirtualPCIPassthrough{ - VirtualDevice: types.VirtualDevice{ - Key: deviceKey, - Backing: backingInfo, - }, - }) - deviceKey-- - } - return vGPUDevices -} - -func createBackingInfoVGPU(spec infrav1.VGPUSpec) *types.VirtualPCIPassthroughVmiopBackingInfo { - return &types.VirtualPCIPassthroughVmiopBackingInfo{ - Vgpu: spec.ProfileName, - } -} diff --git a/pkg/services/govmomi/pci/vgpu_test.go b/pkg/services/govmomi/pci/vgpu_test.go deleted file mode 100644 index 5b4a572040..0000000000 --- a/pkg/services/govmomi/pci/vgpu_test.go +++ /dev/null @@ -1,156 +0,0 @@ -/* -Copyright 2023 The Kubernetes Authors. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package pci - -import ( - "context" - "testing" - - "github.com/onsi/gomega" - "github.com/vmware/govmomi/find" - "github.com/vmware/govmomi/simulator" - "github.com/vmware/govmomi/vim25" - - infrav1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/v1beta1" -) - -func Test_CalculateVGPUsToBeAdded(t *testing.T) { - type input struct { - name string - expectedLen int - existingDeviceSpecIndexes []int - vGPUDeviceSpecs []infrav1.VGPUSpec - assertFunc func(g *gomega.WithT, actual []infrav1.VGPUSpec) - } - - testFunc := func(t *testing.T, i input) { - t.Helper() - t.Run(i.name, func(t *testing.T) { - g := gomega.NewWithT(t) - simulator.Run(func(ctx context.Context, client *vim25.Client) error { - finder := find.NewFinder(client) - vm, err := finder.VirtualMachine(ctx, "DC0_H0_VM0") - if err != nil { - return err - } - - if len(i.existingDeviceSpecIndexes) > 0 { - existingDevices := []infrav1.VGPUSpec{} - for _, idx := range i.existingDeviceSpecIndexes { - existingDevices = append(existingDevices, i.vGPUDeviceSpecs[idx]) - } - g.Expect(vm.AddDevice(ctx, - ConstructDeviceSpecsVGPU(existingDevices)...)).ToNot(gomega.HaveOccurred()) - } - toBeAdded, err := CalculateVGPUsToBeAdded(ctx, vm, i.vGPUDeviceSpecs) - g.Expect(err).ToNot(gomega.HaveOccurred()) - g.Expect(toBeAdded).To(gomega.HaveLen(i.expectedLen)) - if i.assertFunc != nil { - i.assertFunc(g, toBeAdded) - } - return nil - }) - }) - } - - t.Run("when no vGPU devices exist on the VM", 
func(t *testing.T) { - inputs := []input{ - { - name: "when adding a single vGPU device of each type", - expectedLen: 2, - vGPUDeviceSpecs: []infrav1.VGPUSpec{ - {ProfileName: "1234"}, {ProfileName: "4321"}, - }, - assertFunc: func(g *gomega.WithT, actual []infrav1.VGPUSpec) { - g.Expect(actual[0].ProfileName).To(gomega.Equal("1234")) - g.Expect(actual[1].ProfileName).To(gomega.Equal("4321")) - }, - }, - { - name: "when adding multiple vGPU devices of a type", - expectedLen: 2, - vGPUDeviceSpecs: []infrav1.VGPUSpec{ - {ProfileName: "1234"}, {ProfileName: "1234"}, - }, - assertFunc: func(g *gomega.WithT, actual []infrav1.VGPUSpec) { - g.Expect(actual[0].ProfileName).To(gomega.Equal("1234")) - g.Expect(actual[1].ProfileName).To(gomega.Equal("1234")) - }, - }, - } - for _, tt := range inputs { - testFunc(t, tt) - } - }) - - t.Run("when all vGPU devices exist on the VM", func(t *testing.T) { - inputs := []input{ - { - name: "when adding a single vGPU device of each type", - expectedLen: 0, - vGPUDeviceSpecs: []infrav1.VGPUSpec{ - {ProfileName: "1234"}, {ProfileName: "4321"}, - }, - existingDeviceSpecIndexes: []int{0, 1}, - }, - { - name: "when adding multiple vGPU devices of a type", - expectedLen: 0, - vGPUDeviceSpecs: []infrav1.VGPUSpec{ - {ProfileName: "1234"}, {ProfileName: "1234"}, - }, - existingDeviceSpecIndexes: []int{0, 1}, - }, - } - for _, tt := range inputs { - testFunc(t, tt) - } - }) - - t.Run("when some vGPU devices exist on the VM", func(t *testing.T) { - inputs := []input{ - { - name: "when adding a single vGPU device of each type", - expectedLen: 1, - vGPUDeviceSpecs: []infrav1.VGPUSpec{ - {ProfileName: "1234"}, {ProfileName: "4321"}, - }, - existingDeviceSpecIndexes: []int{0}, - assertFunc: func(g *gomega.WithT, actual []infrav1.VGPUSpec) { - g.Expect(actual[0].ProfileName).To(gomega.Equal("4321")) - }, - }, - { - name: "when adding multiple vGPU devices of a type", - expectedLen: 2, - vGPUDeviceSpecs: []infrav1.VGPUSpec{ - {ProfileName: "1234"}, - 
{ProfileName: "1234"}, - {ProfileName: "4321"}, - }, - existingDeviceSpecIndexes: []int{0}, - assertFunc: func(g *gomega.WithT, actual []infrav1.VGPUSpec) { - g.Expect(actual[0].ProfileName).To(gomega.Equal("1234")) - g.Expect(actual[1].ProfileName).To(gomega.Equal("4321")) - }, - }, - } - for _, tt := range inputs { - testFunc(t, tt) - } - }) -} diff --git a/pkg/services/govmomi/service.go b/pkg/services/govmomi/service.go index e53358bbc0..92256bf2a5 100644 --- a/pkg/services/govmomi/service.go +++ b/pkg/services/govmomi/service.go @@ -538,40 +538,6 @@ func (vms *VMService) reconcilePCIDevices(ctx context.Context, virtualMachineCtx return errors.Wrapf(err, "error adding pci devices for %q", ctx) } } - if expectedVGPUs := virtualMachineCtx.VSphereVM.Spec.VirtualMachineCloneSpec.VGPUDevices; len(expectedVGPUs) != 0 { - specsToBeAdded, err := pci.CalculateVGPUsToBeAdded(ctx, virtualMachineCtx.Obj, expectedVGPUs) - if err != nil { - return err - } - - if len(specsToBeAdded) == 0 { - if conditions.Has(virtualMachineCtx.VSphereVM, infrav1.PCIDevicesDetachedCondition) { - conditions.Delete(virtualMachineCtx.VSphereVM, infrav1.PCIDevicesDetachedCondition) - } - log.V(5).Info("No new PCI devices to be added") - return nil - } - - powerState, err := virtualMachineCtx.Obj.PowerState(ctx) - if err != nil { - return err - } - if powerState == types.VirtualMachinePowerStatePoweredOn { - // This would arise only when the PCI device is manually removed from - // the VM post creation. 
- log.Info("vGPU device cannot be attached in powered on state") - conditions.MarkFalse(virtualMachineCtx.VSphereVM, - infrav1.PCIDevicesDetachedCondition, - infrav1.NotFoundReason, - clusterv1.ConditionSeverityWarning, - "vGPU devices removed after VM was powered on") - return errors.Errorf("missing vGPU devices") - } - log.Info("vGPU devices to be added", "number", len(specsToBeAdded)) - if err := virtualMachineCtx.Obj.AddDevice(ctx, pci.ConstructDeviceSpecsVGPU(specsToBeAdded)...); err != nil { - return errors.Wrapf(err, "error adding vGPU devices for %q", ctx) - } - } return nil } diff --git a/pkg/services/govmomi/vcenter/clone.go b/pkg/services/govmomi/vcenter/clone.go index 61126d7e43..0334e247e1 100644 --- a/pkg/services/govmomi/vcenter/clone.go +++ b/pkg/services/govmomi/vcenter/clone.go @@ -196,10 +196,10 @@ func Clone(ctx context.Context, vmCtx *capvcontext.VMContext, bootstrapData []by Snapshot: snapshotRef, } - // For PCI and vGPU devices, the memory for the VM needs to be reserved + // For PCI devices, the memory for the VM needs to be reserved // We can replace this once we have another way of reserving memory option // exposed via the API types. - if len(vmCtx.VSphereVM.Spec.PciDevices) > 0 || len(vmCtx.VSphereVM.Spec.VGPUDevices) > 0 { + if len(vmCtx.VSphereVM.Spec.PciDevices) > 0 { spec.Config.MemoryReservationLockedToMax = ptr.To(true) }