Skip to content

Commit

Permalink
Make VGPU directly part of PCI specs.
Browse files Browse the repository at this point in the history
  • Loading branch information
birksl committed May 30, 2024
1 parent bafb8ec commit 4cac1b4
Show file tree
Hide file tree
Showing 15 changed files with 64 additions and 366 deletions.
1 change: 0 additions & 1 deletion apis/v1alpha3/conversion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ func CustomSpecNewFieldFuzzer(in *infrav1.VirtualMachineCloneSpec, c fuzz.Contin
c.FuzzNoCustom(in)

in.PciDevices = nil
in.VGPUDevices = nil
in.AdditionalDisksGiB = nil
in.OS = ""
in.HardwareVersion = ""
Expand Down
1 change: 0 additions & 1 deletion apis/v1alpha3/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion apis/v1alpha4/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 5 additions & 12 deletions apis/v1beta1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,6 @@ type VirtualMachineCloneSpec struct {
// PciDevices is the list of pci devices used by the virtual machine.
// +optional
PciDevices []PCIDeviceSpec `json:"pciDevices,omitempty"`
// VGPUDevices is the list of vGPUs used by the virtual machine.
// +optional
VGPUDevices []VGPUSpec `json:"vgpuDevices,omitempty"`
// OS is the Operating System of the virtual machine
// Defaults to Linux
// +optional
Expand Down Expand Up @@ -261,22 +258,18 @@ type PCIDeviceSpec struct {
// virtual machine is cloned.
// +kubebuilder:validation:Required
VendorID *int32 `json:"vendorId,omitempty"`
// VGPUProfile is the profile name of a virtual machine's vGPU, in string.
// Defaults to the eponymous property value in the template from which the
// virtual machine is cloned.
// +kubebuilder:validation:Required
VGPUProfile string `json:"vgpuProfile,omitempty"`
// CustomLabel is the hardware label of a virtual machine's PCI device.
// Defaults to the eponymous property value in the template from which the
// virtual machine is cloned.
// +optional
CustomLabel string `json:"customLabel,omitempty"`
}

// VGPUSpec defines virtual machine's VGPU configuration
type VGPUSpec struct {
// ProfileName is the ProfileName of a virtual machine's vGPU, in string.
// Defaults to the eponymous property value in the template from which the
// virtual machine is cloned.
// +kubebuilder:validation:Required
ProfileName string `json:"profileName,omitempty"`
}

// NetworkSpec defines the virtual machine's network configuration.
type NetworkSpec struct {
// Devices is the list of network devices used by the virtual machine.
Expand Down
20 changes: 0 additions & 20 deletions apis/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -1229,6 +1229,11 @@ spec:
in the template from which the virtual machine is cloned.
format: int32
type: integer
vgpuProfile:
description: VGPUProfile is the profile name of a virtual machine's
vGPU, in string. Defaults to the eponymous property value
in the template from which the virtual machine is cloned.
type: string
type: object
type: array
powerOffMode:
Expand Down Expand Up @@ -1286,19 +1291,6 @@ spec:
of the communication between Cluster API Provider vSphere and the
VMware vCenter server.
type: string
vgpuDevices:
description: VGPUDevices is the list of vGPUs used by the virtual
machine.
items:
description: VGPUSpec defines virtual machine's VGPU configuration
properties:
profileName:
description: ProfileName is the ProfileName of a virtual machine's
vGPU, in string. Defaults to the eponymous property value
in the template from which the virtual machine is cloned.
type: string
type: object
type: array
required:
- network
- template
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,12 @@ spec:
machine is cloned.
format: int32
type: integer
vgpuProfile:
description: VGPUProfile is the profile name of a virtual
machine's vGPU, in string. Defaults to the eponymous
property value in the template from which the virtual
machine is cloned.
type: string
type: object
type: array
powerOffMode:
Expand Down Expand Up @@ -1202,20 +1208,6 @@ spec:
TLS certificate validation of the communication between
Cluster API Provider vSphere and the VMware vCenter server.
type: string
vgpuDevices:
description: VGPUDevices is the list of vGPUs used by the
virtual machine.
items:
description: VGPUSpec defines virtual machine's VGPU configuration
properties:
profileName:
description: ProfileName is the ProfileName of a virtual
machine's vGPU, in string. Defaults to the eponymous
property value in the template from which the virtual
machine is cloned.
type: string
type: object
type: array
required:
- network
- template
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1275,6 +1275,11 @@ spec:
in the template from which the virtual machine is cloned.
format: int32
type: integer
vgpuProfile:
description: VGPUProfile is the profile name of a virtual machine's
vGPU, in string. Defaults to the eponymous property value
in the template from which the virtual machine is cloned.
type: string
type: object
type: array
powerOffMode:
Expand Down Expand Up @@ -1328,19 +1333,6 @@ spec:
of the communication between Cluster API Provider vSphere and the
VMware vCenter server.
type: string
vgpuDevices:
description: VGPUDevices is the list of vGPUs used by the virtual
machine.
items:
description: VGPUSpec defines virtual machine's VGPU configuration
properties:
profileName:
description: ProfileName is the ProfileName of a virtual machine's
vGPU, in string. Defaults to the eponymous property value
in the template from which the virtual machine is cloned.
type: string
type: object
type: array
required:
- network
- template
Expand Down
6 changes: 3 additions & 3 deletions docs/gpu-vgpu.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ $ make dev-flavors
go run ./packaging/flavorgen --output-dir /home/user/.cluster-api/overrides/infrastructure-vsphere/v0.0.0
```
Edit the generated Cluster template (`cluster-template.yaml`) to set the values for the `vgpuDevices` array. Here we are editing the VSphereMachineTemplate object for the worker nodes. This will create a worker node with a single NVIDIA 16GB vGPU device attached to the VM.
Edit the generated Cluster template (`cluster-template.yaml`) to set the values for the `pciDevices` array. Here we are editing the VSphereMachineTemplate object for the worker nodes. This will create a worker node with a single NVIDIA 16GB vGPU device attached to the VM.
```yaml
---
Expand Down Expand Up @@ -89,8 +89,8 @@ spec:
storagePolicyName: '${VSPHERE_STORAGE_POLICY}'
template: '${VSPHERE_TEMPLATE}'
thumbprint: '${VSPHERE_TLS_THUMBPRINT}'
vgpuDevices:
- profileName: "grid_v100d-4c" # value from above
pciDevices:
- vgpuProfile: "grid_t4-1a" # value from above
```
Set the required values for the other fields and the cluster template is ready for use. Similar changes can be made to a template generated using the `clusterctl generate cluster` command as well.
Expand Down
28 changes: 19 additions & 9 deletions pkg/services/govmomi/pci/device.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,18 +76,28 @@ func ConstructDeviceSpecs(pciDeviceSpecs []infrav1.PCIDeviceSpec) []types.BaseVi
return pciDevices
}

func createBackingInfo(spec infrav1.PCIDeviceSpec) *types.VirtualPCIPassthroughDynamicBackingInfo {
return &types.VirtualPCIPassthroughDynamicBackingInfo{
AllowedDevice: []types.VirtualPCIPassthroughAllowedDevice{
{
VendorId: *spec.VendorID,
DeviceId: *spec.DeviceID,
func createBackingInfo(spec infrav1.PCIDeviceSpec) types.BaseVirtualDeviceBackingInfo {
if spec.VGPUProfile == "" {
return &types.VirtualPCIPassthroughDynamicBackingInfo{
AllowedDevice: []types.VirtualPCIPassthroughAllowedDevice{
{
VendorId: *spec.VendorID,
DeviceId: *spec.DeviceID,
},
},
},
CustomLabel: spec.CustomLabel,
CustomLabel: spec.CustomLabel,
}
}

return &types.VirtualPCIPassthroughVmiopBackingInfo{
Vgpu: spec.VGPUProfile,
}
}

func constructKey(pciDeviceSpec infrav1.PCIDeviceSpec) string {
return fmt.Sprintf("%d-%d", *pciDeviceSpec.DeviceID, *pciDeviceSpec.VendorID)
if pciDeviceSpec.VGPUProfile == "" {
return fmt.Sprintf("%d-%d", *pciDeviceSpec.DeviceID, *pciDeviceSpec.VendorID)
}

return pciDeviceSpec.VGPUProfile
}
25 changes: 19 additions & 6 deletions pkg/services/govmomi/pci/device_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,30 +72,36 @@ func Test_CalculateDevicesToBeAdded(t *testing.T) {
inputs := []input{
{
name: "when adding a single PCI device of each type",
expectedLen: 2,
expectedLen: 3,
pciDeviceSpecs: []infrav1.PCIDeviceSpec{
{DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)},
{DeviceID: ptr.To[int32](4321), VendorID: ptr.To[int32](8765)},
{VGPUProfile: "grid_t4-1a"},
},
assertFunc: func(g *gomega.WithT, actual []infrav1.PCIDeviceSpec) {
g.Expect(*actual[0].DeviceID).To(gomega.Equal(int32(1234)))
g.Expect(*actual[0].VendorID).To(gomega.Equal(int32(5678)))
g.Expect(*actual[1].DeviceID).To(gomega.Equal(int32(4321)))
g.Expect(*actual[1].VendorID).To(gomega.Equal(int32(8765)))
g.Expect(actual[2].VGPUProfile).To(gomega.Equal("grid_t4-1a"))
},
},
{
name: "when adding multiple PCI devices of a type",
expectedLen: 2,
expectedLen: 4,
pciDeviceSpecs: []infrav1.PCIDeviceSpec{
{DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)},
{DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)},
{VGPUProfile: "grid_t4-1a"},
{VGPUProfile: "grid_t4-1a"},
},
assertFunc: func(g *gomega.WithT, actual []infrav1.PCIDeviceSpec) {
g.Expect(*actual[0].DeviceID).To(gomega.Equal(int32(1234)))
g.Expect(*actual[0].VendorID).To(gomega.Equal(int32(5678)))
g.Expect(*actual[1].DeviceID).To(gomega.Equal(int32(1234)))
g.Expect(*actual[1].VendorID).To(gomega.Equal(int32(5678)))
g.Expect(actual[2].VGPUProfile).To(gomega.Equal("grid_t4-1a"))
g.Expect(actual[3].VGPUProfile).To(gomega.Equal("grid_t4-1a"))
},
},
}
Expand All @@ -112,17 +118,20 @@ func Test_CalculateDevicesToBeAdded(t *testing.T) {
pciDeviceSpecs: []infrav1.PCIDeviceSpec{
{DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)},
{DeviceID: ptr.To[int32](4321), VendorID: ptr.To[int32](8765)},
{VGPUProfile: "grid_t4-1a"},
},
existingDeviceSpecIndexes: []int{0, 1},
existingDeviceSpecIndexes: []int{0, 1, 2},
},
{
name: "when adding multiple PCI devices of a type",
expectedLen: 0,
pciDeviceSpecs: []infrav1.PCIDeviceSpec{
{DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)},
{DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)},
{VGPUProfile: "grid_t4-1a"},
{VGPUProfile: "grid_t4-1a"},
},
existingDeviceSpecIndexes: []int{0, 1},
existingDeviceSpecIndexes: []int{0, 1, 2, 3},
},
}
for _, tt := range inputs {
Expand All @@ -134,31 +143,35 @@ func Test_CalculateDevicesToBeAdded(t *testing.T) {
inputs := []input{
{
name: "when adding a single PCI device of each type",
expectedLen: 1,
expectedLen: 2,
pciDeviceSpecs: []infrav1.PCIDeviceSpec{
{DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)},
{DeviceID: ptr.To[int32](4321), VendorID: ptr.To[int32](8765)},
{VGPUProfile: "grid_t4-1a"},
},
existingDeviceSpecIndexes: []int{0},
assertFunc: func(g *gomega.WithT, actual []infrav1.PCIDeviceSpec) {
g.Expect(*actual[0].DeviceID).To(gomega.Equal(int32(4321)))
g.Expect(*actual[0].VendorID).To(gomega.Equal(int32(8765)))
g.Expect(actual[1].VGPUProfile).To(gomega.Equal("grid_t4-1a"))
},
},
{
name: "when adding multiple PCI devices of a type",
expectedLen: 2,
expectedLen: 3,
pciDeviceSpecs: []infrav1.PCIDeviceSpec{
{DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)},
{DeviceID: ptr.To[int32](1234), VendorID: ptr.To[int32](5678)},
{DeviceID: ptr.To[int32](4321), VendorID: ptr.To[int32](8765)},
{VGPUProfile: "grid_t4-1a"},
},
existingDeviceSpecIndexes: []int{0},
assertFunc: func(g *gomega.WithT, actual []infrav1.PCIDeviceSpec) {
g.Expect(*actual[0].DeviceID).To(gomega.Equal(int32(1234)))
g.Expect(*actual[0].VendorID).To(gomega.Equal(int32(5678)))
g.Expect(*actual[1].DeviceID).To(gomega.Equal(int32(4321)))
g.Expect(*actual[1].VendorID).To(gomega.Equal(int32(8765)))
g.Expect(actual[2].VGPUProfile).To(gomega.Equal("grid_t4-1a"))
},
},
}
Expand Down
Loading

0 comments on commit 4cac1b4

Please sign in to comment.