vGPU implementation
- Builds on the changes in #1579

Co-authored-by: Geetika Batra <[email protected]>
Signed-off-by: Puneet Katyal <[email protected]>
puneetkatyal and geetikabatra committed Aug 23, 2023
1 parent 9881385 commit 5fabdad
Showing 15 changed files with 274 additions and 5 deletions.
2 changes: 2 additions & 0 deletions Makefile
@@ -310,6 +310,8 @@ generate-e2e-templates-main: $(KUSTOMIZE) ## Generate test templates for the mai
"$(KUSTOMIZE)" --load-restrictor LoadRestrictionsNone build $(E2E_TEMPLATE_DIR)/main/topology > $(E2E_TEMPLATE_DIR)/main/cluster-template-topology.yaml
# for PCI passthrough template
"$(KUSTOMIZE)" --load-restrictor LoadRestrictionsNone build $(E2E_TEMPLATE_DIR)/main/pci > $(E2E_TEMPLATE_DIR)/main/cluster-template-pci.yaml
# for vGPU template
"$(KUSTOMIZE)" --load-restrictor LoadRestrictionsNone build $(E2E_TEMPLATE_DIR)/main/vgpu > $(E2E_TEMPLATE_DIR)/main/cluster-template-vgpu.yaml
# for DHCP overrides
"$(KUSTOMIZE)" --load-restrictor LoadRestrictionsNone build $(E2E_TEMPLATE_DIR)/main/dhcp-overrides > $(E2E_TEMPLATE_DIR)/main/cluster-template-dhcp-overrides.yaml

1 change: 1 addition & 0 deletions apis/v1alpha3/conversion_test.go
@@ -123,6 +123,7 @@ func CustomSpecNewFieldFuzzer(in *nextver.VirtualMachineCloneSpec, c fuzz.Contin
	c.FuzzNoCustom(in)

	in.PciDevices = nil
	in.VGPUDevices = nil
	in.AdditionalDisksGiB = nil
	in.OS = ""
	in.HardwareVersion = ""
1 change: 1 addition & 0 deletions apis/v1alpha3/zz_generated.conversion.go

(generated file; diff not rendered)

1 change: 1 addition & 0 deletions apis/v1alpha4/zz_generated.conversion.go

(generated file; diff not rendered)

12 changes: 12 additions & 0 deletions apis/v1beta1/types.go
@@ -194,6 +194,9 @@ type VirtualMachineCloneSpec struct {
	// PciDevices is the list of pci devices used by the virtual machine.
	// +optional
	PciDevices []PCIDeviceSpec `json:"pciDevices,omitempty"`
	// VGPUDevices is the list of vGPUs used by the virtual machine.
	// +optional
	VGPUDevices []VGPUSpec `json:"vgpuDevices,omitempty"`
	// OS is the Operating System of the virtual machine
	// Defaults to Linux
	// +optional
@@ -261,6 +264,15 @@ type PCIDeviceSpec struct {
	VendorID *int32 `json:"vendorId,omitempty"`
}

// VGPUSpec defines virtual machine's VGPU configuration
type VGPUSpec struct {
	// ProfileName is the ProfileName of a virtual machine's vGPU, in string.
	// Defaults to the eponymous property value in the template from which the
	// virtual machine is cloned.
	// +kubebuilder:validation:Required
	ProfileName string `json:"profileName,omitempty"`
}

// NetworkSpec defines the virtual machine's network configuration.
type NetworkSpec struct {
	// Devices is the list of network devices used by the virtual machine.
20 changes: 20 additions & 0 deletions apis/v1beta1/zz_generated.deepcopy.go

(generated file; diff not rendered)

@@ -1274,6 +1274,19 @@ spec:
of the communication between Cluster API Provider vSphere and the
VMware vCenter server.
type: string
vgpuDevices:
description: VGPUDevices is the list of vGPUs used by the virtual
machine.
items:
description: VGPUSpec defines virtual machine's VGPU configuration
properties:
profileName:
description: ProfileName is the ProfileName of a virtual machine's
vGPU, in string. Defaults to the eponymous property value
in the template from which the virtual machine is cloned.
type: string
type: object
type: array
required:
- network
- template
@@ -1187,6 +1187,20 @@ spec:
TLS certificate validation of the communication between
Cluster API Provider vSphere and the VMware vCenter server.
type: string
vgpuDevices:
description: VGPUDevices is the list of vGPUs used by the
virtual machine.
items:
description: VGPUSpec defines virtual machine's VGPU configuration
properties:
profileName:
description: ProfileName is the ProfileName of a virtual
machine's vGPU, in string. Defaults to the eponymous
property value in the template from which the virtual
machine is cloned.
type: string
type: object
type: array
required:
- network
- template
@@ -1316,6 +1316,19 @@ spec:
of the communication between Cluster API Provider vSphere and the
VMware vCenter server.
type: string
vgpuDevices:
description: VGPUDevices is the list of vGPUs used by the virtual
machine.
items:
description: VGPUSpec defines virtual machine's VGPU configuration
properties:
profileName:
description: ProfileName is the ProfileName of a virtual machine's
vGPU, in string. Defaults to the eponymous property value
in the template from which the virtual machine is cloned.
type: string
type: object
type: array
required:
- network
- template
107 changes: 107 additions & 0 deletions docs/gpu-vgpu.md
@@ -0,0 +1,107 @@
# GPU enabled clusters using vGPU

## Overview

You can create a cluster in which both the worker and control plane nodes have vGPU devices attached to them.

Before we begin, a few important things to note:

- [NVIDIA GPU Operator](https://github.com/NVIDIA/gpu-operator) is used to expose the GPU PCI devices to the workloads running on the cluster.
- The OVA templates used for cluster creation should have the VMX version (Virtual Hardware) set to 17 or higher. This is necessary because Dynamic DirectPath I/O was introduced in this version, which enables the Assignable Hardware intelligence for passthrough devices.
- Since the VMX version must be >= 17, this way of provisioning GPU-enabled clusters works for vSphere 7.0 and above. See the ESXi/VMX version [compatibility list](https://kb.vmware.com/s/article/2007240). A quick way to check a template's hardware version is sketched after this list.
- UEFI boot mode is recommended for the OVAs used for cluster creation.
- Most of the setup is similar to [GPU enabled clusters via PCI Passthrough](https://github.com/kubernetes-sigs/cluster-api-provider-vsphere/blob/main/docs/gpu-pci.md#create-the-cluster).
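
To check whether an existing template meets the hardware version requirement, you can query it up front. Below is a minimal sketch using [govc](https://github.com/vmware/govmomi/tree/main/govc), assuming the usual `GOVC_*` environment variables are set; the datacenter and template names are placeholders.

```shell
# A template compatible with Dynamic DirectPath I/O reports "vmx-17" or higher.
$ govc object.collect -s "/my-datacenter/vm/my-ova-template" config.version
```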

## An example GPU enabled cluster

Let's create a CAPV cluster with vGPU enabled nodes.

### Prerequisites

- Refer to the [NVIDIA Virtual GPU Software Quick Start Guide](https://docs.nvidia.com/grid/latest/grid-software-quick-start-guide/index.html) to download and install the vGPU software and configure vGPU licensing.

- Ensure vGPU compatibility for your vSphere installation and the GPU devices using the [VMware Compatibility Guide - Shared Pass-through Graphics](https://www.vmware.com/resources/compatibility/search.php?deviceCategory=vgpu).

- Enable Shared Passthrough for the GPU device on the ESXi host (a command-line verification sketch follows this list)
- Browse to a host in the vSphere Client navigator.
- On the **Configure** tab, expand **Hardware** and click **Graphics**.
- Under **GRAPHICS DEVICES**, select the GPU device to be used for vGPU, click **EDIT...** and select **Shared Direct**. Repeat this for additional GPU devices as needed.
  - Select **HOST GRAPHICS**, click **EDIT...**, select **Shared Direct**, and choose a shared passthrough GPU assignment policy, for example **Group VMs on GPU until full (GPU consolidation)**.

- Build an OVA template
  We can build a custom OVA template using the [image-builder](https://github.com/kubernetes-sigs/image-builder) project. We will build an Ubuntu 20.04 OVA with UEFI boot mode. More documentation on how to use image-builder can be found in the [image-builder book](https://image-builder.sigs.k8s.io/capi/providers/vsphere.html).
- Clone the repo locally and go to the `./images/capi/` directory.
- Create a `packer-vars.json` file with the following content.

```shell
$ cat packer-vars.json
{
"vmx_version": 17
}
```

- Run the Makefile target associated with the Ubuntu 20.04 UEFI OVA as follows:

```shell
$ PACKER_VAR_FILES=packer-vars.json make build-node-ova-vsphere-ubuntu-2004-efi
```
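
Before moving on, you can optionally verify the Shared Direct configuration from the ESXi shell, as mentioned in the Shared Passthrough step above. This is a sketch assuming SSH access to the ESXi host:

```shell
# List the graphics devices on the host and their configured type.
$ esxcli graphics device list
# Show the host defaults; the graphics type should report as SharedPassthru.
$ esxcli graphics host get
```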

### Source the vGPU profile(s) for the GPU device

See "2. Choosing the vGPU Profile for the Virtual Machine" at [Using GPUs with Virtual Machines on vSphere](https://blogs.vmware.com/apps/2018/09/using-gpus-with-virtual-machines-on-vsphere-part-3-installing-the-nvidia-grid-technology.html) to see what vGPU profiles are available for your GPU device.

We are using NVIDIA Tesla V100 32GB cards for this example with the `grid_v100d-4c` vGPU profile, which allocates 4 GB of GPU memory to each worker node's vGPU device.
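
If the NVIDIA vGPU Manager is already installed on the ESXi host, you can also list the profiles directly from the host shell. This is a sketch assuming SSH access and the NVIDIA host driver:

```shell
# vGPU types supported by the GPUs in this host.
$ nvidia-smi vgpu --supported
# vGPU types that can still be created, given the profiles already in use.
$ nvidia-smi vgpu --creatable
```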

### Create the cluster template

```shell
$ make dev-flavors
/Applications/Xcode.app/Contents/Developer/usr/bin/make generate-flavors FLAVOR_DIR=/Users/pkatyal/.cluster-api/overrides/infrastructure-vsphere/v0.0.0
go run ./packaging/flavorgen --output-dir /Users/pkatyal/.cluster-api/overrides/infrastructure-vsphere/v0.0.0
```

Edit the generated cluster template (`cluster-template.yaml`) to set the values for the `vgpuDevices` array. Here we are editing the `VSphereMachineTemplate` object for the worker nodes. This creates worker nodes with a single NVIDIA vGPU device, using the 4 GB `grid_v100d-4c` profile, attached to each VM.

```yaml
---
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: VSphereMachineTemplate
metadata:
  name: ${CLUSTER_NAME}-worker
  namespace: '${NAMESPACE}'
spec:
  template:
    spec:
      cloneMode: linkedClone
      datacenter: '${VSPHERE_DATACENTER}'
      datastore: '${VSPHERE_DATASTORE}'
      diskGiB: 25
      folder: '${VSPHERE_FOLDER}'
      memoryMiB: 8192
      network:
        devices:
        - dhcp4: true
          networkName: '${VSPHERE_NETWORK}'
      numCPUs: 2
      os: Linux
      powerOffMode: trySoft
      resourcePool: '${VSPHERE_RESOURCE_POOL}'
      server: '${VSPHERE_SERVER}'
      storagePolicyName: '${VSPHERE_STORAGE_POLICY}'
      template: '${VSPHERE_TEMPLATE}'
      thumbprint: '${VSPHERE_TLS_THUMBPRINT}'
      vgpuDevices:
      - profileName: "grid_v100d-4c" # vGPU profile selected above
```

Set the required values for the other fields, and the cluster template is ready for use. Similar changes can be made to a template generated with the `clusterctl generate cluster` command as well.
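
For example, with the required variables exported, generating a template and adding the vGPU section might look like the following sketch; the cluster name and Kubernetes version are placeholders:

```shell
$ clusterctl generate cluster vgpu-cluster \
    --infrastructure vsphere \
    --kubernetes-version v1.27.3 \
    --control-plane-machine-count 1 \
    --worker-machine-count 2 > cluster.yaml
# Then edit cluster.yaml to add the vgpuDevices section to the worker
# VSphereMachineTemplate, as shown above.
```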

### Create the cluster

Size the GPU nodes appropriately, since the NVIDIA GPU Operator requires additional CPU and memory to install the device drivers on the VMs.

**Note:** For GPU nodes (PCI passthrough or vGPU), all of the node's memory must be reserved. CAPV does this automatically for nodes that have a PCI passthrough GPU or a vGPU device in their spec. See "Memory Reservation" at [Using GPUs with Virtual Machines on vSphere](https://blogs.vmware.com/apps/2018/09/using-gpus-with-virtual-machines-on-vsphere-part-2-vmdirectpath-i-o.html).

Apply the manifest from the previous step to your management cluster to have CAPV create a workload cluster with vGPU-enabled worker nodes.
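
A sketch of this step, assuming the edited manifest was saved as `cluster.yaml` and your kubeconfig points at the management cluster:

```shell
$ kubectl apply -f cluster.yaml
# After the workload cluster is up and the NVIDIA GPU Operator is installed,
# the vGPU should appear as an allocatable resource on the worker nodes.
$ kubectl get nodes -o json | jq '.items[].status.allocatable["nvidia.com/gpu"]'
```
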
From this point on, the setup is exactly the same as [GPU enabled clusters via PCI Passthrough](https://github.com/kubernetes-sigs/cluster-api-provider-vsphere/blob/main/docs/gpu-pci.md#create-the-cluster).
76 changes: 71 additions & 5 deletions pkg/services/govmomi/vcenter/clone.go
@@ -68,7 +68,7 @@ func Clone(ctx *context.VMContext, bootstrapData []byte, format bootstrapv1.Form
 		}
 	}
 	if ctx.VSphereVM.Spec.CustomVMXKeys != nil {
-		ctx.Logger.Info("applied custom vmx keys o VM clone spec")
+		ctx.Logger.Info("applied custom vmx keys to VM clone spec")
 		if err := extraConfig.SetCustomVMXKeys(ctx.VSphereVM.Spec.CustomVMXKeys); err != nil {
 			return err
 		}
@@ -151,8 +151,22 @@ func Clone(ctx *context.VMContext, bootstrapData []byte, format bootstrapv1.Form

 	deviceSpecs = append(deviceSpecs, networkSpecs...)
 
-	if err != nil {
-		return errors.Wrapf(err, "error getting network specs for %q", ctx)
+	if len(ctx.VSphereVM.Spec.VirtualMachineCloneSpec.PciDevices) != 0 {
+		gpuSpecs, err := getGpuSpecs(ctx)
+		if err != nil {
+			return errors.Wrapf(err, "error getting gpu specs for %q", ctx)
+		}
+		ctx.Logger.V(4).Info("created gpu devices", "gpu-device-specs", gpuSpecs)
+		deviceSpecs = append(deviceSpecs, gpuSpecs...)
+	}
+
+	if len(ctx.VSphereVM.Spec.VirtualMachineCloneSpec.VGPUDevices) != 0 {
+		vgpuSpecs, err := getVgpuSpecs(ctx)
+		if err != nil {
+			return errors.Wrapf(err, "error getting vgpu specs for %q", ctx)
+		}
+		ctx.Logger.V(4).Info("created vgpu devices", "vgpu-device-specs", vgpuSpecs)
+		deviceSpecs = append(deviceSpecs, vgpuSpecs...)
 	}

numCPUs := ctx.VSphereVM.Spec.NumCPUs
@@ -199,10 +213,10 @@ func Clone(ctx *context.VMContext, bootstrapData []byte, format bootstrapv1.Form
 		Snapshot: snapshotRef,
 	}
 
-	// For PCI devices, the memory for the VM needs to be reserved
+	// For PCI and vGPU devices, the memory for the VM needs to be reserved
 	// We can replace this once we have another way of reserving memory option
 	// exposed via the API types.
-	if len(ctx.VSphereVM.Spec.PciDevices) > 0 {
+	if len(ctx.VSphereVM.Spec.VirtualMachineCloneSpec.PciDevices) > 0 || len(ctx.VSphereVM.Spec.VirtualMachineCloneSpec.VGPUDevices) > 0 {
 		spec.Config.MemoryReservationLockedToMax = pointer.Bool(true)
 	}

@@ -453,3 +467,55 @@ func getNetworkSpecs(ctx *context.VMContext, devices object.VirtualDeviceList) (

	return deviceSpecs, nil
}

func createPCIPassThroughDevice(deviceKey int32, backingInfo types.BaseVirtualDeviceBackingInfo) types.BaseVirtualDevice {
	device := &types.VirtualPCIPassthrough{
		VirtualDevice: types.VirtualDevice{
			Key:     deviceKey,
			Backing: backingInfo,
		},
	}
	return device
}

func getGpuSpecs(ctx *context.VMContext) ([]types.BaseVirtualDeviceConfigSpec, error) {
	deviceSpecs := []types.BaseVirtualDeviceConfigSpec{}
	deviceKey := int32(-200)

	for _, pciDevice := range ctx.VSphereVM.Spec.VirtualMachineCloneSpec.PciDevices {
		backingInfo := &types.VirtualPCIPassthroughDynamicBackingInfo{
			AllowedDevice: []types.VirtualPCIPassthroughAllowedDevice{
				{
					VendorId: *pciDevice.VendorID,
					DeviceId: *pciDevice.DeviceID,
				},
			},
		}
		dynamicDirectPathDevice := createPCIPassThroughDevice(deviceKey, backingInfo)
		deviceSpecs = append(deviceSpecs, &types.VirtualDeviceConfigSpec{
			Device:    dynamicDirectPathDevice,
			Operation: types.VirtualDeviceConfigSpecOperationAdd,
		})
		deviceKey--
	}
	return deviceSpecs, nil
}

func getVgpuSpecs(ctx *context.VMContext) ([]types.BaseVirtualDeviceConfigSpec, error) {
	deviceSpecs := []types.BaseVirtualDeviceConfigSpec{}
	deviceKey := int32(-200)

	for _, vGPUDevice := range ctx.VSphereVM.Spec.VirtualMachineCloneSpec.VGPUDevices {
		backingInfo := &types.VirtualPCIPassthroughVmiopBackingInfo{
			Vgpu: vGPUDevice.ProfileName,
		}
		dynamicDirectPathDevice := createPCIPassThroughDevice(deviceKey, backingInfo)
		deviceSpecs = append(deviceSpecs, &types.VirtualDeviceConfigSpec{
			Device:    dynamicDirectPathDevice,
			Operation: types.VirtualDeviceConfigSpecOperationAdd,
		})
		ctx.Logger.V(4).Info("created vGPU device", "vgpu-profile", vGPUDevice.ProfileName)
		deviceKey--
	}
	return deviceSpecs, nil
}
1 change: 1 addition & 0 deletions test/e2e/config/vsphere-ci.yaml
@@ -121,6 +121,7 @@ variables:
  # These IDs correspond to Tesla T4s, they are the decimal representation of the hex values.
  DEVICE_ID: 7864
  VENDOR_ID: 4318
  PROFILE_NAME: grid_v100d-4c
  # CAPV feature flags
  EXP_NODE_ANTI_AFFINITY: "true"

1 change: 1 addition & 0 deletions test/e2e/config/vsphere-dev.yaml
@@ -135,6 +135,7 @@ variables:
  # These IDs correspond to Tesla T4s, they are the decimal representation of the hex values.
  DEVICE_ID: 7864
  VENDOR_ID: 4318
  PROFILE_NAME: grid_v100d-4c
  # CAPV feature flags
  EXP_NODE_ANTI_AFFINITY: "true"
  # Following CAPV variables is used for multivc_test.go. This is the second VSphere and should be set if multivc test is enabled.
@@ -0,0 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../base
patchesStrategicMerge:
- vgpu-device-template.yaml