add gpu resource driver helm chart (#44)

* add gpu resource driver helm chart Signed-off-by: Oksana Baranova <[email protected]>
intel · Nov 21, 2023 · d060265 · d060265
1 parent 18b9535
commit d060265
Show file tree

Hide file tree

Showing 16 changed files with 863 additions and 0 deletions.
diff --git a/charts/intel-gpu-resource-driver/.helmignore b/charts/intel-gpu-resource-driver/.helmignore
@@ -0,0 +1,18 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/charts/intel-gpu-resource-driver/Chart.yaml b/charts/intel-gpu-resource-driver/Chart.yaml
@@ -0,0 +1,7 @@
+apiVersion: v2  # chart API version
+name: intel-gpu-resource-driver  # same as the chart directory under charts/
+description: A Helm chart for a Dynamic Resource Allocation (DRA) GPU Resource Driver
+
+type: application  # an installable chart, as opposed to a library chart
+version: 0.2.0  # version of the chart itself
+appVersion: "v0.2.0"  # quoted string; same value the README documents for image.tag
diff --git a/charts/intel-gpu-resource-driver/LICENSE b/charts/intel-gpu-resource-driver/LICENSE
@@ -0,0 +1,14 @@
+Copyright 2023 Intel Corporation
+SPDX-License-Identifier: Apache-2.0
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/charts/intel-gpu-resource-driver/README.md b/charts/intel-gpu-resource-driver/README.md
@@ -0,0 +1,40 @@
+# Dynamic Resource Allocation (DRA) GPU Driver Helm Chart
+
+## Get Helm Repository Info
+```
+helm repo add intel https://intel.github.io/helm-charts/
+helm repo update
+```
+
+Optionally, run `helm search repo intel` to list the charts available in the added repository.
+
+## Install Helm Chart
+The GPU driver's CRDs are installed first, as part of the chart.
+
+```
+helm install intel-gpu-resource-driver intel/intel-gpu-resource-driver \
+--create-namespace --namespace intel-gpu-resource-driver
+```
+## Upgrade Chart
+```
+helm upgrade intel-gpu-resource-driver intel/intel-gpu-resource-driver --namespace intel-gpu-resource-driver [flags]
+```
+
+## Uninstall Chart
+```
+helm uninstall intel-gpu-resource-driver --namespace intel-gpu-resource-driver
+```
+
+## Configuration
+See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments:
+
+```console
+helm show values intel/intel-gpu-resource-driver
+```
+
+You may also run `helm show values` on this chart's dependencies for additional options.
+
+|parameter| value |
+|---------|-----------|
+| `image.repository` | `intel` |
+| `image.tag` | `v0.2.0` |
diff --git a/charts/intel-gpu-resource-driver/crds/gpu.resource.intel.com_gpuallocationstates.yaml b/charts/intel-gpu-resource-driver/crds/gpu.resource.intel.com_gpuallocationstates.yaml
@@ -0,0 +1,205 @@
+---  # CRD: GpuAllocationState, the state required for GPU allocation on a node
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.13.0  # written by controller-gen; presumably regenerated, not hand-edited
+  name: gpuallocationstates.gpu.resource.intel.com  # <plural>.<group>
+spec:
+  group: gpu.resource.intel.com
+  names:
+    kind: GpuAllocationState
+    listKind: GpuAllocationStateList
+    plural: gpuallocationstates
+    singular: gas  # NOTE(review): abbreviation rather than the lowercased kind; confirm intended
+  scope: Namespaced  # instances are namespaced objects
+  versions:
+  - name: v1alpha2  # the only version in this file; served and stored (see end of entry)
+    schema:
+      openAPIV3Schema:
+        description: GpuAllocationState holds the state required for allocation on
+          a node.
+        properties:
+          apiVersion:
+            description: 'APIVersion defines the versioned schema of this representation
+              of an object. Servers should convert recognized schemas to the latest
+              internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
+            type: string
+          kind:
+            description: 'Kind is a string value representing the REST resource this
+              object represents. Servers may infer this from the endpoint the client
+              submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: GpuAllocationStateSpec is the spec for the GpuAllocationState
+              CRD.
+            properties:
+              allocatableDevices:  # map of AllocatableGpu; keys presumably device uids (TODO confirm)
+                additionalProperties:  # schema applied to every value of the map
+                  description: AllocatableGpu represents an allocatable Gpu on a node.
+                  properties:
+                    ecc:
+                      description: True if ECC is enabled, might impact memory amount
+                        and VF profiles.
+                      type: boolean
+                    maxvfs:
+                      description: Greater than 0 if SR-IOV is supported / enabled.
+                      format: int64
+                      type: integer
+                    memory:
+                      description: Amount of local memory in MiB.
+                      format: int64
+                      maximum: 1048576  # 1048576 MiB = 1 TiB
+                      minimum: 0
+                      type: integer
+                    millicores:
+                      description: Amount of GPU millicores.
+                      format: int64
+                      maximum: 1000
+                      minimum: 0
+                      type: integer
+                    model:
+                      description: pci-id of the Gpu device.
+                      type: string
+                    parentuid:
+                      description: Device where VF should be / is provisioned.
+                      type: string
+                    type:
+                      description: 'Type of the device: bare-metal Gpu or SR-IOV Virtual
+                        Function (VF).'
+                      enum:
+                      - gpu
+                      - vf
+                      - any  # accepted value that the description above does not mention
+                      type: string
+                    uid:
+                      description: 'Unique identifier of device: PCI address and PCI
+                        Device ID.'
+                      type: string
+                  required:  # all eight AllocatableGpu properties are mandatory
+                  - ecc
+                  - maxvfs
+                  - memory
+                  - millicores
+                  - model
+                  - parentuid
+                  - type
+                  - uid
+                  type: object
+                type: object
+              allocatedClaims:  # map of per-claim allocations; keys presumably claim uids (TODO confirm)
+                additionalProperties:  # schema applied to every value of the map
+                  description: Resources that were allocated for the claim by controller.
+                  properties:
+                    gpus:
+                      description: AllocatedGpus represents a list of allocated devices
+                        on a node.
+                      items:
+                        description: AllocatedGpu represents an allocated Gpu on a
+                          node.
+                        properties:
+                          memory:
+                            description: Amount of local memory in MiB.
+                            format: int64
+                            maximum: 1048576  # 1048576 MiB = 1 TiB
+                            minimum: 0
+                            type: integer
+                          millicores:
+                            description: Amount of GPU millicores.
+                            format: int64
+                            maximum: 1000
+                            minimum: 0
+                            type: integer
+                          parentuid:
+                            description: Device where VF should be / is provisioned.
+                            type: string
+                          profile:
+                            description: Virtual Function profile defines amount of
+                              local memory and time slice VF gets.
+                            type: string
+                          type:
+                            description: 'Type of the device: bare-metal Gpu or SR-IOV
+                              Virtual Function (VF).'
+                            enum:
+                            - gpu
+                            - vf
+                            - any  # accepted value that the description above does not mention
+                            type: string
+                          uid:
+                            description: 'Unique identifier of device: PCI address
+                              and PCI Device ID.'
+                            type: string
+                        required:  # all six AllocatedGpu properties are mandatory
+                        - memory
+                        - millicores
+                        - parentuid
+                        - profile
+                        - type
+                        - uid
+                        type: object
+                      maxItems: 8  # at most 8 entries in one claim's gpus list
+                      type: array
+                    owner:
+                      description: Pod UID, for delayed allocation to match Resource
+                        Claims of same Pod when allocating VFs.
+                      type: string
+                  required:
+                  - gpus
+                  - owner
+                  type: object
+                type: object
+              preparedClaims:  # map of AllocatedGpu lists; keys presumably claim uids (TODO confirm)
+                additionalProperties:  # each map value is an array, not an object
+                  description: Resources prepared for the claim by kubelet-plugin.
+                  items:
+                    description: AllocatedGpu represents an allocated Gpu on a node.
+                    properties:
+                      memory:
+                        description: Amount of local memory in MiB.
+                        format: int64
+                        maximum: 1048576  # 1048576 MiB = 1 TiB
+                        minimum: 0
+                        type: integer
+                      millicores:
+                        description: Amount of GPU millicores.
+                        format: int64
+                        maximum: 1000
+                        minimum: 0
+                        type: integer
+                      parentuid:
+                        description: Device where VF should be / is provisioned.
+                        type: string
+                      profile:
+                        description: Virtual Function profile defines amount of local
+                          memory and time slice VF gets.
+                        type: string
+                      type:
+                        description: 'Type of the device: bare-metal Gpu or SR-IOV
+                          Virtual Function (VF).'
+                        enum:
+                        - gpu
+                        - vf
+                        - any  # accepted value that the description above does not mention
+                        type: string
+                      uid:
+                        description: 'Unique identifier of device: PCI address and
+                          PCI Device ID.'
+                        type: string
+                    required:  # same six mandatory fields as allocatedClaims.gpus items
+                    - memory
+                    - millicores
+                    - parentuid
+                    - profile
+                    - type
+                    - uid
+                    type: object
+                  type: array  # unlike allocatedClaims.gpus, no maxItems bound is set here
+                type: object
+            type: object
+          status:
+            type: string  # NOTE(review): plain string; no status subresource is declared for this version
+        type: object
+    served: true  # this version is exposed through the API
+    storage: true  # this version is the one persisted
diff --git a/charts/intel-gpu-resource-driver/crds/gpu.resource.intel.com_gpuclaimparameters.yaml b/charts/intel-gpu-resource-driver/crds/gpu.resource.intel.com_gpuclaimparameters.yaml
@@ -0,0 +1,74 @@
+---  # CRD: GpuClaimParameters, parameters supplied when creating a GPU resource claim
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.13.0  # written by controller-gen; presumably regenerated, not hand-edited
+  name: gpuclaimparameters.gpu.resource.intel.com  # <plural>.<group>
+spec:
+  group: gpu.resource.intel.com
+  names:
+    kind: GpuClaimParameters
+    listKind: GpuClaimParametersList
+    plural: gpuclaimparameters
+    singular: gpuclaimparameters  # identical to the plural form
+  scope: Namespaced  # instances are namespaced objects
+  versions:
+  - name: v1alpha2  # the only version in this file; served and stored (see end of entry)
+    schema:
+      openAPIV3Schema:
+        description: GpuClaimParameters holds the set of parameters provided when
+          creating a resource claim for a GPU.
+        properties:
+          apiVersion:
+            description: 'APIVersion defines the versioned schema of this representation
+              of an object. Servers should convert recognized schemas to the latest
+              internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
+            type: string
+          kind:
+            description: 'Kind is a string value representing the REST resource this
+              object represents. Servers may infer this from the endpoint the client
+              submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: GpuClaimParametersSpec is the spec for the GpuClaimParameters
+              CRD.
+            properties:
+              count:
+                description: How many items of the Type are being requested. 10 PCIe
+                  devices x 64 SR-IOV VFs each = 640 items maximum on one Node.
+                format: int64
+                maximum: 640  # 10 devices x 64 VFs, per the description above
+                minimum: 1  # a claim must request at least one item
+                type: integer
+              memory:
+                description: Per GPU memory request, in MiB, maximum 1048576 (1 TiB)
+                format: int64
+                maximum: 1048576
+                minimum: 8  # requests below 8 MiB are rejected by the schema
+                type: integer
+              millicores:
+                description: Per GPU millicores request.
+                format: int64
+                maximum: 1000
+                minimum: 1
+                type: integer
+              shareable:
+                description: True if the same ResourceClaim can be shared by multiple
+                  Pods.
+                type: boolean
+              type:
+                description: 'Type of the GPU device: physical or virtual or any.'
+                enum:
+                - gpu  # presumably the "physical" case from the description
+                - vf  # presumably the "virtual" case from the description
+                - any
+                type: string
+            required:  # count is the only mandatory spec field
+            - count
+            type: object
+        type: object
+    served: true  # this version is exposed through the API
+    storage: true  # this version is the one persisted