From 17d9bca6c614dfd163914a9743e44d3142703459 Mon Sep 17 00:00:00 2001 From: amaslennikov Date: Mon, 18 Dec 2023 12:24:52 +0300 Subject: [PATCH 1/4] Configure resource requirements in the helm chart Signed-off-by: amaslennikov --- deployment/network-operator/README.md | 210 ++++++++++-------- ...anox.com_v1alpha1_nicclusterpolicy_cr.yaml | 30 +++ deployment/network-operator/values.yaml | 88 ++++++++ hack/templates/values/values.template | 88 ++++++++ 4 files changed, 326 insertions(+), 90 deletions(-) diff --git a/deployment/network-operator/README.md b/deployment/network-operator/README.md index 3c0afbdd..36ea8b1b 100644 --- a/deployment/network-operator/README.md +++ b/deployment/network-operator/README.md @@ -362,26 +362,27 @@ parameters. ### General parameters -| Name | Type | Default | Description | -|------------------------------------------------------|--------|------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `operator.admissionController.enabled` | bool | `False` | deploy with admission controller webhook | -| `operator.admissionController.useCertManager` | bool | `False` | use cert-manager for generating self-signed certificate | -| `operator.admissionController.certificate.tlsCrt` | string | `` | External certificate crt. Ignored if cert-manager is used. | -| `operator.admissionController.certificate.tlsKey` | string | `` | External certificate key. Ignored if cert-manager is used. 
| -| `nfd.enabled` | bool | `True` | deploy Node Feature Discovery | -| `nfd.deployNodeFeatureRules` | bool | `True` | deploy Node Feature Rules to label the nodes | -| `sriovNetworkOperator.enabled` | bool | `False` | deploy SR-IOV Network Operator | -| `upgradeCRDs` | bool | `True` | enable CRDs upgrade with helm pre-install and pre-upgrade hooks | -| `sriovNetworkOperator.configDaemonNodeSelectorExtra` | object | `{"node-role.kubernetes.io/worker": ""}` | Additional nodeSelector for sriov-network-operator config daemon. These values will be added in addition to default values managed by the network-operator. | -| `imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling any of the Network Operator image if it's not overrided | -| `operator.repository` | string | `nvcr.io/nvidia/cloud-native` | Network Operator image repository | -| `operator.image` | string | `network-operator` | Network Operator image name | -| `operator.tag` | string | `None` | Network Operator image tag, if `None`, then the Chart's `appVersion` will be used | -| `operator.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling Network Operator image | +| Name | Type | Default | Description | +|------------------------------------------------------|--------|------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `operator.admissionController.enabled` | bool | `False` | deploy with admission controller webhook | +| `operator.admissionController.useCertManager` | bool | `False` | use cert-manager for generating self-signed certificate | +| `operator.admissionController.certificate.tlsCrt` | string | `` | External certificate crt. Ignored if cert-manager is used. 
| +| `operator.admissionController.certificate.tlsKey` | string | `` | External certificate key. Ignored if cert-manager is used. | +| `nfd.enabled` | bool | `True` | deploy Node Feature Discovery | +| `nfd.deployNodeFeatureRules` | bool | `True` | deploy Node Feature Rules to label the nodes | +| `sriovNetworkOperator.enabled` | bool | `False` | deploy SR-IOV Network Operator | +| `upgradeCRDs` | bool | `True` | enable CRDs upgrade with helm pre-install and pre-upgrade hooks | +| `sriovNetworkOperator.configDaemonNodeSelectorExtra` | object | `{"node-role.kubernetes.io/worker": ""}` | Additional nodeSelector for sriov-network-operator config daemon. These values will be added in addition to default values managed by the network-operator. | +| `imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling any of the Network Operator image if it's not overridden | +| `operator.repository` | string | `nvcr.io/nvidia/cloud-native` | Network Operator image repository | +| `operator.image` | string | `network-operator` | Network Operator image name | +| `operator.tag` | string | `None` | Network Operator image tag, if `None`, then the Chart's `appVersion` will be used | +| `operator.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling Network Operator image | +| `operator.resources` | object | `{}` | Optional [resource requests and limits](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) for the operator | | `operator.cniBinDirectory` | yaml | `/opt/cni/bin` | Directory, where CNI binaries will be deployed on the nodes. Setting for the sriov-network-operator is set with `sriov-network-operator.cniBinPath` parameter. Note, that CNI bin directory should be aligned with the CNI bin directory in container runtime. 
| +| `deployCR` | bool | `false` | Deploy `NicClusterPolicy` custom resource according to provided parameters | +| `nodeAffinity` | yaml | `` | Override the node affinity for various Daemonsets deployed by network operator, e.g. whereabouts, multus, cni-plugins. | +| `tolerations` | yaml | `` | Set additional tolerations for various Daemonsets deployed by network operator, e.g. whereabouts, multus, cni-plugins. | #### imagePullSecrets customization @@ -395,6 +396,23 @@ imagePullSecrets: ### NicClusterPolicy Custom resource parameters +#### Container resources + +Optional [resource requests and limits](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) for the containers of the `NicClusterPolicy` components. Each entry of a component's `containerResources` list names the container it applies to. + +Example: + +```yaml +containerResources: + - name: "mofed-container" + requests: + cpu: "200m" + memory: "150Mi" + limits: + cpu: "300m" + memory: "300Mi" +``` + #### Mellanox OFED driver | Name | Type | Default | Description | @@ -428,17 +446,19 @@ imagePullSecrets: | `ofedDriver.upgradePolicy.drain.deleteEmptyDir` | bool | `true` | continue even if there are pods using emptyDir | | `ofedDriver.upgradePolicy.waitForCompletion.podSelector` | string | not set | specifies a label selector for the pods to wait for completion before starting the driver upgrade | | `ofedDriver.upgradePolicy.waitForCompletion.timeoutSeconds` | int | not set | specify the length of time in seconds to wait before giving up for workload to finish, zero means infinite | +| `ofedDriver.containerResources` | [] | not set | Optional [resource requests and limits](#container-resources) for the 
`mofed-container` container | #### RDMA Device Plugin -| Name | Type | Default | Description | -| ---- | ---- | ------- | ----------- | -| `rdmaSharedDevicePlugin.deploy` | bool | `true` | Deploy RDMA Shared device plugin | -| `rdmaSharedDevicePlugin.repository` | string | `nvcr.io/nvidia/cloud-native` | RDMA Shared device plugin image repository | -| `rdmaSharedDevicePlugin.image` | string | `k8s-rdma-shared-dev-plugin` | RDMA Shared device plugin image name | -| `rdmaSharedDevicePlugin.version` | string | `v1.3.2` | RDMA Shared device plugin version | -| `rdmaSharedDevicePlugin.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling any of the RDMA Shared device plugin image | -| `rdmaSharedDevicePlugin.resources` | list | See below | RDMA Shared device plugin resources | +| Name | Type | Default | Description | +|---------------------------------------------|--------|-------------------------------|---------------------------------------------------------------------------------------------------------| +| `rdmaSharedDevicePlugin.deploy` | bool | `true` | Deploy RDMA Shared device plugin | +| `rdmaSharedDevicePlugin.repository` | string | `nvcr.io/nvidia/cloud-native` | RDMA Shared device plugin image repository | +| `rdmaSharedDevicePlugin.image` | string | `k8s-rdma-shared-dev-plugin` | RDMA Shared device plugin image name | +| `rdmaSharedDevicePlugin.version` | string | `v1.3.2` | RDMA Shared device plugin version | +| `rdmaSharedDevicePlugin.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling any of the RDMA Shared device plugin image | +| `rdmaSharedDevicePlugin.resources` | list | See below | RDMA Shared device plugin resources | +| `rdmaSharedDevicePlugin.containerResources` | [] | not set | Optional [resource requests and limits](#container-resources) for the `rdma-shared-dp` container | ##### RDMA Device Plugin Resource configurations @@ -463,14 +483,15 @@ resources: #### 
SR-IOV Network Device plugin -| Name | Type | Default | Description | -| ---- | ---- | ------- | ----------- | -| `sriovDevicePlugin.deploy` | bool | `false` | Deploy SR-IOV Network device plugin | -| `sriovDevicePlugin.repository` | string | `ghcr.io/k8snetworkplumbingwg` | SR-IOV Network device plugin image repository | -| `sriovDevicePlugin.image` | string | `sriov-network-device-plugin` | SR-IOV Network device plugin image name | -| `sriovDevicePlugin.version` | string | `v3.5.1` | SR-IOV Network device plugin version | -| `sriovDevicePlugin.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling any of the SR-IOV Network device plugin image | -| `sriovDevicePlugin.resources` | list | See below | SR-IOV Network device plugin resources | +| Name | Type | Default | Description | +|----------------------------------------|--------|--------------------------------|------------------------------------------------------------------------------------------------------------| +| `sriovDevicePlugin.deploy` | bool | `false` | Deploy SR-IOV Network device plugin | +| `sriovDevicePlugin.repository` | string | `ghcr.io/k8snetworkplumbingwg` | SR-IOV Network device plugin image repository | +| `sriovDevicePlugin.image` | string | `sriov-network-device-plugin` | SR-IOV Network device plugin image name | +| `sriovDevicePlugin.version` | string | `v3.5.1` | SR-IOV Network device plugin version | +| `sriovDevicePlugin.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling any of the SR-IOV Network device plugin image | +| `sriovDevicePlugin.resources` | list | See below | SR-IOV Network device plugin resources | +| `sriovDevicePlugin.containerResources` | [] | not set | Optional [resource requests and limits](#container-resources) for the `kube-sriovdp` container | ##### SR-IOV Network Device Plugin Resource configurations @@ -502,17 +523,19 @@ Create/Update/Delete), reading the Pod's network 
annotation and fetching its cor PKey, to add the newly generated Guid or the predefined Guid in guid field of CRD cni-args to that PKey, for pods with annotation mellanox.infiniband.app. -| Name | Type | Default | Description | -|---------------------------------------|--------|---------------------------|---------------------------------------------------------------------------------------------| -| `ibKubernetes.deploy` | bool | `false` | Deploy IB Kubernetes | -| `ibKubernetes.repository` | string | `ghcr.io/mellanox` | IB Kubernetes image repository | -| `ibKubernetes.image` | string | `ib-kubernetes` | IB Kubernetes image name | -| `ibKubernetes.version` | string | `v1.0.2` | IB Kubernetes version | -| `ibKubernetes.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling any of the IB Kubernetes image | -| `ibKubernetes.periodicUpdateSeconds` | int | `5` | Interval of periodic update in seconds | -| `ibKubernetes.pKeyGUIDPoolRangeStart` | string | `02:00:00:00:00:00:00:00` | Minimal available GUID value to be allocated for the Pod | -| `ibKubernetes.pKeyGUIDPoolRangeEnd` | string | `02:FF:FF:FF:FF:FF:FF:FF` | Maximal available GUID value to be allocated for the Pod | -| `ibKubernetes.ufmSecret` | string | See below | Name of the Secret with the NVIDIA® UFM® access credentials, deployed beforehand | +| Name | Type | Default | Description | +|---------------------------------------|--------|-----------------------------|-------------------------------------------------------------------------------------------------| +| `ibKubernetes.deploy` | bool | `false` | Deploy IB Kubernetes | +| `ibKubernetes.repository` | string | `ghcr.io/mellanox` | IB Kubernetes image repository | +| `ibKubernetes.image` | string | `ib-kubernetes` | IB Kubernetes image name | +| `ibKubernetes.version` | string | `v1.0.2` | IB Kubernetes version | +| `ibKubernetes.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use 
for pulling any of the IB Kubernetes image | +| `ibKubernetes.periodicUpdateSeconds` | int | `5` | Interval of periodic update in seconds | +| `ibKubernetes.pKeyGUIDPoolRangeStart` | string | `02:00:00:00:00:00:00:00` | Minimal available GUID value to be allocated for the Pod | +| `ibKubernetes.pKeyGUIDPoolRangeEnd` | string | `02:FF:FF:FF:FF:FF:FF:FF` | Maximal available GUID value to be allocated for the Pod | +| `ibKubernetes.ufmSecret` | string | See below | Name of the Secret with the NVIDIA® UFM® access credentials, deployed beforehand | +| `ibKubernetes.containerResources` | [] | not set | Optional [resource requests and limits](#container-resources) for the `ib-kubernetes` container | + ##### UFM secret @@ -553,56 +576,62 @@ optionally deployed components: ##### CNI Plugin Secondary Network -| Name | Type | Default | Description | -| ---- | ---- | ------- | ----------- | -| `cniPlugins.deploy` | bool | `true` | Deploy CNI Plugins Secondary Network | -| `cniPlugins.image` | string | `plugins` | CNI Plugins image name | -| `cniPlugins.repository` | string | `ghcr.io/k8snetworkplumbingwg` | CNI Plugins image repository | -| `cniPlugins.version` | string | `v0.8.7-amd64` | CNI Plugins image version | -| `cniPlugins.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling any of the CNI Plugins image | +| Name | Type | Default | Description | +|---------------------------------|--------|--------------------------------|-----------------------------------------------------------------------------------------------| +| `cniPlugins.deploy` | bool | `true` | Deploy CNI Plugins Secondary Network | +| `cniPlugins.image` | string | `plugins` | CNI Plugins image name | +| `cniPlugins.repository` | string | `ghcr.io/k8snetworkplumbingwg` | CNI Plugins image repository | +| `cniPlugins.version` | string | `v0.8.7-amd64` | CNI Plugins image version | +| `cniPlugins.imagePullSecrets` | list | `[]` | An optional list of references to 
secrets to use for pulling any of the CNI Plugins image | +| `cniPlugins.containerResources` | [] | not set | Optional [resource requests and limits](#container-resources) for the `cni-plugins` container | ##### Multus CNI Secondary Network -| Name | Type | Default | Description | -| ---- | ---- | ------- | ----------- | -| `multus.deploy` | bool | `true` | Deploy Multus Secondary Network | -| `multus.image` | string | `multus-cni` | Multus image name | -| `multus.repository` | string | `ghcr.io/k8snetworkplumbingwg` | Multus image repository | -| `multus.version` | string | `v3.8` | Multus image version | -| `multus.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling any of the Multus image | -| `multus.config` | string | nil | Multus CNI config, if not specified or empty then config will be automatically generated from the CNI configuration file of the master plugin (the first file in lexicographical order in cni-conf-dir) | +| Name | Type | Default | Description | +|-----------------------------|--------|--------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `multus.deploy` | bool | `true` | Deploy Multus Secondary Network | +| `multus.image` | string | `multus-cni` | Multus image name | +| `multus.repository` | string | `ghcr.io/k8snetworkplumbingwg` | Multus image repository | +| `multus.version` | string | `v3.8` | Multus image version | +| `multus.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling any of the Multus image | +| `multus.config` | string | nil | Multus CNI config, if not specified or empty then config will be automatically generated from the CNI configuration file of the master plugin (the first file in lexicographical order in cni-conf-dir) | +| `multus.containerResources` | [] 
| not set | Optional [resource requests and limits](#container-resources) for the `kube-multus` container | ##### IPoIB CNI -| Name | Type | Default | Description | -| ---- | ---- | ------- | ----------- | -| `ipoib.deploy` | bool | `false` | Deploy IPoIB CNI | -| `ipoib.image` | string | `ipoib-cni` | IPoIB CNI image name | -| `ipoib.repository` | string | `nvcr.io/nvidia/cloud-native` | IPoIB CNI image repository | -| `ipoib.version` | string | `v1.1.0` | IPoIB CNI image version | -| `ipoib.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling any of the IPoIB CNI image | +| Name | Type | Default | Description | +|----------------------------|--------|-------------------------------|---------------------------------------------------------------------------------------------| +| `ipoib.deploy` | bool | `false` | Deploy IPoIB CNI | +| `ipoib.image` | string | `ipoib-cni` | IPoIB CNI image name | +| `ipoib.repository` | string | `nvcr.io/nvidia/cloud-native` | IPoIB CNI image repository | +| `ipoib.version` | string | `v1.1.0` | IPoIB CNI image version | +| `ipoib.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling any of the IPoIB CNI image | +| `ipoib.containerResources` | [] | not set | Optional [resource requests and limits](#container-resources) for the `ipoib-cni` container | ##### IPAM CNI Plugin Secondary Network -| Name | Type | Default | Description | -| ----------------------------- | ------ |--------------------------------| ----------- | -| `ipamPlugin.deploy` | bool | `true` | Deploy IPAM CNI Plugin Secondary Network | -| `ipamPlugin.image` | string | `whereabouts` | IPAM CNI Plugin image name | -| `ipamPlugin.repository` | string | `ghcr.io/k8snetworkplumbingwg` | IPAM CNI Plugin image repository | -| `ipamPlugin.version` | string | `v0.5.4-amd64` | IPAM CNI Plugin image version | -| `ipamPlugin.imagePullSecrets` | list | `[]` | An optional list of references to 
secrets to use for pulling any of the IPAM CNI Plugin image | +| Name | Type | Default | Description | +|---------------------------------| ------ |--------------------------------|-----------------------------------------------------------------------------------------------| +| `ipamPlugin.deploy` | bool | `true` | Deploy IPAM CNI Plugin Secondary Network | +| `ipamPlugin.image` | string | `whereabouts` | IPAM CNI Plugin image name | +| `ipamPlugin.repository` | string | `ghcr.io/k8snetworkplumbingwg` | IPAM CNI Plugin image repository | +| `ipamPlugin.version` | string | `v0.5.4-amd64` | IPAM CNI Plugin image version | +| `ipamPlugin.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling any of the IPAM CNI Plugin image | +| `ipamPlugin.containerResources` | [] | not set | Optional [resource requests and limits](#container-resources) for the `whereabouts` container | #### NVIDIA IPAM Plugin -| Name | Type | Default | Description | -| ------------------------- |--------|--------------------|-------------------------------------------------------------------------------------| -| `nvIpam.deploy` | bool | `false` | Deploy NVIDIA IPAM Plugin | -| `nvIpam.image` | string | `nvidia-k8s-ipam` | NVIDIA IPAM Plugin image name | -| `nvIpam.repository` | string | `ghcr.io/mellanox` | NVIDIA IPAM Plugin image repository | -| `nvIpam.version` | string | `v0.1.1` | NVIDIA IPAM Plugin image version | -| `nvIpam.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling any of the Plugin image | -| `nvIpam.config` | string | Deprecated | This field is ignored. 
Configuration is done by using IPPool CRD | -| `nvIpam.enableWebhook` | bool | `false` | Enable deployment of the validataion webhook for IPPool CRD | +| Name | Type | Default | Description | +|-----------------------------|--------|--------------------------|--------------------------------------------------------------------------------------------------------------------------| +| `nvIpam.deploy` | bool | `false` | Deploy NVIDIA IPAM Plugin | +| `nvIpam.image` | string | `nvidia-k8s-ipam` | NVIDIA IPAM Plugin image name | +| `nvIpam.repository` | string | `ghcr.io/mellanox` | NVIDIA IPAM Plugin image repository | +| `nvIpam.version` | string | `v0.1.1` | NVIDIA IPAM Plugin image version | +| `nvIpam.imagePullSecrets` | list | `[]` | An optional list of references to secrets to use for pulling any of the Plugin image | +| `nvIpam.config` | string | Deprecated | This field is ignored. Configuration is done by using IPPool CRD | +| `nvIpam.enableWebhook` | bool | `false` | Enable deployment of the validation webhook for IPPool CRD | +| `nvIpam.containerResources` | [] | not set | Optional [resource requests and limits](#container-resources) for the `nv-ipam-node` and `nv-ipam-controller` containers | + > __Note__: Supported X.509 certificate management system should be available in the cluster to enable the validation webhook. > Currently supported systems are [certmanager](https://cert-manager.io/) and @@ -615,12 +644,13 @@ optionally deployed components: leverages [Node Feature Discovery](https://kubernetes-sigs.github.io/node-feature-discovery/stable/get-started/index.html) to advertise NIC specific labels on K8s Node objects. 
-| Name | Type | Default | Description | -| -------------------------------- | ------ | ----------------------- | --------------------------------------------- | -| `nicFeatureDiscovery.deploy` | bool | `false` | Deploy NVIDIA NIC Feature Discovery | -| `nicFeatureDiscovery.image` | string | `nic-feature-discovery` | NVIDIA NIC Feature Discovery image name | -| `nicFeatureDiscovery.repository` | string | `ghcr.io/mellanox` | NVIDIA NIC Feature Discovery image repository | -| `nicFeatureDiscovery.version` | string | `v0.0.1` | NVIDIA NIC Feature Discovery image version | +| Name | Type | Default | Description | +|------------------------------------------| ------ |--------------------------|---------------------------------------------------------------------------------------------------------| +| `nicFeatureDiscovery.deploy` | bool | `false` | Deploy NVIDIA NIC Feature Discovery | +| `nicFeatureDiscovery.image` | string | `nic-feature-discovery` | NVIDIA NIC Feature Discovery image name | +| `nicFeatureDiscovery.repository` | string | `ghcr.io/mellanox` | NVIDIA NIC Feature Discovery image repository | +| `nicFeatureDiscovery.version` | string | `v0.0.1` | NVIDIA NIC Feature Discovery image version | +| `nicFeatureDiscovery.containerResources` | [] | not set | Optional [resource requests and limits](#container-resources) for the `nic-feature-discovery` container | ## Deployment Examples diff --git a/deployment/network-operator/templates/mellanox.com_v1alpha1_nicclusterpolicy_cr.yaml b/deployment/network-operator/templates/mellanox.com_v1alpha1_nicclusterpolicy_cr.yaml index 5cda6a02..d47b03e9 100644 --- a/deployment/network-operator/templates/mellanox.com_v1alpha1_nicclusterpolicy_cr.yaml +++ b/deployment/network-operator/templates/mellanox.com_v1alpha1_nicclusterpolicy_cr.yaml @@ -45,6 +45,9 @@ spec: name: {{ .Values.ofedDriver.repoConfig.name }} {{- end }} imagePullSecrets: {{ include "network-operator.ofed.imagePullSecrets" . 
}} + {{- if .Values.ofedDriver.containerResources }} + containerResources: {{ toYaml .Values.ofedDriver.containerResources | nindent 6 }} + {{- end }} terminationGracePeriodSeconds: {{ .Values.ofedDriver.terminationGracePeriodSeconds }} startupProbe: initialDelaySeconds: {{ .Values.ofedDriver.startupProbe.initialDelaySeconds }} @@ -106,6 +109,9 @@ spec: {{- end }} ] } + {{- if .Values.rdmaSharedDevicePlugin.containerResources }} + containerResources: {{ toYaml .Values.rdmaSharedDevicePlugin.containerResources | nindent 6 }} + {{- end }} {{- end }} {{- if .Values.sriovDevicePlugin.deploy }} sriovDevicePlugin: @@ -138,6 +144,9 @@ spec: {{- end }} ] } + {{- if .Values.sriovDevicePlugin.containerResources }} + containerResources: {{ toYaml .Values.sriovDevicePlugin.containerResources | nindent 6 }} + {{- end }} {{- end }} {{- if .Values.ibKubernetes.deploy }} ibKubernetes: @@ -145,6 +154,9 @@ spec: repository: {{ .Values.ibKubernetes.repository }} version: {{ .Values.ibKubernetes.version }} imagePullSecrets: {{ include "network-operator.ibKubernetes.imagePullSecrets" . }} + {{- if .Values.ibKubernetes.containerResources }} + containerResources: {{ toYaml .Values.ibKubernetes.containerResources | nindent 6 }} + {{- end }} pKeyGUIDPoolRangeStart: {{ .Values.ibKubernetes.pKeyGUIDPoolRangeStart }} pKeyGUIDPoolRangeEnd: {{ .Values.ibKubernetes.pKeyGUIDPoolRangeEnd }} ufmSecret: {{ .Values.ibKubernetes.ufmSecret | quote }} @@ -157,6 +169,9 @@ spec: repository: {{ .Values.secondaryNetwork.cniPlugins.repository }} version: {{ .Values.secondaryNetwork.cniPlugins.version }} imagePullSecrets: {{ include "network-operator.secondaryNetwork.cniPlugins.imagePullSecrets" . 
}} + {{- if .Values.secondaryNetwork.cniPlugins.containerResources }} + containerResources: {{ toYaml .Values.secondaryNetwork.cniPlugins.containerResources | nindent 8 }} + {{- end }} {{- end }} {{- if .Values.secondaryNetwork.multus.deploy }} multus: @@ -164,6 +179,9 @@ spec: repository: {{ .Values.secondaryNetwork.multus.repository }} version: {{ .Values.secondaryNetwork.multus.version }} imagePullSecrets: {{ include "network-operator.secondaryNetwork.multus.imagePullSecrets" . }} + {{- if .Values.secondaryNetwork.multus.containerResources }} + containerResources: {{ toYaml .Values.secondaryNetwork.multus.containerResources | nindent 8 }} + {{- end }} {{- if .Values.secondaryNetwork.multus.config | empty | not }} config: {{ .Values.secondaryNetwork.multus.config | quote }} {{- end }} @@ -173,6 +191,9 @@ spec: image: {{ .Values.secondaryNetwork.ipoib.image }} repository: {{ .Values.secondaryNetwork.ipoib.repository }} version: {{ .Values.secondaryNetwork.ipoib.version }} + {{- if .Values.secondaryNetwork.ipoib.containerResources }} + containerResources: {{ toYaml .Values.secondaryNetwork.ipoib.containerResources | nindent 8 }} + {{- end }} {{- end }} {{- if .Values.secondaryNetwork.ipamPlugin.deploy }} ipamPlugin: @@ -180,6 +201,9 @@ spec: repository: {{ .Values.secondaryNetwork.ipamPlugin.repository }} version: {{ .Values.secondaryNetwork.ipamPlugin.version }} imagePullSecrets: {{ include "network-operator.secondaryNetwork.ipamPlugin.imagePullSecrets" . }} + {{- if .Values.secondaryNetwork.ipamPlugin.containerResources }} + containerResources: {{ toYaml .Values.secondaryNetwork.ipamPlugin.containerResources | nindent 8 }} + {{- end }} {{- end }} {{- end }} {{- if .Values.nvIpam.deploy }} @@ -188,6 +212,9 @@ spec: repository: {{ .Values.nvIpam.repository }} version: {{ .Values.nvIpam.version }} imagePullSecrets: {{ include "network-operator.nvIpam.imagePullSecrets" . 
}} + {{- if .Values.nvIpam.containerResources }} + containerResources: {{ toYaml .Values.nvIpam.containerResources | nindent 6 }} + {{- end }} enableWebhook: {{ .Values.nvIpam.enableWebhook }} {{- end }} {{- if .Values.nicFeatureDiscovery.deploy }} @@ -196,5 +223,8 @@ spec: repository: {{ .Values.nicFeatureDiscovery.repository }} version: {{ .Values.nicFeatureDiscovery.version }} imagePullSecrets: {{ include "network-operator.nicFeatureDiscovery.imagePullSecrets" . }} + {{- if .Values.nicFeatureDiscovery.containerResources }} + containerResources: {{ toYaml .Values.nicFeatureDiscovery.containerResources | nindent 6 }} + {{- end }} {{- end }} {{ end }} diff --git a/deployment/network-operator/values.yaml b/deployment/network-operator/values.yaml index 49cb7aad..cf1102f4 100644 --- a/deployment/network-operator/values.yaml +++ b/deployment/network-operator/values.yaml @@ -109,6 +109,7 @@ sriov-network-operator: # General Operator related values # The operator element allows to deploy network operator from an alternate location operator: + resources: {} tolerations: - key: "node-role.kubernetes.io/master" operator: "Equal" @@ -176,6 +177,14 @@ ofedDriver: # env: # - name: EXAMPLE_ENV_VAR # value: example_env_var_value + # containerResources: + # - name: "mofed-container" + # requests: + # cpu: "200m" + # memory: "150Mi" + # limits: + # cpu: "300m" + # memory: "300Mi" terminationGracePeriodSeconds: 300 # Private mirror repository configuration repoConfig: @@ -225,6 +234,14 @@ rdmaSharedDevicePlugin: version: sha-fe7f371c7e1b8315bf900f71cd25cfc1251dc775 useCdi: false # imagePullSecrets: [] + # containerResources: + # - name: "rdma-shared-dp" + # requests: + # cpu: "100m" + # memory: "50Mi" + # limits: + # cpu: "150m" + # memory: "100Mi" # The following defines the RDMA resources in the cluster # it must be provided by the user when deploying the chart # each entry in the resources element will create a resource with the provided and list of devices @@ -241,6 +258,14 @@ 
sriovDevicePlugin: version: 2cc723dcbc712290055b763dc9d3c090ba41e929 useCdi: false # imagePullSecrets: [] + # containerResources: + # - name: "kube-sriovdp" + # requests: + # cpu: "100m" + # memory: "50Mi" + # limits: + # cpu: "150m" + # memory: "100Mi" resources: - name: hostdev vendors: [15b3] @@ -251,6 +276,14 @@ ibKubernetes: repository: ghcr.io/mellanox version: v1.0.2 # imagePullSecrets: [] + # containerResources: + # - name: "ib-kubernetes" + # requests: + # cpu: "100m" + # memory: "300Mi" + # limits: + # cpu: "100m" + # memory: "300Mi" periodicUpdateSeconds: 5 pKeyGUIDPoolRangeStart: "02:00:00:00:00:00:00:00" pKeyGUIDPoolRangeEnd: "02:FF:FF:FF:FF:FF:FF:FF" @@ -263,6 +296,21 @@ nvIpam: version: v0.1.1 enableWebhook: false # imagePullSecrets: [] + # containerResources: + # - name: "nv-ipam-node" + # requests: + # cpu: "150m" + # memory: "50Mi" + # limits: + # cpu: "300m" + # memory: "300Mi" + # - name: "nv-ipam-controller" + # requests: + # cpu: "150m" + # memory: "50Mi" + # limits: + # cpu: "300m" + # memory: "300Mi" secondaryNetwork: deploy: true @@ -272,6 +320,14 @@ secondaryNetwork: repository: ghcr.io/k8snetworkplumbingwg version: v1.2.0-amd64 # imagePullSecrets: [] + # containerResources: + # - name: "cni-plugins" + # requests: + # cpu: "100m" + # memory: "50Mi" + # limits: + # cpu: "100m" + # memory: "50Mi" multus: deploy: true image: multus-cni @@ -279,18 +335,42 @@ secondaryNetwork: version: v3.9.3 # imagePullSecrets: [] # config: '' + # containerResources: + # - name: "kube-multus" + # requests: + # cpu: "100m" + # memory: "50Mi" + # limits: + # cpu: "100m" + # memory: "50Mi" ipoib: deploy: false image: ipoib-cni repository: nvcr.io/nvidia/cloud-native version: v1.1.0 # imagePullSecrets: [] + # containerResources: + # - name: "ipoib-cni" + # requests: + # cpu: "100m" + # memory: "50Mi" + # limits: + # cpu: "100m" + # memory: "50Mi" ipamPlugin: deploy: true image: whereabouts repository: ghcr.io/k8snetworkplumbingwg version: v0.6.1-amd64 # 
imagePullSecrets: [] + # containerResources: + # - name: "whereabouts" + # requests: + # cpu: "100m" + # memory: "100Mi" + # limits: + # cpu: "100m" + # memory: "200Mi" nicFeatureDiscovery: deploy: false @@ -298,6 +378,14 @@ nicFeatureDiscovery: repository: ghcr.io/mellanox version: v0.0.1 # imagePullSecrets: [] + # containerResources: + # - name: "nic-feature-discovery" + # requests: + # cpu: "100m" + # memory: "50Mi" + # limits: + # cpu: "300m" + # memory: "150Mi" # Can be set to nicclusterpolicy and override other ds node affinity, # e.g. https://github.com/Mellanox/network-operator/blob/master/manifests/state-multus-cni/0050-multus-ds.yml#L26-L36 diff --git a/hack/templates/values/values.template b/hack/templates/values/values.template index eb07d8c9..d3d510fe 100644 --- a/hack/templates/values/values.template +++ b/hack/templates/values/values.template @@ -109,6 +109,7 @@ sriov-network-operator: # General Operator related values # The operator element allows to deploy network operator from an alternate location operator: + resources: {} tolerations: - key: "node-role.kubernetes.io/master" operator: "Equal" @@ -176,6 +177,14 @@ ofedDriver: # env: # - name: EXAMPLE_ENV_VAR # value: example_env_var_value + # containerResources: + # - name: "mofed-container" + # requests: + # cpu: "200m" + # memory: "150Mi" + # limits: + # cpu: "300m" + # memory: "300Mi" terminationGracePeriodSeconds: 300 # Private mirror repository configuration repoConfig: @@ -225,6 +234,14 @@ rdmaSharedDevicePlugin: version: {{ .RdmaSharedDevicePlugin.Version }} useCdi: false # imagePullSecrets: [] + # containerResources: + # - name: "rdma-shared-dp" + # requests: + # cpu: "100m" + # memory: "50Mi" + # limits: + # cpu: "150m" + # memory: "100Mi" # The following defines the RDMA resources in the cluster # it must be provided by the user when deploying the chart # each entry in the resources element will create a resource with the provided and list of devices @@ -241,6 +258,14 @@ 
sriovDevicePlugin: version: {{ .SriovDevicePlugin.Version }} useCdi: false # imagePullSecrets: [] + # containerResources: + # - name: "kube-sriovdp" + # requests: + # cpu: "100m" + # memory: "50Mi" + # limits: + # cpu: "150m" + # memory: "100Mi" resources: - name: hostdev vendors: [15b3] @@ -251,6 +276,14 @@ ibKubernetes: repository: {{ .IbKubernetes.Repository }} version: {{ .IbKubernetes.Version }} # imagePullSecrets: [] + # containerResources: + # - name: "ib-kubernetes" + # requests: + # cpu: "100m" + # memory: "300Mi" + # limits: + # cpu: "100m" + # memory: "300Mi" periodicUpdateSeconds: 5 pKeyGUIDPoolRangeStart: "02:00:00:00:00:00:00:00" pKeyGUIDPoolRangeEnd: "02:FF:FF:FF:FF:FF:FF:FF" @@ -263,6 +296,21 @@ nvIpam: version: {{ .NvIPAM.Version }} enableWebhook: false # imagePullSecrets: [] + # containerResources: + # - name: "nv-ipam-node" + # requests: + # cpu: "150m" + # memory: "50Mi" + # limits: + # cpu: "300m" + # memory: "300Mi" + # - name: "nv-ipam-controller" + # requests: + # cpu: "150m" + # memory: "50Mi" + # limits: + # cpu: "300m" + # memory: "300Mi" secondaryNetwork: deploy: true @@ -272,6 +320,14 @@ secondaryNetwork: repository: {{ .CniPlugins.Repository }} version: {{ .CniPlugins.Version }} # imagePullSecrets: [] + # containerResources: + # - name: "cni-plugins" + # requests: + # cpu: "100m" + # memory: "50Mi" + # limits: + # cpu: "100m" + # memory: "50Mi" multus: deploy: true image: {{ .Multus.Image }} @@ -279,18 +335,42 @@ secondaryNetwork: version: {{ .Multus.Version }} # imagePullSecrets: [] # config: '' + # containerResources: + # - name: "kube-multus" + # requests: + # cpu: "100m" + # memory: "50Mi" + # limits: + # cpu: "100m" + # memory: "50Mi" ipoib: deploy: false image: {{ .Ipoib.Image }} repository: {{ .Ipoib.Repository }} version: {{ .Ipoib.Version }} # imagePullSecrets: [] + # containerResources: + # - name: "ipoib-cni" + # requests: + # cpu: "100m" + # memory: "50Mi" + # limits: + # cpu: "100m" + # memory: "50Mi" ipamPlugin: deploy: 
true image: {{ .IpamPlugin.Image }} repository: {{ .IpamPlugin.Repository }} version: {{ .IpamPlugin.Version }} # imagePullSecrets: [] + # containerResources: + # - name: "whereabouts" + # requests: + # cpu: "100m" + # memory: "100Mi" + # limits: + # cpu: "100m" + # memory: "200Mi" nicFeatureDiscovery: deploy: false @@ -298,6 +378,14 @@ nicFeatureDiscovery: repository: {{ .NicFeatureDiscovery.Repository }} version: {{ .NicFeatureDiscovery.Version }} # imagePullSecrets: [] + # containerResources: + # - name: "nic-feature-discovery" + # requests: + # cpu: "100m" + # memory: "50Mi" + # limits: + # cpu: "300m" + # memory: "150Mi" # Can be set to nicclusterpolicy and override other ds node affinity, # e.g. https://github.com/Mellanox/network-operator/blob/master/manifests/state-multus-cni/0050-multus-ds.yml#L26-L36 From a0e0b3463e8115d75f18670640439bfe129d8793 Mon Sep 17 00:00:00 2001 From: amaslennikov Date: Wed, 20 Dec 2023 18:49:17 +0300 Subject: [PATCH 2/4] Fix indentations in some manifests Signed-off-by: amaslennikov --- .../state-nv-ipam-cni/040-nv-ipam-node.yaml | 18 +++++++++--------- .../0030-sriov-dp-daemonset.yml | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/manifests/state-nv-ipam-cni/040-nv-ipam-node.yaml b/manifests/state-nv-ipam-cni/040-nv-ipam-node.yaml index d0b10226..90713c18 100644 --- a/manifests/state-nv-ipam-cni/040-nv-ipam-node.yaml +++ b/manifests/state-nv-ipam-cni/040-nv-ipam-node.yaml @@ -82,15 +82,15 @@ spec: - --cni-log-level=info # log level for shim CNI - --ippools-namespace=$(IPPOOLS_NAMESPACE) {{- with index .RuntimeSpec.ContainerResources "nv-ipam-node" }} - resources: - {{- if .Requests }} - requests: - {{ .Requests | yaml | nindent 14}} - {{- end }} - {{- if .Limits }} - limits: - {{ .Limits | yaml | nindent 14}} - {{- end }} + resources: + {{- if .Requests }} + requests: + {{ .Requests | yaml | nindent 12}} + {{- end }} + {{- if .Limits }} + limits: + {{ .Limits | yaml | nindent 12}} + {{- end }} {{- else 
}} resources: requests: diff --git a/manifests/state-sriov-device-plugin/0030-sriov-dp-daemonset.yml b/manifests/state-sriov-device-plugin/0030-sriov-dp-daemonset.yml index 4162e235..cf7506a1 100644 --- a/manifests/state-sriov-device-plugin/0030-sriov-dp-daemonset.yml +++ b/manifests/state-sriov-device-plugin/0030-sriov-dp-daemonset.yml @@ -97,11 +97,11 @@ spec: resources: {{- if .Requests }} requests: - {{ .Requests | yaml | nindent 12}} + {{ .Requests | yaml | nindent 14}} {{- end }} {{- if .Limits }} limits: - {{ .Limits | yaml | nindent 12}} + {{ .Limits | yaml | nindent 14}} {{- end }} {{- end }} volumes: From 30fd89b200339c830357688aa1e5bdea3c6ef1c1 Mon Sep 17 00:00:00 2001 From: amaslennikov Date: Fri, 22 Dec 2023 13:28:52 +0300 Subject: [PATCH 3/4] Check state spec for nil in GetManifestObjects to avoid runtime panic Signed-off-by: amaslennikov --- pkg/state/state_cni_plugins.go | 4 ++++ pkg/state/state_ib_kubernetes.go | 4 ++++ pkg/state/state_ipoib_cni.go | 4 ++++ pkg/state/state_multus_cni.go | 4 ++++ pkg/state/state_nic_feature_discovery.go | 4 ++++ pkg/state/state_nv_ipam_cni.go | 4 ++++ pkg/state/state_ofed.go | 4 ++++ pkg/state/state_shared_dp.go | 4 ++++ pkg/state/state_sriov_dp.go | 4 ++++ pkg/state/state_whereabouts_cni.go | 4 ++++ 10 files changed, 40 insertions(+) diff --git a/pkg/state/state_cni_plugins.go b/pkg/state/state_cni_plugins.go index 8017e76f..63caeb2e 100644 --- a/pkg/state/state_cni_plugins.go +++ b/pkg/state/state_cni_plugins.go @@ -138,6 +138,10 @@ func (s *stateCNIPlugins) GetWatchSources() map[string]client.Object { func (s *stateCNIPlugins) GetManifestObjects( _ context.Context, cr *mellanoxv1alpha1.NicClusterPolicy, catalog InfoCatalog, reqLogger logr.Logger) ([]*unstructured.Unstructured, error) { + if cr == nil || cr.Spec.SecondaryNetwork == nil || cr.Spec.SecondaryNetwork.CniPlugins == nil { + return nil, errors.New("failed to render objects: state spec is nil") + } + staticConfig := catalog.GetStaticConfigProvider() if 
staticConfig == nil { return nil, errors.New("staticConfig provider required") diff --git a/pkg/state/state_ib_kubernetes.go b/pkg/state/state_ib_kubernetes.go index 78c92a7f..7429f78d 100644 --- a/pkg/state/state_ib_kubernetes.go +++ b/pkg/state/state_ib_kubernetes.go @@ -141,6 +141,10 @@ func (s *stateIBKubernetes) GetWatchSources() map[string]client.Object { func (s *stateIBKubernetes) GetManifestObjects( _ context.Context, cr *mellanoxv1alpha1.NicClusterPolicy, catalog InfoCatalog, reqLogger logr.Logger) ([]*unstructured.Unstructured, error) { + if cr == nil || cr.Spec.IBKubernetes == nil { + return nil, errors.New("failed to render objects: state spec is nil") + } + clusterInfo := catalog.GetClusterTypeProvider() if clusterInfo == nil { return nil, errors.New("clusterType provider required") diff --git a/pkg/state/state_ipoib_cni.go b/pkg/state/state_ipoib_cni.go index 0ca01ac7..eac62486 100644 --- a/pkg/state/state_ipoib_cni.go +++ b/pkg/state/state_ipoib_cni.go @@ -138,6 +138,10 @@ func (s *stateIPoIBCNI) GetWatchSources() map[string]client.Object { func (s *stateIPoIBCNI) GetManifestObjects( _ context.Context, cr *mellanoxv1alpha1.NicClusterPolicy, catalog InfoCatalog, reqLogger logr.Logger) ([]*unstructured.Unstructured, error) { + if cr == nil || cr.Spec.SecondaryNetwork == nil || cr.Spec.SecondaryNetwork.IPoIB == nil { + return nil, errors.New("failed to render objects: state spec is nil") + } + clusterInfo := catalog.GetClusterTypeProvider() if clusterInfo == nil { return nil, errors.New("clusterInfo provider required") diff --git a/pkg/state/state_multus_cni.go b/pkg/state/state_multus_cni.go index 2af7a2c4..d1b41fae 100644 --- a/pkg/state/state_multus_cni.go +++ b/pkg/state/state_multus_cni.go @@ -135,6 +135,10 @@ func (s *stateMultusCNI) GetWatchSources() map[string]client.Object { func (s *stateMultusCNI) GetManifestObjects( _ context.Context, cr *mellanoxv1alpha1.NicClusterPolicy, catalog InfoCatalog, reqLogger logr.Logger) 
([]*unstructured.Unstructured, error) { + if cr == nil || cr.Spec.SecondaryNetwork == nil || cr.Spec.SecondaryNetwork.Multus == nil { + return nil, errors.New("failed to render objects: state spec is nil") + } + staticConfig := catalog.GetStaticConfigProvider() if staticConfig == nil { return nil, errors.New("staticConfig provider required") diff --git a/pkg/state/state_nic_feature_discovery.go b/pkg/state/state_nic_feature_discovery.go index 96a7bc00..ffaeb497 100644 --- a/pkg/state/state_nic_feature_discovery.go +++ b/pkg/state/state_nic_feature_discovery.go @@ -141,6 +141,10 @@ func (s *stateNICFeatureDiscovery) GetWatchSources() map[string]client.Object { func (s *stateNICFeatureDiscovery) GetManifestObjects( _ context.Context, cr *mellanoxv1alpha1.NicClusterPolicy, catalog InfoCatalog, reqLogger logr.Logger) ([]*unstructured.Unstructured, error) { + if cr == nil || cr.Spec.NicFeatureDiscovery == nil { + return nil, errors.New("failed to render objects: state spec is nil") + } + clusterInfo := catalog.GetClusterTypeProvider() if clusterInfo == nil { return nil, errors.New("clusterType provider required") diff --git a/pkg/state/state_nv_ipam_cni.go b/pkg/state/state_nv_ipam_cni.go index 1ba2adb4..df3196fb 100644 --- a/pkg/state/state_nv_ipam_cni.go +++ b/pkg/state/state_nv_ipam_cni.go @@ -142,6 +142,10 @@ func (s *stateNVIPAMCNI) GetWatchSources() map[string]client.Object { func (s *stateNVIPAMCNI) GetManifestObjects( _ context.Context, cr *mellanoxv1alpha1.NicClusterPolicy, catalog InfoCatalog, reqLogger logr.Logger) ([]*unstructured.Unstructured, error) { + if cr == nil || cr.Spec.NvIpam == nil { + return nil, errors.New("failed to render objects: state spec is nil") + } + clusterInfo := catalog.GetClusterTypeProvider() if clusterInfo == nil { return nil, errors.New("clusterInfo provider required") diff --git a/pkg/state/state_ofed.go b/pkg/state/state_ofed.go index 6bce8bb4..59919e8e 100644 --- a/pkg/state/state_ofed.go +++ b/pkg/state/state_ofed.go @@ -382,6 
+382,10 @@ func (s *stateOFED) handleAdditionalMounts( func (s *stateOFED) GetManifestObjects( ctx context.Context, cr *mellanoxv1alpha1.NicClusterPolicy, catalog InfoCatalog, reqLogger logr.Logger) ([]*unstructured.Unstructured, error) { + if cr == nil || cr.Spec.OFEDDriver == nil { + return nil, errors.New("failed to render objects: state spec is nil") + } + nodeInfo := catalog.GetNodeInfoProvider() if nodeInfo == nil { return nil, errors.New("nodeInfo provider required") diff --git a/pkg/state/state_shared_dp.go b/pkg/state/state_shared_dp.go index bbc13b6c..a2623cfb 100644 --- a/pkg/state/state_shared_dp.go +++ b/pkg/state/state_shared_dp.go @@ -141,6 +141,10 @@ func (s *stateSharedDp) GetWatchSources() map[string]client.Object { func (s *stateSharedDp) GetManifestObjects( _ context.Context, cr *mellanoxv1alpha1.NicClusterPolicy, catalog InfoCatalog, reqLogger logr.Logger) ([]*unstructured.Unstructured, error) { + if cr == nil || cr.Spec.RdmaSharedDevicePlugin == nil { + return nil, errors.New("failed to render objects: state spec is nil") + } + clusterInfo := catalog.GetClusterTypeProvider() if clusterInfo == nil { return nil, errors.New("clusterInfo provider required") diff --git a/pkg/state/state_sriov_dp.go b/pkg/state/state_sriov_dp.go index 6c2fbb83..1a075539 100644 --- a/pkg/state/state_sriov_dp.go +++ b/pkg/state/state_sriov_dp.go @@ -143,6 +143,10 @@ func (s *stateSriovDp) GetWatchSources() map[string]client.Object { func (s *stateSriovDp) GetManifestObjects( _ context.Context, cr *mellanoxv1alpha1.NicClusterPolicy, catalog InfoCatalog, reqLogger logr.Logger) ([]*unstructured.Unstructured, error) { + if cr == nil || cr.Spec.SriovDevicePlugin == nil { + return nil, errors.New("failed to render objects: state spec is nil") + } + clusterInfo := catalog.GetClusterTypeProvider() if clusterInfo == nil { return nil, errors.New("clusterInfo provider required") diff --git a/pkg/state/state_whereabouts_cni.go b/pkg/state/state_whereabouts_cni.go index 
9922c0c3..64adb56a 100644 --- a/pkg/state/state_whereabouts_cni.go +++ b/pkg/state/state_whereabouts_cni.go @@ -134,6 +134,10 @@ func (s *stateWhereaboutsCNI) GetWatchSources() map[string]client.Object { func (s *stateWhereaboutsCNI) GetManifestObjects( _ context.Context, cr *mellanoxv1alpha1.NicClusterPolicy, catalog InfoCatalog, reqLogger logr.Logger) ([]*unstructured.Unstructured, error) { + if cr == nil || cr.Spec.SecondaryNetwork == nil || cr.Spec.SecondaryNetwork.IpamPlugin == nil { + return nil, errors.New("failed to render objects: state spec is nil") + } + staticConfig := catalog.GetStaticConfigProvider() if staticConfig == nil { return nil, errors.New("staticConfig provider required") From 32fb2727a3347da3079c3cf45d5d3223b5be29db Mon Sep 17 00:00:00 2001 From: amaslennikov Date: Wed, 20 Dec 2023 13:48:59 +0300 Subject: [PATCH 4/4] Add continuity check unit test This test renders all manifests to extract deployable containers' names and parses values.yaml from the helm chart to make sure that all containers are represented in the helm chart under the ResourceRequirements section Signed-off-by: amaslennikov --- pkg/state/continuity_check_test.go | 156 +++++++++++++++++++++++++++++ pkg/state/factory.go | 4 +- 2 files changed, 159 insertions(+), 1 deletion(-) create mode 100644 pkg/state/continuity_check_test.go diff --git a/pkg/state/continuity_check_test.go b/pkg/state/continuity_check_test.go new file mode 100644 index 00000000..140d5311 --- /dev/null +++ b/pkg/state/continuity_check_test.go @@ -0,0 +1,156 @@ +/* +2023 NVIDIA CORPORATION & AFFILIATES + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package state + +import ( + "bufio" + "bytes" + "os" + "path/filepath" + "sort" + "strings" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "gopkg.in/yaml.v3" + + mellanoxv1alpha1 "github.com/Mellanox/network-operator/api/v1alpha1" + "github.com/Mellanox/network-operator/pkg/config" +) + +func extractContainerNamesFromHelmChart(path string) ([]string, error) { + //nolint:gosec + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + + var parsedData map[string]interface{} + err = yaml.Unmarshal(uncommentContainerResources(data), &parsedData) + if err != nil { + return nil, err + } + + containerNames := extractContainerNamesFromSubsection(parsedData) + + return containerNames, nil +} + +// uncommentContainerResources iterates through the document and removes '#' comments from containerResources sections +func uncommentContainerResources(fileData []byte) []byte { + var result bytes.Buffer + scanner := bufio.NewScanner(bytes.NewReader(fileData)) + var processNextLines bool + + for scanner.Scan() { + line := scanner.Text() + + if strings.TrimSpace(line) == "# containerResources:" { + // Remove "# " and set flag to process next lines + line = strings.Replace(line, "# ", "", 1) + processNextLines = true + } else if processNextLines && strings.HasPrefix(strings.TrimSpace(line), "# ") { + // For subsequent lines starting with "# ", remove "# " + line = strings.Replace(line, "# ", "", 1) + } else { + processNextLines = false + } + + result.WriteString(line + "\n") + } + + return result.Bytes() +} + +//nolint:gocognit +func 
extractContainerNamesFromSubsection(data interface{}) []string { + var names []string + + switch v := data.(type) { + case []interface{}: + for _, item := range v { + names = append(names, extractContainerNamesFromSubsection(item)...) + } + case map[string]interface{}: + for key, value := range v { + if key == "containerResources" { + if resources, ok := value.([]interface{}); ok { + for _, resource := range resources { + if resMap, ok := resource.(map[string]interface{}); ok { + if name, ok := resMap["name"].(string); ok { + names = append(names, name) + } + } + } + } + } else { + names = append(names, extractContainerNamesFromSubsection(value)...) + } + } + } + + return names +} + +var _ = Describe("Continuity check", func() { + + Context("Resource requirements", func() { + It("Resource requirements from helm chart should cover all deployable containers", func() { + wd, err := os.Getwd() + Expect(err).NotTo(HaveOccurred()) + + chartPath := filepath.Join(wd, "..", "..", "deployment", "network-operator", "values.yaml") + + namesFromChart, err := extractContainerNamesFromHelmChart(chartPath) + Expect(err).NotTo(HaveOccurred()) + + var namesFromManifests []string + + cr := &mellanoxv1alpha1.NicClusterPolicy{} + cr.Name = "nic-cluster-policy" + imageSpec := mellanoxv1alpha1.ImageSpec{Image: "image", Repository: "", Version: "version"} + imageSpecWithConfig := mellanoxv1alpha1.ImageSpecWithConfig{ImageSpec: imageSpec} + cr.Spec.IBKubernetes = &mellanoxv1alpha1.IBKubernetesSpec{ImageSpec: imageSpec} + cr.Spec.OFEDDriver = &mellanoxv1alpha1.OFEDDriverSpec{ImageSpec: imageSpec} + cr.Spec.RdmaSharedDevicePlugin = &mellanoxv1alpha1.DevicePluginSpec{ImageSpecWithConfig: imageSpecWithConfig} + cr.Spec.SriovDevicePlugin = &mellanoxv1alpha1.DevicePluginSpec{ImageSpecWithConfig: imageSpecWithConfig} + cr.Spec.NvIpam = &mellanoxv1alpha1.NVIPAMSpec{ImageSpec: imageSpec} + cr.Spec.NicFeatureDiscovery = &mellanoxv1alpha1.NICFeatureDiscoverySpec{ImageSpec: imageSpec} + 
cr.Spec.SecondaryNetwork = &mellanoxv1alpha1.SecondaryNetworkSpec{} + cr.Spec.SecondaryNetwork.CniPlugins = &imageSpec + cr.Spec.SecondaryNetwork.IpamPlugin = &imageSpec + cr.Spec.SecondaryNetwork.IPoIB = &imageSpec + cr.Spec.SecondaryNetwork.Multus = &mellanoxv1alpha1.MultusSpec{ImageSpecWithConfig: imageSpecWithConfig} + + manifestsBaseDir := filepath.Join("..", "..", "manifests") + envConfig = &config.OperatorConfig{State: config.StateConfig{ManifestBaseDir: manifestsBaseDir}} + states, err := newNicClusterPolicyStates(nil, nil) + Expect(err).NotTo(HaveOccurred()) + + for _, state := range states { + names, err := ParseContainerNames(state.(ManifestRenderer), cr, testLogger) + Expect(err).NotTo(HaveOccurred()) + namesFromManifests = append(namesFromManifests, names...) + } + + sort.Strings(namesFromChart) + sort.Strings(namesFromManifests) + Expect(namesFromChart).To(Equal(namesFromManifests)) + + }) + }) +}) diff --git a/pkg/state/factory.go b/pkg/state/factory.go index f450cc5c..8c943422 100644 --- a/pkg/state/factory.go +++ b/pkg/state/factory.go @@ -30,6 +30,8 @@ import ( "github.com/Mellanox/network-operator/pkg/consts" ) +var envConfig = config.FromEnv() + // NewManager creates a state.Manager for the given CRD Kind func NewManager( crdKind string, k8sAPIClient client.Client, scheme *runtime.Scheme, setupLog logr.Logger) (Manager, error) { @@ -71,7 +73,7 @@ func newStates(crdKind string, k8sAPIClient client.Client, scheme *runtime.Schem // newNicClusterPolicyStates creates states that reconcile NicClusterPolicy CRD func newNicClusterPolicyStates(k8sAPIClient client.Client, scheme *runtime.Scheme) ([]State, error) { - manifestBaseDir := config.FromEnv().State.ManifestBaseDir + manifestBaseDir := envConfig.State.ManifestBaseDir ofedState, _, err := NewStateOFED( k8sAPIClient, scheme, filepath.Join(manifestBaseDir, "state-ofed-driver")) if err != nil {