From 3cb012d4d9cfb445de2e3d57e8ab8b7f2d1e16e5 Mon Sep 17 00:00:00 2001 From: Fred Rolland Date: Mon, 4 Dec 2023 13:30:13 +0200 Subject: [PATCH] feat: Support ForcePrecompiled flag MOFED images support some OS/kernel combinations with precompiled drivers. If a precompiled image is available in the image registry, it will be used for MOFED; otherwise the image with sources will be used. The user can specify that, in case the precompiled image does not exist, the OFED state will fail. This can be done by setting ForcePrecompiled to "true" in the "ofedDriver" spec in NicClusterPolicy. The default for ForcePrecompiled is false. Signed-off-by: Fred Rolland --- Dockerfile | 2 + api/v1alpha1/nicclusterpolicy_types.go | 6 + .../mellanox.com_nicclusterpolicies.yaml | 7 + controllers/nicclusterpolicy_controller.go | 15 +- controllers/suite_test.go | 13 +- deployment/network-operator/README.md | 3 +- .../crds/mellanox.com_nicclusterpolicies.yaml | 7 + ...anox.com_v1alpha1_nicclusterpolicy_cr.yaml | 1 + deployment/network-operator/values.yaml | 1 + go.mod | 12 ++ go.sum | 28 ++++ hack/templates/values/values.template | 1 + main.go | 22 +-- pkg/config/config.go | 1 + pkg/docadriverimages/doca_drivers.go | 146 ++++++++++++++++++ pkg/state/dummy_provider.go | 7 + pkg/state/info_source.go | 14 ++ pkg/state/state_ofed.go | 68 ++++++-- pkg/state/state_ofed_test.go | 139 ++++++++++++++++- 19 files changed, 457 insertions(+), 36 deletions(-) create mode 100644 pkg/docadriverimages/doca_drivers.go diff --git a/Dockerfile b/Dockerfile index c788b9b2..5a05f9ee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -78,6 +78,8 @@ COPY --from=builder /workspace/manager . COPY --from=builder /workspace/kubectl-${ARCH} /usr/local/bin/kubectl COPY --from=builder /workspace/crds /crds +# Default Certificates are missing in micro-ubi. 
These are needed to fetch DOCA drivers image tags +COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem COPY /webhook-schemas /webhook-schemas COPY manifests/ manifests/ USER 65532:65532 diff --git a/api/v1alpha1/nicclusterpolicy_types.go b/api/v1alpha1/nicclusterpolicy_types.go index 60ca7e2a..1cbd8d88 100644 --- a/api/v1alpha1/nicclusterpolicy_types.go +++ b/api/v1alpha1/nicclusterpolicy_types.go @@ -106,6 +106,12 @@ type OFEDDriverSpec struct { // +kubebuilder:default:=300 // +kubebuilder:validation:Minimum:=0 TerminationGracePeriodSeconds int64 `json:"terminationGracePeriodSeconds,omitempty"` + // ForcePrecompiled specifies if only MOFED precompiled images are allowed + // If set to false and precompiled image does not exists, MOFED drivers will be compiled on Nodes + // If set to true and precompiled image does not exists, OFED state will be Error. + // +optional + // +kubebuilder:default:=false + ForcePrecompiled bool `json:"forcePrecompiled,omitempty"` } // DriverUpgradePolicySpec describes policy configuration for automatic upgrades diff --git a/config/crd/bases/mellanox.com_nicclusterpolicies.yaml b/config/crd/bases/mellanox.com_nicclusterpolicies.yaml index 4496c765..3e976db3 100644 --- a/config/crd/bases/mellanox.com_nicclusterpolicies.yaml +++ b/config/crd/bases/mellanox.com_nicclusterpolicies.yaml @@ -668,6 +668,13 @@ spec: - name type: object type: array + forcePrecompiled: + default: false + description: |- + ForcePrecompiled specifies if only MOFED precompiled images are allowed + If set to false and precompiled image does not exists, MOFED drivers will be compiled on Nodes + If set to true and precompiled image does not exists, OFED state will be Error. 
+ type: boolean image: pattern: '[a-zA-Z0-9\-]+' type: string diff --git a/controllers/nicclusterpolicy_controller.go b/controllers/nicclusterpolicy_controller.go index 74a36d73..1e5d380b 100644 --- a/controllers/nicclusterpolicy_controller.go +++ b/controllers/nicclusterpolicy_controller.go @@ -41,6 +41,7 @@ import ( "github.com/Mellanox/network-operator/pkg/clustertype" "github.com/Mellanox/network-operator/pkg/config" "github.com/Mellanox/network-operator/pkg/consts" + "github.com/Mellanox/network-operator/pkg/docadriverimages" "github.com/Mellanox/network-operator/pkg/nodeinfo" "github.com/Mellanox/network-operator/pkg/state" "github.com/Mellanox/network-operator/pkg/staticconfig" @@ -49,10 +50,11 @@ import ( // NicClusterPolicyReconciler reconciles a NicClusterPolicy object type NicClusterPolicyReconciler struct { client.Client - Scheme *runtime.Scheme - ClusterTypeProvider clustertype.Provider - StaticConfigProvider staticconfig.Provider - MigrationCh chan struct{} + Scheme *runtime.Scheme + ClusterTypeProvider clustertype.Provider + StaticConfigProvider staticconfig.Provider + MigrationCh chan struct{} + DocaDriverImagesProvider docadriverimages.Provider stateManager state.Manager } @@ -129,6 +131,7 @@ func (r *NicClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Req sc := state.NewInfoCatalog() sc.Add(state.InfoTypeClusterType, r.ClusterTypeProvider) sc.Add(state.InfoTypeStaticConfig, r.StaticConfigProvider) + if instance.Spec.OFEDDriver != nil { // Create node infoProvider and add to the service catalog reqLogger.V(consts.LogLevelInfo).Info("Creating Node info provider") @@ -148,6 +151,10 @@ func (r *NicClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Req reqLogger.V(consts.LogLevelDebug).Info("Node info provider with", "Nodes:", nodeNames) infoProvider := nodeinfo.NewProvider(nodePtrList) sc.Add(state.InfoTypeNodeInfo, infoProvider) + r.DocaDriverImagesProvider.SetImageSpec(&instance.Spec.OFEDDriver.ImageSpec) + 
sc.Add(state.InfoTypeDocaDriverImage, r.DocaDriverImagesProvider) + } else { + r.DocaDriverImagesProvider.SetImageSpec(nil) } // Sync state and update status managerStatus := r.stateManager.SyncState(ctx, instance, sc) diff --git a/controllers/suite_test.go b/controllers/suite_test.go index b16ea5f7..d083ef1f 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -39,6 +39,7 @@ import ( mellanoxcomv1alpha1 "github.com/Mellanox/network-operator/api/v1alpha1" "github.com/Mellanox/network-operator/pkg/clustertype" + "github.com/Mellanox/network-operator/pkg/docadriverimages" "github.com/Mellanox/network-operator/pkg/staticconfig" // +kubebuilder:scaffold:imports ) @@ -133,13 +134,15 @@ var _ = BeforeSuite(func() { clusterTypeProvider, err := clustertype.NewProvider(context.Background(), k8sClient) Expect(err).NotTo(HaveOccurred()) staticConfigProvider := staticconfig.NewProvider(staticconfig.StaticConfig{CniBinDirectory: "/opt/cni/bin"}) + docaImagesProvider := docadriverimages.NewProvider(context.Background(), k8sClient) err = (&NicClusterPolicyReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), - ClusterTypeProvider: clusterTypeProvider, - StaticConfigProvider: staticConfigProvider, - MigrationCh: migrationCompletionChan, + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + ClusterTypeProvider: clusterTypeProvider, + StaticConfigProvider: staticConfigProvider, + MigrationCh: migrationCompletionChan, + DocaDriverImagesProvider: docaImagesProvider, }).SetupWithManager(k8sManager, testSetupLog) Expect(err).ToNot(HaveOccurred()) diff --git a/deployment/network-operator/README.md b/deployment/network-operator/README.md index b051f3dc..d183cc53 100644 --- a/deployment/network-operator/README.md +++ b/deployment/network-operator/README.md @@ -417,7 +417,7 @@ containerResources: #### Mellanox OFED driver | Name | Type | Default | Description | 
-|-------------------------------------------------------------|--------|-----------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ----------------------------------------------------------- | ------ | --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `ofedDriver.deploy` | bool | `false` | deploy Mellanox OFED driver container | | `ofedDriver.repository` | string | `mellanox` | Mellanox OFED driver image repository | | `ofedDriver.image` | string | `mofed` | Mellanox OFED driver image name | @@ -448,6 +448,7 @@ containerResources: | `ofedDriver.upgradePolicy.waitForCompletion.podSelector` | string | not set | specifies a label selector for the pods to wait for completion before starting the driver upgrade | | `ofedDriver.upgradePolicy.waitForCompletion.timeoutSeconds` | int | not set | specify the length of time in seconds to wait before giving up for workload to finish, zero means infinite | | `ofedDriver.containerResources` | [] | not set | Optional [resource requests and limits](#container-resources) for the `mofed-container` container | +| `ofedDriver.forcePrecompiled` | bool | `false` | Fail Mellanox OFED deployment if precompiled OFED driver container image does not exists | #### RDMA Device Plugin diff --git a/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml b/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml index 4496c765..3e976db3 100644 --- a/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml +++ b/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml @@ -668,6 +668,13 @@ spec: - name type: object type: array + forcePrecompiled: + default: false + description: |- 
+ ForcePrecompiled specifies if only MOFED precompiled images are allowed + If set to false and precompiled image does not exists, MOFED drivers will be compiled on Nodes + If set to true and precompiled image does not exists, OFED state will be Error. + type: boolean image: pattern: '[a-zA-Z0-9\-]+' type: string diff --git a/deployment/network-operator/templates/mellanox.com_v1alpha1_nicclusterpolicy_cr.yaml b/deployment/network-operator/templates/mellanox.com_v1alpha1_nicclusterpolicy_cr.yaml index 1943435a..fa3f25df 100644 --- a/deployment/network-operator/templates/mellanox.com_v1alpha1_nicclusterpolicy_cr.yaml +++ b/deployment/network-operator/templates/mellanox.com_v1alpha1_nicclusterpolicy_cr.yaml @@ -32,6 +32,7 @@ spec: image: {{ .Values.ofedDriver.image }} repository: {{ .Values.ofedDriver.repository }} version: {{ .Values.ofedDriver.version }} + forcePrecompiled: {{ .Values.ofedDriver.forcePrecompiled }} {{- if .Values.ofedDriver.env }} env: {{ toYaml .Values.ofedDriver.env | nindent 6 }} diff --git a/deployment/network-operator/values.yaml b/deployment/network-operator/values.yaml index 286ebffb..f62c5a13 100644 --- a/deployment/network-operator/values.yaml +++ b/deployment/network-operator/values.yaml @@ -280,6 +280,7 @@ ofedDriver: # podSelector: "app=myapp" # specify the length of time in seconds to wait before giving up for workload to finish, zero means infinite # timeoutSeconds: 300 + forcePrecompiled: false rdmaSharedDevicePlugin: deploy: true diff --git a/go.mod b/go.mod index 4b8f20b6..5b2e4b57 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,8 @@ require ( github.com/caarlos0/env/v6 v6.10.1 github.com/containers/image/v5 v5.30.0 github.com/go-logr/logr v1.4.1 + github.com/google/go-containerregistry v0.19.0 + github.com/google/go-containerregistry/pkg/authn/kubernetes v0.0.0-20231129213221-4fdaa32ee934 github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.6.0 github.com/onsi/ginkgo/v2 v2.16.0 github.com/onsi/gomega v1.31.1 @@ -29,8 
+31,13 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/chai2010/gettext-go v1.0.2 // indirect + github.com/containerd/stargz-snapshotter/estargz v0.15.1 // indirect github.com/containers/storage v1.53.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/docker/cli v25.0.3+incompatible // indirect + github.com/docker/distribution v2.8.3+incompatible // indirect + github.com/docker/docker v25.0.3+incompatible // indirect + github.com/docker/docker-credential-helpers v0.8.1 // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect github.com/evanphx/json-patch v5.7.0+incompatible // indirect github.com/evanphx/json-patch/v5 v5.8.0 // indirect @@ -58,9 +65,11 @@ require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/compress v1.17.7 // indirect github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect + github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/mitchellh/go-wordwrap v1.0.1 // indirect github.com/moby/spdystream v0.2.0 // indirect github.com/moby/term v0.5.0 // indirect @@ -70,6 +79,7 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/opencontainers/go-digest v1.0.0 // indirect + github.com/opencontainers/image-spec v1.1.0 // indirect github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_golang v1.18.0 // indirect @@ -77,9 +87,11 @@ require ( github.com/prometheus/common v0.45.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect + 
github.com/sirupsen/logrus v1.9.3 // indirect github.com/spf13/cobra v1.8.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/objx v0.5.2 // indirect + github.com/vbatts/tar-split v0.11.5 // indirect github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect github.com/xlab/treeprint v1.2.0 // indirect diff --git a/go.sum b/go.sum index 988ddeab..5f39b93a 100644 --- a/go.sum +++ b/go.sum @@ -16,6 +16,8 @@ github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chai2010/gettext-go v1.0.2 h1:1Lwwip6Q2QGsAdl/ZKPCwTe9fe0CjlUbqj5bFNSjIRk= github.com/chai2010/gettext-go v1.0.2/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHeQQ+5AjwawxA= +github.com/containerd/stargz-snapshotter/estargz v0.15.1 h1:eXJjw9RbkLFgioVaTG+G/ZW/0kEe2oEKCdS/ZxIyoCU= +github.com/containerd/stargz-snapshotter/estargz v0.15.1/go.mod h1:gr2RNwukQ/S9Nv33Lt6UC7xEx58C+LHRdoqbEKjz1Kk= github.com/containers/image/v5 v5.30.0 h1:CmHeSwI6W2kTRWnUsxATDFY5TEX4b58gPkaQcEyrLIA= github.com/containers/image/v5 v5.30.0/go.mod h1:gSD8MVOyqBspc0ynLsuiMR9qmt8UQ4jpVImjmK0uXfk= github.com/containers/storage v1.53.0 h1:VSES3C/u1pxjTJIXvLrSmyP7OBtDky04oGu07UvdTEA= @@ -27,6 +29,14 @@ github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/docker/cli v25.0.3+incompatible h1:KLeNs7zws74oFuVhgZQ5ONGZiXUUdgsdy6/EsX/6284= +github.com/docker/cli v25.0.3+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= +github.com/docker/distribution v2.8.3+incompatible 
h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk= +github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= +github.com/docker/docker v25.0.3+incompatible h1:D5fy/lYmY7bvZa0XTZ5/UJPljor41F+vdyJG5luQLfQ= +github.com/docker/docker v25.0.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker-credential-helpers v0.8.1 h1:j/eKUktUltBtMzKqmfLB0PAgqYyMHOp5vfsD1807oKo= +github.com/docker/docker-credential-helpers v0.8.1/go.mod h1:P3ci7E3lwkZg6XiHdRKft1KckHiO9a2rNtyFbZ/ry9M= github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/evanphx/json-patch v5.7.0+incompatible h1:vgGkfT/9f8zE6tvSCe74nfpAVDQ2tG6yudJd8LBksgI= @@ -70,6 +80,10 @@ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-containerregistry v0.19.0 h1:uIsMRBV7m/HDkDxE/nXMnv1q+lOOSPlQ/ywc5JbB8Ic= +github.com/google/go-containerregistry v0.19.0/go.mod h1:u0qB2l7mvtWVR5kNcbFIhFY1hLbf8eeGapA+vbFDCtQ= +github.com/google/go-containerregistry/pkg/authn/kubernetes v0.0.0-20231129213221-4fdaa32ee934 h1:meXymsGqWofFUx2IXYkCa1IqdhHl8AVkRLRcEAZecaY= +github.com/google/go-containerregistry/pkg/authn/kubernetes v0.0.0-20231129213221-4fdaa32ee934/go.mod h1:5sSbf/SbGGvjWIlMlt2bkEqOq+ufOIBYrBevLuxbfSs= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -96,6 +110,8 @@ 
github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.6.0 h1:B github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.6.0/go.mod h1:wxt2YWRVItDtaQmVSmaN5ubE2L1c9CiNoHQwSJnM8Ko= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg= +github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -109,6 +125,8 @@ github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 h1:jWpvCLoY8Z/e3VKvlsiIGKtc+UG6U5vzxaoagmhXfyg= github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQthUoa9bqDv0ER0wrtXnBruoNd7aNjkbP+k= +github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0= github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0= github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8= @@ -132,6 +150,8 @@ github.com/onsi/gomega v1.31.1 h1:KYppCUK+bUgAZwHOu7EXVBKyQA6ILvOESHkn/tgoqvo= github.com/onsi/gomega v1.31.1/go.mod h1:y40C95dwAD1Nz36SsEnxvfFe8FFfNxzI5eJ0EYGyAy0= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod 
h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= +github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= github.com/openshift/api v0.0.0-20231120222239-b86761094ee3 h1:nLhV2lbWrJ3E3hx0/97G3ZZvppC67cNwo+CLp7/PAbA= github.com/openshift/api v0.0.0-20231120222239-b86761094ee3/go.mod h1:qNtV0315F+f8ld52TLtPvrfivZpdimOzTi3kn9IVbtU= github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI= @@ -154,6 +174,8 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8= github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= @@ -171,6 +193,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/vbatts/tar-split v0.11.5 h1:3bHCTIheBm1qFTcgh9oPu+nNBtX+XJIupG/vacinCts= +github.com/vbatts/tar-split v0.11.5/go.mod h1:yZbwRsSeGjusneWgA781EKej9HF8vme8okylkAeNKLk= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod 
h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo= github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= @@ -223,6 +247,7 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= @@ -270,6 +295,9 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= +gotest.tools/v3 v3.5.1 h1:EENdUnS3pdur5nybKYIh2Vfgc8IUNBjxDPSjtiJcOzU= +gotest.tools/v3 v3.5.1/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU= k8s.io/api v0.29.2 h1:hBC7B9+MU+ptchxEqTNW2DkUosJpp1P+Wn6YncZ474A= k8s.io/api v0.29.2/go.mod h1:sdIaaKuU7P44aoyyLlikSLayT6Vb7bvJNCX105xZXY0= k8s.io/apiextensions-apiserver v0.29.0 h1:0VuspFG7Hj+SxyF/Z/2T0uFbI5gb5LRgEyUVE3Q4lV0= diff --git a/hack/templates/values/values.template b/hack/templates/values/values.template index 
6b33a94a..c2542948 100644 --- a/hack/templates/values/values.template +++ b/hack/templates/values/values.template @@ -280,6 +280,7 @@ ofedDriver: # podSelector: "app=myapp" # specify the length of time in seconds to wait before giving up for workload to finish, zero means infinite # timeoutSeconds: 300 + forcePrecompiled: false rdmaSharedDevicePlugin: deploy: true diff --git a/main.go b/main.go index 77854d34..4f7f30da 100644 --- a/main.go +++ b/main.go @@ -44,6 +44,7 @@ import ( "github.com/Mellanox/network-operator/api/v1alpha1/validator" "github.com/Mellanox/network-operator/controllers" "github.com/Mellanox/network-operator/pkg/clustertype" + "github.com/Mellanox/network-operator/pkg/docadriverimages" "github.com/Mellanox/network-operator/pkg/migrate" "github.com/Mellanox/network-operator/pkg/staticconfig" "github.com/Mellanox/network-operator/version" @@ -85,20 +86,23 @@ func setupWebhookControllers(mgr ctrl.Manager) error { func setupCRDControllers(ctx context.Context, c client.Client, mgr ctrl.Manager, migrationChan chan struct{}) error { ctrLog := setupLog.WithName("controller") clusterTypeProvider, err := clustertype.NewProvider(ctx, c) - - cniBinDir := os.Getenv("CNI_BIN_DIR") - staticInfoProvider := staticconfig.NewProvider(staticconfig.StaticConfig{CniBinDirectory: cniBinDir}) - if err != nil { setupLog.Error(err, "unable to create cluster type provider") return err } + + cniBinDir := os.Getenv("CNI_BIN_DIR") + staticInfoProvider := staticconfig.NewProvider(staticconfig.StaticConfig{CniBinDirectory: cniBinDir}) + + docaImagesProvider := docadriverimages.NewProvider(ctx, c) + if err := (&controllers.NicClusterPolicyReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - ClusterTypeProvider: clusterTypeProvider, // we want to cache information about the cluster type - StaticConfigProvider: staticInfoProvider, - MigrationCh: migrationChan, + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + ClusterTypeProvider: clusterTypeProvider, // we 
want to cache information about the cluster type + StaticConfigProvider: staticInfoProvider, + MigrationCh: migrationChan, + DocaDriverImagesProvider: docaImagesProvider, }).SetupWithManager(mgr, ctrLog.WithName("NicClusterPolicy")); err != nil { setupLog.Error(err, "unable to create controller", "controller", "NicClusterPolicy") return err diff --git a/pkg/config/config.go b/pkg/config/config.go index bc983cc7..bb3c4601 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -39,6 +39,7 @@ type StateConfig struct { NetworkOperatorResourceNamespace string `env:"POD_NAMESPACE" envDefault:"nvidia-network-operator"` ManifestBaseDir string `env:"STATE_MANIFEST_BASE_DIR" envDefault:"./manifests"` OFEDState OFEDStateConfig + DocaDriverImagePollTimeMinutes uint `env:"DOCA_DRIVER_IMAGE_POLL_TIME_MINUTES" envDefault:"30"` } // ControllerConfig holds configuration for Operator controllers. diff --git a/pkg/docadriverimages/doca_drivers.go b/pkg/docadriverimages/doca_drivers.go new file mode 100644 index 00000000..b7e8b304 --- /dev/null +++ b/pkg/docadriverimages/doca_drivers.go @@ -0,0 +1,146 @@ +/* +2024 NVIDIA CORPORATION & AFFILIATES + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// Package docadriverimages provides information about DOCA driver images +package docadriverimages + +import ( + "context" + "fmt" + "reflect" + "sync" + "time" + + mellanoxv1alpha1 "github.com/Mellanox/network-operator/api/v1alpha1" + "github.com/Mellanox/network-operator/pkg/config" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + kauth "github.com/google/go-containerregistry/pkg/authn/kubernetes" + "github.com/google/go-containerregistry/pkg/name" + "github.com/google/go-containerregistry/pkg/v1/remote" + + "sigs.k8s.io/controller-runtime/pkg/log" +) + +// Provider provides interface to check the DOCA driver images +type Provider interface { + // TagExists returns true if DOCA driver image with provided tag exists + TagExists(tag string) bool + // SetImageSpec sets the Container registry details + SetImageSpec(*mellanoxv1alpha1.ImageSpec) +} + +// NewProvider creates a provider for DOCA driver images, +// queries the container image registry to get the existing tags +func NewProvider(ctx context.Context, c client.Client) Provider { + p := &provider{c: c, docaImageSpec: nil, tags: make([]string, 0), ctx: ctx} + + ticker := time.NewTicker(time.Duration(config.FromEnv().State.DocaDriverImagePollTimeMinutes) * time.Minute) + + go func() { + for ; ; <-ticker.C { + p.retrieveTags() + } + }() + return p +} + +// provider is a static implementation of the Provider interface +type provider struct { + c client.Client + tags []string + docaImageSpec *mellanoxv1alpha1.ImageSpec + ctx context.Context + mu sync.Mutex +} + +// TagExists returns true if DOCA driver image with provided tag exists +func (p *provider) TagExists(tag string) bool { + p.mu.Lock() + defer p.mu.Unlock() + for _, t := range p.tags { + if t == tag { + return true + } + } + return false +} + +// SetImageSpec sets the Container registry details +func (p *provider) SetImageSpec(spec 
*mellanoxv1alpha1.ImageSpec) { + p.mu.Lock() + if spec == nil { + p.docaImageSpec = nil + if len(p.tags) > 0 { + p.tags = make([]string, 0) + } + p.mu.Unlock() + return + } + if reflect.DeepEqual(p.docaImageSpec, spec) { + p.mu.Unlock() + return + } + p.docaImageSpec = spec + p.mu.Unlock() + p.retrieveTags() +} + +func (p *provider) retrieveTags() { + if p.docaImageSpec == nil { + return + } + p.mu.Lock() + defer p.mu.Unlock() + logger := log.FromContext(p.ctx) + logger.Info("fetching DOCA driver image tags", "repo", p.docaImageSpec.Repository, "image", p.docaImageSpec.Image) + pullSecrets := make([]corev1.Secret, 0) + for _, name := range p.docaImageSpec.ImagePullSecrets { + secret := &corev1.Secret{} + err := p.c.Get(p.ctx, types.NamespacedName{ + Name: name, + Namespace: config.FromEnv().State.NetworkOperatorResourceNamespace, + }, secret) + if errors.IsNotFound(err) { + continue + } else if err != nil { + logger.Error(err, "failed to get pull secret") + return + } + pullSecrets = append(pullSecrets, *secret) + } + auth, err := kauth.NewFromPullSecrets(p.ctx, pullSecrets) + if err != nil { + logger.Error(err, "failed to create registry auth from secrets") + return + } + image := fmt.Sprintf("%s/%s", p.docaImageSpec.Repository, p.docaImageSpec.Image) + repo, err := name.NewRepository(image) + if err != nil { + logger.Error(err, "failed to create repo") + return + } + tags, err := remote.List(repo, remote.WithAuthFromKeychain(auth)) + if err != nil { + logger.Error(err, "failed to list tags") + return + } + p.tags = tags +} diff --git a/pkg/state/dummy_provider.go b/pkg/state/dummy_provider.go index 35c629c8..bf71d670 100644 --- a/pkg/state/dummy_provider.go +++ b/pkg/state/dummy_provider.go @@ -17,6 +17,7 @@ limitations under the License. 
package state import ( + "github.com/Mellanox/network-operator/api/v1alpha1" "github.com/Mellanox/network-operator/pkg/clustertype" "github.com/Mellanox/network-operator/pkg/nodeinfo" "github.com/Mellanox/network-operator/pkg/staticconfig" @@ -60,12 +61,18 @@ func (d *dummyProvider) GetNodePools(...nodeinfo.Filter) []nodeinfo.NodePool { }, } } +func (d *dummyProvider) TagExists(_ string) bool { + return false +} + +func (d *dummyProvider) SetImageSpec(_ *v1alpha1.ImageSpec) {} func getDummyCatalog() InfoCatalog { catalog := NewInfoCatalog() catalog.Add(InfoTypeNodeInfo, &dummyProvider{}) catalog.Add(InfoTypeStaticConfig, &dummyProvider{}) catalog.Add(InfoTypeClusterType, &dummyProvider{}) + catalog.Add(InfoTypeDocaDriverImage, &dummyProvider{}) return catalog } diff --git a/pkg/state/info_source.go b/pkg/state/info_source.go index 5306bcc4..66740f39 100644 --- a/pkg/state/info_source.go +++ b/pkg/state/info_source.go @@ -18,6 +18,7 @@ package state import ( "github.com/Mellanox/network-operator/pkg/clustertype" + "github.com/Mellanox/network-operator/pkg/docadriverimages" "github.com/Mellanox/network-operator/pkg/nodeinfo" "github.com/Mellanox/network-operator/pkg/staticconfig" ) @@ -32,6 +33,8 @@ const ( InfoTypeClusterType // InfoTypeStaticConfig describes an InfoSource related to a static configuration. InfoTypeStaticConfig + // InfoTypeDocaDriverImage describes an InfoSource related to DOCA Drivers images + InfoTypeDocaDriverImage ) // NewInfoCatalog returns an initialized InfoCatalog. 
@@ -54,6 +57,9 @@ type InfoCatalog interface { GetClusterTypeProvider() clustertype.Provider // GetStaticConfigProvider returns a reference staticinfo.Provider from catalog or nil if provider does not exist GetStaticConfigProvider() staticconfig.Provider + // GetDocaDriverImageProvider returns a reference docadriverimages.Provider from catalog + // or nil if provider does not exist + GetDocaDriverImageProvider() docadriverimages.Provider } type infoCatalog struct { @@ -87,3 +93,11 @@ func (sc *infoCatalog) GetStaticConfigProvider() staticconfig.Provider { } return infoSource.(staticconfig.Provider) } + +func (sc *infoCatalog) GetDocaDriverImageProvider() docadriverimages.Provider { + infoSource, ok := sc.infoSources[InfoTypeDocaDriverImage] + if !ok { + return nil + } + return infoSource.(docadriverimages.Provider) +} diff --git a/pkg/state/state_ofed.go b/pkg/state/state_ofed.go index 36e82bfd..586eb213 100644 --- a/pkg/state/state_ofed.go +++ b/pkg/state/state_ofed.go @@ -48,6 +48,7 @@ import ( "github.com/Mellanox/network-operator/pkg/clustertype" "github.com/Mellanox/network-operator/pkg/config" "github.com/Mellanox/network-operator/pkg/consts" + "github.com/Mellanox/network-operator/pkg/docadriverimages" "github.com/Mellanox/network-operator/pkg/nodeinfo" "github.com/Mellanox/network-operator/pkg/render" "github.com/Mellanox/network-operator/pkg/utils" @@ -61,6 +62,15 @@ const ( // format: /:-- // e.x: nvcr.io/nvidia/mellanox/mofed:5.7-0.1.2.0-ubuntu20.04-amd64 mofedImageFormat = "%s/%s:%s-%s%s-%s" + + // precompiledTagFormat is the tag format for precompiled drivers. 
+ // format: <driver-version>-<kernel>-<os-name><os-version>-<arch> + precompiledTagFormat = "%s-%s-%s%s-%s" + + // precompiledImageFormat is the precompiled mofed driver container image name format + // format: <repository>/<image>:<driver-version>-<kernel>-<os-name><os-version>-<arch> + // e.x: nvcr.io/nvidia/mellanox/mofed:5.7-0.1.2.0-5.15.0-91-generic-ubuntu22.04-amd64 + precompiledImageFormat = "%s/%s:%s-%s-%s%s-%s" ) // Openshift cluster-wide Proxy @@ -286,6 +296,7 @@ func (s *stateOFED) Sync(ctx context.Context, customResource interface{}, infoCa } objs, err := s.GetManifestObjects(ctx, cr, infoCatalog, log.FromContext(ctx)) + if err != nil { return SyncStateNotReady, errors.Wrap(err, "failed to create k8s objects from manifest") } @@ -396,14 +407,11 @@ func (s *stateOFED) GetManifestObjects( return nil, errors.New("failed to render objects: state spec is nil") } - nodeInfo := catalog.GetNodeInfoProvider() - if nodeInfo == nil { - return nil, errors.New("nodeInfo provider required") - } - clusterInfo := catalog.GetClusterTypeProvider() - if clusterInfo == nil { - return nil, errors.New("clusterInfo provider required") + nodeInfo, clusterInfo, docaProvider, err := getProviders(catalog) + if err != nil { + return nil, err } + nodePools := nodeInfo.GetNodePools( nodeinfo.NewNodeLabelFilterBuilder().WithLabel(nodeinfo.NodeLabelMlnxNIC, "true").Build()) if len(nodePools) == 0 { @@ -421,9 +429,8 @@ func (s *stateOFED) GetManifestObjects( for _, np := range nodePools { nodePool := np - // render objects - renderedObjs, err := renderObjects(ctx, &nodePool, useDtk, s, cr, reqLogger, clusterInfo) + renderedObjs, err := renderObjects(ctx, &nodePool, useDtk, s, cr, reqLogger, clusterInfo, docaProvider) if err != nil { return nil, errors.Wrap(err, "failed to render objects") } @@ -442,7 +449,19 @@ func (s *stateOFED) GetManifestObjects( func renderObjects(ctx context.Context, nodePool *nodeinfo.NodePool, useDtk bool, s *stateOFED, cr *mellanoxv1alpha1.NicClusterPolicy, reqLogger logr.Logger, - clusterInfo clustertype.Provider) ([]*unstructured.Unstructured, error) { + clusterInfo
clustertype.Provider, docaProvider docadriverimages.Provider) ([]*unstructured.Unstructured, error) { + precompiledTag := fmt.Sprintf(precompiledTagFormat, cr.Spec.OFEDDriver.Version, nodePool.Kernel, + nodePool.OsName, nodePool.OsVersion, nodePool.Arch) + precompiledExists := docaProvider.TagExists(precompiledTag) + reqLogger.V(consts.LogLevelDebug).Info("Precompiled tag", "tag:", precompiledTag, "found:", precompiledExists) + if !precompiledExists && cr.Spec.OFEDDriver.ForcePrecompiled { + return nil, fmt.Errorf("ForcePrecompiled is enabled and precompiled image was not found") + } + + if precompiledExists { + useDtk = false + } + var dtkImageName string rhcosVersion := nodePool.RhcosVersion if useDtk { @@ -480,7 +499,7 @@ func renderObjects(ctx context.Context, nodePool *nodeinfo.NodePool, useDtk bool OSVer: nodePool.OsVersion, Kernel: nodePool.Kernel, KernelHash: getStringHash(nodePool.Kernel), - MOFEDImageName: s.getMofedDriverImageName(cr, nodePool, reqLogger), + MOFEDImageName: s.getMofedDriverImageName(cr, nodePool, precompiledExists, reqLogger), InitContainerConfig: s.getInitContainerConfig(cr, reqLogger, config.FromEnv().State.OFEDState.InitContainerImage), IsOpenshift: clusterInfo.IsOpenshift(), @@ -499,6 +518,22 @@ func renderObjects(ctx context.Context, nodePool *nodeinfo.NodePool, useDtk bool return renderedObjs, err } +func getProviders(catalog InfoCatalog) (nodeinfo.Provider, clustertype.Provider, docadriverimages.Provider, error) { + nodeInfo := catalog.GetNodeInfoProvider() + if nodeInfo == nil { + return nil, nil, nil, errors.New("nodeInfo provider required") + } + clusterInfo := catalog.GetClusterTypeProvider() + if clusterInfo == nil { + return nil, nil, nil, errors.New("clusterInfo provider required") + } + docaProvider := catalog.GetDocaDriverImageProvider() + if docaProvider == nil { + return nil, nil, nil, errors.New("docaProvider provider required") + } + return nodeInfo, clusterInfo, docaProvider, nil +} + // prepare configuration for the 
init container, // the init container will be disabled if the image is empty func (s *stateOFED) getInitContainerConfig( @@ -525,16 +560,21 @@ func (s *stateOFED) getInitContainerConfig( // getMofedDriverImageName generates MOFED driver image name based on the driver version specified in CR func (s *stateOFED) getMofedDriverImageName(cr *mellanoxv1alpha1.NicClusterPolicy, - pool *nodeinfo.NodePool, reqLogger logr.Logger) string { + pool *nodeinfo.NodePool, precompiledExists bool, reqLogger logr.Logger) string { curDriverVer, err := semver.NewVersion(cr.Spec.OFEDDriver.Version) if err != nil { reqLogger.V(consts.LogLevelDebug).Info("failed to parse ofed driver version as semver") } reqLogger.V(consts.LogLevelDebug).Info("Generating ofed driver image name for version: %v", "version", curDriverVer) + if precompiledExists { + return fmt.Sprintf(precompiledImageFormat, + cr.Spec.OFEDDriver.Repository, cr.Spec.OFEDDriver.Image, + cr.Spec.OFEDDriver.Version, pool.Kernel, + pool.OsName, pool.OsVersion, pool.Arch) + } return fmt.Sprintf(mofedImageFormat, - cr.Spec.OFEDDriver.Repository, - cr.Spec.OFEDDriver.Image, + cr.Spec.OFEDDriver.Repository, cr.Spec.OFEDDriver.Image, cr.Spec.OFEDDriver.Version, pool.OsName, pool.OsVersion, diff --git a/pkg/state/state_ofed_test.go b/pkg/state/state_ofed_test.go index 3df184a3..a3a06ec4 100644 --- a/pkg/state/state_ofed_test.go +++ b/pkg/state/state_ofed_test.go @@ -54,6 +54,7 @@ const ( rhcosOsTree = "414.92.202311061957-0" kernelFull1 = "5.15.0-78-generic" kernelFull2 = "5.15.0-91-generic" + archAmd = "amd64" ) type openShiftClusterProvider struct { @@ -71,6 +72,16 @@ func (d *openShiftClusterProvider) IsOpenshift() bool { return true } +type dummyOfedImageProvider struct { + tagExists bool +} + +func (d *dummyOfedImageProvider) TagExists(_ string) bool { + return d.tagExists +} + +func (d *dummyOfedImageProvider) SetImageSpec(*v1alpha1.ImageSpec) {} + var _ = Describe("MOFED state test", func() { var stateOfed stateOFED var ctx 
context.Context @@ -100,17 +111,17 @@ var _ = Describe("MOFED state test", func() { It("generates new image format", func() { cr.Spec.OFEDDriver.Version = "5.7-1.0.0.0" - imageName := stateOfed.getMofedDriverImageName(cr, nodePool, testLogger) + imageName := stateOfed.getMofedDriverImageName(cr, nodePool, false, testLogger) Expect(imageName).To(Equal("nvcr.io/mellanox/mofed:5.7-1.0.0.0-ubuntu20.04-amd64")) }) It("generates new image format double digit minor", func() { cr.Spec.OFEDDriver.Version = "5.10-0.0.0.1" - imageName := stateOfed.getMofedDriverImageName(cr, nodePool, testLogger) + imageName := stateOfed.getMofedDriverImageName(cr, nodePool, false, testLogger) Expect(imageName).To(Equal("nvcr.io/mellanox/mofed:5.10-0.0.0.1-ubuntu20.04-amd64")) }) It("return new image format in case of a bad version", func() { cr.Spec.OFEDDriver.Version = "1.1.1.1.1" - imageName := stateOfed.getMofedDriverImageName(cr, nodePool, testLogger) + imageName := stateOfed.getMofedDriverImageName(cr, nodePool, false, testLogger) Expect(imageName).To(Equal("nvcr.io/mellanox/mofed:1.1.1.1.1-ubuntu20.04-amd64")) }) }) @@ -293,6 +304,7 @@ var _ = Describe("MOFED state test", func() { catalog := NewInfoCatalog() catalog.Add(InfoTypeClusterType, &dummyProvider{}) catalog.Add(InfoTypeNodeInfo, infoProvider) + catalog.Add(InfoTypeDocaDriverImage, &dummyOfedImageProvider{tagExists: true}) objs, err := ofedState.GetManifestObjects(ctx, cr, catalog, testLogger) Expect(err).NotTo(HaveOccurred()) // Expect 5 objects: 1 DS per pool, Service Account, Role, RoleBinding @@ -401,6 +413,7 @@ var _ = Describe("MOFED state test", func() { catalog := NewInfoCatalog() catalog.Add(InfoTypeClusterType, &openShiftClusterProvider{}) catalog.Add(InfoTypeNodeInfo, infoProvider) + catalog.Add(InfoTypeDocaDriverImage, &dummyOfedImageProvider{tagExists: false}) objs, err := ofedState.GetManifestObjects(ctx, cr, catalog, testLogger) Expect(err).NotTo(HaveOccurred()) // Expect 6 object due to OpenShift: DaemonSet, 
Service Account, ClusterRole, ClusterRoleBinding @@ -426,8 +439,128 @@ var _ = Describe("MOFED state test", func() { } }) }) + Context("Force Precompiled", func() { + It("Should fail getManifestObjects, forcePrecompiled true and tag does not exists", func() { + ofedState := getOfedState() + cr := &v1alpha1.NicClusterPolicy{} + cr.Name = "nic-cluster-policy" + cr.Spec.OFEDDriver = &v1alpha1.OFEDDriverSpec{ + ImageSpec: v1alpha1.ImageSpec{ + Image: "mofed", + Repository: "nvcr.io/mellanox", + Version: "23.10-0.5.5.0", + }, + ForcePrecompiled: true, + } + By("Creating NodeProvider with 1 Node, that form 1 Node pool") + infoProvider := nodeinfo.NewProvider([]*v1.Node{ + getNode("node1", kernelFull1), + }) + catalog := NewInfoCatalog() + catalog.Add(InfoTypeClusterType, &dummyProvider{}) + catalog.Add(InfoTypeNodeInfo, infoProvider) + catalog.Add(InfoTypeDocaDriverImage, &dummyOfedImageProvider{tagExists: false}) + _, err := ofedState.GetManifestObjects(ctx, cr, catalog, testLogger) + Expect(err).To(HaveOccurred()) + }) + It("Should use image with sources format, forcePrecompiled false and tag does not exists", func() { + ofedState := getOfedState() + cr := &v1alpha1.NicClusterPolicy{} + cr.Name = "nic-cluster-policy" + cr.Spec.OFEDDriver = &v1alpha1.OFEDDriverSpec{ + ImageSpec: v1alpha1.ImageSpec{ + Image: "mofed", + Repository: "nvcr.io/mellanox", + Version: "23.10-0.5.5.0", + }, + ForcePrecompiled: false, + } + By("Creating NodeProvider with 1 Node, that form 1 Node pool") + infoProvider := nodeinfo.NewProvider([]*v1.Node{ + getNode("node1", kernelFull1), + }) + catalog := NewInfoCatalog() + catalog.Add(InfoTypeClusterType, &dummyProvider{}) + catalog.Add(InfoTypeNodeInfo, infoProvider) + catalog.Add(InfoTypeDocaDriverImage, &dummyOfedImageProvider{tagExists: false}) + objs, err := ofedState.GetManifestObjects(ctx, cr, catalog, testLogger) + Expect(err).NotTo(HaveOccurred()) + By("Verify image is not precompiled format") + // Expect 4 objects: DS , Service Account, 
Role, RoleBinding + Expect(len(objs)).To(Equal(4)) + for _, obj := range objs { + if obj.GetKind() != "DaemonSet" { + continue + } + ds := appsv1.DaemonSet{} + err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj.Object, &ds) + Expect(err).NotTo(HaveOccurred()) + withSourceImage := fmt.Sprintf(mofedImageFormat, + cr.Spec.OFEDDriver.Repository, cr.Spec.OFEDDriver.Image, cr.Spec.OFEDDriver.Version, + osName, osVer, archAmd) + Expect(ds.Spec.Template.Spec.Containers[0].Image).To(Equal(withSourceImage)) + } + }) + It("Should use precompiled image format, forcePrecompiled false and tag exists", func() { + ofedState := getOfedState() + cr := &v1alpha1.NicClusterPolicy{} + cr.Name = "nic-cluster-policy" + cr.Spec.OFEDDriver = &v1alpha1.OFEDDriverSpec{ + ImageSpec: v1alpha1.ImageSpec{ + Image: "mofed", + Repository: "nvcr.io/mellanox", + Version: "23.10-0.5.5.0", + }, + ForcePrecompiled: false, + } + By("Creating NodeProvider with 1 Node, that form 1 Node pool") + infoProvider := nodeinfo.NewProvider([]*v1.Node{ + getNode("node1", kernelFull1), + }) + catalog := NewInfoCatalog() + catalog.Add(InfoTypeClusterType, &dummyProvider{}) + catalog.Add(InfoTypeNodeInfo, infoProvider) + catalog.Add(InfoTypeDocaDriverImage, &dummyOfedImageProvider{tagExists: true}) + objs, err := ofedState.GetManifestObjects(ctx, cr, catalog, testLogger) + Expect(err).NotTo(HaveOccurred()) + By("Verify image is precompiled format") + // Expect 4 objects: DS , Service Account, Role, RoleBinding + Expect(len(objs)).To(Equal(4)) + for _, obj := range objs { + if obj.GetKind() != "DaemonSet" { + continue + } + ds := appsv1.DaemonSet{} + err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj.Object, &ds) + Expect(err).NotTo(HaveOccurred()) + precompiledImage := fmt.Sprintf(precompiledImageFormat, + cr.Spec.OFEDDriver.Repository, cr.Spec.OFEDDriver.Image, cr.Spec.OFEDDriver.Version, + kernelFull1, osName, osVer, archAmd) +
Expect(ds.Spec.Template.Spec.Containers[0].Image).To(Equal(precompiledImage)) + } + }) + }) }) +func getOfedState() *stateOFED { + client := mocks.ControllerRuntimeClient{} + manifestBaseDir := "../../manifests/state-ofed-driver" + + files, err := utils.GetFilesWithSuffix(manifestBaseDir, render.ManifestFileSuffix...) + Expect(err).NotTo(HaveOccurred()) + renderer := render.NewRenderer(files) + + ofedState := &stateOFED{ + stateSkel: stateSkel{ + name: stateOFEDName, + description: stateOFEDDescription, + client: &client, + renderer: renderer, + }, + } + return ofedState +} + func verifyPodAntiInfinity(affinity *v1.Affinity) { By("Verify PodAntiInfinity") Expect(affinity).NotTo(BeNil())