Skip to content

Commit

Permalink
Add support for network-operator-init-container
Browse files Browse the repository at this point in the history
Signed-off-by: Yury Kulazhenkov <[email protected]>
  • Loading branch information
ykulazhenkov committed Nov 22, 2023
1 parent 0fa6951 commit 7d7d966
Show file tree
Hide file tree
Showing 12 changed files with 256 additions and 24 deletions.
2 changes: 1 addition & 1 deletion api/v1alpha1/nicclusterpolicy_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ type ConfigMapNameReference struct {

// InitContainerSpec contains configuration for the init container of the OFED driver
type InitContainerSpec struct {
// Enable indicates if init container deployment is requried
// Enable indicates if init container deployment is required
// +optional
// +kubebuilder:default:=false
Enable bool `json:"enable,omitempty"`
Expand Down
2 changes: 1 addition & 1 deletion config/crd/bases/mellanox.com_nicclusterpolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ spec:
enable:
default: false
description: Enable indicates if init container deployment
is requried
is required
type: boolean
image:
pattern: '[a-zA-Z0-9\-]+'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ spec:
enable:
default: false
description: Enable indicates if init container deployment
is requried
is required
type: boolean
image:
pattern: '[a-zA-Z0-9\-]+'
Expand Down
2 changes: 1 addition & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ func main() {
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
// leader-elect is off by default; the string below is the flag's usage text.
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
	"Enable leader election for controller manager. "+
		"Enabling this will ensure there is only one active controller manager.")
opts := zap.Options{
Development: true,
Expand Down

This file was deleted.

18 changes: 18 additions & 0 deletions manifests/state-ofed-driver/0010_service-account.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# 2023 NVIDIA CORPORATION & AFFILIATES
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: v1
kind: ServiceAccount
metadata:
name: ofed-driver
namespace: {{ .RuntimeSpec.Namespace }}
21 changes: 21 additions & 0 deletions manifests/state-ofed-driver/0020_cluster_role.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# 2023 NVIDIA CORPORATION & AFFILIATES
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: ofed-driver
rules:
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "list", "patch", "watch", "update"]
25 changes: 25 additions & 0 deletions manifests/state-ofed-driver/0030_cluster_role_binding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# 2023 NVIDIA CORPORATION & AFFILIATES
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: ofed-driver
subjects:
- kind: ServiceAccount
name: ofed-driver
namespace: {{ .RuntimeSpec.Namespace }}
roleRef:
kind: ClusterRole
name: ofed-driver
apiGroup: rbac.authorization.k8s.io
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# 2023 NVIDIA CORPORATION & AFFILIATES
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{{- if .RuntimeSpec.InitContainerConfig.InitContainerEnable }}
apiVersion: v1
kind: ConfigMap
metadata:
name: ofed-init-container-config
namespace: {{ .RuntimeSpec.Namespace }}
data:
config.json: |-
{
"safeDriverLoad": {
"enable": {{ .RuntimeSpec.InitContainerConfig.SafeLoadEnable }},
"annotation": "{{ .RuntimeSpec.InitContainerConfig.SafeLoadAnnotation }}"
}
}
{{end}}
30 changes: 27 additions & 3 deletions manifests/state-ofed-driver/0050_ofed-driver-ds.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2020 NVIDIA
# Copyright 2023 NVIDIA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -40,16 +40,34 @@ spec:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
{{ if .RuntimeSpec.IsOpenshift }}
serviceAccountName: ofed-driver
{{end}}
hostNetwork: true
{{- if .CrSpec.ImagePullSecrets }}
imagePullSecrets:
{{- range .CrSpec.ImagePullSecrets }}
- name: {{ . }}
{{- end }}
{{- end }}
{{- if .RuntimeSpec.InitContainerConfig.InitContainerEnable }}
initContainers:
- name: network-operator-init-container
imagePullPolicy: IfNotPresent
image: {{ .RuntimeSpec.InitContainerConfig.InitContainerImageName }}
args:
- --node-name
- $(NODE_NAME)
- --config
- /config.json
env:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
volumeMounts:
- name: init-container-config
mountPath: /config.json
subPath: config.json
{{- end }}
containers:
- image: {{ .RuntimeSpec.MOFEDImageName }}
imagePullPolicy: IfNotPresent
Expand Down Expand Up @@ -112,6 +130,12 @@ spec:
# unloading OFED modules can take more time than default terminationGracePeriod (30 sec)
terminationGracePeriodSeconds: {{ .CrSpec.TerminationGracePeriodSeconds }}
volumes:
{{- if .RuntimeSpec.InitContainerConfig.InitContainerEnable }}
- name: init-container-config
configMap:
name: ofed-init-container-config
defaultMode: 0744
{{- end }}
- name: run-mlnx-ofed
hostPath:
path: /run/mellanox/drivers
Expand Down
82 changes: 72 additions & 10 deletions pkg/state/state_ofed.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"time"

"github.com/Masterminds/semver/v3"
"github.com/NVIDIA/k8s-operator-libs/pkg/upgrade"
"github.com/go-logr/logr"
osconfigv1 "github.com/openshift/api/config/v1"
"github.com/pkg/errors"
Expand Down Expand Up @@ -137,12 +138,20 @@ type additionalVolumeMounts struct {
Volumes []v1.Volume
}

// initContainerConfig carries the init container settings that are exposed to
// the OFED driver manifest templates as .RuntimeSpec.InitContainerConfig.
type initContainerConfig struct {
// InitContainerEnable is copied from the CR's InitContainer.Enable field; the
// templates render the init container, its ConfigMap and its volume only when
// this is true.
InitContainerEnable bool
// InitContainerImageName is the full image reference of the init container,
// assembled as "<repository>/<image>:<version>".
InitContainerImageName string
// SafeLoadEnable is true when the upgrade policy is set and has both
// AutoUpgrade and SafeLoad turned on; it is rendered into the init container's
// config.json as safeDriverLoad.enable.
SafeLoadEnable bool
// SafeLoadAnnotation is the annotation key obtained from the upgrade library;
// it is rendered into config.json as safeDriverLoad.annotation.
SafeLoadAnnotation string
}

type ofedRuntimeSpec struct {
runtimeSpec
CPUArch string
OSName string
OSVer string
MOFEDImageName string
CPUArch string
OSName string
OSVer string
MOFEDImageName string
InitContainerConfig initContainerConfig
// is true if cluster type is Openshift
IsOpenshift bool
}
Expand Down Expand Up @@ -440,15 +449,18 @@ func (s *stateOFED) getManifestObjects(
}
}

s.mergeImagePullSecrets(cr)

renderData := &ofedManifestRenderData{
CrSpec: cr.Spec.OFEDDriver,
RuntimeSpec: &ofedRuntimeSpec{
runtimeSpec: runtimeSpec{config.FromEnv().State.NetworkOperatorResourceNamespace},
CPUArch: nodeAttr[nodeinfo.AttrTypeCPUArch],
OSName: nodeAttr[nodeinfo.AttrTypeOSName],
OSVer: nodeAttr[nodeinfo.AttrTypeOSVer],
MOFEDImageName: s.getMofedDriverImageName(cr, nodeAttr, reqLogger),
IsOpenshift: clusterInfo.IsOpenshift(),
runtimeSpec: runtimeSpec{config.FromEnv().State.NetworkOperatorResourceNamespace},
CPUArch: nodeAttr[nodeinfo.AttrTypeCPUArch],
OSName: nodeAttr[nodeinfo.AttrTypeOSName],
OSVer: nodeAttr[nodeinfo.AttrTypeOSVer],
MOFEDImageName: s.getMofedDriverImageName(cr, nodeAttr, reqLogger),
InitContainerConfig: s.getInitContainerConfig(cr, reqLogger),
IsOpenshift: clusterInfo.IsOpenshift(),
},
Tolerations: cr.Spec.Tolerations,
NodeAffinity: cr.Spec.NodeAffinity,
Expand All @@ -464,6 +476,56 @@ func (s *stateOFED) getManifestObjects(
return objs, nil
}

// mergeImagePullSecrets appends the init container's image pull secrets to the
// pull secrets of the main OFED driver container, skipping every name that is
// already present. The merged list is written back to
// cr.Spec.OFEDDriver.ImagePullSecrets (the CR object is modified in place) and
// is used as imagePullSecrets on the Pod level.
//
// The call is a no-op when the CR has no init container section or when the
// init container declares no pull secrets.
func (s *stateOFED) mergeImagePullSecrets(cr *mellanoxv1alpha1.NicClusterPolicy) {
	initContainer := cr.Spec.OFEDDriver.InitContainer
	if initContainer == nil || len(initContainer.ImagePullSecrets) == 0 {
		return
	}
	// Track every name that is already part of the merged list, so a secret
	// repeated inside the init container list is added only once.
	seen := make(map[string]struct{},
		len(cr.Spec.OFEDDriver.ImagePullSecrets)+len(initContainer.ImagePullSecrets))
	for _, name := range cr.Spec.OFEDDriver.ImagePullSecrets {
		seen[name] = struct{}{}
	}
	for _, name := range initContainer.ImagePullSecrets {
		if _, ok := seen[name]; ok {
			continue
		}
		seen[name] = struct{}{}
		cr.Spec.OFEDDriver.ImagePullSecrets = append(cr.Spec.OFEDDriver.ImagePullSecrets, name)
	}
}

// getInitContainerConfig builds the init container settings that are passed to
// the OFED driver manifest templates.
//
// Safe driver loading counts as enabled only when the CR has an upgrade policy
// with both AutoUpgrade and SafeLoad set. When the CR has no InitContainer
// section the zero-value config is returned, i.e. the init container is
// disabled. If safe driver loading is requested while the init container is
// disabled, a message is logged at the warning verbosity level because the
// feature depends on the init container.
func (s *stateOFED) getInitContainerConfig(
	cr *mellanoxv1alpha1.NicClusterPolicy, reqLogger logr.Logger) initContainerConfig {
	var initContCfg initContainerConfig
	upgradePolicy := cr.Spec.OFEDDriver.OfedUpgradePolicy
	safeLoadEnable := upgradePolicy != nil && upgradePolicy.AutoUpgrade && upgradePolicy.SafeLoad
	if initCont := cr.Spec.OFEDDriver.InitContainer; initCont != nil {
		initContCfg = initContainerConfig{
			InitContainerEnable:    initCont.Enable,
			InitContainerImageName: initCont.Repository + "/" + initCont.Image + ":" + initCont.Version,
			SafeLoadEnable:         safeLoadEnable,
			SafeLoadAnnotation:     upgrade.GetUpgradeDriverWaitForSafeLoadAnnotationKey(),
		}
	}

	if safeLoadEnable && !initContCfg.InitContainerEnable {
		// The message spans two literals; the trailing space on the first one
		// keeps "is" and "disabled" from running together.
		reqLogger.V(consts.LogLevelWarning).Info("safe driver loading feature is enabled, but init container is " +
			"disabled. It is required to enable init container to use safe driver loading feature.")
	}
	return initContCfg
}

// getMofedDriverImageName generates MOFED driver image name based on the driver version specified in CR
// TODO(adrianc): in Network-Operator v1.5.0, we should just use the new naming scheme
func (s *stateOFED) getMofedDriverImageName(cr *mellanoxv1alpha1.NicClusterPolicy,
Expand Down
61 changes: 61 additions & 0 deletions pkg/state/state_ofed_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,67 @@ var _ = Describe("MOFED state test", func() {
})
})

Context("Init container", func() {
	// Secrets that exist only on the init container must end up in the main
	// list, and secrets shared by both lists must not be duplicated.
	It("Merge ImagePullSecrets", func() {
		initContainer := &v1alpha1.InitContainerSpec{
			ImageSpec: v1alpha1.ImageSpec{
				ImagePullSecrets: []string{"A", "1", "B", "2", "3", "C"},
			},
		}
		policy := &v1alpha1.NicClusterPolicy{
			Spec: v1alpha1.NicClusterPolicySpec{
				OFEDDriver: &v1alpha1.OFEDDriverSpec{
					ImageSpec: v1alpha1.ImageSpec{
						ImagePullSecrets: []string{"1", "2", "3"},
					},
					InitContainer: initContainer,
				},
			},
		}

		stateOfed.mergeImagePullSecrets(policy)

		merged := policy.Spec.OFEDDriver.ImageSpec.ImagePullSecrets
		Expect(merged).To(ConsistOf("A", "B", "C", "1", "2", "3"))
	})

	// Without an init container section the main list must stay untouched.
	It("Merge ImagePullSecrets - no init container config", func() {
		policy := &v1alpha1.NicClusterPolicy{
			Spec: v1alpha1.NicClusterPolicySpec{
				OFEDDriver: &v1alpha1.OFEDDriverSpec{
					ImageSpec: v1alpha1.ImageSpec{
						ImagePullSecrets: []string{"1", "2", "3"},
					},
				},
			},
		}

		stateOfed.mergeImagePullSecrets(policy)

		merged := policy.Spec.OFEDDriver.ImageSpec.ImagePullSecrets
		Expect(merged).To(ConsistOf("1", "2", "3"))
	})

	// Init container enabled together with auto-upgrade and safe load: every
	// field of the resulting config is expected to be populated.
	It("getInitContainerConfig", func() {
		upgradePolicy := &v1alpha1.DriverUpgradePolicySpec{
			AutoUpgrade: true,
			SafeLoad:    true,
		}
		initContainer := &v1alpha1.InitContainerSpec{
			Enable: true,
			ImageSpec: v1alpha1.ImageSpec{
				Image:      "image",
				Repository: "repository",
				Version:    "version",
			},
		}
		policy := &v1alpha1.NicClusterPolicy{
			Spec: v1alpha1.NicClusterPolicySpec{
				OFEDDriver: &v1alpha1.OFEDDriverSpec{
					OfedUpgradePolicy: upgradePolicy,
					InitContainer:     initContainer,
				},
			},
		}

		got := stateOfed.getInitContainerConfig(policy, testLogger)

		Expect(got.SafeLoadAnnotation).NotTo(BeEmpty())
		Expect(got.SafeLoadEnable).To(BeTrue())
		Expect(got.InitContainerEnable).To(BeTrue())
		Expect(got.InitContainerImageName).To(Equal("repository/image:version"))
	})
})
Context("Proxy config", func() {
It("Set Proxy from Cluster Wide Proxy", func() {
cr := &v1alpha1.NicClusterPolicy{
Expand Down

0 comments on commit 7d7d966

Please sign in to comment.