Skip to content

Commit

Permalink
Add orchestrator type specific spec for pods created by the operator
Browse files Browse the repository at this point in the history
For example, seccompProfile is required on TKGS but is not supported on OCP with the nonroot SCC.

Signed-off-by: Shiva Krishna, Merla <[email protected]>
  • Loading branch information
shivamerla committed Oct 30, 2024
1 parent 9db6581 commit 4855e39
Show file tree
Hide file tree
Showing 11 changed files with 114 additions and 36 deletions.
14 changes: 14 additions & 0 deletions internal/controller/nemo_guardrail_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1"
"github.com/NVIDIA/k8s-nim-operator/internal/conditions"
"github.com/NVIDIA/k8s-nim-operator/internal/k8sutil"
"github.com/NVIDIA/k8s-nim-operator/internal/render"
"github.com/NVIDIA/k8s-nim-operator/internal/shared"
"github.com/NVIDIA/k8s-nim-operator/internal/utils"
Expand Down Expand Up @@ -60,19 +61,27 @@ type NemoGuardrailReconciler struct {
renderer render.Renderer
Config *rest.Config
recorder record.EventRecorder
k8sType k8sutil.OrchestratorType
}

// Ensure NemoGuardrailReconciler implements the Reconciler interface
var _ shared.Reconciler = &NemoGuardrailReconciler{}

// NewNemoGuardrailReconciler creates a new reconciler for NemoGuardrail.
//
// The orchestrator type is looked up once via k8sutil.GetOrchestratorType and
// stored on the reconciler. If that lookup fails, the error is logged and nil
// is returned, so callers must check the result before using it.
func NewNemoGuardrailReconciler(client client.Client, scheme *runtime.Scheme, updater conditions.Updater, renderer render.Renderer, log logr.Logger) *NemoGuardrailReconciler {
	// Set container orchestrator type
	k8sType, err := k8sutil.GetOrchestratorType(client)
	if err != nil {
		// Surface the cause; a bare nil return would otherwise hide why
		// construction failed.
		log.Error(err, "Unable to get container orchestrator type")
		return nil
	}

	return &NemoGuardrailReconciler{
		Client:   client,
		scheme:   scheme,
		updater:  updater,
		renderer: renderer,
		log:      log,
		k8sType:  k8sType,
	}
}

Expand Down Expand Up @@ -199,6 +208,11 @@ func (r *NemoGuardrailReconciler) GetEventRecorder() record.EventRecorder {
return r.recorder
}

// GetK8sType returns the orchestrator type held in the reconciler's k8sType
// field.
func (r *NemoGuardrailReconciler) GetK8sType() k8sutil.OrchestratorType {
return r.k8sType
}

// SetupWithManager sets up the controller with the Manager.
func (r *NemoGuardrailReconciler) SetupWithManager(mgr ctrl.Manager) error {
r.recorder = mgr.GetEventRecorderFor("nemo-guardrail-service-controller")
Expand Down
57 changes: 44 additions & 13 deletions internal/controller/nimcache_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1"
"github.com/NVIDIA/k8s-nim-operator/internal/conditions"
platform "github.com/NVIDIA/k8s-nim-operator/internal/controller/platform"
"github.com/NVIDIA/k8s-nim-operator/internal/k8sutil"
"github.com/NVIDIA/k8s-nim-operator/internal/nimparser"
"github.com/NVIDIA/k8s-nim-operator/internal/render"
"github.com/NVIDIA/k8s-nim-operator/internal/shared"
Expand Down Expand Up @@ -86,6 +87,7 @@ type NIMCacheReconciler struct {
scheme *runtime.Scheme
log logr.Logger
Platform platform.Platform
k8sType k8sutil.OrchestratorType
updater conditions.Updater
recorder record.EventRecorder
}
Expand All @@ -95,11 +97,19 @@ var _ shared.Reconciler = &NIMCacheReconciler{}

// NewNIMCacheReconciler creates a new reconciler for NIMCache with the given platform.
//
// The orchestrator type is looked up once via k8sutil.GetOrchestratorType and
// stored on the reconciler. If that lookup fails, the error is logged and nil
// is returned, so callers must check the result before using it.
func NewNIMCacheReconciler(client client.Client, scheme *runtime.Scheme, log logr.Logger, platform platform.Platform) *NIMCacheReconciler {
	// Set container orchestrator type
	k8sType, err := k8sutil.GetOrchestratorType(client)
	if err != nil {
		log.Error(err, "Unable to get container orchestrator type")
		return nil
	}

	return &NIMCacheReconciler{
		Client:   client,
		scheme:   scheme,
		log:      log,
		Platform: platform,
		k8sType:  k8sType,
	}
}

Expand Down Expand Up @@ -221,6 +231,11 @@ func (r *NIMCacheReconciler) GetEventRecorder() record.EventRecorder {
return r.recorder
}

// GetK8sType returns the orchestrator type held in the reconciler's k8sType
// field.
func (r *NIMCacheReconciler) GetK8sType() k8sutil.OrchestratorType {
return r.k8sType
}

// SetupWithManager sets up the controller with the Manager.
func (r *NIMCacheReconciler) SetupWithManager(mgr ctrl.Manager) error {
r.recorder = mgr.GetEventRecorderFor("nimcache-controller")
Expand Down Expand Up @@ -564,7 +579,7 @@ func (r *NIMCacheReconciler) reconcileModelManifest(ctx context.Context, nimCach

// Create a configmap by extracting the model manifest
// Create a temporary pod for parsing model manifest
pod := constructPodSpec(nimCache)
pod := constructPodSpec(nimCache, r.GetK8sType())
// Add nimCache as owner for watching on status change
if err := controllerutil.SetControllerReference(nimCache, pod, r.GetScheme()); err != nil {
return false, err
Expand Down Expand Up @@ -683,7 +698,7 @@ func (r *NIMCacheReconciler) reconcileJob(ctx context.Context, nimCache *appsv1a

// If Job does not exist and caching is not complete, create a new one
if err != nil && nimCache.Status.State != appsv1alpha1.NimCacheStatusReady {
job, err := r.constructJob(ctx, nimCache)
job, err := r.constructJob(ctx, nimCache, r.GetK8sType())
if err != nil {
logger.Error(err, "Failed to construct job")
return err
Expand Down Expand Up @@ -886,15 +901,19 @@ func getManifestConfigName(nimCache *appsv1alpha1.NIMCache) string {
}

// constructPodSpec constructs a Pod specification
func constructPodSpec(nimCache *appsv1alpha1.NIMCache) *corev1.Pod {
func constructPodSpec(nimCache *appsv1alpha1.NIMCache, platformType k8sutil.OrchestratorType) *corev1.Pod {
labels := map[string]string{
"app": "k8s-nim-operator",
"app.kubernetes.io/name": nimCache.Name,
"app.kubernetes.io/managed-by": "k8s-nim-operator",
}

annotations := map[string]string{
"openshift.io/scc": "nonroot",
annotations := make(map[string]string)

if platformType == k8sutil.OpenShift {
annotations = map[string]string{
"openshift.io/scc": "nonroot",
}
}

pod := &corev1.Pod{
Expand Down Expand Up @@ -925,16 +944,20 @@ func constructPodSpec(nimCache *appsv1alpha1.NIMCache) *corev1.Pod {
RunAsUser: nimCache.GetUserID(),
FSGroup: nimCache.GetGroupID(),
RunAsNonRoot: ptr.To[bool](true),
SeccompProfile: &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeRuntimeDefault,
},
},
ServiceAccountName: NIMCacheServiceAccount,
Tolerations: nimCache.GetTolerations(),
NodeSelector: nimCache.GetNodeSelectors(),
},
}

// SeccompProfile must be set for TKGS
if platformType == k8sutil.TKGS {
pod.Spec.Containers[0].SecurityContext.SeccompProfile = &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeRuntimeDefault,
}
}

pod.Spec.ImagePullSecrets = []corev1.LocalObjectReference{
{
Name: nimCache.Spec.Source.NGC.PullSecret,
Expand Down Expand Up @@ -971,7 +994,7 @@ func (r *NIMCacheReconciler) getPodLogs(ctx context.Context, pod *corev1.Pod) (s
return buf.String(), nil
}

func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1alpha1.NIMCache) (*batchv1.Job, error) {
func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1alpha1.NIMCache, platformType k8sutil.OrchestratorType) (*batchv1.Job, error) {
logger := r.GetLogger()
pvcName := getPvcName(nimCache, nimCache.Spec.Storage.PVC)
labels := map[string]string{
Expand All @@ -981,10 +1004,13 @@ func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1a
}

annotations := map[string]string{
"openshift.io/scc": "nonroot",
"sidecar.istio.io/inject": "false",
}

if platformType == k8sutil.OpenShift {
annotations["openshift.io/scc"] = "nonroot"
}

job := &batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Name: nimCache.Name + "-job",
Expand All @@ -1001,9 +1027,6 @@ func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1a
RunAsUser: nimCache.GetUserID(),
FSGroup: nimCache.GetGroupID(),
RunAsNonRoot: ptr.To[bool](true),
SeccompProfile: &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeRuntimeDefault,
},
},
Containers: []corev1.Container{},
RestartPolicy: corev1.RestartPolicyNever,
Expand All @@ -1027,6 +1050,14 @@ func (r *NIMCacheReconciler) constructJob(ctx context.Context, nimCache *appsv1a
TTLSecondsAfterFinished: ptr.To[int32](600), // cleanup automatically after job finishes
},
}

// SeccompProfile must be set for TKGS
if platformType == k8sutil.TKGS {
job.Spec.Template.Spec.SecurityContext.SeccompProfile = &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeRuntimeDefault,
}
}

if nimCache.Spec.Source.DataStore != nil {
outputPath := "/output"
if nimCache.Spec.Storage.HostPath != nil {
Expand Down
15 changes: 8 additions & 7 deletions internal/controller/nimcache_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client/interceptor"

appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1"
"github.com/NVIDIA/k8s-nim-operator/internal/k8sutil"
"github.com/NVIDIA/k8s-nim-operator/internal/nimparser"
)

Expand Down Expand Up @@ -338,7 +339,7 @@ var _ = Describe("NIMCache Controller", func() {
Source: appsv1alpha1.NIMSource{NGC: &appsv1alpha1.NGCSource{ModelPuller: "nvcr.io/nim:test", PullSecret: "my-secret"}},
},
}
pod := constructPodSpec(nimCache)
pod := constructPodSpec(nimCache, k8sutil.K8s)
Expect(pod.Name).To(Equal(getPodName(nimCache)))
Expect(pod.Spec.Containers[0].Image).To(Equal("nvcr.io/nim:test"))
Expect(pod.Spec.ImagePullSecrets[0].Name).To(Equal("my-secret"))
Expand All @@ -359,7 +360,7 @@ var _ = Describe("NIMCache Controller", func() {
},
}

pod := constructPodSpec(nimCache)
pod := constructPodSpec(nimCache, k8sutil.K8s)

err := cli.Create(context.TODO(), pod)
Expect(err).ToNot(HaveOccurred())
Expand Down Expand Up @@ -387,7 +388,7 @@ var _ = Describe("NIMCache Controller", func() {
},
}

job, err := reconciler.constructJob(context.TODO(), nimCache)
job, err := reconciler.constructJob(context.TODO(), nimCache, k8sutil.K8s)
Expect(err).ToNot(HaveOccurred())

Expect(job.Name).To(Equal(getJobName(nimCache)))
Expand Down Expand Up @@ -418,7 +419,7 @@ var _ = Describe("NIMCache Controller", func() {
},
}

job, err := reconciler.constructJob(context.TODO(), nimCache)
job, err := reconciler.constructJob(context.TODO(), nimCache, k8sutil.K8s)
Expect(err).ToNot(HaveOccurred())

Expect(job.Name).To(Equal(getJobName(nimCache)))
Expand All @@ -445,7 +446,7 @@ var _ = Describe("NIMCache Controller", func() {
},
}

job, err := reconciler.constructJob(context.TODO(), nimCache)
job, err := reconciler.constructJob(context.TODO(), nimCache, k8sutil.K8s)
Expect(err).ToNot(HaveOccurred())

Expect(job.Name).To(Equal(getJobName(nimCache)))
Expand All @@ -471,7 +472,7 @@ var _ = Describe("NIMCache Controller", func() {
},
}

job, err := reconciler.constructJob(context.TODO(), nimCache)
job, err := reconciler.constructJob(context.TODO(), nimCache, k8sutil.K8s)
Expect(err).ToNot(HaveOccurred())

err = cli.Create(context.TODO(), job)
Expand Down Expand Up @@ -516,7 +517,7 @@ var _ = Describe("NIMCache Controller", func() {
err := reconciler.Create(context.TODO(), configMap)
Expect(err).ToNot(HaveOccurred())

job, err := reconciler.constructJob(context.TODO(), nimCache)
job, err := reconciler.constructJob(context.TODO(), nimCache, k8sutil.K8s)
Expect(err).ToNot(HaveOccurred())

err = cli.Create(context.TODO(), job)
Expand Down
15 changes: 15 additions & 0 deletions internal/controller/nimservice_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1"
"github.com/NVIDIA/k8s-nim-operator/internal/conditions"
platform "github.com/NVIDIA/k8s-nim-operator/internal/controller/platform"
"github.com/NVIDIA/k8s-nim-operator/internal/k8sutil"
"github.com/NVIDIA/k8s-nim-operator/internal/render"
"github.com/NVIDIA/k8s-nim-operator/internal/shared"
"github.com/go-logr/logr"
Expand Down Expand Up @@ -54,6 +55,7 @@ type NIMServiceReconciler struct {
renderer render.Renderer
Config *rest.Config
Platform platform.Platform
k8sType k8sutil.OrchestratorType
recorder record.EventRecorder
}

Expand All @@ -62,13 +64,21 @@ var _ shared.Reconciler = &NIMServiceReconciler{}

// NewNIMServiceReconciler creates a new reconciler for NIMService with the given platform.
//
// The orchestrator type is looked up once via k8sutil.GetOrchestratorType and
// stored on the reconciler. If that lookup fails, the error is logged and nil
// is returned, so callers must check the result before using it.
func NewNIMServiceReconciler(client client.Client, scheme *runtime.Scheme, updater conditions.Updater, renderer render.Renderer, log logr.Logger, platform platform.Platform) *NIMServiceReconciler {
	// Set container orchestrator type
	k8sType, err := k8sutil.GetOrchestratorType(client)
	if err != nil {
		log.Error(err, "Unable to get container orchestrator type")
		return nil
	}

	return &NIMServiceReconciler{
		Client:   client,
		scheme:   scheme,
		updater:  updater,
		renderer: renderer,
		log:      log,
		Platform: platform,
		k8sType:  k8sType,
	}
}

Expand Down Expand Up @@ -189,6 +199,11 @@ func (r *NIMServiceReconciler) GetEventRecorder() record.EventRecorder {
return r.recorder
}

// GetK8sType returns the orchestrator type held in the reconciler's k8sType
// field.
func (r *NIMServiceReconciler) GetK8sType() k8sutil.OrchestratorType {
return r.k8sType
}

// SetupWithManager sets up the controller with the Manager.
func (r *NIMServiceReconciler) SetupWithManager(mgr ctrl.Manager) error {
r.recorder = mgr.GetEventRecorderFor("nimservice-controller")
Expand Down
8 changes: 8 additions & 0 deletions internal/controller/platform/standalone/nimservice.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1"
"github.com/NVIDIA/k8s-nim-operator/internal/conditions"
"github.com/NVIDIA/k8s-nim-operator/internal/k8sutil"
"github.com/NVIDIA/k8s-nim-operator/internal/render"
rendertypes "github.com/NVIDIA/k8s-nim-operator/internal/render/types"
"github.com/NVIDIA/k8s-nim-operator/internal/shared"
Expand Down Expand Up @@ -75,6 +76,11 @@ func (r *NIMServiceReconciler) GetEventRecorder() record.EventRecorder {
return r.recorder
}

// GetK8sType returns the orchestrator type held in the reconciler's k8sType
// field.
func (r *NIMServiceReconciler) GetK8sType() k8sutil.OrchestratorType {
return r.k8sType
}

func (r *NIMServiceReconciler) cleanupNIMService(ctx context.Context, nimService *appsv1alpha1.NIMService) error {
// All dependent (owned) objects will be automatically garbage collected.
// TODO: Handle any custom cleanup logic for the NIM microservice
Expand Down Expand Up @@ -173,6 +179,8 @@ func (r *NIMServiceReconciler) reconcileNIMService(ctx context.Context, nimServi
var modelPVC *appsv1alpha1.PersistentVolumeClaim
modelProfile := ""

deploymentParams.OrchestratorType = string(r.GetK8sType())

// Select PVC for model store
if nimService.GetNIMCacheName() != "" {
// Fetch PVC for the associated NIMCache instance and mount it
Expand Down
3 changes: 3 additions & 0 deletions internal/controller/platform/standalone/standalone.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1"
"github.com/NVIDIA/k8s-nim-operator/internal/conditions"
"github.com/NVIDIA/k8s-nim-operator/internal/k8sutil"
"github.com/NVIDIA/k8s-nim-operator/internal/render"
"github.com/NVIDIA/k8s-nim-operator/internal/shared"
"github.com/go-logr/logr"
Expand Down Expand Up @@ -59,6 +60,7 @@ type NIMServiceReconciler struct {
updater conditions.Updater
renderer render.Renderer
recorder record.EventRecorder
k8sType k8sutil.OrchestratorType
}

// NewNIMCacheReconciler returns NIMCacheReconciler for standalone mode
Expand All @@ -80,6 +82,7 @@ func NewNIMServiceReconciler(r shared.Reconciler) *NIMServiceReconciler {
log: r.GetLogger(),
updater: r.GetUpdater(),
recorder: r.GetEventRecorder(),
k8sType: r.GetK8sType(),
}
}

Expand Down
Loading

0 comments on commit 4855e39

Please sign in to comment.