Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rte: metrics: merge manifests into existing manifests handling #1130

Merged
merged 4 commits into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 16 additions & 62 deletions controllers/numaresourcesoperator_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ import (
"github.com/k8stopologyawareschedwg/deployer/pkg/deployer/platform"
"github.com/k8stopologyawareschedwg/deployer/pkg/manifests"
apimanifests "github.com/k8stopologyawareschedwg/deployer/pkg/manifests/api"
rtemanifests "github.com/k8stopologyawareschedwg/deployer/pkg/manifests/rte"
k8swgrteupdate "github.com/k8stopologyawareschedwg/deployer/pkg/objectupdate/rte"
nropv1 "github.com/openshift-kni/numaresources-operator/api/numaresourcesoperator/v1"
nodegroupv1 "github.com/openshift-kni/numaresources-operator/api/numaresourcesoperator/v1/helper/nodegroup"
Expand All @@ -61,11 +60,8 @@ import (
"github.com/openshift-kni/numaresources-operator/pkg/hash"
"github.com/openshift-kni/numaresources-operator/pkg/images"
"github.com/openshift-kni/numaresources-operator/pkg/loglevel"
rtemetricsmanifests "github.com/openshift-kni/numaresources-operator/pkg/metrics/manifests/monitor"
"github.com/openshift-kni/numaresources-operator/pkg/objectnames"
apistate "github.com/openshift-kni/numaresources-operator/pkg/objectstate/api"
"github.com/openshift-kni/numaresources-operator/pkg/objectstate/compare"
"github.com/openshift-kni/numaresources-operator/pkg/objectstate/merge"
rtestate "github.com/openshift-kni/numaresources-operator/pkg/objectstate/rte"
rteupdate "github.com/openshift-kni/numaresources-operator/pkg/objectupdate/rte"
"github.com/openshift-kni/numaresources-operator/pkg/status"
Expand All @@ -86,16 +82,15 @@ type poolDaemonSet struct {
// NUMAResourcesOperatorReconciler reconciles a NUMAResourcesOperator object
type NUMAResourcesOperatorReconciler struct {
client.Client
Scheme *runtime.Scheme
Platform platform.Platform
APIManifests apimanifests.Manifests
RTEManifests rtemanifests.Manifests
RTEMetricsManifests rtemetricsmanifests.Manifests
Namespace string
Images images.Data
ImagePullPolicy corev1.PullPolicy
Recorder record.EventRecorder
ForwardMCPConds bool
Scheme *runtime.Scheme
Platform platform.Platform
APIManifests apimanifests.Manifests
RTEManifests rtestate.Manifests
Namespace string
Images images.Data
ImagePullPolicy corev1.PullPolicy
Recorder record.EventRecorder
ForwardMCPConds bool
}

// TODO: narrow down
Expand Down Expand Up @@ -529,30 +524,30 @@ func (r *NUMAResourcesOperatorReconciler) syncNUMAResourcesOperatorResources(ctx

// using a slice of poolDaemonSet instead of a map because Go maps assignment order is not consistent and non-deterministic
dsPoolPairs := []poolDaemonSet{}
err = rteupdate.DaemonSetUserImageSettings(r.RTEManifests.DaemonSet, instance.Spec.ExporterImage, r.Images.Preferred(), r.ImagePullPolicy)
err = rteupdate.DaemonSetUserImageSettings(r.RTEManifests.Core.DaemonSet, instance.Spec.ExporterImage, r.Images.Preferred(), r.ImagePullPolicy)
if err != nil {
return dsPoolPairs, err
}

err = rteupdate.DaemonSetPauseContainerSettings(r.RTEManifests.DaemonSet)
err = rteupdate.DaemonSetPauseContainerSettings(r.RTEManifests.Core.DaemonSet)
if err != nil {
return dsPoolPairs, err
}

err = loglevel.UpdatePodSpec(&r.RTEManifests.DaemonSet.Spec.Template.Spec, manifests.ContainerNameRTE, instance.Spec.LogLevel)
err = loglevel.UpdatePodSpec(&r.RTEManifests.Core.DaemonSet.Spec.Template.Spec, manifests.ContainerNameRTE, instance.Spec.LogLevel)
if err != nil {
return dsPoolPairs, err
}

// ConfigMap should be provided by the kubeletconfig reconciliation loop
if r.RTEManifests.ConfigMap != nil {
cmHash, err := hash.ComputeCurrentConfigMap(ctx, r.Client, r.RTEManifests.ConfigMap)
if r.RTEManifests.Core.ConfigMap != nil {
cmHash, err := hash.ComputeCurrentConfigMap(ctx, r.Client, r.RTEManifests.Core.ConfigMap)
if err != nil {
return dsPoolPairs, err
}
rteupdate.DaemonSetHashAnnotation(r.RTEManifests.DaemonSet, cmHash)
rteupdate.DaemonSetHashAnnotation(r.RTEManifests.Core.DaemonSet, cmHash)
}
rteupdate.SecurityContextConstraint(r.RTEManifests.SecurityContextConstraint, annotations.IsCustomPolicyEnabled(instance.Annotations))
rteupdate.SecurityContextConstraint(r.RTEManifests.Core.SecurityContextConstraint, annotations.IsCustomPolicyEnabled(instance.Annotations))

processor := func(poolName string, gdm *rtestate.GeneratedDesiredManifest) error {
err := daemonsetUpdater(poolName, gdm)
Expand Down Expand Up @@ -584,29 +579,6 @@ func (r *NUMAResourcesOperatorReconciler) syncNUMAResourcesOperatorResources(ctx
}
}

for _, obj := range r.RTEMetricsManifests.ToObjects() {
// Check if the object already exists
existingObj := obj.DeepCopyObject().(client.Object)
err := r.Client.Get(ctx, client.ObjectKeyFromObject(obj), existingObj)
if err != nil && !apierrors.IsNotFound(err) {
return nil, fmt.Errorf("failed to get %s/%s: %w", obj.GetNamespace(), obj.GetName(), err)
}
if apierrors.IsNotFound(err) {
err := controllerutil.SetControllerReference(instance, obj, r.Scheme)
if err != nil {
return nil, fmt.Errorf("failed to set controller reference to %s %s: %w", obj.GetNamespace(), obj.GetName(), err)
}
err = r.Client.Create(ctx, obj)
if err != nil {
return nil, fmt.Errorf("failed to create %s/%s: %w", obj.GetNamespace(), obj.GetName(), err)
}
} else {
if err := updateIfNeeded(ctx, existingObj, obj, r.Client); err != nil {
return nil, err
}
}
}

if len(dsPoolPairs) < len(trees) {
klog.Warningf("daemonset and tree size mismatch: expected %d got in daemonsets %d", len(trees), len(dsPoolPairs))
}
Expand Down Expand Up @@ -809,21 +781,3 @@ func getTreesByNodeGroup(ctx context.Context, cli client.Client, nodeGroups []nr
return nil, fmt.Errorf("unsupported platform")
}
}

func updateIfNeeded(ctx context.Context, existingObj, desiredObj client.Object, cli client.Client) error {
merged, err := merge.MetadataForUpdate(existingObj, desiredObj)
if err != nil {
return fmt.Errorf("could not merge object %s with existing: %w", desiredObj.GetName(), err)
}
isEqual, err := compare.Object(existingObj, merged)
if err != nil {
return fmt.Errorf("could not compare object %s with existing: %w", desiredObj.GetName(), err)
}
if !isEqual {
err = cli.Update(ctx, desiredObj)
if err != nil {
return fmt.Errorf("failed to update %s/%s: %w", desiredObj.GetNamespace(), desiredObj.GetName(), err)
}
}
return nil
}
49 changes: 24 additions & 25 deletions controllers/numaresourcesoperator_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,15 @@ func NewFakeNUMAResourcesOperatorReconciler(plat platform.Platform, platVersion
recorder := record.NewFakeRecorder(bufferSize)

return &NUMAResourcesOperatorReconciler{
Client: fakeClient,
Scheme: scheme.Scheme,
Platform: plat,
APIManifests: apiManifests,
RTEManifests: rteManifests,
RTEMetricsManifests: rtemetricsmanifests,
Namespace: testNamespace,
Client: fakeClient,
Scheme: scheme.Scheme,
Platform: plat,
APIManifests: apiManifests,
RTEManifests: rte.Manifests{
Core: rteManifests,
Metrics: rtemetricsmanifests,
},
Namespace: testNamespace,
Images: images.Data{
Builtin: testImageSpec,
},
Expand Down Expand Up @@ -361,7 +363,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
BeforeEach(func() {
By("Create a new Daemonset with correct name but not owner reference")

ds := reconciler.RTEManifests.DaemonSet.DeepCopy()
ds := reconciler.RTEManifests.Core.DaemonSet.DeepCopy()
ds.Name = objectnames.GetComponentName(nro.Name, pn2)
ds.Namespace = testNamespace

Expand Down Expand Up @@ -902,7 +904,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
ds := &appsv1.DaemonSet{}
Expect(reconciler.Client.Get(context.TODO(), dsKey, ds)).To(Succeed())

Expect(ds.Spec.Template.Spec.Tolerations).To(Equal(reconciler.RTEManifests.DaemonSet.Spec.Template.Spec.Tolerations), "mismatched DS default tolerations")
Expect(ds.Spec.Template.Spec.Tolerations).To(Equal(reconciler.RTEManifests.Core.DaemonSet.Spec.Template.Spec.Tolerations), "mismatched DS default tolerations")
})

It("should add the extra tolerations in the DS objects", func() {
Expand Down Expand Up @@ -1041,7 +1043,7 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
Expect(err).ToNot(HaveOccurred())

Expect(reconciler.Client.Get(context.TODO(), dsKey, ds)).To(Succeed())
Expect(ds.Spec.Template.Spec.Tolerations).To(Equal(reconciler.RTEManifests.DaemonSet.Spec.Template.Spec.Tolerations), "DS tolerations not restored to defaults")
Expect(ds.Spec.Template.Spec.Tolerations).To(Equal(reconciler.RTEManifests.Core.DaemonSet.Spec.Template.Spec.Tolerations), "DS tolerations not restored to defaults")

nroUpdated := &nropv1.NUMAResourcesOperator{}
Expect(reconciler.Client.Get(context.TODO(), client.ObjectKeyFromObject(nro), nroUpdated)).ToNot(HaveOccurred())
Expand Down Expand Up @@ -1481,33 +1483,30 @@ var _ = Describe("Test NUMAResourcesOperator Reconcile", func() {
}
Expect(reconciler.Client.Get(context.TODO(), mcp2DSKey, ds)).ToNot(HaveOccurred())

By("Check All RTE metrics components are created")
for _, obj := range reconciler.RTEMetricsManifests.ToObjects() {
objectKey := client.ObjectKeyFromObject(obj)
switch obj.(type) {
case *corev1.Service:
service := &corev1.Service{}
Expect(reconciler.Client.Get(context.TODO(), objectKey, service)).ToNot(HaveOccurred())
default:
}
serKey := client.ObjectKey{
Name: "numaresources-rte-metrics-service", // TODO: staticize
Namespace: testNamespace,
}
ser := &corev1.Service{}
By("Check All RTE metrics components are created")
Expect(reconciler.Client.Get(context.TODO(), serKey, ser)).ToNot(HaveOccurred())
})
When("daemonsets are ready", func() {
var dsDesiredNumberScheduled int32
var dsNumReady int32
BeforeEach(func() {
dsDesiredNumberScheduled = reconciler.RTEManifests.DaemonSet.Status.DesiredNumberScheduled
dsNumReady = reconciler.RTEManifests.DaemonSet.Status.NumberReady
dsDesiredNumberScheduled = reconciler.RTEManifests.Core.DaemonSet.Status.DesiredNumberScheduled
dsNumReady = reconciler.RTEManifests.Core.DaemonSet.Status.NumberReady

reconciler.RTEManifests.DaemonSet.Status.DesiredNumberScheduled = int32(len(nro.Spec.NodeGroups))
reconciler.RTEManifests.DaemonSet.Status.NumberReady = reconciler.RTEManifests.DaemonSet.Status.DesiredNumberScheduled
reconciler.RTEManifests.Core.DaemonSet.Status.DesiredNumberScheduled = int32(len(nro.Spec.NodeGroups))
reconciler.RTEManifests.Core.DaemonSet.Status.NumberReady = reconciler.RTEManifests.Core.DaemonSet.Status.DesiredNumberScheduled

_, err := reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())
})
AfterEach(func() {
reconciler.RTEManifests.DaemonSet.Status.DesiredNumberScheduled = dsDesiredNumberScheduled
reconciler.RTEManifests.DaemonSet.Status.NumberReady = dsNumReady
reconciler.RTEManifests.Core.DaemonSet.Status.DesiredNumberScheduled = dsDesiredNumberScheduled
reconciler.RTEManifests.Core.DaemonSet.Status.NumberReady = dsNumReady

_, err := reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: key})
Expect(err).ToNot(HaveOccurred())
Expand Down
48 changes: 29 additions & 19 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ import (
rtemetricsmanifests "github.com/openshift-kni/numaresources-operator/pkg/metrics/manifests/monitor"
"github.com/openshift-kni/numaresources-operator/pkg/numaresourcesscheduler/controlplane"
schedmanifests "github.com/openshift-kni/numaresources-operator/pkg/numaresourcesscheduler/manifests/sched"
rtestate "github.com/openshift-kni/numaresources-operator/pkg/objectstate/rte"
rteupdate "github.com/openshift-kni/numaresources-operator/pkg/objectupdate/rte"
schedupdate "github.com/openshift-kni/numaresources-operator/pkg/objectupdate/sched"
"github.com/openshift-kni/numaresources-operator/pkg/version"
Expand Down Expand Up @@ -228,8 +229,19 @@ func main() {
}
klog.InfoS("manifests loaded", "component", "RTE")

rteMetricsManifests, err := rtemetricsmanifests.GetManifests(namespace)
if err != nil {
klog.ErrorS(err, "unable to load the RTE metrics manifests")
os.Exit(1)
}
klog.InfoS("manifests loaded", "component", "RTEMetrics")

if params.renderMode {
os.Exit(manageRendering(params.render, clusterPlatform, apiManifests, rteManifests, namespace, params.enableScheduler))
rteMf := rtestate.Manifests{
Core: rteManifests,
Metrics: rteMetricsManifests,
}
os.Exit(manageRendering(params.render, clusterPlatform, apiManifests, rteMf, namespace, params.enableScheduler))
}

klog.InfoS("metrics server", "enabled", params.enableMetrics, "addr", params.metricsAddr)
Expand Down Expand Up @@ -263,24 +275,21 @@ func main() {
klog.ErrorS(err, "unable to render RTE manifests", "controller", "NUMAResourcesOperator")
os.Exit(1)
}
rteMetricsManifests, err := rtemetricsmanifests.GetManifests(namespace)
if err != nil {
klog.ErrorS(err, "unable to load the RTE metrics manifests")
os.Exit(1)
}

if err = (&controllers.NUMAResourcesOperatorReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("numaresources-controller"),
APIManifests: apiManifests,
RTEManifests: rteManifestsRendered,
RTEMetricsManifests: rteMetricsManifests,
Platform: clusterPlatform,
Images: imgs,
ImagePullPolicy: pullPolicy,
Namespace: namespace,
ForwardMCPConds: params.enableMCPCondsForward,
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("numaresources-controller"),
APIManifests: apiManifests,
RTEManifests: rtestate.Manifests{
Core: rteManifestsRendered,
Metrics: rteMetricsManifests,
},
Platform: clusterPlatform,
Images: imgs,
ImagePullPolicy: pullPolicy,
Namespace: namespace,
ForwardMCPConds: params.enableMCPCondsForward,
}).SetupWithManager(mgr); err != nil {
klog.ErrorS(err, "unable to create controller", "controller", "NUMAResourcesOperator")
os.Exit(1)
Expand Down Expand Up @@ -357,7 +366,7 @@ func manageIntrospection() int {
return 0
}

func manageRendering(render RenderParams, clusterPlatform platform.Platform, apiMf apimanifests.Manifests, rteMf rtemanifests.Manifests, namespace string, enableScheduler bool) int {
func manageRendering(render RenderParams, clusterPlatform platform.Platform, apiMf apimanifests.Manifests, rteMf rtestate.Manifests, namespace string, enableScheduler bool) int {
if render.NRTCRD {
if err := renderObjects(apiMf.ToObjects()); err != nil {
klog.ErrorS(err, "unable to render manifests")
Expand Down Expand Up @@ -392,12 +401,13 @@ func manageRendering(render RenderParams, clusterPlatform platform.Platform, api
User: render.Image.Exporter,
Builtin: images.SpecPath(),
}
mf, err := renderRTEManifests(rteMf, render.Namespace, imgs)
mf, err := renderRTEManifests(rteMf.Core, render.Namespace, imgs)
if err != nil {
klog.ErrorS(err, "unable to render RTE manifests")
return 1
}
objs = append(objs, mf.ToObjects()...)
objs = append(objs, rteMf.Metrics.ToObjects()...) // no rendering needed

if err := renderObjects(objs); err != nil {
klog.ErrorS(err, "unable to render manifests")
Expand Down
Loading
Loading