Skip to content

Commit

Permalink
Performance metrics added (#415)
Browse files Browse the repository at this point in the history
Co-authored-by: Christian Blæsbjerg <[email protected]>
  • Loading branch information
bdumpp and ChrBlaesBD authored Jan 8, 2025
1 parent 10f0ec0 commit c65e885
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 29 deletions.
42 changes: 38 additions & 4 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,26 +156,60 @@ func main() {
}

// System Controller
metric := prometheus.NewGaugeVec(
systemReadyMetric := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "controller_system_status_ready",
Help: "Show if a system is in status ready",
},
[]string{"system", "namespace"},
[]string{"system_name", "namespace", "system_id"},
)

if err := metrics.Registry.Register(metric); err != nil {
if err := metrics.Registry.Register(systemReadyMetric); err != nil {
err := errors.Wrap(err, "could not register controller_system_status_ready metric")
log.Error(err, err.Error())
exit(err)
}

reconcileSegmentTimeMetric := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "controller_system_reconcile_segment_seconds",
Help: "Time taken to perform one segment of reconciling a system",
Buckets: prometheus.DefBuckets,
}, []string{"segment"},
)

if err := metrics.Registry.Register(reconcileSegmentTimeMetric); err != nil {
err := errors.Wrap(err, "could not register reconcileSegmentTimeMetric")
log.Error(err, err.Error())
exit(err)
}

reconcileTimeMetric := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "controller_system_reconcile_seconds",
Help: "Time taken to reconcile a system",
Buckets: prometheus.DefBuckets,
}, []string{"result"},
)

if err := metrics.Registry.Register(reconcileTimeMetric); err != nil {
err := errors.Wrap(err, "could not register reconcileTimeMetric")
log.Error(err, err.Error())
exit(err)
}

systemMetrics := &controllers.SystemReconcilerMetrics{
ControllerSystemStatusReady: systemReadyMetric,
ReconcileSegmentTime: reconcileSegmentTimeMetric,
ReconcileTime: reconcileTimeMetric,
}

r1 := &controllers.SystemReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Styra: styraClient,
Recorder: mgr.GetEventRecorderFor("system-controller"),
Metric: metric,
Metrics: systemMetrics,
Config: ctrlConfig,
}

Expand Down
3 changes: 2 additions & 1 deletion config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ kind: Kustomization
images:
- name: controller
newName: controller
newTag: latest
newTag: latest

137 changes: 113 additions & 24 deletions internal/controller/styra/system_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"net/http"
"path"
"reflect"
"time"

"github.com/go-logr/logr"
"github.com/pkg/errors"
Expand Down Expand Up @@ -56,14 +57,21 @@ import (
"github.com/bankdata/styra-controller/pkg/styra"
)

// SystemReconcilerMetrics holds the metrics for the SystemReconciler.
type SystemReconcilerMetrics struct {
// ControllerSystemStatusReady is a gauge labelled by system name, namespace
// and system ID; updateMetric sets it to 1 when the System is ready and to 0
// otherwise.
ControllerSystemStatusReady *prometheus.GaugeVec
// ReconcileSegmentTime records, in seconds, how long individual steps of a
// reconcile take; each observation is labelled with the name of the step.
ReconcileSegmentTime *prometheus.HistogramVec
// ReconcileTime records, in seconds, the duration of a whole Reconcile call,
// labelled with its outcome ("ok", "error" or "delete").
ReconcileTime *prometheus.HistogramVec
}

// SystemReconciler reconciles a System object.
type SystemReconciler struct {
// Client is the embedded Kubernetes client; Reconcile uses its Get and
// Status().Update methods on System resources.
client.Client
Scheme *runtime.Scheme
// Styra is the client for the Styra API (used, for example, to fetch the
// OPA config of a system).
Styra styra.ClientInterface
WebhookClient webhook.Client
// Recorder is the Kubernetes event recorder; presumably used to emit error
// events for a System — confirm against recordErrorEvent.
Recorder record.EventRecorder
// NOTE(review): both Metric and Metrics are listed here. This looks like a
// rendered diff in which Metric was replaced by Metrics — confirm that only
// Metrics exists in the actual file.
Metric *prometheus.GaugeVec
// Metrics groups the Prometheus collectors updated during reconciliation.
// updateMetric and deleteMetrics tolerate a nil value.
Metrics *SystemReconcilerMetrics
// Config is the controller configuration; the reconciler reads
// ControllerClass, EnableMigrations, SystemPrefix and SystemSuffix from it.
Config *configv2alpha2.ProjectConfig
}

Expand All @@ -78,26 +86,28 @@ type SystemReconciler struct {
// ensuring that the current state of the System resource is reconciled
// towards the desired state.
func (r *SystemReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
start := time.Now()
log := log.FromContext(ctx)
log.Info("Reconciliation begins")

log.Info("Fetching System")
var system v1beta1.System
if err := r.Get(ctx, req.NamespacedName, &system); err != nil {
r.Metrics.ReconcileTime.WithLabelValues("delete").Observe(time.Since(start).Seconds())
if k8serrors.IsNotFound(err) {
log.Info("Could not find System in kubernetes")
r.deleteMetric(req)
r.deleteMetrics(req)
return ctrl.Result{}, nil
}
r.deleteMetric(req)
r.deleteMetrics(req)
return ctrl.Result{}, errors.Wrap(err, "unable to fetch System")
}

log = log.WithValues("systemID", system.Status.ID)

if !labels.ControllerClassMatches(&system, r.Config.ControllerClass) {
log.Info("This is not a System we are managing. Skipping reconciliation.")
r.deleteMetric(req)
r.deleteMetrics(req)
return ctrl.Result{}, nil
}

Expand All @@ -108,23 +118,28 @@ func (r *SystemReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr

if system.ObjectMeta.DeletionTimestamp.IsZero() {
res, err = r.reconcile(ctx, log, &system)
r.updateMetric(req, system.Status.Ready)
r.updateMetric(req, system.Status.ID, system.Status.Ready)
} else {
res, err = r.reconcileDeletion(ctx, log, &system)
if err != nil {
r.updateMetric(req, system.Status.Ready)
r.updateMetric(req, system.Status.ID, system.Status.Ready)
} else {
r.deleteMetric(req)
r.deleteMetrics(req)
r.Metrics.ReconcileTime.WithLabelValues("delete").Observe(time.Since(start).Seconds())
return res, err
}
}

if err != nil {
r.recordErrorEvent(&system, err)
r.setSystemStatusError(&system, err)

r.Metrics.ReconcileTime.WithLabelValues("error").Observe(time.Since(start).Seconds())
if err := r.Status().Update(ctx, &system); err != nil {
return res, errors.Wrap(err, "could not set failure status on System")
}
} else {
r.Metrics.ReconcileTime.WithLabelValues("ok").Observe(time.Since(start).Seconds())
}
return res, err
}
Expand All @@ -142,22 +157,27 @@ func (r *SystemReconciler) setSystemStatusError(System *v1beta1.System, err erro
}
}

func (r *SystemReconciler) updateMetric(req ctrl.Request, ready bool) {
if r.Metric == nil {
func (r *SystemReconciler) updateMetric(req ctrl.Request, systemID string, ready bool) {
if r.Metrics == nil || r.Metrics.ControllerSystemStatusReady == nil {
return
}

var value float64
if ready {
value = 1
}
r.Metric.WithLabelValues(req.Name, req.Namespace).Set(value)
r.Metrics.ControllerSystemStatusReady.WithLabelValues(req.Name, req.Namespace, systemID).Set(value)
}

func (r *SystemReconciler) deleteMetric(req ctrl.Request) {
if r.Metric == nil {
func (r *SystemReconciler) deleteMetrics(req ctrl.Request) {
if r.Metrics == nil || r.Metrics.ControllerSystemStatusReady == nil {
return
}
r.Metric.Delete(prometheus.Labels{"System": req.Name, "namespace": req.Namespace})
if deleted := r.Metrics.ControllerSystemStatusReady.DeletePartialMatch(
prometheus.Labels{"System": req.Name, "namespace": req.Namespace},
); deleted != 1 {
log.Log.Error(errors.New("Failed to delete metric"), "Incorrect number of deleted metrics", "deleted", deleted)
}
}

func (r *SystemReconciler) recordErrorEvent(system *v1beta1.System, err error) {
Expand Down Expand Up @@ -239,27 +259,44 @@ func (r *SystemReconciler) reconcile(

if r.Config.EnableMigrations && systemID == "" && migrationID != "" {
log.Info(fmt.Sprintf("Use migrationId(%s) to fetch system from Styra DAS", migrationID))

getSystemStart := time.Now()
cfg, err = r.getSystem(ctx, log, migrationID)
r.Metrics.ReconcileSegmentTime.WithLabelValues("getSystem").Observe(time.Since(getSystemStart).Seconds())

if err != nil {
return ctrl.Result{}, err
}
if err := r.reconcileID(ctx, log, system, migrationID); err != nil {
return ctrl.Result{}, err
}
} else if systemID != "" {
getSystemStart := time.Now()
cfg, err = r.getSystem(ctx, log, systemID)
r.Metrics.ReconcileSegmentTime.WithLabelValues("getSystem").Observe(time.Since(getSystemStart).Seconds())

if err != nil {
var serr *styra.HTTPError
if errors.As(err, &serr) && serr.StatusCode == http.StatusNotFound {
createSystemStart := time.Now()
res, err := r.createSystem(ctx, log, system)
r.Metrics.ReconcileSegmentTime.WithLabelValues("createSystem").
Observe(time.Since(createSystemStart).Seconds())

if err != nil {
return ctrl.Result{}, err
}
if err := r.deleteDefaultPolicies(ctx, log, res.SystemConfig.ID); err != nil {
deleteDefaultPolicyStart := time.Now()
err = r.deleteDefaultPolicies(ctx, log, res.SystemConfig.ID)
r.Metrics.ReconcileSegmentTime.WithLabelValues("deleteDefaultPolicies").
Observe(time.Since(deleteDefaultPolicyStart).Seconds())
if err != nil {
return ctrl.Result{}, err
}
if err := r.reconcileID(ctx, log, system, res.SystemConfig.ID); err != nil {
reconcileIDStart := time.Now()
err = r.reconcileID(ctx, log, system, res.SystemConfig.ID)
r.Metrics.ReconcileSegmentTime.WithLabelValues("reconcileID").
Observe(time.Since(reconcileIDStart).Seconds())
if err != nil {
return ctrl.Result{}, err
}
} else {
Expand All @@ -269,60 +306,102 @@ func (r *SystemReconciler) reconcile(
} else {
displayName := system.DisplayName(r.Config.SystemPrefix, r.Config.SystemSuffix)

getSystemByNameStart := time.Now()
cfg, err = r.getSystemByName(ctx, log, displayName)
r.Metrics.ReconcileSegmentTime.WithLabelValues("getSystemByName").
Observe(time.Since(getSystemByNameStart).Seconds())
if err != nil {
return ctrl.Result{}, err
}
if cfg != nil {
if err := r.reconcileID(ctx, log, system, cfg.ID); err != nil {
reconcileIDStart := time.Now()
err = r.reconcileID(ctx, log, system, cfg.ID)
r.Metrics.ReconcileSegmentTime.WithLabelValues("reconcileID").
Observe(time.Since(reconcileIDStart).Seconds())
if err != nil {
return ctrl.Result{}, err
}
} else {
createSystemStart := time.Now()
res, err := r.createSystem(ctx, log, system)
r.Metrics.ReconcileSegmentTime.WithLabelValues("createSystem").
Observe(time.Since(createSystemStart).Seconds())

if err != nil {
return ctrl.Result{}, err
}
if err := r.deleteDefaultPolicies(ctx, log, res.SystemConfig.ID); err != nil {
deleteDefaultPolicyStart := time.Now()
err = r.deleteDefaultPolicies(ctx, log, res.SystemConfig.ID)
r.Metrics.ReconcileSegmentTime.WithLabelValues("deleteDefaultPolicies").
Observe(time.Since(deleteDefaultPolicyStart).Seconds())
if err != nil {
return ctrl.Result{}, err
}
if err := r.reconcileID(ctx, log, system, res.SystemConfig.ID); err != nil {
reconcileIDStart := time.Now()
err = r.reconcileID(ctx, log, system, res.SystemConfig.ID)
r.Metrics.ReconcileSegmentTime.WithLabelValues("reconcileID").
Observe(time.Since(reconcileIDStart).Seconds())

if err != nil {
return ctrl.Result{}, err
}
}
}

system.SetCondition(v1beta1.ConditionTypeCreatedInStyra, metav1.ConditionTrue)

if result, err := r.reconcileCredentials(ctx, log, system); err != nil {
reconcileCredentialsStart := time.Now()
result, err := r.reconcileCredentials(ctx, log, system)
r.Metrics.ReconcileSegmentTime.WithLabelValues("reconcileCredentials").
Observe(time.Since(reconcileCredentialsStart).Seconds())
if err != nil {
return result, err
}
system.SetCondition(v1beta1.ConditionTypeGitCredentialsUpdated, metav1.ConditionTrue)

if r.systemNeedsUpdate(log, system, cfg) {
if cfg, err = r.updateSystem(ctx, log, system); err != nil {
updateSystemStart := time.Now()
cfg, err = r.updateSystem(ctx, log, system)
r.Metrics.ReconcileSegmentTime.WithLabelValues("updateSystem").
Observe(time.Since(updateSystemStart).Seconds())
if err != nil {
return ctrl.Result{}, err
}
}
system.SetCondition(v1beta1.ConditionTypeSystemConfigUpdated, metav1.ConditionTrue)

if result, err := r.reconcileSubjects(ctx, log, system); err != nil {
reconcileSubjectsStart := time.Now()
result, err = r.reconcileSubjects(ctx, log, system)
r.Metrics.ReconcileSegmentTime.WithLabelValues("reconcileSubjects").
Observe(time.Since(reconcileSubjectsStart).Seconds())
if err != nil {
return result, err
}
system.SetCondition(v1beta1.ConditionTypeSubjectsUpdated, metav1.ConditionTrue)

if result, err := r.reconcileDatasources(ctx, log, system, cfg); err != nil {
reconcileDatasourcesStart := time.Now()
result, err = r.reconcileDatasources(ctx, log, system, cfg)
r.Metrics.ReconcileSegmentTime.WithLabelValues("reconcileDatasources").
Observe(time.Since(reconcileDatasourcesStart).Seconds())
if err != nil {
return result, err
}
system.SetCondition(v1beta1.ConditionTypeDatasourcesUpdated, metav1.ConditionTrue)

getOPAConfigStart := time.Now()
opaConfig, err := r.Styra.GetOPAConfig(ctx, system.Status.ID)
r.Metrics.ReconcileSegmentTime.WithLabelValues("getOPAConfig").
Observe(time.Since(getOPAConfigStart).Seconds())
if err != nil {
return ctrl.Result{}, ctrlerr.Wrap(err, "Could not get OPA config from styra API").
WithEvent("ErrorFetchOPAConfig").
WithSystemCondition(v1beta1.ConditionTypeOPATokenUpdated)
}

reconcileOPATokenStart := time.Now()
result, updatedToken, err := r.reconcileOPAToken(ctx, log, system, opaConfig.Token)
r.Metrics.ReconcileSegmentTime.WithLabelValues("reconcileOPAToken").
Observe(time.Since(reconcileOPATokenStart).Seconds())
if err != nil {
return result, err
}
Expand All @@ -331,7 +410,10 @@ func (r *SystemReconciler) reconcile(
}
system.SetCondition(v1beta1.ConditionTypeOPATokenUpdated, metav1.ConditionTrue)

reconcileOPAConfigMapStart := time.Now()
result, updatedOPAConfigMap, err := r.reconcileOPAConfigMap(ctx, log, system, opaConfig)
r.Metrics.ReconcileSegmentTime.WithLabelValues("reconcileOPAConfigMap").
Observe(time.Since(reconcileOPAConfigMapStart).Seconds())
if err != nil {
return result, err
}
Expand All @@ -340,7 +422,10 @@ func (r *SystemReconciler) reconcile(
}
system.SetCondition(v1beta1.ConditionTypeOPAConfigMapUpdated, metav1.ConditionTrue)

reconcileSLPConfigMapStart := time.Now()
result, updatedSLPConfigMap, err := r.reconcileSLPConfigMap(ctx, log, system, opaConfig)
r.Metrics.ReconcileSegmentTime.WithLabelValues("reconcileSLPConfigMap").
Observe(time.Since(reconcileSLPConfigMapStart).Seconds())
if err != nil {
return result, err
}
Expand All @@ -352,7 +437,11 @@ func (r *SystemReconciler) reconcile(
system.Status.Ready = true
system.Status.Phase = v1beta1.SystemPhaseCreated
system.Status.FailureMessage = ""
if err := r.Status().Update(ctx, system); err != nil {

updateStatusStart := time.Now()
err = r.Status().Update(ctx, system)
r.Metrics.ReconcileSegmentTime.WithLabelValues("updateStatus").Observe(time.Since(updateStatusStart).Seconds())
if err != nil {
return ctrl.Result{}, ctrlerr.Wrap(err, "Could not change status.phase to Created").
WithEvent("ErrorPhaseToCreated")
}
Expand Down
Loading

0 comments on commit c65e885

Please sign in to comment.