diff --git a/.github/workflows/build-push.yml b/.github/workflows/build-push.yml index 574893879..435d75ba2 100644 --- a/.github/workflows/build-push.yml +++ b/.github/workflows/build-push.yml @@ -15,7 +15,6 @@ jobs: strategy: fail-fast: false matrix: - arch: [arm64] target: - name: "dev" ref: "refs/heads/master" diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index fe0b9be71..2a7bf1aee 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -7,9 +7,6 @@ jobs: build: name: pull-request-check runs-on: ubuntu-latest - strategy: - matrix: - arch: [arm64] steps: - uses: actions/checkout@v4 - name: Set up Docker Buildx @@ -36,9 +33,6 @@ jobs: radix-operator-test: name: Pipeline-runner unit tests runs-on: ubuntu-latest - strategy: - matrix: - arch: [arm64] steps: - uses: actions/checkout@v4 - uses: actions/setup-go@v5 @@ -56,9 +50,6 @@ jobs: pipeline-runner-test: name: Pipeline-runner unit tests runs-on: ubuntu-latest - strategy: - matrix: - arch: [arm64] steps: - uses: actions/checkout@v4 - uses: actions/setup-go@v5 @@ -76,9 +67,6 @@ jobs: radix-operator-lint: name: Lint runs-on: ubuntu-latest - strategy: - matrix: - arch: [arm64] steps: - uses: actions/checkout@v4 with: @@ -95,9 +83,6 @@ jobs: verify-code-generation: name: Verify Code Generation runs-on: ubuntu-latest - strategy: - matrix: - arch: [arm64] steps: - uses: actions/checkout@v4 - uses: actions/setup-go@v5 diff --git a/Makefile b/Makefile index 76646f75d..6dfc163c0 100644 --- a/Makefile +++ b/Makefile @@ -86,6 +86,7 @@ mocks: bootstrap mockgen -source ./pkg/apis/dnsalias/syncer.go -destination ./pkg/apis/dnsalias/syncer_mock.go -package dnsalias mockgen -source ./radix-operator/dnsalias/internal/syncerfactory.go -destination ./radix-operator/dnsalias/internal/syncerfactory_mock.go -package internal mockgen -source ./radix-operator/common/handler.go -destination ./radix-operator/common/handler_mock.go -package common + mockgen -source ./pkg/apis/job/job_history.go -destination 
./radix-operator/job/job_history_mock.go -package job mockgen -source ./pipeline-runner/internal/wait/job.go -destination ./pipeline-runner/internal/wait/job_mock.go -package wait mockgen -source ./pipeline-runner/internal/watcher/radix_deployment_watcher.go -destination ./pipeline-runner/internal/watcher/radix_deployment_watcher_mock.go -package watcher diff --git a/charts/radix-operator/Chart.yaml b/charts/radix-operator/Chart.yaml index f824d87a9..3d5cae51f 100644 --- a/charts/radix-operator/Chart.yaml +++ b/charts/radix-operator/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: radix-operator -version: 1.37.8 -appVersion: 1.57.7 +version: 1.37.9 +appVersion: 1.57.8 kubeVersion: ">=1.24.0" description: Radix Operator keywords: diff --git a/charts/radix-operator/templates/deployment.yaml b/charts/radix-operator/templates/deployment.yaml index 0f28fe8f1..eb6634714 100644 --- a/charts/radix-operator/templates/deployment.yaml +++ b/charts/radix-operator/templates/deployment.yaml @@ -87,6 +87,8 @@ spec: value: {{ .Values.deploymentsPerEnvironmentHistoryLimit | quote }} - name: RADIX_PIPELINE_JOBS_HISTORY_LIMIT value: {{ .Values.pipelineJobsHistoryLimit | quote }} + - name: RADIX_PIPELINE_JOBS_HISTORY_PERIOD_LIMIT + value: {{ .Values.pipelineJobsHistoryPeriodLimit | quote }} - name: RADIX_TEKTON_IMAGE value: {{ .Values.radixTekton }} - name: RADIX_IMAGE_BUILDER diff --git a/charts/radix-operator/values.yaml b/charts/radix-operator/values.yaml index 94533a863..d5137bd5a 100644 --- a/charts/radix-operator/values.yaml +++ b/charts/radix-operator/values.yaml @@ -161,6 +161,7 @@ radixGroups: deploymentsPerEnvironmentHistoryLimit: 10 pipelineJobsHistoryLimit: 5 +pipelineJobsHistoryPeriodLimit: "720h" logLevel: "INFO" logPretty: false oauthProxyDefaultIssuerUrl: https://login.microsoftonline.com/3aa4a235-b6e2-48d5-9195-7fcf05b459b0/v2.0 diff --git a/operator.Dockerfile b/operator.Dockerfile index c7e39ba1f..72e5b07e1 100644 --- a/operator.Dockerfile +++ b/operator.Dockerfile 
@@ -1,30 +1,24 @@ -FROM golang:1.22-alpine3.20 as base -ENV GO111MODULE=on -RUN apk update && \ - apk add git ca-certificates curl && \ - apk add --no-cache gcc musl-dev +FROM --platform=$BUILDPLATFORM docker.io/golang:1.22.5-alpine3.20 AS builder +ARG TARGETARCH +ENV CGO_ENABLED=0 \ + GOOS=linux \ + GOARCH=${TARGETARCH} -WORKDIR /go/src/github.com/equinor/radix-operator/ +WORKDIR /src -# Install project dependencies -COPY go.mod go.sum ./ +COPY ./go.mod ./go.sum ./ RUN go mod download -# Copy project code COPY ./radix-operator ./radix-operator COPY ./pkg ./pkg +WORKDIR /src/radix-operator +RUN go build -ldflags="-s -w" -o /build/radix-operator + +# Final stage, ref https://github.com/GoogleContainerTools/distroless/blob/main/base/README.md for distroless +FROM gcr.io/distroless/static +WORKDIR /app +COPY --from=builder /build/radix-operator . +USER 1000 +ENTRYPOINT ["/app/radix-operator"] -FROM base as builder -# Build -WORKDIR /go/src/github.com/equinor/radix-operator/radix-operator/ -RUN CGO_ENABLED=0 GOOS=linux go build -ldflags "-s -w" -a -installsuffix cgo -o ./rootfs/radix-operator -RUN addgroup -S -g 1000 radix-operator -RUN adduser -S -u 1000 -G radix-operator radix-operator -# Run operator -FROM scratch -COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ -COPY --from=builder /etc/passwd /etc/passwd -COPY --from=builder /go/src/github.com/equinor/radix-operator/radix-operator/rootfs/radix-operator /usr/local/bin/radix-operator -USER radix-operator -ENTRYPOINT ["/usr/local/bin/radix-operator"] diff --git a/pipeline.Dockerfile b/pipeline.Dockerfile index c348cec38..542758065 100644 --- a/pipeline.Dockerfile +++ b/pipeline.Dockerfile @@ -1,30 +1,21 @@ -FROM golang:1.22-alpine3.20 as base +FROM --platform=$BUILDPLATFORM docker.io/golang:1.22.5-alpine3.20 AS builder +ARG TARGETARCH +ENV CGO_ENABLED=0 \ + GOOS=linux \ + GOARCH=${TARGETARCH} -RUN apk update && \ - apk add ca-certificates curl git && \ - apk add --no-cache gcc musl-dev +WORKDIR 
/src -WORKDIR /go/src/github.com/equinor/radix-operator/ - -# Install project dependencies -COPY go.mod go.sum ./ +COPY ./go.mod ./go.sum ./ RUN go mod download - -# Copy project code COPY ./pipeline-runner ./pipeline-runner COPY ./pkg ./pkg +WORKDIR /src/pipeline-runner +RUN go build -ldflags="-s -w" -o /build/pipeline-runner -# Build -FROM base as builder -WORKDIR /go/src/github.com/equinor/radix-operator/pipeline-runner/ -RUN CGO_ENABLED=0 GOOS=linux go build -ldflags "-s -w" -a -installsuffix cgo -o ./rootfs/pipeline-runner -RUN adduser -D -g '' radix-pipeline - -# Run operator -FROM scratch -COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ -COPY --from=builder /etc/passwd /etc/passwd -COPY --from=builder /go/src/github.com/equinor/radix-operator/pipeline-runner/rootfs/pipeline-runner /usr/local/bin/pipeline-runner - -USER radix-pipeline -ENTRYPOINT ["/usr/local/bin/pipeline-runner"] +# Final stage, ref https://github.com/GoogleContainerTools/distroless/blob/main/base/README.md for distroless +FROM gcr.io/distroless/static +WORKDIR /app +COPY --from=builder /build/pipeline-runner . 
+USER 1000 +ENTRYPOINT ["/app/pipeline-runner"] \ No newline at end of file diff --git a/pkg/apis/config/pipelinejob/config.go b/pkg/apis/config/pipelinejob/config.go index d73f3f810..34037f210 100644 --- a/pkg/apis/config/pipelinejob/config.go +++ b/pkg/apis/config/pipelinejob/config.go @@ -1,10 +1,15 @@ package pipelinejob -import "k8s.io/apimachinery/pkg/api/resource" +import ( + "time" + + "k8s.io/apimachinery/pkg/api/resource" +) // Config for pipeline jobs type Config struct { PipelineJobsHistoryLimit int + PipelineJobsHistoryPeriodLimit time.Duration DeploymentsHistoryLimitPerEnvironment int AppBuilderResourcesLimitsMemory *resource.Quantity AppBuilderResourcesRequestsCPU *resource.Quantity diff --git a/pkg/apis/defaults/environment_variables.go b/pkg/apis/defaults/environment_variables.go index 67561a7b0..01ea21047 100644 --- a/pkg/apis/defaults/environment_variables.go +++ b/pkg/apis/defaults/environment_variables.go @@ -13,9 +13,12 @@ const ( // DeploymentsHistoryLimitEnvironmentVariable Controls the number of RDs we can have in a environment DeploymentsHistoryLimitEnvironmentVariable = "RADIX_DEPLOYMENTS_PER_ENVIRONMENT_HISTORY_LIMIT" - // PipelineJobsHistoryLimitEnvironmentVariable Controls the number of RJs we can have in an app namespace, per groups by branch and status + // PipelineJobsHistoryLimitEnvironmentVariable Controls the number of RJs should exist in an app namespace, per groups by environment and status PipelineJobsHistoryLimitEnvironmentVariable = "RADIX_PIPELINE_JOBS_HISTORY_LIMIT" + // PipelineJobsHistoryPeriodLimitEnvironmentVariable Controls how long an RJ should exist in an app namespace, per groups by environment and status + PipelineJobsHistoryPeriodLimitEnvironmentVariable = "RADIX_PIPELINE_JOBS_HISTORY_PERIOD_LIMIT" + // ClusternameEnvironmentVariable The name of the cluster ClusternameEnvironmentVariable = "RADIX_CLUSTERNAME" diff --git a/pkg/apis/job/job.go b/pkg/apis/job/job.go index 6fa54a8fb..89194a724 100644 --- 
a/pkg/apis/job/job.go +++ b/pkg/apis/job/job.go @@ -24,8 +24,6 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/selection" "k8s.io/client-go/kubernetes" "k8s.io/client-go/util/retry" ) @@ -100,10 +98,6 @@ func (job *Job) OnSync(ctx context.Context) error { return err } } - - job.maintainHistoryLimit(ctx) - job.garbageCollectConfigMaps(ctx) - return nil } @@ -711,44 +705,3 @@ func (job *Job) updateRadixJobStatus(ctx context.Context, rj *v1.RadixJob, chang }) return err } - -func (job *Job) garbageCollectConfigMaps(ctx context.Context) { - namespace := job.radixJob.GetNamespace() - radixJobConfigMaps, err := job.kubeutil.ListConfigMapsWithSelector(ctx, namespace, getRadixJobNameExistsSelector().String()) - if err != nil { - log.Ctx(ctx).Warn().Err(err).Msgf("Failed to get ConfigMaps while garbage collecting config-maps in %s", namespace) - return - } - radixJobNameSet, err := job.getRadixJobNameSet(ctx) - if err != nil { - log.Ctx(ctx).Warn().Err(err).Msg("Failed to get RadixJob name set") - return - } - for _, configMap := range radixJobConfigMaps { - jobName := configMap.GetLabels()[kube.RadixJobNameLabel] - if _, radixJobExists := radixJobNameSet[jobName]; !radixJobExists { - log.Ctx(ctx).Debug().Msgf("Delete ConfigMap %s in %s", configMap.GetName(), configMap.GetNamespace()) - err := job.kubeutil.DeleteConfigMap(ctx, configMap.GetNamespace(), configMap.GetName()) - if err != nil { - log.Ctx(ctx).Warn().Err(err).Msgf("failed to delete ConfigMap %s while garbage collecting config-maps in %s", configMap.GetName(), namespace) - } - } - } -} - -func (job *Job) getRadixJobNameSet(ctx context.Context) (map[string]bool, error) { - radixJobs, err := job.getAllRadixJobs(ctx) - if err != nil { - return nil, fmt.Errorf("failed to list RadixJobs: %w", err) - } - radixJobNameSet := make(map[string]bool) - for _, radixJob := range radixJobs { - 
radixJobNameSet[radixJob.GetName()] = true - } - return radixJobNameSet, nil -} - -func getRadixJobNameExistsSelector() labels.Selector { - requirement, _ := labels.NewRequirement(kube.RadixJobNameLabel, selection.Exists, []string{}) - return labels.NewSelector().Add(*requirement) -} diff --git a/pkg/apis/job/job_history.go b/pkg/apis/job/job_history.go index bb2d6f4be..9d6a9dc12 100644 --- a/pkg/apis/job/job_history.go +++ b/pkg/apis/job/job_history.go @@ -2,164 +2,273 @@ package job import ( "context" + "errors" "fmt" - "strings" + "sync" + "time" + "github.com/equinor/radix-common/utils/slice" "github.com/equinor/radix-operator/pkg/apis/kube" - v1 "github.com/equinor/radix-operator/pkg/apis/radix/v1" + radixv1 "github.com/equinor/radix-operator/pkg/apis/radix/v1" "github.com/equinor/radix-operator/pkg/apis/utils" + radixclient "github.com/equinor/radix-operator/pkg/client/clientset/versioned" "github.com/rs/zerolog/log" k8errors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/selection" ) -type radixJobsWithRadixDeployments map[string]v1.RadixDeployment -type radixJobsForBranches map[string][]v1.RadixJob -type radixJobsForConditions map[v1.RadixJobCondition]radixJobsForBranches +// History Interface for job History +type History interface { + // Cleanup the pipeline job history for the Radix application + Cleanup(ctx context.Context, appName string) error +} + +type history struct { + namespacesRequestsToCleanup sync.Map + radixClient radixclient.Interface + historyLimit int + kubeUtil *kube.Kube + historyPeriodLimit time.Duration +} -func (job *Job) maintainHistoryLimit(ctx context.Context) { - radixJobs, err := job.getAllRadixJobs(ctx) +// NewHistory Constructor for job History +func NewHistory(radixClient radixclient.Interface, kubeUtil *kube.Kube, historyLimit int, historyPeriodLimit time.Duration) History { + return &history{ + radixClient: radixClient, + 
historyLimit: historyLimit, + historyPeriodLimit: historyPeriodLimit, + kubeUtil: kubeUtil, + } +} + +// Cleanup the pipeline job history +func (h *history) Cleanup(ctx context.Context, appName string) error { + namespace := utils.GetAppNamespace(appName) + if _, ok := h.namespacesRequestsToCleanup.LoadOrStore(namespace, struct{}{}); ok { + return nil // a request to clean up history in this namespace already exists + } + defer h.namespacesRequestsToCleanup.Delete(namespace) + if err := h.garbageCollectRadixJobs(ctx, appName); err != nil { + return err + } + return h.garbageCollectConfigMaps(ctx, namespace) +} + +type radixJobsWithRadixDeployments map[string]radixv1.RadixDeployment +type radixJobsForBranches map[string][]radixv1.RadixJob +type radixJobsForConditionsMap map[radixv1.RadixJobCondition]radixJobsForBranches +type radixJobsNamesMap map[string]struct{} + +func (h *history) garbageCollectRadixJobs(ctx context.Context, appName string) error { + namespace := utils.GetAppNamespace(appName) + radixJobs, err := h.getAllRadixJobs(ctx, namespace) if err != nil { - log.Ctx(ctx).Warn().Err(err).Msg("failed to get RadixJob in maintain job history") - return + return err } if len(radixJobs) == 0 { - return + return nil // no need to delete anything or the active job is already completed + } + ra, err := h.radixClient.RadixV1().RadixApplications(namespace).Get(ctx, appName, metav1.GetOptions{}) + if err != nil { + // RadixApplication may not exist if this is the first job for a new application + if k8errors.IsNotFound(err) { + return nil // no need to delete anything + } + return err } - radixJobsWithRDs, err := job.getRadixJobsWithRadixDeployments(ctx) + radixJobsWithRDs, err := h.getRadixJobsMapToRadixDeployments(ctx, appName, ra) if err != nil { - log.Ctx(ctx).Warn().Err(err).Msg("failed to get RadixJobs with RadixDeployments in maintain job history") - return + return err } - deletingJobs, radixJobsForConditions := job.groupSortedRadixJobs(radixJobs, 
radixJobsWithRDs) - jobHistoryLimit := job.config.PipelineJobConfig.PipelineJobsHistoryLimit - log.Ctx(ctx).Info().Msgf("Delete history RadixJob for limit %d", jobHistoryLimit) - jobsByConditionAndBranch := job.getJobsToGarbageCollectByJobConditionAndBranch(ctx, radixJobsForConditions, jobHistoryLimit) - deletingJobs = append(deletingJobs, jobsByConditionAndBranch...) - job.garbageCollectRadixJobs(ctx, deletingJobs) -} + log.Ctx(ctx).Info().Msgf("Delete history RadixJob for limit %d", h.historyLimit) -func (job *Job) garbageCollectRadixJobs(ctx context.Context, radixJobs []v1.RadixJob) { - if len(radixJobs) == 0 { + deletingRadixJobs := h.getRadixJobsToGarbageCollect(ctx, radixJobs, radixJobsWithRDs, ra) + if len(deletingRadixJobs) == 0 { log.Ctx(ctx).Info().Msg("There is no RadixJobs to delete") - return + return nil } - for _, rj := range radixJobs { - if strings.EqualFold(rj.GetName(), job.radixJob.GetName()) { - continue // do not remove current job - } - log.Ctx(ctx).Info().Msgf("Delete RadixJob %s from %s", rj.GetName(), rj.GetNamespace()) - err := job.radixclient.RadixV1().RadixJobs(rj.GetNamespace()).Delete(ctx, rj.GetName(), metav1.DeleteOptions{}) - if err != nil { - log.Ctx(ctx).Warn().Err(err).Msgf("Failed to delete RadixJob %s from %s", rj.GetName(), rj.GetNamespace()) - } + return h.deleteRadixJobs(ctx, namespace, deletingRadixJobs) +} + +func (h *history) getRadixJobsToGarbageCollect(ctx context.Context, radixJobs []radixv1.RadixJob, radixJobsWithRDs radixJobsWithRadixDeployments, ra *radixv1.RadixApplication) []radixv1.RadixJob { + radixJobsToBeExplicitlyDeleted, radixJobsForConditionsAndEnvs, radixJobsNamesWithExistingRadixDeployments := h.getRadixJobCandidatesForDeletion(radixJobs, radixJobsWithRDs, ra) + if len(radixJobsToBeExplicitlyDeleted) > 0 { + log.Ctx(ctx).Info().Msgf("Delete %d RadixJobs without considering history rules", len(radixJobsToBeExplicitlyDeleted)) } + + deletingRadixJobsByConditionsAndEnvs := 
h.getRadixJobsToGarbageCollectByJobConditionsAndEnvs(ctx, radixJobsForConditionsAndEnvs, radixJobsNamesWithExistingRadixDeployments) + + deletingRadixJobs := append(radixJobsToBeExplicitlyDeleted, deletingRadixJobsByConditionsAndEnvs...) + return deletingRadixJobs } -func (job *Job) groupSortedRadixJobs(radixJobs []v1.RadixJob, radixJobsWithRDs radixJobsWithRadixDeployments) ([]v1.RadixJob, radixJobsForConditions) { - var deletingJobs []v1.RadixJob - radixJobsForConditions := make(radixJobsForConditions) - for _, rj := range radixJobs { - rj := rj - jobCondition := rj.Status.Condition - switch { - case jobCondition == v1.JobSucceeded && rj.Spec.PipeLineType != v1.Build: - if _, ok := radixJobsWithRDs[rj.GetName()]; !ok { - deletingJobs = append(deletingJobs, rj) - } +func (h *history) deleteRadixJobs(ctx context.Context, namespace string, radixJobs []radixv1.RadixJob) error { + var errs []error + for _, radixJob := range radixJobs { + select { + case <-ctx.Done(): + return fmt.Errorf("failed deleting of RadixJobs: %w", ctx.Err()) default: - if radixJobsForConditions[jobCondition] == nil { - radixJobsForConditions[jobCondition] = make(radixJobsForBranches) + log.Ctx(ctx).Info().Msgf("Delete RadixJob %s from %s", radixJob.GetName(), namespace) + if err := h.radixClient.RadixV1().RadixJobs(namespace).Delete(ctx, radixJob.GetName(), metav1.DeleteOptions{}); err != nil { + errs = append(errs, fmt.Errorf("failed to delete RadixJob %s from %s: %w", radixJob.GetName(), namespace, err)) } - jobBranch := getRadixJobBranch(rj) - radixJobsForConditions[jobCondition][jobBranch] = append(radixJobsForConditions[jobCondition][jobBranch], rj) } } - return sortRadixJobsByCreatedDesc(deletingJobs), sortRadixJobGroupsByCreatedDesc(radixJobsForConditions) + return errors.Join(errs...) 
} -func sortRadixJobGroupsByCreatedDesc(radixJobsForConditions radixJobsForConditions) radixJobsForConditions { - for jobCondition, jobsForBranches := range radixJobsForConditions { - for jobBranch, jobs := range jobsForBranches { - radixJobsForConditions[jobCondition][jobBranch] = sortRadixJobsByCreatedDesc(jobs) +func (h *history) getRadixJobCandidatesForDeletion(radixJobs []radixv1.RadixJob, radixJobsWithRDs radixJobsWithRadixDeployments, ra *radixv1.RadixApplication) ([]radixv1.RadixJob, radixJobsForConditionsMap, radixJobsNamesMap) { + branchToEnvsMap := getBranchesToEnvsMap(ra) + var radixJobsToBeExplicitlyDeleted []radixv1.RadixJob + radixJobsForConditionsAndEnvs := make(radixJobsForConditionsMap) + radixJobsNamesWithExistingRadixDeployments := make(radixJobsNamesMap) + for _, radixJob := range radixJobs { + jobCondition := radixJob.Status.Condition + rj := radixJob + if _, rdExists := radixJobsWithRDs[rj.GetName()]; rdExists { + radixJobsNamesWithExistingRadixDeployments[rj.GetName()] = struct{}{} + } else { + if jobCondition == radixv1.JobSucceeded && rj.Spec.PipeLineType != radixv1.Build && rj.Spec.PipeLineType != radixv1.ApplyConfig { + radixJobsToBeExplicitlyDeleted = append(radixJobsToBeExplicitlyDeleted, rj) // delete all completed job, which does not have a RadixDeployment, excluding build-only jobs + continue + } + if jobIsCompleted(jobCondition) && rj.Status.Created != nil && rj.Status.Created.Time.Before(time.Now().Add(-h.historyPeriodLimit)) { + radixJobsToBeExplicitlyDeleted = append(radixJobsToBeExplicitlyDeleted, rj) // delete all job, which is older than the history period limit + continue + } + } + if !jobIsCompleted(jobCondition) { + continue // keep not completed jobs + } + if radixJobsForConditionsAndEnvs[jobCondition] == nil { + radixJobsForConditionsAndEnvs[jobCondition] = make(radixJobsForBranches) + } + radixJobTargetEnvs := getRadixJobEnvs(rj, branchToEnvsMap) + for _, targetEnv := range radixJobTargetEnvs { + 
radixJobsForConditionsAndEnvs[jobCondition][targetEnv] = append(radixJobsForConditionsAndEnvs[jobCondition][targetEnv], rj) } } - return radixJobsForConditions + return radixJobsToBeExplicitlyDeleted, radixJobsForConditionsAndEnvs, radixJobsNamesWithExistingRadixDeployments } -func (job *Job) getRadixJobsWithRadixDeployments(ctx context.Context) (radixJobsWithRadixDeployments, error) { - appName, err := job.getAppName() - if err != nil { - return nil, err - } - ra, err := job.radixclient.RadixV1().RadixApplications(job.radixJob.Namespace).Get(ctx, appName, metav1.GetOptions{}) - if err != nil { - // RadixApplication may not exist if this is the first job for a new application - if k8errors.IsNotFound(err) { - return nil, nil +func getBranchesToEnvsMap(ra *radixv1.RadixApplication) map[string][]string { + return slice.Reduce(ra.Spec.Environments, make(map[string][]string), func(acc map[string][]string, env radixv1.Environment) map[string][]string { + if len(env.Build.From) > 0 { + acc[env.Build.From] = append(acc[env.Build.From], env.Name) } - return nil, err - } + return acc + }) +} + +func jobIsCompleted(jobCondition radixv1.RadixJobCondition) bool { + return jobCondition == radixv1.JobSucceeded || jobCondition == radixv1.JobFailed || jobCondition == radixv1.JobStopped || jobCondition == radixv1.JobStoppedNoChanges +} + +func (h *history) getRadixJobsMapToRadixDeployments(ctx context.Context, appName string, ra *radixv1.RadixApplication) (radixJobsWithRadixDeployments, error) { rdRadixJobs := make(radixJobsWithRadixDeployments) for _, env := range ra.Spec.Environments { - envNamespace := utils.GetEnvironmentNamespace(appName, env.Name) - envRdList, err := job.radixclient.RadixV1().RadixDeployments(envNamespace).List(ctx, metav1.ListOptions{}) - if err != nil { - return nil, fmt.Errorf("failed to get RadixDeployments from the environment %s. 
Error: %w", env.Name, err) - } - for _, rd := range envRdList.Items { - rd := rd - if jobName, ok := rd.GetLabels()[kube.RadixJobNameLabel]; ok { - rdRadixJobs[jobName] = rd + select { + case <-ctx.Done(): + return nil, fmt.Errorf("failed getting of RadixDeployments: %w", ctx.Err()) + default: + envNamespace := utils.GetEnvironmentNamespace(appName, env.Name) + envRdList, err := h.radixClient.RadixV1().RadixDeployments(envNamespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get RadixDeployments from the environment %s. Error: %w", env.Name, err) + } + for _, rd := range envRdList.Items { + rd := rd + if jobName, ok := rd.GetLabels()[kube.RadixJobNameLabel]; ok { + rdRadixJobs[jobName] = rd + } } } } return rdRadixJobs, nil } -func (job *Job) getAppName() (string, error) { - appName, ok := job.radixJob.GetLabels()[kube.RadixAppLabel] - if !ok || len(appName) == 0 { - return "", fmt.Errorf("missing label %s in the RadixJob", kube.RadixAppLabel) - } - return appName, nil -} - -func getRadixJobBranch(rj v1.RadixJob) string { - if branch, ok := rj.GetAnnotations()[kube.RadixBranchAnnotation]; ok && len(branch) > 0 { - return branch +func getRadixJobEnvs(rj radixv1.RadixJob, envsMap map[string][]string) []string { + switch rj.Spec.PipeLineType { + case radixv1.BuildDeploy: + return envsMap[rj.Spec.Build.Branch] + case radixv1.Deploy: + return []string{rj.Spec.Deploy.ToEnvironment} + case radixv1.Promote: + return []string{rj.Spec.Promote.ToEnvironment} } - if branch, ok := rj.GetLabels()[kube.RadixBuildLabel]; ok && len(branch) > 0 { - return branch - } - return "" + return []string{""} } -func (job *Job) getAllRadixJobs(ctx context.Context) ([]v1.RadixJob, error) { - radixJobList, err := job.radixclient.RadixV1().RadixJobs(job.radixJob.Namespace).List(ctx, metav1.ListOptions{}) +func (h *history) getAllRadixJobs(ctx context.Context, namespace string) ([]radixv1.RadixJob, error) { + radixJobList, err := 
h.radixClient.RadixV1().RadixJobs(namespace).List(ctx, metav1.ListOptions{}) if err != nil { return nil, err } return radixJobList.Items, err } -func (job *Job) getJobsToGarbageCollectByJobConditionAndBranch(ctx context.Context, jobsForConditions radixJobsForConditions, jobHistoryLimit int) []v1.RadixJob { - var deletingJobs []v1.RadixJob - for jobCondition, jobsForBranches := range jobsForConditions { - switch jobCondition { - case v1.JobRunning, v1.JobQueued, v1.JobWaiting, "": // Jobs with this condition should never be garbage collected - continue +func (h *history) getRadixJobsToGarbageCollectByJobConditionsAndEnvs(ctx context.Context, jobsForConditions radixJobsForConditionsMap, radixJobsNamesWithExistingRadixDeployments radixJobsNamesMap) []radixv1.RadixJob { + var deletingJobs []radixv1.RadixJob + for jobCondition, jobsForEnvs := range jobsForConditions { + for env, jobsForEnv := range jobsForEnvs { + jobs := sortRadixJobsByCreatedDesc(jobsForEnv) + for i := h.historyLimit; i < len(jobs); i++ { + if _, jobExists := radixJobsNamesWithExistingRadixDeployments[jobs[i].GetName()]; jobExists { + continue // keep RadixJobs with existing RadixDeployments + } + log.Ctx(ctx).Debug().Msgf("Collect for deleting RadixJob %s for the env %s, condition %s", jobs[i].GetName(), env, jobCondition) + deletingJobs = append(deletingJobs, jobs[i]) + } + } + } + return deletingJobs +} + +func (h *history) garbageCollectConfigMaps(ctx context.Context, namespace string) error { + radixJobConfigMaps, err := h.kubeUtil.ListConfigMapsWithSelector(ctx, namespace, getRadixJobNameExistsSelector().String()) + if err != nil { + return err + } + radixJobNameSet, err := h.getRadixJobNameSet(ctx, namespace) + if err != nil { + return err + } + var errs []error + for _, configMap := range radixJobConfigMaps { + select { + case <-ctx.Done(): + return fmt.Errorf("failed deleting of RadixJob's ConfigMaps: %w", ctx.Err()) default: - for jobBranch, jobs := range jobsForBranches { - jobs := 
sortRadixJobsByCreatedDesc(jobs) - for i := jobHistoryLimit; i < len(jobs); i++ { - log.Ctx(ctx).Debug().Msgf("Collect for deleting RadixJob %s for the env %s, condition %s", jobs[i].GetName(), jobBranch, jobCondition) - deletingJobs = append(deletingJobs, jobs[i]) + jobName := configMap.GetLabels()[kube.RadixJobNameLabel] + configMapName := configMap.GetName() + if _, radixJobExists := radixJobNameSet[jobName]; !radixJobExists { + log.Ctx(ctx).Debug().Msgf("Delete ConfigMap %s in %s", configMapName, namespace) + err := h.kubeUtil.DeleteConfigMap(ctx, namespace, configMapName) + if err != nil { + errs = append(errs, err) } } } + } + return errors.Join(errs...) +} +func (h *history) getRadixJobNameSet(ctx context.Context, namespace string) (map[string]struct{}, error) { + radixJobs, err := h.getAllRadixJobs(ctx, namespace) + if err != nil { + return nil, err } - return deletingJobs + return slice.Reduce(radixJobs, make(map[string]struct{}), func(acc map[string]struct{}, radixJob radixv1.RadixJob) map[string]struct{} { + acc[radixJob.GetName()] = struct{}{} + return acc + }), nil +} + +func getRadixJobNameExistsSelector() labels.Selector { + requirement, _ := labels.NewRequirement(kube.RadixJobNameLabel, selection.Exists, []string{}) + return labels.NewSelector().Add(*requirement) } diff --git a/pkg/apis/job/job_history_test.go b/pkg/apis/job/job_history_test.go new file mode 100644 index 000000000..3bb77b89c --- /dev/null +++ b/pkg/apis/job/job_history_test.go @@ -0,0 +1,532 @@ +package job_test + +import ( + "context" + "testing" + "time" + + "github.com/equinor/radix-common/utils/pointers" + "github.com/equinor/radix-common/utils/slice" + "github.com/equinor/radix-operator/pkg/apis/job" + "github.com/equinor/radix-operator/pkg/apis/kube" + radixv1 "github.com/equinor/radix-operator/pkg/apis/radix/v1" + "github.com/equinor/radix-operator/pkg/apis/utils" + radixlabels "github.com/equinor/radix-operator/pkg/apis/utils/labels" + radixclient 
"github.com/equinor/radix-operator/pkg/client/clientset/versioned" + radix "github.com/equinor/radix-operator/pkg/client/clientset/versioned/fake" + kedafake "github.com/kedacore/keda/v2/pkg/generated/clientset/versioned/fake" + "github.com/stretchr/testify/suite" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + kubernetes "k8s.io/client-go/kubernetes/fake" + secretproviderfake "sigs.k8s.io/secrets-store-csi-driver/pkg/client/clientset/versioned/fake" +) + +type RadixJobHistoryTestSuite struct { + suite.Suite + kubeUtils *kube.Kube + radixClient radixclient.Interface +} + +func (s *RadixJobHistoryTestSuite) setupTest() { + kubeClient := kubernetes.NewSimpleClientset() + radixClient := radix.NewSimpleClientset() + kedaClient := kedafake.NewSimpleClientset() + secretproviderclient := secretproviderfake.NewSimpleClientset() + kubeUtil, _ := kube.New(kubeClient, radixClient, kedaClient, secretproviderclient) + s.kubeUtils, s.radixClient = kubeUtil, radixClient +} + +func TestRadixJobHistoryTestSuite(t *testing.T) { + suite.Run(t, new(RadixJobHistoryTestSuite)) +} + +const ( + app1 = "any-app1" + app2 = "any-app2" + job1 = "any-job1" + job2 = "any-job2" + job3 = "any-job3" + job4 = "any-job4" + job5 = "any-job5" + job6 = "any-job6" + job7 = "any-job7" + job8 = "any-job8" + job9 = "any-job9" + job10 = "any-job10" + job11 = "any-job11" + job12 = "any-job12" + job13 = "any-job13" + job14 = "any-job14" + job15 = "any-job15" + env1 = "dev1" + env2 = "dev2" + env3 = "dev3" + envBranch1 = "dev-branch1" + envBranch2 = "dev-branch2" +) + +func (s *RadixJobHistoryTestSuite) TestJobHistory_Cleanup() { + type appRadixJob struct { + appName string + jobName string + } + type appRadixJobsMap map[string][]string + type scenario struct { + name string + historyLimit int + historyPeriodLimit time.Duration + initTest func(radixClient radixclient.Interface) + syncAddingRadixJob appRadixJob + expectedRadixJobs 
appRadixJobsMap + } + + now := time.Now() + historyPeriodLimit30Days := time.Hour * 24 * 30 + scenarios := []scenario{ + { + name: "No jobs deleted when count is below limit", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now, radixv1.JobRunning, true, radixv1.BuildDeploy, env1, envBranch1) + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job1}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job1}, + }, + }, + { + name: "No jobs deleted when count equals to limit", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now, radixv1.JobFailed, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job2, now.Add(time.Minute), radixv1.JobRunning, false, radixv1.BuildDeploy, env1, envBranch1) + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job2}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job1, job2}, + }, + }, + { + name: "No jobs are deleted as they have RadixDeployments when count is more then limit for build-deploy", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now, radixv1.JobSucceeded, true, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job2, now.Add(time.Minute), radixv1.JobSucceeded, true, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job3, now.Add(2*time.Minute), radixv1.JobSucceeded, true, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job4, now.Add(3*time.Minute), radixv1.JobRunning, false, radixv1.BuildDeploy, env1, envBranch1) + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job4}, + expectedRadixJobs: appRadixJobsMap{ + app1: 
[]string{job1, job2, job3, job4}}, + }, + { + name: "One job without radix-deployment deleted when count is more then limit for build-deploy", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now, radixv1.JobFailed, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job2, now.Add(time.Minute), radixv1.JobFailed, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job3, now.Add(2*time.Minute), radixv1.JobFailed, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job4, now.Add(3*time.Minute), radixv1.JobRunning, false, radixv1.BuildDeploy, env1, envBranch1) + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job4}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job2, job3, job4}}, + }, + { + name: "One job without RadixDeploy deleted when count is more then limit for build-only", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now, radixv1.JobFailed, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, job2, now.Add(time.Minute), radixv1.JobFailed, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, job3, now.Add(2*time.Minute), radixv1.JobFailed, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, job4, now.Add(3*time.Minute), radixv1.JobRunning, false, radixv1.Build, env1, envBranch1) + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job4}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job2, job3, job4}}, + }, + { + name: "One job deleted when count is more then limit for deploy-only", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + 
s.createRadixJob(radixClient, app1, job1, now, radixv1.JobFailed, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, job2, now.Add(time.Minute), radixv1.JobFailed, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, job3, now.Add(2*time.Minute), radixv1.JobFailed, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, job4, now.Add(3*time.Minute), radixv1.JobRunning, false, radixv1.Deploy, env1, "") + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job4}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job2, job3, job4}}, + }, + { + name: "One job deleted when count is more then limit for promote", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now, radixv1.JobFailed, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, job2, now.Add(time.Minute), radixv1.JobFailed, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, job3, now.Add(2*time.Minute), radixv1.JobFailed, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, job4, now.Add(3*time.Minute), radixv1.JobRunning, false, radixv1.Promote, env1, "") + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job4}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job2, job3, job4}}, + }, + { + name: "One job deleted when count is more then limit for apply-config", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now, radixv1.JobFailed, false, radixv1.ApplyConfig, "", "") + s.createRadixJob(radixClient, app1, job2, now.Add(time.Minute), radixv1.JobFailed, false, radixv1.ApplyConfig, "", "") + s.createRadixJob(radixClient, app1, job3, now.Add(2*time.Minute), radixv1.JobFailed, false, radixv1.ApplyConfig, "", "") + s.createRadixJob(radixClient, 
app1, job4, now.Add(3*time.Minute), radixv1.JobRunning, false, radixv1.ApplyConfig, "", "") + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job4}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job2, job3, job4}}, + }, + { + name: "One job deleted when count is more then limit for different pipeline types", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now, radixv1.JobFailed, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job2, now.Add(time.Minute), radixv1.JobFailed, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, job3, now.Add(2*time.Minute), radixv1.JobFailed, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, job4, now.Add(3*time.Minute), radixv1.JobRunning, false, radixv1.Build, env1, envBranch1) + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job4}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job2, job3, job4}}, + }, + { + name: "Deleted jobs only for specific app", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now, radixv1.JobFailed, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job2, now.Add(time.Minute), radixv1.JobFailed, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job3, now.Add(2*time.Minute), radixv1.JobFailed, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job4, now.Add(3*time.Minute), radixv1.JobRunning, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app2, job1, now, radixv1.JobFailed, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app2, job2, now.Add(time.Minute), radixv1.JobFailed, false, radixv1.BuildDeploy, env1, 
envBranch1) + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job4}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job2, job3, job4}, + app2: []string{job1, job2}, + }, + }, + { + name: "None deleted below or equal history limit", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job2, now.Add(time.Minute), radixv1.JobFailed, false, radixv1.BuildDeploy, env1, envBranch1) + // s.createRadixJob(radixClient, app1, job3, now.Add(2*time.Minute), radixv1.JobSucceeded, false, radixv1.BuildDeploy, env1, envBranch1) //successful job is a special case - exclude it from here + s.createRadixJob(radixClient, app1, job4, now.Add(3*time.Minute), radixv1.JobWaiting, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job5, now.Add(4*time.Minute), radixv1.JobQueued, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job6, now.Add(5*time.Minute), radixv1.JobStopped, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job7, now.Add(6*time.Minute), radixv1.JobRunning, false, radixv1.BuildDeploy, env1, envBranch1) // below limit + s.createRadixJob(radixClient, app1, job8, now.Add(7*time.Minute), radixv1.JobStoppedNoChanges, false, radixv1.BuildDeploy, env1, envBranch1) // over limit - delete this + + s.createRadixJob(radixClient, app1, job9, now.Add(9*time.Minute), radixv1.JobFailed, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job10, now.Add(11*time.Minute), radixv1.JobWaiting, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job11, now.Add(12*time.Minute), radixv1.JobQueued, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job12, now.Add(13*time.Minute), radixv1.JobStopped, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, 
app1, job13, now.Add(14*time.Minute), radixv1.JobStoppedNoChanges, false, radixv1.BuildDeploy, env1, envBranch1) // equals limit + s.createRadixJob(radixClient, app1, job14, now.Add(15*time.Minute), radixv1.JobStoppedNoChanges, false, radixv1.BuildDeploy, env1, envBranch1) // below limit + s.createRadixJob(radixClient, app1, job15, now.Add(16*time.Minute), radixv1.JobRunning, false, radixv1.BuildDeploy, env1, envBranch1) + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job15}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job2, job4, job5, job6, job7, job9, job10, job11, job12, job13, job14, job15}}, + }, + { + name: "Deleted only completed jobs without RadixDeployment per status", + historyLimit: 1, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now, radixv1.JobSucceeded, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job2, now.Add(time.Minute), radixv1.JobFailed, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job3, now.Add(2*time.Minute), radixv1.JobWaiting, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job4, now.Add(3*time.Minute), radixv1.JobQueued, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job5, now.Add(4*time.Minute), radixv1.JobStopped, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job6, now.Add(5*time.Minute), radixv1.JobRunning, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job7, now.Add(6*time.Minute), radixv1.JobStoppedNoChanges, false, radixv1.BuildDeploy, env1, envBranch1) + + s.createRadixJob(radixClient, app1, job8, now.Add(7*time.Minute), radixv1.JobSucceeded, true, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job9, now.Add(8*time.Minute), radixv1.JobFailed, false, 
radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job10, now.Add(10*time.Minute), radixv1.JobWaiting, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job11, now.Add(11*time.Minute), radixv1.JobQueued, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job12, now.Add(12*time.Minute), radixv1.JobStopped, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job13, now.Add(13*time.Minute), radixv1.JobRunning, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job14, now.Add(14*time.Minute), radixv1.JobStoppedNoChanges, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job15, now.Add(15*time.Minute), radixv1.JobRunning, false, radixv1.BuildDeploy, env1, envBranch1) + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job15}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job3, job4, job6, job8, job9, job10, job11, job12, job13, job14, job15}}, + }, + { + name: "Deleted succeeded jobs without deployment within limit for build-deploy", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now, radixv1.JobSucceeded, true, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job2, now.Add(time.Minute), radixv1.JobSucceeded, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job3, now.Add(3*time.Minute), radixv1.JobQueued, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job4, now.Add(4*time.Minute), radixv1.JobRunning, true, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job5, now.Add(5*time.Minute), radixv1.JobRunning, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job6, now.Add(6*time.Minute), 
radixv1.JobStopped, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job7, now.Add(7*time.Minute), radixv1.JobFailed, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job8, now.Add(8*time.Minute), radixv1.JobStoppedNoChanges, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job9, now.Add(9*time.Minute), radixv1.JobWaiting, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job10, now.Add(10*time.Minute), radixv1.JobRunning, false, radixv1.BuildDeploy, env1, envBranch1) + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job10}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job1, job3, job4, job5, job6, job7, job8, job9, job10}}, + }, + { + name: "Not deleted succeeded jobs without deployment within limit for build", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now, radixv1.JobSucceeded, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, job2, now.Add(3*time.Minute), radixv1.JobQueued, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, job3, now.Add(4*time.Minute), radixv1.JobRunning, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, job4, now.Add(6*time.Minute), radixv1.JobStopped, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, job5, now.Add(7*time.Minute), radixv1.JobFailed, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, job6, now.Add(8*time.Minute), radixv1.JobStoppedNoChanges, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, job7, now.Add(9*time.Minute), radixv1.JobWaiting, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, job8, now.Add(10*time.Minute), radixv1.JobRunning, false, 
radixv1.Build, env1, envBranch1) + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job8}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job1, job2, job3, job4, job5, job6, job7, job8}}, + }, + { + name: "Deleted succeeded jobs without deployment within limit for deploy-only", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now, radixv1.JobSucceeded, true, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, job2, now.Add(time.Minute), radixv1.JobSucceeded, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, job3, now.Add(3*time.Minute), radixv1.JobQueued, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, job4, now.Add(4*time.Minute), radixv1.JobRunning, true, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, job5, now.Add(5*time.Minute), radixv1.JobRunning, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, job6, now.Add(6*time.Minute), radixv1.JobStopped, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, job7, now.Add(7*time.Minute), radixv1.JobFailed, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, job8, now.Add(9*time.Minute), radixv1.JobWaiting, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, job9, now.Add(10*time.Minute), radixv1.JobRunning, false, radixv1.Deploy, env1, "") + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job9}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job1, job3, job4, job5, job6, job7, job8, job9}}, + }, + { + name: "Deleted succeeded jobs without deployment within limit for promote", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now, radixv1.JobSucceeded, true, radixv1.Promote, env1, "") + 
s.createRadixJob(radixClient, app1, job2, now.Add(time.Minute), radixv1.JobSucceeded, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, job3, now.Add(3*time.Minute), radixv1.JobQueued, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, job4, now.Add(4*time.Minute), radixv1.JobRunning, true, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, job5, now.Add(5*time.Minute), radixv1.JobRunning, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, job6, now.Add(6*time.Minute), radixv1.JobStopped, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, job7, now.Add(7*time.Minute), radixv1.JobFailed, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, job8, now.Add(9*time.Minute), radixv1.JobWaiting, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, job9, now.Add(10*time.Minute), radixv1.JobRunning, false, radixv1.Promote, env1, "") + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job9}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job1, job3, job4, job5, job6, job7, job8, job9}}, + }, + { + name: "Not deleted succeeded jobs without deployment within limit for apply-config", + historyLimit: 2, + historyPeriodLimit: historyPeriodLimit30Days, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now, radixv1.JobSucceeded, true, radixv1.ApplyConfig, "", "") + s.createRadixJob(radixClient, app1, job2, now.Add(3*time.Minute), radixv1.JobQueued, false, radixv1.ApplyConfig, "", "") + s.createRadixJob(radixClient, app1, job3, now.Add(5*time.Minute), radixv1.JobRunning, false, radixv1.ApplyConfig, "", "") + s.createRadixJob(radixClient, app1, job4, now.Add(6*time.Minute), radixv1.JobStopped, false, radixv1.ApplyConfig, "", "") + s.createRadixJob(radixClient, app1, job5, now.Add(7*time.Minute), radixv1.JobFailed, false, radixv1.ApplyConfig, "", "") + s.createRadixJob(radixClient, 
app1, job6, now.Add(8*time.Minute), radixv1.JobStoppedNoChanges, false, radixv1.ApplyConfig, "", "") + s.createRadixJob(radixClient, app1, job7, now.Add(9*time.Minute), radixv1.JobWaiting, false, radixv1.ApplyConfig, "", "") + s.createRadixJob(radixClient, app1, job8, now.Add(10*time.Minute), radixv1.JobRunning, false, radixv1.ApplyConfig, "", "") + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job8}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job1, job2, job3, job4, job5, job6, job7, job8}}, + }, + { + name: "Delete all completed jobs without RadixDeployment older them historyPeriodLimit", + historyLimit: 100, + historyPeriodLimit: time.Hour, + initTest: func(radixClient radixclient.Interface) { + s.createRadixJob(radixClient, app1, job1, now.Add(-100*time.Hour), radixv1.JobSucceeded, true, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, job2, now.Add(-99*time.Hour), radixv1.JobSucceeded, true, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, job3, now.Add(-98*time.Hour), radixv1.JobSucceeded, true, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, job4, now.Add(-time.Minute), radixv1.JobRunning, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, "some-job-87", now.Add(-100*time.Hour), radixv1.JobSucceeded, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, "some-job-88", now.Add(-99*time.Hour), radixv1.JobSucceeded, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, "some-job-89", now.Add(-98*time.Hour), radixv1.JobSucceeded, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, "some-job-90", now.Add(-97*time.Hour), radixv1.JobSucceeded, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, "some-job-91", now.Add(-96*time.Hour), radixv1.JobFailed, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, "some-job-92", 
now.Add(-95*time.Hour), radixv1.JobFailed, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, "some-job-93", now.Add(-94*time.Hour), radixv1.JobFailed, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, "some-job-94", now.Add(-93*time.Hour), radixv1.JobFailed, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, "some-job-95", now.Add(-96*time.Hour), radixv1.JobQueued, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, "some-job-96", now.Add(-95*time.Hour), radixv1.JobQueued, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, "some-job-97", now.Add(-94*time.Hour), radixv1.JobQueued, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, "some-job-98", now.Add(-93*time.Hour), radixv1.JobQueued, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, "some-job-99", now.Add(-96*time.Hour), radixv1.JobWaiting, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, "some-job-100", now.Add(-95*time.Hour), radixv1.JobWaiting, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, "some-job-101", now.Add(-94*time.Hour), radixv1.JobWaiting, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, "some-job-102", now.Add(-93*time.Hour), radixv1.JobWaiting, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, "some-job-103", now.Add(-96*time.Hour), radixv1.JobRunning, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, "some-job-104", now.Add(-95*time.Hour), radixv1.JobRunning, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, "some-job-105", now.Add(-94*time.Hour), radixv1.JobRunning, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, "some-job-106", now.Add(-93*time.Hour), radixv1.JobRunning, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, 
app1, "some-job-107", now.Add(-96*time.Hour), radixv1.JobStopped, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, "some-job-108", now.Add(-95*time.Hour), radixv1.JobStopped, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, "some-job-109", now.Add(-94*time.Hour), radixv1.JobStopped, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, "some-job-110", now.Add(-93*time.Hour), radixv1.JobStopped, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, "some-job-111", now.Add(-96*time.Hour), radixv1.JobStoppedNoChanges, false, radixv1.BuildDeploy, env1, envBranch1) + s.createRadixJob(radixClient, app1, "some-job-112", now.Add(-95*time.Hour), radixv1.JobStoppedNoChanges, false, radixv1.Deploy, env1, "") + s.createRadixJob(radixClient, app1, "some-job-113", now.Add(-94*time.Hour), radixv1.JobStoppedNoChanges, false, radixv1.Promote, env1, "") + s.createRadixJob(radixClient, app1, "some-job-114", now.Add(-93*time.Hour), radixv1.JobStoppedNoChanges, false, radixv1.Build, env1, envBranch1) + s.createRadixJob(radixClient, app1, job5, now, radixv1.JobRunning, true, radixv1.Build, env1, envBranch1) + }, + syncAddingRadixJob: appRadixJob{appName: app1, jobName: job5}, + expectedRadixJobs: appRadixJobsMap{ + app1: []string{job1, job2, job3, job4, "some-job-95", "some-job-96", "some-job-97", "some-job-98", "some-job-99", "some-job-100", "some-job-101", "some-job-102", "some-job-103", "some-job-104", "some-job-105", "some-job-106", job5}}, + }, + } + + for _, ts := range scenarios { + s.T().Run(ts.name, func(t *testing.T) { + s.T().Logf("Running test: %s", ts.name) + s.setupTest() + ts.initTest(s.radixClient) + + err := job.NewHistory(s.radixClient, s.kubeUtils, ts.historyLimit, ts.historyPeriodLimit). 
+ Cleanup(context.Background(), ts.syncAddingRadixJob.appName) + s.Require().NoError(err) + + expectedJobCount := 0 + for _, jobsMap := range ts.expectedRadixJobs { + expectedJobCount += len(jobsMap) + } + actualRadixJobList, err := s.radixClient.RadixV1().RadixJobs("").List(context.Background(), metav1.ListOptions{}) + s.NoError(err) + actualRadixJobCount := len(actualRadixJobList.Items) + s.Equal(expectedJobCount, actualRadixJobCount, "RadixJob count") + for _, radixJob := range actualRadixJobList.Items { + expectedAppJobs, ok := ts.expectedRadixJobs[radixJob.Spec.AppName] + s.True(ok, "missing RadixJobs for the app %s", radixJob.Spec.AppName) + jobNameIndex := slice.FindIndex(expectedAppJobs, func(jobName string) bool { return radixJob.Name == jobName }) + if s.True(jobNameIndex >= 0, "unexpected RadixJob %s for the app %s", radixJob.Name, radixJob.Spec.AppName) { + ts.expectedRadixJobs[radixJob.Spec.AppName] = append(expectedAppJobs[:jobNameIndex], expectedAppJobs[jobNameIndex+1:]...) + } + } + for appName, radixJobNames := range ts.expectedRadixJobs { + for _, radixJobName := range radixJobNames { + s.Failf("missing RadixJob", "missing RadixJob %s for the app %s", radixJobName, appName) + } + } + }) + } +} + +func (s *RadixJobHistoryTestSuite) createRadixJob(radixClient radixclient.Interface, appName string, jobName string, created time.Time, + statusCondition radixv1.RadixJobCondition, hasDeployment bool, pipelineType radixv1.RadixPipelineType, targetEnv, targetBranch string) { + namespace := utils.GetAppNamespace(appName) + _, err := s.radixClient.RadixV1().RadixApplications(utils.GetAppNamespace(appName)).Get(context.Background(), appName, metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + _, err := radixClient.RadixV1().RadixApplications(namespace). 
+ Create(context.Background(), createRadixApplication(appName), metav1.CreateOptions{}) + s.Require().NoError(err) + } else { + s.Require().NoError(err) + } + } + _, err = radixClient.RadixV1().RadixJobs(namespace). + Create(context.Background(), createRadixJob(appName, jobName, created, statusCondition, pipelineType, targetEnv, targetBranch), metav1.CreateOptions{}) + s.Require().NoError(err) + if hasDeployment { + _, err := radixClient.RadixV1().RadixDeployments(utils.GetEnvironmentNamespace(appName, targetEnv)). + Create(context.Background(), createRadixDeployment(appName, jobName), metav1.CreateOptions{}) + s.Require().NoError(err) + } +} + +func createRadixDeployment(appName string, jobName string) *radixv1.RadixDeployment { + return &radixv1.RadixDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: utils.RandString(10), + Labels: labels.Merge( + radixlabels.ForApplicationName(appName), + radixlabels.ForPipelineJobName(jobName)), + }, + } +} + +func createRadixApplication(appName string) *radixv1.RadixApplication { + return &radixv1.RadixApplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: appName, + }, + Spec: radixv1.RadixApplicationSpec{ + Environments: []radixv1.Environment{ + {Name: env1, Build: radixv1.EnvBuild{From: envBranch1}}, + {Name: env2, Build: radixv1.EnvBuild{From: envBranch2}}, + {Name: env3, Build: radixv1.EnvBuild{From: envBranch2}}, + }, + }, + } +} + +func createRadixJob(appName, jobName string, created time.Time, statusCondition radixv1.RadixJobCondition, pipelineType radixv1.RadixPipelineType, targetEnv, targetBranch string) *radixv1.RadixJob { + radixJob := radixv1.RadixJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: jobName, + Labels: radixlabels.Merge( + radixlabels.ForApplicationName(appName), + radixlabels.ForPipelineJobName(jobName), + radixlabels.ForPipelineJobType(), + radixlabels.ForPipelineJobPipelineType(pipelineType), + ), + }, + Spec: radixv1.RadixJobSpec{ + AppName: appName, + PipeLineType: pipelineType, + }, + Status: 
radixv1.RadixJobStatus{ + Created: pointers.Ptr(metav1.NewTime(created)), + Condition: statusCondition, + }, + } + switch pipelineType { + case radixv1.Build, radixv1.BuildDeploy: + radixJob.Spec.Build.Branch = targetBranch + case radixv1.Deploy: + radixJob.Spec.Deploy.ToEnvironment = targetEnv + case radixv1.Promote: + radixJob.Spec.Promote.ToEnvironment = targetEnv + } + return &radixJob +} diff --git a/radix-operator/config/config.go b/radix-operator/config/config.go index b8775c264..3f1f479d7 100644 --- a/radix-operator/config/config.go +++ b/radix-operator/config/config.go @@ -3,6 +3,7 @@ package config import ( "strconv" "strings" + "time" "github.com/equinor/radix-common/utils/maps" apiconfig "github.com/equinor/radix-operator/pkg/apis/config" @@ -11,17 +12,45 @@ import ( "github.com/equinor/radix-operator/pkg/apis/config/dnsalias" "github.com/equinor/radix-operator/pkg/apis/config/pipelinejob" "github.com/equinor/radix-operator/pkg/apis/defaults" + "github.com/rs/zerolog/log" "github.com/spf13/viper" ) -// Gets pipeline job history limit per each list, grouped by pipeline branch and job status +const ( + minPipelineJobsHistoryLimit = 3 + minDeploymentsHistoryLimit = 3 + minPipelineJobsHistoryPeriodLimit = time.Hour * 24 +) + +// Gets pipeline job history limit per each list, grouped by pipeline environment and job status func getPipelineJobsHistoryLimit() int { - return getIntFromEnvVar(defaults.PipelineJobsHistoryLimitEnvironmentVariable, 0) + historyLimit := getIntFromEnvVar(defaults.PipelineJobsHistoryLimitEnvironmentVariable, 0) + if historyLimit < minPipelineJobsHistoryLimit { + log.Error().Msgf("Invalid or too small pipeline job history limit %d, set default %d", historyLimit, minPipelineJobsHistoryLimit) + return minPipelineJobsHistoryLimit + } + return historyLimit +} + +// Gets pipeline job history period limit per each list, grouped by pipeline environment and job status +func getPipelineJobsHistoryPeriodLimit() time.Duration { + period := 
viper.GetString(defaults.PipelineJobsHistoryPeriodLimitEnvironmentVariable) + duration, err := time.ParseDuration(period) + if err != nil || duration < minPipelineJobsHistoryPeriodLimit { + log.Error().Msgf("Invalid or too short pipeline job history period limit %s, set minimum period %s", duration.String(), minPipelineJobsHistoryPeriodLimit.String()) + return minPipelineJobsHistoryPeriodLimit + } + return duration } // Gets radix deployment history limit per application environment func getDeploymentsHistoryLimitPerEnvironment() int { - return getIntFromEnvVar(defaults.DeploymentsHistoryLimitEnvironmentVariable, 0) + historyLimit := getIntFromEnvVar(defaults.DeploymentsHistoryLimitEnvironmentVariable, 0) + if historyLimit < minDeploymentsHistoryLimit { + log.Error().Msgf("Invalid or too small RadixDeployment history limit %d, set minimum %d", historyLimit, minDeploymentsHistoryLimit) + return minDeploymentsHistoryLimit + } + return historyLimit } func getDNSZone() string { @@ -57,6 +86,7 @@ func NewConfig() *apiconfig.Config { }, PipelineJobConfig: &pipelinejob.Config{ PipelineJobsHistoryLimit: getPipelineJobsHistoryLimit(), + PipelineJobsHistoryPeriodLimit: getPipelineJobsHistoryPeriodLimit(), DeploymentsHistoryLimitPerEnvironment: getDeploymentsHistoryLimitPerEnvironment(), AppBuilderResourcesLimitsMemory: defaults.GetResourcesLimitsMemoryForAppBuilderNamespace(), AppBuilderResourcesRequestsCPU: defaults.GetResourcesRequestsCPUForAppBuilderNamespace(), diff --git a/radix-operator/job/controller.go b/radix-operator/job/controller.go index 0a57b482c..0e7f718d5 100644 --- a/radix-operator/job/controller.go +++ b/radix-operator/job/controller.go @@ -26,9 +26,9 @@ const ( ) // NewController creates a new controller that handles RadixJobs -func NewController(ctx context.Context, client kubernetes.Interface, radixClient radixclient.Interface, handler common.Handler, kubeInformerFactory kubeinformers.SharedInformerFactory, radixInformerFactory 
informers.SharedInformerFactory, waitForChildrenToSync bool, recorder record.EventRecorder) *common.Controller { +func NewController(ctx context.Context, client kubernetes.Interface, radixClient radixclient.Interface, handler Handler, kubeInformerFactory kubeinformers.SharedInformerFactory, radixInformerFactory informers.SharedInformerFactory, waitForChildrenToSync bool, recorder record.EventRecorder) *common.Controller { logger := log.With().Str("controller", controllerAgentName).Logger() - jobInformer := radixInformerFactory.Radix().V1().RadixJobs() + radixJobInformer := radixInformerFactory.Radix().V1().RadixJobs() kubernetesJobInformer := kubeInformerFactory.Batch().V1().Jobs() podInformer := kubeInformerFactory.Core().V1().Pods() @@ -37,7 +37,7 @@ func NewController(ctx context.Context, client kubernetes.Interface, radixClient HandlerOf: crType, KubeClient: client, RadixClient: radixClient, - Informer: jobInformer.Informer(), + Informer: radixJobInformer.Informer(), KubeInformerFactory: kubeInformerFactory, WorkQueue: common.NewRateLimitedWorkQueue(ctx, crType), Handler: handler, @@ -47,7 +47,7 @@ func NewController(ctx context.Context, client kubernetes.Interface, radixClient } logger.Info().Msg("Setting up event handlers") - if _, err := jobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + if _, err := radixJobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(cur interface{}) { radixJob, _ := cur.(*v1.RadixJob) if job.IsRadixJobDone(radixJob) { @@ -61,6 +61,9 @@ func NewController(ctx context.Context, client kubernetes.Interface, radixClient logger.Error().Err(err).Msg("Failed to enqueue object received from RadixJob informer AddFunc") } metrics.CustomResourceAdded(crType) + if radixJobInformer.Informer().HasSynced() { + handler.CleanupJobHistory(ctx, radixJob.Spec.AppName) + } }, UpdateFunc: func(old, cur interface{}) { newRJ := cur.(*v1.RadixJob) diff --git a/radix-operator/job/controller_test.go 
b/radix-operator/job/controller_test.go index 7bf1e02de..be416a3be 100644 --- a/radix-operator/job/controller_test.go +++ b/radix-operator/job/controller_test.go @@ -14,9 +14,9 @@ import ( "github.com/equinor/radix-operator/pkg/apis/kube" "github.com/equinor/radix-operator/pkg/apis/test" "github.com/equinor/radix-operator/pkg/apis/utils" - radixclient "github.com/equinor/radix-operator/pkg/client/clientset/versioned" fakeradix "github.com/equinor/radix-operator/pkg/client/clientset/versioned/fake" informers "github.com/equinor/radix-operator/pkg/client/informers/externalversions" + "github.com/golang/mock/gomock" kedafake "github.com/kedacore/keda/v2/pkg/generated/clientset/versioned/fake" prometheusfake "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/fake" "github.com/stretchr/testify/suite" @@ -24,7 +24,6 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kubeinformers "k8s.io/client-go/informers" - "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/tools/record" secretproviderfake "sigs.k8s.io/secrets-store-csi-driver/pkg/client/clientset/versioned/fake" @@ -71,38 +70,18 @@ func (s *jobTestSuite) Test_Controller_Calls_Handler() { defer stop() defer close(synced) - kubeInformerFactory := kubeinformers.NewSharedInformerFactory(s.kubeUtil.KubeClient(), 0) - radixInformerFactory := informers.NewSharedInformerFactory(s.kubeUtil.RadixClient(), 0) - - cfg := &config.Config{ - DNSConfig: &dnsalias.DNSConfig{ - DNSZone: "dev.radix.equinor.com", - ReservedAppDNSAliases: map[string]string{"api": "radix-api"}, - ReservedDNSAliases: []string{"grafana"}, - }, - PipelineJobConfig: &pipelinejob.Config{ - PipelineJobsHistoryLimit: 3, - AppBuilderResourcesRequestsCPU: pointers.Ptr(resource.MustParse("100m")), - AppBuilderResourcesRequestsMemory: pointers.Ptr(resource.MustParse("1000Mi")), - AppBuilderResourcesLimitsMemory: pointers.Ptr(resource.MustParse("2000Mi")), - }, - } + 
hasSynced := func(syncedOk bool) { synced <- syncedOk } + ctrl := gomock.NewController(s.T()) + mockHistory := NewMockHistory(ctrl) + withHistoryOption := func(h *handler) { h.jobHistory = mockHistory } + jobHandler := s.createHandler(hasSynced, withHistoryOption) - jobHandler := NewHandler( - s.kubeUtil.KubeClient(), - s.kubeUtil, - s.kubeUtil.RadixClient(), - cfg, - func(syncedOk bool) { - synced <- syncedOk - }, - ) go func() { - err := startJobController(ctx, s.kubeUtil.KubeClient(), s.kubeUtil.RadixClient(), radixInformerFactory, kubeInformerFactory, jobHandler) + err := s.startJobController(ctx, jobHandler) s.Require().NoError(err) }() - // Test + mockHistory.EXPECT().Cleanup(gomock.Any(), anyAppName).Times(1) // Create job should sync rj, _ := s.tu.ApplyJob( @@ -143,13 +122,39 @@ func (s *jobTestSuite) Test_Controller_Calls_Handler() { s.True(op) } -func startJobController(ctx context.Context, client kubernetes.Interface, radixClient radixclient.Interface, radixInformerFactory informers.SharedInformerFactory, kubeInformerFactory kubeinformers.SharedInformerFactory, handler Handler) error { +func (s *jobTestSuite) createHandler(hasSynced func(syncedOk bool), opts ...handlerOpts) Handler { + return NewHandler( + s.kubeUtil.KubeClient(), + s.kubeUtil, + s.kubeUtil.RadixClient(), + createConfig(), + hasSynced, + opts..., + ) +} - eventRecorder := &record.FakeRecorder{} +func createConfig() *config.Config { + return &config.Config{ + DNSConfig: &dnsalias.DNSConfig{ + DNSZone: "dev.radix.equinor.com", + ReservedAppDNSAliases: map[string]string{"api": "radix-api"}, + ReservedDNSAliases: []string{"grafana"}, + }, + PipelineJobConfig: &pipelinejob.Config{ + PipelineJobsHistoryLimit: 3, + AppBuilderResourcesRequestsCPU: pointers.Ptr(resource.MustParse("100m")), + AppBuilderResourcesRequestsMemory: pointers.Ptr(resource.MustParse("1000Mi")), + AppBuilderResourcesLimitsMemory: pointers.Ptr(resource.MustParse("2000Mi")), + }, + } +} +func (s *jobTestSuite) 
startJobController(ctx context.Context, handler Handler) error { + kubeInformerFactory := kubeinformers.NewSharedInformerFactory(s.kubeUtil.KubeClient(), 0) + radixInformerFactory := informers.NewSharedInformerFactory(s.kubeUtil.RadixClient(), 0) + eventRecorder := &record.FakeRecorder{} const waitForChildrenToSync = false - controller := NewController(ctx, client, radixClient, &handler, kubeInformerFactory, radixInformerFactory, waitForChildrenToSync, eventRecorder) - + controller := NewController(ctx, s.kubeUtil.KubeClient(), s.kubeUtil.RadixClient(), handler, kubeInformerFactory, radixInformerFactory, waitForChildrenToSync, eventRecorder) kubeInformerFactory.Start(ctx.Done()) radixInformerFactory.Start(ctx.Done()) return controller.Run(ctx, 4) diff --git a/radix-operator/job/handler.go b/radix-operator/job/handler.go index 8c2a7e7ae..d97db62ee 100644 --- a/radix-operator/job/handler.go +++ b/radix-operator/job/handler.go @@ -2,6 +2,7 @@ package job import ( "context" + "time" apiconfig "github.com/equinor/radix-operator/pkg/apis/config" "github.com/equinor/radix-operator/pkg/apis/job" @@ -25,31 +26,42 @@ const ( MessageResourceSynced = "Radix Job synced successfully" ) -// Handler Instance variables -type Handler struct { +// Handler Common handler interface +type Handler interface { + common.Handler + // CleanupJobHistory Cleanup the pipeline job history for the Radix application + CleanupJobHistory(ctx context.Context, appName string) +} + +type handler struct { kubeclient kubernetes.Interface radixclient radixclient.Interface kubeutil *kube.Kube hasSynced common.HasSynced config *apiconfig.Config + jobHistory job.History } -// NewHandler Constructor -func NewHandler(kubeclient kubernetes.Interface, kubeutil *kube.Kube, radixclient radixclient.Interface, config *apiconfig.Config, hasSynced common.HasSynced) Handler { +type handlerOpts func(*handler) - handler := Handler{ +// NewHandler Constructor +func NewHandler(kubeclient kubernetes.Interface, kubeUtil 
*kube.Kube, radixClient radixclient.Interface, config *apiconfig.Config, hasSynced common.HasSynced, opts ...handlerOpts) Handler { + handler := handler{ kubeclient: kubeclient, - radixclient: radixclient, - kubeutil: kubeutil, + radixclient: radixClient, + kubeutil: kubeUtil, hasSynced: hasSynced, config: config, + jobHistory: job.NewHistory(radixClient, kubeUtil, config.PipelineJobConfig.PipelineJobsHistoryLimit, config.PipelineJobConfig.PipelineJobsHistoryPeriodLimit), } - - return handler + for _, opt := range opts { + opt(&handler) + } + return &handler } // Sync Is created on sync of resource -func (t *Handler) Sync(ctx context.Context, namespace, name string, eventRecorder record.EventRecorder) error { +func (t *handler) Sync(ctx context.Context, namespace, name string, eventRecorder record.EventRecorder) error { radixJob, err := t.radixclient.RadixV1().RadixJobs(namespace).Get(ctx, name, metav1.GetOptions{}) if err != nil { // The Job resource may no longer exist, in which case we stop @@ -78,3 +90,14 @@ func (t *Handler) Sync(ctx context.Context, namespace, name string, eventRecorde eventRecorder.Event(syncJob, corev1.EventTypeNormal, SuccessSynced, MessageResourceSynced) return nil } + +// CleanupJobHistory Cleanup the pipeline job history +func (t *handler) CleanupJobHistory(ctx context.Context, appName string) { + ctxWithTimeout, cancel := context.WithTimeout(ctx, time.Minute*5) + go func() { + defer cancel() + if err := t.jobHistory.Cleanup(ctxWithTimeout, appName); err != nil { + log.Ctx(ctx).Error().Err(err).Msgf("Failed to cleanup job history for the Radix application %s", appName) + } + }() +} diff --git a/radix-operator/job/handler_mock.go b/radix-operator/job/handler_mock.go new file mode 100644 index 000000000..85b1f3b5c --- /dev/null +++ b/radix-operator/job/handler_mock.go @@ -0,0 +1,62 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: ./radix-operator/job/handler.go + +// Package job is a generated GoMock package.
+package job + +import ( + context "context" + reflect "reflect" + + gomock "github.com/golang/mock/gomock" + record "k8s.io/client-go/tools/record" +) + +// MockHandler is a mock of Handler interface. +type MockHandler struct { + ctrl *gomock.Controller + recorder *MockHandlerMockRecorder +} + +// MockHandlerMockRecorder is the mock recorder for MockHandler. +type MockHandlerMockRecorder struct { + mock *MockHandler +} + +// NewMockHandler creates a new mock instance. +func NewMockHandler(ctrl *gomock.Controller) *MockHandler { + mock := &MockHandler{ctrl: ctrl} + mock.recorder = &MockHandlerMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockHandler) EXPECT() *MockHandlerMockRecorder { + return m.recorder +} + +// CleanupJobHistory mocks base method. +func (m *MockHandler) CleanupJobHistory(ctx context.Context, appName string) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "CleanupJobHistory", ctx, appName) +} + +// CleanupJobHistory indicates an expected call of CleanupJobHistory. +func (mr *MockHandlerMockRecorder) CleanupJobHistory(ctx, radixJob interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CleanupJobHistory", reflect.TypeOf((*MockHandler)(nil).CleanupJobHistory), ctx, radixJob) +} + +// Sync mocks base method. +func (m *MockHandler) Sync(ctx context.Context, namespace, name string, eventRecorder record.EventRecorder) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Sync", ctx, namespace, name, eventRecorder) + ret0, _ := ret[0].(error) + return ret0 +} + +// Sync indicates an expected call of Sync. 
+func (mr *MockHandlerMockRecorder) Sync(ctx, namespace, name, eventRecorder interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Sync", reflect.TypeOf((*MockHandler)(nil).Sync), ctx, namespace, name, eventRecorder) +} diff --git a/radix-operator/job/job_history_mock.go b/radix-operator/job/job_history_mock.go new file mode 100644 index 000000000..8c3fb2be6 --- /dev/null +++ b/radix-operator/job/job_history_mock.go @@ -0,0 +1,49 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: ./pkg/apis/job/job_history.go + +// Package job is a generated GoMock package. +package job + +import ( + context "context" + reflect "reflect" + + gomock "github.com/golang/mock/gomock" +) + +// MockHistory is a mock of History interface. +type MockHistory struct { + ctrl *gomock.Controller + recorder *MockHistoryMockRecorder +} + +// MockHistoryMockRecorder is the mock recorder for MockHistory. +type MockHistoryMockRecorder struct { + mock *MockHistory +} + +// NewMockHistory creates a new mock instance. +func NewMockHistory(ctrl *gomock.Controller) *MockHistory { + mock := &MockHistory{ctrl: ctrl} + mock.recorder = &MockHistoryMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockHistory) EXPECT() *MockHistoryMockRecorder { + return m.recorder +} + +// Cleanup mocks base method. +func (m *MockHistory) Cleanup(ctx context.Context, appName string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Cleanup", ctx, appName) + ret0, _ := ret[0].(error) + return ret0 +} + +// Cleanup indicates an expected call of Cleanup. 
+func (mr *MockHistoryMockRecorder) Cleanup(ctx, appName interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Cleanup", reflect.TypeOf((*MockHistory)(nil).Cleanup), ctx, appName) +} diff --git a/radix-operator/main.go b/radix-operator/main.go index ac7ab3ec3..e037c36a4 100644 --- a/radix-operator/main.go +++ b/radix-operator/main.go @@ -346,7 +346,7 @@ func (a *App) createJobController(ctx context.Context) *common.Controller { ctx, a.kubeUtil.KubeClient(), a.kubeUtil.RadixClient(), - &handler, a.kubeInformerFactory, a.radixInformerFactory, true, a.eventRecorder) + handler, a.kubeInformerFactory, a.radixInformerFactory, true, a.eventRecorder) } func (a *App) createAlertController(ctx context.Context) *common.Controller {