diff --git a/docs/metrics/workload/job-metrics.md b/docs/metrics/workload/job-metrics.md
index d5959c4dd7..807d533288 100644
--- a/docs/metrics/workload/job-metrics.md
+++ b/docs/metrics/workload/job-metrics.md
@@ -17,3 +17,4 @@
 | kube_job_complete | Gauge | | `job_name`=<job-name>
`namespace`=<job-namespace>
`condition`=<true\|false\|unknown> | STABLE |
 | kube_job_failed | Gauge | | `job_name`=<job-name>
`namespace`=<job-namespace>
`condition`=<true\|false\|unknown> | STABLE |
 | kube_job_created | Gauge | | `job_name`=<job-name>
`namespace`=<job-namespace> | STABLE |
+| kube_job_status_suspended | Gauge | | `job_name`=<job-name>
`namespace`=<job-namespace> | EXPERIMENTAL |
diff --git a/internal/store/job.go b/internal/store/job.go
index 8dd53b0a19..bac4806384 100644
--- a/internal/store/job.go
+++ b/internal/store/job.go
@@ -26,6 +26,7 @@ import (
 	generator "k8s.io/kube-state-metrics/v2/pkg/metric_generator"
 	v1batch "k8s.io/api/batch/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/watch"
@@ -355,6 +356,27 @@ func jobMetricFamilies(allowAnnotationsList, allowLabelsList []string) []generat
 			}),
 		),
+		*generator.NewFamilyGeneratorWithStability(
+			"kube_job_status_suspended",
+			"The job is suspended.",
+			metric.Gauge,
+			basemetrics.ALPHA,
+			"",
+			wrapJobFunc(func(j *v1batch.Job) *metric.Family {
+				ms := []*metric.Metric{}
+				for _, c := range j.Status.Conditions {
+					if c.Type == v1batch.JobSuspended {
+						ms = append(ms, &metric.Metric{
+							Value: boolFloat64(c.Status == v1.ConditionTrue),
+						})
+					}
+				}
+
+				return &metric.Family{
+					Metrics: ms,
+				}
+			}),
+		),
 		*generator.NewFamilyGeneratorWithStability(
 			"kube_job_owner",
 			"Information about the Job's owner.",
diff --git a/internal/store/job_test.go b/internal/store/job_test.go
index b808c321b5..67d0a22633 100644
--- a/internal/store/job_test.go
+++ b/internal/store/job_test.go
@@ -44,6 +44,7 @@ var (
 func TestJobStore(t *testing.T) {
 	var trueValue = true
+	var falseValue = false
 
 	// Fixed metadata on type and help text. We prepend this to every expected
 	// output so we only have to modify a single place when doing adjustments.
@@ -77,7 +78,9 @@ func TestJobStore(t *testing.T) {
 		# HELP kube_job_status_start_time [STABLE] StartTime represents time when the job was acknowledged by the Job Manager.
 		# TYPE kube_job_status_start_time gauge
 		# HELP kube_job_status_succeeded [STABLE] The number of pods which reached Phase Succeeded.
-		# TYPE kube_job_status_succeeded gauge`
+		# TYPE kube_job_status_succeeded gauge
+		# HELP kube_job_status_suspended The job is suspended.
+		# TYPE kube_job_status_suspended gauge`
 
 	cases := []generateMetricsTestCase{
 		{
@@ -272,6 +276,74 @@ func TestJobStore(t *testing.T) {
 				kube_job_status_failed{job_name="SuccessfulJob2NoActiveDeadlineSeconds",namespace="ns1"} 0
 				kube_job_status_start_time{job_name="SuccessfulJob2NoActiveDeadlineSeconds",namespace="ns1"} 1.495800607e+09
 				kube_job_status_succeeded{job_name="SuccessfulJob2NoActiveDeadlineSeconds",namespace="ns1"} 1
+`,
+		},
+		{
+			Obj: &v1batch.Job{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:       "SuspendedNoActiveDeadlineSeconds",
+					Namespace:  "ns1",
+					Generation: 1,
+				},
+				Status: v1batch.JobStatus{
+					Active:    0,
+					Failed:    0,
+					Succeeded: 0,
+					StartTime: &metav1.Time{Time: SuccessfulJob2StartTime},
+					Conditions: []v1batch.JobCondition{
+						{Type: v1batch.JobSuspended, Status: v1.ConditionTrue},
+					},
+				},
+				Spec: v1batch.JobSpec{
+					Suspend:     &trueValue,
+					Parallelism: &Parallelism1,
+					Completions: &Completions1,
+				},
+			},
+			Want: metadata + `
+				kube_job_owner{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1",owner_is_controller="",owner_kind="",owner_name=""} 1
+				kube_job_info{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1"} 1
+				kube_job_spec_completions{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1"} 1
+				kube_job_spec_parallelism{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1"} 1
+				kube_job_status_active{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1"} 0
+				kube_job_status_failed{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1"} 0
+				kube_job_status_start_time{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1"} 1.495800607e+09
+				kube_job_status_succeeded{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1"} 0
+				kube_job_status_suspended{job_name="SuspendedNoActiveDeadlineSeconds",namespace="ns1"} 1
+`,
+		},
+		{
+			Obj: &v1batch.Job{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:       "UnsuspendedNoActiveDeadlineSeconds",
+					Namespace:  "ns1",
+					Generation: 1,
+				},
+				Status: v1batch.JobStatus{
+					Active:    0,
+					Failed:    0,
+					Succeeded: 0,
+					StartTime: &metav1.Time{Time: SuccessfulJob2StartTime},
+					Conditions: []v1batch.JobCondition{
+						{Type: v1batch.JobSuspended, Status: v1.ConditionFalse},
+					},
+				},
+				Spec: v1batch.JobSpec{
+					Suspend:     &falseValue,
+					Parallelism: &Parallelism1,
+					Completions: &Completions1,
+				},
+			},
+			Want: metadata + `
+				kube_job_owner{job_name="UnsuspendedNoActiveDeadlineSeconds",namespace="ns1",owner_is_controller="",owner_kind="",owner_name=""} 1
+				kube_job_info{job_name="UnsuspendedNoActiveDeadlineSeconds",namespace="ns1"} 1
+				kube_job_spec_completions{job_name="UnsuspendedNoActiveDeadlineSeconds",namespace="ns1"} 1
+				kube_job_spec_parallelism{job_name="UnsuspendedNoActiveDeadlineSeconds",namespace="ns1"} 1
+				kube_job_status_active{job_name="UnsuspendedNoActiveDeadlineSeconds",namespace="ns1"} 0
+				kube_job_status_failed{job_name="UnsuspendedNoActiveDeadlineSeconds",namespace="ns1"} 0
+				kube_job_status_start_time{job_name="UnsuspendedNoActiveDeadlineSeconds",namespace="ns1"} 1.495800607e+09
+				kube_job_status_succeeded{job_name="UnsuspendedNoActiveDeadlineSeconds",namespace="ns1"} 0
+				kube_job_status_suspended{job_name="UnsuspendedNoActiveDeadlineSeconds",namespace="ns1"} 0
+`,
 		},
 	}
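
For reviewers who want to sanity-check the semantics outside the generator plumbing, here is a minimal standalone sketch of the value logic added in job.go. The `suspendedValue` helper is hypothetical and not part of this change, and `boolFloat64` is inlined because it is internal to kube-state-metrics. The gauge reads 1 while the `JobSuspended` condition is `True` and 0 while it is `False`; note that the real family emits no sample at all when the condition is absent, which this sketch collapses to 0.

```go
package main

import (
	"fmt"

	v1batch "k8s.io/api/batch/v1"
	v1 "k8s.io/api/core/v1"
)

// suspendedValue mirrors the condition check in jobMetricFamilies:
// it reports 1 only while the JobSuspended condition is True.
// (Hypothetical helper for illustration; not part of the PR.)
func suspendedValue(j *v1batch.Job) float64 {
	for _, c := range j.Status.Conditions {
		if c.Type == v1batch.JobSuspended && c.Status == v1.ConditionTrue {
			return 1
		}
	}
	return 0
}

func main() {
	suspended := &v1batch.Job{
		Status: v1batch.JobStatus{
			Conditions: []v1batch.JobCondition{
				{Type: v1batch.JobSuspended, Status: v1.ConditionTrue},
			},
		},
	}
	resumed := &v1batch.Job{
		Status: v1batch.JobStatus{
			Conditions: []v1batch.JobCondition{
				{Type: v1batch.JobSuspended, Status: v1.ConditionFalse},
			},
		},
	}
	fmt.Println(suspendedValue(suspended), suspendedValue(resumed)) // 1 0
}
```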