Skip to content

Commit

Permalink
Fix Monotonic Counter and Add New Build Counters (#606)
Browse files Browse the repository at this point in the history
* I think its working

* Done

* Removed dead code
  • Loading branch information
aidanleuck authored Dec 29, 2023
1 parent ce2e589 commit 73e8034
Show file tree
Hide file tree
Showing 21 changed files with 911 additions and 86 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ work
.classpath
.settings/
build/
*.DS_Store
*.DS_Store
.vscode/*
3 changes: 0 additions & 3 deletions .vscode/settings.json

This file was deleted.

3 changes: 3 additions & 0 deletions docs/metrics/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ will just return the last build. You can enable per build metrics in the configu
| default_jenkins_builds_duration_milliseconds_summary | Summary of Jenkins build times in milliseconds by Job | summary |
| default_jenkins_builds_success_build_count | Successful build count | counter |
| default_jenkins_builds_failed_build_count | Failed build count | counter |
| default_jenkins_builds_unstable_build_count | Unstable build count | counter |
| default_jenkins_builds_total_build_count | Total build count (excluding not_built statuses) | counter |
| default_jenkins_builds_aborted_build_count | Aborted build count | counter |
| default_jenkins_builds_health_score | Health score of a job | gauge |
| default_jenkins_builds_<buildname>_last_build_result_ordinal | Build status of a job (0=SUCCESS,1=UNSTABLE,2=FAILURE,3=NOT_BUILT,4=ABORTED) | gauge |
| default_jenkins_builds_<buildname>_last_build_result | Build status of a job as a boolean value (0 or 1), where 0 means SUCCESS or UNSTABLE and 1 means any other state | gauge |
Expand Down
95 changes: 45 additions & 50 deletions src/main/java/org/jenkinsci/plugins/prometheus/JobCollector.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
package org.jenkinsci.plugins.prometheus;

import hudson.model.Job;
import hudson.model.Result;
import hudson.model.Run;
import io.prometheus.client.Collector;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.jenkinsci.plugins.prometheus.collectors.CollectorFactory;
import org.jenkinsci.plugins.prometheus.collectors.CollectorType;
import org.jenkinsci.plugins.prometheus.collectors.MetricCollector;
import org.jenkinsci.plugins.prometheus.collectors.builds.BuildCompletionListener;
import org.jenkinsci.plugins.prometheus.collectors.builds.CounterManager;
import org.jenkinsci.plugins.prometheus.collectors.builds.JobLabel;
import org.jenkinsci.plugins.prometheus.collectors.builds.BuildCompletionListener.CloseableIterator;
import org.jenkinsci.plugins.prometheus.config.PrometheusConfiguration;
import org.jenkinsci.plugins.prometheus.util.Jobs;
import org.jenkinsci.plugins.prometheus.util.Runs;
Expand All @@ -18,7 +20,6 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class JobCollector extends Collector {

Expand All @@ -29,6 +30,9 @@ public class JobCollector extends Collector {
private MetricCollector<Run<?, ?>, ? extends Collector> summary;
private MetricCollector<Run<?, ?>, ? extends Collector> jobSuccessCount;
private MetricCollector<Run<?, ?>, ? extends Collector> jobFailedCount;
private MetricCollector<Run<?, ?>, ? extends Collector> jobAbortedCount;
private MetricCollector<Run<?, ?>, ? extends Collector> jobUnstableCount;
private MetricCollector<Run<?, ?>, ? extends Collector> jobTotalCount;
private MetricCollector<Job<?, ?>, ? extends Collector> jobHealthScoreGauge;
private MetricCollector<Job<?, ?>, ? extends Collector> nbBuildsGauge;
private MetricCollector<Job<?, ?>, ? extends Collector> buildDiscardGauge;
Expand Down Expand Up @@ -71,8 +75,7 @@ public void initCollectors(String[] labelNameArray) {
private final BuildMetrics lastBuildMetrics = new BuildMetrics("last");
private final BuildMetrics perBuildMetrics = new BuildMetrics("");

public JobCollector() {
}
public JobCollector() {}

@Override
public List<MetricFamilySamples> collect() {
Expand Down Expand Up @@ -113,13 +116,40 @@ public List<MetricFamilySamples> collect() {
return samples;
}

// Below three metrics use labelNameArray which might include the optional labels
// Below metrics use labelNameArray which might include the optional labels
// of "parameters" or "status"
summary = factory.createRunCollector(CollectorType.BUILD_DURATION_SUMMARY, labelNameArray, null);

jobSuccessCount = factory.createRunCollector(CollectorType.BUILD_SUCCESSFUL_COUNTER, labelNameArray, null);

jobFailedCount = factory.createRunCollector(CollectorType.BUILD_FAILED_COUNTER, labelNameArray, null);
BuildCompletionListener listener = BuildCompletionListener.getInstance();

// Counter manager acts as a DB to retrieve any counters that are already in memory instead of reinitializing
// them with each iteration of collect.
var manager = CounterManager.getManager();
jobSuccessCount = manager.getCounter(CollectorType.BUILD_SUCCESSFUL_COUNTER, labelBaseNameArray, null);
jobFailedCount = manager.getCounter(CollectorType.BUILD_FAILED_COUNTER, labelBaseNameArray, null);
jobTotalCount = manager.getCounter(CollectorType.BUILD_TOTAL_COUNTER, labelBaseNameArray, null);
jobAbortedCount = manager.getCounter(CollectorType.BUILD_ABORTED_COUNTER, labelBaseNameArray, null);
jobUnstableCount = manager.getCounter(CollectorType.BUILD_UNSTABLE_COUNTER, labelBaseNameArray, null);

// This is a try with resources block it ensures close is called
// so if an exception occurs we don't reach deadlock. This is analogous to a using
// block where dispose is called after we leave the block.
// The closeable iterator synchronizes receiving jobs and reading the iterator
// so we don't modify the collection while iterating.
try (CloseableIterator<Run<?,?>> iterator = listener.iterator()) {
// Go through each run received since the last scrape.
while (iterator.hasNext()) {

Check warning on line 140 in src/main/java/org/jenkinsci/plugins/prometheus/JobCollector.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 140 is only partially covered, one branch is missing
Run<?,?> run = iterator.next();
Job<?,?> job = run.getParent();

// Calculate the metrics.
String[] labelValues = JobLabel.getBaseLabelValues(job);
jobFailedCount.calculateMetric(run, labelValues);
jobSuccessCount.calculateMetric(run, labelValues);
jobTotalCount.calculateMetric(run, labelValues);
jobAbortedCount.calculateMetric(run, labelValues);
jobUnstableCount.calculateMetric(run,labelValues);
}

Check warning on line 151 in src/main/java/org/jenkinsci/plugins/prometheus/JobCollector.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered lines

Lines 141-151 are not covered by tests
}

// This metric uses "base" labels as it is just the health score reported
// by the job object and the optional labels params and status don't make much
Expand Down Expand Up @@ -165,6 +195,9 @@ public List<MetricFamilySamples> collect() {
addSamples(samples, summary.collect(), "Adding [{}] samples from summary ({})");
addSamples(samples, jobSuccessCount.collect(), "Adding [{}] samples from counter ({})");
addSamples(samples, jobFailedCount.collect(), "Adding [{}] samples from counter ({})");
addSamples(samples, jobAbortedCount.collect(), "Adding [{}] samples from counter ({})");
addSamples(samples, jobUnstableCount.collect(), "Adding [{}] samples from counter ({})");
addSamples(samples, jobTotalCount.collect(), "Adding [{}] samples from counter ({})");
addSamples(samples, jobHealthScoreGauge.collect(), "Adding [{}] samples from gauge ({})");
addSamples(samples, nbBuildsGauge.collect(), "Adding [{}] samples from gauge ({})");
addSamples(samples, buildDiscardGauge.collect(), "Adding [{}] samples from gauge ({})");
Expand Down Expand Up @@ -201,17 +234,8 @@ private void addSamples(List<MetricFamilySamples> allSamples, BuildMetrics build
}

protected void appendJobMetrics(Job<?, ?> job) {
boolean isAppendParamLabel = PrometheusConfiguration.get().isAppendParamLabel();
boolean isAppendStatusLabel = PrometheusConfiguration.get().isAppendStatusLabel();
boolean isPerBuildMetrics = PrometheusConfiguration.get().isPerBuildMetrics();
String[] buildParameterNamesAsArray = PrometheusConfiguration.get().getLabeledBuildParameterNamesAsArray();

// Add this to the repo as well so I can group by Github Repository
String repoName = StringUtils.substringBetween(job.getFullName(), "/");
if (repoName == null) {
repoName = NOT_AVAILABLE;
}
String[] baseLabelValueArray = {job.getFullName(), repoName, String.valueOf(job.isBuildable())};
String[] baseLabelValueArray = JobLabel.getBaseLabelValues(job);

Run<?, ?> lastBuild = job.getLastBuild();
// Never built
Expand All @@ -233,38 +257,9 @@ protected void appendJobMetrics(Job<?, ?> job) {
logger.debug("getting metrics for run [{}] from job [{}], include per run metrics [{}]", run.getNumber(), job.getName(), isPerBuildMetrics);
if (Runs.includeBuildInMetrics(run)) {
logger.debug("getting build info for run [{}] from job [{}]", run.getNumber(), job.getName());

Result runResult = run.getResult();
String[] labelValueArray = baseLabelValueArray;

if (isAppendParamLabel) {
String params = Runs.getBuildParameters(run).entrySet().stream().map(e -> e.getKey() + "=" + e.getValue()).collect(Collectors.joining(";"));
labelValueArray = Arrays.copyOf(labelValueArray, labelValueArray.length + 1);
labelValueArray[labelValueArray.length - 1] = params;
}
if (isAppendStatusLabel) {
String resultString = UNDEFINED;
if (runResult != null) {
resultString = runResult.toString();
}
labelValueArray = Arrays.copyOf(labelValueArray, labelValueArray.length + 1);
labelValueArray[labelValueArray.length - 1] = run.isBuilding() ? "RUNNING" : resultString;
}

for (String configBuildParam : buildParameterNamesAsArray) {
labelValueArray = Arrays.copyOf(labelValueArray, labelValueArray.length + 1);
String paramValue = UNDEFINED;
Object paramInBuild = Runs.getBuildParameters(run).get(configBuildParam);
if (paramInBuild != null) {
paramValue = String.valueOf(paramInBuild);
}
labelValueArray[labelValueArray.length - 1] = paramValue;
}
String[] labelValueArray = JobLabel.getJobLabelVaues(job, run);

Check warning on line 260 in src/main/java/org/jenkinsci/plugins/prometheus/JobCollector.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered lines

Lines 238-260 are not covered by tests

summary.calculateMetric(run, labelValueArray);
jobFailedCount.calculateMetric(run, labelValueArray);
jobSuccessCount.calculateMetric(run, labelValueArray);

if (isPerBuildMetrics) {
labelValueArray = Arrays.copyOf(labelValueArray, labelValueArray.length + 1);
labelValueArray[labelValueArray.length - 1] = String.valueOf(run.getNumber());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@ public enum CollectorType {
NODES_ONLINE_GAUGE("nodes_online"),
BUILD_DURATION_GAUGE("build_duration_milliseconds"),
BUILD_DURATION_SUMMARY("duration_milliseconds_summary"),
BUILD_FAILED_COUNTER("failed_build_count"),
BUILD_RESULT_GAUGE("build_result"),
BUILD_RESULT_ORDINAL_GAUGE("build_result_ordinal"),
BUILD_START_GAUGE("build_start_time_milliseconds"),
BUILD_FAILED_COUNTER("failed_build_count"),
BUILD_TOTAL_COUNTER("total_build_count"),
BUILD_SUCCESSFUL_COUNTER("success_build_count"),
BUILD_UNSTABLE_COUNTER("unstable_build_count"),
BUILD_ABORTED_COUNTER("aborted_build_count"),
BUILD_LIKELY_STUCK_GAUGE("likely_stuck"),

FAILED_TESTS_GAUGE("build_tests_failing"),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package org.jenkinsci.plugins.prometheus.collectors.builds;

import hudson.model.Result;
import hudson.model.Run;
import io.prometheus.client.Counter;
import io.prometheus.client.SimpleCollector;

import org.jenkinsci.plugins.prometheus.collectors.CollectorType;

/**
 * Counter metric that is incremented once for every run whose result is
 * {@link Result#ABORTED}.
 */
public class BuildAbortedCounter extends BuildsMetricCollector<Run<?, ?>, Counter> {

    /**
     * @param labelNames label names attached to the counter
     * @param namespace  metric namespace
     * @param subsystem  metric subsystem
     */
    protected BuildAbortedCounter(String[] labelNames, String namespace, String subsystem) {
        super(labelNames, namespace, subsystem);
    }

    /**
     * @param labelNames label names attached to the counter
     * @param namespace  metric namespace
     * @param subsystem  metric subsystem
     * @param prefix     prefix passed through to the parent collector
     */
    protected BuildAbortedCounter(String[] labelNames, String namespace, String subsystem, String prefix) {
        super(labelNames, namespace, subsystem, prefix);
    }

    /** @return the collector type identifying the aborted-build counter */
    @Override
    protected CollectorType getCollectorType() {
        return CollectorType.BUILD_ABORTED_COUNTER;
    }

    /** @return the help text published with the metric */
    @Override
    protected String getHelpText() {
        return "aborted build count";
    }

    /** @return a fresh Prometheus counter builder */
    @Override
    protected SimpleCollector.Builder<?, Counter> getCollectorBuilder() {
        return Counter.build();
    }

    /**
     * Increments the counter for the given label values when the run was aborted.
     *
     * @param jenkinsObject the run to inspect
     * @param labelValues   label values identifying the counter child to increment
     */
    @Override
    public void calculateMetric(Run<?, ?> jenkinsObject, String[] labelValues) {
        // Anything other than an aborted result leaves the counter untouched.
        if (jenkinsObject.getResult() != Result.ABORTED) {
            return;
        }
        this.collector.labels(labelValues).inc();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
import org.jenkinsci.plugins.prometheus.collectors.MetricCollector;
import org.jenkinsci.plugins.prometheus.collectors.NoOpMetricCollector;

import static org.jenkinsci.plugins.prometheus.collectors.CollectorType.*;

public class BuildCollectorFactory extends BaseCollectorFactory {

public BuildCollectorFactory() {
Expand Down Expand Up @@ -41,6 +39,12 @@ public BuildCollectorFactory() {
return saveBuildCollector(new TotalTestsGauge(labelNames, namespace, subsystem, prefix));
case BUILD_LIKELY_STUCK_GAUGE:
return saveBuildCollector(new BuildLikelyStuckGauge(labelNames, namespace, subsystem, prefix));
case BUILD_ABORTED_COUNTER:
return saveBuildCollector(new BuildAbortedCounter(labelNames, namespace, subsystem, prefix));
case BUILD_UNSTABLE_COUNTER:
return saveBuildCollector(new BuildUnstableCounter(labelNames, namespace, subsystem, prefix));
case BUILD_TOTAL_COUNTER:
return saveBuildCollector(new BuildTotalCounter(labelNames, namespace, subsystem, prefix));
default:
return new NoOpMetricCollector<>();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package org.jenkinsci.plugins.prometheus.collectors.builds;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import hudson.Extension;
import hudson.model.Run;
import hudson.model.TaskListener;
import hudson.model.listeners.RunListener;

/*
* Listens to builds that have been completed and stores them in a list.
* The JobCollector reads items in the list when it performs a scrape and
* publishes the data.
* Class extends https://javadoc.jenkins.io/hudson/model/listeners/RunListener.html
*/
public class BuildCompletionListener extends RunListener<Run<?,?>> {
// static instance of the class to use as a singleton.
private static BuildCompletionListener _Listener;

// Lock to synchronize iteration and adding to the collection
private Lock lock;

// Holds the list o runs in queue.
private List<Run<?,?>> runStack;

// Iterable that defines a close method (allows us to use try resource) block
// in JobCollector.java
public interface CloseableIterator<T> extends Iterator<T>, AutoCloseable {
void close();
}

// Protected so no one can create their own copy of the class.
protected BuildCompletionListener(){
runStack = Collections.synchronizedList(new ArrayList<>());
lock = new ReentrantLock();
}

/*
* Extension tells Jenkins to register this class as a RunListener and to use
* this method in order to retrieve an instance of the class. It is a singleton
* so we can get the same reference registered in Jenkins in another class.
*/
@Extension
public synchronized static BuildCompletionListener getInstance(){
if(_Listener == null){

Check warning on line 50 in src/main/java/org/jenkinsci/plugins/prometheus/collectors/builds/BuildCompletionListener.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Partially covered line

Line 50 is only partially covered, one branch is missing
_Listener = new BuildCompletionListener();
}
return _Listener;
}

/*
* Fires on completion of a job.
*/
public void onCompleted(Run<?,?> run, TaskListener listener){
push(run);
}

/*
* Pushes a run onto the list
*/
private synchronized void push(Run<?,?> run){
// Acquire lock
lock.lock();

// Try to add the run to the list. If something goes wrong, make sure
// we still unlock the lock!
try{
runStack.add(run);
}
finally{
lock.unlock();
}
}

Check warning on line 78 in src/main/java/org/jenkinsci/plugins/prometheus/collectors/builds/BuildCompletionListener.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered lines

Lines 60-78 are not covered by tests

/*
* Returns a closeable iterator
*/
public synchronized CloseableIterator<Run<?,?>> iterator(){
// acquire lock before iterating
lock.lock();
return new CloseableIterator<Run<?,?>>() {
// Get iterator from the list
private Iterator<Run<?,?>> iterator = runStack.iterator();

@Override
public boolean hasNext() {
return iterator.hasNext();
}

@Override
public Run<?,?> next() {
return iterator.next();
}

@Override
public void remove() {
iterator.remove();
}

Check warning on line 103 in src/main/java/org/jenkinsci/plugins/prometheus/collectors/builds/BuildCompletionListener.java

View check run for this annotation

ci.jenkins.io / Code Coverage

Not covered lines

Lines 97-103 are not covered by tests

// When we close the iterator, clear the list right before we unlock.
// This ensures we don't see the same job twice if iterator is called again.
public void close() {
runStack.clear();
lock.unlock();
}
};
}
}
Loading

0 comments on commit 73e8034

Please sign in to comment.