Skip to content

Commit

Permalink
[JENKINS-72009] allow agents to define diskspace monitoring thresholds
Browse files Browse the repository at this point in the history
Introduces a NodeProperty to define agent specific thresholds for free
disk and temp space.
Enhance the tooltip in the executor widget so it includes the threshold
Show a tooltip in the nodes table when agent was marked offline

todo: add a warning threshold
  • Loading branch information
mawinter69 committed Oct 10, 2023
1 parent 7d1217b commit 217e80f
Show file tree
Hide file tree
Showing 32 changed files with 205 additions and 61 deletions.
10 changes: 7 additions & 3 deletions core/src/main/java/hudson/Functions.java
Original file line number Diff line number Diff line change
Expand Up @@ -2299,13 +2299,17 @@ public static String humanReadableByteSize(long size) {
double number = size;
if (number >= 1024) {
number = number / 1024;
measure = "KB";
measure = "KiB";
if (number >= 1024) {
number = number / 1024;
measure = "MB";
measure = "MiB";
if (number >= 1024) {
number = number / 1024;
measure = "GB";
measure = "GiB";
if (number >= 1024) {
number = number / 1024;
measure = "TiB";
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ protected AbstractDiskSpaceMonitor(String threshold) throws ParseException {
}

protected AbstractDiskSpaceMonitor() {
this.freeSpaceThreshold = "1GB";
this.freeSpaceThreshold = "1GiB";
}

public long getThresholdBytes() {
Expand All @@ -37,12 +37,16 @@ public long getThresholdBytes() {
}
}

/**
 * Returns the free-space threshold to apply to the given computer.
 *
 * <p>This implementation ignores {@code c} and simply returns the monitor-wide
 * value from {@link #getThresholdBytes()}.
 *
 * @param c the computer the threshold is requested for (unused here)
 * @return the same value as {@link #getThresholdBytes()}
 */
public long getThresholdBytes(Computer c) {
return getThresholdBytes();
}

@Override
public Object data(Computer c) {
DiskSpace size = markNodeOfflineIfDiskspaceIsTooLow(c);

// mark online (again), if free space is over threshold
if (size != null && size.size > getThresholdBytes() && c.isOffline() && c.getOfflineCause() instanceof DiskSpace)
if (size != null && size.size > getThresholdBytes(c) && c.isOffline() && c.getOfflineCause() instanceof DiskSpace)
if (this.getClass().equals(((DiskSpace) c.getOfflineCause()).getTrigger()))
if (getDescriptor().markOnline(c)) {
LOGGER.info(Messages.DiskSpaceMonitor_MarkedOnline(c.getDisplayName()));
Expand All @@ -59,10 +63,14 @@ public Object data(Computer c) {
@Restricted(NoExternalUse.class)
public DiskSpace markNodeOfflineIfDiskspaceIsTooLow(Computer c) {
DiskSpace size = (DiskSpace) super.data(c);
if (size != null && size.size < getThresholdBytes()) {
size.setTriggered(this.getClass(), true);
if (getDescriptor().markOffline(c, size)) {
LOGGER.warning(Messages.DiskSpaceMonitor_MarkedOffline(c.getDisplayName()));
long threshold = getThresholdBytes(c);
if (size != null) {
size.setThreshold(threshold);
if (size.size < threshold) {
size.setTriggered(this.getClass(), true);
if (getDescriptor().markOffline(c, size)) {
LOGGER.warning(Messages.DiskSpaceMonitor_MarkedOffline(c.getDisplayName()));
}
}
}
return size;
Expand Down
16 changes: 16 additions & 0 deletions core/src/main/java/hudson/node_monitors/DiskSpaceMonitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,22 @@ public DiskSpaceMonitor(String freeSpaceThreshold) throws ParseException {

public DiskSpaceMonitor() {}

/**
 * Resolves the free disk space threshold for the given computer.
 *
 * <p>If the computer's node carries a {@link DiskSpaceMonitorNodeProperty}, its
 * free disk space threshold is parsed and returned. In every other case (no
 * node, no property, no threshold value, or a value that cannot be parsed) the
 * monitor-wide threshold from {@link #getThresholdBytes()} is returned.
 *
 * @param c the computer whose threshold is requested
 * @return the parsed node-specific threshold, or the monitor-wide threshold as fallback
 */
@Override
public long getThresholdBytes(Computer c) {
    Node node = c.getNode();
    if (node == null) {
        return getThresholdBytes();
    }
    DiskSpaceMonitorNodeProperty nodeProperty = node.getNodeProperty(DiskSpaceMonitorNodeProperty.class);
    if (nodeProperty == null) {
        return getThresholdBytes();
    }
    String configured = nodeProperty.getFreeDiskSpaceThreshold();
    if (configured == null) {
        // DiskSpace.parse dereferences its argument, so a property created without
        // a threshold value must fall back here instead of throwing a NullPointerException.
        return getThresholdBytes();
    }
    try {
        return DiskSpace.parse(configured).size;
    } catch (ParseException e) {
        // Deliberate best effort: an unparsable node-level value must not break
        // monitoring, so the monitor-wide threshold is used instead.
        return getThresholdBytes();
    }
}

/**
 * Looks up the disk space entry for the given computer by delegating to
 * {@code DESCRIPTOR.get(c)}.
 *
 * @param c the computer to look up
 * @return whatever {@code DESCRIPTOR.get(c)} yields for that computer
 *         (NOTE(review): presumably the last recorded measurement, possibly null; confirm against the descriptor)
 */
public DiskSpace getFreeSpace(Computer c) {
return DESCRIPTOR.get(c);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,17 @@
package hudson.node_monitors;

import hudson.Functions;
import hudson.Util;
import hudson.node_monitors.DiskSpaceMonitorDescriptor.DiskSpace;
import hudson.remoting.VirtualChannel;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.math.BigDecimal;
import java.text.ParseException;
import java.util.Locale;
import jenkins.MasterToSlaveFileCallable;
import org.kohsuke.accmod.Restricted;
import org.kohsuke.accmod.restrictions.DoNotUse;
import org.kohsuke.accmod.restrictions.NoExternalUse;
import org.kohsuke.stapler.export.Exported;
import org.kohsuke.stapler.export.ExportedBean;

Expand All @@ -58,6 +57,7 @@ public static final class DiskSpace extends MonitorOfflineCause implements Seria

private boolean triggered;
private Class<? extends AbstractDiskSpaceMonitor> trigger;
private long threshold;

/**
* @param path
Expand All @@ -71,7 +71,13 @@ public DiskSpace(String path, long size) {
@Override
public String toString() {
if (triggered) {
return Messages.DiskSpaceMonitorDescriptor_DiskSpace_FreeSpaceTooLow(getGbLeft(), path);
if (threshold >= 0) {
return Messages.DiskSpaceMonitorDescriptor_DiskSpace_FreeSpaceTooLow(
getGbLeft(), path, Functions.humanReadableByteSize(threshold));
} else {
return Messages.DiskSpaceMonitorDescriptor_DiskSpace_FreeSpaceTooLow(
getGbLeft(), path, "unset");
}
}
return Messages.DiskSpaceMonitorDescriptor_DiskSpace_FreeSpace(getGbLeft(), path);
}
Expand All @@ -96,22 +102,19 @@ public long getFreeSize() {
* Gets GB left.
*/
public String getGbLeft() {
long space = size;
space /= 1024L; // convert to KB
space /= 1024L; // convert to MB

return new BigDecimal(space).scaleByPowerOfTen(-3).toPlainString();
return Functions.humanReadableByteSize(size);
}

/**
* Returns the HTML representation of the space.
*/
public String toHtml() {
String humanReadableSpace = Functions.humanReadableByteSize(size);
if (triggered) {
return Util.wrapToErrorSpan(humanReadableSpace);
}
return humanReadableSpace;
return Functions.humanReadableByteSize(size);
}

/**
 * Returns the value of the {@code triggered} flag of this disk space entry.
 *
 * @return the current {@code triggered} flag
 */
@Restricted(NoExternalUse.class)
public boolean isTriggered() {
return triggered;
}

/**
Expand All @@ -130,20 +133,26 @@ protected void setTriggered(Class<? extends AbstractDiskSpaceMonitor> trigger, b
this.triggered = triggered;
}

/**
 * Stores the threshold associated with this disk space entry.
 *
 * @param threshold the threshold value to record
 */
protected void setThreshold(long threshold) {
this.threshold = threshold;
}

/**
 * Returns the monitor class stored in the {@code trigger} field.
 *
 * @return the value of {@code trigger}
 *         (NOTE(review): presumably null until a monitor has marked this entry as triggered; confirm)
 */
@Override
public Class<? extends AbstractDiskSpaceMonitor> getTrigger() {
return trigger;
}

/**
* Parses a human readable size description like "1GB", "0.5m", etc. into {@link DiskSpace}
* Parses a human readable size description like "1GB", "0.5m", "500KiB", etc. into {@link DiskSpace}
*
* @throws ParseException
* If failed to parse.
*/
public static DiskSpace parse(String size) throws ParseException {
size = size.toUpperCase(Locale.ENGLISH).trim();
if (size.endsWith("B")) // cut off 'B' from KB, MB, etc.
if (size.endsWith("B")) // cut off 'B' from KB, MB, KiB, etc.
size = size.substring(0, size.length() - 1);
if (size.endsWith("I")) // cut off 'i' from KiB, MiB, etc.
size = size.substring(0, size.length() - 1);

long multiplier = 1;
Expand All @@ -167,7 +176,9 @@ public static DiskSpace parse(String size) throws ParseException {
}

protected static final class GetUsableSpace extends MasterToSlaveFileCallable<DiskSpace> {
public GetUsableSpace() {}

public GetUsableSpace() {
}

@Override
public DiskSpace invoke(File f, VirtualChannel channel) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package hudson.node_monitors;

import edu.umd.cs.findbugs.annotations.NonNull;
import hudson.Extension;
import hudson.model.Node;
import hudson.slaves.NodeProperty;
import hudson.slaves.NodePropertyDescriptor;
import org.jenkinsci.Symbol;
import org.kohsuke.stapler.DataBoundConstructor;

/**
 * Node property that holds node-specific thresholds for free disk space and
 * free temp space.
 *
 * <p>Both thresholds are kept as the raw strings handed to the constructor;
 * this class neither parses nor validates them.
 */
public class DiskSpaceMonitorNodeProperty extends NodeProperty<Node> {
// Raw threshold strings, stored exactly as supplied to the constructor.
private final String freeDiskSpaceThreshold;
private final String freeTempSpaceThreshold;

/**
 * Creates the property from the two threshold strings.
 *
 * @param freeDiskSpaceThreshold threshold text for free disk space, stored as given
 * @param freeTempSpaceThreshold threshold text for free temp space, stored as given
 */
@DataBoundConstructor
public DiskSpaceMonitorNodeProperty(String freeDiskSpaceThreshold, String freeTempSpaceThreshold) {
this.freeDiskSpaceThreshold = freeDiskSpaceThreshold;
this.freeTempSpaceThreshold = freeTempSpaceThreshold;
}

/**
 * @return the free disk space threshold exactly as passed to the constructor
 */
public String getFreeDiskSpaceThreshold() {
return freeDiskSpaceThreshold;
}

/**
 * @return the free temp space threshold exactly as passed to the constructor
 */
public String getFreeTempSpaceThreshold() {
return freeTempSpaceThreshold;
}

/**
 * Descriptor for {@link DiskSpaceMonitorNodeProperty}, registered as an
 * extension under the symbol {@code diskspaceMonitor}.
 */
@Extension
@Symbol("diskspaceMonitor")
public static class DescriptorImpl extends NodePropertyDescriptor {

/**
 * @return the display name taken from the localized message bundle
 */
@NonNull
@Override
public String getDisplayName() {
return Messages.DiskSpaceMonitorNodeProperty_DisplayName();
}
}

}
16 changes: 16 additions & 0 deletions core/src/main/java/hudson/node_monitors/TemporarySpaceMonitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,22 @@ public TemporarySpaceMonitor(String freeSpaceThreshold) throws ParseException {

public TemporarySpaceMonitor() {}

/**
 * Resolves the free temp space threshold for the given computer.
 *
 * <p>If the computer's node carries a {@link DiskSpaceMonitorNodeProperty}, its
 * free temp space threshold is parsed and returned. In every other case (no
 * node, no property, no threshold value, or a value that cannot be parsed) the
 * monitor-wide threshold from {@link #getThresholdBytes()} is returned.
 *
 * @param c the computer whose threshold is requested
 * @return the parsed node-specific threshold, or the monitor-wide threshold as fallback
 */
@Override
public long getThresholdBytes(Computer c) {
    Node node = c.getNode();
    if (node == null) {
        return getThresholdBytes();
    }
    DiskSpaceMonitorNodeProperty nodeProperty = node.getNodeProperty(DiskSpaceMonitorNodeProperty.class);
    if (nodeProperty == null) {
        return getThresholdBytes();
    }
    String configured = nodeProperty.getFreeTempSpaceThreshold();
    if (configured == null) {
        // DiskSpace.parse dereferences its argument, so a property created without
        // a threshold value must fall back here instead of throwing a NullPointerException.
        return getThresholdBytes();
    }
    try {
        return DiskSpace.parse(configured).size;
    } catch (ParseException e) {
        // Deliberate best effort: an unparsable node-level value must not break
        // monitoring, so the monitor-wide threshold is used instead.
        return getThresholdBytes();
    }
}

public DiskSpace getFreeSpace(Computer c) {
DiskSpaceMonitorDescriptor descriptor = (DiskSpaceMonitorDescriptor) Jenkins.get().getDescriptor(TemporarySpaceMonitor.class);
return descriptor != null ? descriptor.get(c) : null;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
<div>
This option configures the minimum amount of free disk space desired
for an agent's proper operation, such as "1.5GB", "100KB", etc. If an agent is
found to have less free disk space than this amount, it will be marked
offline.
for an agent's proper operation, such as "1.5GB", "100KiB", etc. If an agent
is found to have less free disk space than this amount, it will be marked
temporarily offline. The threshold can be configured individually for each
agent via a property.
<p>
Set to 0 to disable this check except for agents that configure the
threshold explicitly.
</p>
</div>
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,14 @@ THE SOFTWARE.
<td align="right" data="-1">N/A</td>
</j:when>
<j:otherwise>
<td align="right" data="${data.freeSize}"><j:out value="${data.toHtml()}"/></td>
<td align="right" data="${data.freeSize}">
<j:if test="${data.triggered}">
<span class="error" tooltip="${data.toString()}">${data.toHtml()}</span>
</j:if>
<j:if test="${!data.triggered}">
${data.toHtml()}
</j:if>
</td>
</j:otherwise>
</j:choose>
</j:jelly>
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
<div>
This monitors the available disk space of
<code>$JENKINS_HOME</code>
on each agent, and if it gets below a threshold, the agent will be marked
offline.
on the controller or the remote root directory on each agent, and if it gets
below a threshold, the agent will be marked temporarily offline.

<p>
This directory is where all your builds are performed, so if it fills up,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?jelly escape-by-default='true'?>
<j:jelly xmlns:j="jelly:core" xmlns:st="jelly:stapler" xmlns:d="jelly:define" xmlns:l="/lib/layout" xmlns:t="/lib/hudson" xmlns:f="/lib/form">
<f:entry field="freeDiskSpaceThreshold" title="${%Free Disk Space Threshold}">
<f:textbox default="1GB"/>
</f:entry>
<f:entry field="freeTempSpaceThreshold" title="${%Free Temp Space Threshold}">
<f:textbox default="1GB"/>
</f:entry>
</j:jelly>
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Free\ Disk\ Space\ Threshold=Schwellwert für freien Festplattenspeicherplatz
Free\ Temp\ Space\ Threshold=Schwellwert für freien temporären Speicherplatz
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<div>
This option configures the minimum amount of free disk space desired
for the agent's proper operation, such as "1.5GB", "100KiB", etc. If an agent
is found to have less free disk space than this amount, it will be marked
temporarily offline.
<p>Set to 0 to disable this check on this agent.</p>
</div>
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<div>
This option configures the minimum amount of free temp space desired
for the agent's proper operation, such as "1.5GB", "100KiB", etc. If an agent
is found to have less free temp space than this amount, it will be marked
temporarily offline.
<p>Set to 0 to disable this check on this agent.</p>
</div>
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ ResponseTimeMonitor.TimeOut=Timed out for last {0} attempts
SwapSpaceMonitor.DisplayName=Free Swap Space
TemporarySpaceMonitor.DisplayName=Free Temp Space
AbstractNodeMonitorDescriptor.NoDataYet=Not yet
DiskSpaceMonitorDescriptor.DiskSpace.FreeSpaceTooLow=Disk space is too low. Only {0}GB left on {1}.
DiskSpaceMonitorDescriptor.DiskSpace.FreeSpace={0}GB left on {1}.
DiskSpaceMonitorDescriptor.DiskSpace.FreeSpaceTooLow=Disk space is below threshold of {2}. Only {0} left on {1}.
DiskSpaceMonitorDescriptor.DiskSpace.FreeSpace={0} left on {1}.
MonitorMarkedNodeOffline.DisplayName=Node Marked Offline Due to Health Check
DiskSpaceMonitorNodeProperty.DisplayName=Disk Space Monitoring Thresholds
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ TemporarySpaceMonitor.DisplayName=\
AbstractNodeMonitorDescriptor.NoDataYet=\
Няма данни
DiskSpaceMonitorDescriptor.DiskSpace.FreeSpaceTooLow=\
Дисковото пространство привършва. На {1} остават само {0} GB.
Дисковото пространство привършва. На {1} остават само {0}.
MonitorMarkedNodeOffline.DisplayName=\
Компютърът е отбелязан като извън линия в резултат на проверката на\
състоянието му.
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

DiskSpaceMonitorDescriptor.DiskSpace.FreeSpaceTooLow=Málo místa na disku. Zbývá pouze {0}GB.
DiskSpaceMonitorDescriptor.DiskSpace.FreeSpaceTooLow=Málo místa na disku. Zbývá pouze {0}.
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@ SwapSpaceMonitor.DisplayName=Ledig Swap Plads
ArchitectureMonitor.DisplayName=Arkitektur
ResponseTimeMonitor.MarkedOffline=Tager {0} offline midlertidigt da den ikke svarer
ResponseTimeMonitor.DisplayName=Respons Tid
DiskSpaceMonitorDescriptor.DiskSpace.FreeSpaceTooLow=Disk plads for lav. Kun {0}GB tilbage.
DiskSpaceMonitorDescriptor.DiskSpace.FreeSpaceTooLow=Disk plads for lav. Kun {0} tilbage.
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ ResponseTimeMonitor.DisplayName=Antwortzeit
ResponseTimeMonitor.TimeOut= {0} mal keine Antwort
SwapSpaceMonitor.DisplayName=Freier Swap Space
TemporarySpaceMonitor.DisplayName=Freier TEMP-Platz
DiskSpaceMonitorDescriptor.DiskSpace.FreeSpaceTooLow=Zu wenig Festplattenplatz: Nur noch {0}GB frei.
DiskSpaceMonitorDescriptor.DiskSpace.FreeSpaceTooLow=Festplattenplatz unterhalb der Schwelle von {2}. Nur noch {0} frei in {1}.
DiskSpaceMonitor.MarkedOnline=Nehme {0} wieder online, da wieder genug Festplattenplatz verfügbar ist.
MonitorMarkedNodeOffline.DisplayName=Knoten durch Health-Check-Prüfung offline
ResponseTimeMonitor.MarkedOffline=Nehme {0} offline, da er nicht antwortet.
AbstractNodeMonitorDescriptor.NoDataYet=Noch keine Daten
DiskSpaceMonitorNodeProperty.DisplayName=Schwellwerte für die Festplattenspeicherplatzüberwachung
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@ ResponseTimeMonitor.MarkedOffline=Poniendo temporalmente {0} fuera de línea por
ResponseTimeMonitor.TimeOut=Se sobrepasó el tiempo de espera en el último intento de {0}
SwapSpaceMonitor.DisplayName=Espacio de intercambio libre
TemporarySpaceMonitor.DisplayName=Espacio temporal libre
DiskSpaceMonitorDescriptor.DiskSpace.FreeSpaceTooLow=El espacio en disco es muy bajo, sólo quedan {0}GB.
DiskSpaceMonitorDescriptor.DiskSpace.FreeSpaceTooLow=El espacio en disco es muy bajo, sólo quedan {0}.
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ ResponseTimeMonitor.DisplayName=Temps de réponse
ResponseTimeMonitor.MarkedOffline={0} est marqué comme déconnecté temporairement, parce qu''il ne répond pas
ResponseTimeMonitor.TimeOut=Time out du dernier essai {0}
SwapSpaceMonitor.DisplayName=Espace de swap disponible
DiskSpaceMonitorDescriptor.DiskSpace.FreeSpaceTooLow=Espace disque insuffisant. Seulement {0} GB restant sur {1}
DiskSpaceMonitorDescriptor.DiskSpace.FreeSpaceTooLow=Espace disque insuffisant. Seulement {0} restant sur {1}
Loading

0 comments on commit 217e80f

Please sign in to comment.