diff --git a/HISTORY b/HISTORY index de3f3d2..a5961d9 100644 --- a/HISTORY +++ b/HISTORY @@ -44,3 +44,12 @@ Version 1.2: Improvements for Icinga 2, made by Onkobu - Added the ability to switch between old and new style with an -n option - Fixed multi-sensor with identical thresholds + +Version 1.3: + Fixes for multi sensor, made by Onkobu + - each sensor appears exactly once in message and in performance metrics + Test infrastructure + - some Bash scripts and sample output mocking sensors' output + - outlined some terms (device, adapter, sensor) in README + - short explanation how to add tests in README + diff --git a/README b/README index dff2e25..3e8fd22 100644 --- a/README +++ b/README @@ -1,6 +1,6 @@ -check_temp +# check_temp -A small Nagios plugin that checks the CPU (or M/B) temperature with lm-sensors. +A small Nagios/Icinga plugin that checks the CPU (or M/B) temperature with lm-sensors. It's written in Bash and uses *nix "sensors" and some sed & awk. Default is to check the CPU temperature but this can be changed to, for example, the motherboard temperature with a "--sensor" argument. @@ -14,7 +14,7 @@ There is a very good Perl fork of check_temp written by Chad Columbus. It's available on Nagios Exchange at http://exchange.nagios.org/directory/Plugins/Operating-Systems/Linux/check_temp-2Epl/details -Required: +## Required 1) Install lm-sensors: a) On Debian/Ubuntu... apt-get install lm-sensors b) On OpenSUSE etc... zypper in sensors @@ -36,3 +36,65 @@ MAKE SURE YOU TYPE "YES" TO THE LAST OPTION: "Do you want to add these lines aut a) /etc/init.d/module-init-tools restart or b) service module-init-tools restart + +# Sensor 101 + +The output normally has a set of one or more blocks. This totally depends +on your hardware, available and loaded modules. 
A single block looks like +the following: + +``` +k10temp-pci-00c3 +Adapter: PCI adapter +temp1: +45.2°C (high = +70.0°C) + (crit = +105.0°C, hyst = +97.5°C) +``` + +The first line is the label and represents a device. As shown above the +device is named "k10temp-pci-00c3". The second line can be suppressed +with `sensors -A`. It represents the type of adapter of the device. +Each device has at least one sensor. In the above case the sensor name is +temp1. + +There can be other devices like a laptop battery. It could have only +a single sensor and it shows the battery's current voltage. A more complex +example with a battery a GPU and a CPU: + +``` +BAT1-acpi-0 +in0: 15.54 V + +radeon-pci-0008 +temp1: +45.5°C (crit = +120.0°C, hyst = +90.0°C) + +k10temp-pci-00c3 +temp1: +44.1°C (high = +70.0°C) + (crit = +105.0°C, hyst = +97.5°C) +``` + +This sample shows a quite common issue of `--sensor` without +`-n`: temp1 is not unique and the first occurence will become the +input for the monitoring system. + +# Tests + +Under "tests" is a script `run_tests.sh` that executes all test scripts in +random order. Each test script comes with a mocked output of sensors. It +plays tricks on `check_temp.sh`. It is not necessary to instrument any +binary nor `sudo` or symbolic links. + +To create your test case: + +1. Think of a good name for the use case the tests will assert, e.g. "new_filter" +1. Redirect the output to a data file: `sensors >tests/data/sample_new_filter` +1. copy one of the existing tests that suits your needs best, e.g. `test_new_filter.sh` +1. edit the skeleton to invoke check_temp.sh with the necessary parameters +1. file a pull request to outline the problem with the newly added test code + +The common case is check_temp.sh without the -n switch which invokes sensors +command without any parameters. Use TESTDATA and a single file as mocked +sensors output. + +If you intend to test with -n switch instead use TESTDATA_PREFIX. 
Each device
+specified with --sensor is mapped to the mock file TESTDATA_PREFIX-<device>.
+The test script `test_new_filter.sh` illustrates this.
diff --git a/check_temp.sh b/check_temp.sh
index 69c1316..dfb6a1e 100755
--- a/check_temp.sh
+++ b/check_temp.sh
@@ -33,7 +33,7 @@
 #
 #
 ###############################################################################
-VERSION="Version 1.2"
+VERSION="Version 1.3"
 AUTHOR="(c) 2011 Jack-Benny Persson (jack-benny@cyberinfo.se), (c) 2020 Onkobu Tanaake (oss@onkobutanaake.de)"
 
 # Sensor program
@@ -112,7 +112,9 @@ sensor_declared=false
 STATE=$STATE_OK
 
 # See if we have sensors program installed and can execute it
-if [[ ! -x "$SENSORPROG" ]]; then
+if [ "$SENSORPROG" = 'sensors' ] && [ "$(LC_ALL=C type -t "$SENSORPROG")" = "function" ]; then
+    echo "!!! TEST MODE !!!"
+elif [[ ! -x "$SENSORPROG" ]]; then
     echo "It appears you don't have lm-sensors installed. You may find help in the readme for this script."
     exit $STATE_UNKNOWN
 fi
@@ -278,7 +280,10 @@ while [[ -n "$1" ]]; do
                 process_sensor "$default_sensor"
             else
                 process_sensor "$sensors_to_check"
+                unset sensors_to_check
             fi
+            unset thresh_warn
+            unset thresh_crit
         fi
     done
 
diff --git a/tests/common.inc b/tests/common.inc
new file mode 100644
index 0000000..28f35ee
--- /dev/null
+++ b/tests/common.inc
@@ -0,0 +1,108 @@
+# Shared helpers for the check_temp.sh test scripts, loaded with
+# `. common.inc`. whereis and sensors are exported so that the bash
+# process running ../check_temp.sh picks them up as well.
+
+# Stand-in for whereis(1): prints "lala sensors" whatever the arguments.
+# NOTE(review): check_temp.sh presumably uses the second word as the name
+# of the sensors program - confirm against the script.
+whereis() {
+  echo lala sensors
+}
+
+export -f whereis
+
+# Stand-in for sensors(1). Prints a sample file without its '#' lines:
+#   two arguments (<switch> <device>) -> ${TESTDATA_PREFIX}-<device>
+#   any other argument count          -> $TESTDATA
+# Returns the status of grep, or 1 when TESTDATA is required but empty.
+sensors() {
+  if [ $# -eq 2 ]; then
+    local sensor=$2
+    if [ -n "$TESTDATA" ]; then
+      >&2 echo "WARNING: TESTDATA is set while check_temp uses -n"
+      >&2 echo "WARNING: Therefore ${TESTDATA_PREFIX}-${sensor} is loaded instead"
+      >&2 echo "WARNING: of $TESTDATA"
+    fi
+    grep -v -e '^#' "${TESTDATA_PREFIX}-${sensor}"
+  else
+    if [ -z "$TESTDATA" ]; then
+      # an empty file name must never end up as "read from stdin"
+      >&2 echo "ERROR: TESTDATA is not set, there is no mocked sensors output"
+      return 1
+    fi
+    grep -v -e '^#' "$TESTDATA"
+  fi
+}
+
+export -f sensors
+
+# Prints the sample file that belongs to a test script: directory and
+# ".sh" are stripped, the first "test" in the name becomes "sample",
+# e.g. ./test_cpu_default.sh -> ./data/sample_cpu_default
+#   $1 - path of the test script, usually $0
+sample_from_scriptname() {
+  local scriptName stage1
+  scriptName=$(basename "$1")
+  stage1=${scriptName%.sh}
+  echo "./data/${stage1/test/sample}"
+}
+
+# Matches one or more temperatures, only the last is without
+# a comma. All others must have a comma at their end
+#
+#   $1    - complete output of check_temp.sh
+#   $2... - expected temperatures in the order they appear
+# The word after each "temperature:" is compared with the next expected
+# value. Prints 0 when all of them matched, otherwise 1 plus a message
+# on stderr.
+has_temperature() {
+  local output="$1"
+  local expectedTemp="$2"
+  local tempFound=0
+  local token
+
+  # $output is left unquoted on purpose: it is compared word by word
+  for token in $output; do
+    if [ "$token" = 'temperature:' ]; then
+      tempFound=1
+    elif [ "$tempFound" -eq 1 ]; then
+      if [ "$token" = "$expectedTemp" ]; then
+        if [ $# -eq 2 ]; then
+          echo 0
+          return
+        else
+          # drop the value just matched; the next expected one is $2 again
+          shift
+          expectedTemp="$2"
+        fi
+      else
+        >&2 echo -e "Expected temperature $expectedTemp not in output:\n$output"
+        echo 1
+        return
+      fi
+      tempFound=0
+    fi
+  done
+
+  # the output ended before every expected temperature showed up
+  >&2 echo -e "Expected temperature $expectedTemp not in output:\n$output"
+  echo 1
+}
+
+# matches the stats as a block, thus order is implicit
+#
+#   $1 - complete output of check_temp.sh
+#   $2 - expected text after the first "| " of the output
+# Prints 0 on an exact match, otherwise 1 plus a message on stderr.
+has_stats() {
+  local output="$1"
+  local expectedStats="$2"
+  local stats=${output#*| }
+
+  if [ "$stats" = "$expectedStats" ]; then
+    echo 0
+  else
+    >&2 echo -e "Expected statistics $expectedStats not in output:\n$output"
+    echo 1
+  fi
+}
diff --git a/tests/data/sample_cpu_default b/tests/data/sample_cpu_default
new file mode 100644
index 0000000..86cdc0d
--- /dev/null
+++ b/tests/data/sample_cpu_default
@@ -0,0 +1,9 @@
+# A basic test with the default sensor CPU. It is very important
+# to not repeat the sensor name in the first line of the output.
+# Only the first occurrence of the sensor name is taken apart for
+# temperature values
+#
+A fancy output
+Adapter: PCI adapter
+CPU: +44.1°C (high = +70.0°C)
+ (crit = +105.0°C, hyst = +97.5°C)
diff --git a/tests/data/sample_multi_sensor b/tests/data/sample_multi_sensor
new file mode 100644
index 0000000..6121139
--- /dev/null
+++ b/tests/data/sample_multi_sensor
@@ -0,0 +1,13 @@
+# A basic test with the default sensor CPU. It is very important
+# to not repeat the sensor name in the first line of the output.
+# Only the first occurance of the sensor name is taken apart for +# temperature values +# +A fancy output +Adapter: PCI adapter +first: +34.1°C (high = +70.0°C) + (crit = +105.0°C, hyst = +97.5°C)A fancy output +Another Line +Adapter: PCI adapter +second: +45.1°C (high = +70.0°C) + (crit = +105.0°C, hyst = +97.5°C) diff --git a/tests/data/sample_new_filter-k10temp-pci-00c3 b/tests/data/sample_new_filter-k10temp-pci-00c3 new file mode 100644 index 0000000..0b0fc0e --- /dev/null +++ b/tests/data/sample_new_filter-k10temp-pci-00c3 @@ -0,0 +1,9 @@ +# The -n switch forces sensors to output only the specific +# device instead of filtering for the sensor line. Thus temp1 +# does not appear anywhere, not in the stats, not in the parameter +# list. +# +k10temp-pci-00c3 +temp1: +45.2°C (high = +70.0°C) + (crit = +105.0°C, hyst = +97.5°C) + diff --git a/tests/data/sample_new_filter-radeon-pci-0008 b/tests/data/sample_new_filter-radeon-pci-0008 new file mode 100644 index 0000000..d72173f --- /dev/null +++ b/tests/data/sample_new_filter-radeon-pci-0008 @@ -0,0 +1,3 @@ +radeon-pci-0008 +temp1: +40.7°C (crit = +120.0°C, hyst = +90.0°C) + diff --git a/tests/data/sample_relabel b/tests/data/sample_relabel new file mode 100644 index 0000000..86cdc0d --- /dev/null +++ b/tests/data/sample_relabel @@ -0,0 +1,9 @@ +# A basic test with the default sensor CPU. It is very important +# to not repeat the sensor name in the first line of the output. 
+# Only the first occurrence of the sensor name is taken apart for
+# temperature values
+#
+A fancy output
+Adapter: PCI adapter
+CPU: +44.1°C (high = +70.0°C)
+ (crit = +105.0°C, hyst = +97.5°C)
diff --git a/tests/run_test.sh b/tests/run_test.sh
new file mode 100755
index 0000000..fdc9642
--- /dev/null
+++ b/tests/run_test.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# Runs every ./test_*.sh that sits next to this script and prints one
+# coloured result line per test case, followed by the totals.
+
+# The test scripts source common.inc and read ./data relative to this
+# directory, so switch to it no matter where the runner was started from.
+cd "$(dirname "$0")" || exit 1
+
+export esc="\e"
+export COLOR_RST="${esc}[0m"
+export COLOR_RED="${esc}[31m"
+export COLOR_GRN="${esc}[32m"
+export COLOR_YLW="${esc}[33m"
+export COLOR_LGREEN="${esc}[1;32m"
+
+PATTERN='%-25s %-45s %s'
+totalTests=0
+testsFailed=0
+testsSucceeded=0
+
+# A glob instead of parsing `ls`: file names reach the loop unsplit.
+for testCase in ./test_*.sh; do
+  # an unmatched pattern stays literal; there is nothing to run then
+  [ -e "$testCase" ] || continue
+  (( totalTests++ ))
+  echo "*** Start testcase $testCase"
+  "$testCase"
+  errCode=$?
+
+  if [ "$errCode" -ne 0 ]; then
+    (( testsFailed++ ))
+    status="${COLOR_RED}failed${COLOR_RST}"
+  else
+    (( testsSucceeded++ ))
+    status="${COLOR_LGREEN}success${COLOR_RST}"
+  fi
+
+  printf -v msg "$PATTERN" "$testCase" "->" "$status"
+
+  echo -e "$msg"
+  echo -e "----------------------------------------\n"
+done
+
+echo "Of $totalTests tests $testsFailed failed and $testsSucceeded succeeded"
diff --git a/tests/test_cpu_default.sh b/tests/test_cpu_default.sh
new file mode 100755
index 0000000..07a57cd
--- /dev/null
+++ b/tests/test_cpu_default.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# No --sensor given: the CPU line of the sample is expected in the result.
+. common.inc
+
+export TESTDATA=$(sample_from_scriptname "$0")
+
+result=$(../check_temp.sh -w 65 -c 75)
+
+matcher1=$(has_temperature "$result" '+44.1°C')
+matcher2=$(has_stats "$result" 'CPU=44;65;75')
+
+exit $(( matcher1 + matcher2 ))
+
diff --git a/tests/test_multi_sensor.sh b/tests/test_multi_sensor.sh
new file mode 100755
index 0000000..1105a44
--- /dev/null
+++ b/tests/test_multi_sensor.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Two sensors, each with its own -w/-c pair, expected in call order.
+. common.inc
+
+export TESTDATA=$(sample_from_scriptname "$0")
+
+result=$(../check_temp.sh -w 65 -c 75 --sensor first -w 60 -c 70 --sensor second)
+
+matcher1=$(has_temperature "$result" '+34.1°C,' '+45.1°C')
+matcher2=$(has_stats "$result" 'first=34;65;75 second=45;60;70')
+
+exit $(( matcher1 + matcher2 ))
+
diff --git a/tests/test_new_filter.sh b/tests/test_new_filter.sh
new file mode 100755
index 0000000..c23553e
--- /dev/null
+++ b/tests/test_new_filter.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Two devices selected with -n, expected in the stats as CPU and GPU.
+. common.inc
+
+export TESTDATA_PREFIX=$(sample_from_scriptname "$0")
+
+echo "$TESTDATA"
+
+# -n enables filtering by device through sensors command
+#
+result=$(../check_temp.sh -n -w 65 -c 75 --sensor k10temp-pci-00c3,CPU -w 60 -c 80 --sensor radeon-pci-0008,GPU)
+
+matcher1=$(has_temperature "$result" '+45.2°C,' '+40.7°C')
+matcher2=$(has_stats "$result" 'CPU=45;65;75 GPU=40;60;80')
+
+exit $(( matcher1 + matcher2 ))
+
diff --git a/tests/test_relabel.sh b/tests/test_relabel.sh
new file mode 100755
index 0000000..78a06e1
--- /dev/null
+++ b/tests/test_relabel.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# --sensor CPU,Emily: the CPU reading is expected in the stats as Emily.
+. common.inc
+
+export TESTDATA=$(sample_from_scriptname "$0")
+
+result=$(../check_temp.sh -w 65 -c 75 --sensor CPU,Emily)
+
+matcher2=$(has_stats "$result" 'Emily=44;65;75')
+
+exit $(( matcher2 ))
+