-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathultimate_cron.nagios.inc
245 lines (216 loc) · 7.1 KB
/
ultimate_cron.nagios.inc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
<?php
use Drupal\Core\Logger\RfcLogLevel;
use Drupal\ultimate_cron\CronJobDiscovery;
/**
 * Implements hook_nagios_info().
 *
 * Returns the translated display name and the identifier ('ULTIMATE_CRON')
 * under which this module's checks are reported.
 */
function ultimate_cron_nagios_info() {
  $info = array();
  $info['name'] = t('Ultimate Cron Monitoring');
  $info['id'] = 'ULTIMATE_CRON';
  return $info;
}
/**
 * Implements hook_nagios().
 *
 * Runs every enabled check listed by ultimate_cron_nagios_functions() and
 * collects the results keyed by the check's result key.
 *
 * @param string $check
 *   Either 'nagios' to run all enabled checks, or the name of a single check
 *   function (without the '_check' suffix) to run only that one.
 *
 * @return array
 *   Map of result key => result data for each check that was run.
 */
function ultimate_cron_nagios($check = 'nagios') {
  $results = array();

  foreach (array_keys(ultimate_cron_nagios_functions()) as $name) {
    // Skip checks the administrator has switched off (enabled by default).
    if (!variable_get('ultimate_cron_nagios_func_' . $name, TRUE)) {
      continue;
    }
    // Skip checks that were not requested.
    if ($check != 'nagios' && $check != $name) {
      continue;
    }

    // Each check is implemented by a function named "<name>_check".
    $callback = $name . '_check';
    $outcome = $callback();
    $results[$outcome['key']] = $outcome['data'];
  }

  return $results;
}
/**
 * Implements hook_nagios_settings().
 *
 * Builds the settings form elements: one checkbox per available check
 * (to enable or disable it) followed by a fieldset holding the three
 * critical-alert thresholds.
 *
 * @return array
 *   Form API elements keyed by the variable name they configure.
 */
function ultimate_cron_nagios_settings() {
  $elements = array();

  // One on/off checkbox per check function.
  foreach (ultimate_cron_nagios_functions() as $check_name => $check_description) {
    $variable = 'ultimate_cron_nagios_func_' . $check_name;
    $elements[$variable] = array(
      '#type' => 'checkbox',
      '#title' => $check_name,
      '#default_value' => variable_get($variable, TRUE),
      '#description' => $check_description,
    );
  }

  $elements['thresholds'] = array(
    '#type' => 'fieldset',
    '#collapsible' => TRUE,
    '#collapsed' => FALSE,
    '#title' => t('Thresholds'),
    '#description' => t('Thresholds for reporting critical alerts to Nagios.'),
  );

  // Threshold text fields, keyed by the variable each one configures.
  $thresholds = array(
    'ultimate_cron_nagios_running_threshold' => array(
      'title' => t('Running jobs count'),
      'default' => 50,
      'description' => t('Issue a critical alert when more than this number of jobs are running. Default is 50.'),
    ),
    'ultimate_cron_nagios_failed_threshold' => array(
      'title' => t('Failed jobs count'),
      'default' => 10,
      'description' => t('Issue a critical alert when more than this number of jobs failed their last run. Default is 10.'),
    ),
    'ultimate_cron_nagios_longrunning_threshold' => array(
      'title' => t('Long running jobs'),
      'default' => 0,
      'description' => t('Issue a critical alert when more than this number of jobs are running longer than usual. Default is 0.'),
    ),
  );
  foreach ($thresholds as $variable => $definition) {
    $elements['thresholds'][$variable] = array(
      '#type' => 'textfield',
      '#title' => $definition['title'],
      '#default_value' => variable_get($variable, $definition['default']),
      '#description' => $definition['description'],
    );
  }

  return $elements;
}
/**
 * Implements hook_nagios_checks().
 *
 * @return array
 *   The same check name => description map that
 *   ultimate_cron_nagios_functions() provides.
 */
function ultimate_cron_nagios_checks() {
  $checks = ultimate_cron_nagios_functions();
  return $checks;
}
/**
 * Implements drush hook_nagios_check().
 *
 * Runs a single check by calling the function named "<$function>_check".
 *
 * @param string $function
 *   Name of the check, without the '_check' suffix.
 *
 * @return array
 *   Single-element map of the check's result key => result data.
 */
function ultimate_cron_nagios_check($function) {
  // Whether the user enabled this check is deliberately not verified here:
  // this runs via drush, so web security is not an issue.
  $callback = $function . '_check';
  $outcome = $callback();

  return array($outcome['key'] => $outcome['data']);
}
/************** HELPER FUNCTIONS ***********************************/
/**
 * Lists the available Nagios check functions.
 *
 * Each key is the base name of a check; the implementing function is that
 * name with '_check' appended.
 *
 * @return array
 *   Map of check name => translated description.
 *
 * @see ultimate_cron_nagios()
 */
function ultimate_cron_nagios_functions() {
  $checks = array();
  $checks['ultimate_cron_running'] = t('Check number of currently running jobs');
  $checks['ultimate_cron_failed'] = t('Check the number of jobs that failed last run');
  $checks['ultimate_cron_longrunning'] = t('Check the number of jobs that are running longer than usual');
  return $checks;
}
/**
 * Counts cron jobs that are currently running or whose last run failed.
 *
 * All counters are computed in a single pass over the discovered hooks the
 * first time this function is called and are then kept in a static cache, so
 * asking for a second mode neither repeats the work nor counts jobs twice.
 *
 * @param string $mode
 *   Which counter to return: 'running' or 'error'. Any other value is treated
 *   as 'running'.
 *
 * @return int
 *   Number of jobs in the requested state.
 */
function ultimate_cron_nagios_get_job_info($mode = 'running') {
  // Ensure a valid mode. The comparison is strict so that non-string values
  // such as 0 or TRUE cannot slip through and be used as an array key.
  if (!in_array($mode, array('running', 'error'), TRUE)) {
    $mode = 'running';
  }

  static $overview = NULL;
  if (!isset($overview)) {
    // Initialise every counter up front; the loop below increments all of
    // them, not only the one requested by the caller.
    $overview = array(
      'running' => 0,
      'success' => 0,
      'info' => 0,
      'warning' => 0,
      'error' => 0,
    );

    // NOTE(review): assumes each hook is an array that may carry a
    // 'background_process' entry while the job is running, and that the log
    // entry carries a numeric 'severity' - confirm against the providers.
    foreach (CronJobDiscovery::getHooks() as $name => $hook) {
      $log = ultimate_cron_get_log($name);
      if (!empty($hook['background_process'])) {
        $overview['running']++;
      }

      // Map the logged severity onto a bucket. Negative means no problem was
      // logged; otherwise a numerically higher RFC level is less severe.
      $severity = $log['severity'];
      if ($severity < 0) {
        $severity_type = 'success';
      }
      elseif ($severity >= RfcLogLevel::NOTICE) {
        $severity_type = 'info';
      }
      elseif ($severity >= RfcLogLevel::WARNING) {
        $severity_type = 'warning';
      }
      else {
        $severity_type = 'error';
      }
      $overview[$severity_type]++;
    }
  }

  return $overview[$mode];
}
/*************** NAGIOS CHECK FUNCTIONS ********************************/
/**
 * Check number of running jobs.
 *
 * Reports a critical state when more jobs are running than the configured
 * 'ultimate_cron_nagios_running_threshold' (default 50), otherwise OK.
 *
 * @return array
 *   Array with 'key' (the Nagios result key) and 'data' (status, type and
 *   human-readable text).
 */
function ultimate_cron_running_check() {
  $running = ultimate_cron_nagios_get_job_info('running');
  $threshold = variable_get('ultimate_cron_nagios_running_threshold', 50);

  // $running is already a job count, so compare it directly. Passing an
  // integer to count() never yields the number itself and throws a TypeError
  // on PHP 8.
  if ($running > $threshold) {
    $data = array(
      'status' => NAGIOS_STATUS_CRITICAL,
      'type' => 'state',
      'text' => t('@jobs currently running - it is more than @threshold', array('@jobs' => $running, '@threshold' => $threshold)),
    );
  }
  else {
    $data = array(
      'status' => NAGIOS_STATUS_OK,
      'type' => 'state',
      'text' => t('@jobs currently running', array('@jobs' => $running)),
    );
  }

  return array(
    'key' => 'ULTIMATE_CRON_RUNNING',
    'data' => $data,
  );
}
/**
 * Check number of jobs that failed last run.
 *
 * Reports a critical state when more jobs failed than the configured
 * 'ultimate_cron_nagios_failed_threshold' (default 10), otherwise OK.
 *
 * @return array
 *   Array with 'key' (the Nagios result key) and 'data' (status, type and
 *   human-readable text).
 */
function ultimate_cron_failed_check() {
  // The helper only recognises the modes 'running' and 'error'; any other
  // value (such as 'errors') silently falls back to the running-jobs count.
  $failed = ultimate_cron_nagios_get_job_info('error');
  $threshold = variable_get('ultimate_cron_nagios_failed_threshold', 10);

  // $failed is already a job count, so compare it directly. Passing an
  // integer to count() never yields the number itself and throws a TypeError
  // on PHP 8.
  if ($failed > $threshold) {
    $data = array(
      'status' => NAGIOS_STATUS_CRITICAL,
      'type' => 'state',
      'text' => t('@jobs failed their last run - it is more than @threshold', array('@jobs' => $failed, '@threshold' => $threshold)),
    );
  }
  else {
    $data = array(
      'status' => NAGIOS_STATUS_OK,
      'type' => 'state',
      'text' => t('@jobs failed their last run', array('@jobs' => $failed)),
    );
  }

  return array(
    'key' => 'ULTIMATE_CRON_FAILED',
    'data' => $data,
  );
}
/**
 * Check number of jobs running longer than usual.
 *
 * The detection itself is not implemented yet: the job count is fixed at 0
 * and only compared against 'ultimate_cron_nagios_longrunning_threshold'
 * (default 0).
 *
 * @return array
 *   Array with 'key' (the Nagios result key) and 'data' (status, type and
 *   human-readable text).
 *
 * @todo Implement the logic:
 *   - Get running jobs.
 *   - Find out how long they have been running.
 *   - Calculate average run time per job (over a threshold? E.g. queues run
 *     very fast if there is nothing to process).
 */
function ultimate_cron_longrunning_check() {
  // Placeholder count until the detection logic exists.
  $count = 0;
  $limit = variable_get('ultimate_cron_nagios_longrunning_threshold', 0);
  $exceeded = $count > $limit;

  $data = array(
    'status' => $exceeded ? NAGIOS_STATUS_CRITICAL : NAGIOS_STATUS_OK,
    'type' => 'state',
    'text' => $exceeded
      ? t('@jobs jobs are running longer than usual - it is more than @threshold', array('@jobs' => $count, '@threshold' => $limit))
      : t('@jobs jobs are running longer than usual', array('@jobs' => $count)),
  );

  return array(
    'key' => 'ULTIMATE_CRON_LONGRUNNING',
    'data' => $data,
  );
}