From 976d0fb293d8888c849ed3e46eef138a51cf061e Mon Sep 17 00:00:00 2001 From: Matthew Hilton Date: Tue, 3 Oct 2023 12:00:53 +1000 Subject: [PATCH] WIP better checks --- croncheck.php | 114 +++++++++++++++++++++++++------------------------- 1 file changed, 58 insertions(+), 56 deletions(-) diff --git a/croncheck.php b/croncheck.php index 550dcfb..c789552 100644 --- a/croncheck.php +++ b/croncheck.php @@ -219,39 +219,36 @@ $delay = ''; $maxdelay = 0; -$tasks = core\task\manager::get_all_scheduled_tasks(); -foreach ($tasks as $task) { - if ($task->get_disabled()) { - continue; - } - $faildelay = $task->get_fail_delay(); - if ($faildelay == 0) { - continue; - } + +// Instead of using task API here, we read directly from the database. +// This stops errors originating from broken tasks. +$scheduledtasks = $DB->get_records_sql("SELECT * FROM {task_scheduled} WHERE faildelay > 0 AND disabled = 0"); +foreach ($scheduledtasks as $task) { + $faildelay = $task->faildelay; + $taskname = $task->classname; + if ($faildelay > $maxdelay) { $maxdelay = $faildelay; } - $delay .= "SCHEDULED TASK: " . get_class($task) . ' (' .$task->get_name() . ") Delay: $faildelay\n"; + $delay .= "SCHEDULED TASK: " . $taskname . " Delay: $faildelay\n"; } -// Find any failed adhoc tasks. +// Instead of using task API here, we read directly from the database. +// This stops errors originating from broken tasks. $records = $DB->get_records_sql('SELECT * from {task_adhoc} WHERE faildelay > 0'); $adhoctaskdelays = []; foreach ($records as $record) { - $task = \core\task\manager::adhoc_task_from_record($record); - if (!$task) { - continue; - } + $taskname = $record->classname; + $faildelay = $record->faildelay; - $faildelay = $task->get_fail_delay(); if ($faildelay == 0) { continue; } if ($faildelay > $maxdelay) { $maxdelay = $faildelay; } - $adhoctaskdelays[] = "ADHOC TASK: " .get_class($task) . " Delay: $faildelay"; + $adhoctaskdelays[] = "ADHOC TASK: " . $taskname . 
" Delay: $faildelay\n"; } // Deduplicate these, but record the count in the string, by doing the following: @@ -305,51 +302,56 @@ require_once($CFG->dirroot.'/mnet/lib.php'); } - $checks = \core\check\manager::get_checks('status'); - $output = ''; - // Should this check block emit as critical? - $critical = false; - - foreach ($checks as $check) { - $ref = $check->get_ref(); - $result = $check->get_result(); - - $status = $result->get_status(); - - // Summary is treated as html. - $summary = $result->get_summary(); - $summary = html_to_text($summary, 80, false); - - if ($status == \core\check\result::WARNING || - $status == \core\check\result::CRITICAL || - $status == \core\check\result::ERROR) { - - // If we have an error, how should we handle it. - if ($status == \core\check\result::ERROR && !$critical) { - $mapping = get_config('tool_heartbeat', 'errorcritical'); - if ($mapping === 'critical') { - $critical = true; - } else if ($mapping === 'criticalbusiness') { - // Here we should only set the critical flag between 0900 and 1700 server time. - $time = new DateTime('now', core_date::get_server_timezone_object()); - $hour = (int) $time->format('H'); - $critical = ($hour >= 9 && $hour < 17); + try { + $checks = \core\check\manager::get_checks('status'); + $output = ''; + // Should this check block emit as critical? + $critical = false; + + foreach ($checks as $check) { + $ref = $check->get_ref(); + $result = $check->get_result(); + + $status = $result->get_status(); + + // Summary is treated as html. + $summary = $result->get_summary(); + $summary = html_to_text($summary, 80, false); + + if ($status == \core\check\result::WARNING || + $status == \core\check\result::CRITICAL || + $status == \core\check\result::ERROR) { + + // If we have an error, how should we handle it. 
+ if ($status == \core\check\result::ERROR && !$critical) { + $mapping = get_config('tool_heartbeat', 'errorcritical'); + if ($mapping === 'critical') { + $critical = true; + } else if ($mapping === 'criticalbusiness') { + // Here we should only set the critical flag between 0900 and 1700 server time. + $time = new DateTime('now', core_date::get_server_timezone_object()); + $hour = (int) $time->format('H'); + $critical = ($hour >= 9 && $hour < 17); + } + } else if (!$critical) { + $critical = $status == \core\check\result::CRITICAL; } - } else if (!$critical) { - $critical = $status == \core\check\result::CRITICAL; - } - $output .= $check->get_name() . "\n"; - $output .= "$summary\n"; + $output .= $check->get_name() . "\n"; + $output .= "$summary\n"; - $detail = new moodle_url('/report/status/index.php', ['detail' => $ref]); - $output .= 'Details: ' . $detail->out() . "\n\n"; + $detail = new moodle_url('/report/status/index.php', ['detail' => $ref]); + $output .= 'Details: ' . $detail->out() . "\n\n"; - $link = $check->get_action_link(); - if ($link) { - $output .= $link->url . "\n"; + $link = $check->get_action_link(); + if ($link) { + $output .= $link->url . "\n"; + } } } + } catch (\Throwable $e) { + $critical = true; + $output = ($output ?? '') . "Error scanning checks: " . $e->getMessage() . "\n"; /* $output is still unset if get_checks() threw before it was initialised. */ } // Strictly some of these could a critical but softly softly.