Skip to content

Commit

Permalink
[#135] Added split cache check
Browse files Browse the repository at this point in the history
  • Loading branch information
brendanheywood authored and matthewhilton committed Oct 23, 2023
1 parent 789b1ef commit 88ce2ad
Show file tree
Hide file tree
Showing 7 changed files with 301 additions and 11 deletions.
134 changes: 134 additions & 0 deletions classes/check/cachecheck.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.

namespace tool_heartbeat\check;
use core\check\check;
use core\check\result;

/**
* Cache check class
*
* This detects some split brain cache setups
*
* @package tool_heartbeat
* @author Brendan Heywood <[email protected]>
* @copyright Catalyst IT 2023
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
class cachecheck extends check {

    /**
     * Builds the check result by comparing cached and database ping values.
     *
     * The details always contain a table with one row per ping value (key, raw
     * value, formatted date). When the status is not OK, an explanation of the
     * split brain condition is placed before the table.
     *
     * @return result
     */
    public function get_result() : result {
        $results = $this->check('web');
        $results += $this->check('cron');

        list($status, $summary) = $this->build_result($results);

        $details = '';

        if ($status != result::OK) {
            $details .= get_string('checkcachedetails', 'tool_heartbeat');
        }

        $details .= '<table class="table table-sm w-auto table-bordered">';

        foreach ($results as $key => $value) {
            $details .= \html_writer::start_tag('tr');
            $details .= \html_writer::tag('td', $key);
            $details .= \html_writer::tag('td', $value);

            // Use DATE_RSS to show seconds, as well as timezone.
            // The stored value is not guaranteed to be an integer (it may be a
            // string, or empty when nothing has been recorded yet), so cast it
            // before handing it to date().
            $details .= \html_writer::tag('td', date(DATE_RSS, (int) $value));
            $details .= \html_writer::end_tag('tr');
        }
        $details .= '</table>';
        return new result($status, $summary, $details);
    }

    /**
     * Reads the results and builds a check API result.
     *
     * A missing web or cron cached value is reported as CRITICAL before any
     * comparison is made. Otherwise each cached value is compared with its
     * database counterpart and any mismatch is reported as CRITICAL, naming
     * the type(s) that differ.
     *
     * @param array $results from check() function.
     * @return array of [result status, summary string]
     */
    private function build_result(array $results): array {
        // Nothing set for web API.
        if (empty($results['webapi'])) {
            return [result::CRITICAL, get_string('checkcachewebmissing', 'tool_heartbeat')];
        }

        // Nothing set for cron API.
        if (empty($results['cronapi'])) {
            return [result::CRITICAL, get_string('checkcachecronmissing', 'tool_heartbeat')];
        }

        // Check for split cron cache/db, web cache/db, and all of them together.
        $cronsplit = $results['cronapi'] != $results['crondb'];
        $websplit = $results['webapi'] != $results['webdb'];

        if ($cronsplit || $websplit) {
            $splits = [
                'cron' => $cronsplit,
                'web' => $websplit,
            ];
            // Keep only the types that are split and list their names.
            $splits = implode(",", array_keys(array_filter($splits)));

            return [result::CRITICAL, get_string('checkcacheerrorsplit', 'tool_heartbeat', $splits)];
        }

        // Else OK.
        return [result::OK, get_string('checkcachenotsplit', 'tool_heartbeat')];
    }

    /**
     * Get the ping values from the cache and db to compare
     *
     * @param string $type type of check (e.g. web, cron)
     * @return array with two entries: "{$type}api" (value from get_config) and
     *               "{$type}db" (value read from the config_plugins table)
     */
    public function check($type) {
        global $DB;

        $return = [];
        $key = "checkcache{$type}ping";

        // Read from cache (e.g. get_config uses cache).
        $return[$type . 'api'] = get_config('tool_heartbeat', $key);

        // Read directly from database.
        $return[$type . 'db'] = $DB->get_field('config_plugins', 'value', [
            'plugin' => 'tool_heartbeat',
            'name' => $key,
        ]);
        return $return;
    }

    /**
     * Sets a timestamp in config from web or cron
     *
     * The timestamp is only rewritten when the currently stored one is more
     * than a day old, so that callers can invoke this on every request or
     * cron run without writing config each time.
     *
     * @param string $type type of check (e.g. web, cron)
     */
    public static function ping($type) {
        $key = "checkcache{$type}ping";
        $current = get_config('tool_heartbeat', $key);

        // Only update if the currently cached time is very old.
        if ($current < (time() - DAYSECS)) {
            debugging("HEARTBEAT doing {$type} ping {$current}", DEBUG_DEVELOPER);
            set_config($key, time(), 'tool_heartbeat');
        }
    }
}
46 changes: 46 additions & 0 deletions classes/task/cachecheck.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.


namespace tool_heartbeat\task;

/**
* Scheduled task to ping the cache from CRON.
*
* @package tool_heartbeat
* @author Brendan Heywood <[email protected]>
* @copyright Catalyst IT
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
class cachecheck extends \core\task\scheduled_task {

    /**
     * Name of this task, taken from the 'checkcachecheck' language string.
     *
     * @return string
     */
    public function get_name() {
        $name = get_string('checkcachecheck', 'tool_heartbeat');
        return $name;
    }

    /**
     * Records a cache check ping of type 'cron'.
     */
    public function execute() {
        $type = 'cron';
        \tool_heartbeat\check\cachecheck::ping($type);
    }

}


33 changes: 33 additions & 0 deletions db/install.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* Install hook that records the initial cache split check pings.
*
* @package tool_heartbeat
* @author Brendan Heywood <[email protected]>
* @copyright Catalyst IT 2023
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/

/**
 * Post-install hook: records an initial ping for each cache check type.
 */
function xmldb_tool_heartbeat_install() {
    // Split caches only become visible once the values have had time to
    // diverge, so record both pings straight away.
    foreach (['web', 'cron'] as $type) {
        \tool_heartbeat\check\cachecheck::ping($type);
    }
}
36 changes: 36 additions & 0 deletions db/tasks.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* Scheduled task definitions for tool_heartbeat.
*
* @package tool_heartbeat
* @author Brendan Heywood <[email protected]>
* @copyright Catalyst IT 2023
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/

defined('MOODLE_INTERNAL') || die();

// One scheduled task: the cache check ping, with every schedule field set to
// the '*' wildcard.
$tasks = [
    ['classname' => \tool_heartbeat\task\cachecheck::class]
        + array_fill_keys(['minute', 'hour', 'day', 'dayofweek', 'month'], '*'),
];

38 changes: 38 additions & 0 deletions db/upgrade.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* DB upgrade script.
*
* @package tool_heartbeat
* @author Matthew Hilton <[email protected]>
* @copyright Catalyst IT 2023
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/

/**
 * Runs the tool_heartbeat upgrade steps.
 *
 * @param int $oldversion the plugin version installed before this upgrade
 * @return bool always true
 */
function xmldb_tool_heartbeat_upgrade($oldversion) {
    if ($oldversion < 2023102400) {
        // If there are issues with split caches they need to be exposed
        // after some time for them to diverge.
        \tool_heartbeat\check\cachecheck::ping('web');
        \tool_heartbeat\check\cachecheck::ping('cron');

        // Mark this step as complete so it is not repeated if the upgrade is
        // interrupted and resumed later.
        upgrade_plugin_savepoint(true, 2023102400, 'tool', 'heartbeat');
    }

    return true;
}
17 changes: 6 additions & 11 deletions lang/en/tool_heartbeat.php
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@
$string['authcorrect'] = 'Auth methods correctly configured.';
$string['builtinallowediplist'] = 'Builtin IP Blocking Configuration';
$string['builtinallowediplist_desc'] = 'This allowed IP list would allow some IPs to be editable in the UI in addition to those forced in config.php.';
// Cache consistency check: 'checkcachecheck' is the scheduled task name, the
// others are the summaries and details reported by the cache check.
$string['checkcachecheck'] = 'Cache consistency check';
$string['checkcachecronmissing'] = 'The cron cache check has not succeeded yet or is missing';
$string['checkcachedetails'] = 'A split brain cache was detected. The value stored in the database table config_plugins was not the same as the cached value returned from get_config. If you purge the cache and this check passes and then fails again after a few hours then that strongly suggests a cache misconfiguration.';
// The {$a} placeholder receives a comma separated list of the split types (cron, web).
$string['checkcacheerrorsplit'] = 'The caches are not consistent: {$a}';
$string['checkcachenotsplit'] = 'Caches appear consistent between web and cron';
$string['checkcachewebmissing'] = 'The web cache check has not succeeded yet';
// Authentication methods check.
$string['configuredauths'] = 'Check auth methods';
$string['configuredauthsdesc'] = 'Auth methods to check are enabled in the Check API. A warning will be emitted if they are not enabled.';
$string['checkauthcheck'] = 'Authentication methods';
Expand All @@ -66,17 +72,6 @@
// Logstore check.
$string['checklogstoreok'] = 'Logstore checks are OK. One or more logstores are active.';
$string['checklogstorebad'] = 'Logstore checks are bad! Please ensure at least one logstore has been set and enabled.';
$string['ips_combine'] = 'The IPs listed above will be combined with the IPs listed below.';
// Severity used when reporting check API errors.
$string['errorascritical'] = ' Report check errors as:';
$string['errorascritical_desc'] = 'This setting controls what check API errors are reported as in Nagios. "CRITICAL" is the most noisy, and "WARNING" is the least noisy for monitoring endpoints. Business hours is 9AM - 5PM in the server timezone ({$a}).';
$string['error_critical'] = 'CRITICAL';
$string['error_critical_business'] = 'CRITICAL during business hours';
$string['error_warning'] = 'WARNING';
// Task latency monitoring.
$string['tasklatencymonitoring'] = 'Task monitoring';
$string['tasklatencymonitoring_desc'] = 'Enter configuration for monitoring specific cron tasks. Enter each task configuration on a new line. Configuration format is <code>\component\task\classname, (integer) runtime, (integer) starttimedrift, (integer) notrunning</code>. Runtime is the max runtime that an individual run of a task cannot exceed. Start time drift is the drift from the scheduled run time of a task before the task begins execution. Not running is the configured time period for which a task must have run within. E.g. <code>{$a}</code>';
$string['latencydelayedstart'] = 'Task {$a->task} start is delayed past configured threshold: {$a->mins}.';
$string['latencynotrun'] = 'Task {$a->task} has not run within the configured latency threshold: {$a->mins}.';
$string['latencyruntime'] = 'Task {$a->task} was last run with a runtime longer than the configured threshold: {$a->mins}.';
$string['checktasklatencycheck'] = 'Task latency check';
$string['taskconfigbad'] = 'Bad configurations {$a}';
$string['tasklatencyok'] = 'Task latency OK.';
/*
Expand Down
8 changes: 8 additions & 0 deletions lib.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/

/**
 * Callback run before HTTP headers are sent: records a cache check ping of type 'web'.
 */
function tool_heartbeat_before_http_headers() {
    $type = 'web';
    \tool_heartbeat\check\cachecheck::ping($type);
}

/**
* Status checks.
*
Expand All @@ -29,6 +36,7 @@
function tool_heartbeat_status_checks() {
return [
new \tool_heartbeat\check\authcheck(),
new \tool_heartbeat\check\cachecheck(),
new \tool_heartbeat\check\logstorecheck(),
new \tool_heartbeat\check\tasklatencycheck(),
];
Expand Down

0 comments on commit 88ce2ad

Please sign in to comment.