From d2bba4a5b581e798f8b4cba3f707e3827c5cbf79 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Tue, 26 Mar 2024 14:03:07 +0100 Subject: [PATCH] Document `wsrep_sync_wait` database option --- config.example.yml | 9 ++++++++ doc/03-Configuration.md | 49 ++++++++++++++++++++++++++++++----------- pkg/icingadb/db.go | 5 ++++- 3 files changed, 49 insertions(+), 14 deletions(-) diff --git a/config.example.yml b/config.example.yml index 61d39339a..432a95512 100644 --- a/config.example.yml +++ b/config.example.yml @@ -23,6 +23,15 @@ database: # Database password. password: CHANGEME + # List of low-level database options that can be set to influence some Icinga DB internal default behaviours. + options: + # Enforce Galera cluster nodes strict cluster-wide causality checks before executing specific SQL queries + # determined by the number you provided. By default, 7 is used, which includes "SELECT,UPDATE,DELETE,INSERT,REPLACE" + # query types and is usually sufficient for running Icinga DB with Galera setups with a load balancer, that doesn't + # randomly route connections to all the nodes evenly or your database cluster implements an IP fail over. Otherwise, + # you can set this to 0 to disable it completely. +# wsrep_sync_wait: 7 + # Connection configuration for the Redis server where Icinga 2 writes its configuration, state and history items. # This is the same connection as configured in the 'icingadb' feature of the corresponding Icinga 2 node. # High availability setups require a dedicated Redis server per Icinga 2 node and diff --git a/doc/03-Configuration.md b/doc/03-Configuration.md index 515e3aa9b..28cbb64a1 100644 --- a/doc/03-Configuration.md +++ b/doc/03-Configuration.md @@ -29,19 +29,26 @@ This is also the database used in [Icinga DB Web](https://icinga.com/docs/icinga-db-web) to view and work with the data. In high availability setups, all Icinga DB instances must write to the same database. 
-| Option | Description | -|----------|--------------------------------------------------------------------------------------------------------| -| type | **Optional.** Either `mysql` (default) or `pgsql`. | -| host | **Required.** Database host or absolute Unix socket path. | -| port | **Optional.** Database port. By default, the MySQL or PostgreSQL port, depending on the database type. | -| database | **Required.** Database name. | -| user | **Required.** Database username. | -| password | **Optional.** Database password. | -| tls | **Optional.** Whether to use TLS. | -| cert | **Optional.** Path to TLS client certificate. | -| key | **Optional.** Path to TLS private key. | -| ca | **Optional.** Path to TLS CA certificate. | -| insecure | **Optional.** Whether not to verify the peer. | +| Option | Description | +|----------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| type | **Optional.** Either `mysql` (default) or `pgsql`. | +| host | **Required.** Database host or absolute Unix socket path. | +| port | **Optional.** Database port. By default, the MySQL or PostgreSQL port, depending on the database type. | +| database | **Required.** Database name. | +| user | **Required.** Database username. | +| password | **Optional.** Database password. | +| tls | **Optional.** Whether to use TLS. | +| cert | **Optional.** Path to TLS client certificate. | +| key | **Optional.** Path to TLS private key. | +| ca | **Optional.** Path to TLS CA certificate. | +| insecure | **Optional.** Whether not to verify the peer. | +| options | **Optional.** List of low-level database options that can be set to influence some Icinga DB internal default behaviours. See [database options](#database-options) for details. 
| + ### Database Options + +| Option | Description | +|-----------------|----------------------------------------------------------------------------------------------------------------------| +| wsrep_sync_wait | **Optional.** Enforce [Galera cluster](#galera-cluster) nodes strict cluster-wide causality checks. Defaults to `7`. | ## Logging Configuration @@ -91,3 +98,19 @@ allowing to keep this information for longer with a smaller storage footprint. A duration string is a sequence of decimal numbers and a unit suffix, such as `"20s"`. Valid units are `"ms"`, `"s"`, `"m"` and `"h"`. + +### Galera Cluster + +Icinga DB might not function properly or even crash occasionally when it is used with [Galera cluster](https://mariadb.com/kb/en/what-is-mariadb-galera-cluster/) setups. +To avoid such unexpected crashes and reading/writing inconsistent data, Icinga DB sets the [wsrep_sync_wait](https://mariadb.com/kb/en/galera-cluster-system-variables/#wsrep_sync_wait) +system variable for all its database connections. Consequently, strict cluster-wide causality checks are enforced before +executing specific SQL queries, which are determined by the value set in the `"wsrep_sync_wait"` system variable. By default, +Icinga DB sets this to `7`, which includes `READ, UPDATE, DELETE, INSERT, REPLACE` query types and is usually sufficient. +Unfortunately, this also has the downside that every single Icinga DB query will be blocked until the cluster nodes +resynchronise their states after each executed query, and may result in degraded performance. + +However, this does not necessarily have to be the case if, for instance, Icinga DB is only allowed to connect to a single +cluster node at a time. This is the case when a load balancer does not randomly route connections to all the nodes evenly, +but always to the same node until it fails, or if your database cluster does not implement an IP address fail over. 
In such +situations, you can set the `wsrep_sync_wait` system variable to `0` in the `/etc/icingadb/config.yml` file to disable +it entirely, as Icinga DB doesn't have to wait for cluster synchronization then. diff --git a/pkg/icingadb/db.go b/pkg/icingadb/db.go index 7b10a53f1..0016bc8a0 100644 --- a/pkg/icingadb/db.go +++ b/pkg/icingadb/db.go @@ -54,7 +54,10 @@ type Options struct { // The default is 2^13, which in our tests showed the best performance in terms of execution time and parallelism. MaxRowsPerTransaction int `yaml:"max_rows_per_transaction" default:"8192"` - // WsrepSyncWait defines which kinds of SQL statements catch up all pending sync between nodes first, see: + // WsrepSyncWait enforces Galera cluster nodes strict cluster-wide causality checks + // before executing specific SQL queries determined by the number you provided. + // The default value is 7, which includes "READ,UPDATE,DELETE,INSERT,REPLACE" query types and seems to be sufficient + // for running Icinga DB with Galera setups. Please refer to the below link for a complete list of options. // https://mariadb.com/kb/en/galera-cluster-system-variables/#wsrep_sync_wait WsrepSyncWait int `yaml:"wsrep_sync_wait" default:"7"` }