From afa3237617b7380987414dcb7817e089635d271c Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Tue, 5 Nov 2024 14:11:08 -0800 Subject: [PATCH] Use ClickHouse bools and disable sparse serialization - Use `Bool` data type in the `oximeter` database tables. Fixes #6994. - Disable "sparse serialization" optimization for ClickHouse columns, which are unlikely to help us an use an undocumented data format. This turns off sparse serialization for test and deployed versions of ClickHouse, both single-node and replicated. Fixes #6995. --- Cargo.lock | 1 + clickhouse-admin/types/src/config.rs | 5 ++ oximeter/db/schema/replicated/13/up1.sql | 4 ++ oximeter/db/schema/replicated/13/up2.sql | 4 ++ oximeter/db/schema/replicated/13/up3.sql | 4 ++ oximeter/db/schema/replicated/13/up4.sql | 4 ++ oximeter/db/schema/replicated/db-init-2.sql | 4 +- oximeter/db/schema/single-node/13/up1.sql | 1 + oximeter/db/schema/single-node/13/up2.sql | 1 + oximeter/db/schema/single-node/db-init.sql | 4 +- oximeter/db/src/client/mod.rs | 2 +- oximeter/db/src/configs/replica_config.xml | 5 ++ oximeter/db/src/model.rs | 20 +++---- smf/clickhouse/config.xml | 4 ++ test-utils/Cargo.toml | 1 + test-utils/src/dev/clickhouse.rs | 61 ++++++++++++++++++++- 16 files changed, 108 insertions(+), 17 deletions(-) create mode 100644 oximeter/db/schema/replicated/13/up1.sql create mode 100644 oximeter/db/schema/replicated/13/up2.sql create mode 100644 oximeter/db/schema/replicated/13/up3.sql create mode 100644 oximeter/db/schema/replicated/13/up4.sql create mode 100644 oximeter/db/schema/single-node/13/up1.sql create mode 100644 oximeter/db/schema/single-node/13/up2.sql diff --git a/Cargo.lock b/Cargo.lock index b06a8c5262..bc6a0c15c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7012,6 +7012,7 @@ dependencies = [ "reqwest 0.12.8", "ring 0.17.8", "rustls 0.22.4", + "serde", "slog", "subprocess", "tar", diff --git a/clickhouse-admin/types/src/config.rs b/clickhouse-admin/types/src/config.rs index 20aa3d71f8..27eb569b91 100644 --- a/clickhouse-admin/types/src/config.rs +++ b/clickhouse-admin/types/src/config.rs @@ -154,6 +154,11 @@ impl ReplicaConfig { 1000 + + + + 1.0 + {macros} {remote_servers} {keepers} diff --git a/oximeter/db/schema/replicated/13/up1.sql b/oximeter/db/schema/replicated/13/up1.sql new file mode 100644 index 0000000000..ac13ea6d01 --- /dev/null +++ b/oximeter/db/schema/replicated/13/up1.sql @@ -0,0 +1,4 @@ +ALTER TABLE oximeter.fields_bool_local +ON CLUSTER oximeter_cluster +ALTER COLUMN IF EXISTS +field_value TYPE Bool; diff --git a/oximeter/db/schema/replicated/13/up2.sql b/oximeter/db/schema/replicated/13/up2.sql new file mode 100644 index 0000000000..11f42784c5 --- /dev/null +++ b/oximeter/db/schema/replicated/13/up2.sql @@ -0,0 +1,4 @@ +ALTER TABLE oximeter.measurements_bool_local +ON CLUSTER oximeter_cluster +ALTER COLUMN IF EXISTS +datum TYPE Nullable(Bool); diff --git a/oximeter/db/schema/replicated/13/up3.sql b/oximeter/db/schema/replicated/13/up3.sql new file mode 100644 index 0000000000..e39f7d1c73 --- /dev/null +++ b/oximeter/db/schema/replicated/13/up3.sql @@ -0,0 +1,4 @@ +ALTER TABLE oximeter.fields_bool +ON CLUSTER oximeter_cluster +ALTER COLUMN IF EXISTS +field_value TYPE Bool; diff --git a/oximeter/db/schema/replicated/13/up4.sql b/oximeter/db/schema/replicated/13/up4.sql new file mode 100644 index 0000000000..03aec4da41 --- /dev/null +++ b/oximeter/db/schema/replicated/13/up4.sql @@ -0,0 +1,4 @@ +ALTER TABLE oximeter.measurements_bool +ON CLUSTER oximeter_cluster +ALTER COLUMN IF EXISTS +datum TYPE Nullable(Bool); diff --git a/oximeter/db/schema/replicated/db-init-2.sql b/oximeter/db/schema/replicated/db-init-2.sql index 51e64e20e0..7507041f40 100644 --- a/oximeter/db/schema/replicated/db-init-2.sql +++ b/oximeter/db/schema/replicated/db-init-2.sql @@ -35,7 +35,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool_local ON CLUSTER oximeter_ timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Nullable(UInt8) + datum Nullable(Bool) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_bool_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -595,7 +595,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_bool_local ON CLUSTER oximeter_cluste timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8, + field_value Bool, last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_bool_local', '{replica}') diff --git a/oximeter/db/schema/single-node/13/up1.sql b/oximeter/db/schema/single-node/13/up1.sql new file mode 100644 index 0000000000..36ffaad12b --- /dev/null +++ b/oximeter/db/schema/single-node/13/up1.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.fields_bool ALTER COLUMN IF EXISTS field_value TYPE Bool; diff --git a/oximeter/db/schema/single-node/13/up2.sql b/oximeter/db/schema/single-node/13/up2.sql new file mode 100644 index 0000000000..b7b2c9b299 --- /dev/null +++ b/oximeter/db/schema/single-node/13/up2.sql @@ -0,0 +1 @@ +ALTER TABLE oximeter.measurements_bool ALTER COLUMN IF EXISTS datum TYPE Nullable(Bool); diff --git a/oximeter/db/schema/single-node/db-init.sql b/oximeter/db/schema/single-node/db-init.sql index 184951feeb..d505a21414 100644 --- a/oximeter/db/schema/single-node/db-init.sql +++ b/oximeter/db/schema/single-node/db-init.sql @@ -24,7 +24,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool timeseries_name String, timeseries_key UInt64, timestamp DateTime64(9, 'UTC'), - datum Nullable(UInt8) + datum Nullable(Bool) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) @@ -518,7 +518,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_bool timeseries_name String, timeseries_key UInt64, field_name String, - field_value UInt8, + field_value Bool, last_updated_at DateTime MATERIALIZED now() ) ENGINE = ReplacingMergeTree() diff --git a/oximeter/db/src/client/mod.rs b/oximeter/db/src/client/mod.rs index a4e73172cb..2b0781eddc 100644 --- a/oximeter/db/src/client/mod.rs +++ b/oximeter/db/src/client/mod.rs @@ -3039,7 +3039,7 @@ mod tests { client: &Client, ) -> Result<(), Error> { let field = FieldValue::Bool(true); - let as_json = serde_json::Value::from(1_u64); + let as_json = serde_json::Value::from(true); test_recall_field_value_impl(field, as_json, client).await?; Ok(()) } diff --git a/oximeter/db/src/configs/replica_config.xml b/oximeter/db/src/configs/replica_config.xml index bf424185b6..7d27b51ab4 100644 --- a/oximeter/db/src/configs/replica_config.xml +++ b/oximeter/db/src/configs/replica_config.xml @@ -484,4 +484,9 @@ false false + + + + 1.0 + diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index d57819b0d0..5e3508a14f 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -45,7 +45,7 @@ use uuid::Uuid; /// - [`crate::Client::initialize_db_with_version`] /// - [`crate::Client::ensure_schema`] /// - The `clickhouse-schema-updater` binary in this crate -pub const OXIMETER_VERSION: u64 = 12; +pub const OXIMETER_VERSION: u64 = 13; // Wrapper type to represent a boolean in the database. // @@ -392,7 +392,7 @@ macro_rules! declare_field_row { } } -declare_field_row! {BoolFieldRow, DbBool, "bool"} +declare_field_row! {BoolFieldRow, bool, "bool"} declare_field_row! {I8FieldRow, i8, "i8"} declare_field_row! {U8FieldRow, u8, "u8"} declare_field_row! {I16FieldRow, i16, "i16"} @@ -630,7 +630,7 @@ fn unroll_from_source(sample: &Sample) -> BTreeMap> { timeseries_name, timeseries_key, field_name, - field_value: DbBool::from(*inner), + field_value: *inner, }; (row.table_name(), serde_json::to_string(&row).unwrap()) } @@ -1468,7 +1468,7 @@ trait FromDbScalar { const DATUM_TYPE: DatumType; } -impl FromDbScalar for DbBool { +impl FromDbScalar for bool { const DATUM_TYPE: DatumType = DatumType::Bool; } @@ -1633,7 +1633,7 @@ pub(crate) fn parse_measurement_from_row( ) -> (TimeseriesKey, Measurement) { match datum_type { DatumType::Bool => { - parse_timeseries_scalar_gauge_measurement::(line) + parse_timeseries_scalar_gauge_measurement::(line) } DatumType::I8 => parse_timeseries_scalar_gauge_measurement::(line), DatumType::U8 => parse_timeseries_scalar_gauge_measurement::(line), @@ -1757,11 +1757,11 @@ pub(crate) fn parse_field_select_row( // Parse the field value as the expected type let value = match expected_field.field_type { FieldType::Bool => { - FieldValue::Bool(bool::from(DbBool::from( + FieldValue::Bool( actual_field_value - .as_u64() - .expect("Expected a u64 for a boolean field from the database") - ))) + .as_bool() + .expect("Expected a boolean field from the database") + ) } FieldType::I8 => { let wide = actual_field_value @@ -2071,7 +2071,7 @@ mod tests { assert_eq!(measurement.datum(), datum); } - let line = r#"{"timeseries_key": 12, "timestamp": "2021-01-01 00:00:00.123456789", "datum": 1 }"#; + let line = r#"{"timeseries_key": 12, "timestamp": "2021-01-01 00:00:00.123456789", "datum": true }"#; let datum = Datum::from(true); run_test(line, &datum, timestamp); diff --git a/smf/clickhouse/config.xml b/smf/clickhouse/config.xml index b5b1f9c17f..58ae5dcaf5 100644 --- a/smf/clickhouse/config.xml +++ b/smf/clickhouse/config.xml @@ -38,4 +38,8 @@ + + + 1.0 + diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index eff73799ee..a7236f5142 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -43,6 +43,7 @@ uuid.workspace = true chrono.workspace = true expectorate.workspace = true gethostname.workspace = true +serde.workspace = true [features] seed-gen = ["dep:filetime"] diff --git a/test-utils/src/dev/clickhouse.rs b/test-utils/src/dev/clickhouse.rs index 8aa7b319c4..8956bb88a7 100644 --- a/test-utils/src/dev/clickhouse.rs +++ b/test-utils/src/dev/clickhouse.rs @@ -387,6 +387,9 @@ pub enum ClickHouseError { Timeout, } +const SINGLE_NODE_CONFIG_FILE: &str = + concat!(env!("CARGO_MANIFEST_DIR"), "/../smf/clickhouse/config.xml"); + impl ClickHouseProcess { /// Start a new single node ClickHouse server listening on the provided /// ports. @@ -395,8 +398,22 @@ impl ClickHouseProcess { ports: ClickHousePorts, ) -> Result { let data_dir = ClickHouseDataDir::new(logctx)?; + + // Copy the configuration file into the test directory, so we don't + // leave the preprocessed config file hanging around. + tokio::fs::copy(SINGLE_NODE_CONFIG_FILE, data_dir.config_file_path()) + .await + .with_context(|| { + format!( + "failed to copy config file from {} to test data path {}", + SINGLE_NODE_CONFIG_FILE, + data_dir.config_file_path(), + ) + })?; let args = vec![ "server".to_string(), + "--config-file".to_string(), + data_dir.config_file_path().to_string(), "--log-file".to_string(), data_dir.log_path().to_string(), "--errorlog-file".to_string(), @@ -731,6 +748,10 @@ impl ClickHouseDataDir { self.dir.path() } + fn config_file_path(&self) -> Utf8PathBuf { + self.root_path().join("config.xml") + } + fn datastore_path(&self) -> Utf8PathBuf { self.dir.path().join("datastore/") } @@ -1299,10 +1320,11 @@ async fn clickhouse_ready_from_log( #[cfg(test)] mod tests { use super::{ - discover_ready, wait_for_ports, ClickHouseError, ClickHousePorts, - CLICKHOUSE_HTTP_PORT_NEEDLE, CLICKHOUSE_READY, + discover_ready, wait_for_ports, ClickHouseDeployment, ClickHouseError, + ClickHousePorts, CLICKHOUSE_HTTP_PORT_NEEDLE, CLICKHOUSE_READY, CLICKHOUSE_TCP_PORT_NEEDLE, CLICKHOUSE_TIMEOUT, }; + use crate::dev::test_setup_log; use camino_tempfile::NamedUtf8TempFile; use std::process::Stdio; use std::{io::Write, sync::Arc, time::Duration}; @@ -1458,4 +1480,39 @@ mod tests { let second = ClickHousePorts { http: 1, native: 1 }; ClickHousePorts { http: 2, native: 2 }.assert_consistent(&second); } + + #[derive(Debug, serde::Deserialize)] + struct Setting { + name: String, + value: String, + changed: u8, + } + + #[tokio::test] + async fn sparse_serialization_is_disabled() { + let logctx = test_setup_log("sparse_serialization_is_disabled"); + let mut db = + ClickHouseDeployment::new_single_node(&logctx).await.unwrap(); + let client = reqwest::Client::new(); + let url = format!("http://{}", db.http_address()); + let setting: Setting = client + .post(url) + .body( + "SELECT name, value, changed \ + FROM system.merge_tree_settings \ + WHERE name == 'ratio_of_defaults_for_sparse_serialization' \ + FORMAT JSONEachRow", + ) + .send() + .await + .unwrap() + .json() + .await + .unwrap(); + assert_eq!(setting.name, "ratio_of_defaults_for_sparse_serialization"); + assert_eq!(setting.value, "1"); + assert_eq!(setting.changed, 1); + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } }