Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use ClickHouse bools and disable sparse serialization #6997

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions clickhouse-admin/types/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,11 @@ impl ReplicaConfig {
<!-- Controls how many tasks could be in the queue -->
<max_tasks_in_queue>1000</max_tasks_in_queue>
</distributed_ddl>

<!-- Disable sparse column serialization, which we expect to not need -->
<merge_tree>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
{macros}
{remote_servers}
{keepers}
Expand Down
4 changes: 4 additions & 0 deletions oximeter/db/schema/replicated/13/up1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ALTER TABLE oximeter.fields_bool_local
ON CLUSTER oximeter_cluster
ALTER COLUMN IF EXISTS
field_value TYPE Bool;
4 changes: 4 additions & 0 deletions oximeter/db/schema/replicated/13/up2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ALTER TABLE oximeter.measurements_bool_local
ON CLUSTER oximeter_cluster
ALTER COLUMN IF EXISTS
datum TYPE Nullable(Bool);
4 changes: 4 additions & 0 deletions oximeter/db/schema/replicated/13/up3.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ALTER TABLE oximeter.fields_bool
ON CLUSTER oximeter_cluster
ALTER COLUMN IF EXISTS
field_value TYPE Bool;
4 changes: 4 additions & 0 deletions oximeter/db/schema/replicated/13/up4.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ALTER TABLE oximeter.measurements_bool
ON CLUSTER oximeter_cluster
ALTER COLUMN IF EXISTS
datum TYPE Nullable(Bool);
4 changes: 2 additions & 2 deletions oximeter/db/schema/replicated/db-init-2.sql
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool_local ON CLUSTER oximeter_
timeseries_name String,
timeseries_key UInt64,
timestamp DateTime64(9, 'UTC'),
datum Nullable(UInt8)
datum Nullable(Bool)
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_bool_local', '{replica}')
ORDER BY (timeseries_name, timeseries_key, timestamp)
Expand Down Expand Up @@ -595,7 +595,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_bool_local ON CLUSTER oximeter_cluste
timeseries_name String,
timeseries_key UInt64,
field_name String,
field_value UInt8,
field_value Bool,
last_updated_at DateTime MATERIALIZED now()
)
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_bool_local', '{replica}')
Expand Down
1 change: 1 addition & 0 deletions oximeter/db/schema/single-node/13/up1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ALTER TABLE oximeter.fields_bool ALTER COLUMN IF EXISTS field_value TYPE Bool;
1 change: 1 addition & 0 deletions oximeter/db/schema/single-node/13/up2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ALTER TABLE oximeter.measurements_bool ALTER COLUMN IF EXISTS datum TYPE Nullable(Bool);
4 changes: 2 additions & 2 deletions oximeter/db/schema/single-node/db-init.sql
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool
timeseries_name String,
timeseries_key UInt64,
timestamp DateTime64(9, 'UTC'),
datum Nullable(UInt8)
datum Nullable(Bool)
)
ENGINE = MergeTree()
ORDER BY (timeseries_name, timeseries_key, timestamp)
Expand Down Expand Up @@ -518,7 +518,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_bool
timeseries_name String,
timeseries_key UInt64,
field_name String,
field_value UInt8,
field_value Bool,
last_updated_at DateTime MATERIALIZED now()
)
ENGINE = ReplacingMergeTree()
Expand Down
2 changes: 1 addition & 1 deletion oximeter/db/src/client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3039,7 +3039,7 @@ mod tests {
client: &Client,
) -> Result<(), Error> {
let field = FieldValue::Bool(true);
let as_json = serde_json::Value::from(1_u64);
let as_json = serde_json::Value::from(true);
test_recall_field_value_impl(field, as_json, client).await?;
Ok(())
}
Expand Down
5 changes: 5 additions & 0 deletions oximeter/db/src/configs/replica_config.xml
Original file line number Diff line number Diff line change
Expand Up @@ -484,4 +484,9 @@
<enabled>false</enabled>
<anonymize>false</anonymize>
</send_crash_reports>

<!-- Disable sparse column serialization -->
<merge_tree>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
</clickhouse>
20 changes: 10 additions & 10 deletions oximeter/db/src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ use uuid::Uuid;
/// - [`crate::Client::initialize_db_with_version`]
/// - [`crate::Client::ensure_schema`]
/// - The `clickhouse-schema-updater` binary in this crate
pub const OXIMETER_VERSION: u64 = 12;
pub const OXIMETER_VERSION: u64 = 13;

// Wrapper type to represent a boolean in the database.
//
Expand Down Expand Up @@ -392,7 +392,7 @@ macro_rules! declare_field_row {
}
}

declare_field_row! {BoolFieldRow, DbBool, "bool"}
declare_field_row! {BoolFieldRow, bool, "bool"}
declare_field_row! {I8FieldRow, i8, "i8"}
declare_field_row! {U8FieldRow, u8, "u8"}
declare_field_row! {I16FieldRow, i16, "i16"}
Expand Down Expand Up @@ -630,7 +630,7 @@ fn unroll_from_source(sample: &Sample) -> BTreeMap<String, Vec<String>> {
timeseries_name,
timeseries_key,
field_name,
field_value: DbBool::from(*inner),
field_value: *inner,
};
(row.table_name(), serde_json::to_string(&row).unwrap())
}
Expand Down Expand Up @@ -1468,7 +1468,7 @@ trait FromDbScalar {
const DATUM_TYPE: DatumType;
}

impl FromDbScalar for DbBool {
impl FromDbScalar for bool {
const DATUM_TYPE: DatumType = DatumType::Bool;
}

Expand Down Expand Up @@ -1633,7 +1633,7 @@ pub(crate) fn parse_measurement_from_row(
) -> (TimeseriesKey, Measurement) {
match datum_type {
DatumType::Bool => {
parse_timeseries_scalar_gauge_measurement::<DbBool>(line)
parse_timeseries_scalar_gauge_measurement::<bool>(line)
}
DatumType::I8 => parse_timeseries_scalar_gauge_measurement::<i8>(line),
DatumType::U8 => parse_timeseries_scalar_gauge_measurement::<u8>(line),
Expand Down Expand Up @@ -1757,11 +1757,11 @@ pub(crate) fn parse_field_select_row(
// Parse the field value as the expected type
let value = match expected_field.field_type {
FieldType::Bool => {
FieldValue::Bool(bool::from(DbBool::from(
FieldValue::Bool(
actual_field_value
.as_u64()
.expect("Expected a u64 for a boolean field from the database")
)))
.as_bool()
.expect("Expected a boolean field from the database")
)
}
FieldType::I8 => {
let wide = actual_field_value
Expand Down Expand Up @@ -2071,7 +2071,7 @@ mod tests {
assert_eq!(measurement.datum(), datum);
}

let line = r#"{"timeseries_key": 12, "timestamp": "2021-01-01 00:00:00.123456789", "datum": 1 }"#;
let line = r#"{"timeseries_key": 12, "timestamp": "2021-01-01 00:00:00.123456789", "datum": true }"#;
let datum = Datum::from(true);
run_test(line, &datum, timestamp);

Expand Down
4 changes: 4 additions & 0 deletions smf/clickhouse/config.xml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,8 @@
<quotas>
<default/>
</quotas>

<merge_tree>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
</clickhouse>
1 change: 1 addition & 0 deletions test-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ uuid.workspace = true
chrono.workspace = true
expectorate.workspace = true
gethostname.workspace = true
serde.workspace = true

[features]
seed-gen = ["dep:filetime"]
Expand Down
61 changes: 59 additions & 2 deletions test-utils/src/dev/clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,9 @@ pub enum ClickHouseError {
Timeout,
}

const SINGLE_NODE_CONFIG_FILE: &str =
concat!(env!("CARGO_MANIFEST_DIR"), "/../smf/clickhouse/config.xml");

impl ClickHouseProcess {
/// Start a new single node ClickHouse server listening on the provided
/// ports.
Expand All @@ -395,8 +398,22 @@ impl ClickHouseProcess {
ports: ClickHousePorts,
) -> Result<ClickHouseReplica, anyhow::Error> {
let data_dir = ClickHouseDataDir::new(logctx)?;

// Copy the configuration file into the test directory, so we don't
// leave the preprocessed config file hanging around.
tokio::fs::copy(SINGLE_NODE_CONFIG_FILE, data_dir.config_file_path())
.await
.with_context(|| {
format!(
"failed to copy config file from {} to test data path {}",
SINGLE_NODE_CONFIG_FILE,
data_dir.config_file_path(),
)
})?;
let args = vec![
"server".to_string(),
"--config-file".to_string(),
data_dir.config_file_path().to_string(),
"--log-file".to_string(),
data_dir.log_path().to_string(),
"--errorlog-file".to_string(),
Expand Down Expand Up @@ -731,6 +748,10 @@ impl ClickHouseDataDir {
self.dir.path()
}

fn config_file_path(&self) -> Utf8PathBuf {
self.root_path().join("config.xml")
}

fn datastore_path(&self) -> Utf8PathBuf {
self.dir.path().join("datastore/")
}
Expand Down Expand Up @@ -1299,10 +1320,11 @@ async fn clickhouse_ready_from_log(
#[cfg(test)]
mod tests {
use super::{
discover_ready, wait_for_ports, ClickHouseError, ClickHousePorts,
CLICKHOUSE_HTTP_PORT_NEEDLE, CLICKHOUSE_READY,
discover_ready, wait_for_ports, ClickHouseDeployment, ClickHouseError,
ClickHousePorts, CLICKHOUSE_HTTP_PORT_NEEDLE, CLICKHOUSE_READY,
CLICKHOUSE_TCP_PORT_NEEDLE, CLICKHOUSE_TIMEOUT,
};
use crate::dev::test_setup_log;
use camino_tempfile::NamedUtf8TempFile;
use std::process::Stdio;
use std::{io::Write, sync::Arc, time::Duration};
Expand Down Expand Up @@ -1458,4 +1480,39 @@ mod tests {
let second = ClickHousePorts { http: 1, native: 1 };
ClickHousePorts { http: 2, native: 2 }.assert_consistent(&second);
}

#[derive(Debug, serde::Deserialize)]
struct Setting {
name: String,
value: String,
changed: u8,
}

#[tokio::test]
async fn sparse_serialization_is_disabled() {
let logctx = test_setup_log("sparse_serialization_is_disabled");
let mut db =
ClickHouseDeployment::new_single_node(&logctx).await.unwrap();
let client = reqwest::Client::new();
let url = format!("http://{}", db.http_address());
let setting: Setting = client
.post(url)
.body(
"SELECT name, value, changed \
FROM system.merge_tree_settings \
WHERE name == 'ratio_of_defaults_for_sparse_serialization' \
FORMAT JSONEachRow",
)
.send()
.await
.unwrap()
.json()
.await
.unwrap();
assert_eq!(setting.name, "ratio_of_defaults_for_sparse_serialization");
assert_eq!(setting.value, "1");
assert_eq!(setting.changed, 1);
db.cleanup().await.unwrap();
logctx.cleanup_successful();
}
}
Loading