Skip to content

Commit

Permalink
[reconfigurator] Retrieve keeper cluster config information (#6606)
Browse files Browse the repository at this point in the history
## Overview

This commit implements a new clickhouse-admin endpoint to retrieve and
parse information from the ClickHouse virtual node `/keeper/config`
which contains the last committed cluster configuration.

## Purpose

The main purpose of retrieving this information is to have the ability
to populate the inventory's `raft_config` in
`ClickhouseKeeperClusterMembership`.


https://github.com/oxidecomputer/omicron/blob/453311a880075b9f89626bb20cca1c1cd85ffb4f/nexus/types/src/inventory.rs#L499-L503

In a follow-up PR, an endpoint that specifically retrieves all of the
information needed to populate `ClickhouseKeeperClusterMembership` will be
added. This will be done by making several calls to the `clickhouse
keeper-client` and using the parsing function here to populate `raft_config`.

The endpoint itself will also be useful for retrieving information when
debugging.

## Manual testing

```console
$ curl http://[::1]:8888/keeper/raft-config
{"keeper_servers":[{"server_id":1,"host":{"ipv6":"::1"},"raft_port":21001,"server_type":"participant","priority":1},{"server_id":2,"host":{"ipv6":"::1"},"raft_port":21002,"server_type":"participant","priority":1},{"server_id":3,"host":{"ipv6":"::1"},"raft_port":21003,"server_type":"participant","priority":1}]}
```

Related: #5999
  • Loading branch information
karencfv authored Sep 23, 2024
1 parent 2461d17 commit 48f932f
Show file tree
Hide file tree
Showing 7 changed files with 663 additions and 10 deletions.
14 changes: 13 additions & 1 deletion clickhouse-admin/api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use clickhouse_admin_types::config::{KeeperConfig, ReplicaConfig};
use clickhouse_admin_types::{KeeperSettings, Lgif, ServerSettings};
use clickhouse_admin_types::{
KeeperSettings, Lgif, RaftConfig, ServerSettings,
};
use dropshot::{
HttpError, HttpResponseCreated, HttpResponseOk, RequestContext, TypedBody,
};
Expand Down Expand Up @@ -63,4 +65,14 @@ pub trait ClickhouseAdminApi {
async fn lgif(
rqctx: RequestContext<Self::Context>,
) -> Result<HttpResponseOk<Lgif>, HttpError>;

/// Retrieve information from the ClickHouse virtual node `/keeper/config`,
/// which contains the last committed keeper cluster configuration.
#[endpoint {
method = GET,
path = "/keeper/raft-config",
}]
async fn raft_config(
rqctx: RequestContext<Self::Context>,
) -> Result<HttpResponseOk<RaftConfig>, HttpError>;
}
12 changes: 11 additions & 1 deletion clickhouse-admin/src/clickhouse_cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use anyhow::Result;
use camino::Utf8PathBuf;
use clickhouse_admin_types::Lgif;
use clickhouse_admin_types::{Lgif, RaftConfig};
use dropshot::HttpError;
use illumos_utils::{output_to_exec_error, ExecutionError};
use slog::Logger;
Expand Down Expand Up @@ -82,6 +82,16 @@ impl ClickhouseCli {
.await
}

/// Retrieve the keeper cluster's last committed raft configuration by
/// running `clickhouse keeper-client` non-interactively against the
/// `/keeper/config` virtual node and parsing its output.
pub async fn raft_config(&self) -> Result<RaftConfig, ClickhouseCliError> {
self.keeper_client_non_interactive(
// keeper-client query: dump the contents of the `/keeper/config`
// virtual node (the last committed cluster configuration).
"get /keeper/config",
// Presumably used as human-readable context in error reporting —
// confirm against `keeper_client_non_interactive`.
"Retrieve raft configuration information",
// Parser that turns the raw command output into a `RaftConfig`.
RaftConfig::parse,
// NOTE(review): panics if no logger was attached (e.g. via
// `with_log`); assumes callers always set one — TODO confirm.
self.log.clone().unwrap(),
)
.await
}

async fn keeper_client_non_interactive<F, T>(
&self,
query: &str,
Expand Down
10 changes: 9 additions & 1 deletion clickhouse-admin/src/http_entrypoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
use crate::context::ServerContext;
use clickhouse_admin_api::*;
use clickhouse_admin_types::config::{KeeperConfig, ReplicaConfig};
use clickhouse_admin_types::Lgif;
use clickhouse_admin_types::{Lgif, RaftConfig};
use dropshot::{
HttpError, HttpResponseCreated, HttpResponseOk, RequestContext, TypedBody,
};
Expand Down Expand Up @@ -55,4 +55,12 @@ impl ClickhouseAdminApi for ClickhouseAdminImpl {
let output = ctx.clickhouse_cli().lgif().await?;
Ok(HttpResponseOk(output))
}

/// Handler for `GET /keeper/raft-config`: delegates to the ClickHouse CLI
/// wrapper and returns the parsed keeper raft configuration.
async fn raft_config(
    rqctx: RequestContext<Self::Context>,
) -> Result<HttpResponseOk<RaftConfig>, HttpError> {
    let raft_config = rqctx.context().clickhouse_cli().raft_config().await?;
    Ok(HttpResponseOk(raft_config))
}
}
80 changes: 78 additions & 2 deletions clickhouse-admin/tests/integration_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@

use anyhow::Context;
use camino::Utf8PathBuf;
use clickhouse_admin_types::config::ClickhouseHost;
use clickhouse_admin_types::{KeeperServerInfo, KeeperServerType, RaftConfig};
use clickward::{BasePorts, Deployment, DeploymentConfig, KeeperId};
use dropshot::test_util::log_prefix_for_test;
use omicron_clickhouse_admin::ClickhouseCli;
use omicron_test_utils::dev::test_setup_log;
use oximeter_test_utils::wait_for_keepers;
use slog::info;
use std::collections::BTreeSet;
use std::net::{Ipv6Addr, SocketAddrV6};
use std::str::FromStr;

Expand All @@ -22,8 +25,8 @@ async fn test_lgif_parsing() -> anyhow::Result<()> {
let path = parent_dir.join(format!("{prefix}-oximeter-clickward-test"));
std::fs::create_dir(&path)?;

// We use the default ports in `test_schemas_disjoint` and must use a
// separate set here in case the two tests run concurrently.
// We spin up several replicated clusters and must use a
// separate set of ports in case the tests run concurrently.
let base_ports = BasePorts {
keeper: 29000,
raft: 29100,
Expand Down Expand Up @@ -67,3 +70,76 @@ async fn test_lgif_parsing() -> anyhow::Result<()> {
logctx.cleanup_successful();
Ok(())
}

#[tokio::test]
async fn test_raft_config_parsing() -> anyhow::Result<()> {
    let logctx = test_setup_log("test_raft_config_parsing");
    let log = logctx.log.clone();

    // Scratch directory holding this deployment's generated configuration.
    let (parent_dir, prefix) = log_prefix_for_test(logctx.test_name());
    let path = parent_dir.join(format!("{prefix}-oximeter-clickward-test"));
    std::fs::create_dir(&path)?;

    // We spin up several replicated clusters across tests, so each test gets
    // its own port range in case the tests run concurrently.
    let base_ports = BasePorts {
        keeper: 39000,
        raft: 39100,
        clickhouse_tcp: 39200,
        clickhouse_http: 39300,
        clickhouse_interserver_http: 39400,
    };

    let deployment_config = DeploymentConfig {
        path: path.clone(),
        base_ports,
        cluster_name: "oximeter_cluster".to_string(),
    };
    let mut deployment = Deployment::new(deployment_config);

    // Bring up a three-keeper / one-replica cluster and wait until every
    // keeper is ready.
    let num_keepers = 3;
    let num_replicas = 1;
    deployment
        .generate_config(num_keepers, num_replicas)
        .context("failed to generate config")?;
    deployment.deploy().context("failed to deploy")?;
    wait_for_keepers(
        &log,
        &deployment,
        (1..=num_keepers).map(KeeperId).collect(),
    )
    .await?;

    let clickhouse_cli = ClickhouseCli::new(
        Utf8PathBuf::from_str("clickhouse").unwrap(),
        SocketAddrV6::new(Ipv6Addr::LOCALHOST, 39001, 0, 0),
    )
    .with_log(log.clone());

    let parsed_config = clickhouse_cli.raft_config().await.unwrap();

    // Every keeper should appear as a participant on its own raft port.
    let keeper_servers: BTreeSet<_> = (1..=num_keepers)
        .map(|id| KeeperServerInfo {
            server_id: clickhouse_admin_types::KeeperId(id),
            host: ClickhouseHost::Ipv6("::1".parse().unwrap()),
            raft_port: u16::try_from(39100 + id).unwrap(),
            server_type: KeeperServerType::Participant,
            priority: 1,
        })
        .collect();

    assert_eq!(parsed_config, RaftConfig { keeper_servers });

    info!(&log, "Cleaning up test");
    deployment.teardown()?;
    std::fs::remove_dir_all(path)?;
    logctx.cleanup_successful();
    Ok(())
}
36 changes: 34 additions & 2 deletions clickhouse-admin/types/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use crate::{KeeperId, ServerId, OXIMETER_CLUSTER};
use anyhow::{bail, Error};
use camino::Utf8PathBuf;
use omicron_common::address::{
CLICKHOUSE_HTTP_PORT, CLICKHOUSE_INTERSERVER_PORT,
Expand All @@ -15,8 +16,8 @@ use schemars::{
JsonSchema,
};
use serde::{Deserialize, Serialize};
use std::fmt::Display;
use std::net::{Ipv4Addr, Ipv6Addr};
use std::{fmt::Display, str::FromStr};

// Used for schemars to be able to be used with camino:
// See https://github.com/camino-rs/camino/issues/91#issuecomment-2027908513
Expand Down Expand Up @@ -294,14 +295,45 @@ impl KeeperConfigsForReplica {
}
}

#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
/// A host identifier for a ClickHouse server or keeper node: an IP address
/// (v6 or v4) or a domain name.
// NOTE: the derived `Ord`/`PartialOrd` compare by variant order first
// (Ipv6 < Ipv4 < DomainName), then by the contained value.
#[derive(
Debug,
Clone,
PartialEq,
Eq,
Deserialize,
PartialOrd,
Ord,
Serialize,
JsonSchema,
)]
#[serde(rename_all = "snake_case")]
pub enum ClickhouseHost {
Ipv6(Ipv6Addr),
Ipv4(Ipv4Addr),
DomainName(String),
}

impl FromStr for ClickhouseHost {
type Err = Error;

fn from_str(s: &str) -> Result<Self, Self::Err> {
if let Ok(ipv6) = s.parse() {
Ok(ClickhouseHost::Ipv6(ipv6))
} else if let Ok(ipv4) = s.parse() {
Ok(ClickhouseHost::Ipv4(ipv4))
// Validating whether a string is a valid domain or
// not is a complex process that isn't necessary for
// this function. In the case of ClickhouseHost, we wil
// only be dealing with our in internal DNS service
// which provides names that always end with `.internal`.
} else if s.ends_with(".internal") {
Ok(ClickhouseHost::DomainName(s.to_string()))
} else {
bail!("{s} is not a valid address or domain name")
}
}
}

#[derive(Debug, Clone, PartialEq, Eq, JsonSchema, Serialize, Deserialize)]
pub struct KeeperNodeConfig {
pub host: ClickhouseHost,
Expand Down
Loading

0 comments on commit 48f932f

Please sign in to comment.