Skip to content

Commit

Permalink
[reconfigurator] Enable clickhouse services via clickhouse-admin (#6769)
Browse files Browse the repository at this point in the history
## Overview

This commit makes a few changes to the way the `clickhouse_server` and
`clickhouse_keeper` SMF services are launched:

- They are now disabled by default on zone boot.
- They are enabled by clickhouse-admin after the configuration files
have been generated.

Related: #5999
  • Loading branch information
karencfv authored Oct 11, 2024
1 parent e627935 commit 5200f2a
Show file tree
Hide file tree
Showing 12 changed files with 60 additions and 256 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions clickhouse-admin/api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,23 +35,23 @@ pub trait ClickhouseAdminApi {
type Context;

/// Generate a ClickHouse configuration file for a server node on a specified
/// directory.
/// directory and enable the SMF service.
#[endpoint {
method = PUT,
path = "/server/config",
path = "/server/config-and-enable",
}]
async fn generate_server_config(
async fn generate_server_config_and_enable(
rqctx: RequestContext<Self::Context>,
body: TypedBody<ServerConfigurableSettings>,
) -> Result<HttpResponseCreated<ReplicaConfig>, HttpError>;

/// Generate a ClickHouse configuration file for a keeper node on a specified
/// directory.
/// directory and enable the SMF service.
#[endpoint {
method = PUT,
path = "/keeper/config",
path = "/keeper/config-and-enable",
}]
async fn generate_keeper_config(
async fn generate_keeper_config_and_enable(
rqctx: RequestContext<Self::Context>,
body: TypedBody<KeeperConfigurableSettings>,
) -> Result<HttpResponseCreated<KeeperConfig>, HttpError>;
Expand Down
19 changes: 13 additions & 6 deletions clickhouse-admin/src/http_entrypoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use clickhouse_admin_types::{
use dropshot::{
HttpError, HttpResponseCreated, HttpResponseOk, RequestContext, TypedBody,
};
use illumos_utils::svcadm::Svcadm;
use std::sync::Arc;

type ClickhouseApiDescription = dropshot::ApiDescription<Arc<ServerContext>>;
Expand All @@ -25,28 +26,34 @@ enum ClickhouseAdminImpl {}
impl ClickhouseAdminApi for ClickhouseAdminImpl {
type Context = Arc<ServerContext>;

async fn generate_server_config(
async fn generate_server_config_and_enable(
rqctx: RequestContext<Self::Context>,
body: TypedBody<ServerConfigurableSettings>,
) -> Result<HttpResponseCreated<ReplicaConfig>, HttpError> {
let ctx = rqctx.context();
let replica_server = body.into_inner();
// TODO(https://github.com/oxidecomputer/omicron/issues/5999): Do something
// with the generation number `replica_server.generation`
let output =
ctx.clickward().generate_server_config(replica_server.settings)?;

// Once we have generated the client we can safely enable the clickhouse_server service
let fmri = "svc:/oxide/clickhouse_server:default".to_string();
Svcadm::enable_service(fmri)?;

Ok(HttpResponseCreated(output))
}

async fn generate_keeper_config(
async fn generate_keeper_config_and_enable(
rqctx: RequestContext<Self::Context>,
body: TypedBody<KeeperConfigurableSettings>,
) -> Result<HttpResponseCreated<KeeperConfig>, HttpError> {
let ctx = rqctx.context();
let keeper = body.into_inner();
// TODO(https://github.com/oxidecomputer/omicron/issues/5999): Do something
// with the generation number `keeper.generation`
let output = ctx.clickward().generate_keeper_config(keeper.settings)?;

// Once we have generated the client we can safely enable the clickhouse_keeper service
let fmri = "svc:/oxide/clickhouse_keeper:default".to_string();
Svcadm::enable_service(fmri)?;

Ok(HttpResponseCreated(output))
}

Expand Down
3 changes: 3 additions & 0 deletions illumos-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ camino.workspace = true
camino-tempfile.workspace = true
cfg-if.workspace = true
crucible-smf.workspace = true
dropshot.workspace = true
futures.workspace = true
http.workspace = true
ipnetwork.workspace = true
libc.workspace = true
macaddr.workspace = true
Expand All @@ -29,6 +31,7 @@ oxnet.workspace = true
schemars.workspace = true
serde.workspace = true
slog.workspace = true
slog-error-chain.workspace = true
smf.workspace = true
thiserror.workspace = true
tokio.workspace = true
Expand Down
14 changes: 14 additions & 0 deletions illumos-utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

//! Wrappers around illumos-specific commands.

use dropshot::HttpError;
use slog_error_chain::InlineErrorChain;
#[allow(unused)]
use std::sync::atomic::{AtomicBool, Ordering};

Expand Down Expand Up @@ -71,6 +73,18 @@ pub enum ExecutionError {
NotRunning,
}

impl From<ExecutionError> for HttpError {
fn from(err: ExecutionError) -> Self {
let message = InlineErrorChain::new(&err).to_string();
HttpError {
status_code: http::StatusCode::INTERNAL_SERVER_ERROR,
error_code: Some(String::from("Internal")),
external_message: message.clone(),
internal_message: message,
}
}
}

// We wrap this method in an inner module to make it possible to mock
// these free functions.
#[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))]
Expand Down
7 changes: 7 additions & 0 deletions illumos-utils/src/svcadm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,11 @@ impl Svcadm {
execute(cmd)?;
Ok(())
}

pub fn enable_service(fmri: String) -> Result<(), ExecutionError> {
let mut cmd = std::process::Command::new(PFEXEC);
let cmd = cmd.args(&[SVCADM, "enable", "-s", &fmri]);
execute(cmd)?;
Ok(())
}
}
12 changes: 6 additions & 6 deletions openapi/clickhouse-admin.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,11 @@
}
}
},
"/keeper/config": {
"/keeper/config-and-enable": {
"put": {
"summary": "Generate a ClickHouse configuration file for a keeper node on a specified",
"description": "directory.",
"operationId": "generate_keeper_config",
"description": "directory and enable the SMF service.",
"operationId": "generate_keeper_config_and_enable",
"requestBody": {
"content": {
"application/json": {
Expand Down Expand Up @@ -143,11 +143,11 @@
}
}
},
"/server/config": {
"/server/config-and-enable": {
"put": {
"summary": "Generate a ClickHouse configuration file for a server node on a specified",
"description": "directory.",
"operationId": "generate_server_config",
"description": "directory and enable the SMF service.",
"operationId": "generate_server_config_and_enable",
"requestBody": {
"content": {
"application/json": {
Expand Down
32 changes: 6 additions & 26 deletions sled-agent/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1650,7 +1650,6 @@ impl ServiceManager {
};

let listen_addr = *underlay_address;
let listen_port = CLICKHOUSE_HTTP_PORT.to_string();

let nw_setup_service = Self::zone_network_setup_install(
Some(&info.underlay_address),
Expand All @@ -1660,19 +1659,10 @@ impl ServiceManager {

let dns_service = Self::dns_install(info, None, None).await?;

let config = PropertyGroupBuilder::new("config")
.add_property(
"listen_addr",
"astring",
listen_addr.to_string(),
)
.add_property("listen_port", "astring", listen_port)
.add_property("store", "astring", "/data");
let clickhouse_server_service =
let disabled_clickhouse_server_service =
ServiceBuilder::new("oxide/clickhouse_server")
.add_instance(
ServiceInstanceBuilder::new("default")
.add_property_group(config),
ServiceInstanceBuilder::new("default").disable(),
);

let admin_address = SocketAddr::new(
Expand Down Expand Up @@ -1705,7 +1695,7 @@ impl ServiceManager {
let profile = ProfileBuilder::new("omicron")
.add_service(nw_setup_service)
.add_service(disabled_ssh_service)
.add_service(clickhouse_server_service)
.add_service(disabled_clickhouse_server_service)
.add_service(dns_service)
.add_service(enabled_dns_client_service)
.add_service(clickhouse_admin_service);
Expand Down Expand Up @@ -1735,7 +1725,6 @@ impl ServiceManager {
};

let listen_addr = *underlay_address;
let listen_port = &CLICKHOUSE_KEEPER_TCP_PORT.to_string();

let nw_setup_service = Self::zone_network_setup_install(
Some(&info.underlay_address),
Expand All @@ -1745,19 +1734,10 @@ impl ServiceManager {

let dns_service = Self::dns_install(info, None, None).await?;

let config = PropertyGroupBuilder::new("config")
.add_property(
"listen_addr",
"astring",
listen_addr.to_string(),
)
.add_property("listen_port", "astring", listen_port)
.add_property("store", "astring", "/data");
let clickhouse_keeper_service =
let disaled_clickhouse_keeper_service =
ServiceBuilder::new("oxide/clickhouse_keeper")
.add_instance(
ServiceInstanceBuilder::new("default")
.add_property_group(config),
ServiceInstanceBuilder::new("default").disable(),
);

let admin_address = SocketAddr::new(
Expand Down Expand Up @@ -1790,7 +1770,7 @@ impl ServiceManager {
let profile = ProfileBuilder::new("omicron")
.add_service(nw_setup_service)
.add_service(disabled_ssh_service)
.add_service(clickhouse_keeper_service)
.add_service(disaled_clickhouse_keeper_service)
.add_service(dns_service)
.add_service(enabled_dns_client_service)
.add_service(clickhouse_admin_service);
Expand Down
8 changes: 1 addition & 7 deletions smf/clickhouse_keeper/manifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<service_bundle type='manifest' name='clickhouse_keeper'>

<service name='oxide/clickhouse_keeper' type='service' version='1'>
<create_default_instance enabled='true' />
<create_default_instance enabled='false' />

<dependency name='multi_user' grouping='require_all' restart_on='none'
type='service'>
Expand All @@ -26,12 +26,6 @@
timeout_seconds='0' />
<exec_method type='method' name='stop' exec=':kill' timeout_seconds='0' />

<property_group name='config' type='application'>
<propval name='listen_addr' type='astring' value='unknown' />
<propval name='listen_port' type='astring' value='unknown' />
<propval name='store' type='astring' value='unknown' />
</property_group>

<property_group name='startd' type='framework'>
<propval name='duration' type='astring' value='contract' />
</property_group>
Expand Down
96 changes: 0 additions & 96 deletions smf/clickhouse_keeper/method_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,102 +6,6 @@ set -o pipefail

. /lib/svc/share/smf_include.sh

LISTEN_ADDR="$(svcprop -c -p config/listen_addr "${SMF_FMRI}")"
LISTEN_PORT="$(svcprop -c -p config/listen_port "${SMF_FMRI}")"
DATASTORE="$(svcprop -c -p config/store "${SMF_FMRI}")"

# Retrieve hostnames (SRV records in internal DNS) of all keeper nodes.
K_ADDRS="$(/opt/oxide/internal-dns-cli/bin/dnswait clickhouse-keeper -H)"

if [[ -z "$K_ADDRS" ]]; then
printf 'ERROR: found no hostnames for other ClickHouse Keeper nodes\n' >&2
exit "$SMF_EXIT_ERR_CONFIG"
fi

declare -a keepers=($K_ADDRS)

for i in "${keepers[@]}"
do
if ! grep -q "host.control-plane.oxide.internal" <<< "${i}"; then
printf 'ERROR: retrieved ClickHouse Keeper hostname does not match the expected format\n' >&2
exit "$SMF_EXIT_ERR_CONFIG"
fi
done

if [[ "${#keepers[@]}" != 3 ]]
then
printf "ERROR: expected 3 ClickHouse Keeper hosts, found "${#keepers[@]}" instead\n" >&2
exit "$SMF_EXIT_ERR_CONFIG"
fi

# Assign hostnames to replicas and keeper nodes
KEEPER_HOST_01="${keepers[0]}"
KEEPER_HOST_02="${keepers[1]}"
KEEPER_HOST_03="${keepers[2]}"

# Generate unique reproduceable number IDs by removing letters from
# KEEPER_IDENTIFIER_* Keeper IDs must be numbers, and they cannot be reused
# (i.e. when a keeper node is unrecoverable the ID must be changed to something
# new). By trimming the hosts we can make sure all keepers will always be up to
# date when a new keeper is spun up. Clickhouse does not allow very large
# numbers, so we will be reducing to 7 characters. This should be enough
# entropy given the small amount of keepers we have.
KEEPER_ID_01="$( echo "${KEEPER_HOST_01}" | tr -dc [:digit:] | cut -c1-7)"
KEEPER_ID_02="$( echo "${KEEPER_HOST_02}" | tr -dc [:digit:] | cut -c1-7)"
KEEPER_ID_03="$( echo "${KEEPER_HOST_03}" | tr -dc [:digit:] | cut -c1-7)"

# Identify the node type this is as this will influence how the config is
# constructed
# TODO(https://github.com/oxidecomputer/omicron/issues/3824): There are
# probably much better ways to do this service name lookup, but this works for
# now. The services contain the same IDs as the hostnames.
KEEPER_SVC="$(zonename | tr -dc [:digit:] | cut -c1-7)"
if [[ $KEEPER_ID_01 == $KEEPER_SVC ]]
then
KEEPER_ID_CURRENT=$KEEPER_ID_01
elif [[ $KEEPER_ID_02 == $KEEPER_SVC ]]
then
KEEPER_ID_CURRENT=$KEEPER_ID_02
elif [[ $KEEPER_ID_03 == $KEEPER_SVC ]]
then
KEEPER_ID_CURRENT=$KEEPER_ID_03
else
printf 'ERROR: service name does not match any of the identified ClickHouse Keeper hostnames\n' >&2
exit "$SMF_EXIT_ERR_CONFIG"
fi

curl -X put http://[${LISTEN_ADDR}]:8888/keeper/config \
-H "Content-Type: application/json" \
-d '{
"generation": 0,
"settings": {
"id": '${KEEPER_ID_CURRENT}',
"raft_servers": [
{
"id": '${KEEPER_ID_01}',
"host": {
"domain_name": "'${KEEPER_HOST_01}'"
}
},
{
"id": '${KEEPER_ID_02}',
"host": {
"domain_name": "'${KEEPER_HOST_02}'"
}
},
{
"id": '${KEEPER_ID_03}',
"host": {
"domain_name": "'${KEEPER_HOST_03}'"
}
}
],
"config_dir": "/opt/oxide/clickhouse_keeper",
"datastore_path": "'${DATASTORE}'",
"listen_addr": "'${LISTEN_ADDR}'"
}
}'

# The clickhouse binary must be run from within the directory that contains it.
# Otherwise, it does not automatically detect the configuration files, nor does
# it append them when necessary
Expand Down
Loading

0 comments on commit 5200f2a

Please sign in to comment.