Skip to content

Commit

Permalink
[DPE-5402] Fix wrong roles setting of "coordinating" node (#429)
Browse files Browse the repository at this point in the history
## Issue
This PR implements
[DPE-5402](https://warthogs.atlassian.net/browse/DPE-5402) - namely this
PR fixes:
- setting a wrong role `coordinating_only`
- not setting `node.roles: []` when only the `coordinating` role was set
- This PR also prevents the TLS rotation workflow from starting on a non-main
orchestrator when the current cluster is **not a consumer** (which
would create a mismatch in the APP_ADMIN TLS values).

[DPE-5402]:
https://warthogs.atlassian.net/browse/DPE-5402?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ
  • Loading branch information
Mehdi-Bendriss authored Sep 9, 2024
1 parent 6dd6b71 commit b740e20
Show file tree
Hide file tree
Showing 8 changed files with 40 additions and 14 deletions.
2 changes: 1 addition & 1 deletion lib/charms/opensearch/v0/helper_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class ClusterTopology:
@staticmethod
def generated_roles() -> List[str]:
"""Get generated roles for a Node."""
return ["data", "ingest", "ml", "coordinating_only", "cluster_manager"]
return ["data", "ingest", "ml", "cluster_manager"]

@staticmethod
def get_cluster_settings(
Expand Down
8 changes: 6 additions & 2 deletions lib/charms/opensearch/v0/opensearch_base_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1337,7 +1337,9 @@ def _set_node_conf(self, nodes: List[Node]) -> None:
cm_ips = ClusterTopology.get_cluster_managers_ips(nodes)

contribute_to_bootstrap = False
if "cluster_manager" in computed_roles:
if computed_roles == ["coordinating"]:
computed_roles = [] # to mark a node as dedicated coordinating only, we clear the list
elif "cluster_manager" in computed_roles:
cm_names.append(self.unit_name)
cm_ips.append(self.unit_ip)

Expand Down Expand Up @@ -1403,8 +1405,10 @@ def _reconfigure_and_restart_unit_if_needed(self):
return

current_conf = self.opensearch_config.load_node()
stored_roles = current_conf["node.roles"] or ["coordinating"]
new_conf_roles = new_node_conf.roles or ["coordinating"]
if (
sorted(current_conf["node.roles"]) == sorted(new_node_conf.roles)
sorted(stored_roles) == sorted(new_conf_roles)
and current_conf.get("node.attr.temp") == new_node_conf.temperature
):
# no conf change (roles for now)
Expand Down
4 changes: 3 additions & 1 deletion lib/charms/opensearch/v0/opensearch_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,9 @@ def set_node(
self.CONFIG_YML, "network.publish_host", self._opensearch.host
)

self._opensearch.config.put(self.CONFIG_YML, "node.roles", roles)
self._opensearch.config.put(
self.CONFIG_YML, "node.roles", roles, inline_array=len(roles) == 0
)
if node_temperature:
self._opensearch.config.put(self.CONFIG_YML, "node.attr.temp", node_temperature)
else:
Expand Down
27 changes: 22 additions & 5 deletions lib/charms/opensearch/v0/opensearch_tls.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,17 @@ def _on_set_tls_private_key(self, event: ActionEvent) -> None:
if self.charm.upgrade_in_progress:
event.fail("Setting private key not supported while upgrade in-progress")
return

cert_type = CertType(event.params["category"]) # type
scope = Scope.APP if cert_type == CertType.APP_ADMIN else Scope.UNIT

if scope == Scope.APP and not self.charm.unit.is_leader():
event.log("Only the juju leader unit can set private key for the admin certificates.")
if scope == Scope.APP and not (
self.charm.unit.is_leader()
and self.charm.opensearch_peer_cm.deployment_desc().typ
== DeploymentType.MAIN_ORCHESTRATOR
):
event.log(
"Only the juju leader unit of the main orchestrator can set private key for the admin certificates."
)
return

try:
Expand Down Expand Up @@ -144,15 +150,26 @@ def _on_tls_relation_created(self, event: RelationCreatedEvent) -> None:
if not (deployment_desc := self.charm.opensearch_peer_cm.deployment_desc()):
event.defer()
return
admin_cert = self.charm.secrets.get_object(Scope.APP, CertType.APP_ADMIN.val)

admin_secrets = self.charm.secrets.get_object(Scope.APP, CertType.APP_ADMIN.val)

# TODO: should this be deleted when the TLS rotation workflow adapted to large deployments?
# or is this enough?
if (
self.charm.opensearch_peer_cm.deployment_desc().typ != DeploymentType.MAIN_ORCHESTRATOR
and not (admin_secrets and self.charm.opensearch_peer_cm.is_consumer())
):
event.defer()
return

if self.charm.unit.is_leader():
# create passwords for both ca trust_store/admin key_store
self._create_keystore_pwd_if_not_exists(Scope.APP, CertType.APP_ADMIN, "ca")
self._create_keystore_pwd_if_not_exists(
Scope.APP, CertType.APP_ADMIN, CertType.APP_ADMIN.val
)

if admin_cert is None and deployment_desc.typ == DeploymentType.MAIN_ORCHESTRATOR:
if admin_secrets is None and deployment_desc.typ == DeploymentType.MAIN_ORCHESTRATOR:
self._request_certificate(Scope.APP, CertType.APP_ADMIN)

# create passwords for both unit-http/transport key_stores
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/ha/test_large_deployments_relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ async def test_large_deployment_fully_formed(
assert len(nodes) == 8, f"Wrong node count. Expecting 8 online nodes, found: {len(nodes)}."

# check the roles
auto_gen_roles = ["cluster_manager", "coordinating_only", "data", "ingest", "ml"]
auto_gen_roles = ["cluster_manager", "data", "ingest", "ml"]
data_roles = ["data", "ml"]
for app, node_count in [(MAIN_APP, 3), (FAILOVER_APP, 3), (DATA_APP, 2)]:
current_app_nodes = [
Expand Down
2 changes: 0 additions & 2 deletions tests/integration/ha/test_roles_managements.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ async def test_set_roles_manually(
for node in nodes:
assert sorted(node.roles) == [
"cluster_manager",
"coordinating_only",
"data",
"ingest",
"ml",
Expand Down Expand Up @@ -163,7 +162,6 @@ async def test_switch_back_to_auto_generated_roles(
for node in nodes:
assert sorted(node.roles) == [
"cluster_manager",
"coordinating_only",
"data",
"ingest",
"ml",
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def mock_response_nodes(unit_name: str, host: str, node_id: str = NODE_ID):
"build_type": "tar",
"build_hash": "30dd870855093c9dca23fc6f8cfd5c0d7c83127d",
"total_indexing_buffer": 107374182,
"roles": ["cluster_manager", "coordinating_only", "data", "ingest", "ml"],
"roles": ["cluster_manager", "data", "ingest", "ml"],
"attributes": {
"shard_indexing_pressure_enabled": "true",
"app_id": "617e5f02-5be5-4e25-85f0-276b2347a5ad/opensearch",
Expand Down
7 changes: 6 additions & 1 deletion tests/unit/lib/test_opensearch_tls.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,18 +191,23 @@ def test_on_relation_broken(self, on_tls_relation_broken):

on_tls_relation_broken.assert_called_once()

@patch(
f"{BASE_LIB_PATH}.opensearch_peer_clusters.OpenSearchPeerClustersManager.deployment_desc"
)
@patch("charms.opensearch.v0.opensearch_tls.OpenSearchTLS._request_certificate")
@patch("charm.OpenSearchOperatorCharm._put_or_update_internal_user_leader")
@patch("charm.OpenSearchOperatorCharm._purge_users")
def test_on_set_tls_private_key(self, _, __, _request_certificate):
def test_on_set_tls_private_key(self, _, __, _request_certificate, deployment_desc):
"""Test _on_set_tls private key event."""
event_mock = MagicMock(params={"category": "app-admin"})

self.harness.set_leader(is_leader=False)
deployment_desc.return_value = self.deployment_descriptions["ko"]
self.charm.tls._on_set_tls_private_key(event_mock)
_request_certificate.assert_not_called()

self.harness.set_leader(is_leader=True)
deployment_desc.return_value = self.deployment_descriptions["ok"]
self.charm.tls._on_set_tls_private_key(event_mock)
_request_certificate.assert_called_once()

Expand Down

0 comments on commit b740e20

Please sign in to comment.