Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DPE-5615] - Manage chain file for requests like that of a CA #472

Open
wants to merge 2 commits into
base: 2/edge
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 27 additions & 22 deletions lib/charms/opensearch/v0/opensearch_base_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 2
LIBPATCH = 3


SERVICE_MANAGER = "service"
Expand Down Expand Up @@ -642,6 +642,14 @@ def _on_update_status(self, event: UpdateStatusEvent): # noqa: C901
):
self.opensearch_provider.remove_lingering_relation_users_and_roles()

# If the unit reloads its certs but the other units are not ready yet
# we need to wait for them all to be ready before deleting the old CA
if (
self.tls._read_stored_ca("old-ca")
and self.tls.ca_and_certs_rotation_complete_in_cluster()
):
logger.debug("update_status: Detected CA rotation complete in cluster")
self.tls.on_ca_certs_rotation_complete()
# If relation not broken - leave
if self.model.get_relation("certificates") is not None:
return
Expand Down Expand Up @@ -818,14 +826,17 @@ def on_tls_conf_set(
logger.error("Could not reload TLS certificates via API, will restart.")
self._restart_opensearch_event.emit()
else:
# the chain.pem file should only be updated after applying the new certs
# otherwise there could be TLS verification errors after renewing the CA
self.tls.update_request_ca_bundle()
self.status.clear(TLSNotFullyConfigured)
self.tls.reset_ca_rotation_state()
# cleaning the former CA certificate from the truststore
# must only be done AFTER all renewed certificates are available and loaded
self.tls.remove_old_ca()
# if all certs are stored and CA rotation is complete in the cluster
# we delete the old ca and update the chain to only include the new one
if (
self.tls._read_stored_ca("old-ca")
and self.tls.ca_and_certs_rotation_complete_in_cluster()
):
logger.info("on_tls_conf_set: Detected CA rotation complete in cluster")
self.tls.on_ca_certs_rotation_complete()

else:
event.defer()
return
Expand Down Expand Up @@ -949,12 +960,6 @@ def _start_opensearch(self, event: _StartOpenSearch) -> None: # noqa: C901
return

if not self._can_service_start():
# after rotating the CA and certificates:
# the last host in the cluster to restart might not be able to connect to the other
# hosts anymore, because it is the last to renew the pem-file for requests
# in this case we update the pem-file to be able to connect and start the host
if self.peers_data.get(Scope.UNIT, "tls_ca_renewed", False):
self.tls.update_request_ca_bundle()
self.node_lock.release()
logger.info("Could not start opensearch service. Will retry next event.")
event.defer()
Expand Down Expand Up @@ -989,11 +994,6 @@ def _start_opensearch(self, event: _StartOpenSearch) -> None: # noqa: C901
self.unit.status = BlockedStatus(str(e))
return

# we should update the chain.pem file to avoid TLS verification errors
# this happens on restarts after applying a new admin cert on CA rotation
if self.peers_data.get(Scope.UNIT, "tls_ca_renewed", False):
self.tls.update_request_ca_bundle()

try:
self.opensearch.start(
wait_until_http_200=(
Expand Down Expand Up @@ -1161,10 +1161,6 @@ def _post_start_init(self, event: _StartOpenSearch): # noqa: C901
if self.opensearch_peer_cm.is_provider():
self.peer_cluster_provider.refresh_relation_data(event, can_defer=False)

# before resetting the CA rotation state, we remove the old ca from the truststore
if self.peers_data.get(Scope.UNIT, "tls_ca_renewed", False):
self.tls.remove_old_ca()

# update the peer relation data for TLS CA rotation routine
self.tls.reset_ca_rotation_state()
if self.is_tls_full_configured_in_cluster():
Expand All @@ -1181,6 +1177,15 @@ def _post_start_init(self, event: _StartOpenSearch): # noqa: C901
self.tls.request_new_admin_certificate()
else:
self.tls.store_admin_tls_secrets_if_applies()
# If the reload through API failed, we restart the service
# We remove the old CA and update the chain to only include the new one
# if all certs are stored and CA rotation is complete in the cluster
if (
self.tls._read_stored_ca("old-ca")
and self.tls.ca_and_certs_rotation_complete_in_cluster()
):
logger.info("post_start_init: Detected CA rotation complete in cluster")
self.tls.on_ca_certs_rotation_complete()

def _stop_opensearch(self, *, restart: bool = False) -> None:
"""Stop OpenSearch if possible."""
Expand Down
68 changes: 66 additions & 2 deletions lib/charms/opensearch/v0/opensearch_tls.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import tempfile
import typing
from os.path import exists
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

from charms.opensearch.v0.constants_charm import (
Expand Down Expand Up @@ -61,7 +62,7 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 1
LIBPATCH = 2

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -556,6 +557,8 @@ def store_new_ca(self, secrets: Dict[str, Any]) -> bool: # noqa: C901
logging.error(f"Error storing the ca-cert: {e}")
return False

self._add_ca_to_request_bundle(secrets.get("chain"))

return True

def _read_stored_ca(self, alias: str = "ca") -> Optional[str]:
Expand Down Expand Up @@ -608,6 +611,8 @@ def remove_old_ca(self) -> None:
if f"Alias <{old_alias}> does not exist" in e.out:
return

old_ca_content = self._read_stored_ca(alias=old_alias)

run_cmd(
f"""{keytool} \
-delete \
Expand All @@ -617,6 +622,8 @@ def remove_old_ca(self) -> None:
-storetype PKCS12"""
)
logger.info(f"Removed {old_alias} from truststore.")
# remove it from the request bundle
self._remove_ca_from_request_bundle(old_ca_content)

def update_request_ca_bundle(self) -> None:
"""Create a new chain.pem file for requests module"""
Expand Down Expand Up @@ -870,6 +877,38 @@ def ca_rotation_complete_in_cluster(self) -> bool:

return rotation_complete

def ca_and_certs_rotation_complete_in_cluster(self) -> bool:
    """Check whether the CA and certificate rotation completed on all units.

    The rotation is considered complete only when no unit still carries the
    ``tls_ca_renewing`` or ``tls_ca_renewed`` flag and every unit reports
    ``tls_configured`` as ``"True"``. This unit is checked through its own
    peer data; the other units are checked through the peer, peer-cluster and
    peer-cluster-orchestrator relations.

    Returns:
        True if the rotation is complete for this unit and all related units,
        False otherwise.
    """
    # the current unit is not in the relation.units list, so check it separately
    unit_data = self.charm.peers_data
    if (
        unit_data.get(Scope.UNIT, "tls_ca_renewing")
        or unit_data.get(Scope.UNIT, "tls_ca_renewed")
        # NOTE(review): str() keeps the comparison valid whether the store hands
        # back the raw "True" string or an already-cast boolean -- confirm which
        or str(unit_data.get(Scope.UNIT, "tls_configured")) != "True"
    ):
        logger.debug("TLS CA rotation ongoing on this unit.")
        return False

    for relation_type in [
        PeerRelationName,
        PeerClusterRelationName,
        PeerClusterOrchestratorRelationName,
    ]:
        for relation in self.model.relations[relation_type]:
            logger.debug(f"Checking relation {relation}: units: {relation.units}")
            for unit in relation.units:
                if (
                    "tls_ca_renewing" in relation.data[unit]
                    or "tls_ca_renewed" in relation.data[unit]
                    or relation.data[unit].get("tls_configured") != "True"
                ):
                    logger.debug(
                        f"TLS CA rotation not complete for unit {unit}: {relation} | tls_ca_renewing: {relation.data[unit].get('tls_ca_renewing')} | tls_ca_renewed: {relation.data[unit].get('tls_ca_renewed')} | tls_configured: {relation.data[unit].get('tls_configured')}"
                    )
                    # one lagging unit is enough: no need to scan the rest
                    return False

    return True

def is_ca_rotation_ongoing(self) -> bool:
"""Check whether the CA rotation is currently in progress."""
if (
Expand All @@ -884,10 +923,35 @@ def is_ca_rotation_ongoing(self) -> bool:
return False

def update_ca_rotation_flag_to_peer_cluster_relation(self, flag: str, operation: str) -> None:
    """Add or remove a CA rotation flag to all related peer clusters in large deployments.

    Args:
        flag: key to set or delete in this unit's relation data bag.
        operation: ``"add"`` stores the flag with the value ``"True"``;
            ``"remove"`` deletes it if present. Any other value is ignored.
    """
    for relation_type in [PeerClusterRelationName, PeerClusterOrchestratorRelationName]:
        for relation in self.model.relations[relation_type]:
            if operation == "add":
                relation.data[self.charm.unit][flag] = "True"
            elif operation == "remove":
                # pop with a default so a missing flag is not an error
                relation.data[self.charm.unit].pop(flag, None)

def on_ca_certs_rotation_complete(self) -> None:
    """Finalize a finished CA rotation on this unit.

    Logs the completion, removes the old CA through ``remove_old_ca`` and then
    rebuilds the requests bundle through ``update_request_ca_bundle``.
    """
    logger.info("CA rotation completed. Deleting old CA and updating request bundle.")

    # the old CA is dropped first, the bundle is regenerated afterwards
    self.remove_old_ca()
    self.update_request_ca_bundle()

def _add_ca_to_request_bundle(self, ca_cert: str) -> None:
"""Add the CA cert to the request bundle for the requests module."""
bundle_path = Path(self.certs_path) / "chain.pem"
if not bundle_path.exists():
return

bundle_content = bundle_path.read_text()
if ca_cert not in bundle_content:
bundle_path.write_text(f"{bundle_content}\n{ca_cert}")

def _remove_ca_from_request_bundle(self, ca_cert: str) -> None:
"""Remove the CA cert from the request bundle for the requests module."""
bundle_path = Path(self.certs_path) / "chain.pem"
if not bundle_path.exists():
return

bundle_content = bundle_path.read_text()
bundle_path.write_text(bundle_content.replace(ca_cert, ""))
18 changes: 11 additions & 7 deletions tests/unit/lib/test_opensearch_base_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,16 +357,20 @@ def test_on_update_status(self, _, cert_expiration_remaining_hours, _stop_opense
@patch(f"{BASE_CHARM_CLASS}.is_admin_user_configured")
@patch(f"{BASE_LIB_PATH}.opensearch_tls.OpenSearchTLS.is_fully_configured")
@patch(f"{BASE_LIB_PATH}.opensearch_tls.OpenSearchTLS.reload_tls_certificates")
@patch(f"{BASE_LIB_PATH}.opensearch_tls.OpenSearchTLS.update_request_ca_bundle")
@patch(f"{BASE_LIB_PATH}.opensearch_tls.OpenSearchTLS.remove_old_ca")
@patch(
f"{BASE_LIB_PATH}.opensearch_tls.OpenSearchTLS.ca_and_certs_rotation_complete_in_cluster"
)
@patch(f"{BASE_LIB_PATH}.opensearch_tls.OpenSearchTLS._read_stored_ca")
@patch(f"{BASE_LIB_PATH}.opensearch_tls.OpenSearchTLS.on_ca_certs_rotation_complete")
def test_reload_tls_certs_without_restart(
self,
store_admin_tls_secrets_if_applies,
is_admin_user_configured,
is_fully_configured,
reload_tls_certificates,
update_request_ca_bundle,
remove_old_ca,
ca_and_certs_rotation_complete_in_cluster,
_read_stored_ca,
on_ca_certs_rotation_complete,
):
"""Test that tls configuration set does not trigger restart."""
cert = "cert_12345"
Expand All @@ -376,12 +380,12 @@ def test_reload_tls_certs_without_restart(
self.charm.on_tls_conf_set(event_mock, scope="app", cert_type="app-admin", renewal=True)
is_admin_user_configured.return_value = True
is_fully_configured.return_value = True
ca_and_certs_rotation_complete_in_cluster.return_value = True
_read_stored_ca.return_value = "ca_1234"

store_admin_tls_secrets_if_applies.assert_called_once()
reload_tls_certificates.assert_called_once()
update_request_ca_bundle.assert_called_once()

remove_old_ca.assert_called_once()
on_ca_certs_rotation_complete.assert_called_once()
self.charm._restart_opensearch_event.emit.assert_not_called()

def test_app_peers_data(self):
Expand Down
Loading