Skip to content

Commit

Permalink
[DPE-3578] clear backup status (#186)
Browse files Browse the repository at this point in the history
Move the status messages in `opensearch_backup` module to comply with
`charm.status.{set,clear}`. All text has been moved to
`constant_charms.py`.

This PR also simplifies the exception structure, where the plugins
should log their own errors if that is not supposed to come back to the
plugin manager and the charm itself. The exception
`OpenSearchPluginClusterNotReadyError` has been abandoned in favor of
the default exception for the same type of error in
`constant_exceptions`.

Tests have been updated accordingly.
  • Loading branch information
phvalguima authored Feb 29, 2024
1 parent c2a1f74 commit d31ba2d
Show file tree
Hide file tree
Showing 9 changed files with 220 additions and 121 deletions.
7 changes: 6 additions & 1 deletion lib/charms/opensearch/v0/constants_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,12 @@
PClusterWrongNodesCountForQuorum = (
"Even number of members in quorum if current unit started. Add or remove 1 unit."
)
PluginConfigError = "Unexpected error during plugin configuration, check the logs"
BackupSetupFailed = "Backup setup failed, check logs for details"

# Wait status
RequestUnitServiceOps = "Requesting lock on operation: {}"
BackupDeferRelBrokenAsInProgress = "Backup service cannot be stopped: backup in progress."


# Maintenance statuses
Expand All @@ -72,7 +75,9 @@
NewIndexRequested = "new index {index} requested"
RestoreInProgress = "Restore in progress..."
PluginConfigStart = "Plugin configuration started."

BackupSetupStart = "Backup setup started."
BackupConfigureStart = "Configuring backup service..."
BackupInDisabling = "Disabling backup service..."

# Relation Interfaces
ClientRelationName = "opensearch-client"
Expand Down
66 changes: 35 additions & 31 deletions lib/charms/opensearch/v0/opensearch_backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,26 @@ def __init__(...):
from typing import Any, Dict, List, Set, Tuple

from charms.data_platform_libs.v0.s3 import S3Requirer
from charms.opensearch.v0.constants_charm import RestoreInProgress
from charms.opensearch.v0.constants_charm import (
BackupConfigureStart,
BackupDeferRelBrokenAsInProgress,
BackupInDisabling,
BackupSetupFailed,
BackupSetupStart,
PluginConfigError,
RestoreInProgress,
)
from charms.opensearch.v0.helper_cluster import ClusterState, IndexStateEnum
from charms.opensearch.v0.helper_enums import BaseStrEnum
from charms.opensearch.v0.opensearch_exceptions import (
OpenSearchError,
OpenSearchHttpError,
OpenSearchNotFullyReadyError,
)
from charms.opensearch.v0.opensearch_plugins import (
OpenSearchBackupPlugin,
OpenSearchPluginRelationClusterNotReadyError,
PluginState,
)
from charms.opensearch.v0.opensearch_plugins import OpenSearchBackupPlugin, PluginState
from ops.charm import ActionEvent
from ops.framework import EventBase, Object
from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, WaitingStatus
from ops.model import BlockedStatus, MaintenanceStatus, WaitingStatus
from tenacity import RetryError, Retrying, stop_after_attempt, wait_fixed

# The unique Charmhub library identifier, never change it
Expand Down Expand Up @@ -481,20 +486,18 @@ def _on_s3_credentials_changed(self, event: EventBase) -> None:
if self.s3_client.get_s3_connection_info().get("tls-ca-chain"):
raise NotImplementedError

self.charm.status.set(WaitingStatus("Setting up backup service"))
self.charm.status.set(MaintenanceStatus(BackupSetupStart))

try:
plugin = self.charm.plugin_manager.get_plugin(OpenSearchBackupPlugin)
if self.charm.plugin_manager.status(plugin) == PluginState.ENABLED:
self.charm.plugin_manager.apply_config(plugin.disable())
self.charm.plugin_manager.apply_config(plugin.config())
except OpenSearchError as e:
if isinstance(e, OpenSearchPluginRelationClusterNotReadyError):
self.charm.status.set(WaitingStatus("s3-changed: cluster not ready yet"))
if isinstance(e, OpenSearchNotFullyReadyError):
logger.warning("s3-changed: cluster not ready yet")
else:
self.charm.status.set(
BlockedStatus("Unexpected error during plugin configuration, check the logs")
)
self.charm.status.set(BlockedStatus(PluginConfigError))
# There was an unexpected error, log it and block the unit
logger.error(e)
event.defer()
Expand All @@ -504,15 +507,17 @@ def _on_s3_credentials_changed(self, event: EventBase) -> None:
PluginState.ENABLED,
PluginState.WAITING_FOR_UPGRADE,
]:
self.charm.status.set(WaitingStatus(f"Plugin in state {self._plugin_status}"))
event.defer()
return

if not self.charm.unit.is_leader():
# Plugin is configured locally for this unit. Now the leader proceed.
self.charm.status.set(ActiveStatus())
self.charm.status.clear(PluginConfigError)
self.charm.status.clear(BackupSetupStart)
return
self.apply_api_config_if_needed()
self.charm.status.clear(PluginConfigError)
self.charm.status.clear(BackupSetupStart)

def apply_api_config_if_needed(self) -> None:
"""Runs the post restart routine and API calls needed to setup/disable backup.
Expand All @@ -522,14 +527,16 @@ def apply_api_config_if_needed(self) -> None:
# Backup relation has been recently made available with all the parameters needed.
# Steps:
# (1) set up as maintenance;
self.charm.status.set(MaintenanceStatus("Configuring backup service..."))
self.charm.status.set(MaintenanceStatus(BackupConfigureStart))
# (2) run the request; and
state = self._register_snapshot_repo()
# (3) based on the response, set the message status
if state != BackupServiceState.SUCCESS:
self.charm.status.set(BlockedStatus(f"Backup setup failed with {state}"))
else:
self.charm.status.set(ActiveStatus())
logger.error(f"Failed to setup backup service with state {state}")
self.charm.status.set(BlockedStatus(BackupSetupFailed))
return
self.charm.status.clear(BackupSetupFailed)
self.charm.status.clear(BackupConfigureStart)

def _on_s3_broken(self, event: EventBase) -> None: # noqa: C901
"""Processes the broken s3 relation.
Expand All @@ -546,19 +553,17 @@ def _on_s3_broken(self, event: EventBase) -> None: # noqa: C901
event.defer()
return

self.charm.status.set(MaintenanceStatus("Disabling backup service..."))
self.charm.status.set(MaintenanceStatus(BackupInDisabling))
snapshot_status = self._check_snapshot_status()
if snapshot_status in [
BackupServiceState.SNAPSHOT_IN_PROGRESS,
]:
# 1) snapshot is either in progress or partially taken: block and defer this event
self.charm.status.set(
MaintenanceStatus(
f"Disabling backup postponed until backup in progress: {snapshot_status}"
)
)
self.charm.status.set(WaitingStatus(BackupDeferRelBrokenAsInProgress))
event.defer()
return
self.charm.status.clear(BackupDeferRelBrokenAsInProgress)

if snapshot_status in [
BackupServiceState.SNAPSHOT_PARTIALLY_TAKEN,
]:
Expand All @@ -584,17 +589,16 @@ def _on_s3_broken(self, event: EventBase) -> None: # noqa: C901
if self.charm.plugin_manager.status(plugin) == PluginState.ENABLED:
self.charm.plugin_manager.apply_config(plugin.disable())
except OpenSearchError as e:
if isinstance(e, OpenSearchPluginRelationClusterNotReadyError):
self.charm.status.set(WaitingStatus("s3-broken event: cluster not ready yet"))
if isinstance(e, OpenSearchNotFullyReadyError):
logger.warning("s3-changed: cluster not ready yet")
else:
self.charm.status.set(
BlockedStatus("Unexpected error during plugin configuration, check the logs")
)
self.charm.status.set(BlockedStatus(PluginConfigError))
# There was an unexpected error, log it and block the unit
logger.error(e)
event.defer()
return
self.charm.status.set(ActiveStatus())
self.charm.status.clear(BackupInDisabling)
self.charm.status.clear(PluginConfigError)

def _execute_s3_broken_calls(self):
"""Executes the s3 broken API calls."""
Expand Down
7 changes: 2 additions & 5 deletions lib/charms/opensearch/v0/opensearch_base_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,7 @@
StartMode,
)
from charms.opensearch.v0.opensearch_plugin_manager import OpenSearchPluginManager
from charms.opensearch.v0.opensearch_plugins import (
OpenSearchPluginError,
OpenSearchPluginRelationClusterNotReadyError,
)
from charms.opensearch.v0.opensearch_plugins import OpenSearchPluginError
from charms.opensearch.v0.opensearch_relation_provider import OpenSearchProvider
from charms.opensearch.v0.opensearch_secrets import OpenSearchSecrets
from charms.opensearch.v0.opensearch_tls import OpenSearchTLS
Expand Down Expand Up @@ -502,7 +499,7 @@ def _on_config_changed(self, event: ConfigChangedEvent):
self.on[self.service_manager.name].acquire_lock.emit(
callback_override="_restart_opensearch"
)
except OpenSearchPluginRelationClusterNotReadyError:
except OpenSearchNotFullyReadyError:
logger.warning("Plugin management: cluster not ready yet at config changed")
event.defer()
return
Expand Down
94 changes: 59 additions & 35 deletions lib/charms/opensearch/v0/opensearch_plugin_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,14 @@
import logging
from typing import Any, Dict, List, Optional

from charms.opensearch.v0.opensearch_backups import OpenSearchBackupPlugin
from charms.opensearch.v0.opensearch_exceptions import OpenSearchCmdError
from charms.opensearch.v0.opensearch_exceptions import (
OpenSearchCmdError,
OpenSearchNotFullyReadyError,
)
from charms.opensearch.v0.opensearch_health import HealthColors
from charms.opensearch.v0.opensearch_keystore import OpenSearchKeystore
from charms.opensearch.v0.opensearch_plugins import (
OpenSearchBackupPlugin,
OpenSearchKnn,
OpenSearchPlugin,
OpenSearchPluginConfig,
Expand All @@ -26,7 +29,6 @@
OpenSearchPluginInstallError,
OpenSearchPluginMissingConfigError,
OpenSearchPluginMissingDepsError,
OpenSearchPluginRelationClusterNotReadyError,
OpenSearchPluginRemoveError,
PluginState,
)
Expand Down Expand Up @@ -140,19 +142,41 @@ def run(self) -> bool:
# The classes above should then defer their own events in waiting.
# Defer is important as next steps to configure plugins will involve
# calls to the APIs of the cluster.
raise OpenSearchPluginRelationClusterNotReadyError()
logger.info("Cluster not ready, wait for the next event...")
raise OpenSearchNotFullyReadyError()

err_msgs = []
restart_needed = False
for plugin in self.plugins:
restart_needed = any(
[
self._install_if_needed(plugin),
self._configure_if_needed(plugin),
self._disable_if_needed(plugin),
self._remove_if_needed(plugin),
restart_needed,
]
)
# These are independent plugins.
# Any plugin that errors, if that is an OpenSearchPluginError, then
# it is an "expected" error, such as missing additional config; should
# not influence the execution of other plugins.
# Capture them and raise all of them at the end.
try:
restart_needed = any(
[
self._install_if_needed(plugin),
self._configure_if_needed(plugin),
self._disable_if_needed(plugin),
self._remove_if_needed(plugin),
restart_needed,
]
)
except (
OpenSearchPluginMissingDepsError,
OpenSearchPluginMissingConfigError,
OpenSearchPluginInstallError,
OpenSearchPluginRemoveError,
) as e:
# This is a more serious issue, as we are missing some input from
# the user. The charm should block.
err_msgs.append(str(e))
except OpenSearchPluginError as e:
logger.error(f"Plugin {plugin.name} failed: {str(e)}")

if err_msgs:
raise OpenSearchPluginError("\n".join(err_msgs))
return restart_needed

def _install_plugin(self, plugin: OpenSearchPlugin) -> bool:
Expand All @@ -164,9 +188,7 @@ def _install_plugin(self, plugin: OpenSearchPlugin) -> bool:
if plugin.dependencies:
missing_deps = [dep for dep in plugin.dependencies if dep not in installed_plugins]
if missing_deps:
raise OpenSearchPluginMissingDepsError(
f"Failed to install {plugin.name}, missing dependencies: {missing_deps}"
)
raise OpenSearchPluginMissingDepsError(plugin.name, missing_deps)

# Add the plugin
try:
Expand All @@ -179,9 +201,7 @@ def _install_plugin(self, plugin: OpenSearchPlugin) -> bool:
# Check for dependencies
missing_deps = [dep for dep in plugin.dependencies if dep not in installed_plugins]
if missing_deps:
raise OpenSearchPluginMissingDepsError(
f"Failed to install {plugin.name}, missing dependencies: {missing_deps}"
)
raise OpenSearchPluginMissingDepsError(plugin.name, missing_deps)

self._opensearch.run_bin("opensearch-plugin", f"install --batch {plugin.name}")
except KeyError as e:
Expand All @@ -191,7 +211,7 @@ def _install_plugin(self, plugin: OpenSearchPlugin) -> bool:
logger.info(f"Plugin {plugin.name} already installed, continuing...")
# Nothing installed, as plugin already exists
return False
raise OpenSearchPluginInstallError(f"Failed to install plugin {plugin.name}: {e}")
raise OpenSearchPluginInstallError(plugin.name)
# Install successful
return True

Expand All @@ -212,16 +232,13 @@ def _install_if_needed(self, plugin: OpenSearchPlugin) -> bool:
def _configure_if_needed(self, plugin: OpenSearchPlugin) -> bool:
"""Gathers all the configuration changes needed and applies them."""
try:
if (
not self._user_requested_to_enable(plugin)
or self.status(plugin) != PluginState.INSTALLED
):
if self.status(plugin) != PluginState.INSTALLED:
# Leave this method if either user did not request to enable this plugin
# or plugin has been already enabled.
return False
return self.apply_config(plugin.config())
except KeyError as e:
raise OpenSearchPluginMissingConfigError(e)
raise OpenSearchPluginMissingConfigError(plugin.name, configs=[f"{e}"])

def _disable_if_needed(self, plugin: OpenSearchPlugin) -> bool:
"""If disabled, removes plugin configuration or sets it to other values."""
Expand All @@ -236,7 +253,7 @@ def _disable_if_needed(self, plugin: OpenSearchPlugin) -> bool:
return False
return self.apply_config(plugin.disable())
except KeyError as e:
raise OpenSearchPluginMissingConfigError(e)
raise OpenSearchPluginMissingConfigError(plugin.name, configs=[f"{e}"])

def apply_config(self, config: OpenSearchPluginConfig) -> bool:
"""Runs the configuration changes as passed via OpenSearchPluginConfig.
Expand Down Expand Up @@ -271,13 +288,19 @@ def status(self, plugin: OpenSearchPlugin) -> PluginState:
"""Returns the status for a given plugin."""
if not self._is_installed(plugin):
return PluginState.MISSING
if not self._is_enabled(plugin):
if self._user_requested_to_enable(plugin):
return PluginState.INSTALLED
return PluginState.DISABLED

if self._needs_upgrade(plugin):
return PluginState.WAITING_FOR_UPGRADE
return PluginState.ENABLED

# The _user_request_to_enable comes first, as it ensures there is a relation/config
# set, which will be used by _is_enabled to determine if we are enabled or not.
if not self._user_requested_to_enable(plugin) and not self._is_enabled(plugin):
return PluginState.DISABLED

if self._is_enabled(plugin):
return PluginState.ENABLED

return PluginState.INSTALLED

def _is_installed(self, plugin: OpenSearchPlugin) -> bool:
"""Returns true if plugin is installed."""
Expand All @@ -286,9 +309,10 @@ def _is_installed(self, plugin: OpenSearchPlugin) -> bool:
def _user_requested_to_enable(self, plugin: OpenSearchPlugin) -> bool:
"""Returns True if user requested plugin to be enabled."""
plugin_data = ConfigExposedPlugins[plugin.name]
if not self._charm.config.get(
plugin_data["config"], False
) and not self._is_plugin_relation_set(plugin_data["relation"]):
if not (
self._charm.config.get(plugin_data["config"], False)
or self._is_plugin_relation_set(plugin_data["relation"])
):
# User asked to disable this plugin
return False
return True
Expand Down Expand Up @@ -354,7 +378,7 @@ def _remove_plugin(self, plugin: OpenSearchPlugin) -> bool:
if "not found" in str(e):
logger.info(f"Plugin {plugin.name} to be deleted, not found. Continuing...")
return False
raise OpenSearchPluginRemoveError(f"Failed to remove plugin {plugin.name}: {e}")
raise OpenSearchPluginRemoveError(plugin.name)
return True

def _installed_plugins(self) -> List[str]:
Expand Down
Loading

0 comments on commit d31ba2d

Please sign in to comment.