[DPE-5328] - Fix large deployments for cross models relations (#425)
## Issue

This PR implements [DPE-5328](https://warthogs.atlassian.net/browse/DPE-5328). Namely, it:
- fixes large deployments over cross-model relations by taking the id of the relation that triggered the event into account when refreshing and reading the peer-cluster relation data (see the sketch below)

[DPE-5328]:
https://warthogs.atlassian.net/browse/DPE-5328?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ
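
For context, a minimal sketch of the trigger handshake introduced here, with plain dicts standing in for Juju relation databags. The names `databags`, `provider_refresh` and `consumer_resolve` are illustrative only and are not part of the charm; only the flow mirrors the change.

```python
# Toy model of the fix: the provider tags only the relation that triggered the
# refresh, and the consumer updates only the orchestrator entry named by that tag.

databags: dict[int, dict[str, str]] = {7: {}, 9: {}}  # two cross-model peer-cluster relations


def provider_refresh(event_rel_id: int | None, cluster_type: str) -> None:
    """Provider side: write a 'trigger' marker into the triggering relation only."""
    if event_rel_id is not None:
        databags[event_rel_id]["trigger"] = cluster_type


def consumer_resolve(event_rel_id: int, local_orchestrators: dict[str, int]) -> dict[str, int]:
    """Consumer side: only the '<trigger>_rel_id' entry of the locally stored
    orchestrators view is touched, so the other relations are left intact."""
    trigger = databags[event_rel_id].get("trigger")
    if trigger in {"main", "failover"}:
        local_orchestrators[f"{trigger}_rel_id"] = event_rel_id
    return local_orchestrators


provider_refresh(event_rel_id=7, cluster_type="main")
print(consumer_resolve(7, {"failover_rel_id": 9}))  # {'failover_rel_id': 9, 'main_rel_id': 7}
```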
Mehdi-Bendriss authored Sep 8, 2024
1 parent a887c2b · commit c293705
Showing 1 changed file with 32 additions and 21 deletions.
lib/charms/opensearch/v0/opensearch_relation_peer_cluster.py (53 changes: 32 additions & 21 deletions)

@@ -140,7 +140,7 @@ def _on_peer_cluster_relation_joined(self, event: RelationJoinedEvent):
         if not self.charm.unit.is_leader():
             return

-        self.refresh_relation_data(event, can_defer=False)
+        self.refresh_relation_data(event, event_rel_id=event.relation.id, can_defer=False)

     def _on_peer_cluster_relation_changed(self, event: RelationChangedEvent):
         """Event received by all units in sub-cluster when a new sub-cluster joins the relation."""
@@ -228,7 +228,9 @@ def _on_peer_cluster_relation_departed(self, event: RelationDepartedEvent) -> No
             trigger_rel_id=event.relation.id,
         )

-    def refresh_relation_data(self, event: EventBase, can_defer: bool = True) -> None:
+    def refresh_relation_data(
+        self, event: EventBase, event_rel_id: int | None = None, can_defer: bool = True
+    ) -> None:
         """Refresh the peer cluster rel data (new cm node, admin password change etc.)."""
         if not self.charm.unit.is_leader():
             return
@@ -250,7 +252,8 @@ def refresh_relation_data(self, event: EventBase, can_defer: bool = True) -> Non
         rel_data = self._rel_data(deployment_desc, orchestrators)

         # exit if current cluster should not have been considered a provider
-        if self._notify_if_wrong_integration(rel_data, all_relation_ids):
+        if self._notify_if_wrong_integration(rel_data, all_relation_ids) and event_rel_id:
+            self.delete_from_rel("trigger", rel_id=event_rel_id)
             return

         # store the main/failover-cm planned units count
@@ -260,6 +263,10 @@ def refresh_relation_data(self, event: EventBase, can_defer: bool = True) -> Non
             "main" if deployment_desc.typ == DeploymentType.MAIN_ORCHESTRATOR else "failover"
         )

+        # flag the trigger of the rel changed update on the consumer side
+        if event_rel_id:
+            self.put_in_rel({"trigger": cluster_type}, rel_id=event_rel_id)
+
         # update reported orchestrators on local orchestrator
         orchestrators = orchestrators.to_dict()
         orchestrators[f"{cluster_type}_app"] = deployment_desc.app.to_dict()
@@ -514,8 +521,11 @@ def _on_peer_cluster_relation_changed(self, event: RelationChangedEvent):  # noq
         if not (data := event.relation.data.get(event.app)):
             return

+        # fetch the trigger of this event
+        trigger = data.get("trigger")
+
         # fetch main and failover clusters relations ids if any
-        orchestrators = self._orchestrators(event, data, deployment_desc)
+        orchestrators = self._orchestrators(event, data, trigger)

         # should we add a check where only the failover rel has data while the main has none yet?
         if orchestrators.failover_app and not orchestrators.main_app:
@@ -596,31 +606,32 @@ def _orchestrators(
         self,
         event: RelationChangedEvent,
         data: MutableMapping[str, str],
-        deployment_desc: DeploymentDescription,
+        trigger: Optional[str],
     ) -> PeerClusterOrchestrators:
         """Fetch related orchestrator IDs and App names."""
-        orchestrators = self.get_obj_from_rel(key="orchestrators", rel_id=event.relation.id)
+        remote_orchestrators = self.get_obj_from_rel(key="orchestrators", rel_id=event.relation.id)
+        if not remote_orchestrators:
+            remote_orchestrators = json.loads(data["orchestrators"])

         # fetch the (main/failover)-cluster-orchestrator relations
-        cm_relations = [rel.id for rel in self.model.relations[self.relation_name]]
+        cm_relations = [
+            rel.id
+            for rel in self.model.relations[self.relation_name]
+            if rel.id != event.relation.id
+        ]
         for rel_id in cm_relations:
-            orchestrators.update(self.get_obj_from_rel(key="orchestrators", rel_id=rel_id))
-
-        if not orchestrators:
-            orchestrators = json.loads(data["orchestrators"])
+            remote_orchestrators.update(self.get_obj_from_rel(key="orchestrators", rel_id=rel_id))

-        # handle case where the current is a designated failover
-        if deployment_desc.typ == DeploymentType.FAILOVER_ORCHESTRATOR:
-            local_orchestrators = PeerClusterOrchestrators.from_dict(
-                self.charm.peers_data.get_object(Scope.APP, "orchestrators") or {}
-            )
-            if (
-                local_orchestrators.failover_app
-                and local_orchestrators.failover_app.id == deployment_desc.app.id
-            ):
-                orchestrators["failover_app"] = local_orchestrators.failover_app.to_dict()
+        local_orchestrators = self.charm.peers_data.get_object(Scope.APP, "orchestrators") or {}
+        if trigger in {"main", "failover"}:
+            local_orchestrators.update(
+                {
+                    f"{trigger}_rel_id": event.relation.id,
+                    f"{trigger}_app": remote_orchestrators[f"{trigger}_app"],
+                }
+            )

-        return PeerClusterOrchestrators.from_dict(orchestrators)
+        return PeerClusterOrchestrators.from_dict(local_orchestrators)

     def _put_current_app(
         self, event: RelationEvent, deployment_desc: DeploymentDescription
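
To illustrate why the consumer side of `_orchestrators` now merges per-relation data and then applies only the entry named by the trigger, here is a hedged, dict-based sketch of the merge order. The relation ids and app names below are hypothetical, and direct dict reads stand in for the charm's relation-databag lookups.

```python
# Sketch of the new merge order, using plain dicts. Ids and names are made up;
# only the ordering (event relation, then other relations, then trigger entry) mirrors the change.

event_rel_id = 12
relation_data = {
    12: {"orchestrators": {"main_app": "opensearch-main"}},          # relation that fired the event
    15: {"orchestrators": {"failover_app": "opensearch-failover"}},  # another peer-cluster relation
}

# 1. start from the data carried by the event relation
remote = dict(relation_data[event_rel_id]["orchestrators"])

# 2. merge the other peer-cluster relations, skipping the event relation itself
for rel_id, bag in relation_data.items():
    if rel_id != event_rel_id:
        remote.update(bag["orchestrators"])

# 3. apply only the entry named by the trigger onto the locally stored view,
#    so entries belonging to other relations are preserved
local = {"failover_rel_id": 15, "failover_app": "opensearch-failover"}
trigger = "main"
if trigger in {"main", "failover"}:
    local.update({f"{trigger}_rel_id": event_rel_id, f"{trigger}_app": remote[f"{trigger}_app"]})

print(local)
# {'failover_rel_id': 15, 'failover_app': 'opensearch-failover', 'main_rel_id': 12, 'main_app': 'opensearch-main'}
```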
