Skip to content

Commit

Permalink
feat(mria_membership): notify monitoring processes when mria is down
Browse files Browse the repository at this point in the history
The event will be sent in the following cases:
1. A replicant node detects that another (either core or replicant) node is down.
2. A core node detects that a replicant node is down.

Core nodes still track each other only at the Mnesia level (by subscribing to Mnesia system events).

An exception to the above is when Mria is down on a local node.
In this case mria_membership will notify its monitoring processes before terminating.
  • Loading branch information
SergeTupchiy committed Jan 11, 2024
1 parent d260ede commit 16353e0
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
5 changes: 4 additions & 1 deletion src/mria_membership.erl
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,8 @@ handle_info({'DOWN', _MRef, process, DownPid, _Reason},
insert(Member#member{mnesia = stopped});
[] -> ignore
end,
%% It's Mria (not necessarily Mnesia) which is down
notify({mria, down, Node}, State),
?tp(mria_membership_proc_down,
#{registered_name => ?MODULE, node => Node});
_ -> ignore
Expand All @@ -446,7 +448,8 @@ handle_info(Info, State) ->
?LOG(error, "Unexpected info: ~p", [Info]),
{noreply, State}.

terminate(_Reason, _State) ->
terminate(_Reason, State) ->
notify({mria, down, node()}, State),
?terminate_tp,
ok.

Expand Down
17 changes: 16 additions & 1 deletion test/mria_membership_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ t_core_member_is_stopped_core_observes(_) ->
try
{[N0, N1] = Cores, Replicants} = start_core_replicant_cluster(Cluster),
assert_membership(Cores, Replicants),
test_member_is_stopped_replicant_observes(mria_membership_mnesia_down, N1, N0, members)
test_member_is_stopped_node_observes(mria_membership_mnesia_down, N1, N0, members)
after
mria_ct:teardown_cluster(Cluster)
end,
Expand Down Expand Up @@ -338,12 +338,18 @@ t_member_node_down(_) ->
try
{Cores, [N2, _N3] = Replicants} = start_core_replicant_cluster(Cluster),
assert_membership(Cores, Replicants),
ok = erpc:call(N, mria_membership, monitor, [membership, self(), true]),
?wait_async_action(
mria_ct:teardown_cluster([NodeSpec]),
#{ ?snk_kind := mria_membership_insert
, member := #member{node = N, status = down}
, ?snk_meta := #{node := N2}
}),
receive
{membership, {mria, down, N}} -> ok
after 5000 ->
ct:fail("expected_membership_event_not_received")
end,
?assertEqual(1, length(erpc:call(N2, mria_membership, running_core_nodelist, [])))
after
mria_ct:teardown_cluster(Cluster1)
Expand Down Expand Up @@ -453,6 +459,15 @@ test_node_leaves( LeaveKind, JoinKind, LeaveNode, ObserveNode, Seed
?assertEqual(ExpectAfterJoin, erpc:call(ObserveNode, mria_membership, AssertF, [])).

%% Runs the "member is stopped" scenario and additionally checks that the
%% calling process is told about it.
%%
%% The caller is first registered on ObserveNode through
%% mria_membership:monitor/3 (for the `membership' tag), then the shared
%% scenario in test_member_is_stopped_node_observes/4 is executed with the
%% same arguments. Finally the caller's mailbox must contain
%% `{membership, {mria, down, StopNode}}' within 5000 ms; otherwise the
%% test case is failed via ct:fail/1.
test_member_is_stopped_replicant_observes(WaitKind, StopNode, ObserveNode, AssertF) ->
    Subscriber = self(),
    MonitorArgs = [membership, Subscriber, true],
    ok = erpc:call(ObserveNode, mria_membership, monitor, MonitorArgs),
    test_member_is_stopped_node_observes(WaitKind, StopNode, ObserveNode, AssertF),
    %% Already bound, so the receive pattern below matches only this exact term.
    ExpectedEvent = {membership, {mria, down, StopNode}},
    receive
        ExpectedEvent ->
            ok
    after 5000 ->
        ct:fail("expected_membership_event_not_received")
    end.

test_member_is_stopped_node_observes(WaitKind, StopNode, ObserveNode, AssertF) ->
wait_action(WaitKind, StopNode, ObserveNode, mria, stop, []),
%% No leave announce, StopNode must not be deleted from membership table
?assertEqual( [stopped]
Expand Down

0 comments on commit 16353e0

Please sign in to comment.