Skip to content

Commit

Permalink
rabbit_feature_flags: Protect concurrent reloads of the registry
Browse files Browse the repository at this point in the history
[Why]
When a node joins another node, it resets its feature flags registry
(the registry is unloaded) and it copies the feature flags states from
the remote cluster.

Before this patch, there was a small window where a concurrent use of
the registry right between these two steps would reload a registry from
the default/empty states, which does not reflect the intent.

This could happen because another node is running peer discovery and
queries the cluster membership of the node joining a cluster.

[How]
We acquire the registry reload lock around the reset+copy and in
`rabbit_ff_registry_wrapper`.
  • Loading branch information
dumbbell committed Dec 4, 2023
1 parent 4ec0ca9 commit a1ab343
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 8 deletions.
19 changes: 17 additions & 2 deletions deps/rabbit/src/rabbit_db_cluster.erl
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,23 @@ join(RemoteNode, NodeType)
end
end,

ok = rabbit_db:reset(),
rabbit_feature_flags:copy_feature_states_after_reset(RemoteNode),
%% We acquire the feature flags registry reload lock because
%% between the time we reset the registry (as part of
%% `rabbit_db:reset/0' and the states copy from the remote node,
%% there could be a concurrent reload of the registry (for instance
%% because of peer discovery on another node) with the
%% default/empty states.
%%
%% To make this work, the lock is also acquired from
%% `rabbit_ff_registry_wrapper'.
rabbit_ff_registry_factory:acquire_state_change_lock(),
try
ok = rabbit_db:reset(),
rabbit_feature_flags:copy_feature_states_after_reset(
RemoteNode)
after
rabbit_ff_registry_factory:release_state_change_lock()
end,

?LOG_INFO(
"DB: joining cluster using remote nodes:~n~tp", [ClusterNodes],
Expand Down
26 changes: 20 additions & 6 deletions deps/rabbit/src/rabbit_ff_registry_wrapper.erl
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
get(FeatureName) ->
case rabbit_ff_registry:get(FeatureName) of
init_required ->
_ = rabbit_ff_registry_factory:initialize_registry(),
initialize_registry(),
get(FeatureName);
Ret ->
Ret
Expand All @@ -74,7 +74,7 @@ get(FeatureName) ->
list(Which) ->
case rabbit_ff_registry:list(Which) of
init_required ->
_ = rabbit_ff_registry_factory:initialize_registry(),
initialize_registry(),
list(Which);
Ret ->
Ret
Expand All @@ -93,7 +93,7 @@ list(Which) ->
states() ->
case rabbit_ff_registry:states() of
init_required ->
_ = rabbit_ff_registry_factory:initialize_registry(),
initialize_registry(),
states();
Ret ->
Ret
Expand All @@ -115,7 +115,7 @@ states() ->
is_supported(FeatureName) ->
case rabbit_ff_registry:is_supported(FeatureName) of
init_required ->
_ = rabbit_ff_registry_factory:initialize_registry(),
initialize_registry(),
is_supported(FeatureName);
Ret ->
Ret
Expand All @@ -137,7 +137,7 @@ is_supported(FeatureName) ->
is_enabled(FeatureName) ->
case rabbit_ff_registry:is_enabled(FeatureName) of
init_required ->
_ = rabbit_ff_registry_factory:initialize_registry(),
initialize_registry(),
is_enabled(FeatureName);
Ret ->
Ret
Expand All @@ -150,8 +150,22 @@ is_enabled(FeatureName) ->
inventory() ->
case rabbit_ff_registry:inventory() of
init_required ->
_ = rabbit_ff_registry_factory:initialize_registry(),
initialize_registry(),
inventory();
Ret ->
Ret
end.

initialize_registry() ->
%% We acquire the feature flags registry reload lock here to make sure we
%% don't reload the registry in the middle of a cluster join. Indeed, the
%% registry is reset and feature flags states are copied from a remote
%% node. Therefore, there is a small window where the registry is not
%% loaded and the states on disk do not reflect the intent.
rabbit_ff_registry_factory:acquire_state_change_lock(),
try
_ = rabbit_ff_registry_factory:initialize_registry(),
ok
after
rabbit_ff_registry_factory:release_state_change_lock()
end.

0 comments on commit a1ab343

Please sign in to comment.