Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rabbit_feature_flags: Protect concurrent reloads of the registry #10027

Merged
merged 1 commit into from
Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions deps/rabbit/src/rabbit_db_cluster.erl
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,23 @@ join(RemoteNode, NodeType)
end
end,

ok = rabbit_db:reset(),
rabbit_feature_flags:copy_feature_states_after_reset(RemoteNode),
%% We acquire the feature flags registry reload lock because
%% between the time we reset the registry (as part of
%% `rabbit_db:reset/0' and the states copy from the remote node,
%% there could be a concurrent reload of the registry (for instance
%% because of peer discovery on another node) with the
%% default/empty states.
%%
%% To make this work, the lock is also acquired from
%% `rabbit_ff_registry_wrapper'.
rabbit_ff_registry_factory:acquire_state_change_lock(),
try
ok = rabbit_db:reset(),
rabbit_feature_flags:copy_feature_states_after_reset(
RemoteNode)
after
rabbit_ff_registry_factory:release_state_change_lock()
end,

?LOG_INFO(
"DB: joining cluster using remote nodes:~n~tp", [ClusterNodes],
Expand Down
26 changes: 20 additions & 6 deletions deps/rabbit/src/rabbit_ff_registry_wrapper.erl
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
get(FeatureName) ->
case rabbit_ff_registry:get(FeatureName) of
init_required ->
_ = rabbit_ff_registry_factory:initialize_registry(),
initialize_registry(),
get(FeatureName);
Ret ->
Ret
Expand All @@ -74,7 +74,7 @@ get(FeatureName) ->
list(Which) ->
case rabbit_ff_registry:list(Which) of
init_required ->
_ = rabbit_ff_registry_factory:initialize_registry(),
initialize_registry(),
list(Which);
Ret ->
Ret
Expand All @@ -93,7 +93,7 @@ list(Which) ->
states() ->
case rabbit_ff_registry:states() of
init_required ->
_ = rabbit_ff_registry_factory:initialize_registry(),
initialize_registry(),
states();
Ret ->
Ret
Expand All @@ -115,7 +115,7 @@ states() ->
is_supported(FeatureName) ->
case rabbit_ff_registry:is_supported(FeatureName) of
init_required ->
_ = rabbit_ff_registry_factory:initialize_registry(),
initialize_registry(),
is_supported(FeatureName);
Ret ->
Ret
Expand All @@ -137,7 +137,7 @@ is_supported(FeatureName) ->
is_enabled(FeatureName) ->
case rabbit_ff_registry:is_enabled(FeatureName) of
init_required ->
_ = rabbit_ff_registry_factory:initialize_registry(),
initialize_registry(),
is_enabled(FeatureName);
Ret ->
Ret
Expand All @@ -150,8 +150,22 @@ is_enabled(FeatureName) ->
inventory() ->
case rabbit_ff_registry:inventory() of
init_required ->
_ = rabbit_ff_registry_factory:initialize_registry(),
initialize_registry(),
inventory();
Ret ->
Ret
end.

initialize_registry() ->
%% We acquire the feature flags registry reload lock here to make sure we
%% don't reload the registry in the middle of a cluster join. Indeed, the
%% registry is reset and feature flags states are copied from a remote
%% node. Therefore, there is a small window where the registry is not
%% loaded and the states on disk do not reflect the intent.
rabbit_ff_registry_factory:acquire_state_change_lock(),
try
_ = rabbit_ff_registry_factory:initialize_registry(),
ok
after
rabbit_ff_registry_factory:release_state_change_lock()
end.
Loading