diff --git a/deps/rabbit/src/rabbit_db_cluster.erl b/deps/rabbit/src/rabbit_db_cluster.erl index e54af8b75e37..b4121fa5e6e0 100644 --- a/deps/rabbit/src/rabbit_db_cluster.erl +++ b/deps/rabbit/src/rabbit_db_cluster.erl @@ -118,8 +118,23 @@ join(RemoteNode, NodeType) end end, - ok = rabbit_db:reset(), - rabbit_feature_flags:copy_feature_states_after_reset(RemoteNode), + %% We acquire the feature flags registry reload lock because + %% between the time we reset the registry (as part of + %% `rabbit_db:reset/0' and the states copy from the remote node, + %% there could be a concurrent reload of the registry (for instance + %% because of peer discovery on another node) with the + %% default/empty states. + %% + %% To make this work, the lock is also acquired from + %% `rabbit_ff_registry_wrapper'. + rabbit_ff_registry_factory:acquire_state_change_lock(), + try + ok = rabbit_db:reset(), + rabbit_feature_flags:copy_feature_states_after_reset( + RemoteNode) + after + rabbit_ff_registry_factory:release_state_change_lock() + end, ?LOG_INFO( "DB: joining cluster using remote nodes:~n~tp", [ClusterNodes], diff --git a/deps/rabbit/src/rabbit_ff_registry_wrapper.erl b/deps/rabbit/src/rabbit_ff_registry_wrapper.erl index 5d8555e84a0f..904ed3e010f1 100644 --- a/deps/rabbit/src/rabbit_ff_registry_wrapper.erl +++ b/deps/rabbit/src/rabbit_ff_registry_wrapper.erl @@ -52,7 +52,7 @@ get(FeatureName) -> case rabbit_ff_registry:get(FeatureName) of init_required -> - _ = rabbit_ff_registry_factory:initialize_registry(), + initialize_registry(), get(FeatureName); Ret -> Ret @@ -74,7 +74,7 @@ get(FeatureName) -> list(Which) -> case rabbit_ff_registry:list(Which) of init_required -> - _ = rabbit_ff_registry_factory:initialize_registry(), + initialize_registry(), list(Which); Ret -> Ret @@ -93,7 +93,7 @@ list(Which) -> states() -> case rabbit_ff_registry:states() of init_required -> - _ = rabbit_ff_registry_factory:initialize_registry(), + initialize_registry(), states(); Ret -> Ret @@ -115,7 +115,7 @@ states() -> is_supported(FeatureName) -> case rabbit_ff_registry:is_supported(FeatureName) of init_required -> - _ = rabbit_ff_registry_factory:initialize_registry(), + initialize_registry(), is_supported(FeatureName); Ret -> Ret @@ -137,7 +137,7 @@ is_supported(FeatureName) -> is_enabled(FeatureName) -> case rabbit_ff_registry:is_enabled(FeatureName) of init_required -> - _ = rabbit_ff_registry_factory:initialize_registry(), + initialize_registry(), is_enabled(FeatureName); Ret -> Ret @@ -150,8 +150,22 @@ is_enabled(FeatureName) -> inventory() -> case rabbit_ff_registry:inventory() of init_required -> - _ = rabbit_ff_registry_factory:initialize_registry(), + initialize_registry(), inventory(); Ret -> Ret end. + +initialize_registry() -> + %% We acquire the feature flags registry reload lock here to make sure we + %% don't reload the registry in the middle of a cluster join. Indeed, the + %% registry is reset and feature flags states are copied from a remote + %% node. Therefore, there is a small window where the registry is not + %% loaded and the states on disk do not reflect the intent. + rabbit_ff_registry_factory:acquire_state_change_lock(), + try + _ = rabbit_ff_registry_factory:initialize_registry(), + ok + after + rabbit_ff_registry_factory:release_state_change_lock() + end.