From d7482e6f81b6e12bb6a524572c7d0d73e51e1a4d Mon Sep 17 00:00:00 2001 From: Mark Kemel Date: Sun, 13 Oct 2024 22:47:38 +0300 Subject: [PATCH] Make Agent Register request asynchronous The "Register" agent->controller method call could get stuck when the IP address configured as ControllerHost does not exist. This timeout of about 2 seconds could completely occupy the event loop and would not allow other calls on the DBus API of the Agent. Making the "Register" method call asynchronous we allow the event loop to stay unoccupied and be able to accept SwitchController request. Resolves: https://github.com/eclipse-bluechi/bluechi/issues/950 Signed-off-by: Mark Kemel --- src/agent/agent.c | 136 +++++++++++++++++++++++++++------------ src/agent/agent.h | 5 +- src/libbluechi/bus/bus.c | 3 +- 3 files changed, 100 insertions(+), 44 deletions(-) diff --git a/src/agent/agent.c b/src/agent/agent.c index 03f99f4192..7f6d9c1ea5 100644 --- a/src/agent/agent.c +++ b/src/agent/agent.c @@ -169,6 +169,17 @@ static int agent_heartbeat_timer_callback(sd_event_source *event_source, UNUSED Agent *agent = (Agent *) userdata; int r = 0; + /* Being in CONNECTING state when the timer callback is executed implies that + * the agent hasn't received a reply from the controller yet. In this case, + * we drop the existing register call and the agent is set into RETRY state.*/; + if (agent->connection_state == AGENT_CONNECTION_STATE_CONNECTING) { + bc_log_error("Agent connection attempt failed, retrying"); + if (agent->register_call_slot != NULL) { + sd_bus_slot_unrefp(&agent->register_call_slot); + agent->register_call_slot = NULL; + } + agent->connection_state = AGENT_CONNECTION_STATE_RETRY; + } if (agent->connection_state == AGENT_CONNECTION_STATE_CONNECTED && agent_check_controller_liveness(agent)) { r = sd_bus_emit_signal( @@ -532,6 +543,9 @@ void agent_unref(Agent *agent) { sd_event_unrefp(&agent->event); } + if (agent->register_call_slot != NULL) { + sd_bus_slot_unrefp(&agent->register_call_slot); + } if (agent->metrics_slot != NULL) { sd_bus_slot_unrefp(&agent->metrics_slot); } @@ -2694,51 +2708,18 @@ void agent_stop(Agent *agent) { } } -static bool agent_connect(Agent *agent) { - peer_bus_close(agent->peer_dbus); - - bc_log_infof("Connecting to controller on %s", agent->assembled_controller_address); - - agent->peer_dbus = peer_bus_open( - agent->event, "peer-bus-to-controller", agent->assembled_controller_address); - if (agent->peer_dbus == NULL) { - bc_log_error("Failed to open peer dbus"); - return false; - } - - bus_socket_set_options(agent->peer_dbus, agent->peer_socket_options); - - int r = sd_bus_add_object_vtable( - agent->peer_dbus, - NULL, - INTERNAL_AGENT_OBJECT_PATH, - INTERNAL_AGENT_INTERFACE, - internal_agent_vtable, - agent); - if (r < 0) { - bc_log_errorf("Failed to add agent vtable: %s", strerror(-r)); - return false; - } +static bool agent_process_register_callback(sd_bus_message *m, Agent *agent) { + int r = 0; - _cleanup_sd_bus_message_ sd_bus_message *bus_msg = NULL; - sd_bus_error error = SD_BUS_ERROR_NULL; - r = sd_bus_call_method( - agent->peer_dbus, - BC_DBUS_NAME, - INTERNAL_CONTROLLER_OBJECT_PATH, - INTERNAL_CONTROLLER_INTERFACE, - "Register", - &error, - &bus_msg, - "s", - agent->name); - if (r < 0) { - bc_log_errorf("Registering as '%s' failed: %s", agent->name, error.message); - sd_bus_error_free(&error); - return false; + if (sd_bus_message_is_method_error(m, NULL)) { + bc_log_errorf("Registering as '%s' failed: %s", + agent->name, + sd_bus_message_get_error(m)->message); + return -EPERM; } - r = sd_bus_message_read(bus_msg, ""); + bc_log_info("Register call response received"); + r = sd_bus_message_read(m, ""); if (r < 0) { bc_log_errorf("Failed to parse response message: %s", strerror(-r)); return false; @@ -2805,6 +2786,70 @@ static bool agent_connect(Agent *agent) { return true; } +static int agent_register_callback(sd_bus_message *m, void *userdata, UNUSED sd_bus_error *ret_error) { + Agent *agent = (Agent *) userdata; + + if (agent->connection_state != AGENT_CONNECTION_STATE_CONNECTING) { + bc_log_error("Agent is not in CONNECTING state, dropping Register callback"); + if (agent->register_call_slot != NULL) { + sd_bus_slot_unrefp(&agent->register_call_slot); + agent->register_call_slot = NULL; + } + return 0; + } + + if (!agent_process_register_callback(m, agent)) { + agent->connection_state = AGENT_CONNECTION_STATE_RETRY; + } + + return 0; +} + +static bool agent_connect(Agent *agent) { + bc_log_infof("Connecting to controller on %s", agent->assembled_controller_address); + agent->connection_state = AGENT_CONNECTION_STATE_CONNECTING; + + agent->peer_dbus = peer_bus_open( + agent->event, "peer-bus-to-controller", agent->assembled_controller_address); + if (agent->peer_dbus == NULL) { + bc_log_error("Failed to open peer dbus"); + return false; + } + + bus_socket_set_options(agent->peer_dbus, agent->peer_socket_options); + + int r = sd_bus_add_object_vtable( + agent->peer_dbus, + NULL, + INTERNAL_AGENT_OBJECT_PATH, + INTERNAL_AGENT_INTERFACE, + internal_agent_vtable, + agent); + if (r < 0) { + bc_log_errorf("Failed to add agent vtable: %s", strerror(-r)); + return false; + } + + _cleanup_sd_bus_message_ sd_bus_message *bus_msg = NULL; + r = sd_bus_call_method_async( + agent->peer_dbus, + &agent->register_call_slot, + BC_DBUS_NAME, + INTERNAL_CONTROLLER_OBJECT_PATH, + INTERNAL_CONTROLLER_INTERFACE, + "Register", + agent_register_callback, + agent, + "s", + agent->name); + if (r < 0) { + bc_log_errorf("Registering as '%s' failed: %s", agent->name, strerror(-r)); + return false; + } + + return true; +} + static bool agent_reconnect(Agent *agent) { _cleanup_free_ char *assembled_controller_address = NULL; @@ -2827,6 +2872,13 @@ static bool agent_reconnect(Agent *agent) { } } + if (agent->register_call_slot != NULL) { + sd_bus_slot_unref(agent->register_call_slot); + agent->register_call_slot = NULL; + } + peer_bus_close(agent->peer_dbus); + agent->peer_dbus = NULL; + return agent_connect(agent); } diff --git a/src/agent/agent.h b/src/agent/agent.h index 4a05a5fe8f..f194404f80 100644 --- a/src/agent/agent.h +++ b/src/agent/agent.h @@ -48,7 +48,8 @@ typedef struct JobTracker JobTracker; typedef enum { AGENT_CONNECTION_STATE_DISCONNECTED, AGENT_CONNECTION_STATE_CONNECTED, - AGENT_CONNECTION_STATE_RETRY + AGENT_CONNECTION_STATE_RETRY, + AGENT_CONNECTION_STATE_CONNECTING } AgentConnectionState; struct Agent { @@ -81,6 +82,8 @@ struct Agent { sd_bus *systemd_dbus; sd_bus *peer_dbus; + sd_bus_slot *register_call_slot; + bool metrics_enabled; sd_bus_slot *metrics_slot; diff --git a/src/libbluechi/bus/bus.c b/src/libbluechi/bus/bus.c index f4a6eca611..2a7bc47661 100644 --- a/src/libbluechi/bus/bus.c +++ b/src/libbluechi/bus/bus.c @@ -77,13 +77,14 @@ sd_bus *peer_bus_open(sd_event *event, const char *dbus_description, const char int peer_bus_close(sd_bus *peer_dbus) { if (peer_dbus != NULL) { + bc_log_debug("Closing peer bus"); int r = sd_bus_detach_event(peer_dbus); if (r < 0) { bc_log_errorf("Failed to detach bus from event: %s", strerror(-r)); return r; } - sd_bus_flush_close_unref(peer_dbus); + sd_bus_close_unref(peer_dbus); } return 0;