From 3010f721a8f132eef5cc3d401e4b15b25d1b30d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Mon, 15 Jul 2024 16:36:47 +0200 Subject: [PATCH] Add reg test for 'busy/inf' loops on client SSL conns --- .../reg_test__ssl_client_busy_wait-t.cpp | 323 ++++++++++++++++++ 1 file changed, 323 insertions(+) create mode 100644 test/tap/tests/reg_test__ssl_client_busy_wait-t.cpp diff --git a/test/tap/tests/reg_test__ssl_client_busy_wait-t.cpp b/test/tap/tests/reg_test__ssl_client_busy_wait-t.cpp new file mode 100644 index 0000000000..e940f65968 --- /dev/null +++ b/test/tap/tests/reg_test__ssl_client_busy_wait-t.cpp @@ -0,0 +1,323 @@ +/** + * @file reg_test_3273_ssl_con-t.cpp + * @brief Regression test for SSL busy/infinite loops for frontend connections. + * @details When client disconnects unexpectedly closing the socket on a SSL connection, depending on the + * timing conditions, either an infinite loop or a busy loop could take place. These scenarios are: + * 1. Closed socket while query running on backend (before data arrives), leads to busy loop. + * 2. Closed socket after all the data has been written into the socket, since no more writing would take + * place in the socket an infinite loop would take place. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + + +#include "mysql.h" + +#include "tap.h" +#include "command_line.h" +#include "utils.h" + +using std::string; +using std::vector; + +/* Helper function to do the waiting for events on the socket. */ +static int wait_for_mysql(MYSQL *mysql, int status) { + struct pollfd pfd; + int timeout, res; + + pfd.fd = mysql_get_socket(mysql); + pfd.events = + (status & MYSQL_WAIT_READ ? POLLIN : 0) | + (status & MYSQL_WAIT_WRITE ? POLLOUT : 0) | + (status & MYSQL_WAIT_EXCEPT ? POLLPRI : 0); + if (status & MYSQL_WAIT_TIMEOUT) + timeout = 1000*mysql_get_timeout_value(mysql); + else + timeout = -1; + res = poll(&pfd, 1, timeout); + if (res == 0) + return MYSQL_WAIT_TIMEOUT; + else if (res < 0) + return MYSQL_WAIT_TIMEOUT; + else { + int status = 0; + if (pfd.revents & POLLIN) status |= MYSQL_WAIT_READ; + if (pfd.revents & POLLOUT) status |= MYSQL_WAIT_WRITE; + if (pfd.revents & POLLPRI) status |= MYSQL_WAIT_EXCEPT; + return status; + } +} + +enum BUSY_LOOP_T { + BUSY_LOOP=0, + INF_LOOP=1 +}; + +struct th_args__in_t { + // Input + int argc { 0 }; + char** argv { nullptr }; + int secs { 0 }; + int busy_loop_type = BUSY_LOOP_T::BUSY_LOOP; + CommandLine& cl; +}; + +struct th_args__out_t { + // Output + volatile int* query_started { nullptr }; + volatile int* routine_rc { nullptr }; +}; + +struct th_args_t { + th_args__in_t in_args; + th_args__out_t out_args {}; +}; + +void* perform_async_query(void* arg) { + th_args_t* th_args = static_cast(arg); + + MYSQL* mysql = nullptr; + + { + CommandLine& cl = th_args->in_args.cl; + mysql = mysql_init(NULL); + MYSQL* ret = NULL; + int query_ret = 0; + + mysql_options(mysql, MYSQL_OPT_NONBLOCK, 0); + mysql_ssl_set(mysql, NULL, NULL, NULL, NULL, NULL); + + int status = 0; + + if (th_args->in_args.argc == 2 && (strcmp(th_args->in_args.argv[1], "admin") == 0)) { + status = mysql_real_connect_start( + &ret, mysql, cl.host, "radmin", "radmin", NULL, 6032, NULL, CLIENT_SSL + ); + diag("Creating 'Admin' connection thread=%ld ret=%p status=%d", pthread_self(), ret, status); + } else { + status = mysql_real_connect_start( + &ret, mysql, cl.host, cl.username, cl.password, NULL, cl.port, NULL, CLIENT_SSL + ); + + diag("Creating 'MySQL' connection thread=%ld ret=%p status=%d", pthread_self(), ret, status); + } + if (status == 0) { + fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(mysql)); + __sync_fetch_and_add(th_args->out_args.routine_rc, 1); + return NULL; + } + + diag("Continue connection establishment thread=%ld ret=%p status=%d", pthread_self(), ret, status); + while (status) { + status = wait_for_mysql(mysql, status); + status = mysql_real_connect_cont(&ret, mysql, status); + diag("'mysql_real_connect_cont' thread=%ld ret=%p status=%d", pthread_self(), ret, status); + } + + // NOTE: mariadbclient has an incompatibility between SSL and NONBLOCK flags. Flag needs to be reset + // after 'mysql_real_connect_cont', otherwise API would become blocking. + mysql_options(mysql, MYSQL_OPT_NONBLOCK, 0); + int f=fcntl(mysql->net.fd, F_GETFL); + fcntl(mysql->net.fd, F_SETFL, f|O_NONBLOCK); + + const string sleep_query { "SELECT SLEEP(" + std::to_string(th_args->in_args.secs) + ")" }; + diag("mysql_query_start thread=%ld query=%s", pthread_self(), sleep_query.c_str()); + + status = mysql_real_query_start(&query_ret, mysql, sleep_query.c_str(), sleep_query.size()); + + if (th_args->in_args.busy_loop_type == BUSY_LOOP_T::INF_LOOP) { + // NOTE: When signaling after 'mysql_query_start' has finished, ProxySQL wont attempt to write more + // to the closed pipe, this corresponds to 'busy_loop_type=2' and infinite loop. + while (status) { + status = wait_for_mysql(mysql, status); + status = mysql_real_query_cont(&query_ret, mysql, status); + diag("'mysql_real_connect_cont' thread=%ld ret=%p status=%d", pthread_self(), ret, status); + } + } + + diag("Signaling query start thread=%ld status=%d query_ret=%d", pthread_self(), status, query_ret); + __sync_fetch_and_add(th_args->out_args.query_started, 1); + + // NOTE: Required for triggering the issue, thread exit isn't enough, either 'process exit' or + // 'close()'. They should be immediate to the previous action, otherwise timing could be invalid. + close(mysql->net.fd); + + while (true) { + diag( + "Sleeping after query started... thread=%ld status=%d query_ret=%d", + pthread_self(), status, query_ret + ); + sleep(1); + } + } + + return NULL; +} + +struct pthread_data_t { + pthread_t id { 0 }; + int query_started { 0 }; + int routine_rc { EXIT_SUCCESS }; +}; + +const int BUSY_THREADS = get_env_int("TAP_SSL_BUSY_WAIT__BUSY_THREADS", 4); +const int BUSY_WAIT_SECS = get_env_int("TAP_SSL_BUSY_WAIT__BUSY_WAIT_SECS", 5); +const int MAX_IDLE_CPU = get_env_int("TAP_SSL_BUSY_WAIT__MAX_IDLE_CPU", 20); +const int MAX_BUSY_CPU = get_env_int("TAP_SSL_BUSY_WAIT__MAX_BUSY_CPU", 25); +const int SAMPLE_INTV_SECS = get_env_int("TAP_SSL_BUSY_WAIT__SAMPLE_INTV_SEC", 2); + +void create_busy_loops(int argc, char** argv, CommandLine& cl, BUSY_LOOP_T loop_type) { + vector ths_data {}; + vector> ths_args {}; + + ths_data.resize(BUSY_THREADS); + + for (size_t i = 0; i < BUSY_THREADS; i++) { + pthread_data_t& th_data = ths_data[i]; + std::unique_ptr th_args { + new th_args_t { + th_args__in_t { + argc, argv, BUSY_WAIT_SECS, loop_type, cl + }, + th_args__out_t { + &th_data.query_started, + &th_data.routine_rc + } + } + }; + + pthread_create(&th_data.id, NULL, perform_async_query, th_args.get()); + ths_args.push_back(std::move(th_args)); + + diag("Thread created thread=%ld", th_data.id); + } + + bool missing_query = true; + bool query_failed = false; + + while (missing_query && !query_failed) { + bool all_query_started = true; + + for (pthread_data_t& th_data : ths_data) { + bool query_started = __sync_fetch_and_add(&th_data.query_started, 0); + diag( + "Thread data thread=%ld routine_rc=%d query_started=%d", + th_data.id, th_data.routine_rc, query_started + ); + + if (th_data.id == 0 && query_started == 1) { + diag( + "Thread alreay cancelled thread=%ld routine_rc=%d query_started=%d", + th_data.id, th_data.routine_rc, query_started + ); + continue; + } + + query_failed = __sync_fetch_and_add(&th_data.routine_rc, 0); + + if (query_failed) { + diag( + "Async query failed; aborting test thread=%ld routine_rc=%d query_started=%d", + th_data.id, th_data.routine_rc, query_started + ); + break; + } + + all_query_started &= query_started; + + if (query_started) { + diag( + "Async query started, killing thread thread=%ld routine_rc=%d query_started=%d", + th_data.id, th_data.routine_rc, query_started + ); + pthread_cancel(th_data.id); + th_data.id = 0; + } else { + diag( + "Waiting for async query to start... thread=%ld routine_rc=%d query_started=%d", + th_data.id, th_data.routine_rc, query_started + ); + } + } + + missing_query = !all_query_started; + usleep(500 * 1000); + } +} + +int main(int argc, char** argv) { + CommandLine cl; + + if (cl.getEnv()) { + diag("Failed to get the required environmental variables."); + return -1; + } + + plan(2); + + diag("Checking ProxySQL idle CPU usage"); + double idle_cpu = 0; + int ret_i_cpu = get_proxysql_cpu_usage(cl, SAMPLE_INTV_SECS, idle_cpu); + if (ret_i_cpu) { + diag("Getting initial CPU usage failed with error - %d", ret_i_cpu); + diag("Aborting further testing"); + + return EXIT_FAILURE; + } + + ok( + idle_cpu < MAX_IDLE_CPU, "Idle CPU usage should be below expected - Exp:%d%%, Act: %lf%%", + MAX_IDLE_CPU, idle_cpu + ); + + create_busy_loops(argc, argv, cl, BUSY_LOOP_T::BUSY_LOOP); + + double final_cpu_usage = 0; + int ret_f_cpu = get_proxysql_cpu_usage(cl, SAMPLE_INTV_SECS, final_cpu_usage); + diag("Getting the final CPU usage returned - %d", ret_f_cpu); + + ok( + final_cpu_usage < MAX_BUSY_CPU, + "ProxySQL CPU usage should be below expected - Exp: %d%%, Act: %lf%%", + MAX_BUSY_CPU, final_cpu_usage + ); + + // Extra wait to ensure cleanup of faulty client conns + sleep(BUSY_WAIT_SECS < 5 ? 5 : BUSY_WAIT_SECS / 2); + + ret_i_cpu = get_proxysql_cpu_usage(cl, SAMPLE_INTV_SECS, idle_cpu); + if (ret_i_cpu) { + diag("Getting initial CPU usage failed with error - %d", ret_i_cpu); + diag("Aborting further testing"); + + return EXIT_FAILURE; + } + + ok( + idle_cpu < MAX_IDLE_CPU, "Idle CPU usage should be below expected - Exp:%d%%, Act: %lf%%", + MAX_IDLE_CPU, idle_cpu + ); + + create_busy_loops(argc, argv, cl, BUSY_LOOP_T::INF_LOOP); + + final_cpu_usage = 0; + ret_f_cpu = get_proxysql_cpu_usage(cl, SAMPLE_INTV_SECS, final_cpu_usage); + diag("Getting the final CPU usage returned - %d", ret_f_cpu); + + ok( + final_cpu_usage < MAX_BUSY_CPU, + "ProxySQL CPU usage should be below expected - Exp: %d%%, Act: %lf%%", + MAX_BUSY_CPU, final_cpu_usage + ); + + + return exit_status(); +}