From 6c9e4c058b9bfe9aebfff5f05991f2a3a6dd4021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Fri, 11 Aug 2023 08:30:48 +0200 Subject: [PATCH 1/3] Fix infinite loop for SSL connections - Closes #4314 If a peer closes the socket under certain timing conditions, e.g. during connection establishment, an infinite loop will take place if 'POLLHUP' and 'POLLIN' are set in the socket, and no data is present, just the final 'EOF'. --- lib/mysql_data_stream.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/mysql_data_stream.cpp b/lib/mysql_data_stream.cpp index f27652ff5b..8b51556075 100644 --- a/lib/mysql_data_stream.cpp +++ b/lib/mysql_data_stream.cpp @@ -667,9 +667,10 @@ int MySQL_Data_Stream::read_from_net() { proxy_debug(PROXY_DEBUG_NET, 5, "Session=%p, Datastream=%p -- SSL_get_error() is SSL_ERROR_SYSCALL, errno: %d\n", sess, this, errno); } else { if (r==0) { // we couldn't read any data - if (revents==1) { - // revents returns 1 , but recv() returns 0 , so there is no data. - // Therefore the socket is already closed + if ((revents & POLLIN) || ((revents & POLLIN) && (revents & POLLHUP))) { + // If revents is holding either POLLIN, or POLLIN and POLLHUP, but 'recv()' returns 0, + // reading no data, the socket has been already closed by the peer. Second part of the + // check is obviously redundant, but is left for clarity on the cases being covered. proxy_debug(PROXY_DEBUG_NET, 5, "Session=%p, Datastream=%p -- shutdown soft\n", sess, this); shut_soft(); } From 6aaf9c81cc21305234cdab81b0d3e9256fb0d905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Fri, 11 Aug 2023 10:03:33 +0200 Subject: [PATCH 2/3] Improve regression tests measuring CPU usage - Fix regression test 'reg_test_3273_ssl_con-t' with more restrictive traffic control rules so they don't affect the whole interface. - Fix measurement of CPU usage in two regression tests. --- test/tap/tap/utils.cpp | 28 +-- test/tap/tap/utils.h | 2 +- test/tap/tests/reg_test_3273_ssl_con-t.cpp | 163 ++++++++++-------- .../tap/tests/reg_test_3765_ssl_pollout-t.cpp | 62 ++++--- 4 files changed, 149 insertions(+), 106 deletions(-) diff --git a/test/tap/tap/utils.cpp b/test/tap/tap/utils.cpp index 818b4c2ad3..8413260610 100644 --- a/test/tap/tap/utils.cpp +++ b/test/tap/tap/utils.cpp @@ -710,12 +710,12 @@ string tap_curtime() { return s; } -int get_proxysql_cpu_usage(const CommandLine& cl, uint32_t intv, uint32_t& cpu_usage) { +int get_proxysql_cpu_usage(const CommandLine& cl, uint32_t intv, double& cpu_usage) { // check if proxysql process is consuming higher cpu than it should MYSQL* proxysql_admin = mysql_init(NULL); if (!mysql_real_connect(proxysql_admin, cl.host, cl.admin_username, cl.admin_password, NULL, cl.admin_port, NULL, 0)) { fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxysql_admin)); - return -1; + return EXIT_FAILURE; } // recover admin variables @@ -724,19 +724,25 @@ int get_proxysql_cpu_usage(const CommandLine& cl, uint32_t intv, uint32_t& cpu_u MYSQL_QUERY(proxysql_admin, "LOAD ADMIN VARIABLES TO RUNTIME"); // sleep during the required interval + safe threshold - sleep(intv + 2); + sleep(2 * intv + 2); - MYSQL_QUERY(proxysql_admin, "SELECT * FROM system_cpu ORDER BY timestamp DESC LIMIT 1"); + MYSQL_QUERY(proxysql_admin, "SELECT * FROM system_cpu ORDER BY timestamp DESC LIMIT 2"); MYSQL_RES* admin_res = mysql_store_result(proxysql_admin); MYSQL_ROW row = mysql_fetch_row(admin_res); - double s_clk = (1.0 / sysconf(_SC_CLK_TCK)) * 1000; - int utime_ms = atoi(row[1]) * s_clk; - int stime_ms = atoi(row[2]) * s_clk; - int t_ms = utime_ms + stime_ms; + double s_clk = (1000.0 / sysconf(_SC_CLK_TCK)); + + int final_utime_s = atoi(row[1]) * s_clk; + int final_stime_s = atoi(row[2]) * s_clk; + int final_t_s = final_utime_s + final_stime_s; + + row = mysql_fetch_row(admin_res); - // return the cpu usage - cpu_usage = t_ms; + int init_utime_s = atoi(row[1]) * s_clk; + int init_stime_s = atoi(row[2]) * s_clk; + int init_t_s = init_utime_s + init_stime_s; + + cpu_usage = 100.0 * ((final_t_s - init_t_s) / (static_cast(intv) * 1000)); // free the result mysql_free_result(admin_res); @@ -747,7 +753,7 @@ int get_proxysql_cpu_usage(const CommandLine& cl, uint32_t intv, uint32_t& cpu_u mysql_close(proxysql_admin); - return 0; + return EXIT_SUCCESS; } MYSQL* wait_for_proxysql(const conn_opts_t& opts, int timeout) { diff --git a/test/tap/tap/utils.h b/test/tap/tap/utils.h index 6cc2aa997f..ce0a93fb9e 100644 --- a/test/tap/tap/utils.h +++ b/test/tap/tap/utils.h @@ -240,7 +240,7 @@ std::string tap_curtime(); * 'ms' in the specified interval. * @return 0 if success, -1 in case of error. */ -int get_proxysql_cpu_usage(const CommandLine& cl, uint32_t intv, uint32_t& cpu_usage); +int get_proxysql_cpu_usage(const CommandLine& cl, uint32_t intv, double& cpu_usage); /** * @brief Helper struct holding connection options for helper functions creating MySQL connections. diff --git a/test/tap/tests/reg_test_3273_ssl_con-t.cpp b/test/tap/tests/reg_test_3273_ssl_con-t.cpp index c2479a3db0..6cc59cd3d4 100644 --- a/test/tap/tests/reg_test_3273_ssl_con-t.cpp +++ b/test/tap/tests/reg_test_3273_ssl_con-t.cpp @@ -35,6 +35,8 @@ #include "command_line.h" #include "utils.h" +using std::string; +using std::vector; /* Helper function to do the waiting for events on the socket. */ static int wait_for_mysql(MYSQL *mysql, int status) { @@ -65,12 +67,14 @@ static int wait_for_mysql(MYSQL *mysql, int status) { } const uint32_t REPORT_INTV_SEC = 5; -#ifdef TEST_WITHASAN -const double MAX_ALLOWED_CPU_USAGE = 5.00; -#else -//const double MAX_ALLOWED_CPU_USAGE = 0.15; -const double MAX_ALLOWED_CPU_USAGE = 0.3; // doubled it because of extra load due to cluster -#endif +const double MAX_ALLOWED_CPU_USAGE = 70; + +const vector tc_rules { + "sudo -n tc qdisc add dev lo root handle 1: prio priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", + "sudo -n tc qdisc add dev lo parent 1:2 handle 20: netem delay 1000ms", + "sudo -n tc filter add dev lo parent 1:0 protocol ip u32 match ip sport 6033 0xffff flowid 1:2", + "sudo -n tc filter add dev lo parent 1:0 protocol ip u32 match ip dport 6033 0xffff flowid 1:2" +}; int main(int argc, char** argv) { CommandLine cl; @@ -80,94 +84,117 @@ int main(int argc, char** argv) { return -1; } - plan(1); + plan(2 + tc_rules.size()); - // set a traffic rule introducing the proper delay to reproduce the issue - int tc_err = system("sudo -n tc qdisc add dev lo root netem delay 1000ms"); - if (tc_err) { - const char* err_msg = "Warning: User doesn't have enough permissions to run `tc`, exiting without error."; - fprintf(stdout, "File %s, line %d, Error: '%s'\n", __FILE__, __LINE__, err_msg); - return exit_status(); - } + diag("Checking ProxySQL idle CPU usage"); + double idle_cpu = 0; + int ret_i_cpu = get_proxysql_cpu_usage(cl, REPORT_INTV_SEC, idle_cpu); + if (ret_i_cpu) { + diag("Getting initial CPU usage failed with error - %d", ret_i_cpu); + diag("Aborting further testing"); - // get ProxySQL idle cpu usage - uint32_t idle_cpu_ms = 0; - int idle_err = get_proxysql_cpu_usage(cl, REPORT_INTV_SEC, idle_cpu_ms); - if (idle_err) { - fprintf(stdout, "File %s, line %d, Error: '%s'\n", __FILE__, __LINE__, "Unable to get 'idle_cpu' usage."); - return idle_err; + return EXIT_FAILURE; } - MYSQL* proxysql = mysql_init(NULL); - MYSQL* ret = NULL; - mysql_options(proxysql, MYSQL_OPT_NONBLOCK, 0); - mysql_ssl_set(proxysql, NULL, NULL, NULL, NULL, NULL); + ok(idle_cpu < 20, "Idle CPU usage should be below 20%% - Act: %%%lf", idle_cpu); - int status = 0; + MYSQL* proxy = nullptr; - if (argc == 2 && (strcmp(argv[1], "admin") == 0)) { - status = mysql_real_connect_start(&ret, proxysql, cl.host, "radmin", "radmin", NULL, 6032, NULL, CLIENT_SSL); - fprintf(stdout, "Testing admin\n"); - } else { - status = mysql_real_connect_start(&ret, proxysql, cl.host, cl.username, cl.password, NULL, cl.port, NULL, CLIENT_SSL); - fprintf(stdout, "Testing regular connection\n"); - } + diag("Establish several traffic control rules to reproduce the issue"); + for (const string& rule : tc_rules) { + const char* s_rule = rule.c_str(); - if (status == 0) { - fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxysql)); - return -1; + diag("Setting up rule - '%s'", s_rule); + int ret = system(s_rule); + if (ret != -1) { errno = 0; } + + ok( + ret == 0, "Setting up 'tc' rule should succeed - ret: %d, errno: %d, rule: '%s'", + ret, errno, s_rule + ); + + if (ret != 0) { + goto cleanup; + } } - my_socket sockt = mysql_get_socket(proxysql); + { + proxy = mysql_init(NULL); + MYSQL* ret = NULL; + mysql_options(proxy, MYSQL_OPT_NONBLOCK, 0); + mysql_ssl_set(proxy, NULL, NULL, NULL, NULL, NULL); - int state = 0; - while (status) { - status = wait_for_mysql(proxysql, status); - if (state == 1) { - std::thread closer {[sockt]() -> void { - usleep(1500000); - close(sockt); - }}; - closer.detach(); + int status = 0; + + if (argc == 2 && (strcmp(argv[1], "admin") == 0)) { + status = mysql_real_connect_start(&ret, proxy, cl.host, "radmin", "radmin", NULL, 6032, NULL, CLIENT_SSL); + diag("Testing 'Admin' connections"); + } else { + status = mysql_real_connect_start(&ret, proxy, cl.host, cl.username, cl.password, NULL, cl.port, NULL, CLIENT_SSL); + diag("Testing 'MySQL' connection"); } - status = mysql_real_connect_cont(&ret, proxysql, status); - if (state == 0 && status == 0) { - fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxysql)); - ok(false, "Unable to connect to ProxySQL"); - break; + if (status == 0) { + fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxy)); + goto cleanup; } - state++; - if (state == 2) { - close(sockt); - break; + my_socket sockt = mysql_get_socket(proxy); + + diag("Starting 'mysql_real_connect_cont' on stablished connection"); + int state = 0; + while (status) { + status = wait_for_mysql(proxy, status); + if (state == 1) { + // Specific wait based on the network delay. After '1.5' seconds, the client should have + // already replied with the first packet to ProxySQL, and it's time to shutdown the socket + // before any further communication takes place. + std::thread closer {[sockt]() -> void { + usleep(1500000); + diag("Closing socket from thread"); + close(sockt); + }}; + closer.detach(); + } + + status = mysql_real_connect_cont(&ret, proxy, status); + if (state == 0 && status == 0) { + fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(proxy)); + ok(false, "Unable to connect to ProxySQL"); + break; + } + + state++; + if (state == 2) { + diag("Closing socket from main"); + close(sockt); + break; + } } } - // recover the traffic rules to their normal state - tc_err = system("sudo -n tc qdisc delete dev lo root netem delay 1000ms"); +cleanup: + + // Recover the traffic rules to their normal state + diag("Delete previously established traffic control rules"); + int tc_err = system("sudo -n tc qdisc delete dev lo root"); if (tc_err) { ok(false, "ERROR: Failed to execute `tc` to recover the system!"); return exit_status(); } - uint32_t final_cpu_ms = 0; - int final_err = get_proxysql_cpu_usage(cl, REPORT_INTV_SEC, final_cpu_ms); - if (final_err) { - fprintf(stdout, "File %s, line %d, Error: '%s'\n", __FILE__, __LINE__, "Unable to get 'idle_cpu' usage."); - return idle_err; - } - - // compute the '%' of CPU used during the last interval - uint32_t cpu_usage_ms = final_cpu_ms - idle_cpu_ms; - double cpu_usage_pct = cpu_usage_ms / (REPORT_INTV_SEC * 1000.0); + double final_cpu_usage = 0; + int ret_f_cpu = get_proxysql_cpu_usage(cl, REPORT_INTV_SEC, final_cpu_usage); + diag("Getting the final CPU usage returned - %d", ret_f_cpu); ok( - cpu_usage_pct < MAX_ALLOWED_CPU_USAGE, "ProxySQL CPU usage should be below expected: (Exp: %%%lf, Act: %%%lf)", - MAX_ALLOWED_CPU_USAGE, cpu_usage_pct + final_cpu_usage < MAX_ALLOWED_CPU_USAGE, + "ProxySQL CPU usage should be below expected - Exp: %%%lf, Act: %%%lf", + MAX_ALLOWED_CPU_USAGE, final_cpu_usage ); + mysql_close(proxy); + return exit_status(); } diff --git a/test/tap/tests/reg_test_3765_ssl_pollout-t.cpp b/test/tap/tests/reg_test_3765_ssl_pollout-t.cpp index f2757b804e..4abaedf4b0 100644 --- a/test/tap/tests/reg_test_3765_ssl_pollout-t.cpp +++ b/test/tap/tests/reg_test_3765_ssl_pollout-t.cpp @@ -59,9 +59,9 @@ int create_connections(const conn_opts_t& conn_opts, uint32_t cons_num, std::vec const uint32_t ADMIN_CONN_NUM = 100; const uint32_t MYSQL_CONN_NUM = 100; const uint32_t REPORT_INTV_SEC = 5; -const double MAX_ALLOWED_CPU_USAGE = 3.0; +const double MAX_ALLOWED_CPU_USAGE = 10.0; -int get_idle_conns_cpu_usage(CommandLine& cl, uint64_t mode, uint32_t& idle_cpu_ms, uint32_t& final_cpu_ms) { +int get_idle_conns_cpu_usage(CommandLine& cl, uint64_t mode, double& idle_cpu_ms, double& final_cpu_ms) { // get ProxySQL idle cpu usage int idle_err = get_proxysql_cpu_usage(cl, REPORT_INTV_SEC, idle_cpu_ms); if (idle_err) { @@ -107,9 +107,10 @@ int main(int argc, char** argv) { return exit_status(); } - plan(3); - uint32_t idle_cpu_ms = 0; - uint32_t final_cpu_ms = 0; + plan(6); + + double idle_cpu_ms = 0; + double final_cpu_ms = 0; MYSQL* proxysql_admin = mysql_init(NULL); if (!mysql_real_connect(proxysql_admin, cl.host, cl.admin_username, cl.admin_password, NULL, cl.admin_port, NULL, 0)) { @@ -122,42 +123,51 @@ int main(int argc, char** argv) { mysql_close(proxysql_admin); diag("Testing regular connections..."); - int cpu_usage_res = get_idle_conns_cpu_usage(cl, 0, idle_cpu_ms, final_cpu_ms); - if (cpu_usage_res != EXIT_SUCCESS) { return EXIT_FAILURE; } + int ret_cpu_usage = get_idle_conns_cpu_usage(cl, 0, idle_cpu_ms, final_cpu_ms); + if (ret_cpu_usage != EXIT_SUCCESS) { return EXIT_FAILURE; } - // compute the '%' of CPU used during the last interval - uint32_t cpu_usage_ms = final_cpu_ms - idle_cpu_ms; - double cpu_usage_pct = cpu_usage_ms / (REPORT_INTV_SEC * 1000.0); + ok( + idle_cpu_ms < MAX_ALLOWED_CPU_USAGE, + "ProxySQL 'no clients' CPU usage should be below expected: (Exp: %%%lf, Act: %%%lf)", + MAX_ALLOWED_CPU_USAGE, idle_cpu_ms + ); ok( - cpu_usage_pct < MAX_ALLOWED_CPU_USAGE, "ProxySQL CPU usage should be below expected: (Exp: %%%lf, Act: %%%lf)", - MAX_ALLOWED_CPU_USAGE, cpu_usage_pct + final_cpu_ms < MAX_ALLOWED_CPU_USAGE, + "ProxySQL 'clients' CPU usage should be below expected: (Exp: %%%lf, Act: %%%lf)", + MAX_ALLOWED_CPU_USAGE, final_cpu_ms ); diag("Testing SSL connections..."); - cpu_usage_res = get_idle_conns_cpu_usage(cl, CLIENT_SSL, idle_cpu_ms, final_cpu_ms); - if (cpu_usage_res != EXIT_SUCCESS) { return EXIT_FAILURE; } + ret_cpu_usage = get_idle_conns_cpu_usage(cl, CLIENT_SSL, idle_cpu_ms, final_cpu_ms); + if (ret_cpu_usage != EXIT_SUCCESS) { return EXIT_FAILURE; } - // compute the '%' of CPU used during the last interval - cpu_usage_ms = final_cpu_ms - idle_cpu_ms; - cpu_usage_pct = cpu_usage_ms / (REPORT_INTV_SEC * 1000.0); + ok( + idle_cpu_ms < MAX_ALLOWED_CPU_USAGE, + "ProxySQL 'no clients' CPU usage should be below expected: (Exp: %%%lf, Act: %%%lf)", + MAX_ALLOWED_CPU_USAGE, idle_cpu_ms + ); ok( - cpu_usage_pct < MAX_ALLOWED_CPU_USAGE, "ProxySQL CPU usage should be below expected: (Exp: %%%lf, Act: %%%lf)", - MAX_ALLOWED_CPU_USAGE, cpu_usage_pct + final_cpu_ms < MAX_ALLOWED_CPU_USAGE, + "ProxySQL 'SSL clients' CPU usage should be below expected: (Exp: %%%lf, Act: %%%lf)", + MAX_ALLOWED_CPU_USAGE, final_cpu_ms ); diag("Testing SSL and compressed connections..."); - cpu_usage_res = get_idle_conns_cpu_usage(cl, CLIENT_SSL|CLIENT_COMPRESS, idle_cpu_ms, final_cpu_ms); - if (cpu_usage_res != EXIT_SUCCESS) { return EXIT_FAILURE; } + ret_cpu_usage = get_idle_conns_cpu_usage(cl, CLIENT_SSL|CLIENT_COMPRESS, idle_cpu_ms, final_cpu_ms); + if (ret_cpu_usage != EXIT_SUCCESS) { return EXIT_FAILURE; } - // compute the '%' of CPU used during the last interval - cpu_usage_ms = final_cpu_ms - idle_cpu_ms; - cpu_usage_pct = cpu_usage_ms / (REPORT_INTV_SEC * 1000.0); + ok( + idle_cpu_ms < MAX_ALLOWED_CPU_USAGE, + "ProxySQL 'no clients' CPU usage should be below expected: (Exp: %%%lf, Act: %%%lf)", + MAX_ALLOWED_CPU_USAGE, idle_cpu_ms + ); ok( - cpu_usage_pct < MAX_ALLOWED_CPU_USAGE, "ProxySQL CPU usage should be below expected: (Exp: %%%lf, Act: %%%lf)", - MAX_ALLOWED_CPU_USAGE, cpu_usage_pct + final_cpu_ms < MAX_ALLOWED_CPU_USAGE, + "ProxySQL 'SSL|COMPRESS clients' CPU usage should be below expected: (Exp: %%%lf, Act: %%%lf)", + MAX_ALLOWED_CPU_USAGE, final_cpu_ms ); return exit_status(); From bb2b429bca1e3ff9a7ce332d2312c4722452a0a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Jaramago=20Fern=C3=A1ndez?= Date: Tue, 15 Aug 2023 12:07:02 +0200 Subject: [PATCH 3/3] Simplify check introduced in commit '6c9e4c05' - Removed redundant part of check. - Updated comment, elaborating on 'POLLHUP' omission in the check. --- lib/mysql_data_stream.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/mysql_data_stream.cpp b/lib/mysql_data_stream.cpp index 8b51556075..5eea659e21 100644 --- a/lib/mysql_data_stream.cpp +++ b/lib/mysql_data_stream.cpp @@ -667,10 +667,10 @@ int MySQL_Data_Stream::read_from_net() { proxy_debug(PROXY_DEBUG_NET, 5, "Session=%p, Datastream=%p -- SSL_get_error() is SSL_ERROR_SYSCALL, errno: %d\n", sess, this, errno); } else { if (r==0) { // we couldn't read any data - if ((revents & POLLIN) || ((revents & POLLIN) && (revents & POLLHUP))) { + if (revents & POLLIN) { // If revents is holding either POLLIN, or POLLIN and POLLHUP, but 'recv()' returns 0, - // reading no data, the socket has been already closed by the peer. Second part of the - // check is obviously redundant, but is left for clarity on the cases being covered. + // reading no data, the socket has been already closed by the peer. Due to this we can + // ignore POLLHUP in this check, since we should reach here ONLY if POLLIN was set. proxy_debug(PROXY_DEBUG_NET, 5, "Session=%p, Datastream=%p -- shutdown soft\n", sess, this); shut_soft(); }