From aa47a35384288d1e69f3c702aee3023c515eae29 Mon Sep 17 00:00:00 2001 From: camby Date: Tue, 7 Jan 2025 13:46:32 +0800 Subject: [PATCH] [fix](mem) heap-buffer-overflow for function convert_to (#46405) (#46502) pick #46405 to branch-2.1 --- be/src/vec/functions/function_string.h | 11 ++++++++--- .../nereids_function_p0/scalar_function/C.out | 3 +++ .../string_functions/test_string_function.out | Bin 4589 -> 4644 bytes .../scalar_function/C.groovy | 2 +- .../test_string_function.groovy | 2 ++ 5 files changed, 14 insertions(+), 4 deletions(-) diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 41af863bda4b9a..613dfe36855074 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -4085,8 +4085,9 @@ class FunctionConvertTo : public IFunction { auto& res_offset = col_res->get_offsets(); auto& res_chars = col_res->get_chars(); res_offset.resize(input_rows_count); - // max pinyin size is 6, double of utf8 chinese word 3, add one char to set '~' - res_chars.resize(str_chars.size() * 2 + input_rows_count); + // max pinyin size is 6 + 1 (first '~') for utf8 chinese word 3 + size_t pinyin_size = (str_chars.size() + 2) / 3 * 7; + res_chars.resize(pinyin_size); size_t in_len = 0, out_len = 0; for (int i = 0; i < input_rows_count; ++i) { @@ -4127,7 +4128,11 @@ class FunctionConvertTo : public IFunction { } auto end = strchr(buf, ' '); - auto len = end != nullptr ? end - buf : MAX_PINYIN_LEN; + // max len for pinyin is 6 + int len = MAX_PINYIN_LEN; + if (end != nullptr && end - buf < MAX_PINYIN_LEN) { + len = end - buf; + } // set first char '~' just make sure all english word lower than chinese word *dest = 126; memcpy(dest + 1, buf, len); diff --git a/regression-test/data/nereids_function_p0/scalar_function/C.out b/regression-test/data/nereids_function_p0/scalar_function/C.out index e36fdc7f3742be..924ddf5b148105 100644 --- a/regression-test/data/nereids_function_p0/scalar_function/C.out +++ b/regression-test/data/nereids_function_p0/scalar_function/C.out @@ -1418,3 +1418,6 @@ Doris Doris -- !convert -- 1 1 +-- !convert_1 -- +~zhuang~zhuang~zhuang~zhuang~zhuang + diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out index a64ba474599bcfb36aa3e51b0801fe87b7cdeb68..86953f95ab0e8944bf6f13e4337f5619001c766d 100644 GIT binary patch delta 63 scmaE>yhLTg0S<%Y{JgT%qLO$+1zlaPx~h!Q#JqHT)aG3r#+-~?04b^%i2wiq delta 12 TcmZ3Y@>Y4n0glbzICMDyCWi$$ diff --git a/regression-test/suites/nereids_function_p0/scalar_function/C.groovy b/regression-test/suites/nereids_function_p0/scalar_function/C.groovy index bf072c9ad8cf74..0a14602eff9c17 100644 --- a/regression-test/suites/nereids_function_p0/scalar_function/C.groovy +++ b/regression-test/suites/nereids_function_p0/scalar_function/C.groovy @@ -197,5 +197,5 @@ suite("nereids_scalar_fn_C") { qt_bitmap_not_nullable "select count(kbitmap) from fn_test_bitmap_not_nullable" qt_char "select char(68, 111, 114, 105, 115), char(68, 111, 114, 105, 115 using utf8);" qt_convert "select convert(1 using gbk), convert(1, string);" - + qt_convert_1 "select convert('装装装装装' using gbk);" } diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy index eb38c3e0fa832a..d24f8f61663ff2 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy @@ -163,6 +163,8 @@ suite("test_string_function", "arrow_flight_sql") { qt_sql "select right(\"Hello doris\", 120);" qt_sql "select right(\"Hello doris\", -6);" + qt_convert_1 "select convert('装装装装装' using gbk);" + sql """ drop table if exists left_right_test; """ sql """ create table left_right_test ( id INT NULL,