Skip to content

Commit

Permalink
Fix memory overflow in the convert_to function
Browse files Browse the repository at this point in the history
  • Loading branch information
cambyzju committed Jan 3, 2025
1 parent c28c00a commit d940ac5
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 6 deletions.
2 changes: 1 addition & 1 deletion be/src/clucene
13 changes: 9 additions & 4 deletions be/src/vec/functions/function_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -3770,9 +3770,10 @@ class FunctionConvertTo : public IFunction {
auto& res_offset = col_res->get_offsets();
auto& res_chars = col_res->get_chars();
res_offset.resize(input_rows_count);
// max pinyin size is 6, double of utf8 chinese word 3, add one char to set '~'
ColumnString::check_chars_length(str_chars.size() * 2 + input_rows_count, 0);
res_chars.resize(str_chars.size() * 2 + input_rows_count);
// each 3-byte UTF-8 Chinese character needs at most 7 output bytes: a leading '~' plus up to 6 pinyin bytes
size_t pinyin_size = (str_chars.size() + 2) / 3 * 7;
ColumnString::check_chars_length(pinyin_size, 0);
res_chars.resize(pinyin_size);

size_t in_len = 0, out_len = 0;
for (int i = 0; i < input_rows_count; ++i) {
Expand Down Expand Up @@ -3813,7 +3814,11 @@ class FunctionConvertTo : public IFunction {
}

auto end = strchr(buf, ' ');
auto len = end != nullptr ? end - buf : MAX_PINYIN_LEN;
// max len for pinyin is 6
int len = MAX_PINYIN_LEN;
if (end != nullptr && end - buf < MAX_PINYIN_LEN) {
len = end - buf;
}
// write '~' (ASCII 126) as the first char so that all English words compare lower than Chinese words
*dest = 126;
memcpy(dest + 1, buf, len);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1418,3 +1418,6 @@ Doris Doris
-- !convert --
1 1

-- !convert_1 --
~zhuang~zhuang~zhuang~zhuang~zhuang

Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -197,5 +197,5 @@ suite("nereids_scalar_fn_C") {
qt_bitmap_not_nullable "select count(kbitmap) from fn_test_bitmap_not_nullable"
qt_char "select char(68, 111, 114, 105, 115), char(68, 111, 114, 105, 115 using utf8);"
qt_convert "select convert(1 using gbk), convert(1, string);"

qt_convert_1 "select convert('装装装装装' using gbk);"
}
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ suite("test_string_function", "arrow_flight_sql") {
qt_sql "select right(\"Hello doris\", 120);"
qt_sql "select right(\"Hello doris\", -6);"

qt_convert_1 "select convert('装装装装装' using gbk);"

sql """ drop table if exists left_right_test; """
sql """ create table left_right_test (
id INT NULL,
Expand Down

0 comments on commit d940ac5

Please sign in to comment.