Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BugFix] Fix UDTF wrong result when miss multibyte UTF-8 (backport #51232) #51234

Open
wants to merge 1 commit into
base: branch-3.3
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions be/src/udf/java/java_udf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,14 +406,11 @@ jobject JVMFunctionHelper::newString(const char* data, size_t size) {
return nstr;
}

// Returns the value of JNI GetStringUTFLength() for `jstr`, converted to
// size_t. Note that this is the length of the string's UTF-8 encoding, not the
// number of characters in it, so it must not be used as a character count.
size_t JVMFunctionHelper::string_length(jstring jstr) {
    const auto encoded_length = _env->GetStringUTFLength(jstr);
    return encoded_length;
}

// Encodes the Java string `jstr` as UTF-8 into `*buffer` and returns a Slice
// built from that buffer.
//
// JNI measures a string in two different units, and they must not be mixed:
//   - GetStringUTFLength() is the size of the UTF-8 encoding, which is how
//     large the destination buffer has to be;
//   - GetStringLength() is the number of UTF-16 code units, which is the unit
//     of the `len` argument of GetStringUTFRegion().
// Passing the encoded length as `len` only works for pure-ASCII input; for
// multibyte text it asks for more characters than the string holds, so the
// call fails and the buffer is never filled.
//
// @param jstr   Java string to convert.
// @param buffer Backing storage for the result; resized to the encoded length.
//               The returned Slice is constructed from buffer->data(), so the
//               buffer has to stay alive and unmodified while the Slice is used.
// @return Slice over buffer->data() spanning buffer->length() bytes.
//
// NOTE(review): JNI documents this encoding as "modified UTF-8"; confirm that
// downstream consumers accept it for supplementary characters and embedded NULs.
Slice JVMFunctionHelper::sliceVal(jstring jstr, std::string* buffer) {
    // Size the destination by the encoded byte length.
    const size_t utf_length = _env->GetStringUTFLength(jstr);
    buffer->resize(utf_length);
    // Ask for the whole string, counted in UTF-16 code units.
    const size_t string_length = _env->GetStringLength(jstr);
    _env->GetStringUTFRegion(jstr, 0, string_length, buffer->data());
    return {buffer->data(), buffer->length()};
}

Expand Down
1 change: 0 additions & 1 deletion be/src/udf/java/java_udf.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ class JVMFunctionHelper {
DECLARE_NEW_BOX(double, double, Double)

// NOTE(review): presumably builds a Java String from `size` bytes starting at
// `data` -- confirm against the definition in java_udf.cpp.
jobject newString(const char* data, size_t size);
// Returns JNI GetStringUTFLength(jstr): the length of the string's UTF-8
// encoding, not its character count.
size_t string_length(jstring jstr);

// Encodes `jstr` as UTF-8 into `*buffer` and returns a Slice built from
// buffer->data() and buffer->length().
Slice sliceVal(jstring jstr, std::string* buffer);
// Accessor for the stored _string_class handle.
jclass string_clazz() { return _string_class; }
Expand Down
28 changes: 28 additions & 0 deletions test/sql/test_udf/R/test_jvm_udf
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,34 @@ select count(udtfdouble) from t0, udtfdouble(c1);
-- result:
81920
-- !result
select * from TABLE(udtfstring(""));
-- result:
-- !result
select * from TABLE(udtfstring("▁▂▃▄▅▆▇█"));
-- result:
▁▂▃▄▅▆▇█
▁▂▃▄▅▆▇█
-- !result
select * from TABLE(udtfstring("中文测试"));
-- result:
中文测试
中文测试
-- !result
select * from TABLE(udtfstring("∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა"));
-- result:
∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა
∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა
-- !result
select * from TABLE(udtfstring("2H₂ + O₂ ⇌ 2H₂O"));
-- result:
2H₂ + O₂ ⇌ 2H₂O
2H₂ + O₂ ⇌ 2H₂O
-- !result
select * from TABLE(udtfstring("ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ"));
-- result:
ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ
ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ
-- !result
set streaming_preaggregation_mode="force_streaming";
-- result:
-- !result
Expand Down
8 changes: 8 additions & 0 deletions test/sql/test_udf/T/test_jvm_udf
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,14 @@ select count(udtfint) from t0, udtfint(c1);
select count(udtfbigint) from t0, udtfbigint(c1);
select count(udtffloat) from t0, udtffloat(c1);
select count(udtfdouble) from t0, udtfdouble(c1);
-- test udtf with utf8 case
select * from TABLE(udtfstring(""));
select * from TABLE(udtfstring("▁▂▃▄▅▆▇█"));
select * from TABLE(udtfstring("中文测试"));
select * from TABLE(udtfstring("∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ fi�⑀₂ἠḂӥẄɐː⍎אԱა"));
select * from TABLE(udtfstring("2H₂ + O₂ ⇌ 2H₂O"));
select * from TABLE(udtfstring("ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ"));


-- test group by limit case:
set streaming_preaggregation_mode="force_streaming";
Expand Down
Loading