From 7a9d32b37a86e21f178922de74be9c7071c3d380 Mon Sep 17 00:00:00 2001 From: LiBinfeng Date: Wed, 11 Dec 2024 14:33:28 +0800 Subject: [PATCH] fix split part --- .../functions/executable/StringArithmetic.java | 10 ++++++---- .../fold_constant_string_arithmatic.groovy | 14 +++++++------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java index 6c40460ccba7a48..3060d115ba79a3e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/StringArithmetic.java @@ -333,12 +333,14 @@ public static Expression instr(StringLikeLiteral first, StringLikeLiteral second * Executable arithmetic functions Ascii */ @ExecFunction(name = "ascii") - public static Expression ascii(StringLikeLiteral first) { + public static Expression ascii(StringLikeLiteral first) throws UnsupportedEncodingException { if (first.getValue().length() == 0) { return new IntegerLiteral(0); } - char firstChar = first.getValue().charAt(0); - return new IntegerLiteral(firstChar); + String character = first.getValue(); + byte[] utf8Bytes = character.getBytes("UTF-8"); + int firstByteAscii = utf8Bytes[0] & 0xFF; + return new IntegerLiteral(firstByteAscii); } /** @@ -675,7 +677,7 @@ public static Expression splitPart(StringLikeLiteral first, StringLikeLiteral ch } if (parts.length < Math.abs(number.getValue()) || number.getValue() == 0) { - if (parts.length == Math.abs(number.getValue()) - 1) { + if (parts.length == Math.abs(number.getValue())) { if (number.getValue() < 0 && first.getValue().startsWith(chr.getValue()) || number.getValue() > 0 && first.getValue().endsWith(chr.getValue())) { return castStringLikeLiteral(first, ""); diff --git a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy index fb5824605810380..35ed03d9b998cd0 100644 --- a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy +++ b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_string_arithmatic.groovy @@ -52,15 +52,16 @@ suite("fold_constant_string_arithmatic") { testFoldConst("select ascii('1')") testFoldConst("select ascii('a')") testFoldConst("select ascii('A')") - // Unicode code point or UTF-8? pg is 12371 mysql is 227 because it is stored as E3 81 82 so E3 would be showed -// testFoldConst("select ascii('こ')") -// testFoldConst("select ascii('안')") + testFoldConst("select ascii('こ')") + testFoldConst("select ascii('안')") + testFoldConst("select ascii('中')") testFoldConst("select ascii(cast('1' as string))") testFoldConst("select ascii(cast('a' as string))") testFoldConst("select ascii(cast('A' as string))") testFoldConst("select ascii(cast('!' as string))") -// testFoldConst("select ascii(cast('こ' as string))") -// testFoldConst("select ascii(cast('안' as string))") + testFoldConst("select ascii(cast('こ' as string))") + testFoldConst("select ascii(cast('안' as string))") + testFoldConst("select ascii(cast('中' as string))") // bin testFoldConst("select bin(5)") @@ -431,8 +432,7 @@ suite("fold_constant_string_arithmatic") { testFoldConst("SELECT split_part('哈哈哈AAA','A', 2)") testFoldConst("SELECT split_part('哈哈哈AAA','A', 3)") testFoldConst("SELECT split_part('哈哈哈AAA','A', 4)") - // should exceed become null? pg is empty but not null -// testFoldConst("SELECT split_part('哈哈哈AAA','A', 5)") + testFoldConst("SELECT split_part('哈哈哈AAA','A', 5)") testFoldConst("SELECT split_part('哈哈哈AA+','A', -4)") testFoldConst("SELECT split_part('哈哈哈AA+','A', -3)") testFoldConst("SELECT split_part('哈哈哈AA+','A', -2)")