Skip to content

Commit

Permalink
[fix](Nereids) fix fe folding constant of string functions and add mo…
Browse files Browse the repository at this point in the history
…re cases (apache#45233)

Issue Number: apache#44666
Related PR: apache#40441

Problem Summary:
- select substring_index('哈哈哈AAA','A', 1);
Java's String.split takes an optional second parameter, 'limit', which
defaults to zero. A limit of zero discards trailing empty strings, so
splitting '哈哈哈AAA' on 'A' yields only '哈哈哈'. What substring_index
needs, however, is the full list '哈哈哈', '', '', ''.
So the split calls should pass a limit of -1, which keeps the trailing
empty strings in the resulting list of parts.
- Reorganize the constant folding of string functions in FE and add more test cases
  • Loading branch information
LiBinfeng-01 committed Jan 7, 2025
1 parent 5e9847a commit c02f57a
Show file tree
Hide file tree
Showing 2 changed files with 674 additions and 652 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,20 @@ public static Expression right(StringLikeLiteral first, IntegerLiteral second) {
*/
@ExecFunction(name = "locate")
public static Expression locate(StringLikeLiteral first, StringLikeLiteral second) {
return new IntegerLiteral(second.getValue().trim().indexOf(first.getValue()) + 1);
return new IntegerLiteral(second.getValue().indexOf(first.getValue()) + 1);
}

/**
 * Executable string function locate(substr, str, pos).
 *
 * Returns the 1-based position of the first occurrence of {@code first} inside
 * {@code second}, searching from the 1-based start position {@code third}.
 * Returns 0 when the start position is not positive, lies beyond the end of
 * {@code second}, or no occurrence exists at or after it.
 *
 * Positions are counted in Java {@code char} units, the same unit that
 * {@link String#indexOf(String, int)} uses.
 */
@ExecFunction(name = "locate")
public static Expression locate(StringLikeLiteral first, StringLikeLiteral second, IntegerLiteral third) {
    String needle = first.getValue();
    String haystack = second.getValue();
    int start = third.getValue();
    // Convert the 1-based SQL position into a 0-based Java index.
    int fromIndex = start - 1;
    if (start <= 0 || fromIndex > haystack.length()) {
        return new IntegerLiteral(0);
    }
    // Search from the requested offset. Searching from index 0 would report an
    // occurrence that lies before the start position whenever a later one also
    // exists (e.g. locate('a', 'abab', 2) must be 3, not 1).
    // indexOf yields -1 when nothing is found, which maps to the result 0.
    return new IntegerLiteral(haystack.indexOf(needle, fromIndex) + 1);
}

/**
Expand All @@ -334,12 +347,14 @@ public static Expression instr(StringLikeLiteral first, StringLikeLiteral second
* Executable arithmetic functions Ascii
*/
@ExecFunction(name = "ascii")
public static Expression ascii(StringLikeLiteral first) {
public static Expression ascii(StringLikeLiteral first) throws UnsupportedEncodingException {
if (first.getValue().length() == 0) {
return new IntegerLiteral(0);
}
char firstChar = first.getValue().charAt(0);
return new IntegerLiteral(firstChar);
String character = first.getValue();
byte[] utf8Bytes = character.getBytes("UTF-8");
int firstByteAscii = utf8Bytes[0] & 0xFF;
return new IntegerLiteral(firstByteAscii);
}

/**
Expand Down Expand Up @@ -584,7 +599,7 @@ public static Expression fieldVarchar(StringLikeLiteral first, VarcharLiteral...
}

private static int findStringInSet(String target, String input) {
String[] split = input.split(",");
String[] split = input.split(",", -1);
for (int i = 0; i < split.length; i++) {
if (split[i].equals(target)) {
return i + 1;
Expand Down Expand Up @@ -634,6 +649,10 @@ public static Expression reverseVarchar(StringLikeLiteral first) {
*/
@ExecFunction(name = "space")
public static Expression space(IntegerLiteral first) {
// when it is too large for fe to make result string, do not folding on fe, limit 1 MB
if (first.getValue() > 1000000) {
throw new AnalysisException("space too large to fold const by fe");
}
StringBuilder sb = new StringBuilder();
for (int i = 0; i < first.getValue(); i++) {
sb.append(' ');
Expand All @@ -646,7 +665,7 @@ public static Expression space(IntegerLiteral first) {
*/
@ExecFunction(name = "split_by_char")
public static Expression splitByChar(StringLikeLiteral first, StringLikeLiteral second) {
String[] result = first.getValue().split(second.getValue());
String[] result = first.getValue().split(second.getValue(), -1);
List<Literal> items = new ArrayList<>();
for (int i = 1; i < result.length; i++) {
items.add((Literal) castStringLikeLiteral(first, result[i]));
Expand All @@ -673,16 +692,16 @@ public static Expression splitPart(StringLikeLiteral first, StringLikeLiteral ch
if (".$|()[{^?*+\\".contains(separator) || separator.startsWith("\\")) {
separator = "\\" + separator;
}
parts = sb.reverse().toString().split(separator);
parts = sb.reverse().toString().split(separator, -1);
} else {
if (".$|()[{^?*+\\".contains(separator) || separator.startsWith("\\")) {
separator = "\\" + separator;
}
parts = first.getValue().split(separator);
parts = first.getValue().split(separator, -1);
}

if (parts.length < Math.abs(number.getValue()) || number.getValue() == 0) {
if (parts.length == Math.abs(number.getValue()) - 1) {
if (parts.length == Math.abs(number.getValue())) {
if (number.getValue() < 0 && first.getValue().startsWith(chr.getValue())
|| number.getValue() > 0 && first.getValue().endsWith(chr.getValue())) {
return castStringLikeLiteral(first, "");
Expand All @@ -702,7 +721,10 @@ public static Expression splitPart(StringLikeLiteral first, StringLikeLiteral ch
*/
@ExecFunction(name = "substring_index")
public static Expression substringIndex(StringLikeLiteral first, StringLikeLiteral chr, IntegerLiteral number) {
String[] parts = first.getValue().split(chr.getValue());
if (chr.getValue().isEmpty()) {
return chr;
}
String[] parts = first.getValue().split(chr.getValue(), -1);
if (Math.abs(number.getValue()) >= parts.length) {
return first;
}
Expand Down Expand Up @@ -899,13 +921,13 @@ public static Expression extractUrlParameter(StringLikeLiteral first, StringLike
return castStringLikeLiteral(first, "");
}

String[] urlParts = first.getValue().split("\\?");
String[] urlParts = first.getValue().split("\\?", -1);
if (urlParts.length > 1) {
String query = urlParts[1];
String[] pairs = query.split("&");
String[] pairs = query.split("&", -1);

for (String pair : pairs) {
String[] keyValue = pair.split("=");
String[] keyValue = pair.split("=", -1);
if (second.getValue().equals(keyValue[0])) {
return castStringLikeLiteral(first, keyValue[1]);
}
Expand Down
Loading

0 comments on commit c02f57a

Please sign in to comment.