Skip to content

Commit

Permalink
[enhancement](Nereids) boost characterLiteralTypeCoercion (apache#42941)
Browse files Browse the repository at this point in the history
Boost characterLiteralTypeCoercion by checking the string format first, so
that no Throwable has to be thrown when the string cannot be parsed as an
integer/float/date/datetime.

This logic usually runs when searching for the `If` function signature. Because
`If` has many signatures, we need to try casting the arguments to each
signature's types in order to find the best-matching signature, for example:

```
select if(column_1, 'xxx', 'yyy')
```
Here we check whether 'xxx' and 'yyy' can be parsed as int, datetime,
and so on.



In some scenarios, this optimization can improve QPS by 16%.

before optimize:
<img width="1901" alt="image"
src="https://github.com/user-attachments/assets/b03d2d29-5d3b-45a6-ba54-2bcc7c2dccca">

<img width="1484" alt="image"
src="https://github.com/user-attachments/assets/82cbb2b0-dfe8-4a05-bc2f-ebb35dc23209">

after optimize:
<img width="1724" alt="image"
src="https://github.com/user-attachments/assets/d60a867d-596d-4ac1-9377-6460ed6d3dd1">

<img width="1722" alt="image"
src="https://github.com/user-attachments/assets/c9c9f72c-3a5f-4c24-95d9-9ca99ecab0a6">
  • Loading branch information
924060929 authored Nov 1, 2024
1 parent 07607a7 commit 105a0ad
Show file tree
Hide file tree
Showing 28 changed files with 1,604 additions and 319 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -207,13 +207,14 @@ private boolean doMatchTypes(FunctionSignature sig, List<Expression> arguments,
int arity = arguments.size();
for (int i = 0; i < arity; i++) {
DataType sigArgType = sig.getArgType(i);
DataType realType = arguments.get(i).getDataType();
Expression argument = arguments.get(i);
DataType realType = argument.getDataType();
// we need to try to do string literal coercion when search signature.
// for example, FUNC_A has two signature FUNC_A(datetime) and FUNC_A(string)
// if SQL block is `FUNC_A('2020-02-02 00:00:00')`, we should return signature FUNC_A(datetime).
if (arguments.get(i).isLiteral() && realType.isStringLikeType()) {
realType = TypeCoercionUtils.characterLiteralTypeCoercion(((Literal) arguments.get(i)).getStringValue(),
sigArgType).orElse(arguments.get(i)).getDataType();
if (!argument.isNullLiteral() && argument.isLiteral() && realType.isStringLikeType()) {
realType = TypeCoercionUtils.characterLiteralTypeCoercion(((Literal) argument).getStringValue(),
sigArgType).orElse(argument).getDataType();
}
if (!typePredicate.apply(sigArgType, realType)) {
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,17 @@
public class DateLiteral extends Literal {
public static final String JAVA_DATE_FORMAT = "yyyy-MM-dd";

public static final Set<Character> punctuations = ImmutableSet.of('!', '@', '#', '$', '%', '^', '&', '*', '(', ')',
'-', '+', '=', '_', '{', '}', '[', ']', '|', '\\', ':', ';', '"', '\'', '<', '>', ',', '.', '?', '/', '~',
'`');

// for cast datetime type to date type.
private static final LocalDateTime START_OF_A_DAY = LocalDateTime.of(0, 1, 1, 0, 0, 0);
private static final LocalDateTime END_OF_A_DAY = LocalDateTime.of(9999, 12, 31, 23, 59, 59, 999999000);
private static final DateLiteral MIN_DATE = new DateLiteral(0, 1, 1);
private static final DateLiteral MAX_DATE = new DateLiteral(9999, 12, 31);
private static final int[] DAYS_IN_MONTH = new int[] {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};

private static final Set<Character> punctuations = ImmutableSet.of('!', '@', '#', '$', '%', '^', '&', '*', '(', ')',
'-', '+', '=', '_', '{', '}', '[', ']', '|', '\\', ':', ';', '"', '\'', '<', '>', ',', '.', '?', '/', '~',
'`');

protected long year;
protected long month;
protected long day;
Expand Down Expand Up @@ -145,7 +145,7 @@ private static boolean isPunctuation(char c) {
return punctuations.contains(c);
}

static String normalize(String s) {
static Result<String, AnalysisException> normalize(String s) {
// merge consecutive space
if (s.contains(" ")) {
s = s.replaceAll(" +", " ");
Expand Down Expand Up @@ -208,7 +208,10 @@ static String normalize(String s) {
sb.append('0').append(c);
}
} else {
throw new AnalysisException("date/datetime literal [" + s + "] is invalid");
final String currentString = s;
return Result.err(
() -> new AnalysisException("date/datetime literal [" + currentString + "] is invalid")
);
}
i = j;
partNumber += 1;
Expand All @@ -228,7 +231,10 @@ static String normalize(String s) {
} else if (partNumber > 3 && isPunctuation(c)) {
sb.append(':');
} else {
throw new AnalysisException("date/datetime literal [" + s + "] is invalid");
final String currentString = s;
return Result.err(
() -> new AnalysisException("date/datetime literal [" + currentString + "] is invalid")
);
}
} else {
break;
Expand Down Expand Up @@ -259,15 +265,33 @@ static String normalize(String s) {
// trim use to remove any blank before zone id or zone offset
sb.append(s.substring(i).trim());

return sb.toString();
return Result.ok(sb.toString());
}

protected static TemporalAccessor parse(String s) {
/**
 * Parses {@code s} into a {@link DateLiteral} and reports failure through the returned
 * {@link Result} instead of throwing.
 *
 * @param s the date string to parse
 * @return an ok result holding the literal, or an error result whose exception is only
 *         created when the caller asks for it
 */
public static Result<DateLiteral, AnalysisException> parseDateLiteral(String s) {
    Result<TemporalAccessor, AnalysisException> parsed = parseDateTime(s);
    if (parsed.isError()) {
        // propagate the parse failure unchanged; only the value type differs
        return parsed.cast();
    }

    TemporalAccessor temporal = parsed.get();
    int y = DateUtils.getOrDefault(temporal, ChronoField.YEAR);
    int m = DateUtils.getOrDefault(temporal, ChronoField.MONTH_OF_YEAR);
    int d = DateUtils.getOrDefault(temporal, ChronoField.DAY_OF_MONTH);

    // same validation order as init(): time-part check, then range, then day-of-month
    boolean rejected = checkDatetime(temporal) || checkRange(y, m, d) || checkDate(y, m, d);
    if (!rejected) {
        return Result.ok(new DateLiteral(y, m, d));
    }
    return Result.err(() -> new AnalysisException("date/datetime literal [" + s + "] is out of range"));
}

/** parseDateTime */
public static Result<TemporalAccessor, AnalysisException> parseDateTime(String s) {
// fast parse '2022-01-01'
if (s.length() == 10 && s.charAt(4) == '-' && s.charAt(7) == '-') {
TemporalAccessor date = fastParseDate(s);
if (date != null) {
return date;
return Result.ok(date);
}
}

Expand All @@ -289,15 +313,20 @@ protected static TemporalAccessor parse(String s) {
if (!containsPunctuation) {
s = normalizeBasic(s);
// mysql reject "20200219 010101" "200219 010101", can't use ' ' spilt basic date time.

if (!s.contains("T")) {
dateTime = DateTimeFormatterUtils.BASIC_FORMATTER_WITHOUT_T.parse(s);
} else {
dateTime = DateTimeFormatterUtils.BASIC_DATE_TIME_FORMATTER.parse(s);
}
return dateTime;
return Result.ok(dateTime);
}

s = normalize(s);
Result<String, AnalysisException> normalizeResult = normalize(s);
if (normalizeResult.isError()) {
return normalizeResult.cast();
}
s = normalizeResult.get();

if (!s.contains(" ")) {
dateTime = DateTimeFormatterUtils.ZONE_DATE_FORMATTER.parse(s);
Expand All @@ -307,32 +336,34 @@ protected static TemporalAccessor parse(String s) {

// if Year is not present, throw exception
if (!dateTime.isSupported(ChronoField.YEAR)) {
throw new AnalysisException("date/datetime literal [" + originalString + "] is invalid");
return Result.err(
() -> new AnalysisException("date/datetime literal [" + originalString + "] is invalid")
);
}

return dateTime;
return Result.ok(dateTime);
} catch (Exception ex) {
throw new AnalysisException("date/datetime literal [" + originalString + "] is invalid");
return Result.err(() -> new AnalysisException("date/datetime literal [" + originalString + "] is invalid"));
}
}

protected void init(String s) throws AnalysisException {
TemporalAccessor dateTime = parse(s);
TemporalAccessor dateTime = parseDateTime(s).get();
year = DateUtils.getOrDefault(dateTime, ChronoField.YEAR);
month = DateUtils.getOrDefault(dateTime, ChronoField.MONTH_OF_YEAR);
day = DateUtils.getOrDefault(dateTime, ChronoField.DAY_OF_MONTH);

if (checkDatetime(dateTime) || checkRange() || checkDate()) {
if (checkDatetime(dateTime) || checkRange(year, month, day) || checkDate(year, month, day)) {
throw new AnalysisException("date/datetime literal [" + s + "] is out of range");
}
}

protected boolean checkRange() {
protected static boolean checkRange(long year, long month, long day) {
return year > MAX_DATE.getYear() || month > MAX_DATE.getMonth() || day > MAX_DATE.getDay();
}

protected boolean checkDate() {
if (month != 0 && day > DAYS_IN_MONTH[((int) month)]) {
protected static boolean checkDate(long year, long month, long day) {
if (month != 0 && day > DAYS_IN_MONTH[(int) month]) {
if (month == 2 && day == 29 && (Year.isLeap(year) && year > 0)) {
return false;
}
Expand All @@ -345,7 +376,7 @@ protected static boolean isDateOutOfRange(LocalDateTime dateTime) {
return dateTime == null || dateTime.isBefore(START_OF_A_DAY) || dateTime.isAfter(END_OF_A_DAY);
}

private boolean checkDatetime(TemporalAccessor dateTime) {
private static boolean checkDatetime(TemporalAccessor dateTime) {
return DateUtils.getOrDefault(dateTime, ChronoField.HOUR_OF_DAY) != 0
|| DateUtils.getOrDefault(dateTime, ChronoField.MINUTE_OF_HOUR) != 0
|| DateUtils.getOrDefault(dateTime, ChronoField.SECOND_OF_MINUTE) != 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.DateTimeType;
import org.apache.doris.nereids.types.DateTimeV2Type;
import org.apache.doris.nereids.types.coercion.DateLikeType;
import org.apache.doris.nereids.util.DateUtils;

Expand Down Expand Up @@ -108,7 +109,7 @@ public static int determineScale(String s) {
if (s.indexOf("-") == s.lastIndexOf("-") && s.indexOf(":") == s.lastIndexOf(":")) {
return 0;
}
s = normalize(s);
s = normalize(s).get();
if (s.length() <= 19 || s.charAt(19) != '.') {
return 0;
}
Expand All @@ -130,10 +131,73 @@ public static int determineScale(String s) {
return scale;
}

@Override
/**
 * Parses {@code s} into a datetime literal and reports failure through the returned
 * {@link Result} instead of throwing for parse and range errors.
 *
 * @param s the datetime string to parse
 * @param isV2 when true a DateTimeV2Literal (with microseconds) is produced,
 *             otherwise a DateTimeLiteral without a microsecond part
 * @return an ok result holding the literal, or an error result carrying a lazily
 *         created AnalysisException
 */
public static Result<DateTimeLiteral, AnalysisException> parseDateTimeLiteral(String s, boolean isV2) {
Result<TemporalAccessor, AnalysisException> parseResult = parseDateTime(s);
if (parseResult.isError()) {
// propagate the parse failure; only the value type parameter changes
return parseResult.cast();
}

TemporalAccessor temporal = parseResult.get();
long year = DateUtils.getOrDefault(temporal, ChronoField.YEAR);
long month = DateUtils.getOrDefault(temporal, ChronoField.MONTH_OF_YEAR);
long day = DateUtils.getOrDefault(temporal, ChronoField.DAY_OF_MONTH);
long hour = DateUtils.getOrDefault(temporal, ChronoField.HOUR_OF_DAY);
long minute = DateUtils.getOrDefault(temporal, ChronoField.MINUTE_OF_HOUR);
long second = DateUtils.getOrDefault(temporal, ChronoField.SECOND_OF_MINUTE);

// a zone is only present when the parsed text carried a zone id or offset
ZoneId zoneId = temporal.query(TemporalQueries.zone());
if (zoneId != null) {
// get correct DST of that time.
Instant thatTime = ZonedDateTime
.of((int) year, (int) month, (int) day, (int) hour, (int) minute, (int) second, 0, zoneId)
.toInstant();

// difference, in seconds, between the zone from DateUtils.getTimeZone() and the literal's zone
// at that instant (NOTE(review): presumably the session time zone - confirm in DateUtils)
int offset = DateUtils.getTimeZone().getRules().getOffset(thatTime).getTotalSeconds()
- zoneId.getRules().getOffset(thatTime).getTotalSeconds();
if (offset != 0) {
// shift the wall-clock fields by the offset and copy them back
DateTimeLiteral tempLiteral = new DateTimeLiteral(year, month, day, hour, minute, second);
DateTimeLiteral result = (DateTimeLiteral) tempLiteral.plusSeconds(offset);
second = result.second;
minute = result.minute;
hour = result.hour;
day = result.day;
month = result.month;
year = result.year;
}
}

// nanos / 100 keeps seven fractional digits (units of 100ns)
long microSecond = DateUtils.getOrDefault(temporal, ChronoField.NANO_OF_SECOND) / 100L;
// Microseconds have 7 digits.
// the seventh digit is only used to decide rounding; the remaining six are the microseconds
long sevenDigit = microSecond % 10;
microSecond = microSecond / 10;
if (sevenDigit >= 5 && isV2) {
// round half up by adding one microsecond; this may carry into any larger field
DateTimeV2Literal tempLiteral = new DateTimeV2Literal(year, month, day, hour, minute, second, microSecond);
DateTimeV2Literal result = (DateTimeV2Literal) tempLiteral.plusMicroSeconds(1);
second = result.second;
minute = result.minute;
hour = result.hour;
day = result.day;
month = result.month;
year = result.year;
microSecond = result.microSecond;
}

// validate after the zone shift and rounding, since both can change the date fields
if (checkRange(year, month, day) || checkDate(year, month, day)) {
return Result.err(() -> new AnalysisException("datetime literal [" + s + "] is out of range"));
}

if (isV2) {
// the V2 type is derived from the original string
DateTimeV2Type type = DateTimeV2Type.forTypeFromString(s);
return Result.ok(new DateTimeV2Literal(type, year, month, day, hour, minute, second, microSecond));
} else {
// the non-V2 literal is built without the microsecond value
return Result.ok(new DateTimeLiteral(DateTimeType.INSTANCE, year, month, day, hour, minute, second));
}
}

protected void init(String s) throws AnalysisException {
// TODO: check and do fast parse like fastParseDate
TemporalAccessor temporal = parse(s);
TemporalAccessor temporal = parseDateTime(s).get();

year = DateUtils.getOrDefault(temporal, ChronoField.YEAR);
month = DateUtils.getOrDefault(temporal, ChronoField.MONTH_OF_YEAR);
Expand Down Expand Up @@ -177,14 +241,13 @@ protected void init(String s) throws AnalysisException {
this.microSecond = result.microSecond;
}

if (checkRange() || checkDate()) {
if (checkRange(year, month, day) || checkDate(year, month, day)) {
throw new AnalysisException("datetime literal [" + s + "] is out of range");
}
}

@Override
protected boolean checkRange() {
return super.checkRange() || hour > MAX_DATETIME.getHour() || minute > MAX_DATETIME.getMinute()
return checkRange(year, month, day) || hour > MAX_DATETIME.getHour() || minute > MAX_DATETIME.getMinute()
|| second > MAX_DATETIME.getSecond() || microSecond > MAX_MICROSECOND;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ private void roundMicroSecond(int scale) {
this.second = localDateTime.getSecond();
this.microSecond -= 1000000;
}
if (checkRange() || checkDate()) {
if (checkRange() || checkDate(year, month, day)) {
// may fallback to legacy planner. make sure the behaviour of rounding is same.
throw new AnalysisException("datetime literal [" + toString() + "] is out of range");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,16 @@ public DecimalV3Literal(BigDecimal value) {
* Constructor for DecimalV3Literal
*/
public DecimalV3Literal(DecimalV3Type dataType, BigDecimal value) {
super(DecimalV3Type.createDecimalV3TypeLooseCheck(dataType.getPrecision(), dataType.getScale()));
super(DecimalV3Type.createDecimalV3TypeLooseCheck(
dataType.getPrecision() == -1 ? value.precision() : dataType.getPrecision(),
dataType.getScale() == -1 ? value.scale() : dataType.getScale())
);

int precision = dataType.getPrecision() == -1 ? value.precision() : dataType.getPrecision();
int scale = dataType.getScale() == -1 ? value.scale() : dataType.getScale();
Objects.requireNonNull(value, "value not be null");
checkPrecisionAndScale(dataType.getPrecision(), dataType.getScale(), value);
BigDecimal adjustedValue = value.scale() < 0 ? value
: value.setScale(dataType.getScale(), RoundingMode.HALF_UP);
checkPrecisionAndScale(precision, scale, value);
BigDecimal adjustedValue = value.scale() < 0 ? value : value.setScale(scale, RoundingMode.HALF_UP);
this.value = Objects.requireNonNull(adjustedValue);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.expressions.literal;

import java.util.Optional;
import java.util.function.Supplier;

/**
 * The outcome of an operation that either produced a value or failed.
 *
 * <p>A failed result stores a {@link Supplier} of the exception rather than the exception
 * itself, so the exception object is only constructed if a caller actually asks for it via
 * {@link #get()}. Callers that merely probe with {@link #isError()} or {@link #orElse(Object)}
 * never trigger that construction.
 *
 * @param <R> type of the value held by a successful result
 * @param <T> type of the exception produced by a failed result
 */
public class Result<R, T extends RuntimeException> {
    private final Optional<R> result;
    private final Optional<Supplier<T>> exceptionSupplier;

    private Result(Optional<R> result, Optional<Supplier<T>> exceptionSupplier) {
        this.result = result;
        this.exceptionSupplier = exceptionSupplier;
    }

    /**
     * Creates a successful result.
     *
     * @param result the value; must not be null
     * @return a result for which {@link #isOk()} is true
     * @throws NullPointerException if {@code result} is null
     */
    public static <R, T extends RuntimeException> Result<R, T> ok(R result) {
        return new Result<>(Optional.of(result), Optional.empty());
    }

    /**
     * Creates a failed result.
     *
     * @param exceptionSupplier creates the exception on demand; must not be null
     * @return a result for which {@link #isError()} is true
     * @throws NullPointerException if {@code exceptionSupplier} is null
     */
    public static <R, T extends RuntimeException> Result<R, T> err(Supplier<T> exceptionSupplier) {
        return new Result<>(Optional.empty(), Optional.of(exceptionSupplier));
    }

    /** Returns true if this result holds a value. */
    public boolean isOk() {
        return !exceptionSupplier.isPresent();
    }

    /** Returns true if this result holds an exception supplier instead of a value. */
    public boolean isError() {
        return exceptionSupplier.isPresent();
    }

    /**
     * Reinterprets this result with different type arguments.
     *
     * <p>Intended for forwarding a failed result to a caller that expects another value type:
     * a failed result holds no value, so changing the value type is harmless. No check is
     * performed; casting a successful result to an incompatible value type defers the
     * {@link ClassCastException} to wherever the value is later used.
     *
     * @param <U> the value type to view this result as
     * @param <E> the exception type to view this result as
     * @return this same instance, viewed with the requested type arguments
     */
    // The method-level type parameters deliberately use names different from the class-level
    // R and T, so they no longer shadow them.
    @SuppressWarnings("unchecked")
    public <U, E extends RuntimeException> Result<U, E> cast() {
        return (Result<U, E>) this;
    }

    /**
     * Returns the value of a successful result.
     *
     * @return the held value
     * @throws T the exception created by the supplier, if this result is a failure
     */
    public R get() {
        if (exceptionSupplier.isPresent()) {
            throw exceptionSupplier.get().get();
        }
        return result.get();
    }

    /**
     * Returns the held value, or {@code other} if this result is a failure. The exception
     * supplier is not invoked.
     *
     * @param other fallback returned for a failed result
     * @return the held value or the fallback
     */
    public R orElse(R other) {
        if (exceptionSupplier.isPresent()) {
            return other;
        }
        return result.get();
    }
}
Loading

0 comments on commit 105a0ad

Please sign in to comment.