add druid.expressions.allowVectorizeFallback and default to false (#17248)

changes:

* adds ExpressionProcessing.allowVectorizeFallback() and ExpressionProcessingConfig.allowVectorizeFallback(), defaulting to false until a few remaining bugs can be fixed (mostly complex types and some odd interactions with mixed types)
* adds cannotVectorizeUnlessFallback functions to make it easy to toggle the default of this config, and to know what to delete when it is removed in the future
clintropolis authored Oct 5, 2024
1 parent d1709a3 commit 04fe568
Showing 24 changed files with 221 additions and 41 deletions.
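
Before the file-by-file diff, here is a minimal illustrative sketch (not part of the commit) of how the new toggle is exercised. It uses only methods added or shown in this change — ExpressionProcessing.initializeForTests(), initializeForFallback(), and allowVectorizeFallback() — while the test class name itself is hypothetical. The same switch is exposed to operators as the druid.expressions.allowVectorizeFallback property, defaulting to false.

import org.apache.druid.math.expr.ExpressionProcessing;
import org.junit.Assert;
import org.junit.Test;

public class AllowVectorizeFallbackSketchTest
{
  @Test
  public void testFallbackToggle()
  {
    // Default configuration: the generic "fallback" vectorization path is disabled.
    ExpressionProcessing.initializeForTests();
    Assert.assertFalse(ExpressionProcessing.allowVectorizeFallback());
    try {
      // Opt in explicitly, the same way the testArrayFns change below does.
      ExpressionProcessing.initializeForFallback();
      Assert.assertTrue(ExpressionProcessing.allowVectorizeFallback());
    }
    finally {
      // Restore defaults so other tests are unaffected.
      ExpressionProcessing.initializeForTests();
    }
  }
}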
1 change: 0 additions & 1 deletion docs/configuration/index.md
@@ -2218,7 +2218,6 @@ Supported query contexts:
|Key|Description|Default|
|---|-----------|-------|
|`druid.expressions.useStrictBooleans`|Controls the behavior of Druid boolean operators and functions; if set to `true`, all boolean values are either `1` or `0`. This configuration has been deprecated and will be removed in a future release, taking on the `true` behavior. See [expression documentation](../querying/math-expr.md#logical-operator-modes) for more information.|true|
|`druid.expressions.allowNestedArrays`|If enabled, Druid array expressions can create nested arrays. This configuration has been deprecated and will be removed in a future release, taking on the `true` behavior.|true|

### Router

@@ -322,6 +322,7 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker(
@Test
public void testApproxCountDistinctHllSketch()
{
cannotVectorizeUnlessFallback();
final String sql = "SELECT\n"
+ " SUM(cnt),\n"
+ " APPROX_COUNT_DISTINCT_DS_HLL(dim2),\n" // uppercase
@@ -1138,6 +1139,7 @@ public void testHllEstimateAsVirtualColumnOnNonHllCol()
@Test
public void testHllEstimateAsVirtualColumnWithGroupByOrderBy()
{
cannotVectorizeUnlessFallback();
testQuery(
"SELECT"
+ " HLL_SKETCH_ESTIMATE(hllsketch_dim1), count(*)"
@@ -177,6 +177,7 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker(
@Test
public void testApproxCountDistinctThetaSketch()
{
cannotVectorizeUnlessFallback();
final String sql = "SELECT\n"
+ " SUM(cnt),\n"
+ " APPROX_COUNT_DISTINCT_DS_THETA(dim2),\n"
@@ -1157,6 +1158,7 @@ public void testThetaEstimateAsVirtualColumnOnNonThetaCol()
@Test
public void testThetaEstimateAsVirtualColumnWithGroupByOrderBy()
{
cannotVectorizeUnlessFallback();
testQuery(
"SELECT"
+ " THETA_SKETCH_ESTIMATE(thetasketch_dim1), count(*)"
7 changes: 7 additions & 0 deletions processing/src/main/java/org/apache/druid/math/expr/Expr.java
@@ -182,6 +182,13 @@ default boolean canVectorize(InputBindingInspector inspector)
return false;
}


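/**
 * Gate for the generic "fallback" vectorization path: usable only when the
 * druid.expressions.allowVectorizeFallback config is enabled, the output type is known, and all
 * arguments can themselves be vectorized.
 */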
default boolean canFallbackVectorize(InputBindingInspector inspector, List<Expr> args)
{
return ExpressionProcessing.allowVectorizeFallback() &&
getOutputType(inspector) != null &&
inspector.canVectorize(args);
}
/**
* Possibly convert the {@link Expr} into an optimized, possibly not thread-safe {@link Expr}. Does not convert
* child {@link Expr}. Most callers should use {@link Expr#singleThreaded(Expr, InputBindingInspector)} to convert
@@ -178,7 +178,7 @@ public BindingAnalysis analyzeInputs()
@Override
public boolean canVectorize(InputBindingInspector inspector)
{
return getOutputType(inspector) != null && inspector.canVectorize(args);
return canFallbackVectorize(inspector, args);
}

@Override
@@ -45,19 +45,25 @@ public class ExpressionProcessing
@VisibleForTesting
public static void initializeForTests()
{
INSTANCE = new ExpressionProcessingConfig(null, null, null);
INSTANCE = new ExpressionProcessingConfig(null, null, null, null);
}

@VisibleForTesting
public static void initializeForStrictBooleansTests(boolean useStrict)
{
INSTANCE = new ExpressionProcessingConfig(useStrict, null, null);
INSTANCE = new ExpressionProcessingConfig(useStrict, null, null, null);
}

@VisibleForTesting
public static void initializeForHomogenizeNullMultiValueStrings()
{
INSTANCE = new ExpressionProcessingConfig(null, null, true);
INSTANCE = new ExpressionProcessingConfig(null, null, true, null);
}

@VisibleForTesting
public static void initializeForFallback()
{
INSTANCE = new ExpressionProcessingConfig(null, null, null, true);
}

/**
@@ -90,6 +96,12 @@ public static boolean isHomogenizeNullMultiValueStringArrays()
return INSTANCE.isHomogenizeNullMultiValueStringArrays();
}

public static boolean allowVectorizeFallback()
{
checkInitialized();
return INSTANCE.allowVectorizeFallback();
}

private static void checkInitialized()
{
// this should only be null in a unit test context, in production this will be injected by the null handling module
@@ -37,6 +37,7 @@ public class ExpressionProcessingConfig
// Coerce 'null', '[]', and '[null]' into '[null]' for backwards compat with 0.22 and earlier
public static final String HOMOGENIZE_NULL_MULTIVALUE_STRING_ARRAYS =
"druid.expressions.homogenizeNullMultiValueStringArrays";
public static final String ALLOW_VECTORIZE_FALLBACK = "druid.expressions.allowVectorizeFallback";

@JsonProperty("useStrictBooleans")
private final boolean useStrictBooleans;
@@ -47,11 +48,15 @@ public class ExpressionProcessingConfig
@JsonProperty("homogenizeNullMultiValueStringArrays")
private final boolean homogenizeNullMultiValueStringArrays;

@JsonProperty("allowVectorizeFallback")
private final boolean allowVectorizeFallback;

@JsonCreator
public ExpressionProcessingConfig(
@JsonProperty("useStrictBooleans") @Nullable Boolean useStrictBooleans,
@JsonProperty("processArraysAsMultiValueStrings") @Nullable Boolean processArraysAsMultiValueStrings,
@JsonProperty("homogenizeNullMultiValueStringArrays") @Nullable Boolean homogenizeNullMultiValueStringArrays
@JsonProperty("homogenizeNullMultiValueStringArrays") @Nullable Boolean homogenizeNullMultiValueStringArrays,
@JsonProperty("allowVectorizeFallback") @Nullable Boolean allowVectorizeFallback
)
{
this.useStrictBooleans = getWithPropertyFallback(
@@ -67,6 +72,10 @@ public ExpressionProcessingConfig(
homogenizeNullMultiValueStringArrays,
HOMOGENIZE_NULL_MULTIVALUE_STRING_ARRAYS
);
this.allowVectorizeFallback = getWithPropertyFallbackFalse(
allowVectorizeFallback,
ALLOW_VECTORIZE_FALLBACK
);
String version = ExpressionProcessingConfig.class.getPackage().getImplementationVersion();
if (version == null || version.contains("SNAPSHOT")) {
version = "latest";
@@ -95,6 +104,11 @@ public boolean isHomogenizeNullMultiValueStringArrays()
return homogenizeNullMultiValueStringArrays;
}

public boolean allowVectorizeFallback()
{
return allowVectorizeFallback;
}

private static boolean getWithPropertyFallbackFalse(@Nullable Boolean value, String property)
{
return getWithPropertyFallback(value, property, "false");
@@ -100,8 +100,7 @@ public ExprEval eval(ObjectBinding bindings)
@Override
public boolean canVectorize(InputBindingInspector inspector)
{
return function.canVectorize(inspector, args)
|| (getOutputType(inspector) != null && inspector.canVectorize(args));
return function.canVectorize(inspector, args) || canFallbackVectorize(inspector, args);
}

@Override
@@ -226,7 +225,8 @@ public ExprEval eval(ObjectBinding bindings)
@Override
public boolean canVectorize(InputBindingInspector inspector)
{
return canVectorizeNative(inspector) || (getOutputType(inspector) != null && inspector.canVectorize(argsExpr));
return canVectorizeNative(inspector) ||
(canFallbackVectorize(inspector, argsExpr) && lambdaExpr.canVectorize(inspector));
}

@Override
@@ -30,6 +30,7 @@
import org.apache.druid.query.expression.NestedDataExpressions;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.Test;

import javax.annotation.Nullable;
@@ -264,11 +265,17 @@ public void testStringFns()
@Test
public void testArrayFns()
{
testExpression("array(s1, s2)", types);
testExpression("array(l1, l2)", types);
testExpression("array(d1, d2)", types);
testExpression("array(l1, d2)", types);
testExpression("array(s1, l2)", types);
try {
ExpressionProcessing.initializeForFallback();
testExpression("array(s1, s2)", types);
testExpression("array(l1, l2)", types);
testExpression("array(d1, d2)", types);
testExpression("array(l1, d2)", types);
testExpression("array(s1, l2)", types);
}
finally {
ExpressionProcessing.initializeForTests();
}
}

@Test
@@ -284,6 +291,7 @@ public void testCastArraysRoundTrip()
@Test
public void testJsonFns()
{
Assume.assumeTrue(ExpressionProcessing.allowVectorizeFallback());
testExpression("json_object('k1', s1, 'k2', l1)", types);
}

Expand Down
@@ -35,6 +35,7 @@
import org.apache.druid.java.util.common.granularity.PeriodGranularity;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.metrics.StubServiceEmitter;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.Druids;
import org.apache.druid.query.FinalizeResultsQueryRunner;
import org.apache.druid.query.MetricsEmittingQueryRunner;
@@ -2269,6 +2270,7 @@ public void testTimeSeriesWithFilteredAgg()
@Test
public void testTimeSeriesWithFilteredAggAndExpressionFilteredAgg()
{
cannotVectorizeUnlessFallback();
TimeseriesQuery query = Druids
.newTimeseriesQueryBuilder()
.dataSource(QueryRunnerTestHelper.DATA_SOURCE)
@@ -3278,4 +3280,12 @@ protected void cannotVectorize()
expectedException.expectMessage("Cannot vectorize!");
}
}

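/**
 * Like cannotVectorize(), but only expects the "Cannot vectorize!" error when vectorization is
 * requested and the allowVectorizeFallback config is off (the new default).
 */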
protected void cannotVectorizeUnlessFallback()
{
if (vectorize && !ExpressionProcessing.allowVectorizeFallback()) {
expectedException.expect(RuntimeException.class);
expectedException.expectMessage("Cannot vectorize!");
}
}
}
@@ -48,6 +48,7 @@
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprType;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.Parser;
import org.apache.druid.query.QueryContext;
@@ -1192,6 +1193,19 @@ protected void assertFilterMatchesSkipVectorize(
}
}

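/**
 * Asserts the filter matches, enabling vectorization only when the allowVectorizeFallback config
 * is on; the double-inverted (NOT NOT) form is also checked unless the test name contains "concise".
 */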
protected void assertFilterMatchesSkipVectorizeUnlessFallback(
final DimFilter filter,
final List<String> expectedRows
)
{
final boolean vectorize = ExpressionProcessing.allowVectorizeFallback();
assertFilterMatches(filter, expectedRows, vectorize);
// test double inverted
if (!StringUtils.toLowerCase(testName).contains("concise")) {
assertFilterMatches(NotDimFilter.of(NotDimFilter.of(filter)), expectedRows, vectorize);
}
}

private void assertFilterMatches(
final DimFilter filter,
final List<String> expectedRows,
@@ -1644,7 +1644,7 @@ public void testArraysAsMvds()
"5", .. [null], [123L, 345L], null
*/

assertFilterMatches(
assertFilterMatchesSkipVectorizeUnlessFallback(
new EqualityFilter(
"arrayStringAsMvd",
ColumnType.STRING,
@@ -1653,7 +1653,7 @@
),
ImmutableList.of("0", "3")
);
assertFilterMatches(
assertFilterMatchesSkipVectorizeUnlessFallback(
NotDimFilter.of(
new EqualityFilter(
"arrayStringAsMvd",
@@ -1667,7 +1667,7 @@
: ImmutableList.of("1", "2", "4", "5")
);

assertFilterMatches(
assertFilterMatchesSkipVectorizeUnlessFallback(
new EqualityFilter(
"arrayLongAsMvd",
ColumnType.STRING,
@@ -1676,7 +1676,7 @@
),
ImmutableList.of("0", "2")
);
assertFilterMatches(
assertFilterMatchesSkipVectorizeUnlessFallback(
NotDimFilter.of(
new EqualityFilter(
"arrayLongAsMvd",
@@ -1690,7 +1690,7 @@
: ImmutableList.of("1", "3", "4", "5")
);

assertFilterMatches(
assertFilterMatchesSkipVectorizeUnlessFallback(
new EqualityFilter(
"arrayDoubleAsMvd",
ColumnType.STRING,
@@ -1699,7 +1699,7 @@
),
ImmutableList.of("0", "1")
);
assertFilterMatches(
assertFilterMatchesSkipVectorizeUnlessFallback(
NotDimFilter.of(
new EqualityFilter(
"arrayDoubleAsMvd",
@@ -1713,7 +1713,7 @@
: ImmutableList.of("2", "3", "4", "5")
);

assertFilterMatches(
assertFilterMatchesSkipVectorizeUnlessFallback(
new EqualityFilter(
"arrayConstantAsMvd",
ColumnType.STRING,
