Commit
fix expression column capabilities to not report dictionary encoded unless input is string (apache#16577)
clintropolis authored Jun 8, 2024
1 parent 40ba429 commit 3fb6ba2
Showing 5 changed files with 157 additions and 23 deletions.
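
In summary: the nested-format column reader no longer force-sets dictionary-encoding capability flags when reading a column (first hunk below), and the expression planner's inferColumnCapabilities now builds the output capabilities from scratch instead of copying the input column's, only reporting the expression output as dictionary encoded when the underlying input is itself a dictionary-encoded string; bitmap-index, multi-value, and spatial-index flags are now passed through from the input column. The key new logic, excerpted from the second hunk below:

// only report dictionary encoding when the input column is a dictionary-encoded string
final boolean useDictionary = underlyingCapabilities.isDictionaryEncoded().isTrue() &&
                              underlyingCapabilities.is(ValueType.STRING);
return ColumnCapabilitiesImpl.createDefault()
                             .setType(ColumnType.STRING)
                             .setDictionaryValuesSorted(false)
                             .setDictionaryValuesUnique(false)
                             .setDictionaryEncoded(useDictionary)
                             .setHasBitmapIndexes(underlyingCapabilities.hasBitmapIndexes())
                             .setHasMultipleValues(underlyingCapabilities.hasMultipleValues())
                             .setHasSpatialIndexes(underlyingCapabilities.hasSpatialIndexes())
                             .setHasNulls(true);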
@@ -303,10 +303,6 @@ public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnCo
bitmapSerdeFactory,
byteOrder
);
- ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder();
- capabilitiesBuilder.setDictionaryEncoded(true);
- capabilitiesBuilder.setDictionaryValuesSorted(true);
- capabilitiesBuilder.setDictionaryValuesUnique(true);
ColumnType simpleType = supplier.getLogicalType();
ColumnType logicalType = simpleType == null ? ColumnType.NESTED_DATA : simpleType;
builder.setType(logicalType);
@@ -274,12 +274,18 @@ public ColumnCapabilities inferColumnCapabilities(@Nullable ColumnType outputTyp
// since we don't know if the expression is 1:1 or if it retains ordering we can only piggy back only
// report as dictionary encoded, but it still allows us to use algorithms which work with dictionaryIds
// to create a dictionary encoded selector instead of an object selector to defer expression evaluation
- // until query time
- return ColumnCapabilitiesImpl.copyOf(underlyingCapabilities)
+ // until query time. However, currently dictionary encodedness is tied to string selectors and sad stuff
+ // happens if the input type isn't string, so we also limit this to string input types
+ final boolean useDictionary = underlyingCapabilities.isDictionaryEncoded().isTrue() &&
+ underlyingCapabilities.is(ValueType.STRING);
+ return ColumnCapabilitiesImpl.createDefault()
.setType(ColumnType.STRING)
.setDictionaryValuesSorted(false)
.setDictionaryValuesUnique(false)
- .setHasBitmapIndexes(false)
+ .setDictionaryEncoded(useDictionary)
+ .setHasBitmapIndexes(underlyingCapabilities.hasBitmapIndexes())
+ .setHasMultipleValues(underlyingCapabilities.hasMultipleValues())
+ .setHasSpatialIndexes(underlyingCapabilities.hasSpatialIndexes())
// we aren't sure if the expression might produce null values or not, so always
// set hasNulls to true
.setHasNulls(true);
}
}
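
The behavior is exercised by a new planner test added further down in this commit. Condensed, and assuming that test's fixtures (its synthetic ColumnInspector with a dictionary-encoded complex column named dictionary_complex, and the test-only dict_complex_to_string macro), the check amounts to:

// an expression over a dictionary-encoded complex column infers STRING output
// capabilities that are no longer reported as dictionary encoded
ExpressionPlan thePlan = ExpressionPlanner.plan(
    SYNTHETIC_INSPECTOR,
    Parser.parse("dict_complex_to_string(dictionary_complex)", MACRO_TABLE)
);
ColumnCapabilities inferred = thePlan.inferColumnCapabilities(
    ExpressionType.toColumnType(thePlan.getOutputType())
);
// inferred.getType() is STRING and inferred.isDictionaryEncoded().isMaybeTrue() is false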
@@ -19,8 +19,13 @@

package org.apache.druid.segment.virtual;

+ import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.common.config.NullHandling;
+ import org.apache.druid.error.DruidException;
+ import org.apache.druid.math.expr.Expr;
+ import org.apache.druid.math.expr.ExprEval;
+ import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.Parser;
import org.apache.druid.query.expression.TestExprMacroTable;
@@ -36,11 +41,13 @@
import org.junit.rules.ExpectedException;

import javax.annotation.Nullable;
+ import java.util.List;
import java.util.Map;

public class ExpressionPlannerTest extends InitializedNullHandlingTest
{
- public static final ColumnInspector SYNTHETIC_INSPECTOR = new ColumnInspector()
+ private static ColumnType DICTIONARY_COMPLEX = ColumnType.ofComplex("dictionaryComplex");
+ private static final ColumnInspector SYNTHETIC_INSPECTOR = new ColumnInspector()
{
private final Map<String, ColumnCapabilities> capabilitiesMap =
ImmutableMap.<String, ColumnCapabilities>builder()
@@ -141,6 +148,12 @@ public class ExpressionPlannerTest extends InitializedNullHandlingTest
"double_array_2",
ColumnCapabilitiesImpl.createSimpleArrayColumnCapabilities(ColumnType.DOUBLE_ARRAY)
)
+ .put(
+ "dictionary_complex",
+ ColumnCapabilitiesImpl.createDefault()
+ .setDictionaryEncoded(true)
+ .setType(DICTIONARY_COMPLEX)
+ )
.build();

@Nullable
@@ -151,6 +164,8 @@ public ColumnCapabilities getColumnCapabilities(String column)
}
};

+ private static final TestMacroTable MACRO_TABLE = new TestMacroTable();

@Rule
public ExpectedException expectedException = ExpectedException.none();

@@ -369,7 +384,7 @@ public void testScalarStringDictionaryEncoded()
Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
Assert.assertFalse(inferred.hasMultipleValues().isMaybeTrue());
- Assert.assertFalse(inferred.hasBitmapIndexes());
+ Assert.assertTrue(inferred.hasBitmapIndexes());
Assert.assertFalse(inferred.hasSpatialIndexes());

// multiple input columns
@@ -463,7 +478,7 @@ public void testMultiValueStringDictionaryEncoded()
Assert.assertFalse(inferred.areDictionaryValuesSorted().isMaybeTrue());
Assert.assertFalse(inferred.areDictionaryValuesUnique().isMaybeTrue());
Assert.assertTrue(inferred.hasMultipleValues().isTrue());
- Assert.assertFalse(inferred.hasBitmapIndexes());
+ Assert.assertTrue(inferred.hasBitmapIndexes());
Assert.assertFalse(inferred.hasSpatialIndexes());

thePlan = plan("concat(scalar_string, multi_dictionary_string_nonunique)");
@@ -599,7 +614,7 @@ public void testIncompleteString()
)
);
Assert.assertFalse(
- thePlan.is(
+ thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
@@ -671,7 +686,7 @@ public void testArrayOutput()
)
);
Assert.assertFalse(
- thePlan.is(
+ thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
@@ -719,7 +734,22 @@ public void testScalarOutputMultiValueInput()

// what about a multi-valued input
thePlan = plan("array_to_string(array_append(scalar_string, multi_dictionary_string), ',')");
- assertArrayInput(thePlan);
+ Assert.assertTrue(
+ thePlan.is(
+ ExpressionPlan.Trait.NON_SCALAR_INPUTS,
+ ExpressionPlan.Trait.NEEDS_APPLIED
+ )
+ );
+ Assert.assertFalse(
+ thePlan.any(
+ ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
+ ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
+ ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
+ ExpressionPlan.Trait.INCOMPLETE_INPUTS,
+ ExpressionPlan.Trait.UNKNOWN_INPUTS,
+ ExpressionPlan.Trait.VECTORIZABLE
+ )
+ );

Assert.assertEquals(
"array_to_string(map((\"multi_dictionary_string\") -> array_append(\"scalar_string\", \"multi_dictionary_string\"), \"multi_dictionary_string\"), ',')",
@@ -789,7 +819,7 @@ public void testArrayConstruction()
)
);
Assert.assertFalse(
- thePlan.is(
+ thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
@@ -812,15 +842,14 @@ public void testNestedColumnExpression()
{
ExpressionPlan thePlan = plan("json_object('long1', long1, 'long2', long2)");
Assert.assertFalse(
- thePlan.is(
+ thePlan.any(
ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
ExpressionPlan.Trait.INCOMPLETE_INPUTS,
ExpressionPlan.Trait.NEEDS_APPLIED,
- ExpressionPlan.Trait.NON_SCALAR_INPUTS,
- ExpressionPlan.Trait.VECTORIZABLE
+ ExpressionPlan.Trait.NON_SCALAR_INPUTS
)
);
Assert.assertEquals(ExpressionType.NESTED_DATA, thePlan.getOutputType());
@@ -837,9 +866,40 @@ public void testNestedColumnExpression()
);
}

@Test
public void testDictionaryComplexStringOutput()
{
ExpressionPlan thePlan = plan("dict_complex_to_string(dictionary_complex)");
Assert.assertFalse(
thePlan.any(
ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.UNKNOWN_INPUTS,
ExpressionPlan.Trait.INCOMPLETE_INPUTS,
ExpressionPlan.Trait.NEEDS_APPLIED,
ExpressionPlan.Trait.NON_SCALAR_INPUTS
)
);
Assert.assertTrue(
thePlan.is(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.VECTORIZABLE
)
);
Assert.assertEquals(ExpressionType.STRING, thePlan.getOutputType());
ColumnCapabilities inferred = thePlan.inferColumnCapabilities(
ExpressionType.toColumnType(thePlan.getOutputType())
);
Assert.assertEquals(
ColumnType.STRING.getType(),
inferred.getType()
);
Assert.assertFalse(inferred.isDictionaryEncoded().isMaybeTrue());
}

private static ExpressionPlan plan(String expression)
{
- return ExpressionPlanner.plan(SYNTHETIC_INSPECTOR, Parser.parse(expression, TestExprMacroTable.INSTANCE));
+ return ExpressionPlanner.plan(SYNTHETIC_INSPECTOR, Parser.parse(expression, MACRO_TABLE));
}

private static void assertArrayInput(ExpressionPlan thePlan)
@@ -850,7 +910,7 @@ private static void assertArrayInput(ExpressionPlan thePlan)
)
);
Assert.assertFalse(
- thePlan.is(
+ thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
@@ -871,7 +931,7 @@ private static void assertArrayInAndOut(ExpressionPlan thePlan)
)
);
Assert.assertFalse(
- thePlan.is(
+ thePlan.any(
ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE,
ExpressionPlan.Trait.INCOMPLETE_INPUTS,
@@ -881,4 +941,44 @@ private static void assertArrayInAndOut(ExpressionPlan thePlan)
)
);
}

private static class TestMacroTable extends ExprMacroTable
{
public TestMacroTable()
{
super(
ImmutableList.<ExprMacroTable.ExprMacro>builder()
.addAll(TestExprMacroTable.INSTANCE.getMacros())
.add(new ExprMacroTable.ExprMacro()
{
@Override
public Expr apply(List<Expr> args)
{
return new ExprMacroTable.BaseScalarMacroFunctionExpr(this, args)
{
@Override
public ExprEval eval(ObjectBinding bindings)
{
throw DruidException.defensive("just for planner test");
}

@Nullable
@Override
public ExpressionType getOutputType(InputBindingInspector inspector)
{
return ExpressionType.STRING;
}
};
}

@Override
public String name()
{
return "dict_complex_to_string";
}
})
.build()
);
}
}
}
@@ -115,7 +115,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest
ImmutableMap.of(
"x", 3L,
"y", 4L,
"b", Arrays.asList(new String[]{"3", null, "5"})
"b", Arrays.asList("3", null, "5")
)
);

@@ -7558,4 +7558,36 @@ public void testGroupByAutoDouble()
.build()
);
}

@Test
public void testToJsonString()
{
testQuery(
"SELECT TO_JSON_STRING(nester) FROM druid.nested GROUP BY 1",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(DATA_SOURCE)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setDimensions(
dimensions(
new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)
)
)
.setVirtualColumns(
expressionVirtualColumn("v0", "to_json_string(\"nester\")", ColumnType.STRING)
)
.setContext(QUERY_CONTEXT_DEFAULT)
.build()
),
ImmutableList.of(
new Object[]{NullHandling.defaultStringValue()},
new Object[]{"\"hello\""},
new Object[]{"2"},
new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":\"hello\"}}"},
new Object[]{"{\"array\":[\"a\",\"b\"],\"n\":{\"x\":1}}"}
),
RowSignature.builder().add("EXPR$0", ColumnType.STRING).build()
);
}
}
