From fa541d2bda0247fac4ff94f82e1bc2b31b3163b1 Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Wed, 13 Nov 2024 14:08:58 +0100 Subject: [PATCH] [8.x] ESQL: optimise aggregations filtered by false/null into evals (#115858) (#116713) * ESQL: optimise aggregations filtered by false/null into evals (#115858) This adds a new optimiser rule to extract aggregate functions filtered by a `FALSE` or `NULL` into evals. The value taken by the evaluation is `0L`, for `COUNT()` and `COUNT_DISTINCT()`, `NULL` otherwise. Example: ``` ... | STATS x = someAgg(y) WHERE FALSE {BY z} | ... => ... | STATS x = someAgg(y) {BY z} > | EVAL x = NULL | KEEP x{, z} | ... ``` Related: #114352. * swap out list's getFirst/Last --- docs/changelog/115858.yaml | 5 + .../src/main/resources/stats.csv-spec | 110 +++++++ .../optimizer/LocalLogicalPlanOptimizer.java | 27 +- .../esql/optimizer/LogicalPlanOptimizer.java | 2 + .../ReplaceStatsFilteredAggWithEval.java | 88 ++++++ .../xpack/esql/rule/RuleExecutor.java | 4 + .../optimizer/LogicalPlanOptimizerTests.java | 276 ++++++++++++++++++ 7 files changed, 505 insertions(+), 7 deletions(-) create mode 100644 docs/changelog/115858.yaml create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceStatsFilteredAggWithEval.java diff --git a/docs/changelog/115858.yaml b/docs/changelog/115858.yaml new file mode 100644 index 0000000000000..0c0408fa656f8 --- /dev/null +++ b/docs/changelog/115858.yaml @@ -0,0 +1,5 @@ +pr: 115858 +summary: "ESQL: optimise aggregations filtered by false/null into evals" +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 3a526c1549884..11f5689a4affe 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -2382,6 +2382,116 @@ max:integer |max_a:integer|min:integer | min_a:integer 74999 |null |25324 | null ; +statsWithAllFiltersFalse +required_capability: per_agg_filtering +from employees +| stats max = max(height.float) where false, + min = min(height.float) where to_string(null) == "abc", + count = count(height.float) where false, + count_distinct = count_distinct(salary) where to_string(null) == "def" +; + +max:double |min:double |count:long |count_distinct:long +null |null |0 |0 +; + +statsWithExpressionsAllFiltersFalse +required_capability: per_agg_filtering +from employees +| stats max = max(height.float + 1) where null, + count = count(height.float) + 2 where false, + mix = min(height.float + 1) + count_distinct(emp_no) + 2 where length(null) == 3 +; + +max:double |count:long |mix:double +null |2 |null +; + +statsWithFalseFilterAndGroup +required_capability: per_agg_filtering +from employees +| stats max = max(height.float + 1) where null, + count = count(height.float) + 2 where false + by job_positions +| sort job_positions +| limit 4 +; + +max:double |count:long |job_positions:keyword +null |2 |Accountant +null |2 |Architect +null |2 |Business Analyst +null |2 |Data Scientist +; + +statsWithFalseFiltersAndGroups +required_capability: per_agg_filtering +from employees +| eval my_length = length(concat(first_name, null)) +| stats count_distinct = count_distinct(height.float + 1) where null, + count = count(height.float) + 2 where false, + values = values(first_name) where my_length > 3 + by job_positions, is_rehired +| sort job_positions, is_rehired +| limit 10 +; + +count_distinct:long |count:long |values:keyword |job_positions:keyword |is_rehired:boolean +0 |2 |null |Accountant |false +0 |2 |null |Accountant |true +0 |2 |null |Accountant |null +0 |2 |null |Architect |false +0 |2 |null |Architect |true +0 |2 |null |Architect |null +0 |2 |null |Business Analyst |false +0 |2 |null |Business Analyst |true +0 |2 |null |Business Analyst |null +0 |2 |null |Data Scientist |false +; + +statsWithMixedFiltersAndGroup +required_capability: per_agg_filtering +from employees +| eval my_length = length(concat(first_name, null)) +| stats count = count(my_length) where false, + values = mv_slice(mv_sort(values(first_name)), 0, 1) + by job_positions +| sort job_positions +| limit 4 +; + +count:long |values:keyword |job_positions:keyword +0 |[Arumugam, Bojan] |Accountant +0 |[Alejandro, Charlene] |Architect +0 |[Basil, Breannda] |Business Analyst +0 |[Berni, Breannda] |Data Scientist +; + +prunedStatsFollowedByStats +from employees +| eval my_length = length(concat(first_name, null)) +| stats count = count(my_length) where false, + values = mv_slice(values(first_name), 0, 1) where my_length > 0 +| stats count_distinct = count_distinct(count) +; + +count_distinct:long +1 +; + +statsWithFalseFiltersFromRow +required_capability: per_agg_filtering +row x = null, a = 1, b = [2,3,4] +| stats c=max(a) where x + by b +; + +c:integer |b:integer +null |2 +null |3 +null |4 +; + statsWithBasicExpressionFiltered required_capability: per_agg_filtering from employees diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java index 44334ff112bad..3da07e9485af7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.esql.optimizer; import org.elasticsearch.xpack.esql.optimizer.rules.logical.PropagateEmptyRelation; +import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceStatsFilteredAggWithEval; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferIsNotNull; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferNonNullAggConstraint; import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.LocalPropagateEmptyRelation; @@ -15,6 +16,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceTopNWithLimitAndSort; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor; +import org.elasticsearch.xpack.esql.rule.Rule; import java.util.ArrayList; import java.util.List; @@ -50,20 +52,31 @@ protected List> batches() { rules.add(local); // TODO: if the local rules haven't touched the tree, the rest of the rules can be skipped rules.addAll(asList(operators(), cleanup())); - replaceRules(rules); - return rules; + return replaceRules(rules); } + @SuppressWarnings("unchecked") private List> replaceRules(List> listOfRules) { - for (Batch batch : listOfRules) { + List> newBatches = new ArrayList<>(listOfRules.size()); + for (var batch : listOfRules) { var rules = batch.rules(); - for (int i = 0; i < rules.length; i++) { - if (rules[i] instanceof PropagateEmptyRelation) { - rules[i] = new LocalPropagateEmptyRelation(); + List> newRules = new ArrayList<>(rules.length); + boolean updated = false; + for (var r : rules) { + if (r instanceof PropagateEmptyRelation) { + newRules.add(new LocalPropagateEmptyRelation()); + updated = true; + } else if (r instanceof ReplaceStatsFilteredAggWithEval) { + // skip it: once a fragment contains an Agg, this can no longer be pruned, which the rule can do + updated = true; + } else { + newRules.add(r); } } + batch = updated ? batch.with(newRules.toArray(Rule[]::new)) : batch; + newBatches.add(batch); } - return listOfRules; + return newBatches; } public LogicalPlan localOptimize(LogicalPlan plan) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index 77c5a494437ab..a0e257d1a8953 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -46,6 +46,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceLimitAndSortAsTopN; import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceOrderByExpressionWithEval; import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceRegexMatch; +import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceStatsFilteredAggWithEval; import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceTrivialTypeConversions; import org.elasticsearch.xpack.esql.optimizer.rules.logical.SetAsOptimized; import org.elasticsearch.xpack.esql.optimizer.rules.logical.SimplifyComparisonsArithmetics; @@ -170,6 +171,7 @@ protected static Batch operators() { new CombineBinaryComparisons(), new CombineDisjunctions(), new SimplifyComparisonsArithmetics(DataType::areCompatible), + new ReplaceStatsFilteredAggWithEval(), // prune/elimination new PruneFilters(), new PruneColumns(), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceStatsFilteredAggWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceStatsFilteredAggWithEval.java new file mode 100644 index 0000000000000..2cafcc2e07052 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceStatsFilteredAggWithEval.java @@ -0,0 +1,88 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.optimizer.rules.logical; + +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockUtils; +import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.expression.function.aggregate.Count; +import org.elasticsearch.xpack.esql.expression.function.aggregate.CountDistinct; +import org.elasticsearch.xpack.esql.plan.logical.Aggregate; +import org.elasticsearch.xpack.esql.plan.logical.Eval; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.logical.Project; +import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; +import org.elasticsearch.xpack.esql.plan.logical.local.LocalSupplier; +import org.elasticsearch.xpack.esql.planner.PlannerUtils; + +import java.util.ArrayList; +import java.util.List; + +/** + * Replaces an aggregation function having a false/null filter with an EVAL node. + *
+ *     ... | STATS x = someAgg(y) WHERE FALSE {BY z} | ...
+ *     =>
+ *     ... | STATS x = someAgg(y) {BY z} > | EVAL x = NULL | KEEP x{, z} | ...
+ * 
+ */ +public class ReplaceStatsFilteredAggWithEval extends OptimizerRules.OptimizerRule { + @Override + protected LogicalPlan rule(Aggregate aggregate) { + int oldAggSize = aggregate.aggregates().size(); + List newAggs = new ArrayList<>(oldAggSize); + List newEvals = new ArrayList<>(oldAggSize); + List newProjections = new ArrayList<>(oldAggSize); + + for (var ne : aggregate.aggregates()) { + if (ne instanceof Alias alias + && alias.child() instanceof AggregateFunction aggFunction + && aggFunction.hasFilter() + && aggFunction.filter() instanceof Literal literal + && Boolean.FALSE.equals(literal.fold())) { + + Object value = aggFunction instanceof Count || aggFunction instanceof CountDistinct ? 0L : null; + Alias newAlias = alias.replaceChild(Literal.of(aggFunction, value)); + newEvals.add(newAlias); + newProjections.add(newAlias.toAttribute()); + } else { + newAggs.add(ne); // agg function unchanged or grouping key + newProjections.add(ne.toAttribute()); + } + } + + LogicalPlan plan = aggregate; + if (newEvals.isEmpty() == false) { + if (newAggs.isEmpty()) { // the Aggregate node is pruned + plan = localRelation(aggregate.source(), newEvals); + } else { + plan = aggregate.with(aggregate.child(), aggregate.groupings(), newAggs); + plan = new Eval(aggregate.source(), plan, newEvals); + plan = new Project(aggregate.source(), plan, newProjections); + } + } + return plan; + } + + private static LocalRelation localRelation(Source source, List newEvals) { + Block[] blocks = new Block[newEvals.size()]; + List attributes = new ArrayList<>(newEvals.size()); + for (int i = 0; i < newEvals.size(); i++) { + Alias alias = newEvals.get(i); + attributes.add(alias.toAttribute()); + blocks[i] = BlockUtils.constantBlock(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, ((Literal) alias.child()).value(), 1); + } + return new LocalRelation(source, attributes, LocalSupplier.of(blocks)); + + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/RuleExecutor.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/RuleExecutor.java index 3d73c0d45e9a0..7df5a029d724e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/RuleExecutor.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/rule/RuleExecutor.java @@ -68,6 +68,10 @@ public String name() { return name; } + public Batch with(Rule[] rules) { + return new Batch<>(name, limit, rules); + } + public Rule[] rules() { return rules; } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index fdc4935d457e9..3f7e9e7c7c0b5 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -11,6 +11,8 @@ import org.elasticsearch.common.logging.LoggerMessageFormat; import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.compute.aggregation.QuantileStates; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.LongVectorBlock; import org.elasticsearch.core.Tuple; import org.elasticsearch.dissect.DissectParser; import org.elasticsearch.index.IndexMode; @@ -148,6 +150,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; +import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.EQ; import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.GT; import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.GTE; @@ -166,6 +169,7 @@ import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; import static org.hamcrest.Matchers.startsWith; //@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE", reason = "debug") @@ -564,6 +568,278 @@ public void testStatsWithFilteringDefaultAliasing() { assertThat(Expressions.names(agg.aggregates()), contains("sum(salary)", "sum(salary) WheRe last_name == \"Doe\"")); } + /* + * Limit[1000[INTEGER]] + * \_LocalRelation[[sum(salary) where false{r}#26],[ConstantNullBlock[positions=1]]] + */ + public void testReplaceStatsFilteredAggWithEvalSingleAgg() { + var plan = plan(""" + from test + | stats sum(salary) where false + """); + + var project = as(plan, Limit.class); + var source = as(project.child(), LocalRelation.class); + assertThat(Expressions.names(source.output()), contains("sum(salary) where false")); + Block[] blocks = source.supplier().get(); + assertThat(blocks.length, is(1)); + assertThat(blocks[0].getPositionCount(), is(1)); + assertTrue(blocks[0].areAllValuesNull()); + } + + /* + * Project[[sum(salary) + 1 where false{r}#68]] + * \_Eval[[$$SUM$sum(salary)_+_1$0{r$}#79 + 1[INTEGER] AS sum(salary) + 1 where false]] + * \_Limit[1000[INTEGER]] + * \_LocalRelation[[$$SUM$sum(salary)_+_1$0{r$}#79],[ConstantNullBlock[positions=1]]] + */ + public void testReplaceStatsFilteredAggWithEvalSingleAggWithExpression() { + var plan = plan(""" + from test + | stats sum(salary) + 1 where false + """); + + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), contains("sum(salary) + 1 where false")); + + var eval = as(project.child(), Eval.class); + assertThat(eval.fields().size(), is(1)); + var alias = as(eval.fields().get(0), Alias.class); + assertThat(alias.name(), is("sum(salary) + 1 where false")); + var add = as(alias.child(), Add.class); + var literal = as(add.right(), Literal.class); + assertThat(literal.fold(), is(1)); + + var limit = as(eval.child(), Limit.class); + var source = as(limit.child(), LocalRelation.class); + + Block[] blocks = source.supplier().get(); + assertThat(blocks.length, is(1)); + assertThat(blocks[0].getPositionCount(), is(1)); + assertTrue(blocks[0].areAllValuesNull()); + } + + /* + * Project[[sum(salary) + 1 where false{r}#4, sum(salary) + 2{r}#6, emp_no{f}#7]] + * \_Eval[[null[LONG] AS sum(salary) + 1 where false, $$SUM$sum(salary)_+_2$1{r$}#18 + 2[INTEGER] AS sum(salary) + 2]] + * \_Limit[1000[INTEGER]] + * \_Aggregate[STANDARD,[emp_no{f}#7],[SUM(salary{f}#12,true[BOOLEAN]) AS $$SUM$sum(salary)_+_2$1, emp_no{f}#7]] + * \_EsRelation[test][_meta_field{f}#13, emp_no{f}#7, first_name{f}#8, ge..] + */ + public void testReplaceStatsFilteredAggWithEvalMixedFilterAndNoFilter() { + var plan = plan(""" + from test + | stats sum(salary) + 1 where false, + sum(salary) + 2 + by emp_no + """); + + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), contains("sum(salary) + 1 where false", "sum(salary) + 2", "emp_no")); + var eval = as(project.child(), Eval.class); + assertThat(eval.fields().size(), is(2)); + + var alias = as(eval.fields().get(0), Alias.class); + assertTrue(alias.child().foldable()); + assertThat(alias.child().fold(), nullValue()); + assertThat(alias.child().dataType(), is(LONG)); + + alias = as(eval.fields().get(1), Alias.class); + assertThat(Expressions.name(alias.child()), containsString("sum(salary) + 2")); + + var limit = as(eval.child(), Limit.class); + var aggregate = as(limit.child(), Aggregate.class); + var source = as(aggregate.child(), EsRelation.class); + } + + /* + * Project[[sum(salary) + 1 where false{r}#3, sum(salary) + 3{r}#5, sum(salary) + 2 where false{r}#7]] + * \_Eval[[null[LONG] AS sum(salary) + 1 where false, $$SUM$sum(salary)_+_3$1{r$}#19 + 3[INTEGER] AS sum(salary) + 3, nu + * ll[LONG] AS sum(salary) + 2 where false]] + * \_Limit[1000[INTEGER]] + * \_Aggregate[STANDARD,[],[SUM(salary{f}#13,true[BOOLEAN]) AS $$SUM$sum(salary)_+_3$1]] + * \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..] + */ + public void testReplaceStatsFilteredAggWithEvalFilterFalseAndNull() { + var plan = plan(""" + from test + | stats sum(salary) + 1 where false, + sum(salary) + 3, + sum(salary) + 2 where null + """); + + var project = as(plan, Project.class); + assertThat( + Expressions.names(project.projections()), + contains("sum(salary) + 1 where false", "sum(salary) + 3", "sum(salary) + 2 where null") + ); + var eval = as(project.child(), Eval.class); + assertThat(eval.fields().size(), is(3)); + + var alias = as(eval.fields().get(0), Alias.class); + assertTrue(alias.child().foldable()); + assertThat(alias.child().fold(), nullValue()); + assertThat(alias.child().dataType(), is(LONG)); + + alias = as(eval.fields().get(1), Alias.class); + assertThat(Expressions.name(alias.child()), containsString("sum(salary) + 3")); + + alias = as(eval.fields().get(2), Alias.class); + assertTrue(alias.child().foldable()); + assertThat(alias.child().fold(), nullValue()); + assertThat(alias.child().dataType(), is(LONG)); + + var limit = as(eval.child(), Limit.class); + var aggregate = as(limit.child(), Aggregate.class); + var source = as(aggregate.child(), EsRelation.class); + } + + /* + * Limit[1000[INTEGER]] + * \_LocalRelation[[count(salary) where false{r}#3],[LongVectorBlock[vector=ConstantLongVector[positions=1, value=0]]]] + */ + public void testReplaceStatsFilteredAggWithEvalCount() { + var plan = plan(""" + from test + | stats count(salary) where false + """); + + var limit = as(plan, Limit.class); + var source = as(limit.child(), LocalRelation.class); + assertThat(Expressions.names(source.output()), contains("count(salary) where false")); + Block[] blocks = source.supplier().get(); + assertThat(blocks.length, is(1)); + var block = as(blocks[0], LongVectorBlock.class); + assertThat(block.getPositionCount(), is(1)); + assertThat(block.asVector().getLong(0), is(0L)); + } + + /* + * Project[[count_distinct(salary + 2) + 3 where false{r}#3]] + * \_Eval[[$$COUNTDISTINCT$count_distinct(>$0{r$}#15 + 3[INTEGER] AS count_distinct(salary + 2) + 3 where false]] + * \_Limit[1000[INTEGER]] + * \_LocalRelation[[$$COUNTDISTINCT$count_distinct(>$0{r$}#15],[LongVectorBlock[vector=ConstantLongVector[positions=1, value=0]]]] + */ + public void testReplaceStatsFilteredAggWithEvalCountDistinctInExpression() { + var plan = plan(""" + from test + | stats count_distinct(salary + 2) + 3 where false + """); + + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), contains("count_distinct(salary + 2) + 3 where false")); + + var eval = as(project.child(), Eval.class); + assertThat(eval.fields().size(), is(1)); + var alias = as(eval.fields().get(0), Alias.class); + assertThat(alias.name(), is("count_distinct(salary + 2) + 3 where false")); + var add = as(alias.child(), Add.class); + var literal = as(add.right(), Literal.class); + assertThat(literal.fold(), is(3)); + + var limit = as(eval.child(), Limit.class); + var source = as(limit.child(), LocalRelation.class); + + Block[] blocks = source.supplier().get(); + assertThat(blocks.length, is(1)); + var block = as(blocks[0], LongVectorBlock.class); + assertThat(block.getPositionCount(), is(1)); + assertThat(block.asVector().getLong(0), is(0L)); + } + + /* + * Project[[max{r}#91, max_a{r}#94, min{r}#97, min_a{r}#100, emp_no{f}#101]] + * \_Eval[[null[INTEGER] AS max_a, null[INTEGER] AS min_a]] + * \_Limit[1000[INTEGER]] + * \_Aggregate[STANDARD,[emp_no{f}#101],[MAX(salary{f}#106,true[BOOLEAN]) AS max, MIN(salary{f}#106,true[BOOLEAN]) AS min, emp_ + * no{f}#101]] + * \_EsRelation[test][_meta_field{f}#107, emp_no{f}#101, first_name{f}#10..] + */ + public void testReplaceStatsFilteredAggWithEvalSameAggWithAndWithoutFilter() { + var plan = plan(""" + from test + | stats max = max(salary), max_a = max(salary) where null, + min = min(salary), min_a = min(salary) where to_string(null) == "abc" + by emp_no + """); + + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), contains("max", "max_a", "min", "min_a", "emp_no")); + var eval = as(project.child(), Eval.class); + assertThat(eval.fields().size(), is(2)); + + var alias = as(eval.fields().get(0), Alias.class); + assertThat(Expressions.name(alias), containsString("max_a")); + assertTrue(alias.child().foldable()); + assertThat(alias.child().fold(), nullValue()); + assertThat(alias.child().dataType(), is(INTEGER)); + + alias = as(eval.fields().get(1), Alias.class); + assertThat(Expressions.name(alias), containsString("min_a")); + assertTrue(alias.child().foldable()); + assertThat(alias.child().fold(), nullValue()); + assertThat(alias.child().dataType(), is(INTEGER)); + + var limit = as(eval.child(), Limit.class); + + var aggregate = as(limit.child(), Aggregate.class); + assertThat(Expressions.names(aggregate.aggregates()), contains("max", "min", "emp_no")); + + var source = as(aggregate.child(), EsRelation.class); + } + + /* + * Limit[1000[INTEGER]] + * \_LocalRelation[[count{r}#7],[LongVectorBlock[vector=ConstantLongVector[positions=1, value=0]]]] + */ + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/100634") // i.e. PropagateEvalFoldables applicability to Aggs + public void testReplaceStatsFilteredAggWithEvalFilterUsingEvaledValue() { + var plan = plan(""" + from test + | eval my_length = length(concat(first_name, null)) + | stats count = count(my_length) where my_length > 0 + """); + + var limit = as(plan, Limit.class); + var source = as(limit.child(), LocalRelation.class); + assertThat(Expressions.names(source.output()), contains("count")); + Block[] blocks = source.supplier().get(); + assertThat(blocks.length, is(1)); + var block = as(blocks[0], LongVectorBlock.class); + assertThat(block.getPositionCount(), is(1)); + assertThat(block.asVector().getLong(0), is(0L)); + } + + /* + * Project[[c{r}#67, emp_no{f}#68]] + * \_Eval[[0[LONG] AS c]] + * \_Limit[1000[INTEGER]] + * \_Aggregate[STANDARD,[emp_no{f}#68],[emp_no{f}#68]] + * \_EsRelation[test][_meta_field{f}#74, emp_no{f}#68, first_name{f}#69, ..] + */ + public void testReplaceStatsFilteredAggWithEvalSingleAggWithGroup() { + var plan = plan(""" + from test + | stats c = count(emp_no) where false + by emp_no + """); + + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), contains("c", "emp_no")); + + var eval = as(project.child(), Eval.class); + assertThat(eval.fields().size(), is(1)); + var alias = as(eval.fields().get(0), Alias.class); + assertThat(Expressions.name(alias), containsString("c")); + + var limit = as(eval.child(), Limit.class); + + var aggregate = as(limit.child(), Aggregate.class); + assertThat(Expressions.names(aggregate.aggregates()), contains("emp_no")); + + var source = as(aggregate.child(), EsRelation.class); + } + public void testQlComparisonOptimizationsApply() { var plan = plan(""" from test