From 74d964b9b16b7c8837ec0869b15e2c2bdee416cd Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 26 Aug 2024 15:01:22 -0400 Subject: [PATCH] ESQL: Fix a bug in `MV_PERCENTILE` (#112218) This fixes a bug in `MV_PERCENTILE` that was producing incorrect results on when the `Block` was in ascending order. We were always reading from the first entry in the block. Closes #112188 Closes #112187 Closes #112193 Closes #112180 --- docs/changelog/112218.yaml | 9 + muted-tests.yml | 12 -- .../scalar/multivalue/MvPercentile.java | 22 ++- .../multivalue/MvPercentileSimpleTests.java | 154 ++++++++++++++++++ .../scalar/multivalue/MvPercentileTests.java | 17 +- 5 files changed, 194 insertions(+), 20 deletions(-) create mode 100644 docs/changelog/112218.yaml create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvPercentileSimpleTests.java diff --git a/docs/changelog/112218.yaml b/docs/changelog/112218.yaml new file mode 100644 index 0000000000000..c426dd7ade4ed --- /dev/null +++ b/docs/changelog/112218.yaml @@ -0,0 +1,9 @@ +pr: 112218 +summary: "ESQL: Fix a bug in `MV_PERCENTILE`" +area: ES|QL +type: bug +issues: + - 112193 + - 112180 + - 112187 + - 112188 diff --git a/muted-tests.yml b/muted-tests.yml index 85c29759cabb2..eff599b758a1d 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -151,24 +151,12 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/112144 - class: org.elasticsearch.smoketest.SmokeTestMultiNodeClientYamlTestSuiteIT issue: https://github.com/elastic/elasticsearch/issues/112147 -- class: org.elasticsearch.xpack.esql.qa.multi_node.EsqlSpecIT - method: test {mv_percentile.FromIndex SYNC} - issue: https://github.com/elastic/elasticsearch/issues/112180 -- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT - method: test {mv_percentile.FromIndex SYNC} - issue: https://github.com/elastic/elasticsearch/issues/112187 -- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT - method: test {mv_percentile.FromIndex ASYNC} - issue: https://github.com/elastic/elasticsearch/issues/112188 - class: org.elasticsearch.smoketest.WatcherYamlRestIT method: test {p0=watcher/usage/10_basic/Test watcher usage stats output} issue: https://github.com/elastic/elasticsearch/issues/112189 - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=ml/inference_processor/Test create processor with missing mandatory fields} issue: https://github.com/elastic/elasticsearch/issues/112191 -- class: org.elasticsearch.xpack.esql.qa.multi_node.EsqlSpecIT - method: test {mv_percentile.FromIndex ASYNC} - issue: https://github.com/elastic/elasticsearch/issues/112193 - class: org.elasticsearch.xpack.ml.integration.MlJobIT method: testDeleteJobAsync issue: https://github.com/elastic/elasticsearch/issues/112212 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvPercentile.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvPercentile.java index b1e710b9b2a40..1eb0c70a7b08e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvPercentile.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvPercentile.java @@ -233,7 +233,7 @@ static void process( // Percentile calculators - private static double calculateDoublePercentile( + static double calculateDoublePercentile( DoubleBlock valuesBlock, int firstValueIndex, int valueCount, @@ -257,7 +257,11 @@ private static double calculateDoublePercentile( return valuesBlock.getDouble(valueCount - 1); } else { assert lowerIndex >= 0 && upperIndex < valueCount; - return calculateDoublePercentile(fraction, valuesBlock.getDouble(lowerIndex), valuesBlock.getDouble(upperIndex)); + return calculateDoublePercentile( + fraction, + valuesBlock.getDouble(firstValueIndex + lowerIndex), + valuesBlock.getDouble(firstValueIndex + upperIndex) + ); } } @@ -289,7 +293,7 @@ private static double calculateDoublePercentile( return calculateDoublePercentile(fraction, scratch.values[lowerIndex], scratch.values[upperIndex]); } - private static int calculateIntPercentile( + static int calculateIntPercentile( IntBlock valuesBlock, int firstValueIndex, int valueCount, @@ -313,8 +317,8 @@ private static int calculateIntPercentile( return valuesBlock.getInt(valueCount - 1); } else { assert lowerIndex >= 0 && upperIndex < valueCount; - var lowerValue = valuesBlock.getInt(lowerIndex); - var upperValue = valuesBlock.getInt(upperIndex); + var lowerValue = valuesBlock.getInt(firstValueIndex + lowerIndex); + var upperValue = valuesBlock.getInt(firstValueIndex + upperIndex); var difference = (long) upperValue - lowerValue; return lowerValue + (int) (fraction * difference); } @@ -351,7 +355,7 @@ private static int calculateIntPercentile( return lowerValue + (int) (fraction * difference); } - private static long calculateLongPercentile( + static long calculateLongPercentile( LongBlock valuesBlock, int firstValueIndex, int valueCount, @@ -375,7 +379,11 @@ private static long calculateLongPercentile( return valuesBlock.getLong(valueCount - 1); } else { assert lowerIndex >= 0 && upperIndex < valueCount; - return calculateLongPercentile(fraction, valuesBlock.getLong(lowerIndex), valuesBlock.getLong(upperIndex)); + return calculateLongPercentile( + fraction, + valuesBlock.getLong(firstValueIndex + lowerIndex), + valuesBlock.getLong(firstValueIndex + upperIndex) + ); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvPercentileSimpleTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvPercentileSimpleTests.java new file mode 100644 index 0000000000000..81ae8efb7aba7 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvPercentileSimpleTests.java @@ -0,0 +1,154 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.multivalue; + +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.TestBlockFactory; + +import static org.hamcrest.Matchers.equalTo; + +public class MvPercentileSimpleTests extends ESTestCase { + public void testDoubleMvAsc() { + try (DoubleBlock.Builder builder = TestBlockFactory.getNonBreakingInstance().newDoubleBlockBuilder(10)) { + builder.beginPositionEntry(); + builder.appendDouble(80); + builder.appendDouble(90); + builder.endPositionEntry(); + builder.beginPositionEntry(); + builder.appendDouble(-6.33); + builder.appendDouble(-3.34); + builder.appendDouble(-0.31); + builder.appendDouble(6.23); + builder.endPositionEntry(); + builder.mvOrdering(Block.MvOrdering.SORTED_ASCENDING); + try (DoubleBlock block = builder.build()) { + MvPercentile.DoubleSortingScratch scratch = new MvPercentile.DoubleSortingScratch(); + double p0 = MvPercentile.calculateDoublePercentile(block, block.getFirstValueIndex(0), block.getValueCount(0), 75, scratch); + double p1 = MvPercentile.calculateDoublePercentile(block, block.getFirstValueIndex(1), block.getValueCount(1), 75, scratch); + assertThat(p0, equalTo(87.5)); + assertThat(p1, equalTo(1.325)); + } + } + } + + public void testDoubleRandomOrder() { + try (DoubleBlock.Builder builder = TestBlockFactory.getNonBreakingInstance().newDoubleBlockBuilder(10)) { + builder.beginPositionEntry(); + builder.appendDouble(80); + builder.appendDouble(90); + builder.endPositionEntry(); + builder.beginPositionEntry(); + builder.appendDouble(-3.34); + builder.appendDouble(-6.33); + builder.appendDouble(6.23); + builder.appendDouble(-0.31); + builder.endPositionEntry(); + try (DoubleBlock block = builder.build()) { + MvPercentile.DoubleSortingScratch scratch = new MvPercentile.DoubleSortingScratch(); + double p0 = MvPercentile.calculateDoublePercentile(block, block.getFirstValueIndex(0), block.getValueCount(0), 75, scratch); + double p1 = MvPercentile.calculateDoublePercentile(block, block.getFirstValueIndex(1), block.getValueCount(1), 75, scratch); + assertThat(p0, equalTo(87.5)); + assertThat(p1, equalTo(1.325)); + } + } + } + + public void testIntMvAsc() { + try (IntBlock.Builder builder = TestBlockFactory.getNonBreakingInstance().newIntBlockBuilder(10)) { + builder.beginPositionEntry(); + builder.appendInt(80); + builder.appendInt(90); + builder.endPositionEntry(); + builder.beginPositionEntry(); + builder.appendInt(-6); + builder.appendInt(-3); + builder.appendInt(0); + builder.appendInt(6); + builder.endPositionEntry(); + builder.mvOrdering(Block.MvOrdering.SORTED_ASCENDING); + try (IntBlock block = builder.build()) { + MvPercentile.IntSortingScratch scratch = new MvPercentile.IntSortingScratch(); + int p0 = MvPercentile.calculateIntPercentile(block, block.getFirstValueIndex(0), block.getValueCount(0), 75, scratch); + int p1 = MvPercentile.calculateIntPercentile(block, block.getFirstValueIndex(1), block.getValueCount(1), 75, scratch); + assertThat(p0, equalTo(87)); + assertThat(p1, equalTo(1)); + } + } + } + + public void testIntRandomOrder() { + try (IntBlock.Builder builder = TestBlockFactory.getNonBreakingInstance().newIntBlockBuilder(10)) { + builder.beginPositionEntry(); + builder.appendInt(80); + builder.appendInt(90); + builder.endPositionEntry(); + builder.beginPositionEntry(); + builder.appendInt(-3); + builder.appendInt(-6); + builder.appendInt(6); + builder.appendInt(0); + builder.endPositionEntry(); + try (IntBlock block = builder.build()) { + MvPercentile.IntSortingScratch scratch = new MvPercentile.IntSortingScratch(); + int p0 = MvPercentile.calculateIntPercentile(block, block.getFirstValueIndex(0), block.getValueCount(0), 75, scratch); + int p1 = MvPercentile.calculateIntPercentile(block, block.getFirstValueIndex(1), block.getValueCount(1), 75, scratch); + assertThat(p0, equalTo(87)); + assertThat(p1, equalTo(1)); + } + } + } + + public void testLongMvAsc() { + try (LongBlock.Builder builder = TestBlockFactory.getNonBreakingInstance().newLongBlockBuilder(10)) { + builder.beginPositionEntry(); + builder.appendLong(80); + builder.appendLong(90); + builder.endPositionEntry(); + builder.beginPositionEntry(); + builder.appendLong(-6); + builder.appendLong(-3); + builder.appendLong(0); + builder.appendLong(6); + builder.endPositionEntry(); + builder.mvOrdering(Block.MvOrdering.SORTED_ASCENDING); + try (LongBlock block = builder.build()) { + MvPercentile.LongSortingScratch scratch = new MvPercentile.LongSortingScratch(); + long p0 = MvPercentile.calculateLongPercentile(block, block.getFirstValueIndex(0), block.getValueCount(0), 75, scratch); + long p1 = MvPercentile.calculateLongPercentile(block, block.getFirstValueIndex(1), block.getValueCount(1), 75, scratch); + assertThat(p0, equalTo(87L)); + assertThat(p1, equalTo(1L)); + } + } + } + + public void testLongRandomOrder() { + try (LongBlock.Builder builder = TestBlockFactory.getNonBreakingInstance().newLongBlockBuilder(10)) { + builder.beginPositionEntry(); + builder.appendLong(80); + builder.appendLong(90); + builder.endPositionEntry(); + builder.beginPositionEntry(); + builder.appendLong(-3); + builder.appendLong(-6); + builder.appendLong(6); + builder.appendLong(0); + builder.endPositionEntry(); + try (LongBlock block = builder.build()) { + MvPercentile.LongSortingScratch scratch = new MvPercentile.LongSortingScratch(); + long p0 = MvPercentile.calculateLongPercentile(block, block.getFirstValueIndex(0), block.getValueCount(0), 75, scratch); + long p1 = MvPercentile.calculateLongPercentile(block, block.getFirstValueIndex(1), block.getValueCount(1), 75, scratch); + assertThat(p0, equalTo(87L)); + assertThat(p1, equalTo(1L)); + } + } + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvPercentileTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvPercentileTests.java index 3410b95458302..29cc959e6a943 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvPercentileTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvPercentileTests.java @@ -59,7 +59,7 @@ public static Iterable parameters() { } } - for (var percentileType : List.of(INTEGER, LONG, DataType.DOUBLE)) { + for (var percentileType : List.of(INTEGER, LONG, DOUBLE)) { cases.addAll( List.of( // Doubles @@ -334,6 +334,21 @@ public static Iterable parameters() { ); } } + cases.add( + new TestCaseSupplier( + "from example", + List.of(DOUBLE, INTEGER), + () -> new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(List.of(-3.34, -6.33, 6.23, -0.31), DOUBLE, "field"), + new TestCaseSupplier.TypedData(75, INTEGER, "percentile") + ), + evaluatorString(DOUBLE, INTEGER), + DOUBLE, + equalTo(1.325) + ) + ) + ); return parameterSuppliersFromTypedDataWithDefaultChecks( (nullPosition, nullValueDataType, original) -> nullValueDataType == DataType.NULL && nullPosition == 0