From d6c532135e7f066f9ccb40d157dfe6a44a9c7db5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?=
Date: Thu, 1 Aug 2024 12:37:07 +0200
Subject: [PATCH 01/36] [DOCS] Adds adaptive_allocations to inference and
trained model API docs (#111476)
---
.../inference/service-elasticsearch.asciidoc | 48 ++++++++++-
.../inference/service-elser.asciidoc | 47 +++++++++-
docs/reference/ml/ml-shared.asciidoc | 24 ++++++
.../start-trained-model-deployment.asciidoc | 86 +++++++++++++++----
.../update-trained-model-deployment.asciidoc | 43 +++++++++-
5 files changed, 225 insertions(+), 23 deletions(-)
diff --git a/docs/reference/inference/service-elasticsearch.asciidoc b/docs/reference/inference/service-elasticsearch.asciidoc
index 6fb0b4a38d0ef..99fd41ee2db65 100644
--- a/docs/reference/inference/service-elasticsearch.asciidoc
+++ b/docs/reference/inference/service-elasticsearch.asciidoc
@@ -51,6 +51,22 @@ include::inference-shared.asciidoc[tag=service-settings]
These settings are specific to the `elasticsearch` service.
--
+`adaptive_allocations`:::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation]
+
+`enabled`::::
+(Optional, Boolean)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-enabled]
+
+`max_number_of_allocations`::::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-max-number]
+
+`min_number_of_allocations`::::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-min-number]
+
`model_id`:::
(Required, string)
The name of the model to use for the {infer} task.
@@ -59,7 +75,9 @@ It can be the ID of either a built-in model (for example, `.multilingual-e5-smal
`num_allocations`:::
(Required, integer)
-The total number of allocations this model is assigned across machine learning nodes. Increasing this value generally increases the throughput.
+The total number of allocations this model is assigned across machine learning nodes.
+Increasing this value generally increases the throughput.
+If `adaptive_allocations` is enabled, do not set this value, because it's automatically set.
`num_threads`:::
(Required, integer)
@@ -137,3 +155,31 @@ PUT _inference/text_embedding/my-msmarco-minilm-model <1>
<1> Provide a unique identifier for the inference endpoint. The `inference_id` must be unique and must not match the `model_id`.
<2> The `model_id` must be the ID of a text embedding model which has already been
{ml-docs}/ml-nlp-import-model.html#ml-nlp-import-script[uploaded through Eland].
+
+[discrete]
+[[inference-example-adaptive-allocation]]
+==== Setting adaptive allocations for E5 via the `elasticsearch` service
+
+The following example shows how to create an {infer} endpoint called
+`my-e5-model` to perform a `text_embedding` task type and configure adaptive
+allocations.
+
+The API request below will automatically download the E5 model if it isn't
+already downloaded and then deploy the model.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/text_embedding/my-e5-model
+{
+ "service": "elasticsearch",
+ "service_settings": {
+ "adaptive_allocations": {
+ "enabled": true,
+ "min_number_of_allocations": 3,
+ "max_number_of_allocations": 10
+ },
+ "model_id": ".multilingual-e5-small"
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
\ No newline at end of file
diff --git a/docs/reference/inference/service-elser.asciidoc b/docs/reference/inference/service-elser.asciidoc
index 34c0f7d0a9c53..fdce94901984b 100644
--- a/docs/reference/inference/service-elser.asciidoc
+++ b/docs/reference/inference/service-elser.asciidoc
@@ -48,9 +48,27 @@ include::inference-shared.asciidoc[tag=service-settings]
These settings are specific to the `elser` service.
--
+`adaptive_allocations`:::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation]
+
+`enabled`::::
+(Optional, Boolean)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-enabled]
+
+`max_number_of_allocations`::::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-max-number]
+
+`min_number_of_allocations`::::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-min-number]
+
`num_allocations`:::
(Required, integer)
-The total number of allocations this model is assigned across machine learning nodes. Increasing this value generally increases the throughput.
+The total number of allocations this model is assigned across machine learning nodes.
+Increasing this value generally increases the throughput.
+If `adaptive_allocations` is enabled, do not set this value, because it's automatically set.
`num_threads`:::
(Required, integer)
@@ -107,3 +125,30 @@ This error usually just reflects a timeout, while the model downloads in the bac
You can check the download progress in the {ml-app} UI.
If using the Python client, you can set the `timeout` parameter to a higher value.
====
+
+[discrete]
+[[inference-example-elser-adaptive-allocation]]
+==== Setting adaptive allocations for the ELSER service
+
+The following example shows how to create an {infer} endpoint called
+`my-elser-model` to perform a `sparse_embedding` task type and configure
+adaptive allocations.
+
+The request below will automatically download the ELSER model if it isn't
+already downloaded and then deploy the model.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/sparse_embedding/my-elser-model
+{
+ "service": "elser",
+ "service_settings": {
+ "adaptive_allocations": {
+ "enabled": true,
+ "min_number_of_allocations": 3,
+ "max_number_of_allocations": 10
+ }
+ }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
\ No newline at end of file
diff --git a/docs/reference/ml/ml-shared.asciidoc b/docs/reference/ml/ml-shared.asciidoc
index a69fd2f1812e9..15a994115c88c 100644
--- a/docs/reference/ml/ml-shared.asciidoc
+++ b/docs/reference/ml/ml-shared.asciidoc
@@ -1,3 +1,27 @@
+tag::adaptive-allocation[]
+Adaptive allocations configuration object.
+If enabled, the number of allocations of the model is set based on the current load on the process.
+When the load is high, a new model allocation is automatically created (respecting the value of `max_number_of_allocations` if it's set).
+When the load is low, a model allocation is automatically removed (respecting the value of `min_number_of_allocations` if it's set).
+The number of model allocations cannot be scaled down to less than `1` this way.
+If `adaptive_allocations` is enabled, do not set the number of allocations manually.
+end::adaptive-allocation[]
+
+tag::adaptive-allocation-enabled[]
+If `true`, `adaptive_allocations` is enabled.
+Defaults to `false`.
+end::adaptive-allocation-enabled[]
+
+tag::adaptive-allocation-max-number[]
+Specifies the maximum number of allocations to scale to.
+If set, it must be greater than or equal to `min_number_of_allocations`.
+end::adaptive-allocation-max-number[]
+
+tag::adaptive-allocation-min-number[]
+Specifies the minimum number of allocations to scale to.
+If set, it must be greater than or equal to `1`.
+end::adaptive-allocation-min-number[]
+
tag::aggregations[]
If set, the {dfeed} performs aggregation searches. Support for aggregations is
limited and should be used only with low cardinality data. For more information,
diff --git a/docs/reference/ml/trained-models/apis/start-trained-model-deployment.asciidoc b/docs/reference/ml/trained-models/apis/start-trained-model-deployment.asciidoc
index f1b3fffb8a9a2..6f7e2a4d9f988 100644
--- a/docs/reference/ml/trained-models/apis/start-trained-model-deployment.asciidoc
+++ b/docs/reference/ml/trained-models/apis/start-trained-model-deployment.asciidoc
@@ -30,7 +30,10 @@ must be unique and should not match any other deployment ID or model ID, unless
it is the same as the ID of the model being deployed. If `deployment_id` is not
set, it defaults to the `model_id`.
-Scaling inference performance can be achieved by setting the parameters
+You can enable adaptive allocations to automatically scale model allocations up
+and down based on the actual resource requirements of the processes.
+
+You can manually scale inference performance by setting the parameters
`number_of_allocations` and `threads_per_allocation`.
Increasing `threads_per_allocation` means more threads are used when an
@@ -58,6 +61,46 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-id]
[[start-trained-model-deployment-query-params]]
== {api-query-parms-title}
+`deployment_id`::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=deployment-id]
++
+--
+Defaults to `model_id`.
+--
+
+`timeout`::
+(Optional, time)
+Controls the amount of time to wait for the model to deploy. Defaults to 30
+seconds.
+
+`wait_for`::
+(Optional, string)
+Specifies the allocation status to wait for before returning. Defaults to
+`started`. The value `starting` indicates deployment is starting but not yet on
+any node. The value `started` indicates the model has started on at least one
+node. The value `fully_allocated` indicates the deployment has started on all
+valid nodes.
+
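+As an illustrative sketch (reusing the `my_model` ID from the examples below),
+the following request starts a deployment and returns only once the deployment
+has started on all valid nodes:
+
+[source,console]
+--------------------------------------------------
+POST _ml/trained_models/my_model/deployment/_start?wait_for=fully_allocated&timeout=2m
+--------------------------------------------------
+// TEST[skip:TBD]
+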
+[[start-trained-model-deployment-request-body]]
+== {api-request-body-title}
+
+`adaptive_allocations`::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation]
+
+`enabled`:::
+(Optional, Boolean)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-enabled]
+
+`max_number_of_allocations`:::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-max-number]
+
+`min_number_of_allocations`:::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-min-number]
+
`cache_size`::
(Optional, <>)
The inference cache size (in memory outside the JVM heap) per node for the
@@ -65,15 +108,11 @@ model. In serverless, the cache is disabled by default. Otherwise, the default v
`model_size_bytes` field in the <>. To disable the
cache, `0b` can be provided.
-`deployment_id`::
-(Optional, string)
-include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=deployment-id]
-Defaults to `model_id`.
-
`number_of_allocations`::
(Optional, integer)
The total number of allocations this model is assigned across {ml} nodes.
-Increasing this value generally increases the throughput. Defaults to 1.
+Increasing this value generally increases the throughput. Defaults to `1`.
+If `adaptive_allocations` is enabled, do not set this value, because it's automatically set.
`priority`::
(Optional, string)
@@ -110,18 +149,6 @@ compute-bound process; `threads_per_allocation` must not exceed the number of
available allocated processors per node. Defaults to 1. Must be a power of 2.
Max allowed value is 32.
-`timeout`::
-(Optional, time)
-Controls the amount of time to wait for the model to deploy. Defaults to 30
-seconds.
-
-`wait_for`::
-(Optional, string)
-Specifies the allocation status to wait for before returning. Defaults to
-`started`. The value `starting` indicates deployment is starting but not yet on
-any node. The value `started` indicates the model has started on at least one
-node. The value `fully_allocated` indicates the deployment has started on all
-valid nodes.
[[start-trained-model-deployment-example]]
== {api-examples-title}
@@ -182,3 +209,24 @@ The `my_model` trained model can be deployed again with a different ID:
POST _ml/trained_models/my_model/deployment/_start?deployment_id=my_model_for_search
--------------------------------------------------
// TEST[skip:TBD]
+
+
+[[start-trained-model-deployment-adaptive-allocation-example]]
+=== Setting adaptive allocations
+
+The following example starts a new deployment of the `my_model` trained model
+with the ID `my_model_for_search` and enables adaptive allocations with a
+minimum of 3 and a maximum of 10 allocations.
+
+[source,console]
+--------------------------------------------------
+POST _ml/trained_models/my_model/deployment/_start?deployment_id=my_model_for_search
+{
+ "adaptive_allocations": {
+ "enabled": true,
+ "min_number_of_allocations": 3,
+ "max_number_of_allocations": 10
+ }
+}
+--------------------------------------------------
+// TEST[skip:TBD]
\ No newline at end of file
diff --git a/docs/reference/ml/trained-models/apis/update-trained-model-deployment.asciidoc b/docs/reference/ml/trained-models/apis/update-trained-model-deployment.asciidoc
index ea5508fac26dd..d49ee3c6e872c 100644
--- a/docs/reference/ml/trained-models/apis/update-trained-model-deployment.asciidoc
+++ b/docs/reference/ml/trained-models/apis/update-trained-model-deployment.asciidoc
@@ -25,7 +25,11 @@ Requires the `manage_ml` cluster privilege. This privilege is included in the
== {api-description-title}
You can update a trained model deployment whose `assignment_state` is `started`.
-You can either increase or decrease the number of allocations of such a deployment.
+You can enable adaptive allocations to automatically scale model allocations up
+and down based on the actual resource requirements of the processes.
+Alternatively, you can manually increase or decrease the number of allocations
+of a model deployment.
+
[[update-trained-model-deployments-path-parms]]
== {api-path-parms-title}
@@ -37,17 +41,34 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=deployment-id]
[[update-trained-model-deployment-request-body]]
== {api-request-body-title}
+`adaptive_allocations`::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation]
+
+`enabled`:::
+(Optional, Boolean)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-enabled]
+
+`max_number_of_allocations`:::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-max-number]
+
+`min_number_of_allocations`:::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-min-number]
+
`number_of_allocations`::
(Optional, integer)
The total number of allocations this model is assigned across {ml} nodes.
Increasing this value generally increases the throughput.
+If `adaptive_allocations` is enabled, do not set this value, because it's automatically set.
[[update-trained-model-deployment-example]]
== {api-examples-title}
The following example updates the deployment for the
- `elastic__distilbert-base-uncased-finetuned-conll03-english` trained model to have 4 allocations:
+`elastic__distilbert-base-uncased-finetuned-conll03-english` trained model to have 4 allocations:
[source,console]
--------------------------------------------------
@@ -84,3 +105,21 @@ The API returns the following results:
}
}
----
+
+The following example updates the deployment for the
+`elastic__distilbert-base-uncased-finetuned-conll03-english` trained model to
+enable adaptive allocations with a minimum of 3 and a maximum of 10
+allocations:
+
+[source,console]
+--------------------------------------------------
+POST _ml/trained_models/elastic__distilbert-base-uncased-finetuned-conll03-english/deployment/_update
+{
+ "adaptive_allocations": {
+ "enabled": true,
+ "min_number_of_allocations": 3,
+ "max_number_of_allocations": 10
+ }
+}
+--------------------------------------------------
+// TEST[skip:TBD]
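+
+The same endpoint can also switch adaptive allocations off again; as a sketch,
+only the `enabled` flag needs to be sent, since `min_number_of_allocations` and
+`max_number_of_allocations` are optional:
+
+[source,console]
+--------------------------------------------------
+POST _ml/trained_models/elastic__distilbert-base-uncased-finetuned-conll03-english/deployment/_update
+{
+  "adaptive_allocations": {
+    "enabled": false
+  }
+}
+--------------------------------------------------
+// TEST[skip:TBD]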
\ No newline at end of file
From dfbedb20fab5b486b50150bd946be92816c972c1 Mon Sep 17 00:00:00 2001
From: Nik Everett
Date: Thu, 1 Aug 2024 06:41:13 -0400
Subject: [PATCH 02/36] ESQL: Fix some test randomization (#111496)
Fix a test that would sometimes get stuck in an infinite loop because it
couldn't randomize some data. In some cases the configuration would lock
it to never making changes. In that case, we have to randomize in a
different way.
Closes #111480
---
muted-tests.yml | 3 ---
...AbstractLogicalPlanSerializationTests.java | 4 ++--
.../logical/LookupSerializationTests.java | 6 ++---
...a => LocalRelationSerializationTests.java} | 23 ++++++++++++++-----
4 files changed, 22 insertions(+), 14 deletions(-)
rename x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/local/{LocalRelationSerialiationTests.java => LocalRelationSerializationTests.java} (64%)
diff --git a/muted-tests.yml b/muted-tests.yml
index cbd6b14c07dd3..4635bf9541acb 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -138,9 +138,6 @@ tests:
- class: org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvPSeriesWeightedSumTests
method: testFold {TestCase=, }
issue: https://github.com/elastic/elasticsearch/issues/111479
-- class: org.elasticsearch.xpack.esql.plan.logical.local.LocalRelationSerialiationTests
- method: testEqualsAndHashcode
- issue: https://github.com/elastic/elasticsearch/issues/111480
- class: org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvPSeriesWeightedSumTests
method: testEvaluateBlockWithoutNulls {TestCase=, }
issue: https://github.com/elastic/elasticsearch/issues/111498
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/AbstractLogicalPlanSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/AbstractLogicalPlanSerializationTests.java
index d9e11dcb61d52..a2175c3a92ab0 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/AbstractLogicalPlanSerializationTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/AbstractLogicalPlanSerializationTests.java
@@ -15,7 +15,7 @@
import org.elasticsearch.xpack.esql.core.type.EsField;
import org.elasticsearch.xpack.esql.expression.function.FieldAttributeTests;
import org.elasticsearch.xpack.esql.plan.AbstractNodeSerializationTests;
-import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelationSerialiationTests;
+import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelationSerializationTests;
import java.util.ArrayList;
import java.util.List;
@@ -27,7 +27,7 @@ public static LogicalPlan randomChild(int depth) {
return LookupSerializationTests.randomLookup(depth + 1);
}
// TODO more random options
- return LocalRelationSerialiationTests.randomLocalRelation();
+ return LocalRelationSerializationTests.randomLocalRelation();
}
public static List<Attribute> randomFieldAttributes(int min, int max, boolean onlyRepresentable) {
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LookupSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LookupSerializationTests.java
index ed9199784809b..53f36e124ebb0 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LookupSerializationTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LookupSerializationTests.java
@@ -12,7 +12,7 @@
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.expression.AbstractExpressionSerializationTests;
import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation;
-import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelationSerialiationTests;
+import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelationSerializationTests;
import java.io.IOException;
import java.util.List;
@@ -23,7 +23,7 @@ public static Lookup randomLookup(int depth) {
LogicalPlan child = randomChild(depth);
Expression tableName = AbstractExpressionSerializationTests.randomChild();
List<Attribute> matchFields = randomFieldAttributes(1, 10, false);
- LocalRelation localRelation = randomBoolean() ? null : LocalRelationSerialiationTests.randomLocalRelation();
+ LocalRelation localRelation = randomBoolean() ? null : LocalRelationSerializationTests.randomLocalRelation();
return new Lookup(source, child, tableName, matchFields, localRelation);
}
@@ -45,7 +45,7 @@ protected Lookup mutateInstance(Lookup instance) throws IOException {
case 2 -> matchFields = randomValueOtherThan(matchFields, () -> randomFieldAttributes(1, 10, false));
case 3 -> localRelation = randomValueOtherThan(
localRelation,
- () -> randomBoolean() ? null : LocalRelationSerialiationTests.randomLocalRelation()
+ () -> randomBoolean() ? null : LocalRelationSerializationTests.randomLocalRelation()
);
}
return new Lookup(source, child, tableName, matchFields, localRelation);
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/local/LocalRelationSerialiationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/local/LocalRelationSerializationTests.java
similarity index 64%
rename from x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/local/LocalRelationSerialiationTests.java
rename to x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/local/LocalRelationSerializationTests.java
index ca5227538815f..b8fb67737ec16 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/local/LocalRelationSerialiationTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/local/LocalRelationSerializationTests.java
@@ -18,7 +18,7 @@
import java.io.IOException;
import java.util.List;
-public class LocalRelationSerialiationTests extends AbstractLogicalPlanSerializationTests {
+public class LocalRelationSerializationTests extends AbstractLogicalPlanSerializationTests {
public static LocalRelation randomLocalRelation() {
Source source = randomSource();
List<Attribute> output = randomFieldAttributes(1, 10, true);
@@ -45,14 +45,25 @@ protected LocalRelation createTestInstance() {
@Override
protected LocalRelation mutateInstance(LocalRelation instance) throws IOException {
+ /*
+ * There are two ways we could mutate this. Either we mutate just
+ * the data, or we mutate the attributes and the data. Some attributes
+ * don't *allow* for us to mutate the data. For example, if the attributes
+ * are all NULL typed. In that case we can't mutate the data.
+ *
+ * So we flip a coin. If that lands on true, we *try* to modify that data.
+ * If that spits out the same data - or if the coin lands on false - we'll
+ * modify the attributes and the data.
+ */
if (randomBoolean()) {
List<Attribute> output = instance.output();
- LocalSupplier supplier = randomValueOtherThan(instance.supplier(), () -> randomLocalSupplier(output));
- return new LocalRelation(instance.source(), output, supplier);
- } else {
- List<Attribute> output = randomValueOtherThan(instance.output(), () -> randomFieldAttributes(1, 10, true));
LocalSupplier supplier = randomLocalSupplier(output);
- return new LocalRelation(instance.source(), output, supplier);
+ if (supplier.equals(instance.supplier()) == false) {
+ return new LocalRelation(instance.source(), output, supplier);
+ }
}
+ List<Attribute> output = randomValueOtherThan(instance.output(), () -> randomFieldAttributes(1, 10, true));
+ LocalSupplier supplier = randomLocalSupplier(output);
+ return new LocalRelation(instance.source(), output, supplier);
}
}
From c5da25754007af51029a676a8784b16be16d8995 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Iv=C3=A1n=20Cea=20Fontenla?=
Date: Thu, 1 Aug 2024 14:16:26 +0200
Subject: [PATCH 03/36] Add test cases for nulls and wrong types to aggregation
tests (#111482)
- Migrated the anyNullIsNull and wrong types cases to `AbstractFunctionTestCase`
- Minor fixes on anyNullIsNull to work with multi-row values: some conditions now return a List containing null instead of a bare null. Everything else in these functions was mostly untouched
- Implemented it in some aggregations
- Fixed some errors around the aggregation tests code
Not all aggregations were migrated. Many of them have edge cases that don't work with some of these checks yet.
For example, if `WEIGHTED_AVG(value, weight)` has a literal on the value, it ignores the weight, which makes anyNullIsNull fail as it expects a null return.
Such cases can be handled later.
Closes https://github.com/elastic/elasticsearch/issues/109917
---
.../expression/function/aggregate/Max.java | 2 +-
.../expression/function/aggregate/Min.java | 2 +-
.../expression/function/aggregate/Values.java | 11 +-
.../xpack/esql/analysis/AnalyzerTests.java | 8 +-
.../xpack/esql/analysis/VerifierTests.java | 4 +-
.../function/AbstractAggregationTestCase.java | 67 ++--
.../function/AbstractFunctionTestCase.java | 330 ++++++++++++++++++
.../AbstractScalarFunctionTestCase.java | 327 -----------------
.../expression/function/TestCaseSupplier.java | 4 +-
.../function/aggregate/AvgTests.java | 2 +-
.../function/aggregate/MaxTests.java | 73 +---
.../function/aggregate/MinTests.java | 73 +---
.../function/aggregate/ValuesTests.java | 6 +-
.../function/aggregate/WeightedAvgTests.java | 18 +-
14 files changed, 417 insertions(+), 510 deletions(-)
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Max.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Max.java
index 4438ccec04c4c..22224628e23ad 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Max.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Max.java
@@ -76,7 +76,7 @@ public Max replaceChildren(List<Expression> newChildren) {
@Override
protected TypeResolution resolveType() {
return TypeResolutions.isType(
- this,
+ field(),
e -> e == DataType.BOOLEAN || e == DataType.DATETIME || e == DataType.IP || (e.isNumeric() && e != DataType.UNSIGNED_LONG),
sourceText(),
DEFAULT,
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Min.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Min.java
index 490d227206e06..8e7bb6bc3e799 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Min.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Min.java
@@ -76,7 +76,7 @@ public Min replaceChildren(List<Expression> newChildren) {
@Override
protected TypeResolution resolveType() {
return TypeResolutions.isType(
- this,
+ field(),
e -> e == DataType.BOOLEAN || e == DataType.DATETIME || e == DataType.IP || (e.isNumeric() && e != DataType.UNSIGNED_LONG),
sourceText(),
DEFAULT,
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java
index 79276b26be6d5..136e1233601f9 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java
@@ -17,10 +17,10 @@
import org.elasticsearch.compute.aggregation.ValuesLongAggregatorFunctionSupplier;
import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException;
import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
-import org.elasticsearch.xpack.esql.expression.EsqlTypeResolutions;
import org.elasticsearch.xpack.esql.expression.function.Example;
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.Param;
@@ -30,6 +30,7 @@
import java.util.List;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT;
+import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG;
public class Values extends AggregateFunction implements ToAggregator {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Values", Values::new);
@@ -84,7 +85,13 @@ public DataType dataType() {
@Override
protected TypeResolution resolveType() {
- return EsqlTypeResolutions.isNotSpatial(field(), sourceText(), DEFAULT);
+ return TypeResolutions.isType(
+ field(),
+ dt -> DataType.isSpatial(dt) == false && dt != UNSIGNED_LONG,
+ sourceText(),
+ DEFAULT,
+ "any type except unsigned_long and spatial types"
+ );
}
@Override
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java
index 7333bd0e9f8a6..f0dd72e18ac2f 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java
@@ -1834,13 +1834,13 @@ public void testUnsupportedTypesInStats() {
line 2:20: argument of [count_distinct(x)] must be [any exact type except unsigned_long, _source, or counter types],\
found value [x] type [unsigned_long]
line 2:39: argument of [max(x)] must be [boolean, datetime, ip or numeric except unsigned_long or counter types],\
- found value [max(x)] type [unsigned_long]
+ found value [x] type [unsigned_long]
line 2:47: argument of [median(x)] must be [numeric except unsigned_long or counter types],\
found value [x] type [unsigned_long]
line 2:58: argument of [median_absolute_deviation(x)] must be [numeric except unsigned_long or counter types],\
found value [x] type [unsigned_long]
line 2:88: argument of [min(x)] must be [boolean, datetime, ip or numeric except unsigned_long or counter types],\
- found value [min(x)] type [unsigned_long]
+ found value [x] type [unsigned_long]
line 2:96: first argument of [percentile(x, 10)] must be [numeric except unsigned_long],\
found value [x] type [unsigned_long]
line 2:115: argument of [sum(x)] must be [numeric except unsigned_long or counter types],\
@@ -1854,13 +1854,13 @@ public void testUnsupportedTypesInStats() {
line 2:10: argument of [avg(x)] must be [numeric except unsigned_long or counter types],\
found value [x] type [version]
line 2:18: argument of [max(x)] must be [boolean, datetime, ip or numeric except unsigned_long or counter types],\
- found value [max(x)] type [version]
+ found value [x] type [version]
line 2:26: argument of [median(x)] must be [numeric except unsigned_long or counter types],\
found value [x] type [version]
line 2:37: argument of [median_absolute_deviation(x)] must be [numeric except unsigned_long or counter types],\
found value [x] type [version]
line 2:67: argument of [min(x)] must be [boolean, datetime, ip or numeric except unsigned_long or counter types],\
- found value [min(x)] type [version]
+ found value [x] type [version]
line 2:75: first argument of [percentile(x, 10)] must be [numeric except unsigned_long], found value [x] type [version]
line 2:94: argument of [sum(x)] must be [numeric except unsigned_long or counter types], found value [x] type [version]""");
}
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java
index 08b1ef9f6fef6..49372da04d8c3 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java
@@ -494,7 +494,7 @@ public void testAggregateOnCounter() {
equalTo(
"1:20: argument of [min(network.bytes_in)] must be"
+ " [boolean, datetime, ip or numeric except unsigned_long or counter types],"
- + " found value [min(network.bytes_in)] type [counter_long]"
+ + " found value [network.bytes_in] type [counter_long]"
)
);
@@ -503,7 +503,7 @@ public void testAggregateOnCounter() {
equalTo(
"1:20: argument of [max(network.bytes_in)] must be"
+ " [boolean, datetime, ip or numeric except unsigned_long or counter types],"
- + " found value [max(network.bytes_in)] type [counter_long]"
+ + " found value [network.bytes_in] type [counter_long]"
)
);
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractAggregationTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractAggregationTestCase.java
index 25ff4f9c2122d..65425486ea4e0 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractAggregationTestCase.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractAggregationTestCase.java
@@ -57,6 +57,25 @@ public abstract class AbstractAggregationTestCase extends AbstractFunctionTestCa
* Use if possible, as this method may get updated with new checks in the future.
*
*/
+ protected static Iterable<Object[]> parameterSuppliersFromTypedDataWithDefaultChecks(
+ List<TestCaseSupplier> suppliers,
+ boolean entirelyNullPreservesType,
+ PositionalErrorMessageSupplier positionalErrorMessageSupplier
+ ) {
+ return parameterSuppliersFromTypedData(
+ errorsForCasesWithoutExamples(
+ withNoRowsExpectingNull(anyNullIsNull(entirelyNullPreservesType, randomizeBytesRefsOffset(suppliers))),
+ positionalErrorMessageSupplier
+ )
+ );
+ }
+
+ // TODO: Remove and migrate everything to the method with all the parameters
+ /**
+ * @deprecated Use {@link #parameterSuppliersFromTypedDataWithDefaultChecks(List, boolean, PositionalErrorMessageSupplier)} instead.
+ * This method doesn't add all the default checks.
+ */
+ @Deprecated
protected static Iterable<Object[]> parameterSuppliersFromTypedDataWithDefaultChecks(List<TestCaseSupplier> suppliers) {
return parameterSuppliersFromTypedData(withNoRowsExpectingNull(randomizeBytesRefsOffset(suppliers)));
}
@@ -119,24 +138,9 @@ public void testFold() {
Expression expression = buildLiteralExpression(testCase);
resolveExpression(expression, aggregatorFunctionSupplier -> {
- // An aggregation cannot be folded
- }, evaluableExpression -> {
- assertTrue(evaluableExpression.foldable());
- if (testCase.foldingExceptionClass() == null) {
- Object result = evaluableExpression.fold();
- // Decode unsigned longs into BigIntegers
- if (testCase.expectedType() == DataType.UNSIGNED_LONG && result != null) {
- result = NumericUtils.unsignedLongAsBigInteger((Long) result);
- }
- assertThat(result, testCase.getMatcher());
- if (testCase.getExpectedWarnings() != null) {
- assertWarnings(testCase.getExpectedWarnings());
- }
- } else {
- Throwable t = expectThrows(testCase.foldingExceptionClass(), evaluableExpression::fold);
- assertThat(t.getMessage(), equalTo(testCase.foldingExceptionMessage()));
- }
- });
+ // An aggregation cannot be folded.
+ // It's not an error either as not all aggregations are foldable.
+ }, this::evaluate);
}
private void aggregateSingleMode(Expression expression) {
@@ -263,13 +267,19 @@ private void aggregateWithIntermediates(Expression expression) {
}
private void evaluate(Expression evaluableExpression) {
- Object result;
- try (var evaluator = evaluator(evaluableExpression).get(driverContext())) {
- try (Block block = evaluator.eval(row(testCase.getDataValues()))) {
- result = toJavaObjectUnsignedLongAware(block, 0);
- }
+ assertTrue(evaluableExpression.foldable());
+
+ if (testCase.foldingExceptionClass() != null) {
+ Throwable t = expectThrows(testCase.foldingExceptionClass(), evaluableExpression::fold);
+ assertThat(t.getMessage(), equalTo(testCase.foldingExceptionMessage()));
+ return;
}
+ Object result = evaluableExpression.fold();
+ // Decode unsigned longs into BigIntegers
+ if (testCase.expectedType() == DataType.UNSIGNED_LONG && result != null) {
+ result = NumericUtils.unsignedLongAsBigInteger((Long) result);
+ }
assertThat(result, not(equalTo(Double.NaN)));
assert testCase.getMatcher().matches(Double.POSITIVE_INFINITY) == false;
assertThat(result, not(equalTo(Double.POSITIVE_INFINITY)));
@@ -435,16 +445,23 @@ private IntBlock makeGroupsVector(int groupStart, int groupEnd, int rowCount) {
*/
private void processPageGrouping(GroupingAggregator aggregator, Page inputPage, int groupCount) {
var groupSliceSize = 1;
+ var allValuesNull = IntStream.range(0, inputPage.getBlockCount())
+ .mapToObj(inputPage::getBlock)
+ .anyMatch(Block::areAllValuesNull);
// Add data to chunks of groups
for (int currentGroupOffset = 0; currentGroupOffset < groupCount;) {
- var seenGroupIds = new SeenGroupIds.Range(0, currentGroupOffset + groupSliceSize);
+ int groupSliceRemainingSize = Math.min(groupSliceSize, groupCount - currentGroupOffset);
+ var seenGroupIds = new SeenGroupIds.Range(0, allValuesNull ? 0 : currentGroupOffset + groupSliceRemainingSize);
var addInput = aggregator.prepareProcessPage(seenGroupIds, inputPage);
var positionCount = inputPage.getPositionCount();
var dataSliceSize = 1;
// Divide data in chunks
for (int currentDataOffset = 0; currentDataOffset < positionCount;) {
- try (var groups = makeGroupsVector(currentGroupOffset, currentGroupOffset + groupSliceSize, dataSliceSize)) {
+ int dataSliceRemainingSize = Math.min(dataSliceSize, positionCount - currentDataOffset);
+ try (
+ var groups = makeGroupsVector(currentGroupOffset, currentGroupOffset + groupSliceRemainingSize, dataSliceRemainingSize)
+ ) {
addInput.add(currentDataOffset, groups);
}
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java
index 20c583d3ac898..0c4bd6fe38b6a 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java
@@ -40,6 +40,7 @@
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.Literal;
+import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
import org.elasticsearch.xpack.esql.core.expression.predicate.nulls.IsNotNull;
import org.elasticsearch.xpack.esql.core.expression.predicate.nulls.IsNull;
import org.elasticsearch.xpack.esql.core.session.Configuration;
@@ -49,6 +50,8 @@
import org.elasticsearch.xpack.esql.core.util.NumericUtils;
import org.elasticsearch.xpack.esql.core.util.StringUtils;
import org.elasticsearch.xpack.esql.evaluator.EvalMapper;
+import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest;
+import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.RLike;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.WildcardLike;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add;
@@ -69,6 +72,7 @@
import org.elasticsearch.xpack.esql.planner.Layout;
import org.elasticsearch.xpack.esql.planner.PlannerUtils;
import org.elasticsearch.xpack.versionfield.Version;
+import org.hamcrest.Matcher;
import org.junit.After;
import org.junit.AfterClass;
@@ -95,6 +99,8 @@
import java.util.TreeSet;
import java.util.function.Function;
import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
import static java.util.Map.entry;
import static org.elasticsearch.compute.data.BlockUtils.toJavaObject;
@@ -106,6 +112,7 @@
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.instanceOf;
+import static org.hamcrest.Matchers.nullValue;
/**
* Base class for function tests.
@@ -191,6 +198,318 @@ protected static Iterable<Object[]> parameterSuppliersFromTypedData(List
+ /**
+ * Adds cases with {@code null} and asserts that the result is {@code null}.
+ *
+ * Note: This won't add more than a single null to any existing test case,
+ * just to keep the number of test cases from exploding totally.
+ *
+ *
+ * @param entirelyNullPreservesType should a test case that only contains parameters
+ * with the {@code null} type keep its expected type?
+ * This is mostly going to be {@code true}
+ * except for functions that base their type entirely
+ * on input types like {@link Greatest} or {@link Coalesce}.
+ */
+ protected static List<TestCaseSupplier> anyNullIsNull(boolean entirelyNullPreservesType, List<TestCaseSupplier> testCaseSuppliers) {
+ return anyNullIsNull(
+ testCaseSuppliers,
+ (nullPosition, nullValueDataType, original) -> entirelyNullPreservesType == false
+ && nullValueDataType == DataType.NULL
+ && original.getData().size() == 1 ? DataType.NULL : original.expectedType(),
+ (nullPosition, nullData, original) -> original
+ );
+ }
+
+ public interface ExpectedType {
+ DataType expectedType(int nullPosition, DataType nullValueDataType, TestCaseSupplier.TestCase original);
+ }
+
+ public interface ExpectedEvaluatorToString {
+ Matcher<String> evaluatorToString(int nullPosition, TestCaseSupplier.TypedData nullData, Matcher<String> original);
+ }
+
+ protected static List<TestCaseSupplier> anyNullIsNull(
+ List<TestCaseSupplier> testCaseSuppliers,
+ ExpectedType expectedType,
+ ExpectedEvaluatorToString evaluatorToString
+ ) {
+ typesRequired(testCaseSuppliers);
+ List<TestCaseSupplier> suppliers = new ArrayList<>(testCaseSuppliers.size());
+ suppliers.addAll(testCaseSuppliers);
+
+ /*
+ * For each original test case, add as many copies as there were
+ * arguments, replacing one of the arguments with null and keeping
+ * the others.
+ *
+ * Also, if this was the first time we saw the signature we copy it
+ * *again*, replacing the argument with null, but annotating the
+ * argument's type as `null` explicitly.
+ */
+ Set<List<DataType>> uniqueSignatures = new HashSet<>();
+ for (TestCaseSupplier original : testCaseSuppliers) {
+ boolean firstTimeSeenSignature = uniqueSignatures.add(original.types());
+ for (int nullPosition = 0; nullPosition < original.types().size(); nullPosition++) {
+ int finalNullPosition = nullPosition;
+ suppliers.add(new TestCaseSupplier(original.name() + " null in " + nullPosition, original.types(), () -> {
+ TestCaseSupplier.TestCase oc = original.get();
+ List<TestCaseSupplier.TypedData> data = IntStream.range(0, oc.getData().size()).mapToObj(i -> {
+ TestCaseSupplier.TypedData od = oc.getData().get(i);
+ if (i != finalNullPosition) {
+ return od;
+ }
+ return od.withData(od.isMultiRow() ? Collections.singletonList(null) : null);
+ }).toList();
+ TestCaseSupplier.TypedData nulledData = oc.getData().get(finalNullPosition);
+ return new TestCaseSupplier.TestCase(
+ data,
+ evaluatorToString.evaluatorToString(finalNullPosition, nulledData, oc.evaluatorToString()),
+ expectedType.expectedType(finalNullPosition, nulledData.type(), oc),
+ nullValue(),
+ null,
+ oc.getExpectedTypeError(),
+ null,
+ null
+ );
+ }));
+
+ if (firstTimeSeenSignature) {
+ List<DataType> typesWithNull = IntStream.range(0, original.types().size())
+ .mapToObj(i -> i == finalNullPosition ? DataType.NULL : original.types().get(i))
+ .toList();
+ boolean newSignature = uniqueSignatures.add(typesWithNull);
+ if (newSignature) {
+ suppliers.add(new TestCaseSupplier(typesWithNull, () -> {
+ TestCaseSupplier.TestCase oc = original.get();
+ List<TestCaseSupplier.TypedData> data = IntStream.range(0, oc.getData().size())
+ .mapToObj(
+ i -> i == finalNullPosition
+ ? (oc.getData().get(i).isMultiRow()
+ ? TestCaseSupplier.TypedData.MULTI_ROW_NULL
+ : TestCaseSupplier.TypedData.NULL)
+ : oc.getData().get(i)
+ )
+ .toList();
+ return new TestCaseSupplier.TestCase(
+ data,
+ equalTo("LiteralsEvaluator[lit=null]"),
+ expectedType.expectedType(finalNullPosition, DataType.NULL, oc),
+ nullValue(),
+ null,
+ oc.getExpectedTypeError(),
+ null,
+ null
+ );
+ }));
+ }
+ }
+ }
+ }
+
+ return suppliers;
+ }
+
+ @FunctionalInterface
+ protected interface PositionalErrorMessageSupplier {
+ /**
+ * This interface defines functions to supply error messages for incorrect types in specific positions. Functions which have
+ * the same type requirements for all positions can simplify this with a lambda returning a string constant.
+ *
+ * @param validForPosition - the set of {@link DataType}s that the test infrastructure believes to be allowable in the
+ * given position.
+ * @param position - the zero-index position in the list of parameters the function has detected the bad argument to be.
+ * @return The string describing the acceptable parameters for that position. Note that this function should not return
+ * the full error string; that will be constructed by the test. Just return the type string for that position.
+ */
+ String apply(Set<DataType> validForPosition, int position);
+ }
+
+ /**
+ * Adds test cases containing unsupported parameter types that assert
+ * that they throw type errors.
+ */
+ protected static List<TestCaseSupplier> errorsForCasesWithoutExamples(
+ List<TestCaseSupplier> testCaseSuppliers,
+ PositionalErrorMessageSupplier positionalErrorMessageSupplier
+ ) {
+ return errorsForCasesWithoutExamples(testCaseSuppliers, (i, v, t) -> typeErrorMessage(i, v, t, positionalErrorMessageSupplier));
+ }
+
+ /**
+ * Build the expected error message for an invalid type signature.
+ */
+ protected static String typeErrorMessage(
+ boolean includeOrdinal,
+ List<Set<DataType>> validPerPosition,
+ List<DataType> types,
+ PositionalErrorMessageSupplier expectedTypeSupplier
+ ) {
+ int badArgPosition = -1;
+ for (int i = 0; i < types.size(); i++) {
+ if (validPerPosition.get(i).contains(types.get(i)) == false) {
+ badArgPosition = i;
+ break;
+ }
+ }
+ if (badArgPosition == -1) {
+ throw new IllegalStateException(
+ "Can't generate error message for these types, you probably need a custom error message function"
+ );
+ }
+ String ordinal = includeOrdinal ? TypeResolutions.ParamOrdinal.fromIndex(badArgPosition).name().toLowerCase(Locale.ROOT) + " " : "";
+ String expectedTypeString = expectedTypeSupplier.apply(validPerPosition.get(badArgPosition), badArgPosition);
+ String name = types.get(badArgPosition).typeName();
+ return ordinal + "argument of [] must be [" + expectedTypeString + "], found value [" + name + "] type [" + name + "]";
+ }
+
+ @FunctionalInterface
+ protected interface TypeErrorMessageSupplier {
+ String apply(boolean includeOrdinal, List<Set<DataType>> validPerPosition, List<DataType> types);
+ }
+
+ protected static List<TestCaseSupplier> errorsForCasesWithoutExamples(
+ List<TestCaseSupplier> testCaseSuppliers,
+ TypeErrorMessageSupplier typeErrorMessageSupplier
+ ) {
+ typesRequired(testCaseSuppliers);
+ List<TestCaseSupplier> suppliers = new ArrayList<>(testCaseSuppliers.size());
+ suppliers.addAll(testCaseSuppliers);
+
+ Set<List<DataType>> valid = testCaseSuppliers.stream().map(TestCaseSupplier::types).collect(Collectors.toSet());
+ List<Set<DataType>> validPerPosition = validPerPosition(valid);
+
+ testCaseSuppliers.stream()
+ .map(s -> s.types().size())
+ .collect(Collectors.toSet())
+ .stream()
+ .flatMap(count -> allPermutations(count))
+ .filter(types -> valid.contains(types) == false)
+ /*
+ * Skip any cases with more than one null. Our tests don't generate
+ * the full combinatorial explosions of all nulls - just a single null.
+ * Hopefully <null>, <null> cases will function the same as <null>, <valid>
+ * cases.
+ */.filter(types -> types.stream().filter(t -> t == DataType.NULL).count() <= 1)
+ .map(types -> typeErrorSupplier(validPerPosition.size() != 1, validPerPosition, types, typeErrorMessageSupplier))
+ .forEach(suppliers::add);
+ return suppliers;
+ }
+
+ private static List<DataType> append(List<DataType> orig, DataType extra) {
+ List<DataType> longer = new ArrayList<>(orig.size() + 1);
+ longer.addAll(orig);
+ longer.add(extra);
+ return longer;
+ }
+
+ protected static Stream<DataType> representable() {
+ return DataType.types().stream().filter(DataType::isRepresentable);
+ }
+
+ protected static TestCaseSupplier typeErrorSupplier(
+ boolean includeOrdinal,
+ List<Set<DataType>> validPerPosition,
+ List<DataType> types,
+ PositionalErrorMessageSupplier errorMessageSupplier
+ ) {
+ return typeErrorSupplier(includeOrdinal, validPerPosition, types, (o, v, t) -> typeErrorMessage(o, v, t, errorMessageSupplier));
+ }
+
+ /**
+ * Build a test case that asserts that the combination of parameter types is an error.
+ */
+ protected static TestCaseSupplier typeErrorSupplier(
+ boolean includeOrdinal,
+ List<Set<DataType>> validPerPosition,
+ List<DataType> types,
+ TypeErrorMessageSupplier errorMessageSupplier
+ ) {
+ return new TestCaseSupplier(
+ "type error for " + TestCaseSupplier.nameFromTypes(types),
+ types,
+ () -> TestCaseSupplier.TestCase.typeError(
+ types.stream().map(type -> new TestCaseSupplier.TypedData(randomLiteral(type).value(), type, type.typeName())).toList(),
+ errorMessageSupplier.apply(includeOrdinal, validPerPosition, types)
+ )
+ );
+ }
+
+ private static List<Set<DataType>> validPerPosition(Set<List<DataType>> valid) {
+ int max = valid.stream().mapToInt(List::size).max().getAsInt();
+ List<Set<DataType>> result = new ArrayList<>(max);
+ for (int i = 0; i < max; i++) {
+ result.add(new HashSet<>());
+ }
+ for (List<DataType> signature : valid) {
+ for (int i = 0; i < signature.size(); i++) {
+ result.get(i).add(signature.get(i));
+ }
+ }
+ return result;
+ }
+
+ protected static Stream<List<DataType>> allPermutations(int argumentCount) {
+ if (argumentCount == 0) {
+ return Stream.of(List.of());
+ }
+ if (argumentCount > 3) {
+ throw new IllegalArgumentException("would generate too many combinations");
+ }
+ Stream<List<DataType>> stream = validFunctionParameters().map(List::of);
+ for (int i = 1; i < argumentCount; i++) {
+ stream = stream.flatMap(types -> validFunctionParameters().map(t -> append(types, t)));
+ }
+ return stream;
+ }
+
+ /**
+ * The types that are valid in function parameters. This is used by the
+ * function tests to enumerate all possible parameters to test error messages
+ * for invalid combinations.
+ */
+ public static Stream<DataType> validFunctionParameters() {
+ return Arrays.stream(DataType.values()).filter(t -> {
+ if (t == DataType.UNSUPPORTED) {
+ // By definition, functions never support UNSUPPORTED
+ return false;
+ }
+ if (t == DataType.DOC_DATA_TYPE || t == DataType.PARTIAL_AGG) {
+ /*
+ * Doc and partial_agg are special and functions aren't
+ * defined to take these. They'll use them implicitly if needed.
+ */
+ return false;
+ }
+ if (t == DataType.OBJECT || t == DataType.NESTED) {
+ // Object and nested fields aren't supported by any functions yet
+ return false;
+ }
+ if (t == DataType.SOURCE || t == DataType.TSID_DATA_TYPE) {
+ // No functions take source or tsid fields yet. We'll make some eventually and remove this.
+ return false;
+ }
+ if (t == DataType.DATE_PERIOD || t == DataType.TIME_DURATION) {
+ // We don't test that functions don't take date_period or time_duration. We should.
+ return false;
+ }
+ if (t.isCounter()) {
+ /*
+ * For now, we're assuming no functions take counters
+ * as parameters. That's not true - some do. But we'll
+ * need to update the tests to handle that.
+ */
+ return false;
+ }
+ if (t.widenSmallNumeric() != t) {
+ // Small numeric types are widened long before they arrive at functions.
+ return false;
+ }
+
+ return true;
+ }).sorted();
+ }
+
/**
* Build an {@link Attribute} that loads a field.
*/
@@ -997,6 +1316,17 @@ protected static DataType[] strings() {
return DataType.types().stream().filter(DataType::isString).toArray(DataType[]::new);
}
+ /**
+ * Validate that we know the types for all the test cases already created
+ * @param suppliers - list of suppliers before adding in the illegal type combinations
+ */
+ protected static void typesRequired(List<TestCaseSupplier> suppliers) {
+ String bad = suppliers.stream().filter(s -> s.types() == null).map(s -> s.name()).collect(Collectors.joining("\n"));
+ if (bad.equals("") == false) {
+ throw new IllegalArgumentException("types required but not found for these tests:\n" + bad);
+ }
+ }
+
/**
* Returns true if the current test case is for an aggregation function.
*
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java
index 1caea78e79ad5..f4123af8abd0a 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java
@@ -22,21 +22,15 @@
import org.elasticsearch.indices.CrankyCircuitBreakerService;
import org.elasticsearch.xpack.esql.TestBlockFactory;
import org.elasticsearch.xpack.esql.core.expression.Expression;
-import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.util.NumericUtils;
-import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest;
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.AbstractMultivalueFunctionTestCase;
-import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce;
import org.elasticsearch.xpack.esql.optimizer.FoldNull;
import org.elasticsearch.xpack.esql.planner.PlannerUtils;
import org.hamcrest.Matcher;
import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
import java.util.List;
-import java.util.Locale;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
@@ -44,7 +38,6 @@
import java.util.concurrent.Future;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
-import java.util.stream.Stream;
import static org.elasticsearch.compute.data.BlockUtils.toJavaObject;
import static org.hamcrest.Matchers.either;
@@ -372,152 +365,6 @@ public final void testFold() {
}
}
- /**
- * Adds cases with {@code null} and asserts that the result is {@code null}.
- *
- * Note: This won't add more than a single null to any existing test case,
- * just to keep the number of test cases from exploding totally.
- *
- *
- * @param entirelyNullPreservesType should a test case that only contains parameters
- * with the {@code null} type keep its expected type?
- * This is mostly going to be {@code true}
- * except for functions that base their type entirely
- * on input types like {@link Greatest} or {@link Coalesce}.
- */
- protected static List<TestCaseSupplier> anyNullIsNull(boolean entirelyNullPreservesType, List<TestCaseSupplier> testCaseSuppliers) {
- return anyNullIsNull(
- testCaseSuppliers,
- (nullPosition, nullValueDataType, original) -> entirelyNullPreservesType == false
- && nullValueDataType == DataType.NULL
- && original.getData().size() == 1 ? DataType.NULL : original.expectedType(),
- (nullPosition, nullData, original) -> original
- );
- }
-
- public interface ExpectedType {
- DataType expectedType(int nullPosition, DataType nullValueDataType, TestCaseSupplier.TestCase original);
- }
-
- public interface ExpectedEvaluatorToString {
- Matcher<String> evaluatorToString(int nullPosition, TestCaseSupplier.TypedData nullData, Matcher<String> original);
- }
-
- protected static List<TestCaseSupplier> anyNullIsNull(
- List<TestCaseSupplier> testCaseSuppliers,
- ExpectedType expectedType,
- ExpectedEvaluatorToString evaluatorToString
- ) {
- typesRequired(testCaseSuppliers);
- List<TestCaseSupplier> suppliers = new ArrayList<>(testCaseSuppliers.size());
- suppliers.addAll(testCaseSuppliers);
-
- /*
- * For each original test case, add as many copies as there were
- * arguments, replacing one of the arguments with null and keeping
- * the others.
- *
- * Also, if this was the first time we saw the signature we copy it
- * *again*, replacing the argument with null, but annotating the
- * argument's type as `null` explicitly.
- */
- Set<List<DataType>> uniqueSignatures = new HashSet<>();
- for (TestCaseSupplier original : testCaseSuppliers) {
- boolean firstTimeSeenSignature = uniqueSignatures.add(original.types());
- for (int nullPosition = 0; nullPosition < original.types().size(); nullPosition++) {
- int finalNullPosition = nullPosition;
- suppliers.add(new TestCaseSupplier(original.name() + " null in " + nullPosition, original.types(), () -> {
- TestCaseSupplier.TestCase oc = original.get();
- List<TestCaseSupplier.TypedData> data = IntStream.range(0, oc.getData().size()).mapToObj(i -> {
- TestCaseSupplier.TypedData od = oc.getData().get(i);
- return i == finalNullPosition ? od.withData(null) : od;
- }).toList();
- TestCaseSupplier.TypedData nulledData = oc.getData().get(finalNullPosition);
- return new TestCaseSupplier.TestCase(
- data,
- evaluatorToString.evaluatorToString(finalNullPosition, nulledData, oc.evaluatorToString()),
- expectedType.expectedType(finalNullPosition, nulledData.type(), oc),
- nullValue(),
- null,
- oc.getExpectedTypeError(),
- null,
- null
- );
- }));
-
- if (firstTimeSeenSignature) {
- List<DataType> typesWithNull = IntStream.range(0, original.types().size())
- .mapToObj(i -> i == finalNullPosition ? DataType.NULL : original.types().get(i))
- .toList();
- boolean newSignature = uniqueSignatures.add(typesWithNull);
- if (newSignature) {
- suppliers.add(new TestCaseSupplier(typesWithNull, () -> {
- TestCaseSupplier.TestCase oc = original.get();
- List<TestCaseSupplier.TypedData> data = IntStream.range(0, oc.getData().size())
- .mapToObj(i -> i == finalNullPosition ? TestCaseSupplier.TypedData.NULL : oc.getData().get(i))
- .toList();
- return new TestCaseSupplier.TestCase(
- data,
- equalTo("LiteralsEvaluator[lit=null]"),
- expectedType.expectedType(finalNullPosition, DataType.NULL, oc),
- nullValue(),
- null,
- oc.getExpectedTypeError(),
- null,
- null
- );
- }));
- }
- }
- }
- }
-
- return suppliers;
-
- }
-
- /**
- * Adds test cases containing unsupported parameter types that assert
- * that they throw type errors.
- */
- protected static List<TestCaseSupplier> errorsForCasesWithoutExamples(
- List<TestCaseSupplier> testCaseSuppliers,
- PositionalErrorMessageSupplier positionalErrorMessageSupplier
- ) {
- return errorsForCasesWithoutExamples(
- testCaseSuppliers,
- (i, v, t) -> AbstractScalarFunctionTestCase.typeErrorMessage(i, v, t, positionalErrorMessageSupplier)
- );
- }
-
- protected static List<TestCaseSupplier> errorsForCasesWithoutExamples(
- List<TestCaseSupplier> testCaseSuppliers,
- TypeErrorMessageSupplier typeErrorMessageSupplier
- ) {
- typesRequired(testCaseSuppliers);
- List<TestCaseSupplier> suppliers = new ArrayList<>(testCaseSuppliers.size());
- suppliers.addAll(testCaseSuppliers);
-
- Set<List<DataType>> valid = testCaseSuppliers.stream().map(TestCaseSupplier::types).collect(Collectors.toSet());
- List<Set<DataType>> validPerPosition = validPerPosition(valid);
-
- testCaseSuppliers.stream()
- .map(s -> s.types().size())
- .collect(Collectors.toSet())
- .stream()
- .flatMap(count -> allPermutations(count))
- .filter(types -> valid.contains(types) == false)
- /*
- * Skip any cases with more than one null. Our tests don't generate
- * the full combinatorial explosions of all nulls - just a single null.
- * Hopefully <null>, <null> cases will function the same as <null>, <valid>
- * cases.
- */.filter(types -> types.stream().filter(t -> t == DataType.NULL).count() <= 1)
- .map(types -> typeErrorSupplier(validPerPosition.size() != 1, validPerPosition, types, typeErrorMessageSupplier))
- .forEach(suppliers::add);
- return suppliers;
- }
-
public static String errorMessageStringForBinaryOperators(
boolean includeOrdinal,
List<Set<DataType>> validPerPosition,
@@ -572,178 +419,4 @@ protected static List<TestCaseSupplier> failureForCasesWithoutExamples(List<TestCaseSupplier> suppliers) {
- String bad = suppliers.stream().filter(s -> s.types() == null).map(s -> s.name()).collect(Collectors.joining("\n"));
- if (bad.equals("") == false) {
- throw new IllegalArgumentException("types required but not found for these tests:\n" + bad);
- }
- }
-
- private static List<Set<DataType>> validPerPosition(Set<List<DataType>> valid) {
- int max = valid.stream().mapToInt(List::size).max().getAsInt();
- List<Set<DataType>> result = new ArrayList<>(max);
- for (int i = 0; i < max; i++) {
- result.add(new HashSet<>());
- }
- for (List<DataType> signature : valid) {
- for (int i = 0; i < signature.size(); i++) {
- result.get(i).add(signature.get(i));
- }
- }
- return result;
- }
-
- private static Stream<List<DataType>> allPermutations(int argumentCount) {
- if (argumentCount == 0) {
- return Stream.of(List.of());
- }
- if (argumentCount > 3) {
- throw new IllegalArgumentException("would generate too many combinations");
- }
- Stream<List<DataType>> stream = validFunctionParameters().map(List::of);
- for (int i = 1; i < argumentCount; i++) {
- stream = stream.flatMap(types -> validFunctionParameters().map(t -> append(types, t)));
- }
- return stream;
- }
-
- private static List<DataType> append(List<DataType> orig, DataType extra) {
- List<DataType> longer = new ArrayList<>(orig.size() + 1);
- longer.addAll(orig);
- longer.add(extra);
- return longer;
- }
-
- @FunctionalInterface
- protected interface TypeErrorMessageSupplier {
- String apply(boolean includeOrdinal, List<Set<DataType>> validPerPosition, List<DataType> types);
- }
-
- @FunctionalInterface
- protected interface PositionalErrorMessageSupplier {
- /**
- * This interface defines functions to supply error messages for incorrect types in specific positions. Functions which have
- * the same type requirements for all positions can simplify this with a lambda returning a string constant.
- *
- * @param validForPosition - the set of {@link DataType}s that the test infrastructure believes to be allowable in the
- * given position.
- * @param position - the zero-based position in the list of parameters where the function detected the bad argument.
- * @return The string describing the acceptable parameters for that position. Note that this function should not return
- * the full error string; that will be constructed by the test. Just return the type string for that position.
- */
- String apply(Set<DataType> validForPosition, int position);
- }
-
- protected static TestCaseSupplier typeErrorSupplier(
- boolean includeOrdinal,
- List<Set<DataType>> validPerPosition,
- List<DataType> types,
- PositionalErrorMessageSupplier errorMessageSupplier
- ) {
- return typeErrorSupplier(
- includeOrdinal,
- validPerPosition,
- types,
- (o, v, t) -> AbstractScalarFunctionTestCase.typeErrorMessage(o, v, t, errorMessageSupplier)
- );
- }
-
- /**
- * Build a test case that asserts that the combination of parameter types is an error.
- */
- protected static TestCaseSupplier typeErrorSupplier(
- boolean includeOrdinal,
- List<Set<DataType>> validPerPosition,
- List<DataType> types,
- TypeErrorMessageSupplier errorMessageSupplier
- ) {
- return new TestCaseSupplier(
- "type error for " + TestCaseSupplier.nameFromTypes(types),
- types,
- () -> TestCaseSupplier.TestCase.typeError(
- types.stream().map(type -> new TestCaseSupplier.TypedData(randomLiteral(type).value(), type, type.typeName())).toList(),
- errorMessageSupplier.apply(includeOrdinal, validPerPosition, types)
- )
- );
- }
-
- /**
- * Build the expected error message for an invalid type signature.
- */
- protected static String typeErrorMessage(
- boolean includeOrdinal,
- List<Set<DataType>> validPerPosition,
- List<DataType> types,
- PositionalErrorMessageSupplier expectedTypeSupplier
- ) {
- int badArgPosition = -1;
- for (int i = 0; i < types.size(); i++) {
- if (validPerPosition.get(i).contains(types.get(i)) == false) {
- badArgPosition = i;
- break;
- }
- }
- if (badArgPosition == -1) {
- throw new IllegalStateException(
- "Can't generate error message for these types, you probably need a custom error message function"
- );
- }
- String ordinal = includeOrdinal ? TypeResolutions.ParamOrdinal.fromIndex(badArgPosition).name().toLowerCase(Locale.ROOT) + " " : "";
- String expectedTypeString = expectedTypeSupplier.apply(validPerPosition.get(badArgPosition), badArgPosition);
- String name = types.get(badArgPosition).typeName();
- return ordinal + "argument of [] must be [" + expectedTypeString + "], found value [" + name + "] type [" + name + "]";
- }
-
- /**
- * The types that are valid in function parameters. This is used by the
- * function tests to enumerate all possible parameters to test error messages
- * for invalid combinations.
- */
- public static Stream<DataType> validFunctionParameters() {
- return Arrays.stream(DataType.values()).filter(t -> {
- if (t == DataType.UNSUPPORTED) {
- // By definition, functions never support UNSUPPORTED
- return false;
- }
- if (t == DataType.DOC_DATA_TYPE || t == DataType.PARTIAL_AGG) {
- /*
- * Doc and partial_agg are special and functions aren't
- * defined to take these. They'll use them implicitly if needed.
- */
- return false;
- }
- if (t == DataType.OBJECT || t == DataType.NESTED) {
- // Object and nested fields aren't supported by any functions yet
- return false;
- }
- if (t == DataType.SOURCE || t == DataType.TSID_DATA_TYPE) {
- // No functions take source or tsid fields yet. We'll make some eventually and remove this.
- return false;
- }
- if (t == DataType.DATE_PERIOD || t == DataType.TIME_DURATION) {
- // We don't test that functions don't take date_period or time_duration. We should.
- return false;
- }
- if (t.isCounter()) {
- /*
- * For now, we're assuming no functions take counters
- * as parameters. That's not true - some do. But we'll
- * need to update the tests to handle that.
- */
- return false;
- }
- if (t.widenSmallNumeric() != t) {
- // Small numeric types are widened long before they arrive at functions.
- return false;
- }
-
- return true;
- }).sorted();
- }
-
}
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java
index 3585e58bf97ab..6652cca0c4527 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java
@@ -30,6 +30,7 @@
import java.time.Period;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
import java.util.function.BiFunction;
import java.util.function.BinaryOperator;
@@ -1455,6 +1456,7 @@ public TypedData get() {
*/
public static class TypedData {
public static final TypedData NULL = new TypedData(null, DataType.NULL, "");
+ public static final TypedData MULTI_ROW_NULL = TypedData.multiRow(Collections.singletonList(null), DataType.NULL, "");
private final Object data;
private final DataType type;
@@ -1583,7 +1585,7 @@ public Literal asLiteral() {
throw new IllegalStateException("Multirow values require exactly 1 element to be a literal, got " + values.size());
}
- return new Literal(Source.synthetic(name), values, type);
+ return new Literal(Source.synthetic(name), values.get(0), type);
}
return new Literal(Source.synthetic(name), data, type);
}
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/AvgTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/AvgTests.java
index f456bd409059a..80737dac1aa58 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/AvgTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/AvgTests.java
@@ -53,7 +53,7 @@ public static Iterable<Object[]> parameters() {
)
);
- return parameterSuppliersFromTypedDataWithDefaultChecks(suppliers);
+ return parameterSuppliersFromTypedDataWithDefaultChecks(suppliers, true, (v, p) -> "numeric except unsigned_long or counter types");
}
@Override
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MaxTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MaxTests.java
index 1d489e0146ad3..52e908a51dd1e 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MaxTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MaxTests.java
@@ -49,73 +49,6 @@ public static Iterable<Object[]> parameters() {
suppliers.addAll(
List.of(
- // Surrogates
- new TestCaseSupplier(
- List.of(DataType.INTEGER),
- () -> new TestCaseSupplier.TestCase(
- List.of(TestCaseSupplier.TypedData.multiRow(List.of(5, 8, -2, 0, 200), DataType.INTEGER, "field")),
- "Max[field=Attribute[channel=0]]",
- DataType.INTEGER,
- equalTo(200)
- )
- ),
- new TestCaseSupplier(
- List.of(DataType.LONG),
- () -> new TestCaseSupplier.TestCase(
- List.of(TestCaseSupplier.TypedData.multiRow(List.of(5L, 8L, -2L, 0L, 200L), DataType.LONG, "field")),
- "Max[field=Attribute[channel=0]]",
- DataType.LONG,
- equalTo(200L)
- )
- ),
- new TestCaseSupplier(
- List.of(DataType.DOUBLE),
- () -> new TestCaseSupplier.TestCase(
- List.of(TestCaseSupplier.TypedData.multiRow(List.of(5., 8., -2., 0., 200.), DataType.DOUBLE, "field")),
- "Max[field=Attribute[channel=0]]",
- DataType.DOUBLE,
- equalTo(200.)
- )
- ),
- new TestCaseSupplier(
- List.of(DataType.DATETIME),
- () -> new TestCaseSupplier.TestCase(
- List.of(TestCaseSupplier.TypedData.multiRow(List.of(5L, 8L, 2L, 0L, 200L), DataType.DATETIME, "field")),
- "Max[field=Attribute[channel=0]]",
- DataType.DATETIME,
- equalTo(200L)
- )
- ),
- new TestCaseSupplier(
- List.of(DataType.BOOLEAN),
- () -> new TestCaseSupplier.TestCase(
- List.of(TestCaseSupplier.TypedData.multiRow(List.of(true, false, false, true), DataType.BOOLEAN, "field")),
- "Max[field=Attribute[channel=0]]",
- DataType.BOOLEAN,
- equalTo(true)
- )
- ),
- new TestCaseSupplier(
- List.of(DataType.IP),
- () -> new TestCaseSupplier.TestCase(
- List.of(
- TestCaseSupplier.TypedData.multiRow(
- List.of(
- new BytesRef(InetAddressPoint.encode(InetAddresses.forString("127.0.0.1"))),
- new BytesRef(InetAddressPoint.encode(InetAddresses.forString("::1"))),
- new BytesRef(InetAddressPoint.encode(InetAddresses.forString("::"))),
- new BytesRef(InetAddressPoint.encode(InetAddresses.forString("ffff::")))
- ),
- DataType.IP,
- "field"
- )
- ),
- "Max[field=Attribute[channel=0]]",
- DataType.IP,
- equalTo(new BytesRef(InetAddressPoint.encode(InetAddresses.forString("ffff::"))))
- )
- ),
-
// Folding
new TestCaseSupplier(
List.of(DataType.INTEGER),
@@ -180,7 +113,11 @@ public static Iterable<Object[]> parameters() {
)
);
- return parameterSuppliersFromTypedDataWithDefaultChecks(suppliers);
+ return parameterSuppliersFromTypedDataWithDefaultChecks(
+ suppliers,
+ false,
+ (v, p) -> "boolean, datetime, ip or numeric except unsigned_long or counter types"
+ );
}
@Override
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MinTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MinTests.java
index b5fb5b2c1c414..9514c817df497 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MinTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/MinTests.java
@@ -49,73 +49,6 @@ public static Iterable<Object[]> parameters() {
suppliers.addAll(
List.of(
- // Surrogates
- new TestCaseSupplier(
- List.of(DataType.INTEGER),
- () -> new TestCaseSupplier.TestCase(
- List.of(TestCaseSupplier.TypedData.multiRow(List.of(5, 8, -2, 0, 200), DataType.INTEGER, "field")),
- "Min[field=Attribute[channel=0]]",
- DataType.INTEGER,
- equalTo(-2)
- )
- ),
- new TestCaseSupplier(
- List.of(DataType.LONG),
- () -> new TestCaseSupplier.TestCase(
- List.of(TestCaseSupplier.TypedData.multiRow(List.of(5L, 8L, -2L, 0L, 200L), DataType.LONG, "field")),
- "Min[field=Attribute[channel=0]]",
- DataType.LONG,
- equalTo(-2L)
- )
- ),
- new TestCaseSupplier(
- List.of(DataType.DOUBLE),
- () -> new TestCaseSupplier.TestCase(
- List.of(TestCaseSupplier.TypedData.multiRow(List.of(5., 8., -2., 0., 200.), DataType.DOUBLE, "field")),
- "Min[field=Attribute[channel=0]]",
- DataType.DOUBLE,
- equalTo(-2.)
- )
- ),
- new TestCaseSupplier(
- List.of(DataType.DATETIME),
- () -> new TestCaseSupplier.TestCase(
- List.of(TestCaseSupplier.TypedData.multiRow(List.of(5L, 8L, 2L, 0L, 200L), DataType.DATETIME, "field")),
- "Min[field=Attribute[channel=0]]",
- DataType.DATETIME,
- equalTo(0L)
- )
- ),
- new TestCaseSupplier(
- List.of(DataType.BOOLEAN),
- () -> new TestCaseSupplier.TestCase(
- List.of(TestCaseSupplier.TypedData.multiRow(List.of(true, false, false, true), DataType.BOOLEAN, "field")),
- "Min[field=Attribute[channel=0]]",
- DataType.BOOLEAN,
- equalTo(false)
- )
- ),
- new TestCaseSupplier(
- List.of(DataType.IP),
- () -> new TestCaseSupplier.TestCase(
- List.of(
- TestCaseSupplier.TypedData.multiRow(
- List.of(
- new BytesRef(InetAddressPoint.encode(InetAddresses.forString("127.0.0.1"))),
- new BytesRef(InetAddressPoint.encode(InetAddresses.forString("::1"))),
- new BytesRef(InetAddressPoint.encode(InetAddresses.forString("::"))),
- new BytesRef(InetAddressPoint.encode(InetAddresses.forString("ffff::")))
- ),
- DataType.IP,
- "field"
- )
- ),
- "Min[field=Attribute[channel=0]]",
- DataType.IP,
- equalTo(new BytesRef(InetAddressPoint.encode(InetAddresses.forString("::"))))
- )
- ),
-
// Folding
new TestCaseSupplier(
List.of(DataType.INTEGER),
@@ -180,7 +113,11 @@ public static Iterable<Object[]> parameters() {
)
);
- return parameterSuppliersFromTypedDataWithDefaultChecks(suppliers);
+ return parameterSuppliersFromTypedDataWithDefaultChecks(
+ suppliers,
+ false,
+ (v, p) -> "boolean, datetime, ip or numeric except unsigned_long or counter types"
+ );
}
@Override
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/ValuesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/ValuesTests.java
index 23b70b94d0d7f..55320543d0ec3 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/ValuesTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/ValuesTests.java
@@ -53,7 +53,11 @@ public static Iterable<Object[]> parameters() {
MultiRowTestCaseSupplier.stringCases(1, 20, DataType.TEXT)
).flatMap(List::stream).map(ValuesTests::makeSupplier).collect(Collectors.toCollection(() -> suppliers));
- return parameterSuppliersFromTypedDataWithDefaultChecks(suppliers);
+ return parameterSuppliersFromTypedDataWithDefaultChecks(
+ suppliers,
+ false,
+ (v, p) -> "any type except unsigned_long and spatial types"
+ );
}
@Override
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/WeightedAvgTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/WeightedAvgTests.java
index 2ba091437f237..2c2ffc97f268c 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/WeightedAvgTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/WeightedAvgTests.java
@@ -52,11 +52,11 @@ public static Iterable<Object[]> parameters() {
List.of(
// Folding
new TestCaseSupplier(
- List.of(DataType.INTEGER),
+ List.of(DataType.INTEGER, DataType.INTEGER),
() -> new TestCaseSupplier.TestCase(
List.of(
- TestCaseSupplier.TypedData.multiRow(List.of(5), DataType.INTEGER, "field"),
- TestCaseSupplier.TypedData.multiRow(List.of(100), DataType.INTEGER, "field")
+ TestCaseSupplier.TypedData.multiRow(List.of(5), DataType.INTEGER, "number"),
+ TestCaseSupplier.TypedData.multiRow(List.of(100), DataType.INTEGER, "weight")
),
"WeightedAvg[number=Attribute[channel=0],weight=Attribute[channel=1]]",
DataType.DOUBLE,
@@ -64,11 +64,11 @@ public static Iterable<Object[]> parameters() {
)
),
new TestCaseSupplier(
- List.of(DataType.LONG),
+ List.of(DataType.LONG, DataType.INTEGER),
() -> new TestCaseSupplier.TestCase(
List.of(
- TestCaseSupplier.TypedData.multiRow(List.of(5L), DataType.LONG, "field"),
- TestCaseSupplier.TypedData.multiRow(List.of(100), DataType.INTEGER, "field")
+ TestCaseSupplier.TypedData.multiRow(List.of(5L), DataType.LONG, "number"),
+ TestCaseSupplier.TypedData.multiRow(List.of(100), DataType.INTEGER, "weight")
),
"WeightedAvg[number=Attribute[channel=0],weight=Attribute[channel=1]]",
DataType.DOUBLE,
@@ -76,11 +76,11 @@ public static Iterable<Object[]> parameters() {
)
),
new TestCaseSupplier(
- List.of(DataType.DOUBLE),
+ List.of(DataType.DOUBLE, DataType.INTEGER),
() -> new TestCaseSupplier.TestCase(
List.of(
- TestCaseSupplier.TypedData.multiRow(List.of(5.), DataType.DOUBLE, "field"),
- TestCaseSupplier.TypedData.multiRow(List.of(100), DataType.INTEGER, "field")
+ TestCaseSupplier.TypedData.multiRow(List.of(5.), DataType.DOUBLE, "number"),
+ TestCaseSupplier.TypedData.multiRow(List.of(100), DataType.INTEGER, "weight")
),
"WeightedAvg[number=Attribute[channel=0],weight=Attribute[channel=1]]",
DataType.DOUBLE,
From 4034615e29eaeacab855b3f7eb223ecfa060737e Mon Sep 17 00:00:00 2001
From: Liam Thompson <32779855+leemthompo@users.noreply.github.com>
Date: Thu, 1 Aug 2024 13:37:17 +0100
Subject: [PATCH 04/36] [DOCS] Clarify copy_to behavior with strict dynamic
mappings (#111408)
* [DOCS] Clarify copy_to behavior with strict dynamic mappings
* Add id
* De-verbosify
* Delete pesky comma
* More info about root and nest
* Fixes per review, clarify non-recursive explanation
* Skip tests for illustrative example
* Fix example syntax
* Fix typo
---
.../reference/mapping/params/copy-to.asciidoc | 102 ++++++++++++++++--
1 file changed, 95 insertions(+), 7 deletions(-)
diff --git a/docs/reference/mapping/params/copy-to.asciidoc b/docs/reference/mapping/params/copy-to.asciidoc
index 10eebfb027736..b26ceac349a3e 100644
--- a/docs/reference/mapping/params/copy-to.asciidoc
+++ b/docs/reference/mapping/params/copy-to.asciidoc
@@ -64,16 +64,104 @@ Some important points:
* It is the field _value_ which is copied, not the terms (which result from the analysis process).
* The original <> field will not be modified to show the copied values.
* The same value can be copied to multiple fields, with `"copy_to": [ "field_1", "field_2" ]`
-* You cannot copy recursively via intermediary fields such as a `copy_to` on
-`field_1` to `field_2` and `copy_to` on `field_2` to `field_3` expecting
-indexing into `field_1` will eventuate in `field_3`, instead use copy_to
-directly to multiple fields from the originating field.
+* You cannot copy recursively using intermediary fields.
+The following configuration will not copy data from `field_1` to `field_3`:
++
+[source,console]
+----
+PUT bad_example_index
+{
+ "mappings": {
+ "properties": {
+ "field_1": {
+ "type": "text",
+ "copy_to": "field_2"
+ },
+ "field_2": {
+ "type": "text",
+ "copy_to": "field_3"
+ },
+ "field_3": {
+ "type": "text"
+ }
+ }
+ }
+}
+----
+Instead, copy to multiple fields from the source field:
++
+[source,console]
+----
+PUT good_example_index
+{
+ "mappings": {
+ "properties": {
+ "field_1": {
+ "type": "text",
+ "copy_to": ["field_2", "field_3"]
+ },
+ "field_2": {
+ "type": "text"
+ },
+ "field_3": {
+ "type": "text"
+ }
+ }
+ }
+}
+----
+
+NOTE: `copy_to` is not supported for field types where values take the form of objects, e.g. `date_range`.
+
+[float]
+[[copy-to-dynamic-mapping]]
+==== Dynamic mapping
+
+Consider the following points when using `copy_to` with dynamic mappings:
+
* If the target field does not exist in the index mappings, the usual
<> behavior applies. By default, with
<> set to `true`, a non-existent target field will be
-dynamically added to the index mappings. If `dynamic` is set to `false`, the
+dynamically added to the index mappings.
+* If `dynamic` is set to `false`, the
target field will not be added to the index mappings, and the value will not be
-copied. If `dynamic` is set to `strict`, copying to a non-existent field will
+copied.
+* If `dynamic` is set to `strict`, copying to a non-existent field will
result in an error.
++
+** If the target field is nested, then `copy_to` fields must specify the full path to the nested field.
+Omitting the full path will lead to a `strict_dynamic_mapping_exception`.
+Use `"copy_to": ["parent_field.child_field"]` to correctly target a nested field.
++
+For example:
++
+[source,console]
+--------------------------------------------------
+PUT /test_index
+{
+ "mappings": {
+ "dynamic": "strict",
+ "properties": {
+ "description": {
+ "properties": {
+ "notes": {
+ "type": "text",
+ "copy_to": [ "description.notes_raw"], <1>
+ "analyzer": "standard",
+ "search_analyzer": "standard"
+ },
+ "notes_raw": {
+ "type": "keyword"
+ }
+ }
+ }
+ }
+ }
+}
+--------------------------------------------------
-NOTE: `copy_to` is _not_ supported for field types where values take the form of objects, e.g. `date_range`
\ No newline at end of file
+<1> The `notes` field is copied to the `notes_raw` field. Targeting `notes_raw` alone instead of `description.notes_raw`
+would lead to a `strict_dynamic_mapping_exception`.
++
+In this example, `notes_raw` is not defined at the root of the mapping, but under the `description` field.
+Without the fully qualified path, {es} would interpret the `copy_to` target as a root-level field, not as a nested field under `description`.
\ No newline at end of file
From 1329dc333d0e1aa8bdd64f41f5d81a7ba1c79fd8 Mon Sep 17 00:00:00 2001
From: Simon Cooper
Date: Thu, 1 Aug 2024 14:33:33 +0100
Subject: [PATCH 05/36] Add release version to setCompatibleVersions task
(#111489)
The release version is used to determine whether the CCS version actually needs to be updated.
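In short, the task now no-ops when the version being released belongs to an
older major line (a condensed sketch of the guard added below):
    if (releaseVersion.getMajor() < thisVersion.getMajor()) {
        return; // releasing from an older major never updates the CCS version
    }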
---
.../internal/release/ReleaseToolsPlugin.java | 2 +-
.../release/SetCompatibleVersionsTask.java | 19 ++++++++++++++++++-
2 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java
index 08abb02ea831e..ec79fe20492e1 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java
@@ -52,7 +52,7 @@ public void apply(Project project) {
project.getTasks().register("extractCurrentVersions", ExtractCurrentVersionsTask.class);
project.getTasks().register("tagVersions", TagVersionsTask.class);
- project.getTasks().register("setCompatibleVersions", SetCompatibleVersionsTask.class);
+ project.getTasks().register("setCompatibleVersions", SetCompatibleVersionsTask.class, t -> t.setThisVersion(version));
final FileTree yamlFiles = projectDirectory.dir("docs/changelog")
.getAsFileTree()
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTask.java
index 15e0a0cc345d5..17761e5183b31 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTask.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTask.java
@@ -14,6 +14,7 @@
import com.github.javaparser.ast.expr.NameExpr;
import com.github.javaparser.printer.lexicalpreservation.LexicalPreservingPrinter;
+import org.elasticsearch.gradle.Version;
import org.gradle.api.tasks.TaskAction;
import org.gradle.api.tasks.options.Option;
import org.gradle.initialization.layout.BuildLayout;
@@ -28,6 +29,8 @@
public class SetCompatibleVersionsTask extends AbstractVersionsTask {
+ private Version thisVersion;
+ private Version releaseVersion;
private Map<String, Integer> versionIds = Map.of();
@Inject
@@ -35,21 +38,35 @@ public SetCompatibleVersionsTask(BuildLayout layout) {
super(layout);
}
+ public void setThisVersion(Version version) {
+ thisVersion = version;
+ }
+
@Option(option = "version-id", description = "Version id used for the release. Of the form :.")
public void versionIds(List<String> version) {
this.versionIds = splitVersionIds(version);
}
+ @Option(option = "release", description = "The version being released")
+ public void releaseVersion(String version) {
+ releaseVersion = Version.fromString(version);
+ }
+
@TaskAction
public void executeTask() throws IOException {
if (versionIds.isEmpty()) {
throw new IllegalArgumentException("No version ids specified");
}
+
+ if (releaseVersion.getMajor() < thisVersion.getMajor()) {
+ // don't need to update CCS version - this is for a different major
+ return;
+ }
+
Integer transportVersion = versionIds.get(TRANSPORT_VERSION_TYPE);
if (transportVersion == null) {
throw new IllegalArgumentException("TransportVersion id not specified");
}
-
Path versionJava = rootDir.resolve(TRANSPORT_VERSIONS_FILE_PATH);
CompilationUnit file = LexicalPreservingPrinter.setup(StaticJavaParser.parse(versionJava));
From 96a04fc1e5b74e046e1caaecc53e36f86e47fad5 Mon Sep 17 00:00:00 2001
From: Nik Everett
Date: Thu, 1 Aug 2024 09:36:17 -0400
Subject: [PATCH 06/36] ESQL: Make test result order consistent (#111510)
This test for our new `MV_PSERIES_WEIGHTED_SUM` function was failing
sometimes because it asserted that results were returned in a particular
order without forcing them to come back in that order.
---
.../src/main/resources/mv_pseries_weighted_sum.csv-spec | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_pseries_weighted_sum.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_pseries_weighted_sum.csv-spec
index 4d8ffd1136908..df8771ad7a832 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_pseries_weighted_sum.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_pseries_weighted_sum.csv-spec
@@ -70,7 +70,8 @@ FROM alerts
TOP(kibana.alert.risk_score, 10000, "desc"), 1.5
) BY host.name
| EVAL normalized_score = ROUND(100 * score / 261.2, 2)
-| KEEP host.name, normalized_score, score;
+| KEEP host.name, normalized_score, score
+| SORT normalized_score DESC;
host.name:keyword|normalized_score:double|score:double
test-host-1 |36.16 |94.45465156212452
From 63c2ddea115c2ca9e55ecce692b6ec07c1bd675a Mon Sep 17 00:00:00 2001
From: Nhat Nguyen
Date: Thu, 1 Aug 2024 06:53:37 -0700
Subject: [PATCH 07/36] Fix DocValuesCodecDuelTests (#111503)
We should not access any docValues attribute of the current document if
advance() is exhausted or if advanceExact() returns false.
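For illustration, a minimal sketch of the access pattern this fix enforces
(an illustrative fragment, not code from the patch; `values` stands for any
SortedNumericDocValues instance and `docId` for the target document):
    if (values.advanceExact(docId)) {
        // docValueCount()/nextValue() are only defined for documents that
        // actually have a value, i.e. when advanceExact() returned true
        for (int i = 0; i < values.docValueCount(); i++) {
            long v = values.nextValue();
        }
    }
    // similarly, after advance(), stop once docID() == NO_MORE_DOCS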
Closes #111470
---
.../codec/tsdb/DocValuesCodecDuelTests.java | 32 ++++++++++++-------
1 file changed, 21 insertions(+), 11 deletions(-)
diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java
index e8949dda78f7f..9b58e785131c9 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java
@@ -232,6 +232,9 @@ private void assertSortedSetDocValues(LeafReader baselineReader, LeafReader cont
for (int i = 0; i < docIdsToAdvanceTo.length; i++) {
int docId = docIdsToAdvanceTo[i];
int baselineTarget = assertAdvance(docId, baselineReader, contenderReader, baseline, contender);
+ if (baselineTarget == NO_MORE_DOCS) {
+ break;
+ }
assertEquals(baseline.docValueCount(), contender.docValueCount());
for (int j = 0; j < baseline.docValueCount(); j++) {
long baselineOrd = baseline.nextOrd();
@@ -255,12 +258,14 @@ private void assertSortedSetDocValues(LeafReader baselineReader, LeafReader cont
boolean contenderFound = contender.advanceExact(docId);
assertEquals(baselineFound, contenderFound);
assertEquals(baseline.docID(), contender.docID());
- assertEquals(baseline.docValueCount(), contender.docValueCount());
- for (int i = 0; i < baseline.docValueCount(); i++) {
- long baselineOrd = baseline.nextOrd();
- long contenderOrd = contender.nextOrd();
- assertEquals(baselineOrd, contenderOrd);
- assertEquals(baseline.lookupOrd(baselineOrd), contender.lookupOrd(contenderOrd));
+ if (baselineFound) {
+ assertEquals(baseline.docValueCount(), contender.docValueCount());
+ for (int i = 0; i < baseline.docValueCount(); i++) {
+ long baselineOrd = baseline.nextOrd();
+ long contenderOrd = contender.nextOrd();
+ assertEquals(baselineOrd, contenderOrd);
+ assertEquals(baseline.lookupOrd(baselineOrd), contender.lookupOrd(contenderOrd));
+ }
}
}
}
@@ -328,6 +333,9 @@ private void assertSortedNumericDocValues(LeafReader baselineReader, LeafReader
for (int i = 0; i < docIdsToAdvanceTo.length; i++) {
int docId = docIdsToAdvanceTo[i];
int baselineTarget = assertAdvance(docId, baselineReader, contenderReader, baseline, contender);
+ if (baselineTarget == NO_MORE_DOCS) {
+ break;
+ }
assertEquals(baseline.docValueCount(), contender.docValueCount());
for (int j = 0; j < baseline.docValueCount(); j++) {
long baselineValue = baseline.nextValue();
@@ -349,11 +357,13 @@ private void assertSortedNumericDocValues(LeafReader baselineReader, LeafReader
boolean contenderResult = contender.advanceExact(docId);
assertEquals(baselineResult, contenderResult);
assertEquals(baseline.docID(), contender.docID());
- assertEquals(baseline.docValueCount(), contender.docValueCount());
- for (int i = 0; i < baseline.docValueCount(); i++) {
- long baselineValue = baseline.nextValue();
- long contenderValue = contender.nextValue();
- assertEquals(baselineValue, contenderValue);
+ if (baselineResult) {
+ assertEquals(baseline.docValueCount(), contender.docValueCount());
+ for (int i = 0; i < baseline.docValueCount(); i++) {
+ long baselineValue = baseline.nextValue();
+ long contenderValue = contender.nextValue();
+ assertEquals(baselineValue, contenderValue);
+ }
}
}
}
From 4d67ac1ef5d95814dd526c7751ca025615cc5651 Mon Sep 17 00:00:00 2001
From: Pat Whelan
Date: Thu, 1 Aug 2024 10:08:04 -0400
Subject: [PATCH 08/36] [ML] Start a new trace before loading trained model
(#111364)
Each distinct task is a different span in APM tracing, so trained model deployments need a new trace context.
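A condensed sketch of the pattern (the anonymous TaskAwareRequest built in
the diff below is abbreviated here to `taskAwareRequest`):
    // Open a fresh trace context before registering the download task, so
    // the new task gets its own APM span instead of inheriting the caller's.
    try (var ignored = threadPool.getThreadContext().newTraceContext()) {
        return (ModelDownloadTask) taskManager.register(MODEL_IMPORT_TASK_TYPE, MODEL_IMPORT_TASK_ACTION, taskAwareRequest, false);
    }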
---
.../TransportLoadTrainedModelPackage.java | 46 +++++++++++--------
1 file changed, 28 insertions(+), 18 deletions(-)
diff --git a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackage.java b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackage.java
index cdc3205f4197c..c4c2c17fcbc12 100644
--- a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackage.java
+++ b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackage.java
@@ -180,27 +180,37 @@ static void importModel(
}
private ModelDownloadTask createDownloadTask(Request request) {
- return (ModelDownloadTask) taskManager.register(MODEL_IMPORT_TASK_TYPE, MODEL_IMPORT_TASK_ACTION, new TaskAwareRequest() {
- @Override
- public void setParentTask(TaskId taskId) {
- request.setParentTask(taskId);
- }
+ // Loading the model is done by a separate task, so needs a new trace context
+ try (var ignored = threadPool.getThreadContext().newTraceContext()) {
+ return (ModelDownloadTask) taskManager.register(MODEL_IMPORT_TASK_TYPE, MODEL_IMPORT_TASK_ACTION, new TaskAwareRequest() {
+ @Override
+ public void setParentTask(TaskId taskId) {
+ request.setParentTask(taskId);
+ }
- @Override
- public void setRequestId(long requestId) {
- request.setRequestId(requestId);
- }
+ @Override
+ public void setRequestId(long requestId) {
+ request.setRequestId(requestId);
+ }
- @Override
- public TaskId getParentTask() {
- return request.getParentTask();
- }
+ @Override
+ public TaskId getParentTask() {
+ return request.getParentTask();
+ }
- @Override
- public ModelDownloadTask createTask(long id, String type, String action, TaskId parentTaskId, Map<String, String> headers) {
- return new ModelDownloadTask(id, type, action, downloadModelTaskDescription(request.getModelId()), parentTaskId, headers);
- }
- }, false);
+ @Override
+ public ModelDownloadTask createTask(long id, String type, String action, TaskId parentTaskId, Map<String, String> headers) {
+ return new ModelDownloadTask(
+ id,
+ type,
+ action,
+ downloadModelTaskDescription(request.getModelId()),
+ parentTaskId,
+ headers
+ );
+ }
+ }, false);
+ }
}
private static void recordError(
From 46f941c9f637ddf174653c68d5c609cfc857ac75 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Thu, 1 Aug 2024 14:25:01 +0000
Subject: [PATCH 09/36] Bump versions after 7.17.23 release
---
.buildkite/pipelines/intake.yml | 2 +-
.buildkite/pipelines/periodic-packaging.yml | 6 +++---
.buildkite/pipelines/periodic.yml | 10 +++++-----
.ci/bwcVersions | 2 +-
.ci/snapshotBwcVersions | 2 +-
server/src/main/java/org/elasticsearch/Version.java | 1 +
.../resources/org/elasticsearch/TransportVersions.csv | 1 +
.../org/elasticsearch/index/IndexVersions.csv | 1 +
8 files changed, 14 insertions(+), 11 deletions(-)
diff --git a/.buildkite/pipelines/intake.yml b/.buildkite/pipelines/intake.yml
index 776b1ab944f69..e323a9238ca5b 100644
--- a/.buildkite/pipelines/intake.yml
+++ b/.buildkite/pipelines/intake.yml
@@ -62,7 +62,7 @@ steps:
timeout_in_minutes: 300
matrix:
setup:
- BWC_VERSION: ["7.17.23", "8.14.4", "8.15.0", "8.16.0"]
+ BWC_VERSION: ["7.17.24", "8.14.4", "8.15.0", "8.16.0"]
agents:
provider: gcp
image: family/elasticsearch-ubuntu-2004
diff --git a/.buildkite/pipelines/periodic-packaging.yml b/.buildkite/pipelines/periodic-packaging.yml
index e9c743885d78d..6e86e46f79484 100644
--- a/.buildkite/pipelines/periodic-packaging.yml
+++ b/.buildkite/pipelines/periodic-packaging.yml
@@ -322,8 +322,8 @@ steps:
env:
BWC_VERSION: 7.16.3
- - label: "{{matrix.image}} / 7.17.23 / packaging-tests-upgrade"
- command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v7.17.23
+ - label: "{{matrix.image}} / 7.17.24 / packaging-tests-upgrade"
+ command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v7.17.24
timeout_in_minutes: 300
matrix:
setup:
@@ -337,7 +337,7 @@ steps:
buildDirectory: /dev/shm/bk
diskSizeGb: 250
env:
- BWC_VERSION: 7.17.23
+ BWC_VERSION: 7.17.24
- label: "{{matrix.image}} / 8.0.1 / packaging-tests-upgrade"
command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.0.1
diff --git a/.buildkite/pipelines/periodic.yml b/.buildkite/pipelines/periodic.yml
index f908b946bb523..a0bc07f7ca3b7 100644
--- a/.buildkite/pipelines/periodic.yml
+++ b/.buildkite/pipelines/periodic.yml
@@ -342,8 +342,8 @@ steps:
- signal_reason: agent_stop
limit: 3
- - label: 7.17.23 / bwc
- command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v7.17.23#bwcTest
+ - label: 7.17.24 / bwc
+ command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v7.17.24#bwcTest
timeout_in_minutes: 300
agents:
provider: gcp
@@ -353,7 +353,7 @@ steps:
preemptible: true
diskSizeGb: 250
env:
- BWC_VERSION: 7.17.23
+ BWC_VERSION: 7.17.24
retry:
automatic:
- exit_status: "-1"
@@ -771,7 +771,7 @@ steps:
setup:
ES_RUNTIME_JAVA:
- openjdk17
- BWC_VERSION: ["7.17.23", "8.14.4", "8.15.0", "8.16.0"]
+ BWC_VERSION: ["7.17.24", "8.14.4", "8.15.0", "8.16.0"]
agents:
provider: gcp
image: family/elasticsearch-ubuntu-2004
@@ -821,7 +821,7 @@ steps:
- openjdk21
- openjdk22
- openjdk23
- BWC_VERSION: ["7.17.23", "8.14.4", "8.15.0", "8.16.0"]
+ BWC_VERSION: ["7.17.24", "8.14.4", "8.15.0", "8.16.0"]
agents:
provider: gcp
image: family/elasticsearch-ubuntu-2004
diff --git a/.ci/bwcVersions b/.ci/bwcVersions
index 776be80e0d291..d6072488ae93b 100644
--- a/.ci/bwcVersions
+++ b/.ci/bwcVersions
@@ -16,7 +16,7 @@ BWC_VERSION:
- "7.14.2"
- "7.15.2"
- "7.16.3"
- - "7.17.23"
+ - "7.17.24"
- "8.0.1"
- "8.1.3"
- "8.2.3"
diff --git a/.ci/snapshotBwcVersions b/.ci/snapshotBwcVersions
index f5f7f7a7d4ecb..909960a67cc41 100644
--- a/.ci/snapshotBwcVersions
+++ b/.ci/snapshotBwcVersions
@@ -1,5 +1,5 @@
BWC_VERSION:
- - "7.17.23"
+ - "7.17.24"
- "8.14.4"
- "8.15.0"
- "8.16.0"
diff --git a/server/src/main/java/org/elasticsearch/Version.java b/server/src/main/java/org/elasticsearch/Version.java
index fefe2ea486485..fd29a81cdb143 100644
--- a/server/src/main/java/org/elasticsearch/Version.java
+++ b/server/src/main/java/org/elasticsearch/Version.java
@@ -123,6 +123,7 @@ public class Version implements VersionId, ToXContentFragment {
public static final Version V_7_17_21 = new Version(7_17_21_99);
public static final Version V_7_17_22 = new Version(7_17_22_99);
public static final Version V_7_17_23 = new Version(7_17_23_99);
+ public static final Version V_7_17_24 = new Version(7_17_24_99);
public static final Version V_8_0_0 = new Version(8_00_00_99);
public static final Version V_8_0_1 = new Version(8_00_01_99);
diff --git a/server/src/main/resources/org/elasticsearch/TransportVersions.csv b/server/src/main/resources/org/elasticsearch/TransportVersions.csv
index 7d2697539fa13..687e435990785 100644
--- a/server/src/main/resources/org/elasticsearch/TransportVersions.csv
+++ b/server/src/main/resources/org/elasticsearch/TransportVersions.csv
@@ -70,6 +70,7 @@
7.17.20,7172099
7.17.21,7172199
7.17.22,7172299
+7.17.23,7172399
8.0.0,8000099
8.0.1,8000199
8.1.0,8010099
diff --git a/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv b/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv
index f177ab1468cb2..8c86ca48d6284 100644
--- a/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv
+++ b/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv
@@ -70,6 +70,7 @@
7.17.20,7172099
7.17.21,7172199
7.17.22,7172299
+7.17.23,7172399
8.0.0,8000099
8.0.1,8000199
8.1.0,8010099
From 7f715a4cbeb6e899b7784dd5cff91aad3bd0b66a Mon Sep 17 00:00:00 2001
From: Armin Braun
Date: Thu, 1 Aug 2024 16:53:03 +0200
Subject: [PATCH 10/36] Only use at most a single thread for search context
freeing (#111156)
Forking to `GENERIC` makes sense here since we sporadically block for a
macroscopic amount of time to protect transport threads, but in almost
all cases the operation operates on the same data structures and is very
fast. Since it's also very frequent, we shouldn't be creating a bunch of
generic threads during a burst -> let's throttle to a single thread.
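A condensed sketch of the resulting executor (names as in the diff; error
handling simplified here):
    // At most one queued task runs on GENERIC at a time; the rest wait in
    // the runner's queue instead of spawning more generic threads.
    ThrottledTaskRunner runner = new ThrottledTaskRunner("free_context", 1, threadPool.generic());
    Executor freeContextExecutor = r -> runner.enqueueTask(ActionListener.wrap(releasable -> {
        try (releasable) {
            r.run();
        }
    }, e -> logger.error("unexpected failure running " + r, e)));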
---
.../action/search/SearchTransportService.java | 37 +++++++++++++++++--
1 file changed, 34 insertions(+), 3 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchTransportService.java b/server/src/main/java/org/elasticsearch/action/search/SearchTransportService.java
index fb3c49d83cb93..52d4542faaf77 100644
--- a/server/src/main/java/org/elasticsearch/action/search/SearchTransportService.java
+++ b/server/src/main/java/org/elasticsearch/action/search/SearchTransportService.java
@@ -24,9 +24,12 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.common.util.concurrent.EsExecutors;
+import org.elasticsearch.common.util.concurrent.ThrottledTaskRunner;
import org.elasticsearch.core.Nullable;
+import org.elasticsearch.core.Releasable;
import org.elasticsearch.search.SearchPhaseResult;
import org.elasticsearch.search.SearchService;
import org.elasticsearch.search.dfs.DfsSearchResult;
@@ -60,6 +63,7 @@
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
+import java.util.concurrent.Executor;
import java.util.function.BiFunction;
import static org.elasticsearch.action.search.SearchTransportAPMMetrics.ACTION_ATTRIBUTE_NAME;
@@ -455,9 +459,10 @@ public static void registerRequestHandler(
boolean freed = searchService.freeReaderContext(request.id());
channel.sendResponse(SearchFreeContextResponse.of(freed));
};
+ final Executor freeContextExecutor = buildFreeContextExecutor(transportService);
transportService.registerRequestHandler(
FREE_CONTEXT_SCROLL_ACTION_NAME,
- transportService.getThreadPool().generic(),
+ freeContextExecutor,
ScrollFreeContextRequest::new,
instrumentedHandler(FREE_CONTEXT_SCROLL_ACTION_METRIC, transportService, searchTransportMetrics, freeContextHandler)
);
@@ -470,7 +475,7 @@ public static void registerRequestHandler(
transportService.registerRequestHandler(
FREE_CONTEXT_ACTION_NAME,
- transportService.getThreadPool().generic(),
+ freeContextExecutor,
SearchFreeContextRequest::new,
instrumentedHandler(FREE_CONTEXT_ACTION_METRIC, transportService, searchTransportMetrics, freeContextHandler)
);
@@ -478,7 +483,7 @@ public static void registerRequestHandler(
transportService.registerRequestHandler(
CLEAR_SCROLL_CONTEXTS_ACTION_NAME,
- transportService.getThreadPool().generic(),
+ freeContextExecutor,
ClearScrollContextsRequest::new,
instrumentedHandler(CLEAR_SCROLL_CONTEXTS_ACTION_METRIC, transportService, searchTransportMetrics, (request, channel, task) -> {
searchService.freeAllScrollContexts();
@@ -626,6 +631,32 @@ public static void registerRequestHandler(
TransportActionProxy.registerProxyAction(transportService, QUERY_CAN_MATCH_NODE_NAME, true, CanMatchNodeResponse::new);
}
+ private static Executor buildFreeContextExecutor(TransportService transportService) {
+ final ThrottledTaskRunner throttledTaskRunner = new ThrottledTaskRunner(
+ "free_context",
+ 1,
+ transportService.getThreadPool().generic()
+ );
+ return r -> throttledTaskRunner.enqueueTask(new ActionListener<>() {
+ @Override
+ public void onResponse(Releasable releasable) {
+ try (releasable) {
+ r.run();
+ }
+ }
+
+ @Override
+ public void onFailure(Exception e) {
+ if (r instanceof AbstractRunnable abstractRunnable) {
+ abstractRunnable.onFailure(e);
+ }
+ // should be impossible, GENERIC pool doesn't reject anything
+ logger.error("unexpected failure running " + r, e);
+ assert false : new AssertionError("unexpected failure running " + r, e);
+ }
+ });
+ }
+
private static <Request extends TransportRequest> TransportRequestHandler<Request> instrumentedHandler(
String actionQualifier,
TransportService transportService,
From 028b35129c836ed2a0e94bf07bfd3d8907e1d595 Mon Sep 17 00:00:00 2001
From: David Turner
Date: Thu, 1 Aug 2024 17:41:31 +0100
Subject: [PATCH 11/36] Remove unused compatibility shims (#111509)
These methods are now unused by any dependent project so can be removed.
Relates #107984
---
.../restore/RestoreSnapshotRequest.java | 5 --
.../DeleteStoredScriptRequestBuilder.java | 29 -----------
.../GetStoredScriptRequestBuilder.java | 29 -----------
.../PutStoredScriptRequestBuilder.java | 44 ----------------
.../client/internal/ClusterAdminClient.java | 50 -------------------
5 files changed, 157 deletions(-)
delete mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/DeleteStoredScriptRequestBuilder.java
delete mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/GetStoredScriptRequestBuilder.java
delete mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequestBuilder.java
diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java
index f0d47813dad77..f9ee2d84f8732 100644
--- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java
+++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java
@@ -68,11 +68,6 @@ public RestoreSnapshotRequest(TimeValue masterNodeTimeout) {
super(masterNodeTimeout);
}
- @Deprecated(forRemoval = true) // temporary compatibility shim
- public RestoreSnapshotRequest(String repository, String snapshot) {
- this(MasterNodeRequest.TRAPPY_IMPLICIT_DEFAULT_MASTER_NODE_TIMEOUT, repository, snapshot);
- }
-
/**
* Constructs a new put repository request with the provided repository and snapshot names.
*
diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/DeleteStoredScriptRequestBuilder.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/DeleteStoredScriptRequestBuilder.java
deleted file mode 100644
index 375365c174885..0000000000000
--- a/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/DeleteStoredScriptRequestBuilder.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0 and the Server Side Public License, v 1; you may not use this file except
- * in compliance with, at your election, the Elastic License 2.0 or the Server
- * Side Public License, v 1.
- */
-
-package org.elasticsearch.action.admin.cluster.storedscripts;
-
-import org.elasticsearch.action.support.master.AcknowledgedRequestBuilder;
-import org.elasticsearch.action.support.master.AcknowledgedResponse;
-import org.elasticsearch.client.internal.ElasticsearchClient;
-import org.elasticsearch.core.TimeValue;
-
-public class DeleteStoredScriptRequestBuilder extends AcknowledgedRequestBuilder<
- DeleteStoredScriptRequest,
- AcknowledgedResponse,
- DeleteStoredScriptRequestBuilder> {
-
- public DeleteStoredScriptRequestBuilder(ElasticsearchClient client, TimeValue masterNodeTimeout, TimeValue ackTimeout) {
- super(client, TransportDeleteStoredScriptAction.TYPE, new DeleteStoredScriptRequest(masterNodeTimeout, ackTimeout));
- }
-
- public DeleteStoredScriptRequestBuilder setId(String id) {
- request.id(id);
- return this;
- }
-}
diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/GetStoredScriptRequestBuilder.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/GetStoredScriptRequestBuilder.java
deleted file mode 100644
index 798d78928c860..0000000000000
--- a/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/GetStoredScriptRequestBuilder.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0 and the Server Side Public License, v 1; you may not use this file except
- * in compliance with, at your election, the Elastic License 2.0 or the Server
- * Side Public License, v 1.
- */
-
-package org.elasticsearch.action.admin.cluster.storedscripts;
-
-import org.elasticsearch.action.support.master.MasterNodeReadOperationRequestBuilder;
-import org.elasticsearch.client.internal.ElasticsearchClient;
-import org.elasticsearch.core.TimeValue;
-
-public class GetStoredScriptRequestBuilder extends MasterNodeReadOperationRequestBuilder<
- GetStoredScriptRequest,
- GetStoredScriptResponse,
- GetStoredScriptRequestBuilder> {
-
- public GetStoredScriptRequestBuilder(ElasticsearchClient client, TimeValue masterNodeTimeout) {
- super(client, GetStoredScriptAction.INSTANCE, new GetStoredScriptRequest(masterNodeTimeout));
- }
-
- public GetStoredScriptRequestBuilder setId(String id) {
- request.id(id);
- return this;
- }
-
-}
diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequestBuilder.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequestBuilder.java
deleted file mode 100644
index e096fa24e6837..0000000000000
--- a/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequestBuilder.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0 and the Server Side Public License, v 1; you may not use this file except
- * in compliance with, at your election, the Elastic License 2.0 or the Server
- * Side Public License, v 1.
- */
-
-package org.elasticsearch.action.admin.cluster.storedscripts;
-
-import org.elasticsearch.action.support.master.AcknowledgedRequestBuilder;
-import org.elasticsearch.action.support.master.AcknowledgedResponse;
-import org.elasticsearch.client.internal.ElasticsearchClient;
-import org.elasticsearch.common.bytes.BytesReference;
-import org.elasticsearch.core.TimeValue;
-import org.elasticsearch.xcontent.XContentType;
-
-public class PutStoredScriptRequestBuilder extends AcknowledgedRequestBuilder<
- PutStoredScriptRequest,
- AcknowledgedResponse,
- PutStoredScriptRequestBuilder> {
-
- public PutStoredScriptRequestBuilder(ElasticsearchClient client, TimeValue masterNodeTimeout, TimeValue ackTimeout) {
- super(client, TransportPutStoredScriptAction.TYPE, new PutStoredScriptRequest(masterNodeTimeout, ackTimeout));
- }
-
- public PutStoredScriptRequestBuilder setId(String id) {
- request.id(id);
- return this;
- }
-
- public PutStoredScriptRequestBuilder setContext(String context) {
- request.context(context);
- return this;
- }
-
- /**
- * Set the source of the script along with the content type of the source
- */
- public PutStoredScriptRequestBuilder setContent(BytesReference source, XContentType xContentType) {
- request.content(source, xContentType);
- return this;
- }
-}
diff --git a/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java b/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java
index 995fe99cadffb..4d5a670925b5b 100644
--- a/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java
+++ b/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java
@@ -94,16 +94,6 @@
import org.elasticsearch.action.admin.cluster.stats.ClusterStatsRequestBuilder;
import org.elasticsearch.action.admin.cluster.stats.ClusterStatsResponse;
import org.elasticsearch.action.admin.cluster.stats.TransportClusterStatsAction;
-import org.elasticsearch.action.admin.cluster.storedscripts.DeleteStoredScriptRequest;
-import org.elasticsearch.action.admin.cluster.storedscripts.DeleteStoredScriptRequestBuilder;
-import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptAction;
-import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptRequest;
-import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptRequestBuilder;
-import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptResponse;
-import org.elasticsearch.action.admin.cluster.storedscripts.PutStoredScriptRequest;
-import org.elasticsearch.action.admin.cluster.storedscripts.PutStoredScriptRequestBuilder;
-import org.elasticsearch.action.admin.cluster.storedscripts.TransportDeleteStoredScriptAction;
-import org.elasticsearch.action.admin.cluster.storedscripts.TransportPutStoredScriptAction;
import org.elasticsearch.action.ingest.DeletePipelineRequest;
import org.elasticsearch.action.ingest.DeletePipelineRequestBuilder;
import org.elasticsearch.action.ingest.DeletePipelineTransportAction;
@@ -118,9 +108,7 @@
import org.elasticsearch.action.ingest.SimulatePipelineRequest;
import org.elasticsearch.action.ingest.SimulatePipelineRequestBuilder;
import org.elasticsearch.action.ingest.SimulatePipelineResponse;
-import org.elasticsearch.action.support.master.AcknowledgedRequest;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
-import org.elasticsearch.action.support.master.MasterNodeRequest;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.tasks.TaskId;
@@ -425,42 +413,4 @@ public ActionFuture simulatePipeline(SimulatePipelineR
public SimulatePipelineRequestBuilder prepareSimulatePipeline(BytesReference source, XContentType xContentType) {
return new SimulatePipelineRequestBuilder(this, source, xContentType);
}
-
- @Deprecated(forRemoval = true) // temporary compatibility shim
- public PutStoredScriptRequestBuilder preparePutStoredScript() {
- return new PutStoredScriptRequestBuilder(
- this,
- MasterNodeRequest.TRAPPY_IMPLICIT_DEFAULT_MASTER_NODE_TIMEOUT,
- AcknowledgedRequest.DEFAULT_ACK_TIMEOUT
- );
- }
-
- @Deprecated(forRemoval = true) // temporary compatibility shim
- public void deleteStoredScript(DeleteStoredScriptRequest request, ActionListener<AcknowledgedResponse> listener) {
- execute(TransportDeleteStoredScriptAction.TYPE, request, listener);
- }
-
- @Deprecated(forRemoval = true) // temporary compatibility shim
- public DeleteStoredScriptRequestBuilder prepareDeleteStoredScript(String id) {
- return new DeleteStoredScriptRequestBuilder(
- client,
- MasterNodeRequest.TRAPPY_IMPLICIT_DEFAULT_MASTER_NODE_TIMEOUT,
- AcknowledgedRequest.DEFAULT_ACK_TIMEOUT
- ).setId(id);
- }
-
- @Deprecated(forRemoval = true) // temporary compatibility shim
- public void putStoredScript(final PutStoredScriptRequest request, ActionListener<AcknowledgedResponse> listener) {
- execute(TransportPutStoredScriptAction.TYPE, request, listener);
- }
-
- @Deprecated(forRemoval = true) // temporary compatibility shim
- public GetStoredScriptRequestBuilder prepareGetStoredScript(String id) {
- return new GetStoredScriptRequestBuilder(this, MasterNodeRequest.TRAPPY_IMPLICIT_DEFAULT_MASTER_NODE_TIMEOUT).setId(id);
- }
-
- @Deprecated(forRemoval = true) // temporary compatibility shim
- public void getStoredScript(final GetStoredScriptRequest request, final ActionListener<GetStoredScriptResponse> listener) {
- execute(GetStoredScriptAction.INSTANCE, request, listener);
- }
}
From ea692d1348dcdbda177a96ef15c0d0ddf80012c1 Mon Sep 17 00:00:00 2001
From: Nik Everett
Date: Thu, 1 Aug 2024 13:54:28 -0400
Subject: [PATCH 12/36] ESQL: Don't mutate the BoolQueryBuilder in plan
(#111519)
This modifies ESQL's `QueryBuilder` merging to stop it from mutating
`BoolQueryBuilder`s in place. Mutating in place is more efficient, but
only marginally so. Instead we create a shallow copy of the same
builder and mutate *that*. That lines up much better with the plan
being made of immutable objects, as it should be.
The resulting queries that ESQL sends to Lucene are the same here - we
just modify how we build them.
This should stop a fun class of bugs that can come up when we mutate
the query builders in multiple threads - because we *do* replan the
query in multiple threads. That's fine, so long as we shallow copy, as
we do in this PR.
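Roughly, the semantics are (a minimal, illustrative sketch, not code
from this PR; the class name and query values are made up):

```java
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;

class ShallowCopySketch {
    public static void main(String[] args) {
        BoolQueryBuilder planned = new BoolQueryBuilder()
            .filter(QueryBuilders.termQuery("user", "kimchy"));

        // A replan mutates its own shallow copy, never the shared plan's builder.
        BoolQueryBuilder perThread = planned.shallowCopy()
            .filter(QueryBuilders.rangeQuery("@timestamp").gte("2020-12-12"));

        System.out.println(planned.filter().size());   // 1 - the plan is untouched
        System.out.println(perThread.filter().size()); // 2
    }
}
```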
---
docs/changelog/111519.yaml | 5 +
.../index/query/BoolQueryBuilder.java | 24 +++
.../index/query/BoolQueryBuilderTests.java | 37 ++++
.../xpack/esql/core/util/Queries.java | 2 +-
.../xpack/esql/core/util/QueriesTests.java | 34 +++-
.../esql/qa/server/multi-node/build.gradle | 3 +
.../xpack/esql/qa/multi_node/Clusters.java | 4 +-
.../xpack/esql/qa/multi_node/EsqlSpecIT.java | 2 +-
.../esql/qa/multi_node/FieldExtractorIT.java | 2 +-
.../xpack/esql/qa/multi_node/RestEsqlIT.java | 38 +++++
.../esql/qa/multi_node/EsqlClientYamlIT.java | 2 +-
.../esql/qa/server/single-node/build.gradle | 6 +-
.../xpack/esql/qa/single_node/RestEsqlIT.java | 85 +---------
.../xpack/esql/qa/rest/RestEsqlTestCase.java | 158 ++++++++++++++++++
14 files changed, 302 insertions(+), 100 deletions(-)
create mode 100644 docs/changelog/111519.yaml
create mode 100644 x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RestEsqlIT.java
diff --git a/docs/changelog/111519.yaml b/docs/changelog/111519.yaml
new file mode 100644
index 0000000000000..8cc62fb8ed903
--- /dev/null
+++ b/docs/changelog/111519.yaml
@@ -0,0 +1,5 @@
+pr: 111519
+summary: "ESQL: Don't mutate the `BoolQueryBuilder` in plan"
+area: ES|QL
+type: bug
+issues: []
diff --git a/server/src/main/java/org/elasticsearch/index/query/BoolQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/BoolQueryBuilder.java
index 4b4727bca4198..9856bab64ec6a 100644
--- a/server/src/main/java/org/elasticsearch/index/query/BoolQueryBuilder.java
+++ b/server/src/main/java/org/elasticsearch/index/query/BoolQueryBuilder.java
@@ -410,4 +410,28 @@ private static boolean rewriteClauses(
public TransportVersion getMinimalSupportedVersion() {
return TransportVersions.ZERO;
}
+
+ /**
+ * Create a new builder with the same clauses; modifying the copy
+ * won't modify the original. Modifying any of the copy's clauses,
+ * however, will modify the original. Don't do that.
+ */
+ public BoolQueryBuilder shallowCopy() {
+ BoolQueryBuilder copy = new BoolQueryBuilder();
+ copy.adjustPureNegative = adjustPureNegative;
+ copy.minimumShouldMatch = minimumShouldMatch;
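+ // The clause lists are fresh, but the clause builders themselves are shared with the original.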
+ for (QueryBuilder q : mustClauses) {
+ copy.must(q);
+ }
+ for (QueryBuilder q : mustNotClauses) {
+ copy.mustNot(q);
+ }
+ for (QueryBuilder q : filterClauses) {
+ copy.filter(q);
+ }
+ for (QueryBuilder q : shouldClauses) {
+ copy.should(q);
+ }
+ return copy;
+ }
}
diff --git a/server/src/test/java/org/elasticsearch/index/query/BoolQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/BoolQueryBuilderTests.java
index c29957f04c515..a4cc96a2063dc 100644
--- a/server/src/test/java/org/elasticsearch/index/query/BoolQueryBuilderTests.java
+++ b/server/src/test/java/org/elasticsearch/index/query/BoolQueryBuilderTests.java
@@ -32,7 +32,9 @@
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.hasItem;
import static org.hamcrest.CoreMatchers.instanceOf;
+import static org.hamcrest.Matchers.not;
public class BoolQueryBuilderTests extends AbstractQueryTestCase<BoolQueryBuilder> {
@Override
@@ -463,4 +465,39 @@ public void testMustRewrite() throws IOException {
IllegalStateException e = expectThrows(IllegalStateException.class, () -> boolQuery.toQuery(context));
assertEquals("Rewrite first", e.getMessage());
}
+
+ public void testShallowCopy() {
+ BoolQueryBuilder orig = createTestQueryBuilder();
+ BoolQueryBuilder shallowCopy = orig.shallowCopy();
+ assertThat(shallowCopy.adjustPureNegative(), equalTo(orig.adjustPureNegative()));
+ assertThat(shallowCopy.minimumShouldMatch(), equalTo(orig.minimumShouldMatch()));
+ assertThat(shallowCopy.must(), equalTo(orig.must()));
+ assertThat(shallowCopy.mustNot(), equalTo(orig.mustNot()));
+ assertThat(shallowCopy.should(), equalTo(orig.should()));
+ assertThat(shallowCopy.filter(), equalTo(orig.filter()));
+
+ QueryBuilder b = new MatchQueryBuilder("foo", "bar");
+ switch (between(0, 3)) {
+ case 0 -> {
+ shallowCopy.must(b);
+ assertThat(shallowCopy.must(), hasItem(b));
+ assertThat(orig.must(), not(hasItem(b)));
+ }
+ case 1 -> {
+ shallowCopy.mustNot(b);
+ assertThat(shallowCopy.mustNot(), hasItem(b));
+ assertThat(orig.mustNot(), not(hasItem(b)));
+ }
+ case 2 -> {
+ shallowCopy.should(b);
+ assertThat(shallowCopy.should(), hasItem(b));
+ assertThat(orig.should(), not(hasItem(b)));
+ }
+ case 3 -> {
+ shallowCopy.filter(b);
+ assertThat(shallowCopy.filter(), hasItem(b));
+ assertThat(orig.filter(), not(hasItem(b)));
+ }
+ }
+ }
}
diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/Queries.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/Queries.java
index 9403c3c6a0bc0..759d7b80acc22 100644
--- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/Queries.java
+++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/Queries.java
@@ -50,7 +50,7 @@ public static QueryBuilder combine(Clause clause, List queries) {
if (firstQuery == null) {
firstQuery = query;
if (firstQuery instanceof BoolQueryBuilder bqb) {
- bool = bqb;
+ bool = bqb.shallowCopy();
}
}
// at least two entries, start copying
diff --git a/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/QueriesTests.java b/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/QueriesTests.java
index c5f4eb2ba8283..8dde968640c21 100644
--- a/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/QueriesTests.java
+++ b/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/QueriesTests.java
@@ -14,6 +14,7 @@
import static java.util.Arrays.asList;
import static org.hamcrest.Matchers.everyItem;
+import static org.hamcrest.Matchers.hasItem;
import static org.hamcrest.Matchers.in;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.sameInstance;
@@ -92,13 +93,27 @@ public void testCombineBoolQueries() {
assertThat(combination, instanceOf(BoolQueryBuilder.class));
var bool = (BoolQueryBuilder) combination;
+ assertBoolQueryMerge(queries, bool, clause);
+ }
- var clauseList = clause.innerQueries.apply(bool);
+ private void assertBoolQueryMerge(QueryBuilder[] queries, BoolQueryBuilder bool, Queries.Clause clause) {
+ BoolQueryBuilder first = (BoolQueryBuilder) queries[0];
+ for (QueryBuilder b : first.must()) {
+ assertThat(bool.must(), hasItem(b));
+ }
+ for (QueryBuilder b : first.mustNot()) {
+ assertThat(bool.mustNot(), hasItem(b));
+ }
+ for (QueryBuilder b : first.should()) {
+ assertThat(bool.should(), hasItem(b));
+ }
+ for (QueryBuilder b : first.filter()) {
+ assertThat(bool.filter(), hasItem(b));
+ }
- for (QueryBuilder query : queries) {
- if (query != bool) {
- assertThat(query, in(clauseList));
- }
+ var clauseList = clause.innerQueries.apply(bool);
+ for (int i = 1; i < queries.length; i++) {
+ assertThat(queries[i], in(clauseList));
}
}
@@ -118,10 +133,11 @@ public void testCombineMixedBoolAndNonBoolQueries() {
assertThat(combination, instanceOf(BoolQueryBuilder.class));
var bool = (BoolQueryBuilder) combination;
- var clauseList = clause.innerQueries.apply(bool);
-
- for (QueryBuilder query : queries) {
- if (query != bool) {
+ if (queries[0] instanceof BoolQueryBuilder) {
+ assertBoolQueryMerge(queries, bool, clause);
+ } else {
+ var clauseList = clause.innerQueries.apply(bool);
+ for (QueryBuilder query : queries) {
assertThat(query, in(clauseList));
}
}
diff --git a/x-pack/plugin/esql/qa/server/multi-node/build.gradle b/x-pack/plugin/esql/qa/server/multi-node/build.gradle
index 6bba58b721a94..9f8ca78aba81e 100644
--- a/x-pack/plugin/esql/qa/server/multi-node/build.gradle
+++ b/x-pack/plugin/esql/qa/server/multi-node/build.gradle
@@ -8,6 +8,9 @@ dependencies {
javaRestTestImplementation project(xpackModule('esql:qa:testFixtures'))
javaRestTestImplementation project(xpackModule('esql:qa:server'))
yamlRestTestImplementation project(xpackModule('esql:qa:server'))
+
+ clusterPlugins project(':plugins:mapper-size')
+ clusterPlugins project(':plugins:mapper-murmur3')
}
GradleUtils.extendSourceSet(project, "javaRestTest", "yamlRestTest")
diff --git a/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/Clusters.java b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/Clusters.java
index 4aa17801fa217..3a68aee9fd205 100644
--- a/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/Clusters.java
+++ b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/Clusters.java
@@ -8,15 +8,17 @@
package org.elasticsearch.xpack.esql.qa.multi_node;
import org.elasticsearch.test.cluster.ElasticsearchCluster;
+import org.elasticsearch.test.cluster.local.LocalClusterConfigProvider;
import org.elasticsearch.test.cluster.local.distribution.DistributionType;
public class Clusters {
- public static ElasticsearchCluster testCluster() {
+ public static ElasticsearchCluster testCluster(LocalClusterConfigProvider configProvider) {
return ElasticsearchCluster.local()
.distribution(DistributionType.DEFAULT)
.nodes(2)
.setting("xpack.security.enabled", "false")
.setting("xpack.license.self_generated.type", "trial")
+ .apply(() -> configProvider)
.build();
}
}
diff --git a/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/EsqlSpecIT.java b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/EsqlSpecIT.java
index 93385ec9efd89..3a0c400de1795 100644
--- a/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/EsqlSpecIT.java
+++ b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/EsqlSpecIT.java
@@ -14,7 +14,7 @@
public class EsqlSpecIT extends EsqlSpecTestCase {
@ClassRule
- public static ElasticsearchCluster cluster = Clusters.testCluster();
+ public static ElasticsearchCluster cluster = Clusters.testCluster(spec -> {});
@Override
protected String getTestRestCluster() {
diff --git a/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/FieldExtractorIT.java b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/FieldExtractorIT.java
index bcb83a31f7641..9ec454db2d325 100644
--- a/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/FieldExtractorIT.java
+++ b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/FieldExtractorIT.java
@@ -17,7 +17,7 @@
@ThreadLeakFilters(filters = TestClustersThreadFilter.class)
public class FieldExtractorIT extends FieldExtractorTestCase {
@ClassRule
- public static ElasticsearchCluster cluster = Clusters.testCluster();
+ public static ElasticsearchCluster cluster = Clusters.testCluster(spec -> {});
@Override
protected String getTestRestCluster() {
diff --git a/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RestEsqlIT.java b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RestEsqlIT.java
new file mode 100644
index 0000000000000..7e98d486d6c2e
--- /dev/null
+++ b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RestEsqlIT.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.qa.multi_node;
+
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.elasticsearch.test.cluster.ElasticsearchCluster;
+import org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase;
+import org.junit.ClassRule;
+
+import java.util.Arrays;
+import java.util.List;
+
+public class RestEsqlIT extends RestEsqlTestCase {
+ @ClassRule
+ public static ElasticsearchCluster cluster = Clusters.testCluster(
+ specBuilder -> specBuilder.plugin("mapper-size").plugin("mapper-murmur3")
+ );
+
+ @Override
+ protected String getTestRestCluster() {
+ return cluster.getHttpAddresses();
+ }
+
+ @ParametersFactory(argumentFormatting = "%1s")
+ public static List<Object[]> modes() {
+ return Arrays.stream(Mode.values()).map(m -> new Object[] { m }).toList();
+ }
+
+ public RestEsqlIT(Mode mode) {
+ super(mode);
+ }
+}
diff --git a/x-pack/plugin/esql/qa/server/multi-node/src/yamlRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/EsqlClientYamlIT.java b/x-pack/plugin/esql/qa/server/multi-node/src/yamlRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/EsqlClientYamlIT.java
index d3ddae16e8af1..62e83c9280087 100644
--- a/x-pack/plugin/esql/qa/server/multi-node/src/yamlRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/EsqlClientYamlIT.java
+++ b/x-pack/plugin/esql/qa/server/multi-node/src/yamlRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/EsqlClientYamlIT.java
@@ -19,7 +19,7 @@
public class EsqlClientYamlIT extends ESClientYamlSuiteTestCase {
@ClassRule
- public static ElasticsearchCluster cluster = Clusters.testCluster();
+ public static ElasticsearchCluster cluster = Clusters.testCluster(spec -> {});
@Override
protected String getTestRestCluster() {
diff --git a/x-pack/plugin/esql/qa/server/single-node/build.gradle b/x-pack/plugin/esql/qa/server/single-node/build.gradle
index 865d7cf5f5e6c..ab8e3d4b32d9a 100644
--- a/x-pack/plugin/esql/qa/server/single-node/build.gradle
+++ b/x-pack/plugin/esql/qa/server/single-node/build.gradle
@@ -20,10 +20,8 @@ dependencies {
javaRestTestImplementation("org.slf4j:slf4j-nop:${versions.slf4j}")
javaRestTestImplementation('org.apache.arrow:arrow-memory-unsafe:16.1.0')
- dependencies {
- clusterPlugins project(':plugins:mapper-size')
- clusterPlugins project(':plugins:mapper-murmur3')
- }
+ clusterPlugins project(':plugins:mapper-size')
+ clusterPlugins project(':plugins:mapper-murmur3')
}
restResources {
diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java
index 797fc803ed531..d679ee18d0a73 100644
--- a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java
+++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java
@@ -13,7 +13,6 @@
import org.apache.lucene.search.DocIdSetIterator;
import org.elasticsearch.Build;
import org.elasticsearch.client.Request;
-import org.elasticsearch.client.Response;
import org.elasticsearch.client.ResponseException;
import org.elasticsearch.common.io.Streams;
import org.elasticsearch.common.settings.Settings;
@@ -22,25 +21,20 @@
import org.elasticsearch.test.TestClustersThreadFilter;
import org.elasticsearch.test.cluster.ElasticsearchCluster;
import org.elasticsearch.test.cluster.LogType;
-import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase;
import org.hamcrest.Matchers;
-import org.junit.Assert;
import org.junit.ClassRule;
import java.io.IOException;
import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-import java.util.Locale;
import java.util.Map;
import static org.elasticsearch.test.ListMatcher.matchesList;
import static org.elasticsearch.test.MapMatcher.assertMap;
import static org.elasticsearch.test.MapMatcher.matchesMap;
-import static org.hamcrest.Matchers.any;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
@@ -72,7 +66,7 @@ public RestEsqlIT(Mode mode) {
}
public void testBasicEsql() throws IOException {
- indexTestData();
+ indexTimestampData(1);
RequestObjectBuilder builder = requestObjectBuilder().query(fromIndex() + " | stats avg(value)");
if (Build.current().isSnapshot()) {
@@ -274,50 +268,8 @@ public void testTableDuplicateNames() throws IOException {
assertThat(re.getMessage(), containsString("[6:10] Duplicate field 'a'"));
}
- /**
- * INLINESTATS can group on {@code NOW()}. It's a little silly, but
- * doing something like {@code DATE_TRUNC(1 YEAR, NOW() - 1970-01-01T00:00:00Z)} is
- * much more sensible. But just grouping on {@code NOW()} is enough to test this.
- *
- * This works because {@code NOW()} locks it's value at the start of the entire
- * query. It's part of the "configuration" of the query.
- *
- */
- public void testInlineStatsNow() throws IOException {
- assumeTrue("INLINESTATS only available on snapshots", Build.current().isSnapshot());
- indexTestData();
-
- RequestObjectBuilder builder = requestObjectBuilder().query(
- fromIndex() + " | EVAL now=NOW() | INLINESTATS AVG(value) BY now | SORT value ASC"
- );
- Map result = runEsql(builder);
- ListMatcher values = matchesList();
- for (int i = 0; i < 1000; i++) {
- values = values.item(
- matchesList().item("2020-12-12T00:00:00.000Z")
- .item("value" + i)
- .item("value" + i)
- .item(i)
- .item(any(String.class))
- .item(499.5)
- );
- }
- assertMap(
- result,
- matchesMap().entry(
- "columns",
- matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date"))
- .item(matchesMap().entry("name", "test").entry("type", "text"))
- .item(matchesMap().entry("name", "test.keyword").entry("type", "keyword"))
- .item(matchesMap().entry("name", "value").entry("type", "long"))
- .item(matchesMap().entry("name", "now").entry("type", "date"))
- .item(matchesMap().entry("name", "AVG(value)").entry("type", "double"))
- ).entry("values", values)
- );
- }
-
public void testProfile() throws IOException {
- indexTestData();
+ indexTimestampData(1);
RequestObjectBuilder builder = requestObjectBuilder().query(fromIndex() + " | STATS AVG(value)");
builder.profile(true);
@@ -371,7 +323,7 @@ public void testProfile() throws IOException {
public void testInlineStatsProfile() throws IOException {
assumeTrue("INLINESTATS only available on snapshots", Build.current().isSnapshot());
- indexTestData();
+ indexTimestampData(1);
RequestObjectBuilder builder = requestObjectBuilder().query(fromIndex() + " | INLINESTATS AVG(value) | SORT value ASC");
builder.profile(true);
@@ -486,37 +438,6 @@ private MapMatcher basicProfile() {
return matchesMap().entry("pages_processed", greaterThan(0)).entry("process_nanos", greaterThan(0));
}
- private void indexTestData() throws IOException {
- Request createIndex = new Request("PUT", testIndexName());
- createIndex.setJsonEntity("""
- {
- "settings": {
- "index": {
- "number_of_shards": 1
- }
- }
- }""");
- Response response = client().performRequest(createIndex);
- assertThat(
- entityToMap(response.getEntity(), XContentType.JSON),
- matchesMap().entry("shards_acknowledged", true).entry("index", testIndexName()).entry("acknowledged", true)
- );
-
- StringBuilder b = new StringBuilder();
- for (int i = 0; i < 1000; i++) {
- b.append(String.format(Locale.ROOT, """
- {"create":{"_index":"%s"}}
- {"@timestamp":"2020-12-12","test":"value%s","value":%d}
- """, testIndexName(), i, i));
- }
- Request bulk = new Request("POST", "/_bulk");
- bulk.addParameter("refresh", "true");
- bulk.addParameter("filter_path", "errors");
- bulk.setJsonEntity(b.toString());
- response = client().performRequest(bulk);
- Assert.assertEquals("{\"errors\":false}", EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8));
- }
-
private void assertException(String query, String... errorMessages) throws IOException {
ResponseException re = expectThrows(ResponseException.class, () -> runEsqlSync(requestObjectBuilder().query(query)));
assertThat(re.getResponse().getStatusLine().getStatusCode(), equalTo(400));
diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEsqlTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEsqlTestCase.java
index 82b7459066586..81e82a8d60b77 100644
--- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEsqlTestCase.java
+++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEsqlTestCase.java
@@ -11,6 +11,7 @@
import org.apache.http.entity.ContentType;
import org.apache.http.nio.entity.NByteArrayEntity;
import org.apache.http.util.EntityUtils;
+import org.elasticsearch.Build;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.Response;
@@ -20,16 +21,19 @@
import org.elasticsearch.common.io.Streams;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentHelper;
+import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.logging.LogManager;
import org.elasticsearch.logging.Logger;
+import org.elasticsearch.test.ListMatcher;
import org.elasticsearch.test.rest.ESRestTestCase;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xpack.esql.EsqlTestUtils;
import org.junit.After;
+import org.junit.Assert;
import org.junit.Before;
import java.io.ByteArrayOutputStream;
@@ -52,11 +56,13 @@
import static java.util.Collections.emptySet;
import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
+import static org.elasticsearch.test.ListMatcher.matchesList;
import static org.elasticsearch.test.MapMatcher.assertMap;
import static org.elasticsearch.test.MapMatcher.matchesMap;
import static org.elasticsearch.xpack.esql.EsqlTestUtils.as;
import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.Mode.ASYNC;
import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.Mode.SYNC;
+import static org.hamcrest.Matchers.any;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.either;
import static org.hamcrest.Matchers.emptyOrNullString;
@@ -121,6 +127,8 @@ public static class RequestObjectBuilder {
private Boolean profile = null;
+ private CheckedConsumer<XContentBuilder, IOException> filter;
+
public RequestObjectBuilder() throws IOException {
this(randomFrom(XContentType.values()));
}
@@ -187,6 +195,11 @@ public RequestObjectBuilder profile(boolean profile) {
return this;
}
+ public RequestObjectBuilder filter(CheckedConsumer<XContentBuilder, IOException> filter) {
+ this.filter = filter;
+ return this;
+ }
+
public RequestObjectBuilder build() throws IOException {
if (isBuilt == false) {
if (tables != null) {
@@ -205,6 +218,11 @@ public RequestObjectBuilder build() throws IOException {
if (profile != null) {
builder.field("profile", profile);
}
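+ // Render the optional top-level filter as a "filter" object in the request body.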
+ if (filter != null) {
+ builder.startObject("filter");
+ filter.accept(builder);
+ builder.endObject();
+ }
builder.endObject();
isBuilt = true;
}
@@ -594,6 +612,115 @@ public void testComplexFieldNames() throws IOException {
assertThat(e.getMessage(), containsString("The field names are too complex to process"));
}
+ /**
+ * INLINESTATS can group on {@code NOW()}. It's a little silly, but
+ * doing something like {@code DATE_TRUNC(1 YEAR, NOW() - 1970-01-01T00:00:00Z)} is
+ * much more sensible. But just grouping on {@code NOW()} is enough to test this.
+ *
+ * This works because {@code NOW()} locks its value at the start of the entire
+ * query. It's part of the "configuration" of the query.
+ *
+ */
+ public void testInlineStatsNow() throws IOException {
+ assumeTrue("INLINESTATS only available on snapshots", Build.current().isSnapshot());
+ indexTimestampData(1);
+
+ RequestObjectBuilder builder = requestObjectBuilder().query(
+ fromIndex() + " | EVAL now=NOW() | INLINESTATS AVG(value) BY now | SORT value ASC"
+ );
+ Map<String, Object> result = runEsql(builder);
+ ListMatcher values = matchesList();
+ for (int i = 0; i < 1000; i++) {
+ values = values.item(
+ matchesList().item("2020-12-12T00:00:00.000Z")
+ .item("value" + i)
+ .item("value" + i)
+ .item(i)
+ .item(any(String.class))
+ .item(499.5)
+ );
+ }
+ assertMap(
+ result,
+ matchesMap().entry(
+ "columns",
+ matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date"))
+ .item(matchesMap().entry("name", "test").entry("type", "text"))
+ .item(matchesMap().entry("name", "test.keyword").entry("type", "keyword"))
+ .item(matchesMap().entry("name", "value").entry("type", "long"))
+ .item(matchesMap().entry("name", "now").entry("type", "date"))
+ .item(matchesMap().entry("name", "AVG(value)").entry("type", "double"))
+ ).entry("values", values)
+ );
+ }
+
+ public void testTopLevelFilter() throws IOException {
+ indexTimestampData(3); // Multiple shards have caused a bug in the past with the merging case below
+
+ RequestObjectBuilder builder = requestObjectBuilder().filter(b -> {
+ b.startObject("range");
+ {
+ b.startObject("@timestamp").field("gte", "2020-12-12").endObject();
+ }
+ b.endObject();
+ }).query(fromIndex() + " | STATS SUM(value)");
+ assertMap(
+ runEsql(builder),
+ matchesMap().entry("columns", matchesList().item(matchesMap().entry("name", "SUM(value)").entry("type", "long")))
+ .entry("values", List.of(List.of(499500)))
+ );
+ }
+
+ public void testTopLevelFilterMerged() throws IOException {
+ indexTimestampData(3); // Multiple shards have caused a bug in the past with the merging case below
+
+ RequestObjectBuilder builder = requestObjectBuilder().filter(b -> {
+ b.startObject("range");
+ {
+ b.startObject("@timestamp").field("gte", "2020-12-12").endObject();
+ }
+ b.endObject();
+ }).query(fromIndex() + " | WHERE value == 12 | STATS SUM(value)");
+ assertMap(
+ runEsql(builder),
+ matchesMap().entry("columns", matchesList().item(matchesMap().entry("name", "SUM(value)").entry("type", "long")))
+ .entry("values", List.of(List.of(12)))
+ );
+ }
+
+ public void testTopLevelFilterBoolMerged() throws IOException {
+ indexTimestampData(3); // Multiple shards have caused a bug in the past
+
+ for (int i = 0; i < 100; i++) {
+ // Run the query many times so we're more likely to bump into any sort of modification problems
+ RequestObjectBuilder builder = requestObjectBuilder().filter(b -> {
+ b.startObject("bool");
+ {
+ b.startArray("filter");
+ {
+ b.startObject().startObject("range");
+ {
+ b.startObject("@timestamp").field("gte", "2020-12-12").endObject();
+ }
+ b.endObject().endObject();
+ b.startObject().startObject("match");
+ {
+ b.field("test", "value12");
+ }
+ b.endObject().endObject();
+ }
+ b.endArray();
+ }
+ b.endObject();
+ }).query(fromIndex() + " | WHERE @timestamp > \"2010-01-01\" | STATS SUM(value)");
+ assertMap(
+ runEsql(builder),
+ matchesMap().entry("columns", matchesList().item(matchesMap().entry("name", "SUM(value)").entry("type", "long")))
+ .entry("values", List.of(List.of(12)))
+ );
+ }
+ }
+
private static String queryWithComplexFieldNames(int field) {
StringBuilder query = new StringBuilder();
query.append(" | keep ").append(randomAlphaOfLength(10)).append(1);
@@ -1009,4 +1136,35 @@ protected boolean preserveClusterUponCompletion() {
public void assertRequestBreakerEmpty() throws Exception {
EsqlSpecTestCase.assertRequestBreakerEmpty();
}
+
+ protected void indexTimestampData(int shards) throws IOException {
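+ // Create the test index with the requested number of shards, then bulk-index 1000 timestamped docs.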
+ Request createIndex = new Request("PUT", testIndexName());
+ createIndex.setJsonEntity("""
+ {
+ "settings": {
+ "index": {
+ "number_of_shards": %shards%
+ }
+ }
+ }""".replace("%shards%", Integer.toString(shards)));
+ Response response = client().performRequest(createIndex);
+ assertThat(
+ entityToMap(response.getEntity(), XContentType.JSON),
+ matchesMap().entry("shards_acknowledged", true).entry("index", testIndexName()).entry("acknowledged", true)
+ );
+
+ StringBuilder b = new StringBuilder();
+ for (int i = 0; i < 1000; i++) {
+ b.append(String.format(Locale.ROOT, """
+ {"create":{"_index":"%s"}}
+ {"@timestamp":"2020-12-12","test":"value%s","value":%d}
+ """, testIndexName(), i, i));
+ }
+ Request bulk = new Request("POST", "/_bulk");
+ bulk.addParameter("refresh", "true");
+ bulk.addParameter("filter_path", "errors");
+ bulk.setJsonEntity(b.toString());
+ response = client().performRequest(bulk);
+ Assert.assertEquals("{\"errors\":false}", EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8));
+ }
}
From 9df3a3d186d39d7b431057939f780b79fcea69d7 Mon Sep 17 00:00:00 2001
From: Keith Massey
Date: Thu, 1 Aug 2024 15:57:54 -0500
Subject: [PATCH 13/36] Truncating watcher history if it is too large (#111245)
---
docs/changelog/111245.yaml | 6 +
.../settings/notification-settings.asciidoc | 5 +
.../core/watcher/history/WatchRecord.java | 160 +++++++++++++++++-
.../integration/HistoryIntegrationTests.java | 69 +++++++-
.../elasticsearch/xpack/watcher/Watcher.java | 2 +-
.../xpack/watcher/history/HistoryStore.java | 21 ++-
.../watcher/history/HistoryStoreTests.java | 108 +++++++++++-
7 files changed, 364 insertions(+), 7 deletions(-)
create mode 100644 docs/changelog/111245.yaml
diff --git a/docs/changelog/111245.yaml b/docs/changelog/111245.yaml
new file mode 100644
index 0000000000000..384373d52cb20
--- /dev/null
+++ b/docs/changelog/111245.yaml
@@ -0,0 +1,6 @@
+pr: 111245
+summary: Truncating watcher history if it is too large
+area: Watcher
+type: bug
+issues:
+ - 94745
diff --git a/docs/reference/settings/notification-settings.asciidoc b/docs/reference/settings/notification-settings.asciidoc
index 4a48c26974084..145112ef4d27c 100644
--- a/docs/reference/settings/notification-settings.asciidoc
+++ b/docs/reference/settings/notification-settings.asciidoc
@@ -42,6 +42,11 @@ Specifies the path to a file that contains a key for encrypting sensitive data.
If `xpack.watcher.encrypt_sensitive_data` is set to `true`, this setting is
required. For more information, see <>.
+`xpack.watcher.max.history.record.size`::
+(<>)
+The maximum size of a watcher history record that can be written into the watcher history index. Any larger history record will have
+some of its larger fields removed. Defaults to `10mb`.
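+
+For example, to lower the limit to `5mb`, add the following to `elasticsearch.yml`:
+
+[source,yaml]
+----
+xpack.watcher.max.history.record.size: 5mb
+----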
+
`xpack.http.proxy.host`::
(<>)
Specifies the address of the proxy server to use to connect to HTTP services.
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/history/WatchRecord.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/history/WatchRecord.java
index 64215c0b1acf1..f6e8a80884d66 100644
--- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/history/WatchRecord.java
+++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/history/WatchRecord.java
@@ -9,6 +9,7 @@
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.core.Nullable;
+import org.elasticsearch.core.TimeValue;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;
@@ -21,8 +22,10 @@
import org.elasticsearch.xpack.core.watcher.execution.Wid;
import org.elasticsearch.xpack.core.watcher.input.ExecutableInput;
import org.elasticsearch.xpack.core.watcher.input.Input;
+import org.elasticsearch.xpack.core.watcher.support.WatcherDateTimeUtils;
import org.elasticsearch.xpack.core.watcher.support.xcontent.WatcherParams;
import org.elasticsearch.xpack.core.watcher.trigger.TriggerEvent;
+import org.elasticsearch.xpack.core.watcher.watch.Payload;
import org.elasticsearch.xpack.core.watcher.watch.Watch;
import org.elasticsearch.xpack.core.watcher.watch.WatchField;
@@ -45,13 +48,16 @@ public abstract class WatchRecord implements ToXContentObject {
private static final ParseField EXECUTION_RESULT = new ParseField("result");
private static final ParseField EXCEPTION = new ParseField("exception");
private static final ParseField USER = new ParseField("user");
+ public static final String TRUNCATED_RECORD_KEY = "truncated";
+ public static final String TRUNCATED_RECORD_VALUE = "Watch history record exceeded the value of the "
+ + "`xpack.watcher.max.history.record.size' setting";
protected final Wid id;
protected final Watch watch;
private final String nodeId;
protected final TriggerEvent triggerEvent;
protected final ExecutionState state;
- private final String user;
+ protected final String user;
// only emitted to xcontent in "debug" mode
protected final Map<String, Object> vars;
@@ -254,6 +260,8 @@ public String toString() {
return id.toString();
}
+ public abstract WatchRecord dropLargeFields() throws Exception;
+
public static class MessageWatchRecord extends WatchRecord {
@Nullable
private final String[] messages;
@@ -299,6 +307,24 @@ public MessageWatchRecord(WatchRecord record, ExecutionState state, String messa
}
}
+ private MessageWatchRecord(
+ Wid id,
+ TriggerEvent triggerEvent,
+ ExecutionState state,
+ Map<String, Object> vars,
+ ExecutableInput<? extends Input, ? extends Input.Result> redactedInput,
+ ExecutableCondition condition,
+ Map<String, Object> metadata,
+ Watch watch,
+ WatchExecutionResult redactedResult,
+ String nodeId,
+ String user,
+ String[] messages
+ ) {
+ super(id, triggerEvent, state, vars, redactedInput, condition, metadata, watch, redactedResult, nodeId, user);
+ this.messages = messages;
+ }
+
public String[] messages() {
return messages;
}
@@ -309,10 +335,46 @@ void innerToXContent(XContentBuilder builder, Params params) throws IOException
builder.array(MESSAGES.getPreferredName(), messages);
}
}
+
+ @Override
+ public WatchRecord dropLargeFields() throws Exception {
+ return new MessageWatchRecord(
+ this.id,
+ this.triggerEvent,
+ this.state,
+ this.vars,
+ this.input == null ? null : getTruncatedInput(),
+ this.condition,
+ this.metadata,
+ this.watch,
+ this.executionResult == null ? null : getTruncatedWatchExecutionResult(this),
+ this.getNodeId(),
+ this.user,
+ this.messages
+ );
+ }
}
public static class ExceptionWatchRecord extends WatchRecord {
+ private ExceptionWatchRecord(
+ Wid id,
+ TriggerEvent triggerEvent,
+ ExecutionState state,
+ Map<String, Object> vars,
+ ExecutableInput<? extends Input, ? extends Input.Result> redactedInput,
+ ExecutableCondition condition,
+ Map<String, Object> metadata,
+ Watch watch,
+ WatchExecutionResult redactedResult,
+ String nodeId,
+ String user,
+ Exception exception
+ ) {
+ super(id, triggerEvent, state, vars, redactedInput, condition, metadata, watch, redactedResult, nodeId, user);
+ this.exception = exception;
+ }
+
+ private static final Map<String, String> STACK_TRACE_ENABLED_PARAMS = Map.of(
ElasticsearchException.REST_EXCEPTION_SKIP_STACK_TRACE,
"false"
@@ -356,5 +418,101 @@ void innerToXContent(XContentBuilder builder, Params params) throws IOException
}
}
}
+
+ @Override
+ public WatchRecord dropLargeFields() throws Exception {
+ return new ExceptionWatchRecord(
+ this.id,
+ triggerEvent,
+ this.state,
+ this.vars,
+ this.input == null ? null : getTruncatedInput(),
+ this.condition,
+ this.metadata,
+ this.watch,
+ this.executionResult == null ? null : getTruncatedWatchExecutionResult(this),
+ this.getNodeId(),
+ this.user,
+ this.exception
+ );
+ }
+ }
+
+ /*
+ * This returns an ExecutableInput whose toXContent() returns no information other than a new TRUNCATED_RECORD_KEY field. It
+ * drops all other information to avoid having a document that is too large to index into Elasticsearch.
+ */
+ private static ExecutableInput<? extends Input, ? extends Input.Result> getTruncatedInput() {
+ return new ExecutableInput<>(new Input() {
+ @Override
+ public String type() {
+ return TRUNCATED_RECORD_KEY;
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.value(TRUNCATED_RECORD_VALUE);
+ return builder;
+ }
+ }) {
+ @Override
+ public Input.Result execute(WatchExecutionContext ctx, Payload payload) {
+ throw new UnsupportedOperationException("Redacted input cannot be executed");
+ }
+ };
+ }
+
+ /*
+ * This returns a WatchExecutionResult whose toXContent() returns minimal information, plus a new TRUNCATED_RECORD_KEY field. It
+ * drops most other information to avoid having a document that is too large to index into Elasticsearch.
+ */
+ private static WatchExecutionResult getTruncatedWatchExecutionResult(WatchRecord watchRecord) {
+ WatchExecutionContext watchExecutionContext = new WatchExecutionContext(
+ watchRecord.id.watchId(),
+ watchRecord.executionResult.executionTime(),
+ null,
+ TimeValue.ZERO
+ ) {
+ @Override
+ public boolean knownWatch() {
+ return false;
+ }
+
+ @Override
+ public boolean simulateAction(String actionId) {
+ return false;
+ }
+
+ @Override
+ public boolean skipThrottling(String actionId) {
+ return false;
+ }
+
+ @Override
+ public boolean shouldBeExecuted() {
+ return false;
+ }
+
+ @Override
+ public boolean recordExecution() {
+ return false;
+ }
+ };
+
+ return new WatchExecutionResult(watchExecutionContext, watchRecord.executionResult.executionDurationMs()) {
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject();
+ WatcherDateTimeUtils.writeDate(
+ Field.EXECUTION_TIME.getPreferredName(),
+ builder,
+ watchRecord.executionResult.executionTime()
+ );
+ builder.field(Field.EXECUTION_DURATION.getPreferredName(), watchRecord.executionResult.executionDurationMs());
+ builder.field(TRUNCATED_RECORD_KEY, TRUNCATED_RECORD_VALUE);
+ builder.endObject();
+ return builder;
+ }
+ };
}
}
diff --git a/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/test/integration/HistoryIntegrationTests.java b/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/test/integration/HistoryIntegrationTests.java
index 19cd37400a01c..ee645e4f32798 100644
--- a/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/test/integration/HistoryIntegrationTests.java
+++ b/x-pack/plugin/watcher/src/internalClusterTest/java/org/elasticsearch/xpack/watcher/test/integration/HistoryIntegrationTests.java
@@ -8,6 +8,7 @@
import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
+import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.protocol.xpack.watcher.PutWatchResponse;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.sort.SortBuilders;
@@ -17,6 +18,7 @@
import org.elasticsearch.xpack.core.watcher.actions.ActionStatus;
import org.elasticsearch.xpack.core.watcher.client.WatchSourceBuilder;
import org.elasticsearch.xpack.core.watcher.history.HistoryStoreField;
+import org.elasticsearch.xpack.core.watcher.history.WatchRecord;
import org.elasticsearch.xpack.core.watcher.input.Input;
import org.elasticsearch.xpack.core.watcher.support.xcontent.XContentSource;
import org.elasticsearch.xpack.core.watcher.transport.actions.execute.ExecuteWatchRequestBuilder;
@@ -29,6 +31,7 @@
import org.elasticsearch.xpack.watcher.trigger.schedule.IntervalSchedule;
import java.util.Locale;
+import java.util.Map;
import static org.elasticsearch.index.mapper.MapperService.SINGLE_MAPPING_NAME;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
@@ -44,6 +47,7 @@
import static org.elasticsearch.xpack.watcher.test.WatcherTestUtils.templateRequest;
import static org.elasticsearch.xpack.watcher.trigger.TriggerBuilders.schedule;
import static org.elasticsearch.xpack.watcher.trigger.schedule.Schedules.interval;
+import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;
@@ -225,6 +229,69 @@ public void testThatHistoryContainsStatus() throws Exception {
});
}
+ public void testThatHistoryIsTruncated() throws Exception {
+ {
+ /*
+ * The input for this watch is 1 MB, smaller than the 10 MB default of HistoryStore's MAX_HISTORY_SIZE_SETTING. So we do not
+ * expect its history record to be truncated.
+ */
+ new PutWatchRequestBuilder(client()).setId("test_watch_small")
+ .setSource(
+ watchBuilder().trigger(schedule(interval(5, IntervalSchedule.Interval.Unit.HOURS)))
+ .input(simpleInput("foo", randomAlphaOfLength((int) ByteSizeValue.ofMb(1).getBytes())))
+ .addAction("_logger", loggingAction("#### randomLogging"))
+ )
+ .get();
+ new ExecuteWatchRequestBuilder(client()).setId("test_watch_small").setRecordExecution(true).get();
+ assertBusy(() -> {
+ assertResponse(getWatchHistory(), searchResponse -> {
+ assertHitCount(searchResponse, 1);
+ SearchHit hit = searchResponse.getHits().getAt(0);
+ XContentSource source = new XContentSource(hit.getSourceRef(), XContentType.JSON);
+ Map<String, Object> input = source.getValue("input");
+ assertThat(input.containsKey(WatchRecord.TRUNCATED_RECORD_KEY), equalTo(false));
+ assertThat(input.containsKey("simple"), equalTo(true));
+ Map<String, Object> result = source.getValue("result");
+ assertThat(result.containsKey(WatchRecord.TRUNCATED_RECORD_KEY), equalTo(false));
+ assertThat(result.containsKey("input"), equalTo(true));
+ assertThat(result.containsKey("actions"), equalTo(true));
+ assertThat(result.containsKey("condition"), equalTo(true));
+ });
+ });
+ }
+ {
+ /*
+ * The input for this watch is 20 MB, much bigger than the 10 MB default of HistoryStore's MAX_HISTORY_SIZE_SETTING. So we
+ * expect to see its history record truncated before being stored.
+ */
+ new PutWatchRequestBuilder(client()).setId("test_watch_large")
+ .setSource(
+ watchBuilder().trigger(schedule(interval(5, IntervalSchedule.Interval.Unit.HOURS)))
+ .input(simpleInput("foo", randomAlphaOfLength((int) ByteSizeValue.ofMb(20).getBytes())))
+ .addAction("_logger", loggingAction("#### randomLogging"))
+ )
+ .get();
+ new ExecuteWatchRequestBuilder(client()).setId("test_watch_large").setRecordExecution(true).get();
+ assertBusy(() -> {
+ assertResponse(getWatchHistory(), searchResponse -> {
+ assertHitCount(searchResponse, 2);
+ SearchHit hit = searchResponse.getHits().getAt(1);
+ XContentSource source = new XContentSource(hit.getSourceRef(), XContentType.JSON);
+ Map<String, Object> input = source.getValue("input");
+ assertThat(input.containsKey(WatchRecord.TRUNCATED_RECORD_KEY), equalTo(true));
+ assertThat(input.get(WatchRecord.TRUNCATED_RECORD_KEY), equalTo(WatchRecord.TRUNCATED_RECORD_VALUE));
+ assertThat(input.containsKey("simple"), equalTo(false));
+ Map<String, Object> result = source.getValue("result");
+ assertThat(result.containsKey(WatchRecord.TRUNCATED_RECORD_KEY), equalTo(true));
+ assertThat(result.get(WatchRecord.TRUNCATED_RECORD_KEY), equalTo(WatchRecord.TRUNCATED_RECORD_VALUE));
+ assertThat(result.containsKey("input"), equalTo(false));
+ assertThat(result.containsKey("actions"), equalTo(false));
+ assertThat(result.containsKey("condition"), equalTo(false));
+ });
+ });
+ }
+ }
+
/*
* Returns a SearchRequestBuilder containing up to the default number of watch history records (10) if the .watcher-history* is ready.
* Otherwise it throws an AssertionError.
@@ -232,7 +299,7 @@ public void testThatHistoryContainsStatus() throws Exception {
private SearchRequestBuilder getWatchHistory() {
ensureGreen(HistoryStoreField.DATA_STREAM);
flushAndRefresh(".watcher-history-*");
- return prepareSearch(".watcher-history-*");
+ return prepareSearch(".watcher-history-*").addSort("@timestamp", SortOrder.ASC);
}
}
diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/Watcher.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/Watcher.java
index 2d71aef08ea13..821c92b514667 100644
--- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/Watcher.java
+++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/Watcher.java
@@ -490,7 +490,7 @@ public void afterBulk(long executionId, BulkRequest request, Exception failure)
.setBulkSize(SETTING_BULK_SIZE.get(settings))
.build();
- HistoryStore historyStore = new HistoryStore(bulkProcessor);
+ HistoryStore historyStore = new HistoryStore(bulkProcessor, settings);
// schedulers
final Set> scheduleParsers = new HashSet<>();
diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/history/HistoryStore.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/history/HistoryStore.java
index b4f6d82eab965..d8ba0c7e7a506 100644
--- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/history/HistoryStore.java
+++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/history/HistoryStore.java
@@ -13,6 +13,9 @@
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexMetadata;
+import org.elasticsearch.common.settings.Setting;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xpack.core.watcher.history.HistoryStoreField;
@@ -22,16 +25,24 @@
import java.io.IOException;
+import static org.elasticsearch.common.settings.Setting.Property.NodeScope;
import static org.elasticsearch.xpack.core.watcher.support.Exceptions.ioException;
public class HistoryStore {
private static final Logger logger = LogManager.getLogger(HistoryStore.class);
+ public static final Setting<ByteSizeValue> MAX_HISTORY_SIZE_SETTING = Setting.byteSizeSetting(
+ "xpack.watcher.max.history.record.size",
+ ByteSizeValue.ofMb(10),
+ NodeScope
+ );
private final BulkProcessor2 bulkProcessor;
+ private final ByteSizeValue maxHistoryRecordSize;
- public HistoryStore(BulkProcessor2 bulkProcessor) {
+ public HistoryStore(BulkProcessor2 bulkProcessor, Settings settings) {
this.bulkProcessor = bulkProcessor;
+ maxHistoryRecordSize = MAX_HISTORY_SIZE_SETTING.get(settings);
}
/**
@@ -41,9 +52,15 @@ public HistoryStore(BulkProcessor2 bulkProcessor) {
public void put(WatchRecord watchRecord) throws Exception {
try (XContentBuilder builder = XContentFactory.jsonBuilder()) {
watchRecord.toXContent(builder, WatcherParams.HIDE_SECRETS);
-
IndexRequest request = new IndexRequest(HistoryStoreField.DATA_STREAM).id(watchRecord.id().value()).source(builder);
request.opType(IndexRequest.OpType.CREATE);
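+ // If the serialized record exceeds the configured limit, replace it with a redacted copy that drops the large fields.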
+ if (request.source().length() > maxHistoryRecordSize.getBytes()) {
+ WatchRecord redactedWatchRecord = watchRecord.dropLargeFields();
+ try (XContentBuilder redactedBuilder = XContentFactory.jsonBuilder()) {
+ redactedWatchRecord.toXContent(redactedBuilder, WatcherParams.HIDE_SECRETS);
+ request.source(redactedBuilder);
+ }
+ }
bulkProcessor.add(request);
} catch (IOException ioe) {
throw ioException("failed to persist watch record [{}]", ioe, watchRecord);
diff --git a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/history/HistoryStoreTests.java b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/history/HistoryStoreTests.java
index 7b2300ed6e892..89968aa2cf19b 100644
--- a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/history/HistoryStoreTests.java
+++ b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/history/HistoryStoreTests.java
@@ -19,6 +19,7 @@
import org.elasticsearch.client.internal.Client;
import org.elasticsearch.common.settings.MockSecureSettings;
import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.concurrent.ThreadContext;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.threadpool.ThreadPool;
@@ -38,6 +39,7 @@
import org.elasticsearch.xpack.watcher.common.http.HttpResponse;
import org.elasticsearch.xpack.watcher.notification.jira.JiraAccount;
import org.elasticsearch.xpack.watcher.notification.jira.JiraIssue;
+import org.elasticsearch.xpack.watcher.test.WatcherTestUtils;
import org.elasticsearch.xpack.watcher.trigger.schedule.ScheduleTriggerEvent;
import org.junit.Before;
import org.mockito.ArgumentCaptor;
@@ -45,6 +47,7 @@
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
+import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import static java.util.Collections.emptyMap;
@@ -55,6 +58,7 @@
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.not;
import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.atLeastOnce;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
@@ -73,9 +77,18 @@ public void init() {
when(client.threadPool()).thenReturn(threadPool);
when(client.settings()).thenReturn(settings);
when(threadPool.getThreadContext()).thenReturn(new ThreadContext(settings));
+ historyStore = createHistoryStore(null);
+ }
+
+ private HistoryStore createHistoryStore(ByteSizeValue maxHistoryRecordSize) {
BulkProcessor2.Listener listener = mock(BulkProcessor2.Listener.class);
- BulkProcessor2 bulkProcessor = BulkProcessor2.builder(client::bulk, listener, threadPool).setBulkActions(1).build();
- historyStore = new HistoryStore(bulkProcessor);
+ BulkProcessor2 bulkProcessor = BulkProcessor2.builder(client::bulk, listener, client.threadPool()).setBulkActions(1).build();
+ Settings.Builder settingsBuilder = Settings.builder();
+ if (maxHistoryRecordSize != null) {
+ settingsBuilder.put(HistoryStore.MAX_HISTORY_SIZE_SETTING.getKey(), maxHistoryRecordSize);
+ }
+ Settings settings = settingsBuilder.build();
+ return new HistoryStore(bulkProcessor, settings);
}
public void testPut() throws Exception {
@@ -111,6 +124,97 @@ public void testPut() throws Exception {
assertThat(historyItemIndexed.get(), equalTo(true));
}
+ @SuppressWarnings("unchecked")
+ public void testPutLargeHistory() throws Exception {
+ IndexResponse indexResponse = mock(IndexResponse.class);
+ AtomicBoolean historyRedacted = new AtomicBoolean(false);
+ doAnswer(invocation -> {
+ BulkRequest request = (BulkRequest) invocation.getArguments()[0];
+ ActionListener listener = (ActionListener) invocation.getArguments()[1];
+ IndexRequest indexRequest = (IndexRequest) request.requests().get(0);
+ Map sourceMap = indexRequest.sourceAsMap();
+ if (indexRequest.opType() == OpType.CREATE && indexRequest.index().equals(HistoryStoreField.DATA_STREAM)) {
+ if (sourceMap.containsKey("input")
+ && ((Map) sourceMap.get("input")).containsKey(WatchRecord.TRUNCATED_RECORD_KEY)
+ && sourceMap.containsKey("result")
+ && ((Map) sourceMap.get("result")).containsKey(WatchRecord.TRUNCATED_RECORD_KEY)) {
+ assertThat(
+ ((Map) sourceMap.get("input")).get(WatchRecord.TRUNCATED_RECORD_KEY),
+ equalTo(WatchRecord.TRUNCATED_RECORD_VALUE)
+ );
+ assertThat(
+ ((Map) sourceMap.get("result")).get(WatchRecord.TRUNCATED_RECORD_KEY),
+ equalTo(WatchRecord.TRUNCATED_RECORD_VALUE)
+ );
+ historyRedacted.set(true);
+ }
+ listener.onResponse(
+ new BulkResponse(new BulkItemResponse[] { BulkItemResponse.success(1, OpType.CREATE, indexResponse) }, 1)
+ );
+ } else {
+ listener.onFailure(new ElasticsearchException("test issue"));
+ fail("Should never get here");
+ }
+ return null;
+ }).when(client).bulk(any(), any());
+ HistoryStore historyStoreSmallLimit = createHistoryStore(ByteSizeValue.ofBytes(10));
+ HistoryStore historyStoreLargeLimit = createHistoryStore(ByteSizeValue.ofBytes(10_000_000));
+ {
+ /*
+ * First, create a history record with input and results. We expect this to not be truncated when the store has a high limit,
+ * and we expect it to be truncated when we have the artificially low limit.
+ */
+ WatchExecutionContext context = WatcherTestUtils.createWatchExecutionContext();
+ WatchExecutionResult result = new WatchExecutionResult(context, randomNonNegativeLong());
+ String message = randomAlphaOfLength(100);
+ WatchRecord watchRecord = new WatchRecord.MessageWatchRecord(context, result, message);
+ historyStoreLargeLimit.put(watchRecord);
+ verify(client, atLeastOnce()).bulk(any(), any());
+ assertThat(historyRedacted.get(), equalTo(false));
+ historyStoreSmallLimit.put(watchRecord);
+ verify(client, atLeastOnce()).bulk(any(), any());
+ assertThat(historyRedacted.get(), equalTo(true));
+ }
+ {
+ /*
+ * Now make sure that we don't blow up when the input and result are null
+ */
+ historyRedacted.set(false);
+ ZonedDateTime now = Instant.ofEpochMilli(0).atZone(ZoneOffset.UTC);
+ Wid wid = new Wid("_name", now);
+ ScheduleTriggerEvent event = new ScheduleTriggerEvent(wid.watchId(), now, now);
+ WatchRecord watchRecord = new WatchRecord.MessageWatchRecord(
+ wid,
+ event,
+ ExecutionState.EXECUTED,
+ null,
+ randomAlphaOfLength(10)
+ );
+ historyStoreLargeLimit.put(watchRecord);
+ verify(client, atLeastOnce()).bulk(any(), any());
+ assertThat(historyRedacted.get(), equalTo(false));
+ historyStoreSmallLimit.put(watchRecord);
+ verify(client, atLeastOnce()).bulk(any(), any());
+ assertThat(historyRedacted.get(), equalTo(false));
+ }
+ {
+ /*
+ * Now make sure that an oversized ExceptionWatchRecord is truncated as well
+ */
+ historyRedacted.set(false);
+ WatchExecutionContext context = WatcherTestUtils.createWatchExecutionContext();
+ WatchExecutionResult result = new WatchExecutionResult(context, randomNonNegativeLong());
+ Exception exception = new RuntimeException(randomAlphaOfLength(100));
+ WatchRecord watchRecord = new WatchRecord.ExceptionWatchRecord(context, result, exception);
+ historyStoreLargeLimit.put(watchRecord);
+ verify(client, atLeastOnce()).bulk(any(), any());
+ assertThat(historyRedacted.get(), equalTo(false));
+ historyStoreSmallLimit.put(watchRecord);
+ verify(client, atLeastOnce()).bulk(any(), any());
+ assertThat(historyRedacted.get(), equalTo(true));
+ }
+ }
+
public void testStoreWithHideSecrets() throws Exception {
HttpClient httpClient = mock(HttpClient.class);
when(httpClient.execute(any(HttpRequest.class))).thenReturn(new HttpResponse(HttpStatus.SC_INTERNAL_SERVER_ERROR));
From 155042d09ffe3c40a139dad8697f4cbf0baa6528 Mon Sep 17 00:00:00 2001
From: Oleksandr Kolomiiets
Date: Thu, 1 Aug 2024 14:36:18 -0700
Subject: [PATCH 14/36] Add more leaf fields to logsdb data generator (#111469)
---
...ogsIndexModeRandomDataChallengeRestIT.java | 44 +++++--
test/framework/build.gradle | 3 +
.../DataGeneratorSpecification.java | 33 ++++--
.../logsdb/datageneration/FieldType.java | 10 +-
.../datageneration/arbitrary/Arbitrary.java | 40 -------
.../arbitrary/RandomBasedArbitrary.java | 83 -------------
.../datageneration/datasource/DataSource.java | 49 ++++++++
.../datasource/DataSourceHandler.java | 71 +++++++++++
.../datasource/DataSourceRequest.java | 109 +++++++++++++++++
.../datasource/DataSourceResponse.java | 55 +++++++++
.../DefaultObjectGenerationHandler.java | 64 ++++++++++
.../DefaultPrimitiveTypesHandler.java | 74 ++++++++++++
.../datasource/DefaultWrappersHandler.java | 42 +++++++
.../logsdb/datageneration/fields/Context.java | 31 ++++-
.../datageneration/fields/FieldValues.java | 34 ------
.../GenericSubObjectFieldDataGenerator.java | 33 ++++--
.../fields/leaf/ByteFieldDataGenerator.java | 40 +++++++
.../fields/leaf/DoubleFieldDataGenerator.java | 40 +++++++
.../fields/leaf/FloatFieldDataGenerator.java | 40 +++++++
.../leaf/HalfFloatFieldDataGenerator.java | 40 +++++++
.../leaf/IntegerFieldDataGenerator.java | 40 +++++++
.../leaf/KeywordFieldDataGenerator.java | 14 ++-
.../fields/leaf/LongFieldDataGenerator.java | 14 ++-
.../leaf/ScaledFloatFieldDataGenerator.java | 44 +++++++
.../fields/leaf/ShortFieldDataGenerator.java | 40 +++++++
.../leaf/UnsignedLongFieldDataGenerator.java | 40 +++++++
.../DataGeneratorSnapshotTests.java | 104 +++++++++-------
.../datageneration/DataGeneratorTests.java | 112 +++++++++---------
....java => DefaultWrappersHandlerTests.java} | 16 +--
29 files changed, 1047 insertions(+), 312 deletions(-)
delete mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/Arbitrary.java
delete mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/RandomBasedArbitrary.java
create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSource.java
create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceHandler.java
create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceRequest.java
create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceResponse.java
create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultObjectGenerationHandler.java
create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultPrimitiveTypesHandler.java
create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultWrappersHandler.java
delete mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/FieldValues.java
create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/ByteFieldDataGenerator.java
create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/DoubleFieldDataGenerator.java
create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/FloatFieldDataGenerator.java
create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/HalfFloatFieldDataGenerator.java
create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/IntegerFieldDataGenerator.java
create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/ScaledFloatFieldDataGenerator.java
create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/ShortFieldDataGenerator.java
create mode 100644 test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/UnsignedLongFieldDataGenerator.java
rename test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/{FieldValuesTests.java => DefaultWrappersHandlerTests.java} (69%)
diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java
index 3c2ee0d7723ed..c1d63b76fc25c 100644
--- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java
+++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/StandardVersusLogsIndexModeRandomDataChallengeRestIT.java
@@ -13,7 +13,9 @@
import org.elasticsearch.logsdb.datageneration.DataGenerator;
import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification;
import org.elasticsearch.logsdb.datageneration.FieldType;
-import org.elasticsearch.logsdb.datageneration.arbitrary.RandomBasedArbitrary;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceHandler;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceResponse;
import org.elasticsearch.logsdb.datageneration.fields.PredefinedField;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
@@ -21,16 +23,21 @@
import java.io.IOException;
import java.time.Instant;
import java.util.List;
+import java.util.function.Function;
/**
* Challenge test (see {@link StandardVersusLogsIndexModeChallengeRestIT}) that uses randomly generated
* mapping and documents in order to cover more code paths and permutations.
*/
public class StandardVersusLogsIndexModeRandomDataChallengeRestIT extends StandardVersusLogsIndexModeChallengeRestIT {
+ private final boolean fullyDynamicMapping;
+
private final DataGenerator dataGenerator;
public StandardVersusLogsIndexModeRandomDataChallengeRestIT() {
super();
+ this.fullyDynamicMapping = randomBoolean();
+
this.dataGenerator = new DataGenerator(
DataGeneratorSpecification.builder()
// Nested fields don't work with subobjects: false.
@@ -39,22 +46,40 @@ public StandardVersusLogsIndexModeRandomDataChallengeRestIT() {
// Currently matching fails because in synthetic source all fields are flat (given that we have subobjects: false)
// but stored source is identical to original document which has nested structure.
.withMaxObjectDepth(0)
- .withArbitrary(new RandomBasedArbitrary() {
+ .withDataSourceHandlers(List.of(new DataSourceHandler() {
// TODO enable null values
// Matcher does not handle nulls currently
@Override
- public boolean generateNullValue() {
- return false;
+ public DataSourceResponse.NullWrapper handle(DataSourceRequest.NullWrapper request) {
+ return new DataSourceResponse.NullWrapper(Function.identity());
}
// TODO enable arrays
// List matcher currently does not apply matching logic recursively
// and equality check fails because arrays are sorted in synthetic source.
@Override
- public boolean generateArrayOfValues() {
- return false;
+ public DataSourceResponse.ArrayWrapper handle(DataSourceRequest.ArrayWrapper request) {
+ return new DataSourceResponse.ArrayWrapper(Function.identity());
+ }
+
+ // TODO enable scaled_float fields
+ // There is a difference in synthetic source (precision loss)
+ // specific to these fields which the matcher can't handle.
+ @Override
+ public DataSourceResponse.FieldTypeGenerator handle(DataSourceRequest.FieldTypeGenerator request) {
+ // Unsigned long is not used with dynamic mapping
+ // since it can initially look like long
+ // but later fail to parse once big values arrive.
+ // Double is not used since it maps to float with dynamic mapping
+ // resulting in precision loss compared to original source.
+ var excluded = fullyDynamicMapping
+ ? List.of(FieldType.DOUBLE, FieldType.SCALED_FLOAT, FieldType.UNSIGNED_LONG)
+ : List.of(FieldType.SCALED_FLOAT);
+ return new DataSourceResponse.FieldTypeGenerator(
+ () -> randomValueOtherThanMany(excluded::contains, () -> randomFrom(FieldType.values()))
+ );
}
- })
+ }))
.withPredefinedFields(List.of(new PredefinedField("host.name", FieldType.KEYWORD)))
.build()
);
@@ -62,7 +87,7 @@ public boolean generateArrayOfValues() {
@Override
public void baselineMappings(XContentBuilder builder) throws IOException {
- if (randomBoolean()) {
+ if (fullyDynamicMapping == false) {
dataGenerator.writeMapping(builder);
} else {
// We want dynamic mapping, but we need host.name to be a keyword instead of text to support aggregations.
@@ -81,10 +106,9 @@ public void baselineMappings(XContentBuilder builder) throws IOException {
@Override
public void contenderMappings(XContentBuilder builder) throws IOException {
- if (randomBoolean()) {
+ if (fullyDynamicMapping == false) {
dataGenerator.writeMapping(builder, b -> builder.field("subobjects", false));
} else {
- // Sometimes we go with full dynamic mapping.
builder.startObject();
builder.field("subobjects", false);
builder.endObject();
diff --git a/test/framework/build.gradle b/test/framework/build.gradle
index 4d598a00de7b6..c8d4aba10b478 100644
--- a/test/framework/build.gradle
+++ b/test/framework/build.gradle
@@ -31,6 +31,9 @@ dependencies {
api 'org.objenesis:objenesis:3.3'
api "org.elasticsearch:mocksocket:${versions.mocksocket}"
+
+ testImplementation project(':x-pack:plugin:mapper-unsigned-long')
+ testImplementation project(":modules:mapper-extras")
}
sourceSets {
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java
index ea47ad3be1fa6..57bf9f12ccef1 100644
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java
@@ -8,16 +8,17 @@
package org.elasticsearch.logsdb.datageneration;
-import org.elasticsearch.logsdb.datageneration.arbitrary.Arbitrary;
-import org.elasticsearch.logsdb.datageneration.arbitrary.RandomBasedArbitrary;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceHandler;
import org.elasticsearch.logsdb.datageneration.fields.PredefinedField;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
/**
* Allows configuring behavior of {@link DataGenerator}.
- * @param arbitrary provides arbitrary values used during generation
+ * @param dataSource source of generated data
* @param maxFieldCountPerLevel maximum number of fields that an individual object in mapping has.
* Applies to subobjects.
* @param maxObjectDepth maximum depth of nested objects
@@ -25,7 +26,7 @@
* @param predefinedFields predefined fields that must be present in mapping and documents. Only top level fields are supported.
*/
public record DataGeneratorSpecification(
- Arbitrary arbitrary,
+ DataSource dataSource,
int maxFieldCountPerLevel,
int maxObjectDepth,
int nestedFieldsLimit,
@@ -41,24 +42,24 @@ public static DataGeneratorSpecification buildDefault() {
}
public static class Builder {
- private Arbitrary arbitrary;
+ private List<DataSourceHandler> dataSourceHandlers;
private int maxFieldCountPerLevel;
private int maxObjectDepth;
private int nestedFieldsLimit;
private List<PredefinedField> predefinedFields;
public Builder() {
- arbitrary = new RandomBasedArbitrary();
+ this.dataSourceHandlers = new ArrayList<>();
// Simply sufficiently big numbers to get some permutations
- maxFieldCountPerLevel = 50;
- maxObjectDepth = 2;
+ this.maxFieldCountPerLevel = 50;
+ this.maxObjectDepth = 2;
// Default value of index.mapping.nested_fields.limit
- nestedFieldsLimit = 50;
- predefinedFields = new ArrayList<>();
+ this.nestedFieldsLimit = 50;
+ this.predefinedFields = new ArrayList<>();
}
- public Builder withArbitrary(Arbitrary arbitrary) {
- this.arbitrary = arbitrary;
+ public Builder withDataSourceHandlers(Collection<DataSourceHandler> handlers) {
+ this.dataSourceHandlers.addAll(handlers);
return this;
}
@@ -83,7 +84,13 @@ public Builder withPredefinedFields(List<PredefinedField> predefinedFields) {
}
public DataGeneratorSpecification build() {
- return new DataGeneratorSpecification(arbitrary, maxFieldCountPerLevel, maxObjectDepth, nestedFieldsLimit, predefinedFields);
+ return new DataGeneratorSpecification(
+ new DataSource(dataSourceHandlers),
+ maxFieldCountPerLevel,
+ maxObjectDepth,
+ nestedFieldsLimit,
+ predefinedFields
+ );
}
}
}
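
The builder now accumulates caller-supplied handlers and wraps them in a `DataSource` at `build()` time. A minimal sketch of configuring a specification against the new API (the keyword-only handler body is illustrative, not part of this patch):

[source,java]
------------------------------------------------------------
import java.util.List;

var specification = DataGeneratorSpecification.builder()
    .withMaxObjectDepth(1)
    .withDataSourceHandlers(List.of(new DataSourceHandler() {
        // Pin the leaf field type; every other request returns null here
        // and falls through to the default handlers.
        @Override
        public DataSourceResponse.FieldTypeGenerator handle(DataSourceRequest.FieldTypeGenerator request) {
            return new DataSourceResponse.FieldTypeGenerator(() -> FieldType.KEYWORD);
        }
    }))
    .build();
------------------------------------------------------------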
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/FieldType.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/FieldType.java
index 0a675d85077e4..c8821c087d084 100644
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/FieldType.java
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/FieldType.java
@@ -13,5 +13,13 @@
*/
public enum FieldType {
KEYWORD,
- LONG
+ LONG,
+ UNSIGNED_LONG,
+ INTEGER,
+ SHORT,
+ BYTE,
+ DOUBLE,
+ FLOAT,
+ HALF_FLOAT,
+ SCALED_FLOAT
}
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/Arbitrary.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/Arbitrary.java
deleted file mode 100644
index 7a4bb880c5335..0000000000000
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/Arbitrary.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0 and the Server Side Public License, v 1; you may not use this file except
- * in compliance with, at your election, the Elastic License 2.0 or the Server
- * Side Public License, v 1.
- */
-
-package org.elasticsearch.logsdb.datageneration.arbitrary;
-
-import org.elasticsearch.logsdb.datageneration.FieldType;
-
-/**
- * Provides arbitrary values for different purposes.
- */
-public interface Arbitrary {
- boolean generateSubObject();
-
- boolean generateNestedObject();
-
- int childFieldCount(int lowerBound, int upperBound);
-
- String fieldName(int lengthLowerBound, int lengthUpperBound);
-
- FieldType fieldType();
-
- long longValue();
-
- String stringValue(int lengthLowerBound, int lengthUpperBound);
-
- boolean generateNullValue();
-
- boolean generateArrayOfValues();
-
- int valueArraySize();
-
- boolean generateArrayOfObjects();
-
- int objectArraySize();
-}
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/RandomBasedArbitrary.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/RandomBasedArbitrary.java
deleted file mode 100644
index 257bd17fc1892..0000000000000
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/RandomBasedArbitrary.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0 and the Server Side Public License, v 1; you may not use this file except
- * in compliance with, at your election, the Elastic License 2.0 or the Server
- * Side Public License, v 1.
- */
-
-package org.elasticsearch.logsdb.datageneration.arbitrary;
-
-import org.elasticsearch.logsdb.datageneration.FieldType;
-
-import static org.elasticsearch.test.ESTestCase.randomAlphaOfLengthBetween;
-import static org.elasticsearch.test.ESTestCase.randomBoolean;
-import static org.elasticsearch.test.ESTestCase.randomDouble;
-import static org.elasticsearch.test.ESTestCase.randomFrom;
-import static org.elasticsearch.test.ESTestCase.randomIntBetween;
-import static org.elasticsearch.test.ESTestCase.randomLong;
-
-public class RandomBasedArbitrary implements Arbitrary {
- @Override
- public boolean generateSubObject() {
- // Using a static 10% change, this is just a chosen value that can be tweaked.
- return randomDouble() <= 0.1;
- }
-
- @Override
- public boolean generateNestedObject() {
- // Using a static 10% change, this is just a chosen value that can be tweaked.
- return randomDouble() <= 0.1;
- }
-
- @Override
- public int childFieldCount(int lowerBound, int upperBound) {
- return randomIntBetween(lowerBound, upperBound);
- }
-
- @Override
- public String fieldName(int lengthLowerBound, int lengthUpperBound) {
- return randomAlphaOfLengthBetween(lengthLowerBound, lengthUpperBound);
- }
-
- @Override
- public FieldType fieldType() {
- return randomFrom(FieldType.values());
- }
-
- @Override
- public long longValue() {
- return randomLong();
- }
-
- @Override
- public String stringValue(int lengthLowerBound, int lengthUpperBound) {
- return randomAlphaOfLengthBetween(lengthLowerBound, lengthUpperBound);
- }
-
- @Override
- public boolean generateNullValue() {
- // Using a static 10% chance, this is just a chosen value that can be tweaked.
- return randomDouble() < 0.1;
- }
-
- @Override
- public boolean generateArrayOfValues() {
- return randomBoolean();
- }
-
- @Override
- public int valueArraySize() {
- return randomIntBetween(0, 5);
- }
-
- @Override
- public boolean generateArrayOfObjects() {
- return randomBoolean();
- }
-
- @Override
- public int objectArraySize() {
- return randomIntBetween(0, 5);
- }
-}
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSource.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSource.java
new file mode 100644
index 0000000000000..f53b8169f6b70
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSource.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.datasource;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * This class handles any decision performed during data generation that changes the output.
+ * For example: generating a random number, array of random size, mapping parameter.
+ *
+ * Goals of this abstraction are:
+ * <ul>
+ * <li>to be able to easily add new types of decisions/generators</li>
+ * <li>to decouple different types of decisions from each other, adding new data type should be an isolated additive change</li>
+ * <li>to allow overriding only small specific subset of behavior (e.g. for testing purposes)</li>
+ * </ul>
+ */
+public class DataSource {
+ private List<DataSourceHandler> handlers;
+
+ public DataSource(Collection<DataSourceHandler> additionalHandlers) {
+ this.handlers = new ArrayList<>();
+
+ this.handlers.addAll(additionalHandlers);
+
+ this.handlers.add(new DefaultPrimitiveTypesHandler());
+ this.handlers.add(new DefaultWrappersHandler());
+ this.handlers.add(new DefaultObjectGenerationHandler());
+ }
+
+ public <T extends DataSourceResponse> T get(DataSourceRequest<T> request) {
+ for (var handler : handlers) {
+ var response = request.accept(handler);
+ if (response != null) {
+ return response;
+ }
+ }
+
+ throw new IllegalStateException("Request is not supported by data source");
+ }
+}
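
`DataSource.get` walks the handlers in insertion order and returns the first non-null response, so handlers passed into the constructor shadow the three defaults registered after them. A sketch of that dispatch (the fixed string is illustrative):

[source,java]
------------------------------------------------------------
import java.util.List;

var dataSource = new DataSource(List.of(new DataSourceHandler() {
    @Override
    public DataSourceResponse.StringGenerator handle(DataSourceRequest.StringGenerator request) {
        return new DataSourceResponse.StringGenerator(() -> "fixed");
    }
}));

// Resolved by the custom handler above:
String s = dataSource.get(new DataSourceRequest.StringGenerator()).generator().get();
// Falls through to DefaultPrimitiveTypesHandler:
Long l = dataSource.get(new DataSourceRequest.LongGenerator()).generator().get();
------------------------------------------------------------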
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceHandler.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceHandler.java
new file mode 100644
index 0000000000000..1ee587159ee5f
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceHandler.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.datasource;
+
+public interface DataSourceHandler {
+ default DataSourceResponse.LongGenerator handle(DataSourceRequest.LongGenerator request) {
+ return null;
+ }
+
+ default DataSourceResponse.UnsignedLongGenerator handle(DataSourceRequest.UnsignedLongGenerator request) {
+ return null;
+ }
+
+ default DataSourceResponse.IntegerGenerator handle(DataSourceRequest.IntegerGenerator request) {
+ return null;
+ }
+
+ default DataSourceResponse.ShortGenerator handle(DataSourceRequest.ShortGenerator request) {
+ return null;
+ }
+
+ default DataSourceResponse.ByteGenerator handle(DataSourceRequest.ByteGenerator request) {
+ return null;
+ }
+
+ default DataSourceResponse.DoubleGenerator handle(DataSourceRequest.DoubleGenerator request) {
+ return null;
+ }
+
+ default DataSourceResponse.DoubleInRangeGenerator handle(DataSourceRequest.DoubleInRangeGenerator request) {
+ return null;
+ }
+
+ default DataSourceResponse.FloatGenerator handle(DataSourceRequest.FloatGenerator request) {
+ return null;
+ }
+
+ default DataSourceResponse.HalfFloatGenerator handle(DataSourceRequest.HalfFloatGenerator request) {
+ return null;
+ }
+
+ default DataSourceResponse.StringGenerator handle(DataSourceRequest.StringGenerator request) {
+ return null;
+ }
+
+ default DataSourceResponse.NullWrapper handle(DataSourceRequest.NullWrapper request) {
+ return null;
+ }
+
+ default DataSourceResponse.ArrayWrapper handle(DataSourceRequest.ArrayWrapper request) {
+ return null;
+ }
+
+ default DataSourceResponse.ChildFieldGenerator handle(DataSourceRequest.ChildFieldGenerator request) {
+ return null;
+ }
+
+ default DataSourceResponse.FieldTypeGenerator handle(DataSourceRequest.FieldTypeGenerator request) {
+ return null;
+ }
+
+ default DataSourceResponse.ObjectArrayGenerator handle(DataSourceRequest.ObjectArrayGenerator request) {
+ return null;
+ }
+}
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceRequest.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceRequest.java
new file mode 100644
index 0000000000000..d28ce7033578c
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceRequest.java
@@ -0,0 +1,109 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.datasource;
+
+import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification;
+
+public interface DataSourceRequest<TResponse extends DataSourceResponse> {
+ TResponse accept(DataSourceHandler handler);
+
+ record LongGenerator() implements DataSourceRequest<DataSourceResponse.LongGenerator> {
+ public DataSourceResponse.LongGenerator accept(DataSourceHandler handler) {
+ return handler.handle(this);
+ }
+ }
+
+ record UnsignedLongGenerator() implements DataSourceRequest<DataSourceResponse.UnsignedLongGenerator> {
+ public DataSourceResponse.UnsignedLongGenerator accept(DataSourceHandler handler) {
+ return handler.handle(this);
+ }
+ }
+
+ record IntegerGenerator() implements DataSourceRequest<DataSourceResponse.IntegerGenerator> {
+ public DataSourceResponse.IntegerGenerator accept(DataSourceHandler handler) {
+ return handler.handle(this);
+ }
+ }
+
+ record ShortGenerator() implements DataSourceRequest<DataSourceResponse.ShortGenerator> {
+ public DataSourceResponse.ShortGenerator accept(DataSourceHandler handler) {
+ return handler.handle(this);
+ }
+ }
+
+ record ByteGenerator() implements DataSourceRequest<DataSourceResponse.ByteGenerator> {
+ public DataSourceResponse.ByteGenerator accept(DataSourceHandler handler) {
+ return handler.handle(this);
+ }
+ }
+
+ record DoubleGenerator() implements DataSourceRequest<DataSourceResponse.DoubleGenerator> {
+ public DataSourceResponse.DoubleGenerator accept(DataSourceHandler handler) {
+ return handler.handle(this);
+ }
+ }
+
+ record DoubleInRangeGenerator(double minExclusive, double maxExclusive)
+ implements
+ DataSourceRequest<DataSourceResponse.DoubleInRangeGenerator> {
+ public DataSourceResponse.DoubleInRangeGenerator accept(DataSourceHandler handler) {
+ return handler.handle(this);
+ }
+ }
+
+ record FloatGenerator() implements DataSourceRequest<DataSourceResponse.FloatGenerator> {
+ public DataSourceResponse.FloatGenerator accept(DataSourceHandler handler) {
+ return handler.handle(this);
+ }
+ }
+
+ record HalfFloatGenerator() implements DataSourceRequest<DataSourceResponse.HalfFloatGenerator> {
+ public DataSourceResponse.HalfFloatGenerator accept(DataSourceHandler handler) {
+ return handler.handle(this);
+ }
+ }
+
+ record StringGenerator() implements DataSourceRequest<DataSourceResponse.StringGenerator> {
+ public DataSourceResponse.StringGenerator accept(DataSourceHandler handler) {
+ return handler.handle(this);
+ }
+ }
+
+ record NullWrapper() implements DataSourceRequest<DataSourceResponse.NullWrapper> {
+ public DataSourceResponse.NullWrapper accept(DataSourceHandler handler) {
+ return handler.handle(this);
+ }
+ }
+
+ record ArrayWrapper() implements DataSourceRequest<DataSourceResponse.ArrayWrapper> {
+ public DataSourceResponse.ArrayWrapper accept(DataSourceHandler handler) {
+ return handler.handle(this);
+ }
+ }
+
+ record ChildFieldGenerator(DataGeneratorSpecification specification)
+ implements
+ DataSourceRequest<DataSourceResponse.ChildFieldGenerator> {
+ public DataSourceResponse.ChildFieldGenerator accept(DataSourceHandler handler) {
+ return handler.handle(this);
+ }
+ }
+
+ record FieldTypeGenerator() implements DataSourceRequest<DataSourceResponse.FieldTypeGenerator> {
+ public DataSourceResponse.FieldTypeGenerator accept(DataSourceHandler handler) {
+ return handler.handle(this);
+ }
+ }
+
+ record ObjectArrayGenerator() implements DataSourceRequest<DataSourceResponse.ObjectArrayGenerator> {
+ public DataSourceResponse.ObjectArrayGenerator accept(DataSourceHandler handler) {
+ return handler.handle(this);
+ }
+ }
+}
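
Each request record implements `accept` by invoking the `handle` overload for its own concrete type, so the generic `DataSource.get` resolves to the right handler method without casts or `instanceof` checks. Adding a new decision is then an additive, three-step change; a hypothetical `BooleanGenerator` (not part of this patch) would follow the same shape:

[source,java]
------------------------------------------------------------
// 1. A new request record, mirroring those above (hypothetical).
record BooleanGenerator() implements DataSourceRequest<DataSourceResponse.BooleanGenerator> {
    public DataSourceResponse.BooleanGenerator accept(DataSourceHandler handler) {
        return handler.handle(this);
    }
}

// 2. A matching response record in DataSourceResponse:
//    record BooleanGenerator(Supplier<Boolean> generator) implements DataSourceResponse {}
// 3. A default handle(...) overload in DataSourceHandler that returns null.
------------------------------------------------------------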
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceResponse.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceResponse.java
new file mode 100644
index 0000000000000..867bb9603ca00
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceResponse.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.datasource;
+
+import org.elasticsearch.logsdb.datageneration.FieldType;
+
+import java.util.Optional;
+import java.util.function.Function;
+import java.util.function.Supplier;
+
+public interface DataSourceResponse {
+ record LongGenerator(Supplier<Long> generator) implements DataSourceResponse {}
+
+ record UnsignedLongGenerator(Supplier<Object> generator) implements DataSourceResponse {}
+
+ record IntegerGenerator(Supplier<Integer> generator) implements DataSourceResponse {}
+
+ record ShortGenerator(Supplier<Short> generator) implements DataSourceResponse {}
+
+ record ByteGenerator(Supplier<Byte> generator) implements DataSourceResponse {}
+
+ record DoubleGenerator(Supplier<Double> generator) implements DataSourceResponse {}
+
+ record DoubleInRangeGenerator(Supplier<Double> generator) implements DataSourceResponse {}
+
+ record FloatGenerator(Supplier<Float> generator) implements DataSourceResponse {}
+
+ record HalfFloatGenerator(Supplier<Float> generator) implements DataSourceResponse {}
+
+ record StringGenerator(Supplier<String> generator) implements DataSourceResponse {}
+
+ record NullWrapper(Function<Supplier<Object>, Supplier<Object>> wrapper) implements DataSourceResponse {}
+
+ record ArrayWrapper(Function<Supplier<Object>, Supplier<Object>> wrapper) implements DataSourceResponse {}
+
+ interface ChildFieldGenerator extends DataSourceResponse {
+ int generateChildFieldCount();
+
+ boolean generateNestedSubObject();
+
+ boolean generateRegularSubObject();
+
+ String generateFieldName();
+ }
+
+ record FieldTypeGenerator(Supplier<FieldType> generator) implements DataSourceResponse {}
+
+ record ObjectArrayGenerator(Supplier<Optional<Integer>> lengthGenerator) implements DataSourceResponse {}
+}
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultObjectGenerationHandler.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultObjectGenerationHandler.java
new file mode 100644
index 0000000000000..45e4b0b6d6624
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultObjectGenerationHandler.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.datasource;
+
+import org.elasticsearch.logsdb.datageneration.FieldType;
+import org.elasticsearch.test.ESTestCase;
+
+import java.util.Optional;
+
+import static org.elasticsearch.test.ESTestCase.randomAlphaOfLengthBetween;
+import static org.elasticsearch.test.ESTestCase.randomDouble;
+import static org.elasticsearch.test.ESTestCase.randomFrom;
+import static org.elasticsearch.test.ESTestCase.randomIntBetween;
+
+public class DefaultObjectGenerationHandler implements DataSourceHandler {
+ @Override
+ public DataSourceResponse.ChildFieldGenerator handle(DataSourceRequest.ChildFieldGenerator request) {
+ return new DataSourceResponse.ChildFieldGenerator() {
+ @Override
+ public int generateChildFieldCount() {
+ return ESTestCase.randomIntBetween(0, request.specification().maxFieldCountPerLevel());
+ }
+
+ @Override
+ public boolean generateNestedSubObject() {
+ // Using a static 10% chance, this is just a chosen value that can be tweaked.
+ return randomDouble() <= 0.1;
+ }
+
+ @Override
+ public boolean generateRegularSubObject() {
+ // Using a static 10% chance, this is just a chosen value that can be tweaked.
+ return randomDouble() <= 0.1;
+ }
+
+ @Override
+ public String generateFieldName() {
+ return randomAlphaOfLengthBetween(1, 10);
+ }
+ };
+ }
+
+ @Override
+ public DataSourceResponse.FieldTypeGenerator handle(DataSourceRequest.FieldTypeGenerator request) {
+ return new DataSourceResponse.FieldTypeGenerator(() -> randomFrom(FieldType.values()));
+ }
+
+ @Override
+ public DataSourceResponse.ObjectArrayGenerator handle(DataSourceRequest.ObjectArrayGenerator request) {
+ return new DataSourceResponse.ObjectArrayGenerator(() -> {
+ if (ESTestCase.randomBoolean()) {
+ return Optional.of(randomIntBetween(0, 5));
+ }
+
+ return Optional.empty();
+ });
+ }
+}
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultPrimitiveTypesHandler.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultPrimitiveTypesHandler.java
new file mode 100644
index 0000000000000..c9e581f973aae
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultPrimitiveTypesHandler.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.datasource;
+
+import org.apache.lucene.sandbox.document.HalfFloatPoint;
+import org.elasticsearch.test.ESTestCase;
+
+import java.math.BigInteger;
+
+public class DefaultPrimitiveTypesHandler implements DataSourceHandler {
+ @Override
+ public DataSourceResponse.LongGenerator handle(DataSourceRequest.LongGenerator request) {
+ return new DataSourceResponse.LongGenerator(ESTestCase::randomLong);
+ }
+
+ @Override
+ public DataSourceResponse.UnsignedLongGenerator handle(DataSourceRequest.UnsignedLongGenerator request) {
+ return new DataSourceResponse.UnsignedLongGenerator(() -> new BigInteger(64, ESTestCase.random()));
+ }
+
+ @Override
+ public DataSourceResponse.IntegerGenerator handle(DataSourceRequest.IntegerGenerator request) {
+ return new DataSourceResponse.IntegerGenerator(ESTestCase::randomInt);
+ }
+
+ @Override
+ public DataSourceResponse.ShortGenerator handle(DataSourceRequest.ShortGenerator request) {
+ return new DataSourceResponse.ShortGenerator(ESTestCase::randomShort);
+ }
+
+ @Override
+ public DataSourceResponse.ByteGenerator handle(DataSourceRequest.ByteGenerator request) {
+ return new DataSourceResponse.ByteGenerator(ESTestCase::randomByte);
+ }
+
+ @Override
+ public DataSourceResponse.DoubleGenerator handle(DataSourceRequest.DoubleGenerator request) {
+ return new DataSourceResponse.DoubleGenerator(ESTestCase::randomDouble);
+ }
+
+ @Override
+ public DataSourceResponse.DoubleInRangeGenerator handle(DataSourceRequest.DoubleInRangeGenerator request) {
+ return new DataSourceResponse.DoubleInRangeGenerator(
+ () -> ESTestCase.randomDoubleBetween(request.minExclusive(), request.maxExclusive(), false)
+ );
+ }
+
+ @Override
+ public DataSourceResponse.FloatGenerator handle(DataSourceRequest.FloatGenerator request) {
+ return new DataSourceResponse.FloatGenerator(ESTestCase::randomFloat);
+ }
+
+ @Override
+ public DataSourceResponse.HalfFloatGenerator handle(DataSourceRequest.HalfFloatGenerator request) {
+ // This trick taken from NumberFieldMapper reduces precision of float to actual half float precision.
+ // We do this to avoid getting tripped on values in synthetic source having reduced precision but
+ // values in stored source having full float precision.
+ // This can be removed with a more lenient matcher.
+ return new DataSourceResponse.HalfFloatGenerator(
+ () -> HalfFloatPoint.sortableShortToHalfFloat(HalfFloatPoint.halfFloatToSortableShort(ESTestCase.randomFloat()))
+ );
+ }
+
+ @Override
+ public DataSourceResponse.StringGenerator handle(DataSourceRequest.StringGenerator request) {
+ return new DataSourceResponse.StringGenerator(() -> ESTestCase.randomAlphaOfLengthBetween(0, 50));
+ }
+}
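
The half-float case is the only one that post-processes the random value: it rounds a full-precision float through the `half_float` encoding so that the generated document and the synthetic-source round trip agree. A small sketch of the quantization it relies on (sample value illustrative):

[source,java]
------------------------------------------------------------
import org.apache.lucene.sandbox.document.HalfFloatPoint;

float full = 0.1234567f;
// Encode to the sortable short used by half_float, then decode;
// the result keeps only the 11-bit half-float significand.
float half = HalfFloatPoint.sortableShortToHalfFloat(HalfFloatPoint.halfFloatToSortableShort(full));
assert half != full;
------------------------------------------------------------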
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultWrappersHandler.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultWrappersHandler.java
new file mode 100644
index 0000000000000..57af9786f200b
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultWrappersHandler.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.datasource;
+
+import org.elasticsearch.test.ESTestCase;
+
+import java.util.function.Function;
+import java.util.function.Supplier;
+import java.util.stream.IntStream;
+
+public class DefaultWrappersHandler implements DataSourceHandler {
+ @Override
+ public DataSourceResponse.NullWrapper handle(DataSourceRequest.NullWrapper ignored) {
+ return new DataSourceResponse.NullWrapper(injectNulls());
+ }
+
+ @Override
+ public DataSourceResponse.ArrayWrapper handle(DataSourceRequest.ArrayWrapper ignored) {
+ return new DataSourceResponse.ArrayWrapper(wrapInArray());
+ }
+
+ private static Function<Supplier<Object>, Supplier<Object>> injectNulls() {
+ return (values) -> () -> ESTestCase.randomBoolean() ? null : values.get();
+ }
+
+ private static Function<Supplier<Object>, Supplier<Object>> wrapInArray() {
+ return (values) -> () -> {
+ if (ESTestCase.randomBoolean()) {
+ var size = ESTestCase.randomIntBetween(0, 5);
+ return IntStream.range(0, size).mapToObj((i) -> values.get()).toList();
+ }
+
+ return values.get();
+ };
+ }
+}
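
The leaf generators compose these two wrappers as `arrays.wrapper().compose(nulls.wrapper())`. Since `f.compose(g).apply(x)` is `f.apply(g.apply(x))`, null injection wraps the raw supplier first and the array wrapper sits outside, so generated arrays may contain null elements. A reduced sketch of the same composition (probabilities and sizes are illustrative, and the real array wrapper sometimes returns a bare value):

[source,java]
------------------------------------------------------------
import java.util.Arrays;
import java.util.function.Function;
import java.util.function.Supplier;

Supplier<Object> raw = () -> 42L;
Function<Supplier<Object>, Supplier<Object>> nulls =
    s -> () -> Math.random() < 0.5 ? null : s.get();     // inner wrapper
Function<Supplier<Object>, Supplier<Object>> arrays =
    s -> () -> Arrays.asList(s.get(), s.get(), s.get()); // outer wrapper

// arrays.compose(nulls).apply(raw) == arrays.apply(nulls.apply(raw))
Supplier<Object> values = arrays.compose(nulls).apply(raw);
Object v = values.get(); // a three-element list whose entries may be null
------------------------------------------------------------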
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/Context.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/Context.java
index b257807890c00..647d5bff152d1 100644
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/Context.java
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/Context.java
@@ -9,9 +9,17 @@
package org.elasticsearch.logsdb.datageneration.fields;
import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceResponse;
+
+import java.util.Optional;
class Context {
private final DataGeneratorSpecification specification;
+
+ private final DataSourceResponse.ChildFieldGenerator childFieldGenerator;
+ private final DataSourceResponse.FieldTypeGenerator fieldTypeGenerator;
+ private final DataSourceResponse.ObjectArrayGenerator objectArrayGenerator;
private final int objectDepth;
private final int nestedFieldsCount;
@@ -21,6 +29,9 @@ class Context {
private Context(DataGeneratorSpecification specification, int objectDepth, int nestedFieldsCount) {
this.specification = specification;
+ this.childFieldGenerator = specification.dataSource().get(new DataSourceRequest.ChildFieldGenerator(specification));
+ this.fieldTypeGenerator = specification.dataSource().get(new DataSourceRequest.FieldTypeGenerator());
+ this.objectArrayGenerator = specification.dataSource().get(new DataSourceRequest.ObjectArrayGenerator());
this.objectDepth = objectDepth;
this.nestedFieldsCount = nestedFieldsCount;
}
@@ -29,6 +40,14 @@ public DataGeneratorSpecification specification() {
return specification;
}
+ public DataSourceResponse.ChildFieldGenerator childFieldGenerator() {
+ return childFieldGenerator;
+ }
+
+ public DataSourceResponse.FieldTypeGenerator fieldTypeGenerator() {
+ return fieldTypeGenerator;
+ }
+
public Context subObject() {
return new Context(specification, objectDepth + 1, nestedFieldsCount);
}
@@ -38,16 +57,20 @@ public Context nestedObject() {
}
public boolean shouldAddObjectField() {
- return specification.arbitrary().generateSubObject() && objectDepth < specification.maxObjectDepth();
+ return childFieldGenerator.generateRegularSubObject() && objectDepth < specification.maxObjectDepth();
}
public boolean shouldAddNestedField() {
- return specification.arbitrary().generateNestedObject()
+ return childFieldGenerator.generateNestedSubObject()
&& objectDepth < specification.maxObjectDepth()
&& nestedFieldsCount < specification.nestedFieldsLimit();
}
- public boolean shouldGenerateObjectArray() {
- return objectDepth > 0 && specification.arbitrary().generateArrayOfObjects();
+ public Optional<Integer> generateObjectArray() {
+ if (objectDepth == 0) {
+ return Optional.empty();
+ }
+
+ return objectArrayGenerator.lengthGenerator().get();
}
}
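
`generateObjectArray` folds two formerly separate arbitrary calls (whether to emit an array, and how long it should be) into a single `Optional<Integer>`: empty means a plain object, a present value is the array length. That keeps both halves of the decision in one handler, so overriding it is a single point; a sketch of such an override (illustrative, not part of this patch):

[source,java]
------------------------------------------------------------
import java.util.Optional;

// Force every eligible object into a two-element array.
var alwaysPairs = new DataSourceHandler() {
    @Override
    public DataSourceResponse.ObjectArrayGenerator handle(DataSourceRequest.ObjectArrayGenerator request) {
        return new DataSourceResponse.ObjectArrayGenerator(() -> Optional.of(2));
    }
};
------------------------------------------------------------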
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/FieldValues.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/FieldValues.java
deleted file mode 100644
index 74196c5c8926c..0000000000000
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/FieldValues.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0 and the Server Side Public License, v 1; you may not use this file except
- * in compliance with, at your election, the Elastic License 2.0 or the Server
- * Side Public License, v 1.
- */
-
-package org.elasticsearch.logsdb.datageneration.fields;
-
-import org.elasticsearch.logsdb.datageneration.arbitrary.Arbitrary;
-
-import java.util.function.Function;
-import java.util.function.Supplier;
-import java.util.stream.IntStream;
-
-public class FieldValues {
- private FieldValues() {}
-
- public static Function<Supplier<Object>, Supplier<Object>> injectNulls(Arbitrary arbitrary) {
- return (values) -> () -> arbitrary.generateNullValue() ? null : values.get();
- }
-
- public static Function<Supplier<Object>, Supplier<Object>> wrappedInArray(Arbitrary arbitrary) {
- return (values) -> () -> {
- if (arbitrary.generateArrayOfValues()) {
- var size = arbitrary.valueArraySize();
- return IntStream.range(0, size).mapToObj((i) -> values.get()).toList();
- }
-
- return values.get();
- };
- }
-}
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java
index 24f59867f85b8..1a3da3b63add0 100644
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java
@@ -11,8 +11,16 @@
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
import org.elasticsearch.logsdb.datageneration.FieldType;
+import org.elasticsearch.logsdb.datageneration.fields.leaf.ByteFieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.fields.leaf.DoubleFieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.fields.leaf.FloatFieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.fields.leaf.HalfFloatFieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.fields.leaf.IntegerFieldDataGenerator;
import org.elasticsearch.logsdb.datageneration.fields.leaf.KeywordFieldDataGenerator;
import org.elasticsearch.logsdb.datageneration.fields.leaf.LongFieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.fields.leaf.ScaledFloatFieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.fields.leaf.ShortFieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.fields.leaf.UnsignedLongFieldDataGenerator;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
@@ -34,7 +42,7 @@ public class GenericSubObjectFieldDataGenerator {
List<ChildField> generateChildFields() {
var existingFieldNames = new HashSet<String>();
// no child fields is legal
- var childFieldsCount = context.specification().arbitrary().childFieldCount(0, context.specification().maxFieldCountPerLevel());
+ var childFieldsCount = context.childFieldGenerator().generateChildFieldCount();
var result = new ArrayList<ChildField>(childFieldsCount);
for (int i = 0; i < childFieldsCount; i++) {
@@ -45,7 +53,7 @@ List<ChildField> generateChildFields() {
} else if (context.shouldAddNestedField()) {
result.add(new ChildField(fieldName, new NestedFieldDataGenerator(context.nestedObject())));
} else {
- var fieldType = context.specification().arbitrary().fieldType();
+ var fieldType = context.fieldTypeGenerator().generator().get();
result.add(leafField(fieldType, fieldName));
}
}
@@ -66,8 +74,9 @@ static void writeChildFieldsMapping(XContentBuilder mapping, List<ChildField> ch
static void writeObjectsData(XContentBuilder document, Context context, CheckedConsumer<XContentBuilder, IOException> objectWriter)
throws IOException {
- if (context.shouldGenerateObjectArray()) {
- int size = context.specification().arbitrary().objectArraySize();
+ var optionalLength = context.generateObjectArray();
+ if (optionalLength.isPresent()) {
+ int size = optionalLength.get();
document.startArray();
for (int i = 0; i < size; i++) {
@@ -94,17 +103,25 @@ static void writeChildFieldsData(XContentBuilder document, Iterable<ChildField>
private ChildField leafField(FieldType type, String fieldName) {
var generator = switch (type) {
- case LONG -> new LongFieldDataGenerator(context.specification().arbitrary());
- case KEYWORD -> new KeywordFieldDataGenerator(context.specification().arbitrary());
+ case KEYWORD -> new KeywordFieldDataGenerator(context.specification().dataSource());
+ case LONG -> new LongFieldDataGenerator(context.specification().dataSource());
+ case UNSIGNED_LONG -> new UnsignedLongFieldDataGenerator(context.specification().dataSource());
+ case INTEGER -> new IntegerFieldDataGenerator(context.specification().dataSource());
+ case SHORT -> new ShortFieldDataGenerator(context.specification().dataSource());
+ case BYTE -> new ByteFieldDataGenerator(context.specification().dataSource());
+ case DOUBLE -> new DoubleFieldDataGenerator(context.specification().dataSource());
+ case FLOAT -> new FloatFieldDataGenerator(context.specification().dataSource());
+ case HALF_FLOAT -> new HalfFloatFieldDataGenerator(context.specification().dataSource());
+ case SCALED_FLOAT -> new ScaledFloatFieldDataGenerator(context.specification().dataSource());
};
return new ChildField(fieldName, generator);
}
private String generateFieldName(Set<String> existingFields) {
- var fieldName = context.specification().arbitrary().fieldName(1, 10);
+ var fieldName = context.childFieldGenerator().generateFieldName();
while (existingFields.contains(fieldName)) {
- fieldName = context.specification().arbitrary().fieldName(1, 10);
+ fieldName = context.childFieldGenerator().generateFieldName();
}
existingFields.add(fieldName);
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/ByteFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/ByteFieldDataGenerator.java
new file mode 100644
index 0000000000000..07a7bd65b67fb
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/ByteFieldDataGenerator.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.fields.leaf;
+
+import org.elasticsearch.core.CheckedConsumer;
+import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.function.Supplier;
+
+public class ByteFieldDataGenerator implements FieldDataGenerator {
+ private final Supplier<Object> valueGenerator;
+
+ public ByteFieldDataGenerator(DataSource dataSource) {
+ var bytes = dataSource.get(new DataSourceRequest.ByteGenerator());
+ var nulls = dataSource.get(new DataSourceRequest.NullWrapper());
+ var arrays = dataSource.get(new DataSourceRequest.ArrayWrapper());
+
+ this.valueGenerator = arrays.wrapper().compose(nulls.wrapper()).apply(() -> bytes.generator().get());
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> mappingWriter() {
+ return b -> b.startObject().field("type", "byte").endObject();
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator() {
+ return b -> b.value(valueGenerator.get());
+ }
+}
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/DoubleFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/DoubleFieldDataGenerator.java
new file mode 100644
index 0000000000000..84c5afe2fae51
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/DoubleFieldDataGenerator.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.fields.leaf;
+
+import org.elasticsearch.core.CheckedConsumer;
+import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.function.Supplier;
+
+public class DoubleFieldDataGenerator implements FieldDataGenerator {
+ private final Supplier<Object> valueGenerator;
+
+ public DoubleFieldDataGenerator(DataSource dataSource) {
+ var doubles = dataSource.get(new DataSourceRequest.DoubleGenerator());
+ var nulls = dataSource.get(new DataSourceRequest.NullWrapper());
+ var arrays = dataSource.get(new DataSourceRequest.ArrayWrapper());
+
+ this.valueGenerator = arrays.wrapper().compose(nulls.wrapper()).apply(() -> doubles.generator().get());
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> mappingWriter() {
+ return b -> b.startObject().field("type", "double").endObject();
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator() {
+ return b -> b.value(valueGenerator.get());
+ }
+}
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/FloatFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/FloatFieldDataGenerator.java
new file mode 100644
index 0000000000000..34e401a99bd0a
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/FloatFieldDataGenerator.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.fields.leaf;
+
+import org.elasticsearch.core.CheckedConsumer;
+import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.function.Supplier;
+
+public class FloatFieldDataGenerator implements FieldDataGenerator {
+ private final Supplier<Object> valueGenerator;
+
+ public FloatFieldDataGenerator(DataSource dataSource) {
+ var floats = dataSource.get(new DataSourceRequest.FloatGenerator());
+ var nulls = dataSource.get(new DataSourceRequest.NullWrapper());
+ var arrays = dataSource.get(new DataSourceRequest.ArrayWrapper());
+
+ this.valueGenerator = arrays.wrapper().compose(nulls.wrapper()).apply(() -> floats.generator().get());
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> mappingWriter() {
+ return b -> b.startObject().field("type", "float").endObject();
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator() {
+ return b -> b.value(valueGenerator.get());
+ }
+}
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/HalfFloatFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/HalfFloatFieldDataGenerator.java
new file mode 100644
index 0000000000000..3201926e35041
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/HalfFloatFieldDataGenerator.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.fields.leaf;
+
+import org.elasticsearch.core.CheckedConsumer;
+import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.function.Supplier;
+
+public class HalfFloatFieldDataGenerator implements FieldDataGenerator {
+ private final Supplier<Object> valueGenerator;
+
+ public HalfFloatFieldDataGenerator(DataSource dataSource) {
+ var halfFloats = dataSource.get(new DataSourceRequest.HalfFloatGenerator());
+ var nulls = dataSource.get(new DataSourceRequest.NullWrapper());
+ var arrays = dataSource.get(new DataSourceRequest.ArrayWrapper());
+
+ this.valueGenerator = arrays.wrapper().compose(nulls.wrapper()).apply(() -> halfFloats.generator().get());
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> mappingWriter() {
+ return b -> b.startObject().field("type", "half_float").endObject();
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator() {
+ return b -> b.value(valueGenerator.get());
+ }
+}
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/IntegerFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/IntegerFieldDataGenerator.java
new file mode 100644
index 0000000000000..a532d77abc80e
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/IntegerFieldDataGenerator.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.fields.leaf;
+
+import org.elasticsearch.core.CheckedConsumer;
+import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.function.Supplier;
+
+public class IntegerFieldDataGenerator implements FieldDataGenerator {
+ private final Supplier<Object> valueGenerator;
+
+ public IntegerFieldDataGenerator(DataSource dataSource) {
+ var ints = dataSource.get(new DataSourceRequest.IntegerGenerator());
+ var nulls = dataSource.get(new DataSourceRequest.NullWrapper());
+ var arrays = dataSource.get(new DataSourceRequest.ArrayWrapper());
+
+ this.valueGenerator = arrays.wrapper().compose(nulls.wrapper()).apply(() -> ints.generator().get());
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> mappingWriter() {
+ return b -> b.startObject().field("type", "integer").endObject();
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator() {
+ return b -> b.value(valueGenerator.get());
+ }
+}
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/KeywordFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/KeywordFieldDataGenerator.java
index 89ae1d6034c15..913cd5657dc6f 100644
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/KeywordFieldDataGenerator.java
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/KeywordFieldDataGenerator.java
@@ -10,20 +10,22 @@
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
-import org.elasticsearch.logsdb.datageneration.arbitrary.Arbitrary;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.function.Supplier;
-import static org.elasticsearch.logsdb.datageneration.fields.FieldValues.injectNulls;
-import static org.elasticsearch.logsdb.datageneration.fields.FieldValues.wrappedInArray;
-
public class KeywordFieldDataGenerator implements FieldDataGenerator {
private final Supplier<Object> valueGenerator;
- public KeywordFieldDataGenerator(Arbitrary arbitrary) {
- this.valueGenerator = injectNulls(arbitrary).andThen(wrappedInArray(arbitrary)).apply(() -> arbitrary.stringValue(0, 50));
+ public KeywordFieldDataGenerator(DataSource dataSource) {
+ var strings = dataSource.get(new DataSourceRequest.StringGenerator());
+ var nulls = dataSource.get(new DataSourceRequest.NullWrapper());
+ var arrays = dataSource.get(new DataSourceRequest.ArrayWrapper());
+
+ this.valueGenerator = arrays.wrapper().compose(nulls.wrapper()).apply(() -> strings.generator().get());
}
@Override
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/LongFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/LongFieldDataGenerator.java
index 097c5fe024d2b..3627385f51a7c 100644
--- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/LongFieldDataGenerator.java
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/LongFieldDataGenerator.java
@@ -10,20 +10,22 @@
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
-import org.elasticsearch.logsdb.datageneration.arbitrary.Arbitrary;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.function.Supplier;
-import static org.elasticsearch.logsdb.datageneration.fields.FieldValues.injectNulls;
-import static org.elasticsearch.logsdb.datageneration.fields.FieldValues.wrappedInArray;
-
public class LongFieldDataGenerator implements FieldDataGenerator {
private final Supplier<Object> valueGenerator;
- public LongFieldDataGenerator(Arbitrary arbitrary) {
- this.valueGenerator = injectNulls(arbitrary).andThen(wrappedInArray(arbitrary)).apply(arbitrary::longValue);
+ public LongFieldDataGenerator(DataSource dataSource) {
+ var longs = dataSource.get(new DataSourceRequest.LongGenerator());
+ var nulls = dataSource.get(new DataSourceRequest.NullWrapper());
+ var arrays = dataSource.get(new DataSourceRequest.ArrayWrapper());
+
+ this.valueGenerator = arrays.wrapper().compose(nulls.wrapper()).apply(() -> longs.generator().get());
}
@Override
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/ScaledFloatFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/ScaledFloatFieldDataGenerator.java
new file mode 100644
index 0000000000000..38fa0504cf7e7
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/ScaledFloatFieldDataGenerator.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.fields.leaf;
+
+import org.elasticsearch.core.CheckedConsumer;
+import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.function.Supplier;
+
+public class ScaledFloatFieldDataGenerator implements FieldDataGenerator {
+ private final double scalingFactor;
+ private final Supplier<Object> valueGenerator;
+
+ public ScaledFloatFieldDataGenerator(DataSource dataSource) {
+ var positiveDoubles = dataSource.get(new DataSourceRequest.DoubleInRangeGenerator(0, Double.MAX_VALUE));
+ this.scalingFactor = positiveDoubles.generator().get();
+
+ var doubles = dataSource.get(new DataSourceRequest.DoubleGenerator());
+ var nulls = dataSource.get(new DataSourceRequest.NullWrapper());
+ var arrays = dataSource.get(new DataSourceRequest.ArrayWrapper());
+
+ this.valueGenerator = arrays.wrapper().compose(nulls.wrapper()).apply(() -> doubles.generator().get());
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> mappingWriter() {
+ return b -> b.startObject().field("type", "scaled_float").field("scaling_factor", scalingFactor).endObject();
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator() {
+ return b -> b.value(valueGenerator.get());
+ }
+}
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/ShortFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/ShortFieldDataGenerator.java
new file mode 100644
index 0000000000000..511b31794a925
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/ShortFieldDataGenerator.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.fields.leaf;
+
+import org.elasticsearch.core.CheckedConsumer;
+import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.function.Supplier;
+
+public class ShortFieldDataGenerator implements FieldDataGenerator {
+ private final Supplier<Object> valueGenerator;
+
+ public ShortFieldDataGenerator(DataSource dataSource) {
+ var shorts = dataSource.get(new DataSourceRequest.ShortGenerator());
+ var nulls = dataSource.get(new DataSourceRequest.NullWrapper());
+ var arrays = dataSource.get(new DataSourceRequest.ArrayWrapper());
+
+ this.valueGenerator = arrays.wrapper().compose(nulls.wrapper()).apply(() -> shorts.generator().get());
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> mappingWriter() {
+ return b -> b.startObject().field("type", "short").endObject();
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator() {
+ return b -> b.value(valueGenerator.get());
+ }
+}
diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/UnsignedLongFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/UnsignedLongFieldDataGenerator.java
new file mode 100644
index 0000000000000..327b3260fdec5
--- /dev/null
+++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/leaf/UnsignedLongFieldDataGenerator.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.logsdb.datageneration.fields.leaf;
+
+import org.elasticsearch.core.CheckedConsumer;
+import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
+import org.elasticsearch.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.function.Supplier;
+
+public class UnsignedLongFieldDataGenerator implements FieldDataGenerator {
+ private final Supplier<Object> valueGenerator;
+
+ public UnsignedLongFieldDataGenerator(DataSource dataSource) {
+ var unsignedLongs = dataSource.get(new DataSourceRequest.UnsignedLongGenerator());
+ var nulls = dataSource.get(new DataSourceRequest.NullWrapper());
+ var arrays = dataSource.get(new DataSourceRequest.ArrayWrapper());
+
+ this.valueGenerator = arrays.wrapper().compose(nulls.wrapper()).apply(() -> unsignedLongs.generator().get());
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> mappingWriter() {
+ return b -> b.startObject().field("type", "unsigned_long").endObject();
+ }
+
+ @Override
+ public CheckedConsumer<XContentBuilder, IOException> fieldValueGenerator() {
+ return b -> b.value(valueGenerator.get());
+ }
+}
diff --git a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java
index e476e02d03778..6c1b0c22f305d 100644
--- a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java
+++ b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java
@@ -9,16 +9,21 @@
package org.elasticsearch.logsdb.datageneration;
import org.elasticsearch.common.Strings;
-import org.elasticsearch.logsdb.datageneration.arbitrary.Arbitrary;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceHandler;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceResponse;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentType;
+import java.util.List;
+import java.util.Optional;
+
public class DataGeneratorSnapshotTests extends ESTestCase {
public void testSnapshot() throws Exception {
var dataGenerator = new DataGenerator(
DataGeneratorSpecification.builder()
- .withArbitrary(new TestArbitrary())
+ .withDataSourceHandlers(List.of(new DataSourceOverrides()))
.withMaxFieldCountPerLevel(5)
.withMaxObjectDepth(2)
.build()
@@ -127,85 +132,96 @@ public void testSnapshot() throws Exception {
assertEquals(expectedDocument, Strings.toString(document));
}
- private class TestArbitrary implements Arbitrary {
- private int generatedFields = 0;
- private FieldType fieldType = FieldType.KEYWORD;
+ private static class DataSourceOverrides implements DataSourceHandler {
private long longValue = 0;
- private long generatedStringValues = 0;
+ private long generatedStrings = 0;
private int generateNullChecks = 0;
private int generateArrayChecks = 0;
private boolean producedObjectArray = false;
+ private FieldType fieldType = FieldType.KEYWORD;
+ private final StaticChildFieldGenerator childFieldGenerator = new StaticChildFieldGenerator();
@Override
- public boolean generateSubObject() {
- return generatedFields < 6;
+ public DataSourceResponse.LongGenerator handle(DataSourceRequest.LongGenerator request) {
+ return new DataSourceResponse.LongGenerator(() -> longValue++);
}
@Override
- public boolean generateNestedObject() {
- return generatedFields > 6 && generatedFields < 12;
+ public DataSourceResponse.StringGenerator handle(DataSourceRequest.StringGenerator request) {
+ return new DataSourceResponse.StringGenerator(() -> "string" + (generatedStrings++ + 1));
}
@Override
- public int childFieldCount(int lowerBound, int upperBound) {
- assert lowerBound < 2 && upperBound > 2;
- return 2;
+ public DataSourceResponse.NullWrapper handle(DataSourceRequest.NullWrapper request) {
+ return new DataSourceResponse.NullWrapper((values) -> () -> generateNullChecks++ % 4 == 0 ? null : values.get());
}
@Override
- public String fieldName(int lengthLowerBound, int lengthUpperBound) {
- return "f" + (generatedFields++ + 1);
- }
+ public DataSourceResponse.ArrayWrapper handle(DataSourceRequest.ArrayWrapper request) {
- @Override
- public FieldType fieldType() {
- if (fieldType == FieldType.KEYWORD) {
- fieldType = FieldType.LONG;
- return FieldType.KEYWORD;
- }
-
- fieldType = FieldType.KEYWORD;
- return FieldType.LONG;
- }
+ return new DataSourceResponse.ArrayWrapper((values) -> () -> {
+ if (generateArrayChecks++ % 4 == 0) {
+ // we have nulls so can't use List.of
+ return new Object[] { values.get(), values.get() };
+ }
- @Override
- public long longValue() {
- return longValue++;
+ return values.get();
+ });
}
@Override
- public String stringValue(int lengthLowerBound, int lengthUpperBound) {
- return "string" + (generatedStringValues++ + 1);
+ public DataSourceResponse.ChildFieldGenerator handle(DataSourceRequest.ChildFieldGenerator request) {
+
+ return childFieldGenerator;
}
@Override
- public boolean generateNullValue() {
- return generateNullChecks++ % 4 == 0;
+ public DataSourceResponse.ObjectArrayGenerator handle(DataSourceRequest.ObjectArrayGenerator request) {
+ return new DataSourceResponse.ObjectArrayGenerator(() -> {
+ if (producedObjectArray == false) {
+ producedObjectArray = true;
+ return Optional.of(2);
+ }
+
+ return Optional.empty();
+ });
}
@Override
- public boolean generateArrayOfValues() {
- return generateArrayChecks++ % 4 == 0;
+ public DataSourceResponse.FieldTypeGenerator handle(DataSourceRequest.FieldTypeGenerator request) {
+ return new DataSourceResponse.FieldTypeGenerator(() -> {
+ if (fieldType == FieldType.KEYWORD) {
+ fieldType = FieldType.LONG;
+ return FieldType.KEYWORD;
+ }
+
+ fieldType = FieldType.KEYWORD;
+ return FieldType.LONG;
+ });
}
+ }
+
+ private static class StaticChildFieldGenerator implements DataSourceResponse.ChildFieldGenerator {
+ private int generatedFields = 0;
@Override
- public int valueArraySize() {
+ public int generateChildFieldCount() {
return 2;
}
@Override
- public boolean generateArrayOfObjects() {
- if (producedObjectArray == false) {
- producedObjectArray = true;
- return true;
- }
+ public boolean generateNestedSubObject() {
+ return generatedFields > 6 && generatedFields < 12;
+ }
- return false;
+ @Override
+ public boolean generateRegularSubObject() {
+ return generatedFields < 6;
}
@Override
- public int objectArraySize() {
- return 2;
+ public String generateFieldName() {
+ return "f" + (generatedFields++ + 1);
}
}
}
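The test above exercises the request/response dispatch that replaced the monolithic `Arbitrary` interface: each generator asks the data source for a capability, and the first handler that answers the request wins. The Elasticsearch classes are not reproduced here, but the dispatch idea can be sketched with hypothetical stand-in types (all names below are illustrative, not the actual API):

[source,java]
----
import java.util.List;
import java.util.function.Supplier;

// Hypothetical stand-ins for the DataSourceRequest/DataSourceResponse pairs.
interface Request<R> {}

record LongGeneratorRequest() implements Request<Supplier<Long>> {}

interface Handler {
    // Returns null when this handler does not cover the request type.
    <R> R handle(Request<R> request);
}

class Source {
    private final List<Handler> handlers;

    Source(List<Handler> handlers) {
        this.handlers = handlers;
    }

    // First handler that answers wins; test overrides go before defaults.
    <R> R get(Request<R> request) {
        for (Handler handler : handlers) {
            R response = handler.handle(request);
            if (response != null) {
                return response;
            }
        }
        throw new IllegalStateException("no handler for " + request);
    }
}

public class DispatchSketch {
    public static void main(String[] args) {
        Handler defaults = new Handler() {
            @SuppressWarnings("unchecked")
            public <R> R handle(Request<R> request) {
                if (request instanceof LongGeneratorRequest) {
                    return (R) (Supplier<Long>) () -> 42L;
                }
                return null;
            }
        };
        Source source = new Source(List.of(defaults));
        System.out.println(source.get(new LongGeneratorRequest()).get()); // 42
    }
}
----

A test like `DataSourceOverrides` above simply contributes a handler earlier in the list, so it shadows the default behavior only for the requests it cares about.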
diff --git a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java
index 309c5ad428829..db3b81891e87e 100644
--- a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java
+++ b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java
@@ -11,13 +11,20 @@
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.index.mapper.MapperServiceTestCase;
import org.elasticsearch.index.mapper.SourceToParse;
-import org.elasticsearch.logsdb.datageneration.arbitrary.Arbitrary;
-import org.elasticsearch.logsdb.datageneration.arbitrary.RandomBasedArbitrary;
+import org.elasticsearch.index.mapper.extras.MapperExtrasPlugin;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceHandler;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceResponse;
+import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xpack.unsignedlong.UnsignedLongMapperPlugin;
import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Optional;
public class DataGeneratorTests extends ESTestCase {
public void testDataGeneratorSanity() throws IOException {
@@ -34,15 +41,21 @@ public void testDataGeneratorSanity() throws IOException {
public void testDataGeneratorProducesValidMappingAndDocument() throws IOException {
// Make sure objects, nested objects and all field types are covered.
- var testArbitrary = new RandomBasedArbitrary() {
+ var testChildFieldGenerator = new DataSourceResponse.ChildFieldGenerator() {
private boolean subObjectCovered = false;
private boolean nestedCovered = false;
private int generatedFields = 0;
@Override
- public boolean generateSubObject() {
- if (subObjectCovered == false) {
- subObjectCovered = true;
+ public int generateChildFieldCount() {
+ // Make sure to generate enough fields to go through all field types.
+ return 20;
+ }
+
+ @Override
+ public boolean generateNestedSubObject() {
+ if (nestedCovered == false) {
+ nestedCovered = true;
return true;
}
@@ -50,9 +63,9 @@ public boolean generateSubObject() {
}
@Override
- public boolean generateNestedObject() {
- if (nestedCovered == false) {
- nestedCovered = true;
+ public boolean generateRegularSubObject() {
+ if (subObjectCovered == false) {
+ subObjectCovered = true;
return true;
}
@@ -60,28 +73,37 @@ public boolean generateNestedObject() {
}
@Override
- public int childFieldCount(int lowerBound, int upperBound) {
- // Make sure to generate enough fields to go through all field types.
- return 20;
+ public String generateFieldName() {
+ return "f" + generatedFields++;
}
+ };
+
+ var dataSourceOverride = new DataSourceHandler() {
+ private int generatedFields = 0;
@Override
- public String fieldName(int lengthLowerBound, int lengthUpperBound) {
- return "f" + generatedFields++;
+ public DataSourceResponse.ChildFieldGenerator handle(DataSourceRequest.ChildFieldGenerator request) {
+ return testChildFieldGenerator;
}
@Override
- public FieldType fieldType() {
- return FieldType.values()[generatedFields % FieldType.values().length];
+ public DataSourceResponse.FieldTypeGenerator handle(DataSourceRequest.FieldTypeGenerator request) {
+ return new DataSourceResponse.FieldTypeGenerator(() -> FieldType.values()[generatedFields++ % FieldType.values().length]);
}
};
- var dataGenerator = new DataGenerator(DataGeneratorSpecification.builder().withArbitrary(testArbitrary).build());
+ var dataGenerator = new DataGenerator(
+ DataGeneratorSpecification.builder().withDataSourceHandlers(List.of(dataSourceOverride)).build()
+ );
var mapping = XContentBuilder.builder(XContentType.JSON.xContent());
dataGenerator.writeMapping(mapping);
var mappingService = new MapperServiceTestCase() {
+ @Override
+ protected Collection<? extends Plugin> getPlugins() {
+ return List.of(new UnsignedLongMapperPlugin(), new MapperExtrasPlugin());
+ }
}.createMapperService(mapping);
var document = XContentBuilder.builder(XContentType.JSON.xContent());
@@ -92,71 +114,49 @@ public FieldType fieldType() {
public void testDataGeneratorStressTest() throws IOException {
// Let's generate 1000000 fields to test an extreme case (2 levels of objects + 1 leaf level with 100 fields per object).
- var arbitrary = new Arbitrary() {
+ var testChildFieldGenerator = new DataSourceResponse.ChildFieldGenerator() {
private int generatedFields = 0;
@Override
- public boolean generateSubObject() {
- return true;
+ public int generateChildFieldCount() {
+ return 100;
}
@Override
- public boolean generateNestedObject() {
+ public boolean generateNestedSubObject() {
return false;
}
@Override
- public int childFieldCount(int lowerBound, int upperBound) {
- return upperBound;
+ public boolean generateRegularSubObject() {
+ return true;
}
@Override
- public String fieldName(int lengthLowerBound, int lengthUpperBound) {
+ public String generateFieldName() {
return "f" + generatedFields++;
}
+ };
+ var dataSourceOverride = new DataSourceHandler() {
@Override
- public FieldType fieldType() {
- return FieldType.LONG;
- }
-
- @Override
- public long longValue() {
- return 0;
- }
-
- @Override
- public String stringValue(int lengthLowerBound, int lengthUpperBound) {
- return "";
- }
-
- @Override
- public boolean generateNullValue() {
- return false;
- }
-
- @Override
- public boolean generateArrayOfValues() {
- return false;
- }
-
- @Override
- public int valueArraySize() {
- return 3;
+ public DataSourceResponse.ChildFieldGenerator handle(DataSourceRequest.ChildFieldGenerator request) {
+ return testChildFieldGenerator;
}
@Override
- public boolean generateArrayOfObjects() {
- return false;
+ public DataSourceResponse.ObjectArrayGenerator handle(DataSourceRequest.ObjectArrayGenerator request) {
+ return new DataSourceResponse.ObjectArrayGenerator(Optional::empty);
}
@Override
- public int objectArraySize() {
- return 3;
+ public DataSourceResponse.FieldTypeGenerator handle(DataSourceRequest.FieldTypeGenerator request) {
+ return new DataSourceResponse.FieldTypeGenerator(() -> FieldType.LONG);
}
};
+
var dataGenerator = new DataGenerator(
- DataGeneratorSpecification.builder().withArbitrary(arbitrary).withMaxFieldCountPerLevel(100).withMaxObjectDepth(2).build()
+ DataGeneratorSpecification.builder().withDataSourceHandlers(List.of(dataSourceOverride)).withMaxObjectDepth(2).build()
);
var mapping = XContentBuilder.builder(XContentType.JSON.xContent());
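As a quick sanity check on the 1,000,000 figure in the test comment (a back-of-the-envelope sketch, assuming every object at both levels is fully populated with 100 children):

[source,java]
----
public class FieldCountSketch {
    public static void main(String[] args) {
        long fieldsPerObject = 100; // generateChildFieldCount()
        // two object levels (withMaxObjectDepth(2)) plus one leaf level:
        System.out.println(fieldsPerObject * fieldsPerObject * fieldsPerObject); // 1000000
    }
}
----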
diff --git a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/FieldValuesTests.java b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DefaultWrappersHandlerTests.java
similarity index 69%
rename from test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/FieldValuesTests.java
rename to test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DefaultWrappersHandlerTests.java
index 5e6a405ba1f87..1ac6d117f0931 100644
--- a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/FieldValuesTests.java
+++ b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DefaultWrappersHandlerTests.java
@@ -8,21 +8,23 @@
package org.elasticsearch.logsdb.datageneration;
-import org.elasticsearch.logsdb.datageneration.arbitrary.RandomBasedArbitrary;
-import org.elasticsearch.logsdb.datageneration.fields.FieldValues;
+import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
+import org.elasticsearch.logsdb.datageneration.datasource.DefaultWrappersHandler;
import org.elasticsearch.test.ESTestCase;
import java.util.List;
import java.util.function.Supplier;
-public class FieldValuesTests extends ESTestCase {
+public class DefaultWrappersHandlerTests extends ESTestCase {
public void testSanity() {
+ var sut = new DefaultWrappersHandler();
+
Supplier<Object> values = () -> 100;
- var arbitrary = new RandomBasedArbitrary();
+ var nulls = sut.handle(new DataSourceRequest.NullWrapper());
+ var arrays = sut.handle(new DataSourceRequest.ArrayWrapper());
+
+ var valuesWithNullsAndWrappedInArray = arrays.wrapper().compose(nulls.wrapper()).apply(values);
- var valuesWithNullsAndWrappedInArray = FieldValues.injectNulls(arbitrary)
- .andThen(FieldValues.wrappedInArray(arbitrary))
- .apply(values);
var value = valuesWithNullsAndWrappedInArray.get();
if (value instanceof List<?> list) {
From 65c57b3e330b522a0a33887f65e6da023e66fa50 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
<58790826+elasticsearchmachine@users.noreply.github.com>
Date: Fri, 2 Aug 2024 14:33:33 +1000
Subject: [PATCH 15/36] Mute org.elasticsearch.search.SearchServiceTests
org.elasticsearch.search.SearchServiceTests #111529
---
muted-tests.yml | 2 ++
1 file changed, 2 insertions(+)
diff --git a/muted-tests.yml b/muted-tests.yml
index 4635bf9541acb..256a7b7ba6987 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -141,6 +141,8 @@ tests:
- class: org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvPSeriesWeightedSumTests
method: testEvaluateBlockWithoutNulls {TestCase=, }
issue: https://github.com/elastic/elasticsearch/issues/111498
+- class: org.elasticsearch.search.SearchServiceTests
+ issue: https://github.com/elastic/elasticsearch/issues/111529
# Examples:
#
From 6ca3ac253a8d8171f228d9dbfd6c5c924239c226 Mon Sep 17 00:00:00 2001
From: Moritz Mack
Date: Fri, 2 Aug 2024 09:26:37 +0200
Subject: [PATCH 16/36] Track raw ingest and storage size separately to support
updates by doc (#111179)
This PR starts tracking raw ingest and storage size separately for updates by document.
This is done by capturing the ingest size when initially parsing the update, and the storage size
when parsing the final, merged document.
Additionally, this renames DocumentSizeObserver to XContentParserDecorator / XContentMeteringParserDecorator
so the code is easier to reason about. More renaming will have to follow.
---------
Co-authored-by: Przemyslaw Gomulka
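The decoration pattern this commit message describes can be sketched outside Elasticsearch with a hypothetical parser interface; the real XContentParser and document-size types are not reproduced here, and all names below are illustrative:

[source,java]
----
import java.util.Map;

// Hypothetical minimal parser interface standing in for XContentParser.
interface Parser {
    Map<String, Object> map();
}

// A decorator meters the work done by the wrapped parser instead of parsing itself.
class MeteringParserDecorator implements Parser {
    private final Parser delegate;
    private long mapCalls = 0;

    MeteringParserDecorator(Parser delegate) {
        this.delegate = delegate;
    }

    @Override
    public Map<String, Object> map() {
        mapCalls++; // count, then delegate
        return delegate.map();
    }

    long meteredCalls() {
        return mapCalls;
    }
}

public class MeteringSketch {
    public static void main(String[] args) {
        Parser raw = () -> Map.of("field", "value");
        MeteringParserDecorator metered = new MeteringParserDecorator(raw);
        metered.map();
        metered.map();
        System.out.println(metered.meteredCalls()); // 2
    }
}
----

In the PR itself, a decorator of this shape is threaded through parsing twice: once for the raw update, yielding the ingest size, and once for the final merged document, yielding the storage size.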
---
...teringParserDecoratorWithPipelinesIT.java} | 22 ++++---
...=> XContentMeteringParserDecoratorIT.java} | 22 +++----
.../org/elasticsearch/TransportVersions.java | 1 +
.../bulk/BulkPrimaryExecutionContext.java | 10 ----
.../action/bulk/TransportShardBulkAction.java | 8 +--
.../bulk/TransportSimulateBulkAction.java | 4 +-
.../action/index/IndexRequest.java | 37 +++++++-----
.../action/update/UpdateHelper.java | 10 ++--
.../common/xcontent/XContentHelper.java | 12 ++--
.../index/mapper/DocumentParser.java | 8 +--
.../index/mapper/ParsedDocument.java | 24 +++++---
.../index/mapper/SourceToParse.java | 16 +++---
.../elasticsearch/ingest/IngestService.java | 15 +++--
.../internal/DocumentParsingProvider.java | 4 +-
.../internal/DocumentSizeAccumulator.java | 2 +-
.../internal/DocumentSizeObserver.java | 57 -------------------
.../XContentMeteringParserDecorator.java | 31 ++++++++++
.../internal/XContentParserDecorator.java | 17 ++++++
.../bulk/TransportShardBulkActionTests.java | 6 +-
.../index/IndexingSlowLogTests.java | 12 ++--
.../index/engine/InternalEngineTests.java | 4 +-
.../index/mapper/DynamicTemplatesTests.java | 4 +-
.../index/shard/RefreshListenersTests.java | 4 +-
.../index/translog/TranslogTests.java | 4 +-
.../ingest/IngestServiceTests.java | 20 +++----
.../index/engine/EngineTestCase.java | 3 +-
.../index/mapper/MapperServiceTestCase.java | 4 +-
27 files changed, 180 insertions(+), 181 deletions(-)
rename modules/ingest-common/src/internalClusterTest/java/org/elasticsearch/plugins/internal/{DocumentSizeObserverWithPipelinesIT.java => XContentMeteringParserDecoratorWithPipelinesIT.java} (85%)
rename server/src/internalClusterTest/java/org/elasticsearch/plugins/internal/{DocumentSizeObserverIT.java => XContentMeteringParserDecoratorIT.java} (88%)
delete mode 100644 server/src/main/java/org/elasticsearch/plugins/internal/DocumentSizeObserver.java
create mode 100644 server/src/main/java/org/elasticsearch/plugins/internal/XContentMeteringParserDecorator.java
create mode 100644 server/src/main/java/org/elasticsearch/plugins/internal/XContentParserDecorator.java
diff --git a/modules/ingest-common/src/internalClusterTest/java/org/elasticsearch/plugins/internal/DocumentSizeObserverWithPipelinesIT.java b/modules/ingest-common/src/internalClusterTest/java/org/elasticsearch/plugins/internal/XContentMeteringParserDecoratorWithPipelinesIT.java
similarity index 85%
rename from modules/ingest-common/src/internalClusterTest/java/org/elasticsearch/plugins/internal/DocumentSizeObserverWithPipelinesIT.java
rename to modules/ingest-common/src/internalClusterTest/java/org/elasticsearch/plugins/internal/XContentMeteringParserDecoratorWithPipelinesIT.java
index 16a8013ae9c4a..7f0910ea5cc4d 100644
--- a/modules/ingest-common/src/internalClusterTest/java/org/elasticsearch/plugins/internal/DocumentSizeObserverWithPipelinesIT.java
+++ b/modules/ingest-common/src/internalClusterTest/java/org/elasticsearch/plugins/internal/XContentMeteringParserDecoratorWithPipelinesIT.java
@@ -14,6 +14,7 @@
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.ingest.common.IngestCommonPlugin;
import org.elasticsearch.plugins.IngestPlugin;
import org.elasticsearch.plugins.Plugin;
@@ -32,7 +33,7 @@
import static org.hamcrest.Matchers.equalTo;
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST)
-public class DocumentSizeObserverWithPipelinesIT extends ESIntegTestCase {
+public class XContentMeteringParserDecoratorWithPipelinesIT extends ESIntegTestCase {
private static String TEST_INDEX_NAME = "test-index-name";
// the assertions are done in plugin which is static and will be created by ES server.
@@ -90,13 +91,13 @@ public DocumentParsingProvider getDocumentParsingProvider() {
// returns a static instance, because we want to assert that the wrapping is called only once
return new DocumentParsingProvider() {
@Override
- public DocumentSizeObserver newDocumentSizeObserver(DocWriteRequest<?> request) {
+ public XContentMeteringParserDecorator newMeteringParserDecorator(DocWriteRequest<?> request) {
if (request instanceof IndexRequest indexRequest && indexRequest.getNormalisedBytesParsed() > 0) {
long normalisedBytesParsed = indexRequest.getNormalisedBytesParsed();
providedFixedSize.set(normalisedBytesParsed);
- return new TestDocumentSizeObserver(normalisedBytesParsed);
+ return new TestXContentMeteringParserDecorator(normalisedBytesParsed);
}
- return new TestDocumentSizeObserver(0L);
+ return new TestXContentMeteringParserDecorator(0L);
}
@Override
@@ -111,17 +112,15 @@ public DocumentSizeReporter newDocumentSizeReporter(
}
}
- public static class TestDocumentSizeObserver implements DocumentSizeObserver {
+ public static class TestXContentMeteringParserDecorator implements XContentMeteringParserDecorator {
long mapCounter = 0;
- long wrapperCounter = 0;
- public TestDocumentSizeObserver(long mapCounter) {
+ public TestXContentMeteringParserDecorator(long mapCounter) {
this.mapCounter = mapCounter;
}
@Override
- public XContentParser wrapParser(XContentParser xContentParser) {
- wrapperCounter++;
+ public XContentParser decorate(XContentParser xContentParser) {
hasWrappedParser = true;
return new FilterXContentParserWrapper(xContentParser) {
@@ -134,10 +133,9 @@ public Map<String, Object> map() throws IOException {
}
@Override
- public long normalisedBytesParsed() {
- return mapCounter;
+ public ParsedDocument.DocumentSize meteredDocumentSize() {
+ return new ParsedDocument.DocumentSize(mapCounter, 0);
}
-
}
}
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/plugins/internal/DocumentSizeObserverIT.java b/server/src/internalClusterTest/java/org/elasticsearch/plugins/internal/XContentMeteringParserDecoratorIT.java
similarity index 88%
rename from server/src/internalClusterTest/java/org/elasticsearch/plugins/internal/DocumentSizeObserverIT.java
rename to server/src/internalClusterTest/java/org/elasticsearch/plugins/internal/XContentMeteringParserDecoratorIT.java
index 7797371a2823b..16fb618e97dfc 100644
--- a/server/src/internalClusterTest/java/org/elasticsearch/plugins/internal/DocumentSizeObserverIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/plugins/internal/XContentMeteringParserDecoratorIT.java
@@ -34,7 +34,7 @@
import static org.hamcrest.Matchers.equalTo;
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST)
-public class DocumentSizeObserverIT extends ESIntegTestCase {
+public class XContentMeteringParserDecoratorIT extends ESIntegTestCase {
private static String TEST_INDEX_NAME = "test-index-name";
@@ -125,8 +125,8 @@ public TestDocumentParsingProviderPlugin() {}
public DocumentParsingProvider getDocumentParsingProvider() {
return new DocumentParsingProvider() {
@Override
- public