From ba741cd16c99253dcb661d9aa46492fe315c58bf Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Tue, 8 Oct 2024 16:16:18 -0700 Subject: [PATCH] Add reproducer for pagination skipping bug Signed-off-by: Simeon Widdis --- .../sql/legacy/SQLIntegTestCase.java | 7 ++ .../sql/sql/PaginationWindowIT.java | 45 ++++++++- .../calcs_with_shards_index_mappings.json | 99 +++++++++++++++++++ 3 files changed, 149 insertions(+), 2 deletions(-) create mode 100644 integ-test/src/test/resources/indexDefinitions/calcs_with_shards_index_mappings.json diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index eed4e29c9c..c328070468 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -707,6 +707,13 @@ public enum Index { "calcs", getMappingFile("calcs_index_mappings.json"), "src/test/resources/calcs.json"), + // Calcs has enough records for shards to be interesting, but updating the existing mapping with shards in-place + // breaks existing tests. Aside from introducing a primary shard setting > 1, this index is identical to CALCS. + CALCS_WITH_SHARDS( + TestsConstants.TEST_INDEX_CALCS, + "calcs", + getMappingFile("calcs_with_shards_index_mappings.json"), + "src/test/resources/calcs.json"), DATE_FORMATS( TestsConstants.TEST_INDEX_DATE_FORMATS, "date_formats", diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/PaginationWindowIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/PaginationWindowIT.java index 246cbfc4a0..e829fdf490 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/PaginationWindowIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/PaginationWindowIT.java @@ -5,19 +5,24 @@ package org.opensearch.sql.sql; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_PHRASE; - import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + import org.json.JSONObject; import org.junit.After; import org.junit.Test; import org.opensearch.client.ResponseException; import org.opensearch.sql.legacy.SQLIntegTestCase; +import org.opensearch.sql.util.TestUtils; + +import static org.opensearch.sql.legacy.TestsConstants.*; public class PaginationWindowIT extends SQLIntegTestCase { @Override public void init() throws IOException { loadIndex(Index.PHRASE); + loadIndex(Index.CALCS_WITH_SHARDS); } @After @@ -92,4 +97,40 @@ public void testFetchSizeLargerThanResultWindowFails() throws IOException { () -> executeQueryTemplate("SELECT * FROM %s", TEST_INDEX_PHRASE, window + 1)); resetMaxResultWindow(TEST_INDEX_PHRASE); } + + @Test + public void testMultiShardPagesEqualsActualData() throws IOException { + // A bug made it so when pulling unordered data from an index with multiple shards, data gets lost if the fetchSize + // is not a multiple of the shard count. This tests that, for an index with 4 shards, pulling one page of 10 records + // is equivalent to pulling two pages of 5 records. + + var query = "SELECT key from " + TEST_INDEX_CALCS; + + var expectedResponse = new JSONObject(executeFetchQuery(query, 10, "jdbc")); + var expectedRows = expectedResponse.getJSONArray("datarows"); + + List expectedKeys = new ArrayList<>(); + for (int i = 0; i < expectedRows.length(); i++) { + expectedKeys.add(expectedRows.getJSONArray(i).getString(0)); + } + + var actualPage1 = new JSONObject(executeFetchQuery(query, 5, "jdbc")); + + var actualRows1 = actualPage1.getJSONArray("datarows"); + var cursor = actualPage1.getString("cursor"); + var actualPage2 = executeCursorQuery(cursor); + System.out.println(actualPage2.toString()); + + var actualRows2 = actualPage2.getJSONArray("datarows"); + + List actualKeys = new ArrayList<>(); + for (int i = 0; i < actualRows1.length(); i++) { + actualKeys.add(actualRows1.getJSONArray(i).getString(0)); + } + for (int i = 0; i < actualRows2.length(); i++) { + actualKeys.add(actualRows2.getJSONArray(i).getString(0)); + } + + assertEquals(expectedKeys, actualKeys); + } } diff --git a/integ-test/src/test/resources/indexDefinitions/calcs_with_shards_index_mappings.json b/integ-test/src/test/resources/indexDefinitions/calcs_with_shards_index_mappings.json new file mode 100644 index 0000000000..560e1d55e6 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/calcs_with_shards_index_mappings.json @@ -0,0 +1,99 @@ +{ + "mappings" : { + "properties" : { + "key" : { + "type" : "keyword" + }, + "num0" : { + "type" : "double" + }, + "num1" : { + "type" : "double" + }, + "num2" : { + "type" : "double" + }, + "num3" : { + "type" : "double" + }, + "num4" : { + "type" : "double" + }, + "str0" : { + "type" : "keyword" + }, + "str1" : { + "type" : "keyword" + }, + "str2" : { + "type" : "keyword" + }, + "str3" : { + "type" : "keyword" + }, + "int0" : { + "type" : "integer" + }, + "int1" : { + "type" : "integer" + }, + "int2" : { + "type" : "integer" + }, + "int3" : { + "type" : "integer" + }, + "bool0" : { + "type" : "boolean" + }, + "bool1" : { + "type" : "boolean" + }, + "bool2" : { + "type" : "boolean" + }, + "bool3" : { + "type" : "boolean" + }, + "date0" : { + "type" : "date", + "format": "year_month_day" + }, + "date1" : { + "type" : "date", + "format": "year_month_day" + }, + "date2" : { + "type" : "date", + "format": "year_month_day" + }, + "date3" : { + "type" : "date", + "format": "year_month_day" + }, + "time0" : { + "type" : "date", + "format": "date_time_no_millis" + }, + "time1" : { + "type" : "date", + "format": "hour_minute_second" + }, + "datetime0" : { + "type" : "date", + "format": "date_time_no_millis" + }, + "datetime1" : { + "type" : "date" + }, + "zzz" : { + "type" : "keyword" + } + } + }, + "settings": { + "index": { + "number_of_shards": 4 + } + } +}