Commit
Merge remote-tracking branch 'apache/master'
zachjsh committed Jul 1, 2024
2 parents 5d73047 + bd49ecf commit f30b5cd
Showing 560 changed files with 21,363 additions and 18,063 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/reusable-standard-its.yml
@@ -113,7 +113,7 @@ jobs:
- name: Collect service logs on failure
if: ${{ failure() && steps.run-it.conclusion == 'failure' }}
run: |
tar cvzf ./service-logs.tgz ~/shared/logs
tar cvzf ./service-logs.tgz ~/shared/logs ~/shared/tasklogs
- name: Upload Druid service logs to GitHub
if: ${{ failure() && steps.run-it.conclusion == 'failure' }}
2 changes: 1 addition & 1 deletion .github/workflows/revised-its.yml
@@ -50,7 +50,7 @@ jobs:
matrix:
#jdk: [8, 11, 17]
jdk: [8]
it: [HighAvailability, MultiStageQuery, Catalog, BatchIndex, MultiStageQueryWithMM, InputSource, InputFormat, Security]
it: [HighAvailability, MultiStageQuery, Catalog, BatchIndex, MultiStageQueryWithMM, InputSource, InputFormat, Security, Query]
#indexer: [indexer, middleManager]
indexer: [middleManager]
uses: ./.github/workflows/reusable-revised-its.yml
@@ -351,11 +351,11 @@ public void mergeChannels(Blackhole blackhole)
channels.stream().map(BlockingQueueFrameChannel::readable).collect(Collectors.toList()),
frameReader,
outputChannel.writable(),
FrameWriters.makeFrameWriterFactory(
FrameType.ROW_BASED,
FrameWriters.makeRowBasedFrameWriterFactory(
new ArenaMemoryAllocatorFactory(1_000_000),
signature,
sortKey
sortKey,
false
),
sortKey,
null,
@@ -92,6 +92,7 @@
import org.apache.druid.segment.incremental.OnheapIncrementalIndex;
import org.apache.druid.segment.serde.ComplexMetrics;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
import org.apache.druid.server.ResourceIdPopulatingQueryRunner;
import org.apache.druid.timeline.SegmentId;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -433,7 +434,8 @@ public void setup()
String queryName = schemaQuery[1];

schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schemaName);
query = SCHEMA_QUERY_MAP.get(schemaName).get(queryName);
query = (GroupByQuery) ResourceIdPopulatingQueryRunner.populateResourceId(SCHEMA_QUERY_MAP.get(schemaName)
.get(queryName));

generator = new DataGenerator(
schemaInfo.getColumnSchemas(),
@@ -762,12 +764,12 @@ public void queryMultiQueryableIndexWithSerde(Blackhole blackhole, QueryableInde
//noinspection unchecked
QueryRunner<ResultRow> theRunner = new FinalizeResultsQueryRunner<>(
toolChest.mergeResults(
new SerializingQueryRunner<>(
new DefaultObjectMapper(new SmileFactory(), null),
new SerializingQueryRunner(
toolChest.decorateObjectMapper(new DefaultObjectMapper(new SmileFactory(), null), query),
ResultRow.class,
toolChest.mergeResults(
(queryPlus, responseContext) -> toolChest.mergeResults(
factory.mergeRunners(state.executorService, makeMultiRunners(state))
)
).run(QueryPlus.wrap(ResourceIdPopulatingQueryRunner.populateResourceId(query)))
)
),
(QueryToolChest) toolChest
@@ -28,21 +28,22 @@
import org.apache.druid.query.QueryPlus;
import org.apache.druid.query.QueryRunner;
import org.apache.druid.query.context.ResponseContext;
import org.apache.druid.query.groupby.ResultRow;

public class SerializingQueryRunner<T> implements QueryRunner<T>
public class SerializingQueryRunner implements QueryRunner<ResultRow>
{
static {
NullHandling.initializeForTests();
}

private final ObjectMapper smileMapper;
private final QueryRunner<T> baseRunner;
private final Class<T> clazz;
private final QueryRunner<ResultRow> baseRunner;
private final Class<ResultRow> clazz;

public SerializingQueryRunner(
ObjectMapper smileMapper,
Class<T> clazz,
QueryRunner<T> baseRunner
Class<ResultRow> clazz,
QueryRunner<ResultRow> baseRunner
)
{
this.smileMapper = smileMapper;
@@ -51,16 +52,16 @@ public SerializingQueryRunner(
}

@Override
public Sequence<T> run(
final QueryPlus<T> queryPlus,
public Sequence<ResultRow> run(
final QueryPlus<ResultRow> queryPlus,
final ResponseContext responseContext
)
{
return Sequences.map(
baseRunner.run(queryPlus, responseContext),
input -> {
try {
return JacksonUtils.readValue(smileMapper, smileMapper.writeValueAsBytes(input), clazz);
return JacksonUtils.readValue(smileMapper, smileMapper.writeValueAsBytes(input.getArray()), clazz);
}
catch (JsonProcessingException e) {
throw new RuntimeException(e);
@@ -37,6 +37,7 @@
import org.apache.druid.query.DruidProcessingConfig;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex;
@@ -236,6 +237,14 @@ public String getFormatString()
})
private String schema;

@Param({
"singleString",
"fixedWidth",
"fixedWidthNonNumeric",
"always"
})
private String deferExpressionDimensions;

@Param({
// non-expression reference
"0",
@@ -360,13 +369,16 @@ public void setup()

try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
engine,
plannerFactory,
QUERIES.get(Integer.parseInt(query))
);
log.info("non-vectorized and vectorized results match");
}
catch (Throwable ignored) {
// the show must go on
catch (Throwable ex) {
log.warn(ex, "non-vectorized and vectorized results do not match");
}

final String sql = QUERIES.get(Integer.parseInt(query));

try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, "EXPLAIN PLAN FOR " + sql, ImmutableMap.of("useNativeQueryExplain", true))) {
@@ -378,8 +390,8 @@ public void setup()
.writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class))
);
}
catch (JsonProcessingException ignored) {

catch (JsonProcessingException ex) {
log.warn(ex, "explain failed");
}

try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
@@ -393,8 +405,8 @@ public void setup()
}
log.info("Total result row count:" + rowCounter);
}
catch (Throwable ignored) {

catch (Throwable ex) {
log.warn(ex, "failed to count rows");
}
}

@@ -411,7 +423,8 @@ public void querySql(Blackhole blackhole)
{
final Map<String, Object> context = ImmutableMap.of(
QueryContexts.VECTORIZE_KEY, vectorize,
QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize
QueryContexts.VECTORIZE_VIRTUAL_COLUMNS_KEY, vectorize,
GroupByQueryConfig.CTX_KEY_DEFER_EXPRESSION_DIMENSIONS, deferExpressionDimensions
);
final String sql = QUERIES.get(Integer.parseInt(query));
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) {
@@ -29,6 +29,7 @@
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.DruidProcessingConfig;
import org.apache.druid.query.QueryRunnerFactoryConglomerate;
@@ -91,6 +92,8 @@ public class SqlGroupByBenchmark
NestedDataModule.registerHandlersAndSerde();
}

private static final Logger log = new Logger(SqlGroupByBenchmark.class);

private static final DruidProcessingConfig PROCESSING_CONFIG = new DruidProcessingConfig()
{
@Override
@@ -349,12 +352,14 @@ public void setup()

try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
engine,
plannerFactory,
sqlQuery(groupingDimension)
);
log.info("non-vectorized and vectorized results match");
}
catch (Throwable ignored) {
// the show must go on
catch (Throwable ex) {
log.warn(ex, "non-vectorized and vectorized results do not match");
}
}

@@ -20,7 +20,6 @@
package org.apache.druid.benchmark.query;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
@@ -301,7 +300,7 @@ public String getFormatString()
private SqlEngine engine;
@Nullable
private PlannerFactory plannerFactory;
private Closer closer = Closer.create();
private final Closer closer = Closer.create();

@Setup(Level.Trial)
public void setup()
@@ -345,16 +344,19 @@ public void setup()
}
final QueryableIndex index;
if ("auto".equals(schema)) {
List<DimensionSchema> columnSchemas = schemaInfo.getDimensionsSpec()
.getDimensions()
.stream()
.map(x -> new AutoTypeColumnSchema(x.getName(), null))
.collect(Collectors.toList());
Iterable<DimensionSchema> columnSchemas = Iterables.concat(
schemaInfo.getDimensionsSpec()
.getDimensions()
.stream()
.map(x -> new AutoTypeColumnSchema(x.getName(), null))
.collect(Collectors.toList()),
Collections.singletonList(new AutoTypeColumnSchema("nested", null))
);
index = segmentGenerator.generate(
dataSegment,
schemaInfo,
DimensionsSpec.builder().setDimensions(columnSchemas).build(),
TransformSpec.NONE,
DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(),
transformSpec,
IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(),
Granularities.NONE,
rowsPerSegment
@@ -368,7 +370,7 @@ public void setup()
dataSegment,
schemaInfo,
DimensionsSpec.builder().setDimensions(ImmutableList.copyOf(columnSchemas.iterator())).build(),
TransformSpec.NONE,
transformSpec,
IndexSpec.builder().withStringDictionaryEncoding(encodingStrategy).build(),
Granularities.NONE,
rowsPerSegment
@@ -405,12 +407,14 @@

try {
SqlVectorizedExpressionSanityTest.sanityTestVectorizedSqlQueries(
engine,
plannerFactory,
QUERIES.get(Integer.parseInt(query))
);
log.info("non-vectorized and vectorized results match");
}
catch (Throwable ex) {
log.warn(ex, "failed to sanity check");
log.warn(ex, "non-vectorized and vectorized results do not match");
}

final String sql = QUERIES.get(Integer.parseInt(query));
@@ -424,11 +428,8 @@ public void setup()
.writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class))
);
}
catch (JsonMappingException e) {
throw new RuntimeException(e);
}
catch (JsonProcessingException e) {
throw new RuntimeException(e);
catch (JsonProcessingException ex) {
log.warn(ex, "explain failed");
}

try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
2 changes: 1 addition & 1 deletion dev/intellij-setup.md
@@ -55,7 +55,7 @@ The installation of a MySQL metadata store is outside the scope of this document
Use of other databases such as Postgres or Derby is entirely reasonable, but doing so is left as an exercise to the reader.

## ZooKeeper
This also assumes you have [ZooKeeper](http://zookeeper.apache.org/releases.html) running locally, which usually just involves downloading the latst distribution of ZooKeeper, doing some minor configuration in ZooKeeper's `conf/` directory (most defaults are fine), then running `./bin/zkServer.sh start` in the ZooKeeper directory.
This also assumes you have [ZooKeeper](http://zookeeper.apache.org/releases.html) running locally, which usually just involves downloading the latest distribution of ZooKeeper, doing some minor configuration in ZooKeeper's `conf/` directory (most defaults are fine), then running `./bin/zkServer.sh start` in the ZooKeeper directory.
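
A minimal sketch of those steps for a local install, assuming a POSIX shell; the version number and mirror URL are placeholders rather than values from this repository:

```bash
# download and unpack a ZooKeeper release (version and mirror are placeholders)
curl -LO https://archive.apache.org/dist/zookeeper/zookeeper-3.8.4/apache-zookeeper-3.8.4-bin.tar.gz
tar xzf apache-zookeeper-3.8.4-bin.tar.gz
cd apache-zookeeper-3.8.4-bin

# the shipped sample config is fine for local development (default client port 2181)
cp conf/zoo_sample.cfg conf/zoo.cfg

# start the standalone server
./bin/zkServer.sh start
```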

On macOS, you can also achieve this through the following commands

72 changes: 72 additions & 0 deletions dev/upgrade-calcite-parser
@@ -0,0 +1,72 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#--------------------------------------------------------------------

# Adopts base Calcite parser changes
#
# Establishes a git-friendly merge situation:
#
# Creates a commit which matches the original state of the Calcite parser;
# from that point it creates two alternates:
# * one with the local customizations
# * another with all the upstream updates
# and merges the two branches to obtain the upgraded state
#

[ $# -ne 2 ] && echo -e "updates base parser sources.\n usage: $0 <old_calcite_version> <new_calcite_version>" && exit 1

CALCITE_OLD=$1
CALCITE_NEW=$2

set -e
set -x

BRANCH=`git name-rev --name-only HEAD`

REPO=.git/calcite-upgrade
rm -rf "$REPO"
git clone $PWD --reference $PWD --branch $BRANCH $REPO

cd "$REPO"
git checkout -b curr-changes

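# regenerate the parser sources against the old Calcite version; committing them and
# immediately reverting keeps HEAD identical while recording the base parser state in history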
mvn -q generate-sources -pl sql -Dcalcite.version=$CALCITE_OLD -Pskip-static-checks
cp -r sql/target/calcite-base-parser/codegen/./ sql/src/main/codegen/./
git commit -m 'current reverse' -a
git revert --no-edit HEAD
# HEAD is now the same as before, but its parent commit holds the base Calcite parser sources

git branch base-changes curr-changes^
git checkout base-changes
git show | patch -p0 -R # undo temporarily to ensure Maven runs

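# regenerate the parser sources against the new Calcite version on the base-changes branch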
mvn -q generate-sources -pl sql -Dcalcite.version=$CALCITE_NEW -Pskip-static-checks
cp -r sql/target/calcite-base-parser/codegen/./ sql/src/main/codegen/./

git commit --allow-empty -m base-changes -a
git checkout -b new-state
git merge --no-edit curr-changes

echo ok
cd -

git remote remove calcite-upgrade &>/dev/null || echo -n
git remote add -f calcite-upgrade "$REPO"


echo "merge branch calcite-upgrade/curr-changes if satisfied with those changes"
