From 832effcd7571ff2bfc80987e9bf58690d9cc2d35 Mon Sep 17 00:00:00 2001 From: daidai Date: Wed, 27 Nov 2024 00:43:36 +0800 Subject: [PATCH] mc && hudi --- .../doris/datasource/ExternalTable.java | 8 +- .../datasource/hive/HMSExternalTable.java | 30 +- .../datasource/hive/source/HiveScanNode.java | 2 +- .../doris/datasource/hudi/HudiUtils.java | 51 ++++ .../datasource/hudi/source/HudiScanNode.java | 71 ++--- .../maxcompute/MaxComputeExternalTable.java | 30 ++ .../maxcompute/source/MaxComputeScanNode.java | 44 ++- .../translator/PhysicalPlanTranslator.java | 5 +- .../expression/rules/PartitionPruner.java | 2 +- .../rules/rewrite/PruneFileScanPartition.java | 4 +- .../trees/plans/logical/LogicalFileScan.java | 2 +- .../trees/plans/logical/LogicalHudiScan.java | 2 +- .../doris/planner/SingleNodePlanner.java | 4 +- .../hudi/test_hudi_partition_prune.out | 151 ++++++++++ .../hudi/test_hudi_snapshot.out | Bin 348526 -> 350438 bytes .../test_max_compute_partition_prune.out | 125 ++++++++ .../hudi/test_hudi_partition_prune.groovy | 275 +++++++++++++++++ .../hudi/test_hudi_snapshot.groovy | 10 +- .../test_max_compute_partition_prune.groovy | 282 ++++++++++++++++++ 19 files changed, 1026 insertions(+), 72 deletions(-) create mode 100644 regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out create mode 100644 regression-test/data/external_table_p2/maxcompute/test_max_compute_partition_prune.out create mode 100644 regression-test/suites/external_table_p2/hudi/test_hudi_partition_prune.groovy create mode 100644 regression-test/suites/external_table_p2/maxcompute/test_max_compute_partition_prune.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java index 3aee5550acf646..11accdd7450eef 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java @@ -377,10 +377,11 @@ public TableIndexes getTableIndexes() { * @return */ public SelectedPartitions initSelectedPartitions(OptionalLong snapshotId) { - if (!supportPartitionPruned()) { + if (!supportInternalPartitionPruned()) { return SelectedPartitions.NOT_PRUNED; } if (CollectionUtils.isEmpty(this.getPartitionColumns(snapshotId))) { + //no partition table. return SelectedPartitions.NOT_PRUNED; } Map nameToPartitionItems = getNameToPartitionItems(snapshotId); @@ -410,11 +411,12 @@ public List getPartitionColumns(OptionalLong snapshotId) { } /** - * Does it support partition cpruned, If so, this method needs to be overridden in subclasses + * Does it support Internal partition pruned, If so, this method needs to be overridden in subclasses + * Internal partition pruned : Implement partition pruning logic without relying on external APIs. * * @return */ - public boolean supportPartitionPruned() { + public boolean supportInternalPartitionPruned() { return false; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index 1c30fa24cfb51e..b8349fba21093c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -17,6 +17,7 @@ package org.apache.doris.datasource.hive; +import org.apache.doris.analysis.TableSnapshot; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.ListPartitionItem; @@ -31,6 +32,7 @@ import org.apache.doris.common.DdlException; import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.SchemaCacheValue; +import org.apache.doris.datasource.TablePartitionValues; import org.apache.doris.datasource.hudi.HudiUtils; import org.apache.doris.datasource.iceberg.IcebergUtils; import org.apache.doris.mtmv.MTMVBaseTableIf; @@ -40,6 +42,7 @@ import org.apache.doris.mtmv.MTMVSnapshotIf; import org.apache.doris.mtmv.MTMVTimestampSnapshot; import org.apache.doris.nereids.exceptions.NotSupportedException; +import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions; import org.apache.doris.qe.GlobalVariable; import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.BaseAnalysisTask; @@ -302,10 +305,33 @@ public List getPartitionColumns(OptionalLong snapshotId) { } @Override - public boolean supportPartitionPruned() { - return getDlaType() == DLAType.HIVE; + public boolean supportInternalPartitionPruned() { + return getDlaType() == DLAType.HIVE || getDlaType() == DLAType.HUDI; } + public SelectedPartitions initHudiSelectedPartitions(Optional tableSnapshot) { + if (getDlaType() != DLAType.HUDI) { + return SelectedPartitions.NOT_PRUNED; + } + + if (getPartitionColumns().isEmpty()) { + return SelectedPartitions.NOT_PRUNED; + } + TablePartitionValues tablePartitionValues = HudiUtils.getPartitionValues(tableSnapshot, this); + + + Map idToPartitionItem = tablePartitionValues.getIdToPartitionItem(); + Map idToNameMap = tablePartitionValues.getPartitionIdToNameMap(); + + Map nameToPartitionItems = Maps.newHashMapWithExpectedSize(idToPartitionItem.size()); + for (Entry entry : idToPartitionItem.entrySet()) { + nameToPartitionItems.put(idToNameMap.get(entry.getKey()), entry.getValue()); + } + + return new SelectedPartitions(nameToPartitionItems.size(), nameToPartitionItems, false); + } + + @Override public Map getNameToPartitionItems(OptionalLong snapshotId) { return getNameToPartitionItems(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java index e710bdb935d7bc..84ead8fe43cd0a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java @@ -82,7 +82,7 @@ public class HiveScanNode extends FileQueryScanNode { // will only be set in Nereids, for lagency planner, it should be null @Setter - private SelectedPartitions selectedPartitions = null; + protected SelectedPartitions selectedPartitions = null; private boolean partitionInit = false; private final AtomicReference batchException = new AtomicReference<>(null); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java index d7803b1a516f9e..7db2fc13098d9b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/HudiUtils.java @@ -17,24 +17,35 @@ package org.apache.doris.datasource.hudi; +import org.apache.doris.analysis.TableSnapshot; import org.apache.doris.catalog.ArrayType; +import org.apache.doris.catalog.Env; import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.StructField; import org.apache.doris.catalog.StructType; import org.apache.doris.catalog.Type; +import org.apache.doris.datasource.TablePartitionValues; +import org.apache.doris.datasource.hive.HMSExternalTable; +import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper; +import org.apache.doris.datasource.hudi.source.HudiCachedPartitionProcessor; import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; +import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; +import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.util.Option; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; public class HudiUtils { @@ -231,4 +242,44 @@ private static Type handleUnionType(Schema avroSchema) { } return Type.UNSUPPORTED; } + + public static TablePartitionValues getPartitionValues(Optional tableSnapshot, + HMSExternalTable hmsTable) { + TablePartitionValues partitionValues = new TablePartitionValues(); + if (hmsTable.getPartitionColumns().isEmpty()) { + //isn't partition table. + return partitionValues; + } + + HoodieTableMetaClient hudiClient = HiveMetaStoreClientHelper.getHudiClient(hmsTable); + HudiCachedPartitionProcessor processor = (HudiCachedPartitionProcessor) Env.getCurrentEnv() + .getExtMetaCacheMgr().getHudiPartitionProcess(hmsTable.getCatalog()); + boolean useHiveSyncPartition = hmsTable.useHiveSyncPartition(); + + if (tableSnapshot.isPresent()) { + if (tableSnapshot.get().getType() == TableSnapshot.VersionType.VERSION) { + // Hudi does not support `FOR VERSION AS OF`, please use `FOR TIME AS OF`"; + return partitionValues; + } + String queryInstant = tableSnapshot.get().getTime().replaceAll("[-: ]", ""); + + partitionValues = + HiveMetaStoreClientHelper.ugiDoAs( + HiveMetaStoreClientHelper.getConfiguration(hmsTable), + () -> processor.getSnapshotPartitionValues( + hmsTable, hudiClient, queryInstant, useHiveSyncPartition)); + } else { + HoodieTimeline timeline = hudiClient.getCommitsAndCompactionTimeline().filterCompletedInstants(); + Option snapshotInstant = timeline.lastInstant(); + if (!snapshotInstant.isPresent()) { + return partitionValues; + } + partitionValues = + HiveMetaStoreClientHelper.ugiDoAs( + HiveMetaStoreClientHelper.getConfiguration(hmsTable), + () -> processor.getPartitionValues(hmsTable, hudiClient, useHiveSyncPartition)); + } + return partitionValues; + } + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java index a8f2a362bfde8d..14f3656fb69d42 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java @@ -21,6 +21,7 @@ import org.apache.doris.analysis.TableSnapshot; import org.apache.doris.analysis.TupleDescriptor; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.ListPartitionItem; import org.apache.doris.catalog.PartitionItem; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; @@ -29,12 +30,10 @@ import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.FileSplit; import org.apache.doris.datasource.TableFormatType; -import org.apache.doris.datasource.TablePartitionValues; import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper; import org.apache.doris.datasource.hive.HivePartition; import org.apache.doris.datasource.hive.source.HiveScanNode; import org.apache.doris.datasource.hudi.HudiUtils; -import org.apache.doris.planner.ListPartitionPrunerV2; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.qe.ConnectContext; import org.apache.doris.spi.Split; @@ -68,7 +67,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Locale; @@ -259,50 +257,29 @@ private void setHudiParams(TFileRangeDesc rangeDesc, HudiSplit hudiSplit) { rangeDesc.setTableFormatParams(tableFormatFileDesc); } - private List getPrunedPartitions( - HoodieTableMetaClient metaClient, Option snapshotTimestamp) throws AnalysisException { + private List getPrunedPartitions(HoodieTableMetaClient metaClient) { List partitionColumnTypes = hmsTable.getPartitionColumnTypes(); if (!partitionColumnTypes.isEmpty()) { - HudiCachedPartitionProcessor processor = (HudiCachedPartitionProcessor) Env.getCurrentEnv() - .getExtMetaCacheMgr().getHudiPartitionProcess(hmsTable.getCatalog()); - TablePartitionValues partitionValues; - if (snapshotTimestamp.isPresent()) { - partitionValues = processor.getSnapshotPartitionValues( - hmsTable, metaClient, snapshotTimestamp.get(), useHiveSyncPartition); - } else { - partitionValues = processor.getPartitionValues(hmsTable, metaClient, useHiveSyncPartition); - } - if (partitionValues != null) { - // 2. prune partitions by expr - partitionValues.readLock().lock(); - try { - Map idToPartitionItem = partitionValues.getIdToPartitionItem(); - this.totalPartitionNum = idToPartitionItem.size(); - ListPartitionPrunerV2 pruner = new ListPartitionPrunerV2(idToPartitionItem, - hmsTable.getPartitionColumns(), columnNameToRange, - partitionValues.getUidToPartitionRange(), - partitionValues.getRangeToId(), - partitionValues.getSingleColumnRangeMap(), - true); - Collection filteredPartitionIds = pruner.prune(); - this.selectedPartitionNum = filteredPartitionIds.size(); - // 3. get partitions from cache - String dbName = hmsTable.getDbName(); - String tblName = hmsTable.getName(); - String inputFormat = hmsTable.getRemoteTable().getSd().getInputFormat(); - String basePath = metaClient.getBasePathV2().toString(); - Map partitionIdToNameMap = partitionValues.getPartitionIdToNameMap(); - Map> partitionValuesMap = partitionValues.getPartitionValuesMap(); - return filteredPartitionIds.stream().map(id -> { - String path = basePath + "/" + partitionIdToNameMap.get(id); - return new HivePartition( - dbName, tblName, false, inputFormat, path, partitionValuesMap.get(id), - Maps.newHashMap()); - }).collect(Collectors.toList()); - } finally { - partitionValues.readLock().unlock(); - } - } + this.totalPartitionNum = selectedPartitions.totalPartitionNum; + Map prunedPartitions = selectedPartitions.selectedPartitions; + this.selectedPartitionNum = prunedPartitions.size(); + + String dbName = hmsTable.getDbName(); + String tblName = hmsTable.getName(); + String inputFormat = hmsTable.getRemoteTable().getSd().getInputFormat(); + String basePath = metaClient.getBasePathV2().toString(); + + List hivePartitions = Lists.newArrayList(); + prunedPartitions.forEach( + (key, value) -> { + String path = basePath + "/" + key; + hivePartitions.add(new HivePartition( + dbName, tblName, false, inputFormat, path, + ((ListPartitionItem) value).getItems().get(0).getPartitionValuesAsStringList(), + Maps.newHashMap())); + } + ); + return hivePartitions; } // unpartitioned table, create a dummy partition to save location and inputformat, // so that we can unify the interface. @@ -392,7 +369,7 @@ public List getSplits() throws UserException { if (!partitionInit) { prunedPartitions = HiveMetaStoreClientHelper.ugiDoAs( HiveMetaStoreClientHelper.getConfiguration(hmsTable), - () -> getPrunedPartitions(hudiClient, snapshotTimestamp)); + () -> getPrunedPartitions(hudiClient)); partitionInit = true; } List splits = Collections.synchronizedList(new ArrayList<>()); @@ -454,7 +431,7 @@ public boolean isBatchMode() { // Non partition table will get one dummy partition prunedPartitions = HiveMetaStoreClientHelper.ugiDoAs( HiveMetaStoreClientHelper.getConfiguration(hmsTable), - () -> getPrunedPartitions(hudiClient, snapshotTimestamp)); + () -> getPrunedPartitions(hudiClient)); partitionInit = true; } int numPartitions = ConnectContext.get().getSessionVariable().getNumPartitionsInBatchMode(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java index dc3232f79f5f71..fdd9f977c9119c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java @@ -21,6 +21,7 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.MapType; +import org.apache.doris.catalog.PartitionItem; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.StructField; import org.apache.doris.catalog.StructType; @@ -50,7 +51,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Optional; +import java.util.OptionalLong; import java.util.stream.Collectors; /** @@ -71,6 +74,14 @@ protected synchronized void makeSureInitialized() { } } + public boolean supportInternalPartitionPruned() { + return true; + } + + + public List getPartitionColumns(OptionalLong snapshotId) { + return getPartitionColumns(); + } public List getPartitionColumns() { makeSureInitialized(); @@ -79,6 +90,23 @@ public List getPartitionColumns() { .orElse(Collections.emptyList()); } + public Map getNameToPartitionItems(OptionalLong snapshotId) { + if (getPartitionColumns().isEmpty()) { + return Collections.emptyMap(); + } + + TablePartitionValues tablePartitionValues = getPartitionValues(); + + Map idToPartitionItem = tablePartitionValues.getIdToPartitionItem(); + Map idToNameMap = tablePartitionValues.getPartitionIdToNameMap(); + + Map nameToPartitionItem = Maps.newHashMapWithExpectedSize(idToPartitionItem.size()); + for (Entry entry : idToPartitionItem.entrySet()) { + nameToPartitionItem.put(idToNameMap.get(entry.getKey()), entry.getValue()); + } + return nameToPartitionItem; + } + public TablePartitionValues getPartitionValues() { makeSureInitialized(); Optional schemaCacheValue = getSchemaCacheValue(); @@ -110,6 +138,8 @@ private TablePartitionValues loadPartitionValues(MaxComputeSchemaCacheValue sche /** * parse all values from partitionPath to a single list. + * In MaxCompute : Support special characters : _$#.!@ + * Ref : MaxCompute Error Code: ODPS-0130071 Invalid partition value. * * @param partitionColumns partitionColumns can contain the part1,part2,part3... * @param partitionPath partitionPath format is like the 'part1=123/part2=abc/part3=1bc' diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java index e0b84b0860e551..d3f02e4207b80a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java @@ -40,6 +40,7 @@ import org.apache.doris.datasource.maxcompute.MaxComputeExternalTable; import org.apache.doris.datasource.maxcompute.source.MaxComputeSplit.SplitType; import org.apache.doris.datasource.property.constants.MCProperties; +import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions; import org.apache.doris.nereids.util.DateUtils; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.spi.Split; @@ -50,6 +51,7 @@ import org.apache.doris.thrift.TTableFormatFileDesc; import com.aliyun.odps.OdpsType; +import com.aliyun.odps.PartitionSpec; import com.aliyun.odps.table.TableIdentifier; import com.aliyun.odps.table.configuration.ArrowOptions; import com.aliyun.odps.table.configuration.ArrowOptions.TimestampUnit; @@ -60,6 +62,7 @@ import com.aliyun.odps.table.read.split.impl.IndexedInputSplit; import com.google.common.collect.Maps; import jline.internal.Log; +import lombok.Setter; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -86,14 +89,26 @@ public class MaxComputeScanNode extends FileQueryScanNode { private static final LocationPath ROW_OFFSET_PATH = new LocationPath("/row_offset", Maps.newHashMap()); private static final LocationPath BYTE_SIZE_PATH = new LocationPath("/byte_size", Maps.newHashMap()); + @Setter + private SelectedPartitions selectedPartitions = null; + + public MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, + SelectedPartitions selectedPartitions, boolean needCheckColumnPriv) { + this(id, desc, "MCScanNode", StatisticalType.MAX_COMPUTE_SCAN_NODE, + selectedPartitions, needCheckColumnPriv); + } + public MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, boolean needCheckColumnPriv) { - this(id, desc, "MCScanNode", StatisticalType.MAX_COMPUTE_SCAN_NODE, needCheckColumnPriv); + this(id, desc, "MCScanNode", StatisticalType.MAX_COMPUTE_SCAN_NODE, + SelectedPartitions.NOT_PRUNED, needCheckColumnPriv); } public MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, - StatisticalType statisticalType, boolean needCheckColumnPriv) { + StatisticalType statisticalType, SelectedPartitions selectedPartitions, + boolean needCheckColumnPriv) { super(id, desc, planNodeName, statisticalType, needCheckColumnPriv); table = (MaxComputeExternalTable) desc.getTable(); + this.selectedPartitions = selectedPartitions; } @Override @@ -117,10 +132,25 @@ private void setScanParams(TFileRangeDesc rangeDesc, MaxComputeSplit maxComputeS rangeDesc.setSize(maxComputeSplit.getLength()); } - void createTableBatchReadSession() throws UserException { + boolean createTableBatchReadSession() throws UserException { List requiredPartitionColumns = new ArrayList<>(); List orderedRequiredDataColumns = new ArrayList<>(); + List requiredPartitionSpecs = new ArrayList<>(); + //if requiredPartitionSpecs is empty, get all partition data. + if (!table.getPartitionColumns().isEmpty() && selectedPartitions != SelectedPartitions.NOT_PRUNED) { + this.totalPartitionNum = selectedPartitions.totalPartitionNum; + this.selectedPartitionNum = selectedPartitions.selectedPartitions.size(); + + if (selectedPartitions.selectedPartitions.isEmpty()) { + //no need read any partition data. + return false; + } + selectedPartitions.selectedPartitions.forEach( + (key, value) -> requiredPartitionSpecs.add(new PartitionSpec(key)) + ); + } + Set requiredSlots = desc.getSlots().stream().map(e -> e.getColumn().getName()).collect(Collectors.toSet()); @@ -150,6 +180,7 @@ void createTableBatchReadSession() throws UserException { .withSettings(mcCatalog.getSettings()) .withSplitOptions(mcCatalog.getSplitOption()) .requiredPartitionColumns(requiredPartitionColumns) + .requiredPartitions(requiredPartitionSpecs) .requiredDataColumns(orderedRequiredDataColumns) .withArrowOptions( ArrowOptions.newBuilder() @@ -162,7 +193,7 @@ void createTableBatchReadSession() throws UserException { } catch (java.io.IOException e) { throw new RuntimeException(e); } - + return true; } @Override @@ -430,7 +461,10 @@ public List getSplits() throws UserException { if (desc.getSlots().isEmpty() || odpsTable.getFileNum() <= 0) { return result; } - createTableBatchReadSession(); + + if (!createTableBatchReadSession()) { + return result; + } try { String scanSessionSerialize = serializeSession(tableBatchReadSession); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 654ccc8ca1155a..af9490bc61531e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -579,7 +579,8 @@ public PlanFragment visitPhysicalFileScan(PhysicalFileScan fileScan, PlanTransla } else if (table instanceof TrinoConnectorExternalTable) { scanNode = new TrinoConnectorScanNode(context.nextPlanNodeId(), tupleDescriptor, false); } else if (table instanceof MaxComputeExternalTable) { - scanNode = new MaxComputeScanNode(context.nextPlanNodeId(), tupleDescriptor, false); + scanNode = new MaxComputeScanNode(context.nextPlanNodeId(), tupleDescriptor, + fileScan.getSelectedPartitions(), false); } else if (table instanceof LakeSoulExternalTable) { scanNode = new LakeSoulScanNode(context.nextPlanNodeId(), tupleDescriptor, false); } else { @@ -652,6 +653,8 @@ public PlanFragment visitPhysicalHudiScan(PhysicalHudiScan fileScan, PlanTransla if (fileScan.getTableSnapshot().isPresent()) { ((FileQueryScanNode) scanNode).setQueryTableSnapshot(fileScan.getTableSnapshot().get()); } + HudiScanNode hudiScanNode = (HudiScanNode) scanNode; + hudiScanNode.setSelectedPartitions(fileScan.getSelectedPartitions()); return getPlanFragmentForPhysicalFileScan(fileScan, context, scanNode, table, tupleDescriptor); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java index fac1a7f82d2cfb..ed783aa3d5a9b6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java @@ -55,7 +55,7 @@ public class PartitionPruner extends DefaultExpressionRewriter { /** Different type of table may have different partition prune behavior. */ public enum PartitionTableType { OLAP, - HIVE + EXTERNAL } private PartitionPruner(List partitions, Expression partitionPredicate) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java index d50219383072df..cc4b4e6ee17ec3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java @@ -59,7 +59,7 @@ public Rule build() { ExternalTable tbl = scan.getTable(); SelectedPartitions selectedPartitions; - if (tbl.supportPartitionPruned()) { + if (tbl.supportInternalPartitionPruned()) { selectedPartitions = pruneExternalPartitions(tbl, filter, scan, ctx.cascadesContext); } else { // set isPruned so that it won't go pass the partition prune again @@ -91,7 +91,7 @@ private SelectedPartitions pruneExternalPartitions(ExternalTable externalTable, Map nameToPartitionItem = scan.getSelectedPartitions().selectedPartitions; List prunedPartitions = new ArrayList<>(PartitionPruner.prune( - partitionSlots, filter.getPredicate(), nameToPartitionItem, ctx, PartitionTableType.HIVE)); + partitionSlots, filter.getPredicate(), nameToPartitionItem, ctx, PartitionTableType.EXTERNAL)); for (String name : prunedPartitions) { selectedPartitionItems.put(name, nameToPartitionItem.get(name)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java index 010c30d915d529..b07ed750c1bef0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFileScan.java @@ -43,7 +43,7 @@ */ public class LogicalFileScan extends LogicalCatalogRelation { - protected final SelectedPartitions selectedPartitions; + protected SelectedPartitions selectedPartitions; protected final Optional tableSample; protected final Optional tableSnapshot; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java index 629690889432b3..51e68eb07631ae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHudiScan.java @@ -84,7 +84,7 @@ protected LogicalHudiScan(RelationId id, ExternalTable table, List quali public LogicalHudiScan(RelationId id, ExternalTable table, List qualifier, Optional tableSample, Optional tableSnapshot) { this(id, table, qualifier, Optional.empty(), Optional.empty(), - SelectedPartitions.NOT_PRUNED, tableSample, tableSnapshot, + ((HMSExternalTable) table).initHudiSelectedPartitions(tableSnapshot), tableSample, tableSnapshot, Optional.empty(), Optional.empty()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java index d94ad0a2552240..a82ab4248ec600 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java @@ -76,7 +76,6 @@ import org.apache.doris.datasource.trinoconnector.source.TrinoConnectorScanNode; import org.apache.doris.qe.ConnectContext; import org.apache.doris.rewrite.mvrewrite.MVSelectFailedException; -import org.apache.doris.statistics.StatisticalType; import org.apache.doris.thrift.TPushAggOp; import com.google.common.base.Preconditions; @@ -1994,8 +1993,7 @@ private PlanNode createScanNode(Analyzer analyzer, TableRef tblRef, SelectStmt s break; case MAX_COMPUTE_EXTERNAL_TABLE: // TODO: support max compute scan node - scanNode = new MaxComputeScanNode(ctx.getNextNodeId(), tblRef.getDesc(), "MCScanNode", - StatisticalType.MAX_COMPUTE_SCAN_NODE, true); + scanNode = new MaxComputeScanNode(ctx.getNextNodeId(), tblRef.getDesc(), true); break; case ES_EXTERNAL_TABLE: scanNode = new EsScanNode(ctx.getNextNodeId(), tblRef.getDesc(), true); diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out b/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out new file mode 100644 index 00000000000000..2e827d3fc5b9cf --- /dev/null +++ b/regression-test/data/external_table_p2/hudi/test_hudi_partition_prune.out @@ -0,0 +1,151 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !one_partition_1_1 -- +1 Alice 2024 +2 Bob 2024 +3 Charlie 2024 + +-- !one_partition_2_1 -- +4 David 2025 +5 Eva 2025 + +-- !one_partition_3_all -- +1 Alice 2024 +2 Bob 2024 +3 Charlie 2024 +4 David 2025 +5 Eva 2025 + +-- !one_partition_4_all -- +5 Eva 2025 + +-- !one_partition_5_1 -- +3 Charlie 2024 + +-- !two_partition_1_1 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 + +-- !two_partition_2_1 -- +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !two_partition_3_2 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 + +-- !two_partition_4_all -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 +6 Frank EU 1 +7 Grace EU 1 +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !two_partition_5_1 -- + +-- !two_partition_6_1 -- +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !three_partition_1_1 -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 + +-- !three_partition_2_1 -- +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 + +-- !three_partition_3_3 -- +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_4_2 -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 +6 Frank US 2025 Q1 + +-- !three_partition_5_all -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 +4 David US 2024 Q2 +5 Eva US 2024 Q2 +6 Frank US 2025 Q1 +7 Grace US 2025 Q2 +8 Hannah EU 2024 Q1 +9 Ivy EU 2024 Q1 +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 +12 Mia EU 2025 Q3 +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_6_1 -- +8 Hannah EU 2024 Q1 +9 Ivy EU 2024 Q1 + +-- !three_partition_7_7 -- +6 Frank US 2025 Q1 +7 Grace US 2025 Q2 +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 +12 Mia EU 2025 Q3 +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_8_2 -- +7 Grace US 2025 Q2 + +-- !one_partition_6_0 -- + +-- !two_partition_7_0 -- + +-- !two_partition_8_0 -- + +-- !three_partition_9_0 -- + +-- !three_partition_10_0 -- + +-- !three_partition_11_0 -- + +-- !time_travel_two_partition_1_3 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 +6 Frank EU 1 + +-- !time_travel_two_partition_2_2 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 + +-- !time_travel_two_partition_3_1 -- +4 David US 2 +5 Eva US 2 + +-- !time_travel_two_partition_4_0 -- + +-- !time_travel_two_partition_5_0 -- + +-- !time_travel_two_partition_6_1 -- +1 Alice US 1 + diff --git a/regression-test/data/external_table_p2/hudi/test_hudi_snapshot.out b/regression-test/data/external_table_p2/hudi/test_hudi_snapshot.out index efad67ffbfa8c407fa42d0dd28f569f492bd1d12..2ce13dc272b739a7c028e2f8dbbe303148e312e5 100644 GIT binary patch delta 5036 zcmeHLU5H#o7WUrTZRo)rGmeh)8z-HaXf)N<@4Z!{pi$90WYxHnuwvp^x2k&5rl)(- zJ=rCOWdt7r{_A}xBBHqLgLw!We?o)oZbD>TaFqyZ@ImkgRCe)i9)-y6>Fy*%@gXd` zZXO11)j9Xv+g0aObH4MP`}OZH`~0QL9(*;}+?$=ecu)3N)(ck zIwY8)LNUXYqS=O(zg#?*7Yj;Kj7fpQg(*XXAj+mGnWAb6vl5qt;J8rM)ut=(l2V1W z8@FwHV9S=t3(9t~^39Py_rq9;V1jGvlx&+f-o2QB%*Nj1ItF43i3K-XQjp8nln_#qk8 zT1`bsNWqg>z^c`HZML$o+q&WuAtj<1Wh^7l56+pyIxMt7B5@3HmSCi{P$<<{#}qTJ zQqys3!OhLo^c*ZuOe+g*lhF%qw%!0|?kU#i7b^8y+xlBwt+wSJr*{;$gEHT!82A+v zC6cXvYdC_ivt759iqmy#$2aOmT5oh#J3FjRdUvDZGX0OC0HXO*ZX)}5`^Xx@xw8pJ zmT-eOXDO1x8sy5>>ezunr2d7&x&A9_v+lFV6L*wR}}s62cnS)I3uex;fSU&oL0;dq^yz%r!FlkCR8eL zc6@x&KX)*u{0M;{FM{AhT2j;c8J zeTRZ8J^pn#FoC&{iNIx)2uBewON^wE5-CoSL^-RiHvV5PM-yYVZWg-d>2#yI=j?zf z_2kK5%pd$LAjRn&^||?Gy*9qJQmyJ!$JPA@w(6Q|Hns1&I~WYG-}QPh5s+;0y7jHA zCWCdk?8wc-5fQ@wZ9ZI^-L!I3>sPx%na_IPA8R!Sg2#LPu@lktUOp7!*1)yFI}`q? z!{KB|di)_9v}JbR;N%^o$1-EE69R(J5EDrO2OMZ+XOdL1lyRmCq(@=toJ|wN2!I6F zR-lwyf^^IiKny1}h1dMilhNS5qtij_=ydpLpMUH(;o#MTk{NnVfa72ID(Ii;CL};p z92-s%iD?2GLP99(G_tWQ$IduN4ubFufrl}dh$M=~e(4Xv$TDCp1J*KNoiBjpzyE7+ zO{8XG|LkP2*7uL(`=$s+m=?-br8u=1Q4F<&t1>|<7BSK!HI^_cG-sUzmuC`Ob`rdG z`0eQB$m9LNQ0v%y_-5YUc_JA0tNQaBcv(<}6Rc63v}=KN5A=pUV3~rD&w}@L!@QdRuM!H~Djb%|0M~t)nM%-}U+JALfSsL%t`ZQ&w}} zp$cjOPoRZNEnuxmlqf5j@E90im+wg_plg*P_Nz;|WxluEpe{G4=S_p^mt<~To{*nn ze5r^PAxzP%-Y=^#`_(?3?{9kUMdwTv29+zkh delta 11237 zcmeHNdypJQea_78$zH2)ES(}FTgH8*zO5>iy9 z$k($gdvzv-{*P7j+SS}nPxtTf{l4$_gVP&-_Qx9^`ZMd`m7SktTXs+?O3L>*Pol_S zJdPbs1a-oIDu-~zeL|TEU43TQx^4LqZd9d^2Ap{%Y%X!RL#adDBZSS+aE6kK7lw>@ zrFL^sm3So#OQSD6^EBP-uldI>;{w|V_G1G$6uO@`(!YNKS9R>m+l!6&wc5Da*Y5r} z;f@=W?wpj3rS^QaF*o_<@?<=pWi4f{ZZ?O-?zODdvsQcZ?$Y}v{Wc%4}9w37iw8!uG(Hq)Z~oA{EF}~Et-DL8lGZ2@_gnd4v!@c zX%stB`iT=$u7V(-e&Xu-CyTo(GO1S^)mFPH+tsXLhSjRp7rFzC7x7kRpv`KGM+8`_ zS3YZvS)pF{fKBw*hX#sUWUIU%FJ%6(H~0ftvt2$Qo5xgB?-?j;vN`WeY@gKkuC>Qg z8A(Q^>%;*wV^e_>@icL$Br1xTpZX*)KUHn3`q750Axehw(@+aviGt*bSmMOq92J%Gy~&pPjWf6jRx1>oYeM1`8e``nmh< zu})(z-MYUzxA^O4P;5wIkCp_X85ZeO-nFPt({DL@6hV z1Z*=Qc*{hewTpvJ*W%;8r8=24kC`DxwJb~er-pj^>^JQ3KUiNN8Qd?`1Ue9k=@mb{=B8vH!&kQvIc zRVeX@=k~=NL=hYNG3*n=Wd|p0d!PTnbPMO-$|h#J-&EiI(lfU|eeha*>iw>8C_%_+ zO1PV`Y*<7!t}e)~X)R=}YP)&@#)U|cHH=G6o}91NRDbB$FPB<~D~WcTf$fD*aDAp; z+&bk1ZiO-GhGbRTpdPIdAvoh5`l+FYAB9{+JaPn!jo}l_o5$g$K^g>#66LdKwY8}e z2y7T`|B^2>vt;pAcZ*c5io5!kJ|v_4WJ(dZ0ZEjj5)n9@at{V6LdO?w9P=<#O6b#f6bG%kK7LPe za424^wHKQgjlrXS1twmzhdbmmMg8+hy8_o-Y|7W0;XZjxm2b(Cs+JEIH|Pm>U|N6u zbH%Y`UkMf>x+Y2}pyEvef(SYfac#RyzR+K#oD&)c}#d*BH=~Cceo4_N2G)~(vPK#65m7SD0x@F z^Ldc?fdf;3PkEu|$S_Lr!mdbT*H?r@xJ`)2Be5@?gu&r>fHjGN1o;Qc6RR*Ggmc^$ z5rx&`)JbOjlsMcEr6Xd7&%#4s;Y!3QHTNY79&5k@%nygeN6bNT&cl)Cr7Dj7lJ~)v z4_hlQAGUwFPT%n-_Ruay!lPlyWz=UPj2%QLtd1*U1b~=0f+qq`@dZ=*Q9iHO)eg=Sxhxcu-1ZtpS9XDckW2d%M(?TX%T!P86QLNnMb64aPw=hC3lh=V8@L-0oAR^qK{eCr#@2{ zpY#CGD1o8DaQkr!B|*h!A#<5Nv#~flhZqy2EO2l{05S_7r4e^L#o=hm1#%Iq9jd8n zIeKZ$R~J@0mW--3lS%sg#|wiB>70K5t!r**szxI9%OAIf^wwV$M)G(`zgU>kU-%p3 z_VIChWJh=uC^m`1KnB7ANP!I|1cUOJ>$t8@SRBNGpX$?71A{=J4kIO(rjZw>2%<#7 zp9I1KTmt??ViCI$myt+eN_ZCsSk8fs`b0l9ZjUYh=I6+ghp0?>iMzeU^BmVZLc$py z7-I@tfV<%0WOV1~8mxA5Y$uN8zx_iT2k$pVg>B(2IIQ{Z7w>AAU)v8%oxj1I0COj* zwJxU3QR>NTv6e@zM5U?$;5#!}Z6r!1{mBO?<^=vTYv6BdJ>)8wOLOG9m6HmmArrwW z6QEvD0p!u2imcIT54lt%+;_Yn@(?{R=@Ii0LR2DE$l>~aq9?XkgL@>#w<_y2)$zqD zVm!WX1827{7;%=SNKt(zKm)%*^}52qh<;~Oq@#Cai;cPR4U5gWF3#*?k9~`=Y;{g; z-c)pnALh{G@oTJ+<&Yxer9jgmq6y-qqKI4oXJHy6+7KcWFCRlciiH`zZnyy2O}LsF zZcBF0b7t86BZUK1a;%SF4{X^|M-!brXq7sTT((A+_uG}NH#biLI2qKPPe*U)CST2B zg_iX7mc6OS;dxsA8^GLEhemvf_H_ zgOhVvKE3ijbE;(k=6&-jJip?0PTfD&`T2KtkGuzkL3w|*)#_Ciy@a$sThfoVfiM|> zv_2aS?9d+_wKvDimBgpQNmD;Udcpk4I0_t>5HEDySR$RS9u(4DK7IYrBU@O&Tr4(L zc|CGL#n-QF7#P#h!`ALK2Y{Ta3;LA<_NLN>z}40dH42*x0rxtmyvdLMwY^?{e%c;h zk;m<42lR)TJ-RDA>IwkCP$kp_7?uH2oR2Jq)J2?Bd4WucO9K|^C;qdz39v8@fR7{O z6-kldcx*su%$dXRLTOBVWCoL0T$l9n3Ss)d2as1T;TebpL?D@7z@FxL7 zczomiINpQZ6w)^ufV+&nyO|(^&HT)?2{KEjsK$0j_tZU}=$^iH>eg4k1sHyM=lgmw z4N!!-$VhNp0AN(p7i=v*=k>vo>XzJ*ahRFOx{~_*f;Bj>ryi+hXX1O?i%9)}mXWo! zOlgICE(40s|NhmYnYk5!kQau%%Ml2Q20Sns1(8psDf)_o z)C{NJ*c$0W=JY6(*Y>k5Sftb@=Vx?Z3H-S}j2=bJ7h=lQIyYMO2y*f1b` zQ4#RK5zE&PtrSS-k*kZFR+s39GR#q1AN-%7NLLVlT$|UeG!CyIU+bAI#`p*<32$gizQY zW@ocTqpI}jm+Z|21Rs62U7XMl+17?x>IxBbK@+8vQx8^QzBwAwqac@PNH`0agd1{T z1p_isr?`&iO3(Mhgn0=yZnzOqrIe;5ijz3PtRklX&iRg@L^_z$gacim7$ERNQ$ZMb z7lb6x^l{|mOL$005EMz?YmUgk9UXD$3PtNctV|@*Vp|k2PWO7d#h8rkB8cxZda+YD)bo&XJRok9QskLx9%uzE1+D}KfB5vDj;0z^FIOw!#c;`M%LaSYl+EGs7v&@zpyu0$Mo5+ z6^4rfY@RW_9i2z^Z<X*!r(;|#GWfU@~7MMBfE;l^%o^c zPxy%K>vVBUp9>1Rr{1ojtSRTqhk*iAyWiFhWl7VJANT8ncf;I35$H#sx2HPy%8_Z; z4LG4doqj@*fYJp1@B6M3fTaw8S^z$%KK1YRptc^i2B$FljjUdc&Afv?JD~uVaM>fv zXdr+v_B>#XY3pu#_vN5`RQVlQQ=k32!eHn8w}%U$c{=AmF=Z36vezBB+}7uwvqo0F z`YHR-ft7EbFN6hs?s|J1}biT3zY z_R8Z=6@Kw1{kG=|qdPB=9#;C)^M(Cy@WcJC{02Y#20#1;Kb%V?{}(^3-P88Yl_$Sh z_^2U>uh7?j+upMBi8F;=MNP&EmrfFQqt6>G@f$4h|4Wv5#jwQNmH`t|XiW)NR|fbI zhk!;Q3Um}tBnI*YBZUlq1rv!c0^n%md~m3qQF|$HL7IpNN*d~yNZ~dgJQ!gl07}Vm zJAPNDGIUaiHWFYKTyl!r+`vx)l29j}4dYyM!VeRIn_ZVh%rJt1xi4ap5I12?oInoX zF^G66W3Y3957k-vUTO$u4TgB=1N(|!-ZyZ5oqi@Lj_h(dIASiKY=PQ#M38{hP9yL* zE(w8~e8`g+3`+uS{m>c=jp|bhXzWRkafi795SB|K0I?*2EGAtgK@=*~PN0ch7rZ#Q z6_6KHpB^fXE?>%nN*rR652pAvat}-~MLADrK&^NXqG&(4*Qi~MBI32udg5+;9K&u1 z;a)AB5Btu^t06Zpb%h#83iZ5#Q1?M|G1aqdf&k0uz4vC-hJNMy_NMW!IGW3HMo=;^ z7nMlcFn>2gUwYz?ikpfaWnND2mR(wL!Qy(W$vtqbzDpskni6{|l|E&W1p666zvm2B z62%v)eHFMqh?J0Z77hdAf7%`x#{*CHq~-2|``|U`!mA*^bbj?5E#A5WuJORR!iKf^ zkPoQ{tESI9UEB=%8RBaW@QsEkl!X2T@)is#Dwz~zt>DlefbJm+G8!mT;D5*3U@ht= zzF`k;X)Uz?G|ZcyG#Xsr^QjB5_(#QIP2Op((O=Tw;r2E`hLvZ*oA>0fE)#c~I$3^4 zQ#Lex(H<-^<_Uf3$>NkH^ns5Qcda};HSnd&^oO?$4DS+0LF)#Rr<6*FSrNJ>xD1%7 zzcE^prk)UzfD$+6xCvrH6r;*@q1MpI@Q@*RNU)Cr;uA|FU-%M59Bh%om|==63@(`B zr97mB3L@knvsoV}bA*Z+vxOjR84qcHO$&>gNO8dSU2VO0&pKRq*sr$^_e_~Hj~aTE5tmd!8@XTA4Apr9zd6BQZb##+aw`y#)X zH=L`v0+9+nJX!2A9uh2VS0Kri*yfyURPXQhPZ(F~8cfyncI-h7UyVVa_cwP`3x<|G z9To>KH^y<3tS{u`$3Q?q9O`xNwzf`$6`#YMUZ<9Xv=U$)8S3UrVR$N$zVuPY`3j68 zSDsT)jKlY0?ou8hm zOz&yU*VGbJ(PQ1I>DhEH%g)O7JYKJ%<$v2FMTqmkt6dArv76!_w7MXK`GNHe5aOXI zgO!0m>m^`UUe~n%D*SaivVxS*%o>Ks@6LTItE+akuDV4%qE;L2AbpOBz6}W7diK0M ztRLD@q?g?YO*Ct_^`2+#!L?mIc@H`?YWnP1^n5^gdF34tL=RabQy|0zsBeTM7Ngqr zO&*J4&+)y4C<^CGMCa^{gI9O;4`V2OJr3O#Abi=Y|3e<4{-0QDZPt(cN0D#62}+Y` zWN4N5)>N0{>CWXL8P8)h$B)@V9WuUM{}(B)(;wP0fc_~`V#q4ogXR>aV32vPwj%qY z0|d1>4j|-wefIO#M*aOQ1BaF`<@S2LsTJ-F{888HC8AHrqw{-^1XF-SC^ z_c)K>8L^L5^|6+aqnVTe#G8P2p4+rz>s+;&v&(bM>|}es-;$7zDy|`wwrG3Qu8ed^ z)AAv>YaSfB%V%^K{e@rJLq+fzU~!MwQ;%IYl1d6bBr%Ml4|Kx?WDRK;7Q-+3GK!&t zz0UqmR8_b0rO&KP3rH^Jw=#5R_1n20z`o8_kpm&Mln+%Q4mA@ix!7{!^>gvln=xWKuW+a+Xxi&j(uZKI+p*u(*m*5ZCLdw*yC)p~Meo6J zyFpP#!Z5c#a0CaVj7;>NK^?pm6w#XOcA>Qvdi^tXF5-_ofOSTB7#6P#7O~-HI=~)$u=sV1XV-rl0s3 zqQ8&kuQWw(8i+km+F${lP(=`MdCa&_AQ)NR0`r#Fw`3h_WG8DX={B4!8TIou$Lq5) zj%0A#7Jct_i>^Jq*piL%ktTFkePy$--43Wee!DfTe|69r4ba0tsDw%Zl@4ykB$n6k z#cl$5J91U*Dc3xrUs6Qa(DEyV%~xIA6h;w~e^?mPFHPE1ANYZ_W@r}qHt!(o@7voW zn;rV(h9V8Uuw#8_5`BB4N6|K73IIUxKpK-nwE{?lejE=1kcTF=NU+tz$hyLIe>6uA zWVMFIhrX%sLQw&oT^Z?o>tl3$HcOVuH#D=Xe^|MUco=?&ac#6c-Z_14qC-YYEBoAm zf8Mq%5*C7k^&I5~@FInZ1+5Lhpi!8}FmM&R{7}xsPy}Uw!YE{^aW*6*e8wB`7>-B5 z@DkHM1AItgCBVZoydMW8i>4jZz7xk1%}*TdH34ozLuJfB4?FZmkiIBb(6%4~=A&WB oOT$Qz6lQ|^7#QZ1;pZU3;x= 3 ORDER BY id;""" + + def two_partition_1_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 1 ORDER BY id;""" + def two_partition_2_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;""" + def two_partition_3_2 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' ORDER BY id;""" + def two_partition_4_all = """SELECT id,name,part1,part2 FROM two_partition_tb ORDER BY id;""" + def two_partition_5_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 2 AND id > 5 ORDER BY id;""" + def two_partition_6_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;""" + + def three_partition_1_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;""" + def three_partition_2_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2025 AND part3 = 'Q2' ORDER BY id;""" + def three_partition_3_3 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 ORDER BY id;""" + def three_partition_4_2 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q1' ORDER BY id;""" + def three_partition_5_all = """SELECT id,name,part1,part2,part3 FROM three_partition_tb ORDER BY id;""" + def three_partition_6_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;""" + def three_partition_7_7 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part2 = 2025 ORDER BY id;""" + def three_partition_8_2 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q2' AND id BETWEEN 6 AND 10 ORDER BY id;""" + + + + qt_one_partition_1_1 one_partition_1_1 + explain { + sql("${one_partition_1_1}") + contains "partition=1/2" + } + + qt_one_partition_2_1 one_partition_2_1 + explain { + sql("${one_partition_2_1}") + contains "partition=1/2" + } + + qt_one_partition_3_all one_partition_3_all + explain { + sql("${one_partition_3_all}") + contains "partition=2/2" + } + + qt_one_partition_4_all one_partition_4_all + explain { + sql("${one_partition_4_all}") + contains "partition=2/2" + } + + qt_one_partition_5_1 one_partition_5_1 + explain { + sql("${one_partition_5_1}") + contains "partition=1/2" + } + + + qt_two_partition_1_1 two_partition_1_1 + explain { + sql("${two_partition_1_1}") + contains "partition=1/4" + } + + qt_two_partition_2_1 two_partition_2_1 + explain { + sql("${two_partition_2_1}") + contains "partition=1/4" + } + + qt_two_partition_3_2 two_partition_3_2 + explain { + sql("${two_partition_3_2}") + contains "partition=2/4" + } + + qt_two_partition_4_all two_partition_4_all + explain { + sql("${two_partition_4_all}") + contains "partition=4/4" + } + + qt_two_partition_5_1 two_partition_5_1 + explain { + sql("${two_partition_5_1}") + contains "partition=1/4" + } + + qt_two_partition_6_1 two_partition_6_1 + explain { + sql("${two_partition_6_1}") + contains "partition=1/4" + } + + + + qt_three_partition_1_1 three_partition_1_1 + explain { + sql("${three_partition_1_1}") + contains "partition=1/10" + } + + qt_three_partition_2_1 three_partition_2_1 + explain { + sql("${three_partition_2_1}") + contains "partition=1/10" + } + + qt_three_partition_3_3 three_partition_3_3 + explain { + sql("${three_partition_3_3}") + contains "partition=3/10" + } + + qt_three_partition_4_2 three_partition_4_2 + explain { + sql("${three_partition_4_2}") + contains "partition=2/10" + } + + qt_three_partition_5_all three_partition_5_all + explain { + sql("${three_partition_5_all}") + contains "partition=10/10" + } + + qt_three_partition_6_1 three_partition_6_1 + explain { + sql("${three_partition_6_1}") + contains "partition=1/10" + } + + qt_three_partition_7_7 three_partition_7_7 + explain { + sql("${three_partition_7_7}") + contains "partition=7/10" + } + + qt_three_partition_8_2 three_partition_8_2 + explain { + sql("${three_partition_8_2}") + contains "partition=2/10" + } + + + // 0 partitions + def one_partition_6_0 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2023 ORDER BY id;""" + qt_one_partition_6_0 one_partition_6_0 + explain { + sql("${one_partition_6_0}") + contains "partition=0/2" + } + + def two_partition_7_0 = """SELECT id,name,part1 FROM two_partition_tb WHERE part1 = 'CN' AND part2 = 1 ORDER BY id;""" + qt_two_partition_7_0 two_partition_7_0 + explain { + sql("${two_partition_7_0}") + contains "partition=0/4" + } + + def two_partition_8_0 = """SELECT id,name,part1 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 3 ORDER BY id;""" + qt_two_partition_8_0 two_partition_8_0 + explain { + sql("${two_partition_8_0}") + contains "partition=0/4" + } + + def three_partition_9_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2023 AND part3 = 'Q1' ORDER BY id;""" + qt_three_partition_9_0 three_partition_9_0 + explain { + sql("${three_partition_9_0}") + contains "partition=0/10" + } + + def three_partition_10_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q4' ORDER BY id;""" + qt_three_partition_10_0 three_partition_10_0 + explain { + sql("${three_partition_10_0}") + contains "partition=0/10" + } + + def three_partition_11_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 AND part3 = 'Q4' ORDER BY id;""" + qt_three_partition_11_0 three_partition_11_0 + explain { + sql("${three_partition_11_0}") + contains "partition=0/10" + } + + + //time travel + def time_travel_two_partition_1_3 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241126012025218' order by id;" + def time_travel_two_partition_2_2 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241126012025218' where part1='US' order by id;" + def time_travel_two_partition_3_1 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241126012025218' where part2=2 order by id;" + def time_travel_two_partition_4_0 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241126012025218' where part2=10 order by id;" + + qt_time_travel_two_partition_1_3 time_travel_two_partition_1_3 + explain { + sql("${time_travel_two_partition_1_3}") + contains "partition=3/3" + } + + + qt_time_travel_two_partition_2_2 time_travel_two_partition_2_2 + explain { + sql("${time_travel_two_partition_2_2}") + contains "partition=2/3" + } + + qt_time_travel_two_partition_3_1 time_travel_two_partition_3_1 + explain { + sql("${time_travel_two_partition_3_1}") + contains "partition=1/3" + } + + qt_time_travel_two_partition_4_0 time_travel_two_partition_4_0 + explain { + sql("${time_travel_two_partition_4_0}") + contains "partition=0/3" + } + + + + + def time_travel_two_partition_5_0 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20231126012025218' order by id;" + qt_time_travel_two_partition_5_0 time_travel_two_partition_5_0 + explain { + sql("${time_travel_two_partition_5_0}") + contains "partition=0/0" + } + + def time_travel_two_partition_6_1 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241126012011733' order by id;" + qt_time_travel_two_partition_6_1 time_travel_two_partition_6_1 + explain { + sql("${time_travel_two_partition_6_1}") + contains "partition=1/1" + } + + sql """drop catalog if exists ${catalog_name};""" +} \ No newline at end of file diff --git a/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy b/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy index 53c09e6d5a9031..274b7efdbbb5ab 100644 --- a/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy +++ b/regression-test/suites/external_table_p2/hudi/test_hudi_snapshot.groovy @@ -52,7 +52,7 @@ suite("test_hudi_snapshot", "p2,external,hudi,external_remote,external_remote_hu qt_q05 """SELECT age, COUNT(*) AS user_count FROM ${table_name} GROUP BY age ORDER BY user_count DESC LIMIT 5;""" // Query users with purchase records and limit output - qt_q06 """SELECT user_id, purchases FROM ${table_name} WHERE array_size(purchases) > 0 ORDER BY user_id LIMIT 5;""" + qt_q06 """SELECT user_id, purchases FROM ${table_name} WHERE array_size(purchases) > 0 ORDER BY user_id,event_time LIMIT 5;""" // Query users with a specific tag and limit output qt_q07 """SELECT * FROM ${table_name} WHERE array_contains(tags, 'others') ORDER BY event_time LIMIT 5;""" @@ -64,7 +64,7 @@ suite("test_hudi_snapshot", "p2,external,hudi,external_remote,external_remote_hu qt_q09 """SELECT * FROM ${table_name} WHERE struct_element(struct_element(address, 'coordinates'), 'latitude') BETWEEN 0 AND 100 AND struct_element(struct_element(address, 'coordinates'), 'longitude') BETWEEN 0 AND 100 ORDER BY event_time LIMIT 5;""" // Query records with ratings above a specific value and limit output - qt_q10 """SELECT * FROM ${table_name} WHERE rating > 4.5 ORDER BY rating DESC LIMIT 5;""" + qt_q10 """SELECT * FROM ${table_name} WHERE rating > 4.5 ORDER BY rating DESC,event_time LIMIT 5;""" // Query all users' signup dates and limit output qt_q11 """SELECT user_id, signup_date FROM ${table_name} ORDER BY signup_date DESC LIMIT 10;""" @@ -73,13 +73,13 @@ suite("test_hudi_snapshot", "p2,external,hudi,external_remote,external_remote_hu qt_q12 """SELECT * FROM ${table_name} WHERE struct_element(address, 'postal_code') = '80312' ORDER BY event_time LIMIT 5;""" // Query users with profile pictures and limit output - qt_q13 """SELECT user_id, profile_picture FROM ${table_name} WHERE profile_picture IS NOT NULL ORDER BY user_id LIMIT 5;""" + qt_q13 """SELECT user_id, profile_picture FROM ${table_name} WHERE profile_picture IS NOT NULL ORDER BY user_id,event_time LIMIT 5;""" // Query users by signup date and limit output - qt_q14 """SELECT * FROM ${table_name} WHERE signup_date = '2024-01-15' ORDER BY user_id LIMIT 5;""" + qt_q14 """SELECT * FROM ${table_name} WHERE signup_date = '2024-01-15' ORDER BY user_id,event_time LIMIT 5;""" // Query the total count of purchases for each user and limit output - qt_q15 """SELECT user_id, array_size(purchases) AS purchase_count FROM ${table_name} ORDER BY purchase_count DESC LIMIT 5;""" + qt_q15 """SELECT user_id, array_size(purchases) AS purchase_count FROM ${table_name} ORDER BY purchase_count,event_time DESC LIMIT 5;""" } test_hudi_snapshot_querys("user_activity_log_cow_non_partition") diff --git a/regression-test/suites/external_table_p2/maxcompute/test_max_compute_partition_prune.groovy b/regression-test/suites/external_table_p2/maxcompute/test_max_compute_partition_prune.groovy new file mode 100644 index 00000000000000..e34569117a167f --- /dev/null +++ b/regression-test/suites/external_table_p2/maxcompute/test_max_compute_partition_prune.groovy @@ -0,0 +1,282 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + + +/* +CREATE TABLE one_partition_tb ( + id INT, + name string +) +PARTITIONED BY (part1 INT); +INSERT INTO one_partition_tb PARTITION (part1=2024) VALUES (1, 'Alice'); +INSERT INTO one_partition_tb PARTITION (part1=2024) VALUES (2, 'Bob'); +INSERT INTO one_partition_tb PARTITION (part1=2024) VALUES (3, 'Charlie'); +INSERT INTO one_partition_tb PARTITION (part1=2025) VALUES (4, 'David'); +INSERT INTO one_partition_tb PARTITION (part1=2025) VALUES (5, 'Eva'); +CREATE TABLE two_partition_tb ( + id INT, + name string +) +PARTITIONED BY (part1 STRING, part2 int); +INSERT INTO two_partition_tb PARTITION (part1='US', part2=1) VALUES (1, 'Alice'); +INSERT INTO two_partition_tb PARTITION (part1='US', part2=1) VALUES (2, 'Bob'); +INSERT INTO two_partition_tb PARTITION (part1='US', part2=1) VALUES (3, 'Charlie'); +INSERT INTO two_partition_tb PARTITION (part1='US', part2=2) VALUES (4, 'David'); +INSERT INTO two_partition_tb PARTITION (part1='US', part2=2) VALUES (5, 'Eva'); +INSERT INTO two_partition_tb PARTITION (part1='EU', part2=1) VALUES (6, 'Frank'); +INSERT INTO two_partition_tb PARTITION (part1='EU', part2=1) VALUES (7, 'Grace'); +INSERT INTO two_partition_tb PARTITION (part1='EU', part2=2) VALUES (8, 'Hannah'); +INSERT INTO two_partition_tb PARTITION (part1='EU', part2=2) VALUES (9, 'Ivy'); +INSERT INTO two_partition_tb PARTITION (part1='EU', part2=2) VALUES (10, 'Jack'); +CREATE TABLE three_partition_tb ( + id INT, + name string +) +PARTITIONED BY (part1 STRING, part2 INT, part3 STRING); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q1') VALUES (1, 'Alice'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q1') VALUES (2, 'Bob'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q1') VALUES (3, 'Charlie'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q2') VALUES (4, 'David'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q2') VALUES (5, 'Eva'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2025, part3='Q1') VALUES (6, 'Frank'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2025, part3='Q2') VALUES (7, 'Grace'); +INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2024, part3='Q1') VALUES (8, 'Hannah'); +INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2024, part3='Q1') VALUES (9, 'Ivy'); +INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2025, part3='Q2') VALUES (10, 'Jack'); +INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2025, part3='Q2') VALUES (11, 'Leo'); +INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2025, part3='Q3') VALUES (12, 'Mia'); +INSERT INTO three_partition_tb PARTITION (part1='AS', part2=2025, part3='Q1') VALUES (13, 'Nina'); +INSERT INTO three_partition_tb PARTITION (part1='AS', part2=2025, part3='Q2') VALUES (14, 'Oscar'); +INSERT INTO three_partition_tb PARTITION (part1='AS', part2=2025, part3='Q3') VALUES (15, 'Paul'); +select * from one_partition_tb; +select * from two_partition_tb; +select * from three_partition_tb; +show partitions one_partition_tb; +show partitions two_partition_tb; +show partitions three_partition_tb; +*/ + +suite("test_max_compute_partition_prune", "p2,external,maxcompute,external_remote,external_remote_maxcompute") { + + + def one_partition_1_1 = """SELECT * FROM one_partition_tb WHERE part1 = 2024 ORDER BY id;""" + def one_partition_2_1 = """SELECT * FROM one_partition_tb WHERE part1 = 2025 ORDER BY id;""" + def one_partition_3_all = """SELECT * FROM one_partition_tb ORDER BY id;""" + def one_partition_4_all = """SELECT * FROM one_partition_tb WHERE id = 5 ORDER BY id;""" + def one_partition_5_1 = """SELECT * FROM one_partition_tb WHERE part1 = 2024 AND id >= 3 ORDER BY id;""" + + def two_partition_1_1 = """SELECT * FROM two_partition_tb WHERE part1 = 'US' AND part2 = 1 ORDER BY id;""" + def two_partition_2_1 = """SELECT * FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;""" + def two_partition_3_2 = """SELECT * FROM two_partition_tb WHERE part1 = 'US' ORDER BY id;""" + def two_partition_4_all = """SELECT * FROM two_partition_tb ORDER BY id;""" + def two_partition_5_1 = """SELECT * FROM two_partition_tb WHERE part1 = 'US' AND part2 = 2 AND id > 5 ORDER BY id;""" + def two_partition_6_1 = """SELECT * FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;""" + + def three_partition_1_1 = """SELECT * FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;""" + def three_partition_2_1 = """SELECT * FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2025 AND part3 = 'Q2' ORDER BY id;""" + def three_partition_3_3 = """SELECT * FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 ORDER BY id;""" + def three_partition_4_2 = """SELECT * FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q1' ORDER BY id;""" + def three_partition_5_all = """SELECT * FROM three_partition_tb ORDER BY id;""" + def three_partition_6_1 = """SELECT * FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;""" + def three_partition_7_7 = """SELECT * FROM three_partition_tb WHERE part2 = 2025 ORDER BY id;""" + def three_partition_8_2 = """SELECT * FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q2' AND id BETWEEN 6 AND 10 ORDER BY id;""" + + + String enabled = context.config.otherConfigs.get("enableMaxComputeTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String ak = context.config.otherConfigs.get("ak") + String sk = context.config.otherConfigs.get("sk"); + String mc_db = "mc_datalake" + String mc_catalog_name = "test_max_compute_partition_prune" + + sql """drop catalog if exists ${mc_catalog_name};""" + sql """ + create catalog if not exists ${mc_catalog_name} properties ( + "type" = "max_compute", + "mc.default.project" = "${mc_db}", + "mc.access_key" = "${ak}", + "mc.secret_key" = "${sk}", + "mc.endpoint" = "http://service.cn-beijing-vpc.maxcompute.aliyun-inc.com/api" + ); + """ + sql """ switch ${mc_catalog_name} """ + sql """ use ${mc_db}""" + + qt_one_partition_1_1 one_partition_1_1 + explain { + sql("${one_partition_1_1}") + contains "partition=1/2" + } + + qt_one_partition_2_1 one_partition_2_1 + explain { + sql("${one_partition_2_1}") + contains "partition=1/2" + } + + qt_one_partition_3_all one_partition_3_all + explain { + sql("${one_partition_3_all}") + contains "partition=2/2" + } + + qt_one_partition_4_all one_partition_4_all + explain { + sql("${one_partition_4_all}") + contains "partition=2/2" + } + + qt_one_partition_5_1 one_partition_5_1 + explain { + sql("${one_partition_5_1}") + contains "partition=1/2" + } + + + qt_two_partition_1_1 two_partition_1_1 + explain { + sql("${two_partition_1_1}") + contains "partition=1/4" + } + + qt_two_partition_2_1 two_partition_2_1 + explain { + sql("${two_partition_2_1}") + contains "partition=1/4" + } + + qt_two_partition_3_2 two_partition_3_2 + explain { + sql("${two_partition_3_2}") + contains "partition=2/4" + } + + qt_two_partition_4_all two_partition_4_all + explain { + sql("${two_partition_4_all}") + contains "partition=4/4" + } + + qt_two_partition_5_1 two_partition_5_1 + explain { + sql("${two_partition_5_1}") + contains "partition=1/4" + } + + qt_two_partition_6_1 two_partition_6_1 + explain { + sql("${two_partition_6_1}") + contains "partition=1/4" + } + + + + qt_three_partition_1_1 three_partition_1_1 + explain { + sql("${three_partition_1_1}") + contains "partition=1/10" + } + + qt_three_partition_2_1 three_partition_2_1 + explain { + sql("${three_partition_2_1}") + contains "partition=1/10" + } + + qt_three_partition_3_3 three_partition_3_3 + explain { + sql("${three_partition_3_3}") + contains "partition=3/10" + } + + qt_three_partition_4_2 three_partition_4_2 + explain { + sql("${three_partition_4_2}") + contains "partition=2/10" + } + + qt_three_partition_5_all three_partition_5_all + explain { + sql("${three_partition_5_all}") + contains "partition=10/10" + } + + qt_three_partition_6_1 three_partition_6_1 + explain { + sql("${three_partition_6_1}") + contains "partition=1/10" + } + + qt_three_partition_7_7 three_partition_7_7 + explain { + sql("${three_partition_7_7}") + contains "partition=7/10" + } + + qt_three_partition_8_2 three_partition_8_2 + explain { + sql("${three_partition_8_2}") + contains "partition=2/10" + } + + + // 0 partitions + def one_partition_6_0 = """SELECT * FROM one_partition_tb WHERE part1 = 2023 ORDER BY id;""" + qt_one_partition_6_0 one_partition_6_0 + explain { + sql("${one_partition_6_0}") + contains "partition=0/2" + } + + def two_partition_7_0 = """SELECT * FROM two_partition_tb WHERE part1 = 'CN' AND part2 = 1 ORDER BY id;""" + qt_two_partition_7_0 two_partition_7_0 + explain { + sql("${two_partition_7_0}") + contains "partition=0/4" + } + + def two_partition_8_0 = """SELECT * FROM two_partition_tb WHERE part1 = 'US' AND part2 = 3 ORDER BY id;""" + qt_two_partition_8_0 two_partition_8_0 + explain { + sql("${two_partition_8_0}") + contains "partition=0/4" + } + + def three_partition_9_0 = """SELECT * FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2023 AND part3 = 'Q1' ORDER BY id;""" + qt_three_partition_9_0 three_partition_9_0 + explain { + sql("${three_partition_9_0}") + contains "partition=0/10" + } + + def three_partition_10_0 = """SELECT * FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q4' ORDER BY id;""" + qt_three_partition_10_0 three_partition_10_0 + explain { + sql("${three_partition_10_0}") + contains "partition=0/10" + } + + def three_partition_11_0 = """SELECT * FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 AND part3 = 'Q4' ORDER BY id;""" + qt_three_partition_11_0 three_partition_11_0 + explain { + sql("${three_partition_11_0}") + contains "partition=0/10" + } + + } +} \ No newline at end of file