Skip to content

Commit

Permalink
[enhance](mtmv)External partition prune #44415 #44567 #44673 (#44767)
Browse files Browse the repository at this point in the history
  • Loading branch information
zddr authored Nov 29, 2024
1 parent c3707db commit e03517e
Show file tree
Hide file tree
Showing 27 changed files with 383 additions and 105 deletions.
23 changes: 21 additions & 2 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import org.apache.doris.common.util.PropertyAnalyzer;
import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.datasource.mvcc.MvccSnapshot;
import org.apache.doris.mtmv.MTMVRefreshContext;
import org.apache.doris.mtmv.MTMVRelatedTableIf;
import org.apache.doris.mtmv.MTMVSnapshotIf;
Expand Down Expand Up @@ -983,6 +984,10 @@ public PartitionInfo getPartitionInfo() {
}

@Override
public Set<String> getPartitionColumnNames(Optional<MvccSnapshot> snapshot) throws DdlException {
return getPartitionColumnNames();
}

public Set<String> getPartitionColumnNames() throws DdlException {
Set<String> partitionColumnNames = Sets.newHashSet();
if (partitionInfo instanceof SinglePartitionInfo) {
Expand Down Expand Up @@ -3001,11 +3006,20 @@ public long getVisibleVersionTime() {
}

@Override
public PartitionType getPartitionType(Optional<MvccSnapshot> snapshot) {
return getPartitionType();
}

public PartitionType getPartitionType() {
return partitionInfo.getType();
}

@Override
public Map<String, PartitionItem> getAndCopyPartitionItems(Optional<MvccSnapshot> snapshot)
throws AnalysisException {
return getAndCopyPartitionItems();
}

public Map<String, PartitionItem> getAndCopyPartitionItems() throws AnalysisException {
if (!tryReadLock(1, TimeUnit.MINUTES)) {
throw new AnalysisException(
Expand All @@ -3026,12 +3040,17 @@ public Map<String, PartitionItem> getAndCopyPartitionItems() throws AnalysisExce
}

@Override
public List<Column> getPartitionColumns(Optional<MvccSnapshot> snapshot) {
return getPartitionColumns();
}

public List<Column> getPartitionColumns() {
return getPartitionInfo().getPartitionColumns();
}

@Override
public MTMVSnapshotIf getPartitionSnapshot(String partitionName, MTMVRefreshContext context)
public MTMVSnapshotIf getPartitionSnapshot(String partitionName, MTMVRefreshContext context,
Optional<MvccSnapshot> snapshot)
throws AnalysisException {
Map<String, Long> partitionVersions = context.getBaseVersions().getPartitionVersions();
long partitionId = getPartitionOrAnalysisException(partitionName).getId();
Expand All @@ -3041,7 +3060,7 @@ public MTMVSnapshotIf getPartitionSnapshot(String partitionName, MTMVRefreshCont
}

@Override
public MTMVSnapshotIf getTableSnapshot(MTMVRefreshContext context) {
public MTMVSnapshotIf getTableSnapshot(MTMVRefreshContext context, Optional<MvccSnapshot> snapshot) {
Map<Long, Long> tableVersions = context.getBaseVersions().getTableVersions();
long visibleVersion = tableVersions.containsKey(id) ? tableVersions.get(id) : getVisibleVersion();
return new MTMVVersionSnapshot(visibleVersion, id);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.PartitionItem;
import org.apache.doris.catalog.TableAttributes;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.constraint.Constraint;
Expand All @@ -28,6 +29,8 @@
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.mvcc.MvccSnapshot;
import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
import org.apache.doris.persist.gson.GsonPostProcessable;
import org.apache.doris.persist.gson.GsonUtils;
import org.apache.doris.statistics.AnalysisInfo;
Expand All @@ -40,13 +43,15 @@
import com.google.common.collect.Lists;
import com.google.gson.annotations.SerializedName;
import lombok.Getter;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
Expand Down Expand Up @@ -371,4 +376,52 @@ protected Optional<SchemaCacheValue> getSchemaCacheValue() {
ExternalSchemaCache cache = Env.getCurrentEnv().getExtMetaCacheMgr().getSchemaCache(catalog);
return cache.getSchemaValue(dbName, name);
}

/**
* Retrieve all partitions and initialize SelectedPartitions
*
* @param snapshot if not support mvcc, ignore this
* @return
*/
public SelectedPartitions initSelectedPartitions(Optional<MvccSnapshot> snapshot) {
if (!supportPartitionPruned()) {
return SelectedPartitions.NOT_PRUNED;
}
if (CollectionUtils.isEmpty(this.getPartitionColumns(snapshot))) {
return SelectedPartitions.NOT_PRUNED;
}
Map<String, PartitionItem> nameToPartitionItems = getNameToPartitionItems(snapshot);
return new SelectedPartitions(nameToPartitionItems.size(), nameToPartitionItems, false);
}

/**
* get partition map
* If partition related operations are supported, this method needs to be implemented in the subclass
*
* @param snapshot if not support mvcc, ignore this
* @return partitionName ==> PartitionItem
*/
public Map<String, PartitionItem> getNameToPartitionItems(Optional<MvccSnapshot> snapshot) {
return Collections.emptyMap();
}

/**
* get partition column list
* If partition related operations are supported, this method needs to be implemented in the subclass
*
* @param snapshot if not support mvcc, ignore this
* @return
*/
public List<Column> getPartitionColumns(Optional<MvccSnapshot> snapshot) {
return Collections.emptyList();
}

/**
* Does it support partition cpruned, If so, this method needs to be overridden in subclasses
*
* @return
*/
public boolean supportPartitionPruned() {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@
import org.apache.doris.datasource.SchemaCacheValue;
import org.apache.doris.datasource.hudi.HudiUtils;
import org.apache.doris.datasource.iceberg.IcebergUtils;
import org.apache.doris.datasource.mvcc.MvccSnapshot;
import org.apache.doris.mtmv.MTMVBaseTableIf;
import org.apache.doris.mtmv.MTMVMaxTimestampSnapshot;
import org.apache.doris.mtmv.MTMVRefreshContext;
import org.apache.doris.mtmv.MTMVRelatedTableIf;
import org.apache.doris.mtmv.MTMVSnapshotIf;
import org.apache.doris.mtmv.MTMVTimestampSnapshot;
import org.apache.doris.nereids.exceptions.NotSupportedException;
import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
import org.apache.doris.qe.GlobalVariable;
import org.apache.doris.statistics.AnalysisInfo;
import org.apache.doris.statistics.BaseAnalysisTask;
Expand Down Expand Up @@ -288,27 +288,45 @@ public List<Type> getPartitionColumnTypes() {
.orElse(Collections.emptyList());
}

@Override
public List<Column> getPartitionColumns() {
makeSureInitialized();
Optional<SchemaCacheValue> schemaCacheValue = getSchemaCacheValue();
return schemaCacheValue.map(value -> ((HMSSchemaCacheValue) value).getPartitionColumns())
.orElse(Collections.emptyList());
}

public SelectedPartitions getAllPartitions() {
@Override
public List<Column> getPartitionColumns(Optional<MvccSnapshot> snapshot) {
return getPartitionColumns();
}

@Override
public boolean supportPartitionPruned() {
return getDlaType() == DLAType.HIVE;
}

@Override
public Map<String, PartitionItem> getNameToPartitionItems(Optional<MvccSnapshot> snapshot) {
return getNameToPartitionItems();
}

public Map<String, PartitionItem> getNameToPartitionItems() {
if (CollectionUtils.isEmpty(this.getPartitionColumns())) {
return SelectedPartitions.NOT_PRUNED;
return Collections.emptyMap();
}

HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
.getMetaStoreCache((HMSExternalCatalog) this.getCatalog());
List<Type> partitionColumnTypes = this.getPartitionColumnTypes();
HiveMetaStoreCache.HivePartitionValues hivePartitionValues = cache.getPartitionValues(
this.getDbName(), this.getName(), partitionColumnTypes);
Map<Long, PartitionItem> idToPartitionItem = hivePartitionValues.getIdToPartitionItem();

return new SelectedPartitions(idToPartitionItem.size(), idToPartitionItem, false);
// transfer id to name
BiMap<Long, String> idToName = hivePartitionValues.getPartitionNameToIdMap().inverse();
Map<String, PartitionItem> nameToPartitionItem = Maps.newHashMapWithExpectedSize(idToPartitionItem.size());
for (Entry<Long, PartitionItem> entry : idToPartitionItem.entrySet()) {
nameToPartitionItem.put(idToName.get(entry.getKey()), entry.getValue());
}
return nameToPartitionItem;
}

public boolean isHiveTransactionalTable() {
Expand Down Expand Up @@ -739,34 +757,33 @@ public Set<String> getDistributionColumnNames() {
}

@Override
public PartitionType getPartitionType(Optional<MvccSnapshot> snapshot) {
return getPartitionType();
}

public PartitionType getPartitionType() {
return getPartitionColumns().size() > 0 ? PartitionType.LIST : PartitionType.UNPARTITIONED;
}

@Override
public Set<String> getPartitionColumnNames(Optional<MvccSnapshot> snapshot) {
return getPartitionColumnNames();
}

public Set<String> getPartitionColumnNames() {
return getPartitionColumns().stream()
.map(c -> c.getName().toLowerCase()).collect(Collectors.toSet());
}

@Override
public Map<String, PartitionItem> getAndCopyPartitionItems() {
HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
.getMetaStoreCache((HMSExternalCatalog) getCatalog());
HiveMetaStoreCache.HivePartitionValues hivePartitionValues = cache.getPartitionValues(
getDbName(), getName(), getPartitionColumnTypes());
Map<String, PartitionItem> res = Maps.newHashMap();
Map<Long, PartitionItem> idToPartitionItem = hivePartitionValues.getIdToPartitionItem();
BiMap<Long, String> idToName = hivePartitionValues.getPartitionNameToIdMap().inverse();
for (Entry<Long, PartitionItem> entry : idToPartitionItem.entrySet()) {
res.put(idToName.get(entry.getKey()), entry.getValue());
}
return res;

public Map<String, PartitionItem> getAndCopyPartitionItems(Optional<MvccSnapshot> snapshot) {
return getNameToPartitionItems();
}

@Override
public MTMVSnapshotIf getPartitionSnapshot(String partitionName, MTMVRefreshContext context)
throws AnalysisException {
public MTMVSnapshotIf getPartitionSnapshot(String partitionName, MTMVRefreshContext context,
Optional<MvccSnapshot> snapshot) throws AnalysisException {
HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
.getMetaStoreCache((HMSExternalCatalog) getCatalog());
HiveMetaStoreCache.HivePartitionValues hivePartitionValues = cache.getPartitionValues(
Expand All @@ -778,7 +795,8 @@ public MTMVSnapshotIf getPartitionSnapshot(String partitionName, MTMVRefreshCont
}

@Override
public MTMVSnapshotIf getTableSnapshot(MTMVRefreshContext context) throws AnalysisException {
public MTMVSnapshotIf getTableSnapshot(MTMVRefreshContext context, Optional<MvccSnapshot> snapshot)
throws AnalysisException {
if (getPartitionType() == PartitionType.UNPARTITIONED) {
return new MTMVMaxTimestampSnapshot(getName(), getLastDdlTime());
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.datasource.mvcc;

/**
* The snapshot information of mvcc is defined by each table,
* but it should be ensured that the table information queried through this snapshot remains unchanged
*/
public interface MvccSnapshot {
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.datasource.mvcc;

import org.apache.doris.catalog.TableIf;

/**
* The table that needs to query data based on the version needs to implement this interface.
*/
public interface MvccTable extends TableIf {
/**
* Retrieve the current snapshot information of the table,
* and the returned result will be used for the entire process of this query
*
* @return MvccSnapshot
*/
MvccSnapshot loadSnapshot();
}
Loading

0 comments on commit e03517e

Please sign in to comment.