Skip to content

Commit

Permalink
[feature](mtmv)Support iceberg partition refresh. (#44726)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

Previously, when using Iceberg to create MTMV, it was not possible to
perceive changes in partition lists and data, so only ```refresh
materialized view mv1 complete ```could be used to force full refresh.

This PR obtains the partition list of Iceberg, the last update time of
the partition, and the latest snapshotId of the table.

Therefore, MTMV can be partition based on Iceberg tables and perceive
changes in data, automatically refreshing partitions

For now, we only support single partition column table and the partition
transform must one of hour, day, month or year.
Will support Identity transform soon.

Issue Number: close #xxx

Related PR: #xxx

Problem Summary:

### Release note

None
  • Loading branch information
Jibing-Li authored Dec 16, 2024
1 parent 9231b7d commit f49e609
Show file tree
Hide file tree
Showing 12 changed files with 1,199 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,14 @@ public boolean isDefaultPartition() {

@Override
public PartitionKeyDesc toPartitionKeyDesc() {
return PartitionKeyDesc.createFixed(
if (partitionKeyRange.hasLowerBound()) {
return PartitionKeyDesc.createFixed(
PartitionInfo.toPartitionValue(partitionKeyRange.lowerEndpoint()),
PartitionInfo.toPartitionValue(partitionKeyRange.upperEndpoint()));
} else {
// For null partition value.
return PartitionKeyDesc.createLessThan(PartitionInfo.toPartitionValue(partitionKeyRange.upperEndpoint()));
}
}

@Override
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.datasource.iceberg;

import java.util.List;

public class IcebergPartition {
private final String partitionName;
private final List<String> partitionValues;
private final int specId;
private final long recordCount;
private final long fileSizeInBytes;
private final long fileCount;
private final long lastUpdateTime;
private final long lastSnapshotId;
private final List<String> transforms;

public IcebergPartition(String partitionName, int specId, long recordCount, long fileSizeInBytes, long fileCount,
long lastUpdateTime, long lastSnapshotId, List<String> partitionValues,
List<String> transforms) {
this.partitionName = partitionName;
this.specId = specId;
this.recordCount = recordCount;
this.fileSizeInBytes = fileSizeInBytes;
this.fileCount = fileCount;
this.lastUpdateTime = lastUpdateTime;
this.lastSnapshotId = lastSnapshotId;
this.partitionValues = partitionValues;
this.transforms = transforms;
}

public String getPartitionName() {
return partitionName;
}

public int getSpecId() {
return specId;
}

public long getRecordCount() {
return recordCount;
}

public long getFileSizeInBytes() {
return fileSizeInBytes;
}

public long getFileCount() {
return fileCount;
}

public long getLastUpdateTime() {
return lastUpdateTime;
}

public long getLastSnapshotId() {
return lastSnapshotId;
}

public List<String> getPartitionValues() {
return partitionValues;
}

public List<String> getTransforms() {
return transforms;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.datasource.iceberg;

import org.apache.doris.catalog.PartitionItem;

import com.google.common.collect.Maps;

import java.util.Map;
import java.util.Set;

public class IcebergPartitionInfo {
private final Map<String, PartitionItem> nameToPartitionItem;
private final Map<String, IcebergPartition> nameToIcebergPartition;
private final Map<String, Set<String>> nameToIcebergPartitionNames;

public IcebergPartitionInfo() {
this.nameToPartitionItem = Maps.newHashMap();
this.nameToIcebergPartition = Maps.newHashMap();
this.nameToIcebergPartitionNames = Maps.newHashMap();
}

public IcebergPartitionInfo(Map<String, PartitionItem> nameToPartitionItem,
Map<String, IcebergPartition> nameToIcebergPartition,
Map<String, Set<String>> nameToIcebergPartitionNames) {
this.nameToPartitionItem = nameToPartitionItem;
this.nameToIcebergPartition = nameToIcebergPartition;
this.nameToIcebergPartitionNames = nameToIcebergPartitionNames;
}

public Map<String, PartitionItem> getNameToPartitionItem() {
return nameToPartitionItem;
}

public Map<String, IcebergPartition> getNameToIcebergPartition() {
return nameToIcebergPartition;
}

public long getLatestSnapshotId(String partitionName) {
Set<String> icebergPartitionNames = nameToIcebergPartitionNames.get(partitionName);
if (icebergPartitionNames == null) {
return nameToIcebergPartition.get(partitionName).getLastSnapshotId();
}
long latestSnapshotId = 0;
long latestUpdateTime = -1;
for (String name : icebergPartitionNames) {
IcebergPartition partition = nameToIcebergPartition.get(name);
long lastUpdateTime = partition.getLastUpdateTime();
if (latestUpdateTime < lastUpdateTime) {
latestUpdateTime = lastUpdateTime;
latestSnapshotId = partition.getLastSnapshotId();
}
}
return latestSnapshotId;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.datasource.iceberg;

import org.apache.doris.catalog.Column;
import org.apache.doris.datasource.SchemaCacheValue;

import java.util.List;

public class IcebergSchemaCacheValue extends SchemaCacheValue {

private final List<Column> partitionColumns;
private final IcebergPartitionInfo partitionInfo;
private final long snapshotId;

public IcebergSchemaCacheValue(List<Column> schema, List<Column> partitionColumns,
long snapshotId, IcebergPartitionInfo partitionInfo) {
super(schema);
this.partitionColumns = partitionColumns;
this.snapshotId = snapshotId;
this.partitionInfo = partitionInfo;
}

public List<Column> getPartitionColumns() {
return partitionColumns;
}

public IcebergPartitionInfo getPartitionInfo() {
return partitionInfo;
}

public long getSnapshotId() {
return snapshotId;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import org.apache.doris.mtmv.MTMVRefreshContext;
import org.apache.doris.mtmv.MTMVRefreshEnum.RefreshMethod;
import org.apache.doris.mtmv.MTMVRefreshPartitionSnapshot;
import org.apache.doris.mtmv.MTMVRelatedTableIf;
import org.apache.doris.mtmv.MTMVRelation;
import org.apache.doris.mtmv.MTMVUtil;
import org.apache.doris.nereids.StatementContext;
Expand Down Expand Up @@ -182,6 +183,12 @@ public void run() throws JobException {
this.relation = MTMVPlanUtil.generateMTMVRelation(mtmv, ctx);
beforeMTMVRefresh();
if (mtmv.getMvPartitionInfo().getPartitionType() != MTMVPartitionType.SELF_MANAGE) {
MTMVRelatedTableIf relatedTable = mtmv.getMvPartitionInfo().getRelatedTable();
if (!relatedTable.isValidRelatedTable()) {
throw new JobException("MTMV " + mtmv.getName() + "'s related table " + relatedTable.getName()
+ " is not a valid related table anymore, stop refreshing."
+ " e.g. Table has multiple partition columns or including not supported transform functions.");
}
MTMVPartitionUtil.alignMvPartition(mtmv);
}
MTMVRefreshContext context = MTMVRefreshContext.buildContext(mtmv);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ public static boolean isSyncWithPartitions(MTMVRefreshContext context, String mt
if (!relatedTable.needAutoRefresh()) {
return true;
}
// check if partitions of related table if changed
// check if partitions of related table is changed
Set<String> snapshotPartitions = mtmv.getRefreshSnapshot().getSnapshotPartitions(mtmvPartitionName);
if (!Objects.equals(relatedPartitionNames, snapshotPartitions)) {
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,13 @@ default boolean needAutoRefresh() {
* @return
*/
boolean isPartitionColumnAllowNull();

/**
* If the table is supported as related table.
* For example, an Iceberg table may become unsupported after partition revolution.
* @return
*/
default boolean isValidRelatedTable() {
return true;
}
}
Loading

0 comments on commit f49e609

Please sign in to comment.