Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make partitions loading for iceberg table lazy to avoid unnecessary loading #23645

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@ public class BaseHiveTableLayoutHandle
private final TupleDomain<ColumnHandle> partitionColumnPredicate;

// coordinator-only properties
private final Optional<List<HivePartition>> partitions;
private final Optional<LazyLoadedPartitions> partitions;

public BaseHiveTableLayoutHandle(
List<BaseHiveColumnHandle> partitionColumns,
TupleDomain<Subfield> domainPredicate,
RowExpression remainingPredicate,
boolean pushdownFilterEnabled,
TupleDomain<ColumnHandle> partitionColumnPredicate,
Optional<List<HivePartition>> partitions)
Optional<LazyLoadedPartitions> partitions)
{
this.partitionColumns = ImmutableList.copyOf(requireNonNull(partitionColumns, "partitionColumns is null"));
this.domainPredicate = requireNonNull(domainPredicate, "domainPredicate is null");
Expand Down Expand Up @@ -91,7 +91,7 @@ public TupleDomain<ColumnHandle> getPartitionColumnPredicate()
 * @return lazily loaded partitions if available, {@code Optional.empty()} if dropped
*/
@JsonIgnore
public Optional<List<HivePartition>> getPartitions()
public Optional<LazyLoadedPartitions> getPartitions()
{
return partitions;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.hive;

import com.facebook.presto.spi.LazyIterable;

import java.util.List;

import static java.util.Objects.requireNonNull;

public class LazyLoadedPartitions
{
    // All three fields are volatile so the double-checked locking in
    // tryFullyLoad() is safe under the Java Memory Model. Without volatile,
    // a racing reader could observe fullyLoaded == true while still seeing a
    // stale (possibly null) partitions reference, or could NPE on a
    // partitionLoader field that was concurrently nulled out.
    private volatile boolean fullyLoaded;
    private volatile PartitionLoader partitionLoader;
    private volatile List<HivePartition> partitions;

    /**
     * Creates an instance that is already fully loaded with the given partitions.
     */
    public LazyLoadedPartitions(List<HivePartition> partitions)
    {
        this.partitions = requireNonNull(partitions, "partitions is null");
        this.fullyLoaded = true;
    }

    /**
     * Creates an instance whose partitions are loaded on demand from the given loader.
     */
    public LazyLoadedPartitions(PartitionLoader partitionLoader)
    {
        this.partitionLoader = requireNonNull(partitionLoader, "partitionLoader is null");
    }

    public void setMaxPartitionThreshold(int maxPartitionThreshold)
    {
        // Capture the field once: tryFullyLoad() may null it out concurrently,
        // so re-reading the field after a null check is not safe.
        PartitionLoader loader = this.partitionLoader;
        if (loader != null) {
            loader.setMaxPartitionThreshold(maxPartitionThreshold);
        }
    }

    /**
     * Forces loading (if it has not happened yet) and returns the complete partition list.
     */
    public List<HivePartition> getFullyLoadedPartitions()
    {
        tryFullyLoad();
        return this.partitions;
    }

    /**
     * This method may return an iterable with lazy loading
     */
    public LazyIterable<HivePartition> getPartitionsIterable()
    {
        return new LazyPartitionsIterable(this);
    }

    public boolean isEmpty()
    {
        if (this.fullyLoaded) {
            return this.partitions.isEmpty();
        }
        // Capture the loader locally; a concurrent tryFullyLoad() may have
        // nulled the field after flipping fullyLoaded.
        PartitionLoader loader = this.partitionLoader;
        if (loader != null) {
            return loader.isEmpty();
        }
        // Loader was consumed by a concurrent load; partitions is now published.
        return this.partitions.isEmpty();
    }

    private void tryFullyLoad()
    {
        if (!this.fullyLoaded) {
            synchronized (this) {
                if (!this.fullyLoaded) {
                    // Publish partitions before flipping the flag; both writes are
                    // volatile, so no reader can see fullyLoaded without partitions.
                    this.partitions = this.partitionLoader.loadPartitions();
                    this.fullyLoaded = true;
                    this.partitionLoader = null;
                }
            }
        }
    }

    /**
     * Supplies partitions on demand so that expensive loading (e.g. Iceberg
     * manifest reads) can be deferred until the partitions are actually needed.
     */
    public interface PartitionLoader
    {
        List<HivePartition> loadPartitions();

        boolean isEmpty();

        void setMaxPartitionThreshold(int maxPartitionThreshold);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.hive;

import com.facebook.presto.spi.LazyIterable;
import com.google.common.collect.AbstractIterator;

import java.util.Iterator;
import java.util.List;

import static java.util.Objects.requireNonNull;

class LazyPartitionsIterable
        implements LazyIterable<HivePartition>
{
    private final LazyLoadedPartitions source;

    public LazyPartitionsIterable(LazyLoadedPartitions partitions)
    {
        this.source = requireNonNull(partitions, "partitions is null");
    }

    /**
     * Returns an iterator that defers the full partition load until the
     * first element is requested.
     */
    @Override
    public Iterator<HivePartition> iterator()
    {
        return new LazyIterator(source);
    }

    @Override
    public void setMaxIterableCount(int maxIterableCount)
    {
        source.setMaxPartitionThreshold(maxIterableCount);
    }

    private static class LazyIterator
            extends AbstractIterator<HivePartition>
    {
        private final LazyLoadedPartitions lazyPartitions;
        // Populated lazily on the first computeNext() call.
        private List<HivePartition> loaded;
        private int nextIndex;

        private LazyIterator(LazyLoadedPartitions lazyPartitions)
        {
            this.lazyPartitions = lazyPartitions;
        }

        @Override
        protected HivePartition computeNext()
        {
            if (loaded == null) {
                loaded = lazyPartitions.getFullyLoadedPartitions();
            }
            if (nextIndex >= loaded.size()) {
                return endOfData();
            }
            return loaded.get(nextIndex++);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,18 @@
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.DiscretePredicates;
import com.facebook.presto.spi.LazyIterable;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.function.StandardFunctionResolution;
import com.facebook.presto.spi.relation.RowExpression;
import com.facebook.presto.spi.relation.RowExpressionService;
import com.facebook.presto.spi.relation.SpecialFormExpression;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;

import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
Expand All @@ -51,14 +53,35 @@ public final class MetadataUtils
{
private MetadataUtils() {}

public static Optional<DiscretePredicates> getDiscretePredicates(List<ColumnHandle> partitionColumns, List<HivePartition> partitions)
public static Optional<DiscretePredicates> getDiscretePredicates(List<ColumnHandle> partitionColumns, LazyLoadedPartitions partitions)
{
Optional<DiscretePredicates> discretePredicates = Optional.empty();
if (!partitionColumns.isEmpty() && !(partitions.size() == 1 && partitions.get(0).getPartitionId().equals(UNPARTITIONED_ID))) {
if (!partitionColumns.isEmpty()) {
// Do not create tuple domains for every partition at the same time!
// There can be a huge number of partitions so use an iterable so
// all domains do not need to be in memory at the same time.
Iterable<TupleDomain<ColumnHandle>> partitionDomains = Iterables.transform(partitions, (hivePartition) -> TupleDomain.fromFixedValues(hivePartition.getKeys()));
LazyIterable<HivePartition> partitionIterable = partitions.getPartitionsIterable();
Iterable<TupleDomain<ColumnHandle>> partitionDomains = new LazyIterable<TupleDomain<ColumnHandle>>()
{
@Override
public void setMaxIterableCount(int maxIterableCount)
{
partitionIterable.setMaxIterableCount(maxIterableCount);
}

@Override
public Iterator<TupleDomain<ColumnHandle>> iterator()
{
return Iterators.transform(partitionIterable.iterator(), (hivePartition) -> {
if (hivePartition.getPartitionId().equals(UNPARTITIONED_ID)) {
return TupleDomain.all();
}
else {
return TupleDomain.fromFixedValues(hivePartition.getKeys());
}
});
}
};
discretePredicates = Optional.of(new DiscretePredicates(partitionColumns, partitionDomains));
}
return discretePredicates;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,7 @@ public Optional<Object> getInfo(ConnectorTableLayoutHandle layoutHandle)
HiveTableLayoutHandle tableLayoutHandle = (HiveTableLayoutHandle) layoutHandle;
if (tableLayoutHandle.getPartitions().isPresent()) {
return Optional.of(new HiveInputInfo(
tableLayoutHandle.getPartitions().get().stream()
tableLayoutHandle.getPartitions().get().getFullyLoadedPartitions().stream()
.map(hivePartition -> hivePartition.getPartitionId().getPartitionName())
.collect(toList()),
false));
Expand Down Expand Up @@ -2553,13 +2553,13 @@ public OptionalLong metadataDelete(ConnectorSession session, ConnectorTableHandl
private List<HivePartition> getOrComputePartitions(HiveTableLayoutHandle layoutHandle, ConnectorSession session, ConnectorTableHandle tableHandle)
{
if (layoutHandle.getPartitions().isPresent()) {
return layoutHandle.getPartitions().get();
return layoutHandle.getPartitions().get().getFullyLoadedPartitions();
}
else {
TupleDomain<ColumnHandle> partitionColumnPredicate = layoutHandle.getPartitionColumnPredicate();
Predicate<Map<ColumnHandle, NullableValue>> predicate = convertToPredicate(partitionColumnPredicate);
List<ConnectorTableLayoutResult> tableLayoutResults = getTableLayouts(session, tableHandle, new Constraint<>(partitionColumnPredicate, predicate), Optional.empty());
return ((HiveTableLayoutHandle) Iterables.getOnlyElement(tableLayoutResults).getTableLayout().getHandle()).getPartitions().get();
return ((HiveTableLayoutHandle) Iterables.getOnlyElement(tableLayoutResults).getTableLayout().getHandle()).getPartitions().get().getFullyLoadedPartitions();
}
}

Expand Down Expand Up @@ -2677,7 +2677,7 @@ public List<ConnectorTableLayoutResult> getTableLayouts(ConnectorSession session
.setRemainingPredicate(TRUE_CONSTANT)
.setPredicateColumns(predicateColumns)
.setPartitionColumnPredicate(hivePartitionResult.getEnforcedConstraint())
.setPartitions(hivePartitionResult.getPartitions())
.setPartitions(new LazyLoadedPartitions(hivePartitionResult.getPartitions()))
.setBucketHandle(hiveBucketHandle)
.setBucketFilter(hivePartitionResult.getBucketFilter())
.setPushdownFilterEnabled(false)
Expand Down Expand Up @@ -2724,7 +2724,7 @@ public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTa
{
HiveTableLayoutHandle hiveLayoutHandle = (HiveTableLayoutHandle) layoutHandle;
List<ColumnHandle> partitionColumns = ImmutableList.copyOf(hiveLayoutHandle.getPartitionColumns());
List<HivePartition> partitions = hiveLayoutHandle.getPartitions().get();
LazyLoadedPartitions partitions = hiveLayoutHandle.getPartitions().get();

Optional<DiscretePredicates> discretePredicates = getDiscretePredicates(partitionColumns, partitions);

Expand Down Expand Up @@ -2784,7 +2784,7 @@ public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTa
if (hiveLayoutHandle.isPushdownFilterEnabled()) {
Map<String, ColumnHandle> predicateColumns = hiveLayoutHandle.getPredicateColumns().entrySet()
.stream().collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
predicate = getPredicate(hiveLayoutHandle, partitionColumns, partitions, predicateColumns);
predicate = getPredicate(hiveLayoutHandle, partitionColumns, partitions.getFullyLoadedPartitions(), predicateColumns);

// capture subfields from domainPredicate to add to remainingPredicate
// so those filters don't get lost
Expand All @@ -2794,7 +2794,7 @@ public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTa
subfieldPredicate = getSubfieldPredicate(session, hiveLayoutHandle, columnTypes, functionResolution, rowExpressionService);
}
else {
predicate = createPredicate(partitionColumns, partitions);
predicate = createPredicate(partitionColumns, partitions.getFullyLoadedPartitions());
subfieldPredicate = TRUE_CONSTANT;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ public ConnectorSplitSource getSplits(
}

// get partitions
List<HivePartition> partitions = layout.getPartitions()
List<HivePartition> partitions = layout.getPartitions().map(LazyLoadedPartitions::getFullyLoadedPartitions)
.orElseThrow(() -> new PrestoException(GENERIC_INTERNAL_ERROR, "Layout does not contain partitions"));

// short circuit if we don't have any partitions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public class HiveTableLayoutHandle
private final boolean footerStatsUnreliable;

// coordinator-only properties
private final Optional<List<HivePartition>> partitions;
private final Optional<LazyLoadedPartitions> partitions;
private final Optional<HiveTableHandle> hiveTableHandle;

/**
Expand Down Expand Up @@ -133,7 +133,7 @@ protected HiveTableLayoutHandle(
Optional<Set<HiveColumnHandle>> requestedColumns,
boolean partialAggregationsPushedDown,
boolean appendRowNumberEnabled,
Optional<List<HivePartition>> partitions,
Optional<LazyLoadedPartitions> partitions,
boolean footerStatsUnreliable,
Optional<HiveTableHandle> hiveTableHandle)
{
Expand Down Expand Up @@ -300,7 +300,7 @@ private TupleDomain<ColumnHandle> getConstraint(PlanCanonicalizationStrategy can
// Constants are only removed from point checks, and not range checks. Example:
// `x = 1` is equivalent to `x = 1000`
// `x > 1` is NOT equivalent to `x > 1000`
TupleDomain<ColumnHandle> constraint = createPredicate(ImmutableList.copyOf(getPartitionColumns()), partitions.get());
TupleDomain<ColumnHandle> constraint = createPredicate(ImmutableList.copyOf(getPartitionColumns()), partitions.get().getFullyLoadedPartitions());
constraint = getDomainPredicate()
.transform(subfield -> subfield.getPath().isEmpty() ? subfield.getRootName() : null)
.transform(getPredicateColumns()::get)
Expand Down Expand Up @@ -393,7 +393,7 @@ public static class Builder
private boolean appendRowNumberEnabled;
private boolean footerStatsUnreliable;

private Optional<List<HivePartition>> partitions;
private Optional<LazyLoadedPartitions> partitions;
private Optional<HiveTableHandle> hiveTableHandle = Optional.empty();

public Builder setSchemaTableName(SchemaTableName schemaTableName)
Expand Down Expand Up @@ -492,13 +492,13 @@ public Builder setAppendRowNumberEnabled(boolean appendRowNumberEnabled)
return this;
}

public Builder setPartitions(List<HivePartition> partitions)
public Builder setPartitions(LazyLoadedPartitions partitions)
{
requireNonNull(partitions, "partitions is null");
return setPartitions(Optional.of(partitions));
}

public Builder setPartitions(Optional<List<HivePartition>> partitions)
public Builder setPartitions(Optional<LazyLoadedPartitions> partitions)
{
requireNonNull(partitions, "partitions is null");
this.partitions = partitions;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import com.facebook.presto.hive.HiveTableHandle;
import com.facebook.presto.hive.HiveTableLayoutHandle;
import com.facebook.presto.hive.HiveTransactionManager;
import com.facebook.presto.hive.LazyLoadedPartitions;
import com.facebook.presto.hive.metastore.Column;
import com.facebook.presto.hive.metastore.MetastoreContext;
import com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore;
Expand Down Expand Up @@ -195,7 +196,7 @@ public ConnectorPushdownFilterResult getConnectorPushdownFilterResult(
.setRemainingPredicate(remainingExpressions.getRemainingExpression())
.setPredicateColumns(predicateColumns)
.setPartitionColumnPredicate(hivePartitionResult.getEnforcedConstraint())
.setPartitions(hivePartitionResult.getPartitions())
.setPartitions(new LazyLoadedPartitions(hivePartitionResult.getPartitions()))
.setBucketHandle(hivePartitionResult.getBucketHandle())
.setBucketFilter(hivePartitionResult.getBucketFilter())
.setPushdownFilterEnabled(true)
Expand Down
Loading
Loading