io.trino.plugin.deltalake.DeltaLakeSplitManager
Trino - Delta Lake connector
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.deltalake;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.inject.Inject;
import io.airlift.units.DataSize;
import io.trino.filesystem.Location;
import io.trino.filesystem.TrinoFileSystemFactory;
import io.trino.filesystem.cache.CachingHostAddressProvider;
import io.trino.plugin.base.classloader.ClassLoaderSafeConnectorSplitSource;
import io.trino.plugin.deltalake.functions.tablechanges.TableChangesSplitSource;
import io.trino.plugin.deltalake.functions.tablechanges.TableChangesTableFunctionHandle;
import io.trino.plugin.deltalake.transactionlog.AddFileEntry;
import io.trino.plugin.deltalake.transactionlog.MetadataEntry;
import io.trino.plugin.deltalake.transactionlog.TableSnapshot;
import io.trino.plugin.deltalake.transactionlog.TransactionLogAccess;
import io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics;
import io.trino.spi.SplitWeight;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorSplitManager;
import io.trino.spi.connector.ConnectorSplitSource;
import io.trino.spi.connector.ConnectorTableHandle;
import io.trino.spi.connector.ConnectorTransactionHandle;
import io.trino.spi.connector.Constraint;
import io.trino.spi.connector.DynamicFilter;
import io.trino.spi.connector.FixedSplitSource;
import io.trino.spi.function.table.ConnectorTableFunctionHandle;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.NullableValue;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.type.TypeManager;
import java.net.URI;
import java.time.Instant;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.stream.Stream;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.plugin.deltalake.DeltaLakeAnalyzeProperties.AnalyzeMode.FULL_REFRESH;
import static io.trino.plugin.deltalake.DeltaLakeColumnHandle.pathColumnHandle;
import static io.trino.plugin.deltalake.DeltaLakeMetadata.createStatisticsPredicate;
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getDynamicFilteringWaitTimeout;
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getMaxSplitSize;
import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractSchema;
import static io.trino.plugin.deltalake.transactionlog.TransactionLogParser.deserializePartitionValue;
import static io.trino.spi.connector.FixedSplitSource.emptySplitSource;
import static java.lang.Math.clamp;
import static java.util.Objects.requireNonNull;
import static java.util.function.Function.identity;
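/**
 * Split manager for Delta Lake tables. Splits are derived from the active {@code AddFileEntry}
 * records of a table snapshot and pruned with partition, path, file-size and statistics-based
 * predicates before being handed out to workers.
 */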
public class DeltaLakeSplitManager
implements ConnectorSplitManager
{
private final TypeManager typeManager;
private final TransactionLogAccess transactionLogAccess;
private final ExecutorService executor;
private final int maxSplitsPerSecond;
private final int maxOutstandingSplits;
private final double minimumAssignedSplitWeight;
private final TrinoFileSystemFactory fileSystemFactory;
private final DeltaLakeTransactionManager deltaLakeTransactionManager;
private final CachingHostAddressProvider cachingHostAddressProvider;
@Inject
public DeltaLakeSplitManager(
TypeManager typeManager,
TransactionLogAccess transactionLogAccess,
ExecutorService executor,
DeltaLakeConfig config,
TrinoFileSystemFactory fileSystemFactory,
DeltaLakeTransactionManager deltaLakeTransactionManager,
CachingHostAddressProvider cachingHostAddressProvider)
{
this.typeManager = requireNonNull(typeManager, "typeManager is null");
this.transactionLogAccess = requireNonNull(transactionLogAccess, "transactionLogAccess is null");
this.executor = requireNonNull(executor, "executor is null");
this.maxSplitsPerSecond = config.getMaxSplitsPerSecond();
this.maxOutstandingSplits = config.getMaxOutstandingSplits();
this.minimumAssignedSplitWeight = config.getMinimumAssignedSplitWeight();
this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null");
this.deltaLakeTransactionManager = requireNonNull(deltaLakeTransactionManager, "deltaLakeTransactionManager is null");
this.cachingHostAddressProvider = requireNonNull(cachingHostAddressProvider, "cachingHostAddressProvider is null");
}
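// Short-circuits when the pushed-down constraints are unsatisfiable; otherwise wraps the lazily
// evaluated file stream in a DeltaLakeSplitSource that throttles split generation and applies
// the dynamic filter with the configured wait timeout.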
@Override
public ConnectorSplitSource getSplits(
ConnectorTransactionHandle transaction,
ConnectorSession session,
ConnectorTableHandle handle,
DynamicFilter dynamicFilter,
Constraint constraint)
{
DeltaLakeTableHandle deltaLakeTableHandle = (DeltaLakeTableHandle) handle;
if (deltaLakeTableHandle.getEnforcedPartitionConstraint().isNone() || deltaLakeTableHandle.getNonPartitionConstraint().isNone()) {
if (deltaLakeTableHandle.isRecordScannedFiles()) {
return new FixedSplitSource(ImmutableList.of(), ImmutableList.of());
}
return emptySplitSource();
}
DeltaLakeSplitSource splitSource = new DeltaLakeSplitSource(
deltaLakeTableHandle.getSchemaTableName(),
getSplits(transaction, deltaLakeTableHandle, session, deltaLakeTableHandle.getMaxScannedFileSize(), dynamicFilter.getColumnsCovered(), constraint),
executor,
maxSplitsPerSecond,
maxOutstandingSplits,
dynamicFilter,
getDynamicFilteringWaitTimeout(session),
deltaLakeTableHandle.isRecordScannedFiles());
return new ClassLoaderSafeConnectorSplitSource(splitSource, DeltaLakeSplitManager.class.getClassLoader());
}
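// Table function splits: only the table changes function (TableChangesTableFunctionHandle) is supported here.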
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableFunctionHandle function)
{
if (function instanceof TableChangesTableFunctionHandle tableFunctionHandle) {
return new TableChangesSplitSource(session, fileSystemFactory, tableFunctionHandle);
}
throw new UnsupportedOperationException("Unrecognized function: " + function);
}
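// Streams the snapshot's active data files and prunes them one by one: enforced partition
// constraint, path predicate, modification time, maximum scanned file size and min/max statistics
// are all applied before a surviving file is expanded into one or more splits.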
private Stream<DeltaLakeSplit> getSplits(
ConnectorTransactionHandle transaction,
DeltaLakeTableHandle tableHandle,
ConnectorSession session,
Optional<DataSize> maxScannedFileSize,
Set<ColumnHandle> columnsCoveredByDynamicFilter,
Constraint constraint)
{
TableSnapshot tableSnapshot = deltaLakeTransactionManager.get(transaction, session.getIdentity())
.getSnapshot(session, tableHandle.getSchemaTableName(), tableHandle.getLocation(), tableHandle.getReadVersion());
Stream<AddFileEntry> validDataFiles = transactionLogAccess.getActiveFiles(
session,
tableSnapshot,
tableHandle.getMetadataEntry(),
tableHandle.getProtocolEntry(),
tableHandle.getEnforcedPartitionConstraint(),
tableHandle.getProjectedColumns().orElse(ImmutableSet.of()));
TupleDomain<DeltaLakeColumnHandle> enforcedPartitionConstraint = tableHandle.getEnforcedPartitionConstraint();
TupleDomain<DeltaLakeColumnHandle> nonPartitionConstraint = tableHandle.getNonPartitionConstraint();
Domain pathDomain = getPathDomain(nonPartitionConstraint);
boolean splittable =
// Delta Lake handles updates and deletes by copying entire data files, minus updates/deletes. Because of this we can only have one Split/UpdatablePageSource
// per file. TODO (https://github.com/trinodb/trino/issues/17063) use deletion vectors instead of copy-on-write and remove DeltaLakeTableHandle.writeType
tableHandle.getWriteType().isEmpty() &&
// When only partitioning columns are projected, there is no point in splitting the files
mayAnyDataColumnProjected(tableHandle);
Optional<Instant> filesModifiedAfter = tableHandle.getAnalyzeHandle().flatMap(AnalyzeHandle::getFilesModifiedAfter);
Optional<Long> maxScannedFileSizeInBytes = maxScannedFileSize.map(DataSize::toBytes);
MetadataEntry metadataEntry = tableHandle.getMetadataEntry();
boolean isOptimize = tableHandle.isOptimize();
if (isOptimize) {
checkArgument(maxScannedFileSizeInBytes.isPresent(), "maxScannedFileSizeInBytes must be provided when performing OPTIMIZE");
validDataFiles = filterValidDataFilesForOptimize(validDataFiles, maxScannedFileSizeInBytes.get());
}
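// Statistics-based pruning below only needs the columns referenced by pushed-down predicates
// or covered by the dynamic filter.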
Set<String> predicatedColumnNames = Stream.concat(
nonPartitionConstraint.getDomains().orElseThrow().keySet().stream(),
columnsCoveredByDynamicFilter.stream()
.map(DeltaLakeColumnHandle.class::cast))
.map(DeltaLakeColumnHandle::getBaseColumnName)
.collect(toImmutableSet());
List<DeltaLakeColumnMetadata> schema = extractSchema(metadataEntry, tableHandle.getProtocolEntry(), typeManager);
List<DeltaLakeColumnMetadata> predicatedColumns = schema.stream()
.filter(column -> predicatedColumnNames.contains(column.getName()))
.collect(toImmutableList());
return validDataFiles
.flatMap(addAction -> {
if (tableHandle.getAnalyzeHandle().isPresent() &&
!(tableHandle.getAnalyzeHandle().get().getAnalyzeMode() == FULL_REFRESH) && !addAction.isDataChange()) {
// skip files which do not introduce data changes when the analyze mode is not FULL_REFRESH
return Stream.empty();
}
String splitPath = buildSplitPath(Location.of(tableHandle.getLocation()), addAction).toString();
if (!pathMatchesPredicate(pathDomain, splitPath)) {
return Stream.empty();
}
if (filesModifiedAfter.isPresent() && addAction.getModificationTime() <= filesModifiedAfter.get().toEpochMilli()) {
return Stream.empty();
}
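// Files larger than the maximum scanned file size are skipped unless they carry a deletion vector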
if (addAction.getDeletionVector().isEmpty() && maxScannedFileSizeInBytes.isPresent() && addAction.getSize() > maxScannedFileSizeInBytes.get()) {
return Stream.empty();
}
Map<DeltaLakeColumnHandle, Domain> enforcedDomains = enforcedPartitionConstraint.getDomains().orElseThrow();
if (!partitionMatchesPredicate(addAction.getCanonicalPartitionValues(), enforcedDomains)) {
return Stream.empty();
}
TupleDomain<DeltaLakeColumnHandle> statisticsPredicate = createStatisticsPredicate(
addAction,
predicatedColumns,
metadataEntry.getLowercasePartitionColumns());
if (!nonPartitionConstraint.overlaps(statisticsPredicate)) {
return Stream.empty();
}
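// Apply the optional constraint predicate to this file's deserialized partition values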
if (constraint.predicate().isPresent()) {
Map<String, Optional<String>> partitionValues = addAction.getCanonicalPartitionValues();
Map<ColumnHandle, NullableValue> deserializedValues = constraint.getPredicateColumns().orElseThrow().stream()
.map(DeltaLakeColumnHandle.class::cast)
.filter(column -> column.isBaseColumn() && partitionValues.containsKey(column.getBaseColumnName()))
.collect(toImmutableMap(identity(), column -> new NullableValue(
column.getBaseType(),
deserializePartitionValue(column, partitionValues.get(column.getBaseColumnName())))));
if (!constraint.predicate().get().test(deserializedValues)) {
return Stream.empty();
}
}
return splitsForFile(
session,
addAction,
splitPath,
addAction.getCanonicalPartitionValues(),
statisticsPredicate,
splittable)
.stream();
});
}
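// For OPTIMIZE, a partition's lone small file is not worth rewriting, so files are buffered per
// partition and only emitted once at least a second qualifying file for the same partition is seen.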
private static Stream<AddFileEntry> filterValidDataFilesForOptimize(Stream<AddFileEntry> validDataFiles, long maxScannedFileSizeInBytes)
{
// A present value is a pending file (potentially the only one) for a given partition.
// An empty value is a tombstone, indicating that at least 2 files for that partition were already seen in the stream and selected for processing.
Map<Map<String, Optional<String>>, Optional<AddFileEntry>>