All downloads are free. The search and download features use the official Maven repository.

io.trino.plugin.deltalake.transactionlog.TableSnapshot Maven / Gradle / Ivy

There is a newer version: 458
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.deltalake.transactionlog;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import io.trino.filesystem.Location;
import io.trino.filesystem.TrinoFileSystem;
import io.trino.filesystem.TrinoInputFile;
import io.trino.parquet.ParquetReaderOptions;
import io.trino.plugin.deltalake.DeltaLakeColumnHandle;
import io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator;
import io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointSchemaManager;
import io.trino.plugin.deltalake.transactionlog.checkpoint.LastCheckpoint;
import io.trino.plugin.deltalake.transactionlog.checkpoint.TransactionLogTail;
import io.trino.plugin.hive.FileFormatDataSourceStats;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.SchemaTableName;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.type.TypeManager;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Stream;

import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Verify.verify;
import static com.google.common.collect.Streams.stream;
import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_FILESYSTEM_ERROR;
import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA;
import static io.trino.plugin.deltalake.transactionlog.TransactionLogParser.readLastCheckpoint;
import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogDir;
import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.ADD;
import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.SIDECAR;
import static io.trino.plugin.deltalake.transactionlog.checkpoint.TransactionLogTail.getEntriesFromJson;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;

/**
 * The current state of a Delta table.  It's defined by its latest checkpoint and the subsequent transactions
 * not included in the checkpoint.
 */
public class TableSnapshot
{
    private final Optional lastCheckpoint;
    private final SchemaTableName table;
    private final TransactionLogTail logTail;
    private final String tableLocation;
    private final ParquetReaderOptions parquetReaderOptions;
    private final boolean checkpointRowStatisticsWritingEnabled;
    private final int domainCompactionThreshold;

    private Optional cachedMetadata = Optional.empty();

    private TableSnapshot(
            SchemaTableName table,
            Optional lastCheckpoint,
            TransactionLogTail logTail,
            String tableLocation,
            ParquetReaderOptions parquetReaderOptions,
            boolean checkpointRowStatisticsWritingEnabled,
            int domainCompactionThreshold)
    {
        this.table = requireNonNull(table, "table is null");
        this.lastCheckpoint = requireNonNull(lastCheckpoint, "lastCheckpoint is null");
        this.logTail = requireNonNull(logTail, "logTail is null");
        this.tableLocation = requireNonNull(tableLocation, "tableLocation is null");
        this.parquetReaderOptions = requireNonNull(parquetReaderOptions, "parquetReaderOptions is null");
        this.checkpointRowStatisticsWritingEnabled = checkpointRowStatisticsWritingEnabled;
        this.domainCompactionThreshold = domainCompactionThreshold;
    }

    public static TableSnapshot load(
            SchemaTableName table,
            Optional lastCheckpoint,
            TrinoFileSystem fileSystem,
            String tableLocation,
            ParquetReaderOptions parquetReaderOptions,
            boolean checkpointRowStatisticsWritingEnabled,
            int domainCompactionThreshold)
            throws IOException
    {
        Optional lastCheckpointVersion = lastCheckpoint.map(LastCheckpoint::version);
        TransactionLogTail transactionLogTail = TransactionLogTail.loadNewTail(fileSystem, tableLocation, lastCheckpointVersion, Optional.empty());

        return new TableSnapshot(
                table,
                lastCheckpoint,
                transactionLogTail,
                tableLocation,
                parquetReaderOptions,
                checkpointRowStatisticsWritingEnabled,
                domainCompactionThreshold);
    }

    public Optional getUpdatedSnapshot(TrinoFileSystem fileSystem, Optional toVersion)
            throws IOException
    {
        if (toVersion.isEmpty()) {
            // Load any newer table snapshot

            Optional lastCheckpoint = readLastCheckpoint(fileSystem, tableLocation);
            if (lastCheckpoint.isPresent()) {
                long ourCheckpointVersion = getLastCheckpointVersion().orElse(0L);
                if (ourCheckpointVersion != lastCheckpoint.get().version()) {
                    // There is a new checkpoint in the table, load anew
                    return Optional.of(TableSnapshot.load(
                            table,
                            lastCheckpoint,
                            fileSystem,
                            tableLocation,
                            parquetReaderOptions,
                            checkpointRowStatisticsWritingEnabled,
                            domainCompactionThreshold));
                }
            }
        }

        Optional updatedLogTail = logTail.getUpdatedTail(fileSystem, tableLocation, toVersion);
        return updatedLogTail.map(transactionLogTail -> new TableSnapshot(
                table,
                lastCheckpoint,
                transactionLogTail,
                tableLocation,
                parquetReaderOptions,
                checkpointRowStatisticsWritingEnabled,
                domainCompactionThreshold));
    }

    public long getVersion()
    {
        return logTail.getVersion();
    }

    public SchemaTableName getTable()
    {
        return table;
    }

    public Optional getCachedMetadata()
    {
        return cachedMetadata;
    }

    public String getTableLocation()
    {
        return tableLocation;
    }

    public void setCachedMetadata(Optional cachedMetadata)
    {
        this.cachedMetadata = cachedMetadata;
    }

    public List getJsonTransactionLogEntries()
    {
        return logTail.getFileEntries();
    }

    public List getTransactions()
    {
        return logTail.getTransactions();
    }

    public Stream getCheckpointTransactionLogEntries(
            ConnectorSession session,
            Set entryTypes,
            CheckpointSchemaManager checkpointSchemaManager,
            TypeManager typeManager,
            TrinoFileSystem fileSystem,
            FileFormatDataSourceStats stats,
            Optional metadataAndProtocol,
            TupleDomain partitionConstraint,
            Optional> addStatsMinMaxColumnFilter)
            throws IOException
    {
        if (lastCheckpoint.isEmpty()) {
            return Stream.empty();
        }

        LastCheckpoint checkpoint = lastCheckpoint.get();
        // Add entries contain statistics. When struct statistics are used the format of the Parquet file depends on the schema. It is important to use the schema at the time
        // of the Checkpoint creation, in case the schema has evolved since it was written.
        if (entryTypes.contains(ADD)) {
            checkState(metadataAndProtocol.isPresent(), "metadata and protocol information is needed to process the add log entries");
        }

        return getCheckpointPartPaths(checkpoint).stream()
                .map(fileSystem::newInputFile)
                .flatMap(checkpointFile -> getCheckpointTransactionLogEntries(
                        session,
                        fileSystem,
                        entryTypes,
                        metadataAndProtocol.map(MetadataAndProtocolEntry::metadataEntry),
                        metadataAndProtocol.map(MetadataAndProtocolEntry::protocolEntry),
                        checkpointSchemaManager,
                        typeManager,
                        stats,
                        checkpoint,
                        checkpointFile,
                        partitionConstraint,
                        addStatsMinMaxColumnFilter));
    }

    public Optional getLastCheckpointVersion()
    {
        return lastCheckpoint.map(LastCheckpoint::version);
    }

    private Stream getCheckpointTransactionLogEntries(
            ConnectorSession session,
            TrinoFileSystem fileSystem,
            Set entryTypes,
            Optional metadataEntry,
            Optional protocolEntry,
            CheckpointSchemaManager checkpointSchemaManager,
            TypeManager typeManager,
            FileFormatDataSourceStats stats,
            LastCheckpoint checkpoint,
            TrinoInputFile checkpointFile,
            TupleDomain partitionConstraint,
            Optional> addStatsMinMaxColumnFilter)
    {
        long fileSize;
        try {
            fileSize = checkpointFile.length();
        }
        catch (FileNotFoundException e) {
            throw new TrinoException(DELTA_LAKE_INVALID_SCHEMA, format("%s mentions a non-existent checkpoint file for table: %s", checkpoint, table));
        }
        catch (IOException e) {
            throw new TrinoException(DELTA_LAKE_FILESYSTEM_ERROR, format("Unexpected IO exception occurred while retrieving the length of the file: %s for the table %s", checkpoint, table), e);
        }
        if (checkpoint.v2Checkpoint().isPresent()) {
            return getV2CheckpointTransactionLogEntriesFrom(
                    session,
                    entryTypes,
                    metadataEntry,
                    protocolEntry,
                    checkpointSchemaManager,
                    typeManager,
                    stats,
                    checkpoint,
                    checkpointFile,
                    partitionConstraint,
                    addStatsMinMaxColumnFilter,
                    fileSystem,
                    fileSize);
        }
        CheckpointEntryIterator checkpointEntryIterator = new CheckpointEntryIterator(
                checkpointFile,
                session,
                fileSize,
                checkpointSchemaManager,
                typeManager,
                entryTypes,
                metadataEntry,
                protocolEntry,
                stats,
                parquetReaderOptions,
                checkpointRowStatisticsWritingEnabled,
                domainCompactionThreshold,
                partitionConstraint,
                addStatsMinMaxColumnFilter);
        return stream(checkpointEntryIterator).onClose(checkpointEntryIterator::close);
    }

    private Stream getV2CheckpointTransactionLogEntriesFrom(
            ConnectorSession session,
            Set entryTypes,
            Optional metadataEntry,
            Optional protocolEntry,
            CheckpointSchemaManager checkpointSchemaManager,
            TypeManager typeManager,
            FileFormatDataSourceStats stats,
            LastCheckpoint checkpoint,
            TrinoInputFile checkpointFile,
            TupleDomain partitionConstraint,
            Optional> addStatsMinMaxColumnFilter,
            TrinoFileSystem fileSystem,
            long fileSize)
    {
        return getV2CheckpointEntries(session, entryTypes, metadataEntry, protocolEntry, checkpointSchemaManager, typeManager, stats, checkpoint, checkpointFile, partitionConstraint, addStatsMinMaxColumnFilter, fileSystem, fileSize)
                .mapMulti((entry, builder) -> {
                    if (entry.getSidecar() == null) {
                        builder.accept(entry);
                        return;
                    }
                    Location sidecar = checkpointFile.location().sibling("_sidecars").appendPath(entry.getSidecar().path());
                    CheckpointEntryIterator iterator = new CheckpointEntryIterator(
                            fileSystem.newInputFile(sidecar),
                            session,
                            fileSize,
                            checkpointSchemaManager,
                            typeManager,
                            entryTypes,
                            metadataEntry,
                            protocolEntry,
                            stats,
                            parquetReaderOptions,
                            checkpointRowStatisticsWritingEnabled,
                            domainCompactionThreshold,
                            partitionConstraint,
                            addStatsMinMaxColumnFilter);
                    stream(iterator).onClose(iterator::close).forEach(builder);
                });
    }

    private Stream getV2CheckpointEntries(
            ConnectorSession session,
            Set entryTypes,
            Optional metadataEntry,
            Optional protocolEntry,
            CheckpointSchemaManager checkpointSchemaManager,
            TypeManager typeManager,
            FileFormatDataSourceStats stats,
            LastCheckpoint checkpoint,
            TrinoInputFile checkpointFile,
            TupleDomain partitionConstraint,
            Optional> addStatsMinMaxColumnFilter,
            TrinoFileSystem fileSystem,
            long fileSize)
    {
        if (checkpointFile.location().fileName().endsWith(".json")) {
            try {
                return getEntriesFromJson(checkpoint.version(), checkpointFile).stream().flatMap(List::stream);
            }
            catch (IOException e) {
                throw new TrinoException(DELTA_LAKE_FILESYSTEM_ERROR, format("Unexpected IO exception occurred while reading the entries of the file: %s for the table %s", checkpoint, table), e);
            }
        }
        if (checkpointFile.location().fileName().endsWith(".parquet")) {
            CheckpointEntryIterator checkpointEntryIterator = new CheckpointEntryIterator(
                    fileSystem.newInputFile(checkpointFile.location()),
                    session,
                    fileSize,
                    checkpointSchemaManager,
                    typeManager,
                    ImmutableSet.builder()
                            .addAll(entryTypes)
                            .add(SIDECAR)
                            .build(),
                    metadataEntry,
                    protocolEntry,
                    stats,
                    parquetReaderOptions,
                    checkpointRowStatisticsWritingEnabled,
                    domainCompactionThreshold,
                    partitionConstraint,
                    addStatsMinMaxColumnFilter);
            return stream(checkpointEntryIterator)
                    .onClose(checkpointEntryIterator::close);
        }
        throw new IllegalArgumentException("Unsupported v2 checkpoint file format: " + checkpointFile.location());
    }

    public record MetadataAndProtocolEntry(MetadataEntry metadataEntry, ProtocolEntry protocolEntry)
    {
        public MetadataAndProtocolEntry
        {
            requireNonNull(metadataEntry, "metadataEntry is null");
            requireNonNull(protocolEntry, "protocolEntry is null");
        }
    }

    private List getCheckpointPartPaths(LastCheckpoint checkpoint)
    {
        Location transactionLogDir = Location.of(getTransactionLogDir(tableLocation));
        ImmutableList.Builder paths = ImmutableList.builder();
        if (checkpoint.v2Checkpoint().isPresent()) {
            verify(checkpoint.parts().isEmpty(), "v2 checkpoint should not have multi-part checkpoints");
            paths.add(transactionLogDir.appendPath(checkpoint.v2Checkpoint().get().path()));
        }
        else if (checkpoint.parts().isEmpty()) {
            paths.add(transactionLogDir.appendPath("%020d.checkpoint.parquet".formatted(checkpoint.version())));
        }
        else {
            int partsCount = checkpoint.parts().get();
            for (int i = 1; i <= partsCount; i++) {
                paths.add(transactionLogDir.appendPath("%020d.checkpoint.%010d.%010d.parquet".formatted(checkpoint.version(), i, partsCount)));
            }
        }
        return paths.build();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy