
io.trino.plugin.deltalake.transactionlog.DeltaLakeDataFileCacheEntry

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.deltalake.transactionlog;

import com.google.common.collect.ImmutableList;
import com.google.errorprone.annotations.Immutable;

import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.airlift.slice.SizeOf.instanceSize;
import static java.util.Objects.requireNonNull;

@Immutable
public final class DeltaLakeDataFileCacheEntry
{
    // Shallow size of one instance, computed once via airlift's SizeOf.instanceSize
    private static final int INSTANCE_SIZE = instanceSize(DeltaLakeDataFileCacheEntry.class);

    private final long version;
    private final List<AddFileEntry> activeFiles;
    private volatile long retainedSize = -1;

    public DeltaLakeDataFileCacheEntry(long version, List<AddFileEntry> activeFiles)
    {
        this.version = version;
        this.activeFiles = ImmutableList.copyOf(requireNonNull(activeFiles, "activeFiles is null"));
    }

    public DeltaLakeDataFileCacheEntry withUpdatesApplied(List<DeltaLakeTransactionLogEntry> newEntries, long newVersion)
    {
        Map<String, AddFileEntry> activeJsonEntries = new LinkedHashMap<>();
        HashSet<String> removedFiles = new HashSet<>();

        // The JSON entries for the commits after the checkpoint need to be applied on top of the Parquet snapshot:
        // - Any files that have been removed need to be excluded
        // - Any files with newer add actions need to be updated with the most recent metadata
        newEntries.forEach(deltaLakeTransactionLogEntry -> {
            AddFileEntry addEntry = deltaLakeTransactionLogEntry.getAdd();
            if (addEntry != null) {
                activeJsonEntries.put(addEntry.getPath(), addEntry);
            }

            RemoveFileEntry removeEntry = deltaLakeTransactionLogEntry.getRemove();
            if (removeEntry != null) {
                activeJsonEntries.remove(removeEntry.getPath());
                removedFiles.add(removeEntry.getPath());
            }
        });

        Stream<AddFileEntry> filteredExistingEntries = activeFiles.stream()
                .filter(addEntry -> !removedFiles.contains(addEntry.getPath()) && !activeJsonEntries.containsKey(addEntry.getPath()));
        List<AddFileEntry> updatedActiveFileList =
                Stream.concat(filteredExistingEntries, activeJsonEntries.values().stream())
                        .collect(toImmutableList());

        return new DeltaLakeDataFileCacheEntry(newVersion, updatedActiveFileList);
    }
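
    // For illustration (not in the original source): given snapshot files {a, b} and new log
    // entries [remove(a), add(b'), add(c)], the replay above yields {b', c}: a is excluded,
    // b is superseded by its newer add entry b', and c is appended.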

    public long getVersion()
    {
        return version;
    }

    public List<AddFileEntry> getActiveFiles()
    {
        return activeFiles;
    }

    public long getRetainedSizeInBytes()
    {
        // Computed lazily and cached in a volatile field. Concurrent callers may compute it
        // more than once, but the result is deterministic, so the race is benign.
        if (retainedSize == -1) {
            retainedSize = INSTANCE_SIZE + activeFiles.stream()
                    .mapToLong(AddFileEntry::getRetainedSizeInBytes)
                    .sum();
        }
        return retainedSize;
    }
}
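
The replay performed by withUpdatesApplied can be exercised in isolation. Below is a minimal, self-contained sketch of the same last-writer-wins merge, using plain strings in place of AddFileEntry and RemoveFileEntry; ReplaySketch and its LogEntry types are hypothetical stand-ins, not part of the Trino API.

import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

// Hypothetical stand-in types: plain strings model file paths and add metadata.
public class ReplaySketch
{
    sealed interface LogEntry permits Add, Remove {}
    record Add(String path, String metadata) implements LogEntry {}
    record Remove(String path) implements LogEntry {}

    // Mirrors withUpdatesApplied: newer adds win, removed paths are excluded,
    // untouched snapshot files are kept, and insertion order is preserved.
    static Map<String, String> replay(Map<String, String> snapshot, List<LogEntry> newEntries)
    {
        Map<String, String> activeJsonEntries = new LinkedHashMap<>();
        Set<String> removedFiles = new HashSet<>();

        for (LogEntry entry : newEntries) {
            if (entry instanceof Add add) {
                activeJsonEntries.put(add.path(), add.metadata());
            }
            else if (entry instanceof Remove remove) {
                activeJsonEntries.remove(remove.path());
                removedFiles.add(remove.path());
            }
        }

        Map<String, String> updated = new LinkedHashMap<>();
        snapshot.forEach((path, metadata) -> {
            if (!removedFiles.contains(path) && !activeJsonEntries.containsKey(path)) {
                updated.put(path, metadata);
            }
        });
        updated.putAll(activeJsonEntries);
        return updated;
    }

    public static void main(String[] args)
    {
        Map<String, String> snapshot = new LinkedHashMap<>();
        snapshot.put("a.parquet", "v1");
        snapshot.put("b.parquet", "v1");

        List<LogEntry> log = List.of(new Remove("a.parquet"), new Add("b.parquet", "v2"), new Add("c.parquet", "v1"));
        System.out.println(replay(snapshot, log)); // prints {b.parquet=v2, c.parquet=v1}
    }
}

Note that, as in the real method, a path that is removed and then re-added within the same batch of entries survives through activeJsonEntries; removedFiles only filters out stale entries from the old snapshot.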
