/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.hudi.table;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import io.airlift.json.ObjectMapperProvider;
import io.airlift.log.Logger;
import io.trino.filesystem.FileEntry;
import io.trino.filesystem.FileIterator;
import io.trino.filesystem.Location;
import io.trino.plugin.hudi.compaction.CompactionOperation;
import io.trino.plugin.hudi.compaction.HudiCompactionOperation;
import io.trino.plugin.hudi.compaction.HudiCompactionPlan;
import io.trino.plugin.hudi.files.HudiBaseFile;
import io.trino.plugin.hudi.files.HudiFileGroup;
import io.trino.plugin.hudi.files.HudiFileGroupId;
import io.trino.plugin.hudi.files.HudiLogFile;
import io.trino.plugin.hudi.model.HudiFileFormat;
import io.trino.plugin.hudi.model.HudiInstant;
import io.trino.plugin.hudi.model.HudiReplaceCommitMetadata;
import io.trino.plugin.hudi.timeline.HudiTimeline;
import io.trino.spi.TrinoException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.FileReader;
import org.apache.avro.file.SeekableByteArrayInput;
import org.apache.avro.io.DatumReader;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificRecordBase;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Stream;

import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static io.trino.plugin.hudi.HudiErrorCode.HUDI_BAD_DATA;
import static io.trino.plugin.hudi.files.FSUtils.LOG_FILE_PATTERN;
import static io.trino.plugin.hudi.files.FSUtils.getPartitionLocation;
import static java.util.function.Function.identity;
import static java.util.stream.Collectors.groupingBy;

public class HudiTableFileSystemView
{
    private static final Logger LOG = Logger.get(HudiTableFileSystemView.class);
    private static final Integer VERSION_2 = 2;

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapperProvider().get();
    // Locks to control concurrency. Sync operations use write-lock blocking all fetch operations.
    // For the common-case, we allow concurrent read of single or multiple partitions
    private final ReentrantReadWriteLock globalLock = new ReentrantReadWriteLock();
    private final ReentrantReadWriteLock.ReadLock readLock = globalLock.readLock();
    // Used to concurrently load and populate partition views
    private final ConcurrentHashMap<String, Boolean> addedPartitions = new ConcurrentHashMap<>(4096);
    private final HudiTableMetaClient metaClient;
    private final HudiTimeline visibleCommitsAndCompactionTimeline;

    private boolean closed;
    private Map<String, List<HudiFileGroup>> partitionToFileGroupsMap;
    private Map<HudiFileGroupId, Entry<String, CompactionOperation>> fgIdToPendingCompaction;
    private Map<HudiFileGroupId, HudiInstant> fgIdToReplaceInstants;

    public HudiTableFileSystemView(HudiTableMetaClient metaClient, HudiTimeline visibleActiveTimeline)
    {
        partitionToFileGroupsMap = new ConcurrentHashMap<>();
        this.metaClient = metaClient;
        this.visibleCommitsAndCompactionTimeline = visibleActiveTimeline.getWriteTimeline();
        resetFileGroupsReplaced(visibleCommitsAndCompactionTimeline);
        resetPendingCompactionOperations(getAllPendingCompactionOperations(metaClient)
                .values().stream()
                .map(pair -> Map.entry(pair.getKey(), CompactionOperation.convertFromAvroRecordInstance(pair.getValue()))));
    }

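    // Builds a map from file group id to (instant time, compaction operation) across all
    // pending compaction plans, failing if a file group has two conflicting pending operations.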
    private static Map<HudiFileGroupId, Entry<String, HudiCompactionOperation>> getAllPendingCompactionOperations(
            HudiTableMetaClient metaClient)
    {
        List<Entry<HudiInstant, HudiCompactionPlan>> pendingCompactionPlanWithInstants =
                getAllPendingCompactionPlans(metaClient);

        Map<HudiFileGroupId, Entry<String, HudiCompactionOperation>> fgIdToPendingCompactionWithInstantMap = new HashMap<>();
        pendingCompactionPlanWithInstants.stream()
                .flatMap(instantPlanPair -> getPendingCompactionOperations(instantPlanPair.getKey(), instantPlanPair.getValue()))
                .forEach(pair -> {
                    if (fgIdToPendingCompactionWithInstantMap.containsKey(pair.getKey())) {
                        HudiCompactionOperation operation = pair.getValue().getValue();
                        HudiCompactionOperation anotherOperation = fgIdToPendingCompactionWithInstantMap.get(pair.getKey()).getValue();

                        if (!operation.equals(anotherOperation)) {
                            String msg = "Hudi File Id (" + pair.getKey() + ") has more than 1 pending compactions. Instants: "
                                    + pair.getValue() + ", " + fgIdToPendingCompactionWithInstantMap.get(pair.getKey());
                            throw new IllegalStateException(msg);
                        }
                    }
                    fgIdToPendingCompactionWithInstantMap.put(pair.getKey(), pair.getValue());
                });
        return fgIdToPendingCompactionWithInstantMap;
    }

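    // Reads the compaction plan of every pending compaction instant on the active timeline.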
    private static List<Entry<HudiInstant, HudiCompactionPlan>> getAllPendingCompactionPlans(
            HudiTableMetaClient metaClient)
    {
        List<HudiInstant> pendingCompactionInstants =
                metaClient.getActiveTimeline()
                        .filterPendingCompactionTimeline()
                        .getInstants()
                        .collect(toImmutableList());
        return pendingCompactionInstants.stream()
                .map(instant -> {
                    try {
                        return Map.entry(instant, getCompactionPlan(metaClient, instant.getTimestamp()));
                    }
                    catch (IOException e) {
                        throw new TrinoException(HUDI_BAD_DATA, e);
                    }
                })
                .collect(toImmutableList());
    }

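    // Deserializes the requested compaction plan for the given instant and upgrades it
    // to the latest metadata version.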
    private static HudiCompactionPlan getCompactionPlan(HudiTableMetaClient metaClient, String compactionInstant)
            throws IOException
    {
        HudiCompactionPlan compactionPlan = deserializeAvroMetadata(
                metaClient
                        .getActiveTimeline()
                        .readCompactionPlanAsBytes(HudiTimeline.getCompactionRequestedInstant(compactionInstant)).get(),
                HudiCompactionPlan.class);
        return upgradeToLatest(compactionPlan, compactionPlan.getVersion());
    }

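    // V1 compaction plans store full file paths; V2 stores only file names, so upgrading
    // strips the directory portion of each data and delta file path.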
    private static HudiCompactionPlan upgradeToLatest(HudiCompactionPlan metadata, int metadataVersion)
    {
        if (metadataVersion == VERSION_2) {
            return metadata;
        }
        checkState(metadataVersion == 1, "Lowest supported metadata version is 1");
        List<HudiCompactionOperation> v2CompactionOperationList = new ArrayList<>();
        if (null != metadata.getOperations()) {
            v2CompactionOperationList = metadata.getOperations().stream()
                    .map(compactionOperation ->
                            HudiCompactionOperation.newBuilder()
                                    .setBaseInstantTime(compactionOperation.getBaseInstantTime())
                                    .setFileId(compactionOperation.getFileId())
                                    .setPartitionPath(compactionOperation.getPartitionPath())
                                    .setMetrics(compactionOperation.getMetrics())
                                    .setDataFilePath(compactionOperation.getDataFilePath() == null ? null : Location.of(compactionOperation.getDataFilePath()).fileName())
                                    .setDeltaFilePaths(compactionOperation.getDeltaFilePaths().stream().map(filePath -> Location.of(filePath).fileName()).collect(toImmutableList()))
                                    .build())
                    .collect(toImmutableList());
        }
        return new HudiCompactionPlan(v2CompactionOperationList, metadata.getExtraMetadata(), VERSION_2);
    }

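    // Reads a single Avro specific record of the requested type from serialized metadata bytes.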
    private static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes, Class<T> clazz)
            throws IOException
    {
        DatumReader<T> reader = new SpecificDatumReader<>(clazz);
        try (FileReader<T> fileReader = DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader)) {
            checkState(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
            return fileReader.next();
        }
    }

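    // Flattens a compaction plan into (file group id -> (instant time, operation)) entries.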
    private static Stream<Entry<HudiFileGroupId, Entry<String, HudiCompactionOperation>>> getPendingCompactionOperations(
            HudiInstant instant, HudiCompactionPlan compactionPlan)
    {
        List<HudiCompactionOperation> ops = compactionPlan.getOperations();
        if (null != ops) {
            return ops.stream().map(op -> Map.entry(
                    new HudiFileGroupId(op.getPartitionPath(), op.getFileId()),
                    Map.entry(instant.getTimestamp(), op)));
        }
        return Stream.empty();
    }

    private void resetPendingCompactionOperations(Stream<Entry<String, CompactionOperation>> operations)
    {
        this.fgIdToPendingCompaction = operations.collect(toImmutableMap(
                entry -> entry.getValue().getFileGroupId(),
                identity()));
    }

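    // Scans all completed REPLACE commits and records, per file group, the instant that
    // replaced it; replaced file groups are filtered out of every fetch operation.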
    private void resetFileGroupsReplaced(HudiTimeline timeline)
    {
        // for each REPLACE instant, get the file groups it replaced, keyed by (partitionPath, fileId)
        HudiTimeline replacedTimeline = timeline.getCompletedReplaceTimeline();
        Map<HudiFileGroupId, HudiInstant> replacedFileGroups = replacedTimeline.getInstants()
                .flatMap(instant -> {
                    try {
                        HudiReplaceCommitMetadata replaceMetadata = HudiReplaceCommitMetadata.fromBytes(
                                metaClient.getActiveTimeline().getInstantDetails(instant).get(),
                                OBJECT_MAPPER,
                                HudiReplaceCommitMetadata.class);

                        // get replace instant mapping for each partition, fileId
                        return replaceMetadata.getPartitionToReplaceFileIds().entrySet().stream()
                                .flatMap(entry -> entry.getValue().stream().map(fileId ->
                                        Map.entry(new HudiFileGroupId(entry.getKey(), fileId), instant)));
                    }
                    catch (IOException e) {
                        throw new TrinoException(HUDI_BAD_DATA, "error reading commit metadata for " + instant, e);
                    }
                })
                .collect(toImmutableMap(Entry::getKey, Entry::getValue));
        fgIdToReplaceInstants = new ConcurrentHashMap<>(replacedFileGroups);
    }

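    /**
     * Streams the latest base file of each live (non-replaced) file group in the given
     * partition, loading the partition's file listing on first access.
     */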
    public final Stream<HudiBaseFile> getLatestBaseFiles(String partitionStr)
    {
        readLock.lock();
        try {
            String partitionPath = formatPartitionKey(partitionStr);
            ensurePartitionLoadedCorrectly(partitionPath);
            return fetchLatestBaseFiles(partitionPath)
                    .filter(hudiBaseFile -> !isFileGroupReplaced(partitionPath, hudiBaseFile.getFileId()));
        }
        finally {
            readLock.unlock();
        }
    }

    private boolean isFileGroupReplaced(String partitionPath, String fileId)
    {
        return isFileGroupReplaced(new HudiFileGroupId(partitionPath, fileId));
    }

    private String formatPartitionKey(String partitionStr)
    {
        return partitionStr.endsWith("/") ? partitionStr.substring(0, partitionStr.length() - 1) : partitionStr;
    }

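    // Loads the partition at most once: computeIfAbsent guarantees that concurrent callers
    // do not list and build the same partition view twice.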
    private void ensurePartitionLoadedCorrectly(String partition)
    {
        checkState(!isClosed(), "View is already closed");

        addedPartitions.computeIfAbsent(partition, (partitionPathStr) -> {
            long beginTs = System.currentTimeMillis();
            if (!isPartitionAvailableInStore(partitionPathStr)) {
                // Not loaded yet
                try {
                    LOG.debug("Building file system view for partition (%s)", partitionPathStr);

                    Location partitionLocation = getPartitionLocation(metaClient.getBasePath(), partitionPathStr);
                    FileIterator partitionFiles = listPartition(partitionLocation);
                    List<HudiFileGroup> groups = addFilesToView(partitionFiles);

                    if (groups.isEmpty()) {
                        storePartitionView(partitionPathStr, new ArrayList<>());
                    }
                }
                catch (IOException e) {
                    throw new TrinoException(HUDI_BAD_DATA, "Failed to list base files in partition " + partitionPathStr, e);
                }
            }
            else {
                LOG.debug("View already built for Partition :%s, FOUND is ", partitionPathStr);
            }
            long endTs = System.currentTimeMillis();
            LOG.debug("Time to load partition (%s) =%s", partitionPathStr, endTs - beginTs);
            return true;
        });
    }

    protected boolean isPartitionAvailableInStore(String partitionPath)
    {
        return partitionToFileGroupsMap.containsKey(partitionPath);
    }

    private FileIterator listPartition(Location partitionLocation)
            throws IOException
    {
        FileIterator fileIterator = metaClient.getFileSystem().listFiles(partitionLocation);
        if (fileIterator.hasNext()) {
            return fileIterator;
        }
        return FileIterator.empty();
    }

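    /**
     * Builds file groups from the listed partition files and caches them by partition path
     * for subsequent fetch operations.
     */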
    public List<HudiFileGroup> addFilesToView(FileIterator partitionFiles)
            throws IOException
    {
        List fileGroups = buildFileGroups(partitionFiles, visibleCommitsAndCompactionTimeline, true);
        // Group file groups by partition so each partition's view is stored in a single update.
        fileGroups.stream()
                .collect(groupingBy(HudiFileGroup::getPartitionPath))
                .forEach((partition, value) -> {
                    if (!isPartitionAvailableInStore(partition)) {
                        storePartitionView(partition, value);
                    }
                });
        return fileGroups;
    }

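    // Splits the partition listing into base files (by the table's base file extension)
    // and log files (by name pattern), then delegates to the stream-based overload.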
    private List<HudiFileGroup> buildFileGroups(
            FileIterator partitionFiles,
            HudiTimeline timeline,
            boolean addPendingCompactionFileSlice)
            throws IOException
    {
        List<HudiBaseFile> hoodieBaseFiles = new ArrayList<>();
        List<HudiLogFile> hudiLogFiles = new ArrayList<>();
        String baseHoodieFileExtension = metaClient.getTableConfig().getBaseFileFormat().getFileExtension();
        while (partitionFiles.hasNext()) {
            FileEntry fileEntry = partitionFiles.next();
            if (fileEntry.location().path().contains(baseHoodieFileExtension)) {
                hoodieBaseFiles.add(new HudiBaseFile(fileEntry));
            }
            String fileName = fileEntry.location().fileName();
            if (LOG_FILE_PATTERN.matcher(fileName).matches() && fileName.contains(HudiFileFormat.HOODIE_LOG.getFileExtension())) {
                hudiLogFiles.add(new HudiLogFile(fileEntry));
            }
        }
        return buildFileGroups(hoodieBaseFiles.stream(), hudiLogFiles.stream(), timeline, addPendingCompactionFileSlice);
    }

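    // Groups base and log files by (partition path, file id) and assembles one HudiFileGroup
    // per key, optionally adding an empty file slice for a pending compaction so that new
    // ingestion targets the correct base instant.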
    private List<HudiFileGroup> buildFileGroups(
            Stream<HudiBaseFile> baseFileStream,
            Stream<HudiLogFile> logFileStream,
            HudiTimeline timeline,
            boolean addPendingCompactionFileSlice)
    {
        Map<Entry<String, String>, List<HudiBaseFile>> baseFiles = baseFileStream
                .collect(groupingBy(baseFile -> {
                    String partitionPathStr = getPartitionPathFor(baseFile);
                    return Map.entry(partitionPathStr, baseFile.getFileId());
                }));

        Map<Entry<String, String>, List<HudiLogFile>> logFiles = logFileStream
                .collect(groupingBy((logFile) -> {
                    String partitionPathStr = getRelativePartitionPath(metaClient.getBasePath(), logFile.getPath().parentDirectory());
                    return Map.entry(partitionPathStr, logFile.getFileId());
                }));

        Set<Entry<String, String>> fileIdSet = new HashSet<>(baseFiles.keySet());
        fileIdSet.addAll(logFiles.keySet());

        List<HudiFileGroup> fileGroups = new ArrayList<>();
        fileIdSet.forEach(pair -> {
            String fileId = pair.getValue();
            String partitionPath = pair.getKey();
            HudiFileGroup group = new HudiFileGroup(partitionPath, fileId, timeline);
            if (baseFiles.containsKey(pair)) {
                baseFiles.get(pair).forEach(group::addBaseFile);
            }
            if (logFiles.containsKey(pair)) {
                logFiles.get(pair).forEach(group::addLogFile);
            }

            if (addPendingCompactionFileSlice) {
                Optional<Entry<String, CompactionOperation>> pendingCompaction =
                        getPendingCompactionOperationWithInstant(group.getFileGroupId());
                // If there is no delta-commit after compaction request, this step would ensure a new file-slice appears
                // so that any new ingestion uses the correct base-instant
                pendingCompaction.ifPresent(entry ->
                        group.addNewFileSliceAtInstant(entry.getKey()));
            }
            fileGroups.add(group);
        });

        return fileGroups;
    }

    private String getPartitionPathFor(HudiBaseFile baseFile)
    {
        return getRelativePartitionPath(metaClient.getBasePath(), baseFile.getFullPath().parentDirectory());
    }

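    // Returns the partition path relative to the table base location; empty for
    // non-partitioned tables.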
    private String getRelativePartitionPath(Location basePath, Location fullPartitionPath)
    {
        String fullPartitionPathStr = fullPartitionPath.path();

        if (!fullPartitionPathStr.startsWith(basePath.path())) {
            throw new IllegalArgumentException("Partition location does not belong to base-location");
        }

        int partitionStartIndex = fullPartitionPath.path().indexOf(basePath.fileName(), basePath.parentDirectory().path().length());
        // Partition-Path could be empty for non-partitioned tables
        if (partitionStartIndex + basePath.fileName().length() == fullPartitionPathStr.length()) {
            return "";
        }
        return fullPartitionPathStr.substring(partitionStartIndex + basePath.fileName().length() + 1);
    }

    protected Optional<Entry<String, CompactionOperation>> getPendingCompactionOperationWithInstant(HudiFileGroupId fgId)
    {
        return Optional.ofNullable(fgIdToPendingCompaction.get(fgId));
    }

    private void storePartitionView(String partitionPath, List<HudiFileGroup> fileGroups)
    {
        LOG.debug("Adding file-groups for partition: %s, #FileGroups=%s", partitionPath, fileGroups.size());
        List<HudiFileGroup> newList = ImmutableList.copyOf(fileGroups);
        partitionToFileGroupsMap.put(partitionPath, newList);
    }

    private Stream<HudiBaseFile> fetchLatestBaseFiles(final String partitionPath)
    {
        return fetchAllStoredFileGroups(partitionPath)
                .filter(fileGroup -> !isFileGroupReplaced(fileGroup.getFileGroupId()))
                .map(fileGroup -> Map.entry(fileGroup.getFileGroupId(), getLatestBaseFile(fileGroup)))
                .filter(pair -> pair.getValue().isPresent())
                .map(pair -> pair.getValue().get());
    }

    private Stream<HudiFileGroup> fetchAllStoredFileGroups(String partition)
    {
        final List<HudiFileGroup> fileGroups = ImmutableList.copyOf(partitionToFileGroupsMap.get(partition));
        return fileGroups.stream();
    }

    private boolean isFileGroupReplaced(HudiFileGroupId fileGroupId)
    {
        return fgIdToReplaceInstants.containsKey(fileGroupId);
    }

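    // Returns the latest base file in the file group whose commit is neither a pending
    // compaction instant nor a pending clustering (replacecommit) instant.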
    protected Optional<HudiBaseFile> getLatestBaseFile(HudiFileGroup fileGroup)
    {
        return fileGroup.getAllBaseFiles()
                .filter(hudiBaseFile -> !isBaseFileDueToPendingCompaction(hudiBaseFile) && !isBaseFileDueToPendingClustering(hudiBaseFile))
                .findFirst();
    }

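    // A base file whose commit time matches a pending compaction instant was written by
    // that compaction and is not yet committed, so it must not be read.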
    private boolean isBaseFileDueToPendingCompaction(HudiBaseFile baseFile)
    {
        final String partitionPath = getPartitionPathFor(baseFile);

        Optional<Entry<String, CompactionOperation>> compactionWithInstantTime =
                getPendingCompactionOperationWithInstant(new HudiFileGroupId(partitionPath, baseFile.getFileId()));
        return compactionWithInstantTime.isPresent() && (null != compactionWithInstantTime.get().getKey())
                && baseFile.getCommitTime().equals(compactionWithInstantTime.get().getKey());
    }

    private boolean isBaseFileDueToPendingClustering(HudiBaseFile baseFile)
    {
        List<String> pendingReplaceInstants = metaClient.getActiveTimeline()
                .filterPendingReplaceTimeline()
                .getInstants()
                .map(HudiInstant::getTimestamp)
                .collect(toImmutableList());

        return !pendingReplaceInstants.isEmpty() && pendingReplaceInstants.contains(baseFile.getCommitTime());
    }

    public boolean isClosed()
    {
        return closed;
    }

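    // Drops all cached state; subsequent partition loads fail the isClosed() check in
    // ensurePartitionLoadedCorrectly.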
    public void close()
    {
        this.fgIdToPendingCompaction = null;
        this.partitionToFileGroupsMap = null;
        this.fgIdToReplaceInstants = null;
        closed = true;
    }
}
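
// Usage sketch (hypothetical caller; HudiTableMetaClient construction and the partition
// name below are illustrative, not part of this class):
//
//   HudiTableFileSystemView view = new HudiTableFileSystemView(metaClient, metaClient.getActiveTimeline());
//   List<HudiBaseFile> latestBaseFiles = view.getLatestBaseFiles("date=2024-01-01")
//           .collect(toImmutableList());
//   view.close();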