All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.trino.plugin.hudi.split.HudiSplitFactory Maven / Gradle / Ivy

There is a newer version: 465
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.hudi.split;

import com.google.common.collect.ImmutableList;
import io.trino.plugin.hive.HivePartitionKey;
import io.trino.plugin.hudi.HudiFileStatus;
import io.trino.plugin.hudi.HudiSplit;
import io.trino.plugin.hudi.HudiTableHandle;
import io.trino.spi.TrinoException;

import java.util.List;

import static io.trino.plugin.hudi.HudiErrorCode.HUDI_FILESYSTEM_ERROR;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;

public class HudiSplitFactory
{
    private static final double SPLIT_SLOP = 1.1;   // 10% slop/overflow allowed in bytes per split while generating splits

    private final HudiTableHandle hudiTableHandle;
    private final HudiSplitWeightProvider hudiSplitWeightProvider;

    public HudiSplitFactory(
            HudiTableHandle hudiTableHandle,
            HudiSplitWeightProvider hudiSplitWeightProvider)
    {
        this.hudiTableHandle = requireNonNull(hudiTableHandle, "hudiTableHandle is null");
        this.hudiSplitWeightProvider = requireNonNull(hudiSplitWeightProvider, "hudiSplitWeightProvider is null");
    }

    public List createSplits(List partitionKeys, HudiFileStatus fileStatus)
    {
        if (fileStatus.isDirectory()) {
            throw new TrinoException(HUDI_FILESYSTEM_ERROR, format("Not a valid location: %s", fileStatus.location()));
        }

        long fileSize = fileStatus.length();

        if (fileSize == 0) {
            return ImmutableList.of(new HudiSplit(
                    fileStatus.location().toString(),
                    0,
                    fileSize,
                    fileSize,
                    fileStatus.modificationTime(),
                    hudiTableHandle.getRegularPredicates(),
                    partitionKeys,
                    hudiSplitWeightProvider.calculateSplitWeight(fileSize)));
        }

        ImmutableList.Builder splits = ImmutableList.builder();
        long splitSize = fileStatus.blockSize();

        long bytesRemaining = fileSize;
        while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
            splits.add(new HudiSplit(
                    fileStatus.location().toString(),
                    fileSize - bytesRemaining,
                    splitSize,
                    fileSize,
                    fileStatus.modificationTime(),
                    hudiTableHandle.getRegularPredicates(),
                    partitionKeys,
                    hudiSplitWeightProvider.calculateSplitWeight(splitSize)));
            bytesRemaining -= splitSize;
        }
        if (bytesRemaining > 0) {
            splits.add(new HudiSplit(
                    fileStatus.location().toString(),
                    fileSize - bytesRemaining,
                    bytesRemaining,
                    fileSize,
                    fileStatus.modificationTime(),
                    hudiTableHandle.getRegularPredicates(),
                    partitionKeys,
                    hudiSplitWeightProvider.calculateSplitWeight(bytesRemaining)));
        }
        return splits.build();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy