All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dinky.shaded.paimon.migrate.FileMetaUtils Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.migrate;

import org.dinky.shaded.paimon.data.BinaryRow;
import org.dinky.shaded.paimon.data.BinaryRowWriter;
import org.dinky.shaded.paimon.data.BinaryWriter;
import org.dinky.shaded.paimon.format.FieldStats;
import org.dinky.shaded.paimon.format.FileFormat;
import org.dinky.shaded.paimon.format.TableStatsExtractor;
import org.dinky.shaded.paimon.fs.FileIO;
import org.dinky.shaded.paimon.fs.FileStatus;
import org.dinky.shaded.paimon.fs.Path;
import org.dinky.shaded.paimon.io.CompactIncrement;
import org.dinky.shaded.paimon.io.DataFileMeta;
import org.dinky.shaded.paimon.io.NewFilesIncrement;
import org.dinky.shaded.paimon.statistics.FieldStatsCollector;
import org.dinky.shaded.paimon.stats.BinaryTableStats;
import org.dinky.shaded.paimon.stats.FieldStatsArraySerializer;
import org.dinky.shaded.paimon.table.AbstractFileStoreTable;
import org.dinky.shaded.paimon.table.Table;
import org.dinky.shaded.paimon.table.sink.CommitMessage;
import org.dinky.shaded.paimon.table.sink.CommitMessageImpl;
import org.dinky.shaded.paimon.types.DataField;
import org.dinky.shaded.paimon.types.RowType;
import org.dinky.shaded.paimon.utils.Pair;
import org.dinky.shaded.paimon.utils.StatsCollectorFactories;
import org.dinky.shaded.paimon.utils.TypeUtils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
import java.util.stream.Collectors;

/** To construct file meta data for external files. */
public class FileMetaUtils {

    private static final Logger LOG = LoggerFactory.getLogger(FileMetaUtils.class);

    public static List construct(
            FileIO fileIO,
            String format,
            String location,
            Table paimonTable,
            Predicate filter,
            Path dir,
            Map rollback)
            throws IOException {
        List fileStatuses =
                Arrays.stream(fileIO.listStatus(new Path(location)))
                        .filter(s -> !s.isDir())
                        .filter(filter)
                        .collect(Collectors.toList());

        return fileStatuses.stream()
                .map(
                        status ->
                                constructFileMeta(
                                        format, status, fileIO, paimonTable, dir, rollback))
                .collect(Collectors.toList());
    }

    public static CommitMessage commitFile(BinaryRow partition, List dataFileMetas) {
        return new CommitMessageImpl(
                partition,
                0,
                new NewFilesIncrement(dataFileMetas, Collections.emptyList()),
                new CompactIncrement(
                        Collections.emptyList(), Collections.emptyList(), Collections.emptyList()));
    }

    // -----------------------------private method---------------------------------------------

    private static DataFileMeta constructFileMeta(
            String format,
            FileStatus fileStatus,
            FileIO fileIO,
            Table table,
            Path dir,
            Map rollback) {

        try {
            FieldStatsCollector.Factory[] factories =
                    StatsCollectorFactories.createStatsFactories(
                            ((AbstractFileStoreTable) table).coreOptions(),
                            table.rowType().getFieldNames());

            TableStatsExtractor tableStatsExtractor =
                    FileFormat.getFileFormat(
                                    ((AbstractFileStoreTable) table)
                                            .coreOptions()
                                            .toConfiguration(),
                                    format)
                            .createStatsExtractor(table.rowType(), factories)
                            .orElseThrow(
                                    () ->
                                            new RuntimeException(
                                                    "Can't get table stats extractor for format "
                                                            + format));
            Path newPath = renameFile(fileIO, fileStatus.getPath(), dir, format, rollback);
            return constructFileMeta(
                    newPath.getName(),
                    fileStatus.getLen(),
                    newPath,
                    tableStatsExtractor,
                    fileIO,
                    table);
        } catch (IOException e) {
            throw new RuntimeException("error when construct file meta", e);
        }
    }

    private static Path renameFile(
            FileIO fileIO, Path originPath, Path newDir, String format, Map rollback)
            throws IOException {
        String subfix = "." + format;
        String fileName = originPath.getName();
        String newFileName = fileName.endsWith(subfix) ? fileName : fileName + "." + format;
        Path newPath = new Path(newDir, newFileName);
        rollback.put(newPath, originPath);
        LOG.info("Migration: rename file from " + originPath + " to " + newPath);
        fileIO.rename(originPath, newPath);
        return newPath;
    }

    private static DataFileMeta constructFileMeta(
            String fileName,
            long fileSize,
            Path path,
            TableStatsExtractor tableStatsExtractor,
            FileIO fileIO,
            Table table)
            throws IOException {
        FieldStatsArraySerializer statsArraySerializer =
                new FieldStatsArraySerializer(table.rowType());

        Pair fileInfo =
                tableStatsExtractor.extractWithFileInfo(fileIO, path);
        BinaryTableStats stats = statsArraySerializer.toBinary(fileInfo.getLeft());

        return DataFileMeta.forAppend(
                fileName,
                fileSize,
                fileInfo.getRight().getRowCount(),
                stats,
                0,
                0,
                ((AbstractFileStoreTable) table).schema().id());
    }

    public static BinaryRow writePartitionValue(
            RowType partitionRowType,
            Map partitionValues,
            List valueSetters) {

        BinaryRow binaryRow = new BinaryRow(partitionRowType.getFieldCount());
        BinaryRowWriter binaryRowWriter = new BinaryRowWriter(binaryRow);

        List fields = partitionRowType.getFields();

        for (int i = 0; i < fields.size(); i++) {
            Object value =
                    TypeUtils.castFromString(
                            partitionValues.get(fields.get(i).name()), fields.get(i).type());
            valueSetters.get(i).setValue(binaryRowWriter, i, value);
        }
        binaryRowWriter.complete();
        return binaryRow;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy