/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.io;

import org.dinky.shaded.paimon.CoreOptions;
import org.dinky.shaded.paimon.data.BinaryRow;
import org.dinky.shaded.paimon.data.Timestamp;
import org.dinky.shaded.paimon.fs.Path;
import org.dinky.shaded.paimon.stats.BinaryTableStats;
import org.dinky.shaded.paimon.stats.FieldStatsArraySerializer;
import org.dinky.shaded.paimon.types.ArrayType;
import org.dinky.shaded.paimon.types.BigIntType;
import org.dinky.shaded.paimon.types.DataField;
import org.dinky.shaded.paimon.types.DataTypes;
import org.dinky.shaded.paimon.types.IntType;
import org.dinky.shaded.paimon.types.RowType;

import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;

import static org.dinky.shaded.paimon.data.BinaryRow.EMPTY_ROW;
import static org.dinky.shaded.paimon.utils.Preconditions.checkArgument;
import static org.dinky.shaded.paimon.utils.SerializationUtils.newBytesType;
import static org.dinky.shaded.paimon.utils.SerializationUtils.newStringType;

/** Metadata of a data file. */
public class DataFileMeta {

    // Append only data files don't have any key columns and meaningful level value. it will use
    // the following dummy values.
    public static final BinaryTableStats EMPTY_KEY_STATS =
            new BinaryTableStats(EMPTY_ROW, EMPTY_ROW, new Long[0]);
    public static final BinaryRow EMPTY_MIN_KEY = EMPTY_ROW;
    public static final BinaryRow EMPTY_MAX_KEY = EMPTY_ROW;
    public static final int DUMMY_LEVEL = 0;

    private final String fileName;
    private final long fileSize;
    private final long rowCount;

    private final BinaryRow minKey;
    private final BinaryRow maxKey;
    private final BinaryTableStats keyStats;
    private final BinaryTableStats valueStats;

    private final long minSequenceNumber;
    private final long maxSequenceNumber;
    private final long schemaId;
    private final int level;

    private final List extraFiles;
    private final Timestamp creationTime;

    public static DataFileMeta forAppend(
            String fileName,
            long fileSize,
            long rowCount,
            BinaryTableStats rowStats,
            long minSequenceNumber,
            long maxSequenceNumber,
            long schemaId) {
        return new DataFileMeta(
                fileName,
                fileSize,
                rowCount,
                EMPTY_MIN_KEY,
                EMPTY_MAX_KEY,
                EMPTY_KEY_STATS,
                rowStats,
                minSequenceNumber,
                maxSequenceNumber,
                schemaId,
                DUMMY_LEVEL);
    }

    public DataFileMeta(
            String fileName,
            long fileSize,
            long rowCount,
            BinaryRow minKey,
            BinaryRow maxKey,
            BinaryTableStats keyStats,
            BinaryTableStats valueStats,
            long minSequenceNumber,
            long maxSequenceNumber,
            long schemaId,
            int level) {
        this(
                fileName,
                fileSize,
                rowCount,
                minKey,
                maxKey,
                keyStats,
                valueStats,
                minSequenceNumber,
                maxSequenceNumber,
                schemaId,
                level,
                Collections.emptyList(),
                Timestamp.fromLocalDateTime(LocalDateTime.now()).toMillisTimestamp());
    }

    public DataFileMeta(
            String fileName,
            long fileSize,
            long rowCount,
            BinaryRow minKey,
            BinaryRow maxKey,
            BinaryTableStats keyStats,
            BinaryTableStats valueStats,
            long minSequenceNumber,
            long maxSequenceNumber,
            long schemaId,
            int level,
            List extraFiles,
            Timestamp creationTime) {
        this.fileName = fileName;
        this.fileSize = fileSize;
        this.rowCount = rowCount;

        this.minKey = minKey;
        this.maxKey = maxKey;
        this.keyStats = keyStats;
        this.valueStats = valueStats;

        this.minSequenceNumber = minSequenceNumber;
        this.maxSequenceNumber = maxSequenceNumber;
        this.level = level;
        this.schemaId = schemaId;
        this.extraFiles = Collections.unmodifiableList(extraFiles);
        this.creationTime = creationTime;
    }

    public String fileName() {
        return fileName;
    }

    public long fileSize() {
        return fileSize;
    }

    public long rowCount() {
        return rowCount;
    }

    public BinaryRow minKey() {
        return minKey;
    }

    public BinaryRow maxKey() {
        return maxKey;
    }

    public BinaryTableStats keyStats() {
        return keyStats;
    }

    public BinaryTableStats valueStats() {
        return valueStats;
    }

    public long minSequenceNumber() {
        return minSequenceNumber;
    }

    public long maxSequenceNumber() {
        return maxSequenceNumber;
    }

    public long schemaId() {
        return schemaId;
    }

    public int level() {
        return level;
    }

    /**
     * Usage:
     *
     * 
    *
  • Paimon 0.2 *
      *
    • Stores changelog files for {@link CoreOptions.ChangelogProducer#INPUT}. Changelog * files are moved to {@link NewFilesIncrement} since Paimon 0.3. *
    *
*/ public List extraFiles() { return extraFiles; } public Timestamp creationTime() { return creationTime; } public long creationTimeEpochMillis() { return creationTime .toLocalDateTime() .atZone(ZoneId.systemDefault()) .toInstant() .toEpochMilli(); } public Optional fileFormat() { String[] split = fileName.split("\\."); try { return Optional.of( CoreOptions.FileFormatType.valueOf(split[split.length - 1].toUpperCase())); } catch (IllegalArgumentException e) { return Optional.empty(); } } public DataFileMeta upgrade(int newLevel) { checkArgument(newLevel > this.level); return new DataFileMeta( fileName, fileSize, rowCount, minKey, maxKey, keyStats, valueStats, minSequenceNumber, maxSequenceNumber, schemaId, newLevel, extraFiles, creationTime); } public List collectFiles(DataFilePathFactory pathFactory) { List paths = new ArrayList<>(); paths.add(pathFactory.toPath(fileName)); extraFiles.forEach(f -> paths.add(pathFactory.toPath(f))); return paths; } public DataFileMeta copy(List newExtraFiles) { return new DataFileMeta( fileName, fileSize, rowCount, minKey, maxKey, keyStats, valueStats, minSequenceNumber, maxSequenceNumber, schemaId, level, newExtraFiles, creationTime); } @Override public boolean equals(Object o) { if (!(o instanceof DataFileMeta)) { return false; } DataFileMeta that = (DataFileMeta) o; return Objects.equals(fileName, that.fileName) && fileSize == that.fileSize && rowCount == that.rowCount && Objects.equals(minKey, that.minKey) && Objects.equals(maxKey, that.maxKey) && Objects.equals(keyStats, that.keyStats) && Objects.equals(valueStats, that.valueStats) && minSequenceNumber == that.minSequenceNumber && maxSequenceNumber == that.maxSequenceNumber && schemaId == that.schemaId && level == that.level && Objects.equals(extraFiles, that.extraFiles) && Objects.equals(creationTime, that.creationTime); } @Override public int hashCode() { return Objects.hash( fileName, fileSize, rowCount, minKey, maxKey, keyStats, valueStats, minSequenceNumber, 
maxSequenceNumber, schemaId, level, extraFiles, creationTime); } @Override public String toString() { return String.format( "{%s, %d, %d, %s, %s, %s, %s, %d, %d, %d, %d, %s, %s}", fileName, fileSize, rowCount, minKey, maxKey, keyStats, valueStats, minSequenceNumber, maxSequenceNumber, schemaId, level, extraFiles, creationTime); } public static RowType schema() { List fields = new ArrayList<>(); fields.add(new DataField(0, "_FILE_NAME", newStringType(false))); fields.add(new DataField(1, "_FILE_SIZE", new BigIntType(false))); fields.add(new DataField(2, "_ROW_COUNT", new BigIntType(false))); fields.add(new DataField(3, "_MIN_KEY", newBytesType(false))); fields.add(new DataField(4, "_MAX_KEY", newBytesType(false))); fields.add(new DataField(5, "_KEY_STATS", FieldStatsArraySerializer.schema())); fields.add(new DataField(6, "_VALUE_STATS", FieldStatsArraySerializer.schema())); fields.add(new DataField(7, "_MIN_SEQUENCE_NUMBER", new BigIntType(false))); fields.add(new DataField(8, "_MAX_SEQUENCE_NUMBER", new BigIntType(false))); fields.add(new DataField(9, "_SCHEMA_ID", new BigIntType(false))); fields.add(new DataField(10, "_LEVEL", new IntType(false))); fields.add(new DataField(11, "_EXTRA_FILES", new ArrayType(false, newStringType(false)))); fields.add(new DataField(12, "_CREATION_TIME", DataTypes.TIMESTAMP_MILLIS())); return new RowType(fields); } public static long getMaxSequenceNumber(List fileMetas) { return fileMetas.stream() .map(DataFileMeta::maxSequenceNumber) .max(Long::compare) .orElse(-1L); } }



