All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dinky.shaded.paimon.Snapshot Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon;

import org.dinky.shaded.paimon.fs.FileIO;
import org.dinky.shaded.paimon.fs.Path;
import org.dinky.shaded.paimon.manifest.FileKind;
import org.dinky.shaded.paimon.manifest.ManifestEntry;
import org.dinky.shaded.paimon.manifest.ManifestFileMeta;
import org.dinky.shaded.paimon.manifest.ManifestList;
import org.dinky.shaded.paimon.operation.FileStoreScan;
import org.dinky.shaded.paimon.utils.JsonSerdeUtil;

import org.dinky.shaded.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonCreator;
import org.dinky.shaded.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonGetter;
import org.dinky.shaded.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonInclude;
import org.dinky.shaded.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonProperty;

import javax.annotation.Nullable;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;

/**
 * This file is the entrance to all data committed at some specific time point.
 *
 * 

Versioned change list: * *

    *
  • Version 1: Initial version for paimon <= 0.2. There is no "version" field in json file. *
  • Version 2: Introduced in paimon 0.3. Add "version" field and "changelogManifestList" field. *
  • Version 3: Introduced in paimon 0.4. Add "baseRecordCount" field, "deltaRecordCount" field * and "changelogRecordCount" field. *
* *

Unversioned change list: * *

    *
  • Since paimon 0.2 and paimon 0.3, commitIdentifier is changed from a String to a long value. * For paimon < 0.2, only Flink connectors have paimon sink and they use checkpointId as * commitIdentifier (which is a long value). Json can automatically perform type conversion so * there is no compatibility issue. *
*/ public class Snapshot { public static final long FIRST_SNAPSHOT_ID = 1; public static final int TABLE_STORE_02_VERSION = 1; private static final int CURRENT_VERSION = 3; private static final String FIELD_VERSION = "version"; private static final String FIELD_ID = "id"; private static final String FIELD_SCHEMA_ID = "schemaId"; private static final String FIELD_BASE_MANIFEST_LIST = "baseManifestList"; private static final String FIELD_DELTA_MANIFEST_LIST = "deltaManifestList"; private static final String FIELD_CHANGELOG_MANIFEST_LIST = "changelogManifestList"; private static final String FIELD_INDEX_MANIFEST = "indexManifest"; private static final String FIELD_COMMIT_USER = "commitUser"; private static final String FIELD_COMMIT_IDENTIFIER = "commitIdentifier"; private static final String FIELD_COMMIT_KIND = "commitKind"; private static final String FIELD_TIME_MILLIS = "timeMillis"; private static final String FIELD_LOG_OFFSETS = "logOffsets"; private static final String FIELD_TOTAL_RECORD_COUNT = "totalRecordCount"; private static final String FIELD_DELTA_RECORD_COUNT = "deltaRecordCount"; private static final String FIELD_CHANGELOG_RECORD_COUNT = "changelogRecordCount"; private static final String FIELD_WATERMARK = "watermark"; // version of snapshot // null for paimon <= 0.2 @JsonProperty(FIELD_VERSION) @Nullable private final Integer version; @JsonProperty(FIELD_ID) private final long id; @JsonProperty(FIELD_SCHEMA_ID) private final long schemaId; // a manifest list recording all changes from the previous snapshots @JsonProperty(FIELD_BASE_MANIFEST_LIST) private final String baseManifestList; // a manifest list recording all new changes occurred in this snapshot // for faster expire and streaming reads @JsonProperty(FIELD_DELTA_MANIFEST_LIST) private final String deltaManifestList; // a manifest list recording all changelog produced in this snapshot // null if no changelog is produced, or for paimon <= 0.2 @JsonProperty(FIELD_CHANGELOG_MANIFEST_LIST) @Nullable private final String changelogManifestList; // a manifest recording all index files of this table // null if no index file @JsonProperty(FIELD_INDEX_MANIFEST) @JsonInclude(JsonInclude.Include.NON_NULL) private final String indexManifest; @JsonProperty(FIELD_COMMIT_USER) private final String commitUser; // Mainly for snapshot deduplication. // // If multiple snapshots have the same commitIdentifier, reading from any of these snapshots // must produce the same table. // // If snapshot A has a smaller commitIdentifier than snapshot B, then snapshot A must be // committed before snapshot B, and thus snapshot A must contain older records than snapshot B. @JsonProperty(FIELD_COMMIT_IDENTIFIER) private final long commitIdentifier; @JsonProperty(FIELD_COMMIT_KIND) private final CommitKind commitKind; @JsonProperty(FIELD_TIME_MILLIS) private final long timeMillis; @JsonProperty(FIELD_LOG_OFFSETS) private final Map logOffsets; // record count of all changes occurred in this snapshot // null for paimon <= 0.3 @JsonProperty(FIELD_TOTAL_RECORD_COUNT) @Nullable private final Long totalRecordCount; // record count of all new changes occurred in this snapshot // null for paimon <= 0.3 @JsonProperty(FIELD_DELTA_RECORD_COUNT) @Nullable private final Long deltaRecordCount; // record count of all changelog produced in this snapshot // null for paimon <= 0.3 @JsonProperty(FIELD_CHANGELOG_RECORD_COUNT) @Nullable private final Long changelogRecordCount; // watermark for input records // null for paimon <= 0.3 // null if there is no watermark in new committing, and the previous snapshot does not have a // watermark @JsonProperty(FIELD_WATERMARK) @Nullable private final Long watermark; public Snapshot( long id, long schemaId, String baseManifestList, String deltaManifestList, @Nullable String changelogManifestList, @Nullable String indexManifest, String commitUser, long commitIdentifier, CommitKind commitKind, long timeMillis, Map logOffsets, @Nullable Long totalRecordCount, @Nullable Long deltaRecordCount, @Nullable Long changelogRecordCount, @Nullable Long watermark) { this( CURRENT_VERSION, id, schemaId, baseManifestList, deltaManifestList, changelogManifestList, indexManifest, commitUser, commitIdentifier, commitKind, timeMillis, logOffsets, totalRecordCount, deltaRecordCount, changelogRecordCount, watermark); } @JsonCreator public Snapshot( @JsonProperty(FIELD_VERSION) @Nullable Integer version, @JsonProperty(FIELD_ID) long id, @JsonProperty(FIELD_SCHEMA_ID) long schemaId, @JsonProperty(FIELD_BASE_MANIFEST_LIST) String baseManifestList, @JsonProperty(FIELD_DELTA_MANIFEST_LIST) String deltaManifestList, @JsonProperty(FIELD_CHANGELOG_MANIFEST_LIST) @Nullable String changelogManifestList, @JsonProperty(FIELD_INDEX_MANIFEST) @Nullable String indexManifest, @JsonProperty(FIELD_COMMIT_USER) String commitUser, @JsonProperty(FIELD_COMMIT_IDENTIFIER) long commitIdentifier, @JsonProperty(FIELD_COMMIT_KIND) CommitKind commitKind, @JsonProperty(FIELD_TIME_MILLIS) long timeMillis, @JsonProperty(FIELD_LOG_OFFSETS) Map logOffsets, @JsonProperty(FIELD_TOTAL_RECORD_COUNT) Long totalRecordCount, @JsonProperty(FIELD_DELTA_RECORD_COUNT) Long deltaRecordCount, @JsonProperty(FIELD_CHANGELOG_RECORD_COUNT) Long changelogRecordCount, @JsonProperty(FIELD_WATERMARK) Long watermark) { this.version = version; this.id = id; this.schemaId = schemaId; this.baseManifestList = baseManifestList; this.deltaManifestList = deltaManifestList; this.changelogManifestList = changelogManifestList; this.indexManifest = indexManifest; this.commitUser = commitUser; this.commitIdentifier = commitIdentifier; this.commitKind = commitKind; this.timeMillis = timeMillis; this.logOffsets = logOffsets; this.totalRecordCount = totalRecordCount; this.deltaRecordCount = deltaRecordCount; this.changelogRecordCount = changelogRecordCount; this.watermark = watermark; } @JsonGetter(FIELD_VERSION) public int version() { // there is no version field for paimon <= 0.2 return version == null ? TABLE_STORE_02_VERSION : version; } @JsonGetter(FIELD_ID) public long id() { return id; } @JsonGetter(FIELD_SCHEMA_ID) public long schemaId() { return schemaId; } @JsonGetter(FIELD_BASE_MANIFEST_LIST) public String baseManifestList() { return baseManifestList; } @JsonGetter(FIELD_DELTA_MANIFEST_LIST) public String deltaManifestList() { return deltaManifestList; } @JsonGetter(FIELD_CHANGELOG_MANIFEST_LIST) @Nullable public String changelogManifestList() { return changelogManifestList; } @JsonGetter(FIELD_INDEX_MANIFEST) @Nullable public String indexManifest() { return indexManifest; } @JsonGetter(FIELD_COMMIT_USER) public String commitUser() { return commitUser; } @JsonGetter(FIELD_COMMIT_IDENTIFIER) public long commitIdentifier() { return commitIdentifier; } @JsonGetter(FIELD_COMMIT_KIND) public CommitKind commitKind() { return commitKind; } @JsonGetter(FIELD_TIME_MILLIS) public long timeMillis() { return timeMillis; } @JsonGetter(FIELD_LOG_OFFSETS) public Map logOffsets() { return logOffsets; } @JsonGetter(FIELD_TOTAL_RECORD_COUNT) @Nullable public Long totalRecordCount() { return totalRecordCount; } @JsonGetter(FIELD_DELTA_RECORD_COUNT) @Nullable public Long deltaRecordCount() { return deltaRecordCount; } @JsonGetter(FIELD_CHANGELOG_RECORD_COUNT) @Nullable public Long changelogRecordCount() { return changelogRecordCount; } @JsonGetter(FIELD_WATERMARK) @Nullable public Long watermark() { return watermark; } /** * Return all {@link ManifestFileMeta} instances for either data or changelog manifests in this * snapshot. * * @param manifestList a {@link ManifestList} instance used for reading files at snapshot. * @return a list of ManifestFileMeta. */ public List allManifests(ManifestList manifestList) { List result = new ArrayList<>(); result.addAll(dataManifests(manifestList)); result.addAll(changelogManifests(manifestList)); return result; } /** * Return a {@link ManifestFileMeta} for each data manifest in this snapshot. * * @param manifestList a {@link ManifestList} instance used for reading files at snapshot. * @return a list of ManifestFileMeta. */ public List dataManifests(ManifestList manifestList) { List result = new ArrayList<>(); result.addAll(manifestList.read(baseManifestList)); result.addAll(deltaManifests(manifestList)); return result; } /** * Return a {@link ManifestFileMeta} for each delta manifest in this snapshot. * * @param manifestList a {@link ManifestList} instance used for reading files at snapshot. * @return a list of ManifestFileMeta. */ public List deltaManifests(ManifestList manifestList) { return manifestList.read(deltaManifestList); } /** * Return a {@link ManifestFileMeta} for each changelog manifest in this snapshot. * * @param manifestList a {@link ManifestList} instance used for reading files at snapshot. * @return a list of ManifestFileMeta. */ public List changelogManifests(ManifestList manifestList) { return changelogManifestList == null ? Collections.emptyList() : manifestList.read(changelogManifestList); } /** * Return record count of all changes occurred in this snapshot given the scan. * * @param scan a {@link FileStoreScan} instance used for count of reading files at snapshot. * @return total record count of Snapshot. */ public Long totalRecordCount(FileStoreScan scan) { return totalRecordCount == null ? recordCount(scan.withSnapshot(id).plan().files()) : totalRecordCount; } public static long recordCount(List manifestEntries) { return manifestEntries.stream().mapToLong(manifest -> manifest.file().rowCount()).sum(); } public static long recordCountAdd(List manifestEntries) { return manifestEntries.stream() .filter(manifestEntry -> FileKind.ADD.equals(manifestEntry.kind())) .mapToLong(manifest -> manifest.file().rowCount()) .sum(); } public static long recordCountDelete(List manifestEntries) { return manifestEntries.stream() .filter(manifestEntry -> FileKind.DELETE.equals(manifestEntry.kind())) .mapToLong(manifest -> manifest.file().rowCount()) .sum(); } public String toJson() { return JsonSerdeUtil.toJson(this); } public static Snapshot fromJson(String json) { return JsonSerdeUtil.fromJson(json, Snapshot.class); } public static Snapshot fromPath(FileIO fileIO, Path path) { try { String json = fileIO.readFileUtf8(path); return Snapshot.fromJson(json); } catch (IOException e) { throw new RuntimeException("Fails to read snapshot from path " + path, e); } } public static Optional safelyFromPath(FileIO fileIO, Path path) throws IOException { try { String json = fileIO.readFileUtf8(path); return Optional.of(Snapshot.fromJson(json)); } catch (FileNotFoundException e) { return Optional.empty(); } } @Override public int hashCode() { return Objects.hash( version, id, schemaId, baseManifestList, deltaManifestList, changelogManifestList, indexManifest, commitUser, commitIdentifier, commitKind, timeMillis, logOffsets, totalRecordCount, deltaRecordCount, changelogRecordCount, watermark); } @Override public boolean equals(Object o) { if (!(o instanceof Snapshot)) { return false; } Snapshot that = (Snapshot) o; return Objects.equals(version, that.version) && id == that.id && schemaId == that.schemaId && Objects.equals(baseManifestList, that.baseManifestList) && Objects.equals(deltaManifestList, that.deltaManifestList) && Objects.equals(changelogManifestList, that.changelogManifestList) && Objects.equals(indexManifest, that.indexManifest) && Objects.equals(commitUser, that.commitUser) && commitIdentifier == that.commitIdentifier && commitKind == that.commitKind && timeMillis == that.timeMillis && Objects.equals(logOffsets, that.logOffsets) && Objects.equals(totalRecordCount, that.totalRecordCount) && Objects.equals(deltaRecordCount, that.deltaRecordCount) && Objects.equals(changelogRecordCount, that.changelogRecordCount) && Objects.equals(watermark, that.watermark); } /** Type of changes in this snapshot. */ public enum CommitKind { /** Changes flushed from the mem table. */ APPEND, /** Changes by compacting existing data files. */ COMPACT, /** Changes that clear up the whole partition and then add new records. */ OVERWRITE } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy