All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.netease.arctic.utils.SnapshotFileUtil Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.netease.arctic.utils;

import com.netease.arctic.iceberg.optimize.InternalRecordWrapper;
import com.netease.arctic.table.ArcticTable;
import com.netease.arctic.shade.org.apache.iceberg.DataFile;
import com.netease.arctic.shade.org.apache.iceberg.DeleteFile;
import com.netease.arctic.shade.org.apache.iceberg.FileContent;
import com.netease.arctic.shade.org.apache.iceberg.FileMetadata;
import com.netease.arctic.shade.org.apache.iceberg.HasTableOperations;
import com.netease.arctic.shade.org.apache.iceberg.MetadataTableType;
import com.netease.arctic.shade.org.apache.iceberg.MetadataTableUtils;
import com.netease.arctic.shade.org.apache.iceberg.Snapshot;
import com.netease.arctic.shade.org.apache.iceberg.Table;
import com.netease.arctic.shade.org.apache.iceberg.data.GenericRecord;
import com.netease.arctic.shade.org.apache.iceberg.data.IcebergGenerics;
import com.netease.arctic.shade.org.apache.iceberg.data.Record;
import com.netease.arctic.shade.org.apache.iceberg.expressions.Expressions;
import com.netease.arctic.shade.org.apache.iceberg.io.CloseableIterable;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class SnapshotFileUtil {
  private static final Logger LOG = LoggerFactory.getLogger(SnapshotFileUtil.class);

  public static void getSnapshotFiles(
      ArcticTable table, String innerTable, Snapshot snapshot,
      List addFiles,
      List deleteFiles) {
    Preconditions.checkNotNull(addFiles, "Add files to delete can not be null");
    Preconditions.checkNotNull(deleteFiles, "Delete files to delete can not be null");

    for (DataFile file : snapshot.addedFiles()) {
      addFiles.add(ConvertStructUtil.convertToAmsDatafile(file, table, innerTable));
    }
    for (DataFile file : snapshot.deletedFiles()) {
      deleteFiles.add(ConvertStructUtil.convertToAmsDatafile(file, table, innerTable));
    }

    table.io().doAs(() -> {
      List addIcebergFiles = new ArrayList<>();
      List deleteIcebergFiles = new ArrayList<>();
      getDeleteFiles(table, snapshot, addIcebergFiles, deleteIcebergFiles);
      addIcebergFiles.forEach(e -> addFiles.add(ConvertStructUtil.convertToAmsDatafile(e, table, innerTable)));
      deleteIcebergFiles.forEach(e -> deleteFiles.add(ConvertStructUtil.convertToAmsDatafile(e, table, innerTable)));
      return null;
    });

    LOG.debug("{} snapshot get {} add files count and {} delete file count.",
        snapshot.snapshotId(), addFiles.size(), deleteFiles.size());
  }

  public static void getDeleteFiles(
      ArcticTable table, Snapshot snapshot,
      List addFiles,
      List deleteFiles) {
    Table entriesTable = MetadataTableUtils.createMetadataTableInstance(((HasTableOperations) table).operations(),
        table.name(), table.name() + "#ENTRIES",
        MetadataTableType.ENTRIES);
    try (CloseableIterable manifests = IcebergGenerics.read(entriesTable)
        .useSnapshot(snapshot.snapshotId())
        .where(Expressions.equal(ManifestEntryFields.SNAPSHOT_ID.name(), snapshot.snapshotId()))
        .build()) {
      manifests.forEach(record -> {
        int status = (int) record.get(ManifestEntryFields.STATUS.fieldId());
        GenericRecord dataFile = (GenericRecord) record.get(ManifestEntryFields.DATA_FILE_ID);
        Integer contentId = (Integer) dataFile.getField(DataFile.CONTENT.name());
        if (contentId != null && contentId != 0) {
          String filePath = (String) dataFile.getField(DataFile.FILE_PATH.name());
          String partitionPath = null;
          GenericRecord parRecord = (GenericRecord) dataFile.getField(DataFile.PARTITION_NAME);
          if (parRecord != null) {
            InternalRecordWrapper wrapper = new InternalRecordWrapper(parRecord.struct());
            partitionPath = table.spec().partitionToPath(wrapper.wrap(parRecord));
          }
          Long fileSize = (Long) dataFile.getField(DataFile.FILE_SIZE.name());
          Long recordCount = (Long) dataFile.getField(DataFile.RECORD_COUNT.name());
          DeleteFile deleteFile;
          FileMetadata.Builder builder = FileMetadata.deleteFileBuilder(table.spec())
              .withPath(filePath)
              .withFileSizeInBytes(fileSize)
              .withRecordCount(recordCount);
          if (!table.spec().isUnpartitioned()) {
            builder.withPartitionPath(partitionPath);
          }
          if (contentId == FileContent.POSITION_DELETES.id()) {
            builder.ofPositionDeletes();
          } else {
            builder.ofEqualityDeletes();
          }
          deleteFile = builder.build();
          if (status == ManifestEntryFields.Status.DELETED.id()) {
            deleteFiles.add(deleteFile);
          } else if (status == ManifestEntryFields.Status.ADDED.id()) {
            addFiles.add(deleteFile);
          }
        }
      });
    } catch (IOException exception) {
      LOG.error("close manifest file error", exception);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy