/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.netease.arctic.shade.org.apache.iceberg;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Arrays;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import com.netease.arctic.shade.org.apache.iceberg.events.CreateSnapshotEvent;
import com.netease.arctic.shade.org.apache.iceberg.exceptions.RuntimeIOException;
import com.netease.arctic.shade.org.apache.iceberg.exceptions.ValidationException;
import com.netease.arctic.shade.org.apache.iceberg.expressions.Expression;
import com.netease.arctic.shade.org.apache.iceberg.expressions.Expressions;
import com.netease.arctic.shade.org.apache.iceberg.io.CloseableIterator;
import com.netease.arctic.shade.org.apache.iceberg.io.InputFile;
import com.netease.arctic.shade.org.apache.iceberg.io.OutputFile;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.base.Predicate;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Iterators;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Lists;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Maps;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Sets;
import com.netease.arctic.shade.org.apache.iceberg.util.CharSequenceSet;
import com.netease.arctic.shade.org.apache.iceberg.util.Pair;
import com.netease.arctic.shade.org.apache.iceberg.util.SnapshotUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static com.netease.arctic.shade.org.apache.iceberg.TableProperties.MANIFEST_MIN_MERGE_COUNT;
import static com.netease.arctic.shade.org.apache.iceberg.TableProperties.MANIFEST_MIN_MERGE_COUNT_DEFAULT;
import static com.netease.arctic.shade.org.apache.iceberg.TableProperties.MANIFEST_TARGET_SIZE_BYTES;
import static com.netease.arctic.shade.org.apache.iceberg.TableProperties.MANIFEST_TARGET_SIZE_BYTES_DEFAULT;
import static com.netease.arctic.shade.org.apache.iceberg.TableProperties.SNAPSHOT_ID_INHERITANCE_ENABLED;
import static com.netease.arctic.shade.org.apache.iceberg.TableProperties.SNAPSHOT_ID_INHERITANCE_ENABLED_DEFAULT;

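/**
 * Base class for snapshot producers that commit changes by merging manifests: it writes new data
 * and delete files into new manifests, filters deleted files out of existing manifests, and merges
 * small manifests to keep the manifest list compact. Concrete operations such as append,
 * overwrite, replace, and row delta are implemented by subclasses.
 */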
abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
  private static final Logger LOG = LoggerFactory.getLogger(MergingSnapshotProducer.class);

  // data is only added in "append" and "overwrite" operations
  private static final Set<String> VALIDATE_ADDED_FILES_OPERATIONS =
      ImmutableSet.of(DataOperations.APPEND, DataOperations.OVERWRITE);
  // data files are removed in "overwrite", "replace", and "delete"
  private static final Set<String> VALIDATE_DATA_FILES_EXIST_OPERATIONS =
      ImmutableSet.of(DataOperations.OVERWRITE, DataOperations.REPLACE, DataOperations.DELETE);
  private static final Set<String> VALIDATE_DATA_FILES_EXIST_SKIP_DELETE_OPERATIONS =
      ImmutableSet.of(DataOperations.OVERWRITE, DataOperations.REPLACE);
  // delete files can be added in "overwrite" or "delete" operations
  private static final Set<String> VALIDATE_ADDED_DELETE_FILES_OPERATIONS =
      ImmutableSet.of(DataOperations.OVERWRITE, DataOperations.DELETE);

  private final String tableName;
  private final TableOperations ops;
  private final SnapshotSummary.Builder summaryBuilder = SnapshotSummary.builder();
  private final ManifestMergeManager<DataFile> mergeManager;
  private final ManifestFilterManager<DataFile> filterManager;
  private final ManifestMergeManager<DeleteFile> deleteMergeManager;
  private final ManifestFilterManager<DeleteFile> deleteFilterManager;
  private final boolean snapshotIdInheritanceEnabled;

  // update data
  private final List<DataFile> newFiles = Lists.newArrayList();
  private Long newFilesSequenceNumber;
  private final Map<Integer, List<DeleteFile>> newDeleteFilesBySpec = Maps.newHashMap();
  private final List<ManifestFile> appendManifests = Lists.newArrayList();
  private final List<ManifestFile> rewrittenAppendManifests = Lists.newArrayList();
  private final SnapshotSummary.Builder addedFilesSummary = SnapshotSummary.builder();
  private final SnapshotSummary.Builder appendedManifestsSummary = SnapshotSummary.builder();
  private Expression deleteExpression = Expressions.alwaysFalse();
  private PartitionSpec dataSpec;

  // cache new manifests after writing
  private ManifestFile cachedNewManifest = null;
  private boolean hasNewFiles = false;

  // cache new manifests for delete files
  private final List<ManifestFile> cachedNewDeleteManifests = Lists.newLinkedList();
  private boolean hasNewDeleteFiles = false;

  private boolean caseSensitive = true;

  MergingSnapshotProducer(String tableName, TableOperations ops) {
    super(ops);
    this.tableName = tableName;
    this.ops = ops;
    this.dataSpec = null;
    long targetSizeBytes = ops.current()
        .propertyAsLong(MANIFEST_TARGET_SIZE_BYTES, MANIFEST_TARGET_SIZE_BYTES_DEFAULT);
    int minCountToMerge = ops.current()
        .propertyAsInt(MANIFEST_MIN_MERGE_COUNT, MANIFEST_MIN_MERGE_COUNT_DEFAULT);
    boolean mergeEnabled = ops.current()
        .propertyAsBoolean(TableProperties.MANIFEST_MERGE_ENABLED, TableProperties.MANIFEST_MERGE_ENABLED_DEFAULT);
    this.mergeManager = new DataFileMergeManager(targetSizeBytes, minCountToMerge, mergeEnabled);
    this.filterManager = new DataFileFilterManager();
    this.deleteMergeManager = new DeleteFileMergeManager(targetSizeBytes, minCountToMerge, mergeEnabled);
    this.deleteFilterManager = new DeleteFileFilterManager();
    this.snapshotIdInheritanceEnabled = ops.current()
        .propertyAsBoolean(SNAPSHOT_ID_INHERITANCE_ENABLED, SNAPSHOT_ID_INHERITANCE_ENABLED_DEFAULT);
  }
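
  // Illustrative: the merge thresholds read above come from standard Iceberg table properties.
  // A minimal tuning sketch, assuming a Table handle named `table` (values shown are the
  // defaults referenced via TableProperties):
  //
  //   table.updateProperties()
  //       .set("commit.manifest.target-size-bytes", "8388608")  // MANIFEST_TARGET_SIZE_BYTES
  //       .set("commit.manifest.min-count-to-merge", "100")     // MANIFEST_MIN_MERGE_COUNT
  //       .set("commit.manifest-merge.enabled", "true")         // MANIFEST_MERGE_ENABLED
  //       .commit();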

  @Override
  public ThisT set(String property, String value) {
    summaryBuilder.set(property, value);
    return self();
  }

  public ThisT caseSensitive(boolean isCaseSensitive) {
    this.caseSensitive = isCaseSensitive;
    filterManager.caseSensitive(isCaseSensitive);
    deleteFilterManager.caseSensitive(isCaseSensitive);
    return self();
  }

  protected boolean isCaseSensitive() {
    return caseSensitive;
  }

  protected PartitionSpec dataSpec() {
    Preconditions.checkState(dataSpec != null, "Cannot determine partition spec: no data files have been added");
    // the spec is set when the write is started
    return dataSpec;
  }

  protected Expression rowFilter() {
    return deleteExpression;
  }

  protected List<DataFile> addedFiles() {
    return ImmutableList.copyOf(newFiles);
  }

  protected void failAnyDelete() {
    filterManager.failAnyDelete();
    deleteFilterManager.failAnyDelete();
  }

  protected void failMissingDeletePaths() {
    filterManager.failMissingDeletePaths();
    deleteFilterManager.failMissingDeletePaths();
  }

  /**
   * Add a filter to match files to delete. A file will be deleted if all of the rows it contains
   * match this or any other filter passed to this method.
   *
   * @param expr an expression to match rows.
   */
  protected void deleteByRowFilter(Expression expr) {
    this.deleteExpression = expr;
    filterManager.deleteByRowFilter(expr);
    // if a delete file matches the row filter, then it can be deleted because the rows will also be deleted
    deleteFilterManager.deleteByRowFilter(expr);
  }
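
  // Usage sketch: subclasses surface this through the public table API, e.g. DeleteFiles.
  // Assuming a Table handle named `table`, deleting whole files by predicate looks like:
  //
  //   table.newDelete()
  //       .deleteFromRowFilter(Expressions.equal("day", "2022-01-01"))
  //       .commit();
  //
  // Per the contract above, only files whose rows ALL match the filter are dropped.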

  /**
   * Add a partition tuple to drop from the table during the delete phase.
   */
  protected void dropPartition(int specId, StructLike partition) {
    // dropping the data in a partition also drops all deletes in the partition
    filterManager.dropPartition(specId, partition);
    deleteFilterManager.dropPartition(specId, partition);
  }

  /**
   * Add a specific data file to be deleted in the new snapshot.
   */
  protected void delete(DataFile file) {
    filterManager.delete(file);
  }

  /**
   * Add a specific delete file to be deleted in the new snapshot.
   */
  protected void delete(DeleteFile file) {
    deleteFilterManager.delete(file);
  }

  /**
   * Add a specific data path to be deleted in the new snapshot.
   */
  protected void delete(CharSequence path) {
    // this is an old call that never worked for delete files and can only be used to remove data files.
    filterManager.delete(path);
  }

  /**
   * Add a data file to the new snapshot.
   */
  protected void add(DataFile file) {
    Preconditions.checkNotNull(file, "Invalid data file: null");
    setDataSpec(file);
    addedFilesSummary.addedFile(dataSpec(), file);
    hasNewFiles = true;
    newFiles.add(file);
  }
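
  // Usage sketch: data files flow into this method from subclasses such as AppendFiles.
  // Assuming a Table handle named `table` and a completed DataFile named `dataFile`:
  //
  //   table.newAppend()
  //       .appendFile(dataFile)
  //       .commit();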

  /**
   * Add a delete file to the new snapshot.
   */
  protected void add(DeleteFile file) {
    Preconditions.checkNotNull(file, "Invalid delete file: null");
    PartitionSpec fileSpec = ops.current().spec(file.specId());
    List<DeleteFile> deleteFiles = newDeleteFilesBySpec.computeIfAbsent(file.specId(), specId -> Lists.newArrayList());
    deleteFiles.add(file);
    addedFilesSummary.addedFile(fileSpec, file);
    hasNewDeleteFiles = true;
  }
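
  // Usage sketch: delete files (position or equality deletes, format v2 tables only) typically
  // arrive through RowDelta. Assuming a Table handle named `table` and a DeleteFile `deletes`:
  //
  //   table.newRowDelta()
  //       .addDeletes(deletes)
  //       .commit();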

  private void setDataSpec(DataFile file) {
    PartitionSpec fileSpec = ops.current().spec(file.specId());
    Preconditions.checkNotNull(fileSpec, "Cannot find partition spec for data file: %s", file.path());
    if (dataSpec == null) {
      dataSpec = fileSpec;
    } else if (dataSpec.specId() != file.specId()) {
      throw new ValidationException("Invalid data file, expected spec id: %d", dataSpec.specId());
    }
  }

  /**
   * Add all files in a manifest to the new snapshot.
   */
  protected void add(ManifestFile manifest) {
    Preconditions.checkArgument(manifest.content() == ManifestContent.DATA,
        "Cannot append delete manifest: %s", manifest);
    if (snapshotIdInheritanceEnabled && manifest.snapshotId() == null) {
      appendedManifestsSummary.addedManifest(manifest);
      appendManifests.add(manifest);
    } else {
      // the manifest must be rewritten with this update's snapshot ID
      ManifestFile copiedManifest = copyManifest(manifest);
      rewrittenAppendManifests.add(copiedManifest);
    }
  }
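
  // Usage sketch: pre-written manifests are appended through AppendFiles. Assuming a Table handle
  // named `table` and a ManifestFile `manifest` produced by a ManifestWriter:
  //
  //   table.newAppend()
  //       .appendManifest(manifest)
  //       .commit();
  //
  // With compatibility.snapshot-id-inheritance.enabled=true the manifest is used as-is;
  // otherwise it is rewritten (copied) so its entries carry this commit's snapshot ID.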

  private ManifestFile copyManifest(ManifestFile manifest) {
    TableMetadata current = ops.current();
    InputFile toCopy = ops.io().newInputFile(manifest.path());
    OutputFile newManifestPath = newManifestOutput();
    return ManifestFiles.copyAppendManifest(
        current.formatVersion(), toCopy, current.specsById(), newManifestPath, snapshotId(), appendedManifestsSummary);
  }

  /**
   * Validates that no files matching a filter have been added to the table since a starting snapshot.
   *
   * @param base table metadata to validate
   * @param startingSnapshotId id of the snapshot current at the start of the operation
   * @param conflictDetectionFilter an expression used to find new conflicting data files
   */
  protected void validateAddedDataFiles(TableMetadata base, Long startingSnapshotId,
                                        Expression conflictDetectionFilter) {
    // if there is no current table state, no files have been added
    if (base.currentSnapshot() == null) {
      return;
    }

    Pair<List<ManifestFile>, Set<Long>> history =
        validationHistory(base, startingSnapshotId, VALIDATE_ADDED_FILES_OPERATIONS, ManifestContent.DATA);
    List<ManifestFile> manifests = history.first();
    Set<Long> newSnapshots = history.second();

    ManifestGroup conflictGroup = new ManifestGroup(ops.io(), manifests, ImmutableList.of())
        .caseSensitive(caseSensitive)
        .filterManifestEntries(entry -> newSnapshots.contains(entry.snapshotId()))
        .filterData(conflictDetectionFilter)
        .specsById(base.specsById())
        .ignoreDeleted()
        .ignoreExisting();

    try (CloseableIterator<ManifestEntry<DataFile>> conflicts = conflictGroup.entries().iterator()) {
      if (conflicts.hasNext()) {
        throw new ValidationException("Found conflicting files that can contain records matching %s: %s",
            conflictDetectionFilter,
            Iterators.toString(Iterators.transform(conflicts, entry -> entry.file().path().toString())));
      }

    } catch (IOException e) {
      throw new UncheckedIOException(
          String.format("Failed to validate no appends matching %s", conflictDetectionFilter), e);
    }
  }
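
  // Usage sketch: this check backs serializable-isolation overwrites. Assuming the 0.13-era
  // OverwriteFiles API and a Table handle named `table`:
  //
  //   table.newOverwrite()
  //       .overwriteByRowFilter(Expressions.equal("day", "2022-01-01"))
  //       .validateFromSnapshot(table.currentSnapshot().snapshotId())
  //       .conflictDetectionFilter(Expressions.equal("day", "2022-01-01"))
  //       .validateNoConflictingData()
  //       .commit();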

  /**
   * Validates that no new delete files that must be applied to the given data files have been added to the table since
   * a starting snapshot.
   *
   * @param base table metadata to validate
   * @param startingSnapshotId id of the snapshot current at the start of the operation
   * @param dataFiles data files to validate have no new row deletes
   */
  protected void validateNoNewDeletesForDataFiles(TableMetadata base, Long startingSnapshotId,
                                                  Iterable<DataFile> dataFiles) {
    validateNoNewDeletesForDataFiles(base, startingSnapshotId, null, dataFiles, newFilesSequenceNumber != null);
  }

  /**
   * Validates that no new delete files that must be applied to the given data files have been added to the table since
   * a starting snapshot.
   *
   * @param base table metadata to validate
   * @param startingSnapshotId id of the snapshot current at the start of the operation
   * @param dataFilter a data filter
   * @param dataFiles data files to validate have no new row deletes
   */
  protected void validateNoNewDeletesForDataFiles(TableMetadata base, Long startingSnapshotId,
                                                  Expression dataFilter, Iterable<DataFile> dataFiles) {
    validateNoNewDeletesForDataFiles(base, startingSnapshotId, dataFilter, dataFiles, false);
  }
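
  // Worked example of the cutoff used by the overloads above and below: if the starting snapshot
  // has sequence number 5, only delete files committed with sequence number > 5 can conflict,
  // because deletes at <= 5 were already visible when this operation began. When rewritten data
  // files keep their original sequence number (newFilesSequenceNumber != null), later equality
  // deletes still apply to them unchanged, so equality deletes can safely be ignored and only
  // new position deletes fail the validation.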

  /**
   * Validates that no new delete files that must be applied to the given data files have been added to the table since
   * a starting snapshot, with the option to ignore equality deletes during the validation.
   * <p>
   * For example, in the case of rewriting data files, if the added data files have the same sequence number as the
   * replaced data files, equality deletes added at a higher sequence number are still effective against the added
   * data files, so there is no risk of commit conflict between RewriteFiles and RowDelta. In cases like this,
   * validation against equality delete files can be omitted.
   *
   * @param base table metadata to validate
   * @param startingSnapshotId id of the snapshot current at the start of the operation
   * @param dataFilter a data filter
   * @param dataFiles data files to validate have no new row deletes
   * @param ignoreEqualityDeletes whether equality deletes should be ignored in validation
   */
  private void validateNoNewDeletesForDataFiles(TableMetadata base, Long startingSnapshotId,
                                                Expression dataFilter, Iterable<DataFile> dataFiles,
                                                boolean ignoreEqualityDeletes) {
    // if there is no current table state, no files have been added
    if (base.currentSnapshot() == null || base.formatVersion() < 2) {
      return;
    }

    Pair<List<ManifestFile>, Set<Long>> history =
        validationHistory(base, startingSnapshotId, VALIDATE_ADDED_DELETE_FILES_OPERATIONS, ManifestContent.DELETES);
    List<ManifestFile> deleteManifests = history.first();

    long startingSequenceNumber = startingSequenceNumber(base, startingSnapshotId);
    DeleteFileIndex deletes = buildDeleteFileIndex(deleteManifests, startingSequenceNumber, dataFilter);

    for (DataFile dataFile : dataFiles) {
      // if any delete is found that applies to files written in or before the starting snapshot, fail
      DeleteFile[] deleteFiles = deletes.forDataFile(startingSequenceNumber, dataFile);
      if (ignoreEqualityDeletes) {
        ValidationException.check(
            Arrays.stream(deleteFiles).noneMatch(deleteFile -> deleteFile.content() == FileContent.POSITION_DELETES),
            "Cannot commit, found new position delete for replaced data file: %s", dataFile);
      } else {
        ValidationException.check(deleteFiles.length == 0,
            "Cannot commit, found new delete for replaced data file: %s", dataFile);
      }
    }
  }

  /**
   * Validates that no delete files matching a filter have been added to the table since a starting snapshot.
   *
   * @param base table metadata to validate
   * @param startingSnapshotId id of the snapshot current at the start of the operation
   * @param dataFilter an expression used to find new conflicting delete files
   */
  protected void validateNoNewDeleteFiles(TableMetadata base, Long startingSnapshotId, Expression dataFilter) {
    // if there is no current table state, no files have been added
    if (base.currentSnapshot() == null || base.formatVersion() < 2) {
      return;
    }

    Pair<List<ManifestFile>, Set<Long>> history =
        validationHistory(base, startingSnapshotId, VALIDATE_ADDED_DELETE_FILES_OPERATIONS, ManifestContent.DELETES);
    List<ManifestFile> deleteManifests = history.first();

    long startingSequenceNumber = startingSequenceNumber(base, startingSnapshotId);
    DeleteFileIndex deletes = buildDeleteFileIndex(deleteManifests, startingSequenceNumber, dataFilter);

    ValidationException.check(deletes.isEmpty(),
        "Found new conflicting delete files that can apply to records matching %s: %s",
        dataFilter,
        Iterables.transform(deletes.referencedDeleteFiles(), ContentFile::path));
  }

  protected void setNewFilesSequenceNumber(long sequenceNumber) {
    this.newFilesSequenceNumber = sequenceNumber;
  }

  private long startingSequenceNumber(TableMetadata metadata, Long startingSnapshotId) {
    if (startingSnapshotId != null && metadata.snapshot(startingSnapshotId) != null) {
      Snapshot startingSnapshot = metadata.snapshot(startingSnapshotId);
      return startingSnapshot.sequenceNumber();
    } else {
      return TableMetadata.INITIAL_SEQUENCE_NUMBER;
    }
  }

  private DeleteFileIndex buildDeleteFileIndex(List<ManifestFile> deleteManifests, long startingSequenceNumber,
                                               Expression dataFilter) {
    DeleteFileIndex.Builder builder = DeleteFileIndex.builderFor(ops.io(), deleteManifests)
        .afterSequenceNumber(startingSequenceNumber)
        .caseSensitive(caseSensitive)
        .specsById(ops.current().specsById());

    if (dataFilter != null) {
      builder.filterData(dataFilter);
    }

    return builder.build();
  }

  @SuppressWarnings("CollectionUndefinedEquality")
  protected void validateDataFilesExist(TableMetadata base, Long startingSnapshotId,
                                        CharSequenceSet requiredDataFiles, boolean skipDeletes,
                                        Expression conflictDetectionFilter) {
    // if there is no current table state, no files have been removed
    if (base.currentSnapshot() == null) {
      return;
    }

    Set<String> matchingOperations = skipDeletes ?
        VALIDATE_DATA_FILES_EXIST_SKIP_DELETE_OPERATIONS :
        VALIDATE_DATA_FILES_EXIST_OPERATIONS;

    Pair<List<ManifestFile>, Set<Long>> history =
        validationHistory(base, startingSnapshotId, matchingOperations, ManifestContent.DATA);
    List<ManifestFile> manifests = history.first();
    Set<Long> newSnapshots = history.second();

    ManifestGroup matchingDeletesGroup = new ManifestGroup(ops.io(), manifests, ImmutableList.of())
        .filterManifestEntries(entry -> entry.status() != ManifestEntry.Status.ADDED &&
            newSnapshots.contains(entry.snapshotId()) && requiredDataFiles.contains(entry.file().path()))
        .specsById(base.specsById())
        .ignoreExisting();

    if (conflictDetectionFilter != null) {
      matchingDeletesGroup.filterData(conflictDetectionFilter);
    }

    try (CloseableIterator<ManifestEntry<DataFile>> deletes = matchingDeletesGroup.entries().iterator()) {
      if (deletes.hasNext()) {
        throw new ValidationException("Cannot commit, missing data files: %s",
            Iterators.toString(Iterators.transform(deletes, entry -> entry.file().path().toString())));
      }

    } catch (IOException e) {
      throw new UncheckedIOException("Failed to validate required files exist", e);
    }
  }

  private Pair<List<ManifestFile>, Set<Long>> validationHistory(TableMetadata base, Long startingSnapshotId,
                                                                Set<String> matchingOperations,
                                                                ManifestContent content) {
    List<ManifestFile> manifests = Lists.newArrayList();
    Set<Long> newSnapshots = Sets.newHashSet();

    Snapshot lastSnapshot = null;
    Iterable<Snapshot> snapshots = SnapshotUtil.ancestorsBetween(
        base.currentSnapshot().snapshotId(), startingSnapshotId, base::snapshot);
    for (Snapshot currentSnapshot : snapshots) {
      lastSnapshot = currentSnapshot;

      if (matchingOperations.contains(currentSnapshot.operation())) {
        newSnapshots.add(currentSnapshot.snapshotId());
        if (content == ManifestContent.DATA) {
          for (ManifestFile manifest : currentSnapshot.dataManifests()) {
            if (manifest.snapshotId() == currentSnapshot.snapshotId()) {
              manifests.add(manifest);
            }
          }
        } else {
          for (ManifestFile manifest : currentSnapshot.deleteManifests()) {
            if (manifest.snapshotId() == currentSnapshot.snapshotId()) {
              manifests.add(manifest);
            }
          }
        }
      }
    }

    ValidationException.check(lastSnapshot == null || Objects.equals(lastSnapshot.parentId(), startingSnapshotId),
        "Cannot determine history between starting snapshot %s and the last known ancestor %s",
        startingSnapshotId, lastSnapshot != null ? lastSnapshot.snapshotId() : null);

    return Pair.of(manifests, newSnapshots);
  }

  @Override
  protected Map<String, String> summary() {
    summaryBuilder.setPartitionSummaryLimit(ops.current().propertyAsInt(
        TableProperties.WRITE_PARTITION_SUMMARY_LIMIT, TableProperties.WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT));
    return summaryBuilder.build();
  }

  @Override
  public List<ManifestFile> apply(TableMetadata base) {
    Snapshot current = base.currentSnapshot();

    // filter any existing manifests
    List<ManifestFile> filtered = filterManager.filterManifests(
        base.schema(), current != null ? current.dataManifests() : null);
    long minDataSequenceNumber = filtered.stream()
        .map(ManifestFile::minSequenceNumber)
        .filter(seq -> seq != ManifestWriter.UNASSIGNED_SEQ) // filter out unassigned in rewritten manifests
        .reduce(base.lastSequenceNumber(), Math::min);
    deleteFilterManager.dropDeleteFilesOlderThan(minDataSequenceNumber);
    List<ManifestFile> filteredDeletes = deleteFilterManager.filterManifests(
        base.schema(), current != null ? current.deleteManifests() : null);

    // only keep manifests that have live data files or that were written by this commit
    Predicate<ManifestFile> shouldKeep = manifest ->
        manifest.hasAddedFiles() || manifest.hasExistingFiles() || manifest.snapshotId() == snapshotId();
    Iterable<ManifestFile> unmergedManifests = Iterables.filter(
        Iterables.concat(prepareNewManifests(), filtered), shouldKeep);
    Iterable<ManifestFile> unmergedDeleteManifests = Iterables.filter(
        Iterables.concat(prepareDeleteManifests(), filteredDeletes), shouldKeep);

    // update the snapshot summary
    summaryBuilder.clear();
    summaryBuilder.merge(addedFilesSummary);
    summaryBuilder.merge(appendedManifestsSummary);
    summaryBuilder.merge(filterManager.buildSummary(filtered));
    summaryBuilder.merge(deleteFilterManager.buildSummary(filteredDeletes));

    List<ManifestFile> manifests = Lists.newArrayList();
    Iterables.addAll(manifests, mergeManager.mergeManifests(unmergedManifests));
    Iterables.addAll(manifests, deleteMergeManager.mergeManifests(unmergedDeleteManifests));

    return manifests;
  }

  @Override
  public Object updateEvent() {
    long snapshotId = snapshotId();
    Snapshot justSaved = ops.refresh().snapshot(snapshotId);
    long sequenceNumber = TableMetadata.INVALID_SEQUENCE_NUMBER;
    Map<String, String> summary;
    if (justSaved == null) {
      // The snapshot just saved may not be present if the latest metadata couldn't be loaded due to eventual
      // consistency problems in refresh.
      LOG.warn("Failed to load committed snapshot: omitting sequence number from notifications");
      summary = summary();
    } else {
      sequenceNumber = justSaved.sequenceNumber();
      summary = justSaved.summary();
    }

    return new CreateSnapshotEvent(
        tableName,
        operation(),
        snapshotId,
        sequenceNumber,
        summary);
  }

  private void cleanUncommittedAppends(Set<ManifestFile> committed) {
    if (cachedNewManifest != null && !committed.contains(cachedNewManifest)) {
      deleteFile(cachedNewManifest.path());
      this.cachedNewManifest = null;
    }

    ListIterator<ManifestFile> deleteManifestsIterator = cachedNewDeleteManifests.listIterator();
    while (deleteManifestsIterator.hasNext()) {
      ManifestFile deleteManifest = deleteManifestsIterator.next();
      if (!committed.contains(deleteManifest)) {
        deleteFile(deleteManifest.path());
        deleteManifestsIterator.remove();
      }
    }

    // rewritten manifests are always owned by the table
    for (ManifestFile manifest : rewrittenAppendManifests) {
      if (!committed.contains(manifest)) {
        deleteFile(manifest.path());
      }
    }

    // manifests that are not rewritten are only owned by the table if the commit succeeded
    if (!committed.isEmpty()) {
      // the commit succeeded if at least one manifest was committed
      // the table now owns appendManifests; clean up any that are not used
      for (ManifestFile manifest : appendManifests) {
        if (!committed.contains(manifest)) {
          deleteFile(manifest.path());
        }
      }
    }
  }

  @Override
  protected void cleanUncommitted(Set<ManifestFile> committed) {
    mergeManager.cleanUncommitted(committed);
    filterManager.cleanUncommitted(committed);
    deleteMergeManager.cleanUncommitted(committed);
    deleteFilterManager.cleanUncommitted(committed);
    cleanUncommittedAppends(committed);
  }

  private Iterable<ManifestFile> prepareNewManifests() {
    Iterable<ManifestFile> newManifests;
    if (newFiles.size() > 0) {
      ManifestFile newManifest = newFilesAsManifest();
      newManifests = Iterables.concat(ImmutableList.of(newManifest), appendManifests, rewrittenAppendManifests);
    } else {
      newManifests = Iterables.concat(appendManifests, rewrittenAppendManifests);
    }

    return Iterables.transform(
        newManifests,
        manifest -> GenericManifestFile.copyOf(manifest).withSnapshotId(snapshotId()).build());
  }

  private ManifestFile newFilesAsManifest() {
    if (hasNewFiles && cachedNewManifest != null) {
      deleteFile(cachedNewManifest.path());
      cachedNewManifest = null;
    }

    if (cachedNewManifest == null) {
      try {
        ManifestWriter<DataFile> writer = newManifestWriter(dataSpec());
        try {
          if (newFilesSequenceNumber == null) {
            writer.addAll(newFiles);
          } else {
            newFiles.forEach(f -> writer.add(f, newFilesSequenceNumber));
          }
        } finally {
          writer.close();
        }

        this.cachedNewManifest = writer.toManifestFile();
        this.hasNewFiles = false;
      } catch (IOException e) {
        throw new RuntimeIOException(e, "Failed to close manifest writer");
      }
    }

    return cachedNewManifest;
  }

  private Iterable<ManifestFile> prepareDeleteManifests() {
    if (newDeleteFilesBySpec.isEmpty()) {
      return ImmutableList.of();
    }

    return newDeleteFilesAsManifests();
  }

  private List<ManifestFile> newDeleteFilesAsManifests() {
    if (hasNewDeleteFiles && cachedNewDeleteManifests.size() > 0) {
      for (ManifestFile cachedNewDeleteManifest : cachedNewDeleteManifests) {
        deleteFile(cachedNewDeleteManifest.path());
      }
      // this triggers a rewrite of all delete manifests even if there is only one new delete file
      // if there is a relevant use case in the future, the behavior can be optimized
      cachedNewDeleteManifests.clear();
    }

    if (cachedNewDeleteManifests.isEmpty()) {
      newDeleteFilesBySpec.forEach((specId, deleteFiles) -> {
        PartitionSpec spec = ops.current().spec(specId);
        try {
          ManifestWriter<DeleteFile> writer = newDeleteManifestWriter(spec);
          try {
            writer.addAll(deleteFiles);
          } finally {
            writer.close();
          }
          cachedNewDeleteManifests.add(writer.toManifestFile());
        } catch (IOException e) {
          throw new RuntimeIOException(e, "Failed to close manifest writer");
        }
      });

      this.hasNewDeleteFiles = false;
    }

    return cachedNewDeleteManifests;
  }

  private class DataFileFilterManager extends ManifestFilterManager<DataFile> {
    private DataFileFilterManager() {
      super(ops.current().specsById());
    }

    @Override
    protected void deleteFile(String location) {
      MergingSnapshotProducer.this.deleteFile(location);
    }

    @Override
    protected ManifestWriter<DataFile> newManifestWriter(PartitionSpec manifestSpec) {
      return MergingSnapshotProducer.this.newManifestWriter(manifestSpec);
    }

    @Override
    protected ManifestReader<DataFile> newManifestReader(ManifestFile manifest) {
      return MergingSnapshotProducer.this.newManifestReader(manifest);
    }
  }

  private class DataFileMergeManager extends ManifestMergeManager<DataFile> {
    DataFileMergeManager(long targetSizeBytes, int minCountToMerge, boolean mergeEnabled) {
      super(targetSizeBytes, minCountToMerge, mergeEnabled);
    }

    @Override
    protected long snapshotId() {
      return MergingSnapshotProducer.this.snapshotId();
    }

    @Override
    protected PartitionSpec spec(int specId) {
      return ops.current().spec(specId);
    }

    @Override
    protected void deleteFile(String location) {
      MergingSnapshotProducer.this.deleteFile(location);
    }

    @Override
    protected ManifestWriter<DataFile> newManifestWriter(PartitionSpec manifestSpec) {
      return MergingSnapshotProducer.this.newManifestWriter(manifestSpec);
    }

    @Override
    protected ManifestReader<DataFile> newManifestReader(ManifestFile manifest) {
      return MergingSnapshotProducer.this.newManifestReader(manifest);
    }
  }

  private class DeleteFileFilterManager extends ManifestFilterManager<DeleteFile> {
    private DeleteFileFilterManager() {
      super(ops.current().specsById());
    }

    @Override
    protected void deleteFile(String location) {
      MergingSnapshotProducer.this.deleteFile(location);
    }

    @Override
    protected ManifestWriter<DeleteFile> newManifestWriter(PartitionSpec manifestSpec) {
      return MergingSnapshotProducer.this.newDeleteManifestWriter(manifestSpec);
    }

    @Override
    protected ManifestReader<DeleteFile> newManifestReader(ManifestFile manifest) {
      return MergingSnapshotProducer.this.newDeleteManifestReader(manifest);
    }
  }

  private class DeleteFileMergeManager extends ManifestMergeManager<DeleteFile> {
    DeleteFileMergeManager(long targetSizeBytes, int minCountToMerge, boolean mergeEnabled) {
      super(targetSizeBytes, minCountToMerge, mergeEnabled);
    }

    @Override
    protected long snapshotId() {
      return MergingSnapshotProducer.this.snapshotId();
    }

    @Override
    protected PartitionSpec spec(int specId) {
      return ops.current().spec(specId);
    }

    @Override
    protected void deleteFile(String location) {
      MergingSnapshotProducer.this.deleteFile(location);
    }

    @Override
    protected ManifestWriter<DeleteFile> newManifestWriter(PartitionSpec manifestSpec) {
      return MergingSnapshotProducer.this.newDeleteManifestWriter(manifestSpec);
    }

    @Override
    protected ManifestReader<DeleteFile> newManifestReader(ManifestFile manifest) {
      return MergingSnapshotProducer.this.newDeleteManifestReader(manifest);
    }
  }
}