All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.iceberg.FileCleanupStrategy Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg;

import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.function.Consumer;
import org.apache.iceberg.avro.Avro;
import org.apache.iceberg.exceptions.NotFoundException;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.util.Tasks;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class for strategies that clean up files given the table metadata before and after an
 * expiration.
 *
 * <p>Subclasses implement {@link #cleanFiles(TableMetadata, TableMetadata)}. This class supplies
 * the shared helpers they need: reading the manifests of a snapshot, deleting a set of paths, and
 * working out which statistics file locations were present before the expiration but are absent
 * afterwards.
 */
@SuppressWarnings("checkstyle:VisibilityModifier")
abstract class FileCleanupStrategy {
  private static final Logger LOG = LoggerFactory.getLogger(FileCleanupStrategy.class);

  /** Columns projected from each manifest-list entry by {@link #readManifests(Snapshot)}. */
  private static final Schema MANIFEST_PROJECTION =
      ManifestFile.schema()
          .select(
              "manifest_path",
              "manifest_length",
              "partition_spec_id",
              "added_snapshot_id",
              "deleted_data_files_count");

  protected final FileIO fileIO;
  protected final ExecutorService planExecutorService;
  private final Consumer<String> deleteFunc;
  private final ExecutorService deleteExecutorService;

  /**
   * Creates a cleanup strategy.
   *
   * @param fileIO used to open manifest lists and to load a snapshot's manifests
   * @param deleteExecutorService executor handed to the delete tasks in {@link #deleteFiles}
   * @param planExecutorService executor exposed to subclasses; not used by this base class
   * @param deleteFunc callback invoked once per path that should be deleted
   */
  protected FileCleanupStrategy(
      FileIO fileIO,
      ExecutorService deleteExecutorService,
      ExecutorService planExecutorService,
      Consumer<String> deleteFunc) {
    this.fileIO = fileIO;
    this.deleteExecutorService = deleteExecutorService;
    this.planExecutorService = planExecutorService;
    this.deleteFunc = deleteFunc;
  }

  /**
   * Cleans up files based on the difference between two versions of the table metadata.
   *
   * @param beforeExpiration table metadata before the expiration
   * @param afterExpiration table metadata after the expiration
   */
  public abstract void cleanFiles(TableMetadata beforeExpiration, TableMetadata afterExpiration);

  /**
   * Returns the manifests of a snapshot.
   *
   * <p>When the snapshot has a manifest list location, the list is read directly with only the
   * columns in {@code MANIFEST_PROJECTION}. Otherwise the manifests are loaded through {@link
   * Snapshot#allManifests(FileIO)} and wrapped in an iterable whose close is a no-op.
   *
   * @param snapshot the snapshot whose manifests should be read
   * @return a closeable iterable of manifests; callers are responsible for closing it
   */
  protected CloseableIterable<ManifestFile> readManifests(Snapshot snapshot) {
    if (snapshot.manifestListLocation() != null) {
      // NOTE(review): container reuse is enabled, so entries presumably must be copied before
      // they are retained beyond the current iteration step -- confirm against the Avro reader.
      return Avro.read(fileIO.newInputFile(snapshot.manifestListLocation()))
          .rename("manifest_file", GenericManifestFile.class.getName())
          .classLoader(GenericManifestFile.class.getClassLoader())
          .project(MANIFEST_PROJECTION)
          .reuseContainers(true)
          .build();
    } else {
      return CloseableIterable.withNoopClose(snapshot.allManifests(fileIO));
    }
  }

  /**
   * Passes every path to the delete callback using the delete executor service.
   *
   * <p>The tasks are configured with {@code retry(3)}, stop retrying on {@link
   * NotFoundException}, and suppress failures when finished; each failed path is logged at WARN
   * level together with the exception.
   *
   * @param pathsToDelete paths to hand to the delete callback
   * @param fileType label describing the kind of file, used only in the failure log message
   */
  protected void deleteFiles(Set<String> pathsToDelete, String fileType) {
    Tasks.foreach(pathsToDelete)
        .executeWith(deleteExecutorService)
        .retry(3)
        .stopRetryOn(NotFoundException.class)
        .suppressFailureWhenFinished()
        .onFailure(
            (file, thrown) -> LOG.warn("Delete failed for {} file: {}", fileType, file, thrown))
        .run(deleteFunc::accept);
  }

  /**
   * Checks whether the metadata references any statistics or partition statistics file.
   *
   * <p>A {@code null} list is treated like an empty one, matching how the statistics file
   * locations are collected elsewhere in this class.
   *
   * @param tableMetadata the metadata to inspect
   * @return true if at least one statistics or partition statistics file is present
   */
  protected boolean hasAnyStatisticsFiles(TableMetadata tableMetadata) {
    return hasElements(tableMetadata.statisticsFiles())
        || hasElements(tableMetadata.partitionStatisticsFiles());
  }

  /**
   * Computes the statistics file locations that are referenced before the expiration but no
   * longer referenced after it.
   *
   * @param beforeExpiration table metadata before the expiration
   * @param afterExpiration table metadata after the expiration
   * @return the locations present in {@code beforeExpiration} and absent from {@code
   *     afterExpiration}, as returned by Guava's {@code Sets.difference} (an unmodifiable view)
   */
  protected Set<String> expiredStatisticsFilesLocations(
      TableMetadata beforeExpiration, TableMetadata afterExpiration) {
    Set<String> statsFileLocationsBeforeExpiration = statsFileLocations(beforeExpiration);
    Set<String> statsFileLocationsAfterExpiration = statsFileLocations(afterExpiration);

    return Sets.difference(statsFileLocationsBeforeExpiration, statsFileLocationsAfterExpiration);
  }

  /** Collects the paths of all statistics and partition statistics files in the metadata. */
  private Set<String> statsFileLocations(TableMetadata tableMetadata) {
    Set<String> statsFileLocations = Sets.newHashSet();

    if (tableMetadata.statisticsFiles() != null) {
      for (StatisticsFile statisticsFile : tableMetadata.statisticsFiles()) {
        statsFileLocations.add(statisticsFile.path());
      }
    }

    if (tableMetadata.partitionStatisticsFiles() != null) {
      for (PartitionStatisticsFile partitionStatisticsFile :
          tableMetadata.partitionStatisticsFiles()) {
        statsFileLocations.add(partitionStatisticsFile.path());
      }
    }

    return statsFileLocations;
  }

  /** Returns true when the given collection is non-null and contains at least one element. */
  private static boolean hasElements(Iterable<?> files) {
    return files != null && files.iterator().hasNext();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy