// All Downloads are FREE. Search and download functionality uses the official Maven repository.
//
// software.amazon.glue.operations.GlueUpdateFilesOperation Maven / Gradle / Ivy
//
// The newest version!
/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package software.amazon.glue.operations;

import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.function.Consumer;
import org.apache.commons.compress.utils.Lists;
import org.apache.commons.compress.utils.Sets;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.GlueTable;
import org.apache.iceberg.MetadataUpdate;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.SnapshotUpdate;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.UpdateRequirement;
import org.apache.iceberg.UpdateRequirements;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.ExpressionParser;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import software.amazon.glue.ErrorHandlers;
import software.amazon.glue.GlueExtensionsTableOperations;
import software.amazon.glue.requests.UpdateTableRequest;

/**
 * Base class for snapshot updates that collect added and removed data files and commit them to a
 * {@link GlueTable} as a single {@link MetadataUpdate} sent through
 * {@link GlueExtensionsTableOperations#commitUpdateWithTransaction}.
 *
 * <p>Several {@link SnapshotUpdate} methods are deliberately unsupported and throw
 * {@link UnsupportedOperationException}: {@link #deleteWith}, {@link #stageOnly},
 * {@link #scanManifestsWith} and {@link #apply}.
 *
 * @param <ThisT> the concrete operation type returned by the fluent methods
 */
public abstract class GlueUpdateFilesOperation<ThisT> implements SnapshotUpdate<ThisT> {
  /** Table operations obtained from {@code table.operations()}. */
  protected final TableOperations ops;

  /** Manifest generator obtained from {@code table.manifestBuilder()}. */
  protected final DataFileManifestGenerator dataManifestBuilder;

  /** The table this operation commits to. */
  protected final GlueTable table;

  /** Data files registered through {@link #add(DataFile)}, in call order. */
  protected final List<DataFile> addedDataFiles = Lists.newArrayList();

  /** Data files registered through {@link #remove(DataFile)}, in call order. */
  protected final List<DataFile> removedDataFiles = Lists.newArrayList();

  /**
   * String form of the path of every file passed to {@link #remove(DataFile)}. It is populated
   * here but not read within this class.
   */
  protected final Set<String> deletedDataFilePaths = Sets.newHashSet();

  /**
   * Creates an operation bound to the given table.
   *
   * @param table the table whose operations and manifest builder are used for the commit
   */
  public GlueUpdateFilesOperation(GlueTable table) {
    this.ops = table.operations();
    this.dataManifestBuilder = table.manifestBuilder();
    this.table = table;
  }

  /**
   * Returns this instance typed as the concrete operation so fluent calls can be chained.
   *
   * @return this operation
   */
  protected abstract ThisT self();

  /**
   * Registers a data file to be added by the next commit.
   *
   * @param file the data file to add
   * @return this operation, for chaining
   */
  public ThisT add(DataFile file) {
    addedDataFiles.add(file);
    return self();
  }

  /**
   * Registers a data file to be removed by the next commit and records its path.
   *
   * @param file the data file to remove
   * @return this operation, for chaining
   */
  public ThisT remove(DataFile file) {
    removedDataFiles.add(file);
    deletedDataFilePaths.add(file.path().toString());
    return self();
  }

  /**
   * Accepts and ignores a summary property.
   *
   * @param property ignored
   * @param value ignored
   * @return this operation, for chaining
   */
  @Override
  public ThisT set(String property, String value) {
    // Intentionally a no-op: per the original author, SparkWrite sets the commit properties.
    return self();
  }

  /**
   * Not supported.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public ThisT deleteWith(Consumer<String> deleteFunc) {
    throw new UnsupportedOperationException("Setting delete callback function is not supported");
  }

  /**
   * Not supported.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public ThisT stageOnly() {
    throw new UnsupportedOperationException("Staging snapshot is not supported");
  }

  /**
   * Not supported.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public ThisT scanManifestsWith(ExecutorService executorService) {
    throw new UnsupportedOperationException("Setting manifest scanner is not supported");
  }

  /**
   * Not supported.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public Snapshot apply() {
    throw new UnsupportedOperationException("Applying changes to snapshot is not supported");
  }

  /**
   * Builds manifests for the added and removed files and commits them as one
   * {@link OverwriteRowsWithManifest} update. Does nothing when no file was added or removed.
   */
  @Override
  public void commit() {
    if (hasChanges()) {
      List<String> addedManifestLocations = buildManifest(addedDataFiles);
      List<String> removedManifestLocations = buildManifest(removedDataFiles);

      OverwriteRowsWithManifest overwriteRowsWithManifest =
          new OverwriteRowsWithManifest(addedManifestLocations, removedManifestLocations);
      commitWithTransaction(overwriteRowsWithManifest);
    }
  }

  /**
   * Reports whether any file has been registered for addition or removal.
   *
   * @return {@code true} if at least one file was added or removed
   */
  protected boolean hasChanges() {
    return !addedDataFiles.isEmpty() || !removedDataFiles.isEmpty();
  }

  /**
   * Commits the added files together with a delete filter, as one
   * {@link OverwriteRowsWithDeleteFilter} update.
   *
   * <p>Unlike {@link #commit()}, this path does not check {@link #hasChanges()} and does not send
   * the files registered through {@link #remove(DataFile)}.
   *
   * @param deleteExpr the delete filter; it is serialized with {@link ExpressionParser#toJson}
   */
  protected void commitWithDeleteFilterRequest(Expression deleteExpr) {
    List<String> addedManifestLocations = buildManifest(addedDataFiles);

    String deleteFilter = ExpressionParser.toJson(deleteExpr);
    OverwriteRowsWithDeleteFilter overwriteRowsWithDeleteFilter =
        new OverwriteRowsWithDeleteFilter(addedManifestLocations, deleteFilter);
    commitWithTransaction(overwriteRowsWithDeleteFilter);
  }

  /**
   * Sends a single metadata update, with requirements derived from the current table metadata,
   * then invalidates the table's scan cache.
   *
   * @param update the metadata update to commit
   * @throws ClassCastException if {@link #ops} is not a {@link GlueExtensionsTableOperations}
   */
  protected void commitWithTransaction(MetadataUpdate update) {
    List<UpdateRequirement> requirements = buildUpdateRequirements(ImmutableList.of(update));
    UpdateTableRequest request = new UpdateTableRequest(requirements, ImmutableList.of(update));
    ((GlueExtensionsTableOperations) ops)
        .commitUpdateWithTransaction(request, ErrorHandlers.tableCommitHandler());
    table.invalidateScanCache();
  }

  /** Derives the update requirements for {@code updates} from {@code ops.current()}. */
  private List<UpdateRequirement> buildUpdateRequirements(List<MetadataUpdate> updates) {
    return UpdateRequirements.forUpdateTable(ops.current(), ImmutableList.copyOf(updates));
  }

  /**
   * Builds manifests for the given files, skipping the manifest builder when the list is empty.
   *
   * <p>NOTE(review): the element type is assumed to be a manifest location string, based on how
   * the result is named at the call sites; confirm against
   * {@code DataFileManifestGenerator.buildDataManifest}.
   *
   * @param dataFiles the data files to write into manifests
   * @return the manifest builder's result, or an empty list when {@code dataFiles} is empty
   */
  private List<String> buildManifest(List<DataFile> dataFiles) {
    return !dataFiles.isEmpty()
        ? dataManifestBuilder.buildDataManifest(dataFiles)
        : ImmutableList.of();
  }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy