All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteSupport Maven / Gradle / Ivy

Go to download

This module contains code to support integration with Amazon Web Services. It also declares the dependencies needed to work with AWS services.

There is a newer version: 3.4.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.impl;

import java.io.IOException;
import java.nio.file.AccessDeniedException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Collectors;

import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.MultiObjectDeleteException;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.lang3.tuple.Triple;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.AWSS3IOException;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.Tristate;
import org.apache.hadoop.fs.s3a.s3guard.BulkOperationState;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.PathMetadata;

import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull;

/**
 * Support for Multi Object Deletion.
 */
public final class MultiObjectDeleteSupport extends AbstractStoreOperation {

  private static final Logger LOG = LoggerFactory.getLogger(
      MultiObjectDeleteSupport.class);

  private final BulkOperationState operationState;

  /**
   * Initiate with a store context.
   * @param context store context.
   * @param operationState any ongoing bulk operation.
   */
  public MultiObjectDeleteSupport(final StoreContext context,
      final BulkOperationState operationState) {
    super(context);
    this.operationState = operationState;
  }

  /**
   * This is the exception exit code if access was denied on a delete.
   * {@value}.
   */
  public static final String ACCESS_DENIED = "AccessDenied";

  /**
   * A {@code MultiObjectDeleteException} is raised if one or more
   * paths listed in a bulk DELETE operation failed.
   * The top-level exception is therefore just "something wasn't deleted",
   * but doesn't include the what or the why.
   * This translation will extract an AccessDeniedException if that's one of
   * the causes, otherwise grabs the status code and uses it in the
   * returned exception.
   * @param message text for the exception
   * @param deleteException the delete exception. to translate
   * @return an IOE with more detail.
   */
  public static IOException translateDeleteException(
      final String message,
      final MultiObjectDeleteException deleteException) {
    List errors
        = deleteException.getErrors();
    LOG.warn("Bulk delete operation failed to delete all objects;"
            + " failure count = {}",
        errors.size());
    final StringBuilder result = new StringBuilder(
        errors.size() * 256);
    result.append(message).append(": ");
    String exitCode = "";
    for (MultiObjectDeleteException.DeleteError error :
        deleteException.getErrors()) {
      String code = error.getCode();
      String item = String.format("%s: %s%s: %s%n", code, error.getKey(),
          (error.getVersionId() != null
              ? (" (" + error.getVersionId() + ")")
              : ""),
          error.getMessage());
      LOG.warn(item);
      result.append(item);
      if (exitCode == null || exitCode.isEmpty() || ACCESS_DENIED.equals(code)) {
        exitCode = code;
      }
    }
    if (ACCESS_DENIED.equals(exitCode)) {
      return (IOException) new AccessDeniedException(result.toString())
          .initCause(deleteException);
    } else {
      return new AWSS3IOException(result.toString(), deleteException);
    }
  }

  /**
   * Process a multi object delete exception by building two paths from
   * the delete request: one of all deleted files, one of all undeleted values.
   * The latter are those rejected in the delete call.
   * @param deleteException the delete exception.
   * @param keysToDelete the keys in the delete request
   * @return tuple of (undeleted, deleted) paths.
   */
  public Pair, List> splitUndeletedKeys(
      final MultiObjectDeleteException deleteException,
      final Collection keysToDelete) {
    LOG.debug("Processing delete failure; keys to delete count = {};"
            + " errors in exception {}; successful deletions = {}",
        keysToDelete.size(),
        deleteException.getErrors().size(),
        deleteException.getDeletedObjects().size());
    // convert the collection of keys being deleted into paths
    final List pathsBeingDeleted = keysToKeyPaths(keysToDelete);
    // Take this ist of paths
    // extract all undeleted entries contained in the exception and
    // then remove them from the original list.
    List undeleted = removeUndeletedPaths(deleteException,
        pathsBeingDeleted,
        getStoreContext()::keyToPath);
    return Pair.of(undeleted, pathsBeingDeleted);
  }

  /**
   * Given a list of delete requests, convert them all to paths.
   * @param keysToDelete list of keys for the delete operation.
   * @return the paths.
   */
  public List keysToPaths(
      final Collection keysToDelete) {
    return toPathList(keysToKeyPaths(keysToDelete));
  }

  /**
   * Given a list of delete requests, convert them all to keypaths.
   * @param keysToDelete list of keys for the delete operation.
   * @return list of keypath entries
   */
  public List keysToKeyPaths(
      final Collection keysToDelete) {
    return convertToKeyPaths(keysToDelete,
        getStoreContext()::keyToPath);
  }

  /**
   * Given a list of delete requests, convert them all to paths.
   * @param keysToDelete list of keys for the delete operation.
   * @param qualifier path qualifier
   * @return the paths.
   */
  public static List convertToKeyPaths(
      final Collection keysToDelete,
      final Function qualifier) {
    List l = new ArrayList<>(keysToDelete.size());
    for (DeleteObjectsRequest.KeyVersion kv : keysToDelete) {
      String key = kv.getKey();
      Path p = qualifier.apply(key);
      boolean isDir = key.endsWith("/");
      l.add(new KeyPath(key, p, isDir));
    }
    return l;
  }

  /**
   * Process a delete failure by removing from the metastore all entries
   * which where deleted, as inferred from the delete failures exception
   * and the original list of files to delete declares to have been deleted.
   * @param deleteException the delete exception.
   * @param keysToDelete collection of keys which had been requested.
   * @param retainedMarkers list built up of retained markers.
   * @return a tuple of (undeleted, deleted, failures)
   */
  public Triple, List, List>>
      processDeleteFailure(
      final MultiObjectDeleteException deleteException,
      final List keysToDelete,
      final List retainedMarkers) {
    final MetadataStore metadataStore =
        checkNotNull(getStoreContext().getMetadataStore(),
            "context metadatastore");
    final List> failures = new ArrayList<>();
    final Pair, List> outcome =
        splitUndeletedKeys(deleteException, keysToDelete);
    List deleted = outcome.getRight();
    List deletedPaths = new ArrayList<>();
    List undeleted = outcome.getLeft();
    retainedMarkers.clear();
    List undeletedPaths = toPathList((List) undeleted);
    // sort shorter keys first,
    // so that if the left key is longer than the first it is considered
    // smaller, so appears in the list first.
    // thus when we look for a dir being empty, we know it holds
    deleted.sort((l, r) -> r.getKey().length() - l.getKey().length());

    // now go through and delete from S3Guard all paths listed in
    // the result which are either files or directories with
    // no children.
    deleted.forEach(kp -> {
      Path path = kp.getPath();
      try{
        boolean toDelete = true;
        if (kp.isDirectoryMarker()) {
          // its a dir marker, which could be an empty dir
          // (which is then tombstoned), or a non-empty dir, which
          // is not tombstoned.
          // for this to be handled, we have to have removed children
          // from the store first, which relies on the sort
          PathMetadata pmentry = metadataStore.get(path, true);
          if (pmentry != null && !pmentry.isDeleted()) {
            toDelete = pmentry.getFileStatus().isEmptyDirectory()
                == Tristate.TRUE;
          } else {
            toDelete = false;
          }
        }
        if (toDelete) {
          LOG.debug("Removing deleted object from S3Guard Store {}", path);
          metadataStore.delete(path, operationState);
        } else {
          LOG.debug("Retaining S3Guard directory entry {}", path);
          retainedMarkers.add(path);
        }
      } catch (IOException e) {
        // trouble: we failed to delete the far end entry
        // try with the next one.
        // if this is a big network failure, this is going to be noisy.
        LOG.warn("Failed to update S3Guard store with deletion of {}", path);
        failures.add(Pair.of(path, e));
      }
      // irrespective of the S3Guard outcome, it is declared as deleted, as
      // it is no longer in the S3 store.
      deletedPaths.add(path);
    });
    if (LOG.isDebugEnabled()) {
      undeleted.forEach(p -> LOG.debug("Deleted {}", p));
    }
    return Triple.of(undeletedPaths, deletedPaths, failures);
  }

  /**
   * Given a list of keypaths, convert to a list of paths.
   * @param keyPaths source list
   * @return a listg of paths
   */
  public static List toPathList(final List keyPaths) {
    return keyPaths.stream()
        .map(KeyPath::getPath)
        .collect(Collectors.toList());
  }

  /**
   * Build a list of undeleted paths from a {@code MultiObjectDeleteException}.
   * Outside of unit tests, the qualifier function should be
   * {@link S3AFileSystem#keyToQualifiedPath(String)}.
   * @param deleteException the delete exception.
   * @param qualifierFn function to qualify paths
   * @return the possibly empty list of paths.
   */
  @VisibleForTesting
  public static List extractUndeletedPaths(
      final MultiObjectDeleteException deleteException,
      final Function qualifierFn) {
    return toPathList(extractUndeletedKeyPaths(deleteException, qualifierFn));
  }

  /**
   * Build a list of undeleted paths from a {@code MultiObjectDeleteException}.
   * Outside of unit tests, the qualifier function should be
   * {@link S3AFileSystem#keyToQualifiedPath(String)}.
   * @param deleteException the delete exception.
   * @param qualifierFn function to qualify paths
   * @return the possibly empty list of paths.
   */
  @VisibleForTesting
  public static List extractUndeletedKeyPaths(
      final MultiObjectDeleteException deleteException,
      final Function qualifierFn) {

    List errors
        = deleteException.getErrors();
    return errors.stream()
        .map((error) -> {
          String key = error.getKey();
          Path path = qualifierFn.apply(key);
          boolean isDir = key.endsWith("/");
          return new KeyPath(key, path, isDir);
        })
        .collect(Collectors.toList());
  }

  /**
   * Process a {@code MultiObjectDeleteException} by
   * removing all undeleted paths from the list of paths being deleted.
   * The original list is updated, and so becomes the list of successfully
   * deleted paths.
   * @param deleteException the delete exception.
   * @param pathsBeingDeleted list of paths which were being deleted.
   * This has all undeleted paths removed, leaving only those deleted.
   * @return the list of undeleted entries
   */
  @VisibleForTesting
  static List removeUndeletedPaths(
      final MultiObjectDeleteException deleteException,
      final Collection pathsBeingDeleted,
      final Function qualifier) {
    // get the undeleted values
    List undeleted = extractUndeletedKeyPaths(deleteException,
        qualifier);
    // and remove them from the undeleted list, matching on key
    for (KeyPath undel : undeleted) {
      pathsBeingDeleted.removeIf(kp -> kp.getPath().equals(undel.getPath()));
    }
    return undeleted;
  }

  /**
   * A delete operation failed.
   * Currently just returns the list of all paths.
   * @param ex exception.
   * @param keysToDelete the keys which were being deleted.
   * @return all paths which were not deleted.
   */
  public List processDeleteFailureGenericException(Exception ex,
      final List keysToDelete) {
    return keysToPaths(keysToDelete);
  }

  /**
   * Representation of a (key, path) which couldn't be deleted;
   * the dir marker flag is inferred from the key suffix.
   * 

* Added because Pairs of Lists of Triples was just too complex * for Java code. *

*/ public static final class KeyPath { /** Key in bucket. */ private final String key; /** Full path. */ private final Path path; /** Is this a directory marker? */ private final boolean directoryMarker; public KeyPath(final String key, final Path path, final boolean directoryMarker) { this.key = key; this.path = path; this.directoryMarker = directoryMarker; } public String getKey() { return key; } public Path getPath() { return path; } public boolean isDirectoryMarker() { return directoryMarker; } @Override public String toString() { return "KeyPath{" + "key='" + key + '\'' + ", path=" + path + ", directoryMarker=" + directoryMarker + '}'; } /** * Equals test is on key alone. */ @Override public boolean equals(final Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } KeyPath keyPath = (KeyPath) o; return key.equals(keyPath.key); } @Override public int hashCode() { return Objects.hash(key); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy