
org.apache.hadoop.fs.s3a.S3AUtils Maven / Gradle / Ivy


This module contains code to support integration with Amazon Web Services. It also declares the dependencies needed to work with AWS services.

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a;

import com.amazonaws.AbortedException;
import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.Protocol;
import com.amazonaws.SdkBaseException;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.EnvironmentVariableCredentialsProvider;
import com.amazonaws.retry.RetryUtils;
import com.amazonaws.services.s3.model.AmazonS3Exception;
import com.amazonaws.services.s3.model.MultiObjectDeleteException;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.util.functional.RemoteIterators;
import org.apache.hadoop.fs.s3a.audit.AuditFailureException;
import org.apache.hadoop.fs.s3a.audit.AuditIntegration;
import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets;
import org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider;
import org.apache.hadoop.fs.s3a.impl.NetworkBinding;
import org.apache.hadoop.fs.s3a.impl.V2Migration;
import org.apache.hadoop.fs.s3native.S3xLoginHelper;
import org.apache.hadoop.net.ConnectTimeoutException;
import org.apache.hadoop.security.ProviderUtils;
import org.apache.hadoop.util.VersionInfo;

import org.apache.hadoop.thirdparty.com.google.common.collect.Lists;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.io.UncheckedIOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.net.SocketTimeoutException;
import java.net.URI;
import java.nio.file.AccessDeniedException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;

import static org.apache.commons.lang3.StringUtils.isEmpty;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.CSE_PADDING_LENGTH;
import static org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteSupport.translateDeleteException;
import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
import static org.apache.hadoop.util.functional.RemoteIterators.filteringRemoteIterator;

/**
 * Utility methods for S3A code.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public final class S3AUtils {

  private static final Logger LOG = LoggerFactory.getLogger(S3AUtils.class);
  static final String CONSTRUCTOR_EXCEPTION = "constructor exception";
  static final String INSTANTIATION_EXCEPTION
      = "instantiation exception";
  static final String NOT_AWS_PROVIDER =
      "does not implement AWSCredentialsProvider";
  static final String ABSTRACT_PROVIDER =
      "is abstract and therefore cannot be created";
  static final String ENDPOINT_KEY = "Endpoint";

  /** Filesystem is closed; kept here to keep the errors close. */
  static final String E_FS_CLOSED = "FileSystem is closed!";

  /**
   * Core property for provider path. Duplicated here for consistent
   * code across Hadoop versions: {@value}.
   */
  static final String CREDENTIAL_PROVIDER_PATH =
      "hadoop.security.credential.provider.path";

  /**
   * Encryption SSE-C used but the config lacks an encryption key.
   */
  public static final String SSE_C_NO_KEY_ERROR =
      S3AEncryptionMethods.SSE_C.getMethod()
          + " is enabled but no encryption key was declared in "
          + Constants.S3_ENCRYPTION_KEY;
  /**
   * Encryption SSE-S3 is used but the caller also set an encryption key.
   */
  public static final String SSE_S3_WITH_KEY_ERROR =
      S3AEncryptionMethods.SSE_S3.getMethod()
          + " is enabled but an encryption key was set in "
          + Constants.S3_ENCRYPTION_KEY;
  public static final String EOF_MESSAGE_IN_XML_PARSER
      = "Failed to sanitize XML document destined for handler class";

  public static final String EOF_READ_DIFFERENT_LENGTH
      = "Data read has a different length than the expected";

  private static final String BUCKET_PATTERN = FS_S3A_BUCKET_PREFIX + "%s.%s";

  /**
   * Error message when the AWS provider list built up contains a forbidden
   * entry.
   */
  @VisibleForTesting
  public static final String E_FORBIDDEN_AWS_PROVIDER
      = "AWS provider class cannot be used";

  private S3AUtils() {
  }

  /**
   * Translate an exception raised in an operation into an IOException.
   * The specific type of IOException depends on the class of
   * {@link AmazonClientException} passed in, and any status codes included
   * in the operation. That is: HTTP error codes are examined and can be
   * used to build a more specific response.
   *
   * @see <a href="http://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html">S3 Error responses</a>
   * @see <a href="http://docs.aws.amazon.com/AmazonS3/latest/dev/ErrorBestPractices.html">Amazon S3 Error Best Practices</a>
   * @param operation operation
   * @param path path operated on (must not be null)
   * @param exception amazon exception raised
   * @return an IOE which wraps the caught exception.
   */
  public static IOException translateException(String operation,
      Path path,
      AmazonClientException exception) {
    return translateException(operation, path.toString(), exception);
  }
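
  /*
   * Illustrative usage sketch, not part of the original source: a caller
   * wrapping an SDK invocation would typically translate failures like this.
   * The operation name "rename" and the variables dest/e are hypothetical.
   *
   *   try {
   *     // ... some AWS SDK call against the object at Path 'dest' ...
   *   } catch (AmazonClientException e) {
   *     throw translateException("rename", dest, e);
   *   }
   */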

  /**
   * Translate an exception raised in an operation into an IOException.
   * The specific type of IOException depends on the class of
   * {@link AmazonClientException} passed in, and any status codes included
   * in the operation. That is: HTTP error codes are examined and can be
   * used to build a more specific response.
   * @param operation operation
   * @param path path operated on (may be null)
   * @param exception amazon exception raised
   * @return an IOE which wraps the caught exception.
   */
  @SuppressWarnings("ThrowableInstanceNeverThrown")
  public static IOException translateException(@Nullable String operation,
      String path,
      SdkBaseException exception) {
    String message = String.format("%s%s: %s",
        operation,
        StringUtils.isNotEmpty(path)? (" on " + path) : "",
        exception);
    if (!(exception instanceof AmazonServiceException)) {
      Exception innerCause = containsInterruptedException(exception);
      if (innerCause != null) {
        // interrupted IO, or a socket exception underneath that class
        return translateInterruptedException(exception, innerCause, message);
      }
      if (isMessageTranslatableToEOF(exception)) {
        // call considered a sign of connectivity failure
        return (EOFException)new EOFException(message).initCause(exception);
      }
      // if the exception came from the auditor, hand off translation
      // to it.
      if (exception instanceof AuditFailureException) {
        return AuditIntegration.translateAuditException(path, (AuditFailureException) exception);
      }
      if (exception instanceof CredentialInitializationException) {
        // the exception raised by AWSCredentialProvider list if the
        // credentials were not accepted.
        return (AccessDeniedException)new AccessDeniedException(path, null,
            exception.toString()).initCause(exception);
      }
      return new AWSClientIOException(message, exception);
    } else {
      IOException ioe;
      AmazonServiceException ase = (AmazonServiceException) exception;
      // this exception is non-null if the service exception is an s3 one
      AmazonS3Exception s3Exception = ase instanceof AmazonS3Exception
          ? (AmazonS3Exception) ase
          : null;
      int status = ase.getStatusCode();
      message = message + ":" + ase.getErrorCode();
      switch (status) {

      case 301:
      case 307:
        if (s3Exception != null) {
          if (s3Exception.getAdditionalDetails() != null &&
              s3Exception.getAdditionalDetails().containsKey(ENDPOINT_KEY)) {
            message = String.format("Received permanent redirect response to "
                + "endpoint %s.  This likely indicates that the S3 endpoint "
                + "configured in %s does not match the AWS region containing "
                + "the bucket.",
                s3Exception.getAdditionalDetails().get(ENDPOINT_KEY), ENDPOINT);
          }
          ioe = new AWSRedirectException(message, s3Exception);
        } else {
          ioe = new AWSRedirectException(message, ase);
        }
        break;

      case 400:
        ioe = new AWSBadRequestException(message, ase);
        break;

      // permissions
      case 401:
      case 403:
        ioe = new AccessDeniedException(path, null, message);
        ioe.initCause(ase);
        break;

      // the object isn't there
      case 404:
        if (isUnknownBucket(ase)) {
          // this is a missing bucket
          ioe = new UnknownStoreException(path, message, ase);
        } else {
          // a normal unknown object
          ioe = new FileNotFoundException(message);
          ioe.initCause(ase);
        }
        break;

      // this also surfaces sometimes and is considered to
      // be ~ a not found exception.
      case 410:
        ioe = new FileNotFoundException(message);
        ioe.initCause(ase);
        break;

      // method not allowed; seen on S3 Select.
      // treated as a bad request
      case 405:
        ioe = new AWSBadRequestException(message, s3Exception);
        break;

      // out of range. This may happen if an object is overwritten with
      // a shorter one while it is being read.
      case 416:
        ioe = new EOFException(message);
        ioe.initCause(ase);
        break;

      // this has surfaced as a "no response from server" message.
      // so rare we haven't replicated it.
      // Treating as an idempotent proxy error.
      case 443:
      case 444:
        ioe = new AWSNoResponseException(message, ase);
        break;

      // throttling
      case 503:
        ioe = new AWSServiceThrottledException(message, ase);
        break;

      // internal error
      case 500:
        ioe = new AWSStatus500Exception(message, ase);
        break;

      case 200:
        if (exception instanceof MultiObjectDeleteException) {
          // failure during a bulk delete
          return translateDeleteException(message,
              (MultiObjectDeleteException) exception);
        }
        // other 200: FALL THROUGH

      default:
        // no specific exit code. Choose an IOE subclass based on the class
        // of the caught exception
        ioe = s3Exception != null
            ? new AWSS3IOException(message, s3Exception)
            : new AWSServiceIOException(message, ase);
        break;
      }
      return ioe;
    }
  }

  /**
   * Extract an exception from a failed future, and convert to an IOE.
   * @param operation operation which failed
   * @param path path operated on (may be null)
   * @param ee execution exception
   * @return an IOE which can be thrown
   */
  public static IOException extractException(String operation,
      String path,
      ExecutionException ee) {
    IOException ioe;
    Throwable cause = ee.getCause();
    if (cause instanceof AmazonClientException) {
      ioe = translateException(operation, path, (AmazonClientException) cause);
    } else if (cause instanceof IOException) {
      ioe = (IOException) cause;
    } else {
      ioe = new IOException(operation + " failed: " + cause, cause);
    }
    return ioe;
  }
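
  /*
   * Illustrative sketch, not from the source: how this helper is typically
   * used when waiting on an asynchronous operation. The future and key
   * names are hypothetical.
   *
   *   try {
   *     uploadFuture.get();
   *   } catch (ExecutionException ee) {
   *     throw extractException("upload", key, ee);
   *   }
   */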

  /**
   * Recurse down the exception loop looking for any inner details about
   * an interrupted exception.
   * @param thrown exception thrown
   * @return the actual exception if the operation was an interrupt
   */
  static Exception containsInterruptedException(Throwable thrown) {
    if (thrown == null) {
      return null;
    }
    if (thrown instanceof InterruptedException ||
        thrown instanceof InterruptedIOException ||
        thrown instanceof AbortedException) {
      return (Exception)thrown;
    }
    // tail recurse
    return containsInterruptedException(thrown.getCause());
  }

  /**
   * Handles translation of interrupted exception. This includes
   * preserving the class of the fault for better retry logic.
   * @param exception outer exception
   * @param innerCause inner cause (which is guaranteed to be some form
   * of interrupted exception)
   * @param message message for the new exception.
   * @return an IOE which can be rethrown
   */
  private static InterruptedIOException translateInterruptedException(
      SdkBaseException exception,
      final Exception innerCause,
      String message) {
    InterruptedIOException ioe;
    if (innerCause instanceof SocketTimeoutException) {
      ioe = new SocketTimeoutException(message);
    } else {
      String name = innerCause.getClass().getName();
      if (name.endsWith(".ConnectTimeoutException")
          || name.endsWith(".ConnectionPoolTimeoutException")
          || name.endsWith("$ConnectTimeoutException")) {
        // TCP connection http timeout from the shaded or unshaded filenames
        // com.amazonaws.thirdparty.apache.http.conn.ConnectTimeoutException
        ioe = new ConnectTimeoutException(message);
      } else {
        // any other exception
        ioe = new InterruptedIOException(message);
      }
    }
    ioe.initCause(exception);
    return ioe;
  }

  /**
   * Is the exception an instance of a throttling exception. That
   * is an AmazonServiceException with a 503 response, an
   * {@link AWSServiceThrottledException},
   * or anything which the AWS SDK's RetryUtils considers to be
   * a throttling exception.
   * @param ex exception to examine
   * @return true if it is considered a throttling exception
   */
  public static boolean isThrottleException(Exception ex) {
    return ex instanceof AWSServiceThrottledException
        || (ex instanceof AmazonServiceException
            && 503  == ((AmazonServiceException)ex).getStatusCode())
        || (ex instanceof SdkBaseException
            && RetryUtils.isThrottlingException((SdkBaseException) ex));
  }
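
  /*
   * Illustrative sketch only; the backoff policy shown is an assumption and
   * is not the S3A retry policy. A retry loop can use this predicate to
   * decide between backing off and failing fast:
   *
   *   for (int attempt = 0; attempt < maxRetries; attempt++) {
   *     try {
   *       return operation.call();
   *     } catch (AmazonServiceException ex) {
   *       if (!isThrottleException(ex)) {
   *         throw translateException(opName, path, ex);
   *       }
   *       Thread.sleep(baseBackoffMillis << attempt); // hypothetical backoff
   *     }
   *   }
   */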

  /**
   * Cue that an AWS exception is likely to be an EOF Exception based
   * on the message coming back from the client. This is likely to be
   * brittle, so only a hint.
   * @param ex exception
   * @return true if this is believed to be a sign the connection was broken.
   */
  public static boolean isMessageTranslatableToEOF(SdkBaseException ex) {
    return ex.toString().contains(EOF_MESSAGE_IN_XML_PARSER) ||
            ex.toString().contains(EOF_READ_DIFFERENT_LENGTH);
  }

  /**
   * Get low level details of an amazon exception for logging; multi-line.
   * @param e exception
   * @return string details
   */
  public static String stringify(AmazonServiceException e) {
    StringBuilder builder = new StringBuilder(
        String.format("%s: %s error %d: %s; %s%s%n",
            e.getErrorType(),
            e.getServiceName(),
            e.getStatusCode(),
            e.getErrorCode(),
            e.getErrorMessage(),
            (e.isRetryable() ? " (retryable)": "")
        ));
    String rawResponseContent = e.getRawResponseContent();
    if (rawResponseContent != null) {
      builder.append(rawResponseContent);
    }
    return builder.toString();
  }

  /**
   * Get low level details of an amazon exception for logging; multi-line.
   * @param e exception
   * @return string details
   */
  public static String stringify(AmazonS3Exception e) {
    // get the low level details of an exception,
    StringBuilder builder = new StringBuilder(
        stringify((AmazonServiceException) e));
    Map<String, String> details = e.getAdditionalDetails();
    if (details != null) {
      builder.append('\n');
      for (Map.Entry<String, String> d : details.entrySet()) {
        builder.append(d.getKey()).append('=')
            .append(d.getValue()).append('\n');
      }
    }
    return builder.toString();
  }

  /**
   * Create a file status instance from a listing.
   * @param keyPath path to entry
   * @param summary summary from AWS
   * @param blockSize block size to declare.
   * @param owner owner of the file
   * @param eTag S3 object eTag or null if unavailable
   * @param versionId S3 object versionId or null if unavailable
   * @param isCSEEnabled is client side encryption enabled?
   * @return a status entry
   */
  public static S3AFileStatus createFileStatus(Path keyPath,
      S3ObjectSummary summary,
      long blockSize,
      String owner,
      String eTag,
      String versionId,
      boolean isCSEEnabled) {
    long size = summary.getSize();
    // check if cse is enabled; strip out constant padding length.
    if (isCSEEnabled && size >= CSE_PADDING_LENGTH) {
      size -= CSE_PADDING_LENGTH;
    }
    return createFileStatus(keyPath,
        objectRepresentsDirectory(summary.getKey()),
        size, summary.getLastModified(), blockSize, owner, eTag, versionId);
  }
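
  /*
   * Illustrative arithmetic, not from the source: with client-side
   * encryption enabled, a listed object of size 16 + CSE_PADDING_LENGTH
   * bytes is reported as 16 bytes, stripping the constant padding which
   * the CSE client appends to every object.
   */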

  /**
   * Create a file status for an object we just uploaded.  For files, we use
   * current time as modification time, since s3a uses S3's service-based
   * modification time, which will not be available until we do a
   * getFileStatus() later on.
   * @param keyPath path for created object
   * @param isDir true iff directory
   * @param size file length
   * @param blockSize block size for file status
   * @param owner Hadoop username
   * @param eTag S3 object eTag or null if unavailable
   * @param versionId S3 object versionId or null if unavailable
   * @return a status entry
   */
  public static S3AFileStatus createUploadFileStatus(Path keyPath,
      boolean isDir, long size, long blockSize, String owner,
      String eTag, String versionId) {
    Date date = isDir ? null : new Date();
    return createFileStatus(keyPath, isDir, size, date, blockSize, owner,
        eTag, versionId);
  }

  /* Date 'modified' is ignored when isDir is true. */
  private static S3AFileStatus createFileStatus(Path keyPath, boolean isDir,
      long size, Date modified, long blockSize, String owner,
      String eTag, String versionId) {
    if (isDir) {
      return new S3AFileStatus(Tristate.UNKNOWN, keyPath, owner);
    } else {
      return new S3AFileStatus(size, dateToLong(modified), keyPath, blockSize,
          owner, eTag, versionId);
    }
  }

  /**
   * Predicate: does the object represent a directory?
   * @param name object name
   * @return true if it meets the criteria for being an object
   * representing a directory
   */
  public static boolean objectRepresentsDirectory(final String name) {
    return !name.isEmpty()
        && name.charAt(name.length() - 1) == '/';
  }
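
  /*
   * Worked examples (illustrative, not in the original source):
   *   objectRepresentsDirectory("data/")      => true   (directory marker)
   *   objectRepresentsDirectory("data/x.csv") => false
   *   objectRepresentsDirectory("")           => false
   */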

  /**
   * Date to long conversion.
   * Handles null Dates that can be returned by AWS by returning 0.
   * @param date date from AWS query
   * @return timestamp of the object
   */
  public static long dateToLong(final Date date) {
    if (date == null) {
      return 0L;
    }

    return date.getTime();
  }

  /**
   * The standard AWS provider list for AWS connections.
   */
  @SuppressWarnings("deprecation")
  public static final List<Class<?>>
      STANDARD_AWS_PROVIDERS = Collections.unmodifiableList(
      Arrays.asList(
          TemporaryAWSCredentialsProvider.class,
          SimpleAWSCredentialsProvider.class,
          EnvironmentVariableCredentialsProvider.class,
          IAMInstanceCredentialsProvider.class));

  /**
   * Create the AWS credentials from the providers, the URI and
   * the key {@link Constants#AWS_CREDENTIALS_PROVIDER} in the configuration.
   * @param binding Binding URI; may be null
   * @param conf filesystem configuration
   * @return a credentials provider list
   * @throws IOException Problems loading the providers (including reading
   * secrets from credential files).
   */
  public static AWSCredentialProviderList createAWSCredentialProviderSet(
      @Nullable URI binding,
      Configuration conf) throws IOException {
    // this will reject any user:secret entries in the URI
    S3xLoginHelper.rejectSecretsInURIs(binding);
    AWSCredentialProviderList credentials =
        buildAWSProviderList(binding,
            conf,
            AWS_CREDENTIALS_PROVIDER,
            STANDARD_AWS_PROVIDERS,
            new HashSet<>());
    // make sure the logging message strips out any auth details
    LOG.debug("For URI {}, using credentials {}",
        binding, credentials);
    return credentials;
  }
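
  /*
   * Illustrative usage sketch; the URI is hypothetical. The provider chain
   * is read from fs.s3a.aws.credentials.provider, falling back to
   * STANDARD_AWS_PROVIDERS when the option is unset.
   *
   *   Configuration conf = new Configuration();
   *   AWSCredentialProviderList providers =
   *       createAWSCredentialProviderSet(
   *           URI.create("s3a://example-bucket/"), conf);
   */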

  /**
   * Load list of AWS credential provider/credential provider factory classes.
   * @param conf configuration
   * @param key key
   * @param defaultValue list of default values
   * @return the list of classes, possibly empty
   * @throws IOException on a failure to load the list.
   */
  public static List<Class<?>> loadAWSProviderClasses(Configuration conf,
      String key,
      Class<?>... defaultValue) throws IOException {
    try {
      return Arrays.asList(conf.getClasses(key, defaultValue));
    } catch (RuntimeException e) {
      Throwable c = e.getCause() != null ? e.getCause() : e;
      throw new IOException("From option " + key + ' ' + c, c);
    }
  }

  /**
   * Load list of AWS credential provider/credential provider factory classes;
   * support a forbidden list to prevent loops, mandate full secrets, etc.
   * @param binding Binding URI; may be null
   * @param conf configuration
   * @param key key
   * @param forbidden a possibly empty set of forbidden classes.
   * @param defaultValues list of default providers.
   * @return the list of classes, possibly empty
   * @throws IOException on a failure to load the list.
   */
  public static AWSCredentialProviderList buildAWSProviderList(
      @Nullable final URI binding,
      final Configuration conf,
      final String key,
      final List<Class<?>> defaultValues,
      final Set<Class<?>> forbidden) throws IOException {

    // build up the base provider
    List> awsClasses = loadAWSProviderClasses(conf,
        key,
        defaultValues.toArray(new Class[defaultValues.size()]));
    // and if the list is empty, switch back to the defaults.
    // this is to address the issue that configuration.getClasses()
    // doesn't return the default if the config value is just whitespace.
    if (awsClasses.isEmpty()) {
      awsClasses = defaultValues;
    }
    // iterate through, checking for blacklists and then instantiating
    // each provider
    AWSCredentialProviderList providers = new AWSCredentialProviderList();
    for (Class<?> aClass : awsClasses) {

      // List of V1 credential providers that will be migrated with V2 upgrade
      if (!Arrays.asList("EnvironmentVariableCredentialsProvider",
              "EC2ContainerCredentialsProviderWrapper", "InstanceProfileCredentialsProvider")
          .contains(aClass.getSimpleName()) && aClass.getName().contains(AWS_AUTH_CLASS_PREFIX)) {
        V2Migration.v1ProviderReferenced(aClass.getName());
      }

      if (forbidden.contains(aClass)) {
        throw new IOException(E_FORBIDDEN_AWS_PROVIDER
            + " in option " + key + ": " + aClass);
      }
      providers.add(createAWSCredentialProvider(conf,
          aClass, binding));
    }
    return providers;
  }
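
  /*
   * Illustrative sketch of the forbidden-class mechanism, as used to stop a
   * provider from (re)instantiating itself and looping; names are
   * hypothetical.
   *
   *   Set<Class<?>> forbidden = new HashSet<>();
   *   forbidden.add(IAMInstanceCredentialsProvider.class);
   *   AWSCredentialProviderList list = buildAWSProviderList(
   *       uri, conf, AWS_CREDENTIALS_PROVIDER,
   *       STANDARD_AWS_PROVIDERS, forbidden);
   */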

  /**
   * Create an AWS credential provider from its class by using reflection.  The
   * class must implement one of the following means of construction, which are
   * attempted in order:
   *
   * <ol>
   * <li>a public constructor accepting java.net.URI and
   *     org.apache.hadoop.conf.Configuration</li>
   * <li>a public constructor accepting
   *     org.apache.hadoop.conf.Configuration</li>
   * <li>a public static method named getInstance that accepts no
   *     arguments and returns an instance of
   *     com.amazonaws.auth.AWSCredentialsProvider, or</li>
   * <li>a public default constructor.</li>
   * </ol>
   *
   * @param conf configuration
   * @param credClass credential class
   * @param uri URI of the FS
   * @return the instantiated class
   * @throws IOException on any instantiation failure.
   */
  private static AWSCredentialsProvider createAWSCredentialProvider(
      Configuration conf,
      Class<?> credClass,
      @Nullable URI uri) throws IOException {
    AWSCredentialsProvider credentials = null;
    String className = credClass.getName();
    if (!AWSCredentialsProvider.class.isAssignableFrom(credClass)) {
      throw new IOException("Class " + credClass + " " + NOT_AWS_PROVIDER);
    }
    if (Modifier.isAbstract(credClass.getModifiers())) {
      throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER);
    }
    LOG.debug("Credential provider class is {}", className);

    try {
      // new X(uri, conf)
      Constructor<?> cons = getConstructor(credClass, URI.class,
          Configuration.class);
      if (cons != null) {
        credentials = (AWSCredentialsProvider) cons.newInstance(uri, conf);
        return credentials;
      }
      // new X(conf)
      cons = getConstructor(credClass, Configuration.class);
      if (cons != null) {
        credentials = (AWSCredentialsProvider) cons.newInstance(conf);
        return credentials;
      }

      // X.getInstance()
      Method factory = getFactoryMethod(credClass,
          AWSCredentialsProvider.class, "getInstance");
      if (factory != null) {
        credentials = (AWSCredentialsProvider) factory.invoke(null);
        return credentials;
      }

      // new X()
      cons = getConstructor(credClass);
      if (cons != null) {
        credentials = (AWSCredentialsProvider) cons.newInstance();
        return credentials;
      }

      // no supported constructor or factory method found
      throw new IOException(String.format("%s " + CONSTRUCTOR_EXCEPTION
          + ". A class specified in %s must provide a public constructor "
          + "of a supported signature, or a public factory method named "
          + "getInstance that accepts no arguments.",
          className, AWS_CREDENTIALS_PROVIDER));
    } catch (InvocationTargetException e) {
      Throwable targetException = e.getTargetException();
      if (targetException == null) {
        targetException = e;
      }
      if (targetException instanceof IOException) {
        throw (IOException) targetException;
      } else if (targetException instanceof SdkBaseException) {
        throw translateException("Instantiate " + className, "",
            (SdkBaseException) targetException);
      } else {
        // supported constructor or factory method found, but the call failed
        throw new IOException(className + " " + INSTANTIATION_EXCEPTION
            + ": " + targetException, targetException);
      }
    } catch (ReflectiveOperationException | IllegalArgumentException e) {
      // supported constructor or factory method found, but the call failed
      throw new IOException(className + " " + INSTANTIATION_EXCEPTION
          + ": " + e, e);
    }
  }

  /**
   * Set a key if the value is non-empty.
   * @param config config to patch
   * @param key key to set
   * @param val value to probe and set
   * @param origin origin
   * @return true if the property was set
   */
  public static boolean setIfDefined(Configuration config, String key,
      String val, String origin) {
    if (StringUtils.isNotEmpty(val)) {
      config.set(key, val, origin);
      return true;
    } else {
      return false;
    }
  }

  /**
   * Return the access key and secret for S3 API use,
   * or as indicated in the UserInfo of the name URI param.
   * @param name the URI for which we need the access keys; may be null
   * @param conf the Configuration object to interrogate for keys.
   * @return AWSAccessKeys
   * @throws IOException problems retrieving passwords from KMS.
   */
  public static S3xLoginHelper.Login getAWSAccessKeys(URI name,
      Configuration conf) throws IOException {
    S3xLoginHelper.rejectSecretsInURIs(name);
    Configuration c = ProviderUtils.excludeIncompatibleCredentialProviders(
        conf, S3AFileSystem.class);
    String bucket = name != null ? name.getHost() : "";

    // get the secrets from the configuration

    // get the access key
    String accessKey = lookupPassword(bucket, c, ACCESS_KEY);

    // and the secret
    String secretKey = lookupPassword(bucket, c, SECRET_KEY);

    return new S3xLoginHelper.Login(accessKey, secretKey);
  }

  /**
   * Get a password from a configuration, including JCEKS files, handling both
   * the absolute key and bucket override.
   * @param bucket bucket or "" if none known
   * @param conf configuration
   * @param baseKey base key to look up, e.g "fs.s3a.secret.key"
   * @param overrideVal override value: if non empty this is used instead of
   * querying the configuration.
   * @return a password or "".
   * @throws IOException on any IO problem
   * @throws IllegalArgumentException bad arguments
   */
  @Deprecated
  public static String lookupPassword(
      String bucket,
      Configuration conf,
      String baseKey,
      String overrideVal) throws IOException {
    return lookupPassword(bucket, conf, baseKey, overrideVal, "");
  }

  /**
   * Get a password from a configuration, including JCEKS files, handling both
   * the absolute key and bucket override.
   * @param bucket bucket or "" if none known
   * @param conf configuration
   * @param baseKey base key to look up, e.g "fs.s3a.secret.key"
   * @return a password or "".
   * @throws IOException on any IO problem
   * @throws IllegalArgumentException bad arguments
   */
  public static String lookupPassword(
      String bucket,
      Configuration conf,
      String baseKey) throws IOException {
    return lookupPassword(bucket, conf, baseKey, null, "");
  }

  /**
   * Get a password from a configuration, including JCEKS files, handling both
   * the absolute key and bucket override.
   * @param bucket bucket or "" if none known
   * @param conf configuration
   * @param baseKey base key to look up, e.g "fs.s3a.secret.key"
   * @param overrideVal override value: if non empty this is used instead of
   * querying the configuration.
   * @param defVal value to return if there is no password
   * @return a password or the value of defVal.
   * @throws IOException on any IO problem
   * @throws IllegalArgumentException bad arguments
   */
  public static String lookupPassword(
      String bucket,
      Configuration conf,
      String baseKey,
      String overrideVal,
      String defVal) throws IOException {
    String initialVal;
    Preconditions.checkArgument(baseKey.startsWith(FS_S3A_PREFIX),
        "%s does not start with $%s", baseKey, FS_S3A_PREFIX);
    // if there's a bucket, work with it
    if (StringUtils.isNotEmpty(bucket)) {
      String subkey = baseKey.substring(FS_S3A_PREFIX.length());
      String shortBucketKey = String.format(
          BUCKET_PATTERN, bucket, subkey);
      String longBucketKey = String.format(
          BUCKET_PATTERN, bucket, baseKey);

      // set from the long key unless overridden.
      initialVal = getPassword(conf, longBucketKey, overrideVal);
      // then override from the short one if it is set
      initialVal = getPassword(conf, shortBucketKey, initialVal);
    } else {
      // no bucket, make the initial value the override value
      initialVal = overrideVal;
    }
    return getPassword(conf, baseKey, initialVal, defVal);
  }
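
  /*
   * Worked example (illustrative, not in the source), assuming bucket "b"
   * and baseKey "fs.s3a.secret.key". The lookup above resolves, in order of
   * decreasing precedence:
   *
   *   the overrideVal argument (if non-empty)
   *   fs.s3a.bucket.b.fs.s3a.secret.key   (long bucket form)
   *   fs.s3a.bucket.b.secret.key          (short bucket form)
   *   fs.s3a.secret.key                   (global key)
   *   defVal
   *
   *   String secret = lookupPassword("b", conf, SECRET_KEY);
   */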
  /**
   * Get a password from a configuration, or, if a value is passed in,
   * pick that up instead.
   * @param conf configuration
   * @param key key to look up
   * @param val current value: if non empty this is used instead of
   * querying the configuration.
   * @return a password or "".
   * @throws IOException on any problem
   */
  private static String getPassword(Configuration conf, String key, String val)
      throws IOException {
    return getPassword(conf, key, val, "");
  }

  /**
   * Get a password from a configuration, or, if a value is passed in,
   * pick that up instead.
   * @param conf configuration
   * @param key key to look up
   * @param val current value: if non empty this is used instead of
   * querying the configuration.
   * @param defVal default value if nothing is set
   * @return a password or "".
   * @throws IOException on any problem
   */
  private static String getPassword(Configuration conf,
      String key,
      String val,
      String defVal) throws IOException {
    return isEmpty(val)
        ? lookupPassword(conf, key, defVal)
        : val;
  }

  /**
   * Get a password from a configuration/configured credential providers.
   * @param conf configuration
   * @param key key to look up
   * @param defVal value to return if there is no password
   * @return a password or the value in {@code defVal}
   * @throws IOException on any problem
   */
  static String lookupPassword(Configuration conf, String key, String defVal)
      throws IOException {
    try {
      final char[] pass = conf.getPassword(key);
      return pass != null
          ? new String(pass).trim()
          : defVal;
    } catch (IOException ioe) {
      throw new IOException("Cannot find password option " + key, ioe);
    }
  }

  /**
   * String information about a summary entry for debug messages.
   * @param summary summary object
   * @return string value
   */
  public static String stringify(S3ObjectSummary summary) {
    StringBuilder builder = new StringBuilder(summary.getKey().length() + 100);
    builder.append(summary.getKey()).append(' ');
    builder.append("size=").append(summary.getSize());
    return builder.toString();
  }

  /**
   * Get an integer option >= the minimum allowed value.
   * @param conf configuration
   * @param key key to look up
   * @param defVal default value
   * @param min minimum value
   * @return the value
   * @throws IllegalArgumentException if the value is below the minimum
   */
  public static int intOption(Configuration conf, String key, int defVal,
      int min) {
    int v = conf.getInt(key, defVal);
    Preconditions.checkArgument(v >= min,
        String.format("Value of %s: %d is below the minimum value %d",
            key, v, min));
    LOG.debug("Value of {} is {}", key, v);
    return v;
  }

  /**
   * Get a long option >= the minimum allowed value.
   * @param conf configuration
   * @param key key to look up
   * @param defVal default value
   * @param min minimum value
   * @return the value
   * @throws IllegalArgumentException if the value is below the minimum
   */
  public static long longOption(Configuration conf,
      String key,
      long defVal,
      long min) {
    long v = conf.getLong(key, defVal);
    Preconditions.checkArgument(v >= min,
        String.format("Value of %s: %d is below the minimum value %d",
            key, v, min));
    LOG.debug("Value of {} is {}", key, v);
    return v;
  }

  /**
   * Get a long option >= the minimum allowed value, supporting memory
   * prefixes K,M,G,T,P.
   * @param conf configuration
   * @param key key to look up
   * @param defVal default value
   * @param min minimum value
   * @return the value
   * @throws IllegalArgumentException if the value is below the minimum
   */
  public static long longBytesOption(Configuration conf,
      String key,
      long defVal,
      long min) {
    long v = conf.getLongBytes(key, defVal);
    Preconditions.checkArgument(v >= min,
        String.format("Value of %s: %d is below the minimum value %d",
            key, v, min));
    LOG.debug("Value of {} is {}", key, v);
    return v;
  }

  /**
   * Get a size property from the configuration: this property must
   * be at least equal to {@link Constants#MULTIPART_MIN_SIZE}.
   * If it is too small, it is rounded up to that minimum, and a warning
   * printed.
   * @param conf configuration
   * @param property property name
   * @param defVal default value
   * @return the value, guaranteed to be above the minimum size
   */
  public static long getMultipartSizeProperty(Configuration conf,
      String property, long defVal) {
    long partSize = conf.getLongBytes(property, defVal);
    if (partSize < MULTIPART_MIN_SIZE) {
      LOG.warn("{} must be at least 5 MB; configured value is {}",
          property, partSize);
      partSize = MULTIPART_MIN_SIZE;
    }
    return partSize;
  }

  /**
   * Validates the output stream configuration.
   * @param path path: for error messages
   * @param conf : configuration object for the given context
   * @throws PathIOException Unsupported configuration.
   */
  public static void validateOutputStreamConfiguration(final Path path,
      Configuration conf) throws PathIOException {
    if (!checkDiskBuffer(conf)) {
      throw new PathIOException(path.toString(),
          "Unable to create OutputStream with the given"
              + " multipart upload and buffer configuration.");
    }
  }

  /**
   * Check whether the configuration for S3ABlockOutputStream is
   * consistent or not. Multipart uploads allow all kinds of fast buffers to
   * be supported. When the option is disabled only disk buffers are allowed to
   * be used as the file size might be bigger than the buffer size that can be
   * allocated.
   * @param conf : configuration object for the given context
   * @return true if the disk buffer and the multipart settings are supported
   */
  public static boolean checkDiskBuffer(Configuration conf) {
    boolean isMultipartUploadEnabled = conf.getBoolean(
        MULTIPART_UPLOADS_ENABLED, DEFAULT_MULTIPART_UPLOAD_ENABLED);
    return isMultipartUploadEnabled
        || FAST_UPLOAD_BUFFER_DISK.equals(
            conf.get(FAST_UPLOAD_BUFFER, DEFAULT_FAST_UPLOAD_BUFFER));
  }

  /**
   * Ensure that the long value is in the range of an integer.
   * @param name property name for error messages
   * @param size original size
   * @return the size, guaranteed to be less than or equal to the max
   * value of an integer.
   */
  public static int ensureOutputParameterInRange(String name, long size) {
    if (size > Integer.MAX_VALUE) {
      LOG.warn("s3a: {} capped to ~2.14GB"
          + " (maximum allowed size with current output mechanism)", name);
      return Integer.MAX_VALUE;
    } else {
      return (int) size;
    }
  }

  /**
   * Returns the public constructor of {@code cl} specified by the list of
   * {@code args} or {@code null} if {@code cl} has no public constructor that
   * matches that specification.
   * @param cl class
   * @param args constructor argument types
   * @return constructor or null
   */
  private static Constructor<?> getConstructor(Class<?> cl, Class<?>... args) {
    try {
      Constructor<?> cons = cl.getDeclaredConstructor(args);
      return Modifier.isPublic(cons.getModifiers()) ? cons : null;
    } catch (NoSuchMethodException | SecurityException e) {
      return null;
    }
  }

  /**
   * Returns the public static method of {@code cl} that accepts no arguments
   * and returns {@code returnType} specified by {@code methodName} or
   * {@code null} if {@code cl} has no public static method that matches that
   * specification.
   * @param cl class
   * @param returnType return type
   * @param methodName method name
   * @return method or null
   */
  private static Method getFactoryMethod(Class<?> cl, Class<?> returnType,
      String methodName) {
    try {
      Method m = cl.getDeclaredMethod(methodName);
      if (Modifier.isPublic(m.getModifiers())
          && Modifier.isStatic(m.getModifiers())
          && returnType.isAssignableFrom(m.getReturnType())) {
        return m;
      } else {
        return null;
      }
    } catch (NoSuchMethodException | SecurityException e) {
      return null;
    }
  }

  /**
   * Propagates bucket-specific settings into generic S3A configuration keys.
   * This is done by propagating the values of the form
   * {@code fs.s3a.bucket.${bucket}.key} to
   * {@code fs.s3a.key}, for all values of "key" other than a small set
   * of unmodifiable values.
   *
   * The source of the updated property is set to the key name of the bucket
   * property, to aid in diagnostics of where things came from.
   *
   * Returns a new configuration. Why the clone?
   * You can use the same conf for different filesystems, and the original
   * values are not updated.
   *
   * The {@code fs.s3a.impl} property cannot be set, nor can
   * any with the prefix {@code fs.s3a.bucket}.
   *
   * This method does not propagate security provider path information from
   * the S3A property into the Hadoop common provider: callers must call
   * {@link #patchSecurityCredentialProviders(Configuration)} explicitly.
   * @param source Source Configuration object.
   * @param bucket bucket name. Must not be empty.
   * @return a (potentially) patched clone of the original.
   */
  public static Configuration propagateBucketOptions(Configuration source,
      String bucket) {

    Preconditions.checkArgument(StringUtils.isNotEmpty(bucket),
        "bucket is null/empty");
    final String bucketPrefix = FS_S3A_BUCKET_PREFIX + bucket + '.';
    LOG.debug("Propagating entries under {}", bucketPrefix);
    final Configuration dest = new Configuration(source);
    for (Map.Entry<String, String> entry : source) {
      final String key = entry.getKey();
      // get the (unexpanded) value.
      final String value = entry.getValue();
      if (!key.startsWith(bucketPrefix) || bucketPrefix.equals(key)) {
        continue;
      }
      // there's a bucket prefix, so strip it
      final String stripped = key.substring(bucketPrefix.length());
      if (stripped.startsWith("bucket.") || "impl".equals(stripped)) {
        // tell user off
        LOG.debug("Ignoring bucket option {}", key);
      } else {
        // propagate the value, building a new origin field.
        // to track overwrites, the generic key is overwritten even if
        // already matches the new one.
        String origin = "[" + StringUtils.join(
            source.getPropertySources(key), ", ") + "]";
        final String generic = FS_S3A_PREFIX + stripped;
        LOG.debug("Updating {} from {}", generic, origin);
        dest.set(generic, value, key + " via " + origin);
      }
    }
    return dest;
  }

  /**
   * Delete a path quietly: failures are logged at DEBUG.
   * @param fs filesystem
   * @param path path
   * @param recursive recursive?
   */
  public static void deleteQuietly(FileSystem fs,
      Path path,
      boolean recursive) {
    try {
      fs.delete(path, recursive);
    } catch (IOException e) {
      LOG.debug("Failed to delete {}", path, e);
    }
  }

  /**
   * Delete a path: failures are logged at WARN.
   * @param fs filesystem
   * @param path path
   * @param recursive recursive?
   */
  public static void deleteWithWarning(FileSystem fs,
      Path path,
      boolean recursive) {
    try {
      fs.delete(path, recursive);
    } catch (IOException e) {
      LOG.warn("Failed to delete {}", path, e);
    }
  }
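
  /*
   * Worked example for the propagateBucketOptions method above
   * (illustrative; the bucket name "nightly" is hypothetical):
   *
   *   conf.set("fs.s3a.bucket.nightly.endpoint", "eu-west-1.amazonaws.com");
   *   Configuration patched = propagateBucketOptions(conf, "nightly");
   *   // patched.get("fs.s3a.endpoint") == "eu-west-1.amazonaws.com"
   *   // fs.s3a.bucket.* and fs.s3a.impl entries are never propagated.
   */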
  /**
   * Create a new AWS {@code ClientConfiguration}.
   * All clients to AWS services MUST use this for consistent setup
   * of connectivity, UA, proxy settings.
   * @param conf The Hadoop configuration
   * @param bucket Optional bucket to use to look up per-bucket proxy secrets
   * @return new AWS client configuration
   * @throws IOException problem creating AWS client configuration
   *
   * @deprecated use {@link #createAwsConf(Configuration, String, String)}
   */
  @Deprecated
  public static ClientConfiguration createAwsConf(Configuration conf,
      String bucket)
      throws IOException {
    return createAwsConf(conf, bucket, null);
  }

  /**
   * Create a new AWS {@code ClientConfiguration}. All clients to AWS services
   * MUST use this or the equivalents for the specific service for
   * consistent setup of connectivity, UA, proxy settings.
   *
   * @param conf The Hadoop configuration
   * @param bucket Optional bucket to use to look up per-bucket proxy secrets
   * @param awsServiceIdentifier a string representing the AWS service (S3,
   * etc) for which the ClientConfiguration is being created.
   * @return new AWS client configuration
   * @throws IOException problem creating AWS client configuration
   */
  public static ClientConfiguration createAwsConf(Configuration conf,
      String bucket,
      String awsServiceIdentifier)
      throws IOException {
    final ClientConfiguration awsConf = new ClientConfiguration();
    initConnectionSettings(conf, awsConf);
    initProxySupport(conf, bucket, awsConf);
    initUserAgent(conf, awsConf);
    if (StringUtils.isNotEmpty(awsServiceIdentifier)) {
      String configKey = null;
      switch (awsServiceIdentifier) {
      case AWS_SERVICE_IDENTIFIER_S3:
        configKey = SIGNING_ALGORITHM_S3;
        break;
      case AWS_SERVICE_IDENTIFIER_STS:
        configKey = SIGNING_ALGORITHM_STS;
        break;
      default:
        // Nothing to do. The original signer override is already setup
      }
      if (configKey != null) {
        String signerOverride = conf.getTrimmed(configKey, "");
        if (!signerOverride.isEmpty()) {
          LOG.debug("Signer override for {} = {}",
              awsServiceIdentifier, signerOverride);
          awsConf.setSignerOverride(signerOverride);
        }
      }
    }
    return awsConf;
  }

  /**
   * Initializes all AWS SDK settings related to connection management.
   *
   * @param conf Hadoop configuration
   * @param awsConf AWS SDK configuration
   *
   * @throws IOException if there was an error initializing the protocol
   * settings
   */
  public static void initConnectionSettings(Configuration conf,
      ClientConfiguration awsConf) throws IOException {
    awsConf.setMaxConnections(intOption(conf, MAXIMUM_CONNECTIONS,
        DEFAULT_MAXIMUM_CONNECTIONS, 1));
    initProtocolSettings(conf, awsConf);
    awsConf.setMaxErrorRetry(intOption(conf, MAX_ERROR_RETRIES,
        DEFAULT_MAX_ERROR_RETRIES, 0));
    awsConf.setConnectionTimeout(intOption(conf, ESTABLISH_TIMEOUT,
        DEFAULT_ESTABLISH_TIMEOUT, 0));
    awsConf.setSocketTimeout(intOption(conf, SOCKET_TIMEOUT,
        DEFAULT_SOCKET_TIMEOUT, 0));
    int sockSendBuffer = intOption(conf, SOCKET_SEND_BUFFER,
        DEFAULT_SOCKET_SEND_BUFFER, 2048);
    int sockRecvBuffer = intOption(conf, SOCKET_RECV_BUFFER,
        DEFAULT_SOCKET_RECV_BUFFER, 2048);
    long requestTimeoutMillis = conf.getTimeDuration(REQUEST_TIMEOUT,
        DEFAULT_REQUEST_TIMEOUT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS);
    if (requestTimeoutMillis > Integer.MAX_VALUE) {
      LOG.debug("Request timeout is too high({} ms). Setting to {} ms instead",
          requestTimeoutMillis, Integer.MAX_VALUE);
      requestTimeoutMillis = Integer.MAX_VALUE;
    }
    awsConf.setRequestTimeout((int) requestTimeoutMillis);
    awsConf.setSocketBufferSizeHints(sockSendBuffer, sockRecvBuffer);
    String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, "");
    if (!signerOverride.isEmpty()) {
      LOG.debug("Signer override = {}", signerOverride);
      awsConf.setSignerOverride(signerOverride);
    }
  }

  /**
   * Initializes the connection protocol settings when connecting to S3 (e.g.
   * either HTTP or HTTPS). If secure connections are enabled, this method
   * will load the configured SSL providers.
   *
   * @param conf Hadoop configuration
   * @param awsConf AWS SDK configuration
   *
   * @throws IOException if there is an error initializing the configured
   * {@link javax.net.ssl.SSLSocketFactory}
   */
  private static void initProtocolSettings(Configuration conf,
      ClientConfiguration awsConf) throws IOException {
    boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS,
        DEFAULT_SECURE_CONNECTIONS);
    awsConf.setProtocol(secureConnections ? Protocol.HTTPS : Protocol.HTTP);
    if (secureConnections) {
      NetworkBinding.bindSSLChannelMode(conf, awsConf);
    }
  }

  /**
   * Initializes AWS SDK proxy support in the AWS client configuration
   * if the S3A settings enable it.
   *
   * @param conf Hadoop configuration
   * @param bucket Optional bucket to use to look up per-bucket proxy secrets
   * @param awsConf AWS SDK configuration to update
   * @throws IllegalArgumentException if misconfigured
   * @throws IOException problem getting username/secret from password source.
   */
  public static void initProxySupport(Configuration conf,
      String bucket,
      ClientConfiguration awsConf) throws IllegalArgumentException,
      IOException {
    String proxyHost = conf.getTrimmed(PROXY_HOST, "");
    int proxyPort = conf.getInt(PROXY_PORT, -1);
    if (!proxyHost.isEmpty()) {
      awsConf.setProxyHost(proxyHost);
      if (proxyPort >= 0) {
        awsConf.setProxyPort(proxyPort);
      } else {
        if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) {
          LOG.warn("Proxy host set without port. Using HTTPS default 443");
          awsConf.setProxyPort(443);
        } else {
          LOG.warn("Proxy host set without port. Using HTTP default 80");
          awsConf.setProxyPort(80);
        }
      }
      final String proxyUsername = lookupPassword(bucket, conf, PROXY_USERNAME,
          null, null);
      final String proxyPassword = lookupPassword(bucket, conf, PROXY_PASSWORD,
          null, null);
      if ((proxyUsername == null) != (proxyPassword == null)) {
        String msg = "Proxy error: " + PROXY_USERNAME + " or " +
            PROXY_PASSWORD + " set without the other.";
        LOG.error(msg);
        throw new IllegalArgumentException(msg);
      }
      boolean isProxySecured = conf.getBoolean(PROXY_SECURED, false);
      awsConf.setProxyUsername(proxyUsername);
      awsConf.setProxyPassword(proxyPassword);
      awsConf.setProxyDomain(conf.getTrimmed(PROXY_DOMAIN));
      awsConf.setProxyWorkstation(conf.getTrimmed(PROXY_WORKSTATION));
      awsConf.setProxyProtocol(isProxySecured
          ? Protocol.HTTPS
          : Protocol.HTTP);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Using proxy server {}://{}:{} as user {} with password {} "
                + "on domain {} as workstation {}",
            awsConf.getProxyProtocol(),
            awsConf.getProxyHost(),
            awsConf.getProxyPort(),
            String.valueOf(awsConf.getProxyUsername()),
            awsConf.getProxyPassword(),
            awsConf.getProxyDomain(),
            awsConf.getProxyWorkstation());
      }
    } else if (proxyPort >= 0) {
      String msg = "Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST;
      LOG.error(msg);
      throw new IllegalArgumentException(msg);
    }
  }
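
  /*
   * Illustrative configuration sketch for the proxy logic above; host,
   * port and credentials are hypothetical.
   *
   *   conf.set(PROXY_HOST, "proxy.example.com");
   *   conf.setInt(PROXY_PORT, 8080);      // omit to default to 80 or 443
   *   conf.set(PROXY_USERNAME, "user");   // must be set with PROXY_PASSWORD,
   *   conf.set(PROXY_PASSWORD, "secret"); // or both left unset
   *   initProxySupport(conf, "example-bucket", awsConf);
   */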
  /**
   * Initializes the User-Agent header to send in HTTP requests to AWS
   * services. We always include the Hadoop version number. The user also
   * may set an optional custom prefix to put in front of the Hadoop version
   * number. The AWS SDK internally appends its own information, which seems
   * to include the AWS SDK version, OS and JVM version.
   *
   * @param conf Hadoop configuration
   * @param awsConf AWS SDK configuration to update
   */
  private static void initUserAgent(Configuration conf,
      ClientConfiguration awsConf) {
    String userAgent = "Hadoop " + VersionInfo.getVersion();
    String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, "");
    if (!userAgentPrefix.isEmpty()) {
      userAgent = userAgentPrefix + ", " + userAgent;
    }
    LOG.debug("Using User-Agent: {}", userAgent);
    awsConf.setUserAgentPrefix(userAgent);
  }

  /**
   * Convert the data of an iterator of {@link S3AFileStatus} to
   * an array.
   * @param iterator a non-null iterator
   * @return a possibly-empty array of file status entries
   * @throws IOException failure
   */
  public static S3AFileStatus[] iteratorToStatuses(
      RemoteIterator<S3AFileStatus> iterator)
      throws IOException {
    S3AFileStatus[] statuses = RemoteIterators
        .toArray(iterator, new S3AFileStatus[0]);
    return statuses;
  }

  /**
   * An interface for use in lambda-expressions working with
   * directory tree listings.
   */
  @FunctionalInterface
  public interface CallOnLocatedFileStatus {
    void call(LocatedFileStatus status) throws IOException;
  }

  /**
   * An interface for use in lambda-expressions working with
   * directory tree listings.
   */
  @FunctionalInterface
  public interface LocatedFileStatusMap<T> {
    T call(LocatedFileStatus status) throws IOException;
  }

  /**
   * Apply an operation to every {@link LocatedFileStatus} in a remote
   * iterator.
   * @param iterator iterator from a list
   * @param eval closure to evaluate
   * @return the number of files processed
   * @throws IOException anything in the closure, or iteration logic.
   */
  public static long applyLocatedFiles(
      RemoteIterator<? extends LocatedFileStatus> iterator,
      CallOnLocatedFileStatus eval) throws IOException {
    return RemoteIterators.foreach(iterator, eval::call);
  }

  /**
   * Map an operation to every {@link LocatedFileStatus} in a remote
   * iterator, returning a list of the results.
   * @param <T> return type of map
   * @param iterator iterator from a list
   * @param eval closure to evaluate
   * @return the list of mapped results.
   * @throws IOException anything in the closure, or iteration logic.
   */
  public static <T> List<T> mapLocatedFiles(
      RemoteIterator<? extends LocatedFileStatus> iterator,
      LocatedFileStatusMap<T> eval) throws IOException {
    final List<T> results = new ArrayList<>();
    applyLocatedFiles(iterator,
        (s) -> results.add(eval.call(s)));
    return results;
  }

  /**
   * Map an operation to every {@link LocatedFileStatus} in a remote
   * iterator, returning a list of all results which were not empty.
   * @param <T> return type of map
   * @param iterator iterator from a list
   * @param eval closure to evaluate
   * @return the flattened list of mapped results.
   * @throws IOException anything in the closure, or iteration logic.
   */
  public static <T> List<T> flatmapLocatedFiles(
      RemoteIterator<LocatedFileStatus> iterator,
      LocatedFileStatusMap<Optional<T>> eval) throws IOException {
    final List<T> results = new ArrayList<>();
    applyLocatedFiles(iterator,
        (s) -> eval.call(s).map(r -> results.add(r)));
    return results;
  }
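
  /*
   * Illustrative sketch of the listing helpers above; the filesystem and
   * path are hypothetical.
   *
   *   long count = applyLocatedFiles(fs.listFiles(path, true),
   *       status -> LOG.info("{}", status.getPath()));
   *   List<Path> paths = mapLocatedFiles(fs.listFiles(path, true),
   *       LocatedFileStatus::getPath);
   */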
  /**
   * List located files and filter them as a classic listFiles(path, filter)
   * would do.
   * This will be incremental, fetching pages async.
   * While it is rare for a job to have many thousands of files, jobs
   * against versioned buckets may return earlier if there are many
   * non-visible objects.
   * @param fileSystem filesystem
   * @param path path to list
   * @param recursive recursive listing?
   * @param filter filter for the filename
   * @return iterator over the entries.
   * @throws IOException IO failure.
   */
  public static RemoteIterator<LocatedFileStatus> listAndFilter(
      FileSystem fileSystem, Path path, boolean recursive,
      PathFilter filter) throws IOException {
    return filteringRemoteIterator(
        fileSystem.listFiles(path, recursive),
        status -> filter.accept(status.getPath()));
  }

  /**
   * Convert a value into a non-empty Optional instance if
   * the value of {@code include} is true.
   * @param include flag to indicate the value is to be included.
   * @param value value to return
   * @param <T> type of option.
   * @return if include is false, Optional.empty. Otherwise, the value.
   */
  public static <T> Optional<T> maybe(boolean include, T value) {
    return include ? Optional.of(value) : Optional.empty();
  }

  /**
   * Patch the security credential provider information in
   * {@link #CREDENTIAL_PROVIDER_PATH}
   * with the providers listed in
   * {@link Constants#S3A_SECURITY_CREDENTIAL_PROVIDER_PATH}.
   *
   * This allows different buckets to use different credential files.
   * @param conf configuration to patch
   */
  static void patchSecurityCredentialProviders(Configuration conf) {
    Collection<String> customCredentials = conf.getStringCollection(
        S3A_SECURITY_CREDENTIAL_PROVIDER_PATH);
    Collection<String> hadoopCredentials = conf.getStringCollection(
        CREDENTIAL_PROVIDER_PATH);
    if (!customCredentials.isEmpty()) {
      List<String> all = Lists.newArrayList(customCredentials);
      all.addAll(hadoopCredentials);
      String joined = StringUtils.join(all, ',');
      LOG.debug("Setting {} to {}", CREDENTIAL_PROVIDER_PATH, joined);
      conf.set(CREDENTIAL_PROVIDER_PATH, joined,
          "patch of " + S3A_SECURITY_CREDENTIAL_PROVIDER_PATH);
    }
  }

  /**
   * Lookup a per-bucket-secret from a configuration including JCEKS files.
   * No attempt is made to look for the global configuration.
   * @param bucket bucket or "" if none known
   * @param conf configuration
   * @param baseKey base key to look up, e.g "fs.s3a.secret.key"
   * @return the secret or null.
   * @throws IOException on any IO problem
   * @throws IllegalArgumentException bad arguments
   */
  private static String lookupBucketSecret(
      String bucket,
      Configuration conf,
      String baseKey)
      throws IOException {

    Preconditions.checkArgument(!isEmpty(bucket),
        "null/empty bucket argument");
    Preconditions.checkArgument(baseKey.startsWith(FS_S3A_PREFIX),
        "%s does not start with $%s", baseKey, FS_S3A_PREFIX);
    String subkey = baseKey.substring(FS_S3A_PREFIX.length());

    // set from the long key unless overridden.
    String longBucketKey = String.format(
        BUCKET_PATTERN, bucket, baseKey);
    String initialVal = getPassword(conf, longBucketKey, null, null);
    // then override from the short one if it is set
    String shortBucketKey = String.format(
        BUCKET_PATTERN, bucket, subkey);
    return getPassword(conf, shortBucketKey, initialVal, null);
  }

  /**
   * Get any S3 encryption key, without propagating exceptions from
   * JCEKs files.
   * @param bucket bucket to query for
   * @param conf configuration to examine
   * @return the encryption key or ""
   * @throws IllegalArgumentException bad arguments.
   */
  public static String getS3EncryptionKey(
      String bucket,
      Configuration conf) {
    try {
      return getS3EncryptionKey(bucket, conf, false);
    } catch (IOException e) {
      // never going to happen, but to make sure, convert to
      // runtime exception
      throw new UncheckedIOException(e);
    }
  }
  /**
   * Get any SSE/CSE key from a configuration/credential provider.
   * This operation handles the case where the option has been
   * set in the provider or configuration to the option
   * {@code SERVER_SIDE_ENCRYPTION_KEY}.
   * IOExceptions raised during retrieval are swallowed.
   * @param bucket bucket to query for
   * @param conf configuration to examine
   * @param propagateExceptions should IO exceptions be rethrown?
   * @return the encryption key or ""
   * @throws IllegalArgumentException bad arguments.
   * @throws IOException if propagateExceptions==true and reading a
   * JCEKS file raised an IOE
   */
  @SuppressWarnings("deprecation")
  public static String getS3EncryptionKey(
      String bucket,
      Configuration conf,
      boolean propagateExceptions) throws IOException {
    try {
      // look up the per-bucket value of the new key,
      // which implicitly includes the deprecation remapping
      String key = lookupBucketSecret(bucket, conf, S3_ENCRYPTION_KEY);
      if (key == null) {
        // old key in bucket, jceks
        key = lookupBucketSecret(bucket, conf, SERVER_SIDE_ENCRYPTION_KEY);
      }
      if (key == null) {
        // new key, global; implicit translation of old key in XML files.
        key = lookupPassword(null, conf, S3_ENCRYPTION_KEY);
      }
      if (key == null) {
        // old key, JCEKS
        key = lookupPassword(null, conf, SERVER_SIDE_ENCRYPTION_KEY);
      }
      if (key == null) {
        // no key, return ""
        key = "";
      }
      return key;
    } catch (IOException e) {
      if (propagateExceptions) {
        throw e;
      }
      LOG.warn("Cannot retrieve {} for bucket {}",
          S3_ENCRYPTION_KEY, bucket, e);
      return "";
    }
  }

  /**
   * Get the server-side encryption or client side encryption algorithm.
   * This includes validation of the configuration, checking the state of
   * the encryption key given the chosen algorithm.
   *
   * @param bucket bucket to query for
   * @param conf configuration to scan
   * @return the encryption mechanism (which will be {@code NONE} unless
   * one is set).
   * @throws IOException on JCEKS lookup or invalid method/key configuration.
   */
  public static S3AEncryptionMethods getEncryptionAlgorithm(String bucket,
      Configuration conf) throws IOException {
    return buildEncryptionSecrets(bucket, conf).getEncryptionMethod();
  }
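
  /*
   * Illustrative configuration for the encryption lookup above, choosing
   * SSE-KMS with a hypothetical key ARN; per-bucket overrides of these
   * options follow the same bucket-key rules as other S3A secrets.
   *
   *   conf.set("fs.s3a.encryption.algorithm", "SSE-KMS");
   *   conf.set("fs.s3a.encryption.key",
   *       "arn:aws:kms:us-west-2:123456789012:key/example");
   *   S3AEncryptionMethods method = getEncryptionAlgorithm("bucket", conf);
   */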
  /**
   * Get the server-side encryption or client-side encryption algorithm
   * and key.
   * This includes validation of the configuration, checking the state of
   * the encryption key given the chosen algorithm.
   *
   * @param bucket bucket to query for
   * @param conf configuration to scan
   * @return the encryption mechanism (which will be {@code NONE} unless
   * one is set) and secrets.
   * @throws IOException on JCEKS lookup or invalid method/key configuration.
   */
  @SuppressWarnings("deprecation")
  public static EncryptionSecrets buildEncryptionSecrets(String bucket,
      Configuration conf) throws IOException {

    // new key, per-bucket
    // this will include fixup of the old key in config XML entries
    String algorithm = lookupBucketSecret(bucket, conf,
        S3_ENCRYPTION_ALGORITHM);
    if (algorithm == null) {
      // try the old key, per-bucket setting, which will find JCEKS values
      algorithm = lookupBucketSecret(bucket, conf,
          SERVER_SIDE_ENCRYPTION_ALGORITHM);
    }
    if (algorithm == null) {
      // new key, global setting
      // this will include fixup of the old key in config XML entries
      algorithm = lookupPassword(null, conf, S3_ENCRYPTION_ALGORITHM);
    }
    if (algorithm == null) {
      // old key, global setting, for JCEKS entries.
      algorithm = lookupPassword(null, conf, SERVER_SIDE_ENCRYPTION_ALGORITHM);
    }
    // now determine the algorithm
    final S3AEncryptionMethods encryptionMethod =
        S3AEncryptionMethods.getMethod(algorithm);

    // look up the encryption key
    String encryptionKey = getS3EncryptionKey(bucket, conf,
        encryptionMethod.requiresSecret());
    int encryptionKeyLen = StringUtils.isBlank(encryptionKey)
        ? 0
        : encryptionKey.length();
    String diagnostics = passwordDiagnostics(encryptionKey, "key");
    switch (encryptionMethod) {
    case SSE_C:
      LOG.debug("Using SSE-C with {}", diagnostics);
      if (encryptionKeyLen == 0) {
        throw new IOException(SSE_C_NO_KEY_ERROR);
      }
      break;

    case SSE_S3:
      if (encryptionKeyLen != 0) {
        throw new IOException(SSE_S3_WITH_KEY_ERROR
            + " (" + diagnostics + ")");
      }
      break;

    case SSE_KMS:
      LOG.debug("Using SSE-KMS with {}", diagnostics);
      break;

    case CSE_KMS:
      LOG.debug("Using CSE-KMS with {}", diagnostics);
      break;

    case NONE:
    default:
      LOG.debug("Data is unencrypted");
      break;
    }
    return new EncryptionSecrets(encryptionMethod, encryptionKey);
  }

  /**
   * Provide a password diagnostics string.
   * This aims to help diagnostics without revealing significant password
   * details.
   * @param pass password
   * @param description description for text, e.g. "key" or "password"
   * @return text for use in messages.
   */
  private static String passwordDiagnostics(String pass, String description) {
    if (pass == null) {
      return "null " + description;
    }
    int len = pass.length();
    switch (len) {
    case 0:
      return "empty " + description;
    case 1:
      return description + " of length 1";
    default:
      return description + " of length " + len + " ending with "
          + pass.charAt(len - 1);
    }
  }

  /**
   * Close the Closeable objects and ignore any Exception or
   * null pointers.
   * This is obsolete: use
   * {@link org.apache.hadoop.io.IOUtils#cleanupWithLogger(Logger, Closeable...)}.
   * @param log the log to log at debug level. Can be null.
   * @param closeables the objects to close
   */
  @Deprecated
  public static void closeAll(Logger log,
      Closeable... closeables) {
    cleanupWithLogger(log, closeables);
  }

  /**
   * Close the AutoCloseable objects and ignore any Exception or
   * null pointers.
   * (This is the SLF4J equivalent of that in {@code IOUtils}).
   * @param log the log to log at debug level. Can be null.
   * @param closeables the objects to close
   */
  public static void closeAutocloseables(Logger log,
      AutoCloseable... closeables) {
    if (log == null) {
      log = LOG;
    }
    for (AutoCloseable c : closeables) {
      if (c != null) {
        try {
          log.debug("Closing {}", c);
          c.close();
        } catch (Exception e) {
          log.debug("Exception in closing {}", c, e);
        }
      }
    }
  }

  /**
   * Set a bucket-specific property to a particular value.
   * If the generic key passed in has an {@code fs.s3a.} prefix,
   * that's stripped off, so that when the bucket properties are propagated
   * down to the generic values, that value gets copied down.
   * @param conf configuration to set
   * @param bucket bucket name
   * @param genericKey key; can start with "fs.s3a."
   * @param value value to set
   */
  public static void setBucketOption(Configuration conf, String bucket,
      String genericKey, String value) {
    final String baseKey = genericKey.startsWith(FS_S3A_PREFIX)
        ? genericKey.substring(FS_S3A_PREFIX.length())
        : genericKey;
    conf.set(FS_S3A_BUCKET_PREFIX + bucket + '.' + baseKey, value,
        "S3AUtils");
  }
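  // Illustrative usage (not part of the original class): a sketch of how
  // setBucketOption() above rewrites a generic key into its bucket-specific
  // form; the bucket and endpoint values are hypothetical.
  //
  //   Configuration conf = new Configuration();
  //   S3AUtils.setBucketOption(conf, "example-bucket",
  //       "fs.s3a.endpoint", "s3.eu-west-1.amazonaws.com");
  //   // equivalent to setting the expanded key directly:
  //   //   fs.s3a.bucket.example-bucket.endpoint = s3.eu-west-1.amazonaws.com
  //   String v = S3AUtils.getBucketOption(conf, "example-bucket",
  //       "fs.s3a.endpoint");  // "s3.eu-west-1.amazonaws.com"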
  /**
   * Clear a bucket-specific property.
   * If the generic key passed in has an {@code fs.s3a.} prefix,
   * that's stripped off, so that when the bucket properties are propagated
   * down to the generic values, that value gets copied down.
   * @param conf configuration to set
   * @param bucket bucket name
   * @param genericKey key; can start with "fs.s3a."
   */
  public static void clearBucketOption(Configuration conf, String bucket,
      String genericKey) {
    final String baseKey = genericKey.startsWith(FS_S3A_PREFIX)
        ? genericKey.substring(FS_S3A_PREFIX.length())
        : genericKey;
    String k = FS_S3A_BUCKET_PREFIX + bucket + '.' + baseKey;
    LOG.debug("Unset {}", k);
    conf.unset(k);
  }

  /**
   * Get a bucket-specific property.
   * If the generic key passed in has an {@code fs.s3a.} prefix,
   * that's stripped off.
   * @param conf configuration to examine
   * @param bucket bucket name
   * @param genericKey key; can start with "fs.s3a."
   * @return the bucket option, null if there is none
   */
  public static String getBucketOption(Configuration conf, String bucket,
      String genericKey) {
    final String baseKey = genericKey.startsWith(FS_S3A_PREFIX)
        ? genericKey.substring(FS_S3A_PREFIX.length())
        : genericKey;
    return conf.get(FS_S3A_BUCKET_PREFIX + bucket + '.' + baseKey);
  }

  /**
   * Turns a path (relative or otherwise) into an S3 key, adding a trailing
   * "/" if the path is not the root and does not already have a "/"
   * at the end.
   *
   * @param key s3 key or ""
   * @return the key with a trailing "/", or, if it is the root key, "".
   */
  public static String maybeAddTrailingSlash(String key) {
    if (!key.isEmpty() && !key.endsWith("/")) {
      return key + '/';
    } else {
      return key;
    }
  }

  /**
   * Path filter which ignores any file which starts with . or _.
   */
  public static final PathFilter HIDDEN_FILE_FILTER = new PathFilter() {
    @Override
    public boolean accept(Path path) {
      String name = path.getName();
      return !name.startsWith("_") && !name.startsWith(".");
    }

    @Override
    public String toString() {
      return "HIDDEN_FILE_FILTER";
    }
  };

  /**
   * A Path filter which accepts all filenames.
   */
  public static final PathFilter ACCEPT_ALL = new PathFilter() {
    @Override
    public boolean accept(Path file) {
      return true;
    }

    @Override
    public String toString() {
      return "ACCEPT_ALL";
    }
  };

}
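// Illustrative usage (not part of the original class): a minimal sketch
// combining listAndFilter() with HIDDEN_FILE_FILTER to walk a tree while
// skipping files whose names start with "." or "_". The path is
// hypothetical.
//
//   Configuration conf = new Configuration();
//   Path path = new Path("s3a://example-bucket/data");
//   FileSystem fs = path.getFileSystem(conf);
//   RemoteIterator<LocatedFileStatus> it = S3AUtils.listAndFilter(
//       fs, path, true, S3AUtils.HIDDEN_FILE_FILTER);
//   while (it.hasNext()) {
//     System.out.println(it.next().getPath());
//   }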