All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.druid.storage.s3.S3Utils Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.storage.s3;

import com.amazonaws.AmazonClientException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.Protocol;
import com.amazonaws.SdkClientException;
import com.amazonaws.services.s3.model.AccessControlList;
import com.amazonaws.services.s3.model.AmazonS3Exception;
import com.amazonaws.services.s3.model.CanonicalGrantee;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.Grant;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.Permission;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.google.common.base.Joiner;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import org.apache.druid.common.aws.AWSClientConfig;
import org.apache.druid.common.aws.AWSClientUtil;
import org.apache.druid.common.aws.AWSEndpointConfig;
import org.apache.druid.common.aws.AWSProxyConfig;
import org.apache.druid.data.input.impl.CloudObjectLocation;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.RetryUtils;
import org.apache.druid.java.util.common.RetryUtils.Task;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.URIs;
import org.apache.druid.java.util.common.logger.Logger;

import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;

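/**
 * Static helpers shared by the S3 storage code: the retry predicate and retry wrappers for S3 calls, object
 * listing and bulk deletion, URI / object-key conversion, segment path construction, ACL and upload helpers, and
 * AWS client protocol / proxy configuration.
 */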
public class S3Utils
{
  // URI scheme used when building URIs from object summaries and when validating URIs in checkURI.
  private static final String SCHEME = S3StorageDruidModule.SCHEME;
  // Joins path components with "/" and silently drops null components (see constructSegmentPath).
  private static final Joiner JOINER = Joiner.on("/").skipNulls();
  // Class-wide logger.
  private static final Logger log = new Logger(S3Utils.class);

  /**
   * Error for calling putObject with an entity over 5GB in size.
   */
  public static final String ERROR_ENTITY_TOO_LARGE = "EntityTooLarge";

  /**
   * Predicate used by the retry helpers in this class to decide whether a failed S3 call should be retried.
   *
   * <ul>
   * <li>{@code null} is never retriable.</li>
   * <li>An {@link IOException} is retriable, unless it has a cause, in which case the cause decides.</li>
   * <li>An {@link SdkClientException} whose message matches one of a few known transient failures is retriable.</li>
   * <li>Any other {@link AmazonClientException} is delegated to
   * {@link AWSClientUtil#isClientExceptionRecoverable}.</li>
   * <li>Anything else is judged by its cause, and is therefore not retriable when it has no cause.</li>
   * </ul>
   */
  public static final Predicate<Throwable> S3RETRY = new Predicate<Throwable>()
  {
    @Override
    public boolean apply(Throwable e)
    {
      if (e == null) {
        return false;
      } else if (e instanceof IOException) {
        if (e.getCause() != null) {
          // Recurse with the underlying cause to see if it's retriable.
          return apply(e.getCause());
        }
        return true;
      } else if (e instanceof SdkClientException
                 && messageContains(e, "Data read has a different length than the expected")) {
        // Can happen when connections to S3 are dropped; see https://github.com/apache/druid/pull/11941.
        // SdkClientException can be thrown for many reasons and the only way to distinguish it is to look at
        // the message. This is not ideal, since the message may change, so it may need to be adjusted in the future.
        return true;
      } else if (e instanceof SdkClientException && messageContains(e, "Unable to execute HTTP request")) {
        // This is likely due to a temporary DNS issue and can be retried.
        return true;
      } else if (e instanceof SdkClientException
                 && messageContains(e, "Unable to find a region via the region provider chain")) {
        // This can happen sometimes when AWS isn't able to obtain the credentials for some service:
        // https://github.com/aws/aws-sdk-java/issues/2285
        return true;
      } else if (e instanceof AmazonClientException) {
        return AWSClientUtil.isClientExceptionRecoverable((AmazonClientException) e);
      } else {
        return apply(e.getCause());
      }
    }

    /**
     * Null-safe substring check on the exception message. An exception created without a message must not make
     * the retry predicate itself fail with a NullPointerException; it simply does not match.
     */
    private boolean messageContains(Throwable e, String fragment)
    {
      final String message = e.getMessage();
      return message != null && message.contains(fragment);
    }
  };

  /**
   * Retries S3 operations that fail intermittently (due to io-related exceptions, during obtaining credentials, etc).
   * Service-level exceptions (access denied, file not found, etc) are not retried.
   *
   * Retriability is decided by {@link #S3RETRY}; the number of tries is {@link RetryUtils#DEFAULT_MAX_TRIES}.
   *
   * @param f   the S3 operation to run
   * @param <T> type of the value produced by the operation
   *
   * @return whatever {@link RetryUtils#retry} returns for {@code f}
   *
   * @throws Exception whatever {@link RetryUtils#retry} propagates
   */
  public static <T> T retryS3Operation(Task<T> f) throws Exception
  {
    return RetryUtils.retry(f, S3RETRY, RetryUtils.DEFAULT_MAX_TRIES);
  }

  /**
   * Retries S3 operations that fail intermittently (due to io-related exceptions, during obtaining credentials, etc).
   * Service-level exceptions (access denied, file not found, etc) are not retried.
   * Also provides a way to set maxRetries, which can be useful e.g. for testing.
   *
   * Retriability is decided by {@link #S3RETRY}.
   *
   * @param f          the S3 operation to run
   * @param maxRetries passed to {@link RetryUtils#retry} as its try limit
   * @param <T>        type of the value produced by the operation
   *
   * @return whatever {@link RetryUtils#retry} returns for {@code f}
   *
   * @throws Exception whatever {@link RetryUtils#retry} propagates
   */
  public static <T> T retryS3Operation(Task<T> f, int maxRetries) throws Exception
  {
    return RetryUtils.retry(f, S3RETRY, maxRetries);
  }

  /**
   * Finds the first {@link AmazonS3Exception} in the cause chain of {@code e} (starting with {@code e} itself)
   * and returns its error code.
   *
   * @param e the throwable to inspect, may be null
   *
   * @return the error code of the first {@link AmazonS3Exception} found, or null if the chain contains none
   */
  @Nullable
  public static String getS3ErrorCode(final Throwable e)
  {
    for (Throwable current = e; current != null; current = current.getCause()) {
      if (current instanceof AmazonS3Exception) {
        return ((AmazonS3Exception) current).getErrorCode();
      }
    }
    return null;
  }

  /**
   * Asks the client whether the object exists. An {@link AmazonS3Exception} with HTTP status 404 is reported as
   * "exists"; any other {@link AmazonS3Exception} is rethrown unchanged.
   *
   * NOTE(review): the inline comment describes the 404 case as "inaccessible but existing", which reads more like
   * an access-denied response -- confirm that 404 is the intended status code.
   *
   * @param s3Client   client used for the existence check
   * @param bucketName bucket to look in
   * @param objectKey  key of the object
   *
   * @return the client's answer, or true when the client failed with a 404 status
   */
  static boolean isObjectInBucketIgnoringPermission(
      ServerSideEncryptingAmazonS3 s3Client,
      String bucketName,
      String objectKey
  )
  {
    try {
      return s3Client.doesObjectExist(bucketName, objectKey);
    }
    catch (AmazonS3Exception s3Exception) {
      final boolean notFoundStatus = s3Exception.getStatusCode() == 404;
      if (!notFoundStatus) {
        // Something else has gone wrong
        throw s3Exception;
      }
      // Object is inaccessible to current user, but does exist.
      return true;
    }
  }

  /**
   * Create an iterator over a set of S3 objects specified by a set of prefixes.
   *
   * For each provided prefix URI, the iterator will walk through all objects that are in the same bucket as the
   * provided URI and whose keys start with that URI's path, except for directory placeholders (which will be
   * ignored). The iterator is computed incrementally by calling {@link ServerSideEncryptingAmazonS3#listObjectsV2} for
   * each prefix in batches of {@code maxListingLength}. The first call is made at the same time the iterator is
   * constructed.
   *
   * @param s3Client         client used for listing
   * @param prefixes         prefix URIs to list under
   * @param maxListingLength batch size handed to the underlying {@link ObjectSummaryIterator}
   *
   * @return a new {@link ObjectSummaryIterator} over the matching objects
   */
  public static Iterator<S3ObjectSummary> objectSummaryIterator(
      final ServerSideEncryptingAmazonS3 s3Client,
      final Iterable<URI> prefixes,
      final int maxListingLength
  )
  {
    return new ObjectSummaryIterator(s3Client, prefixes, maxListingLength);
  }

  /**
   * Create an iterator over a set of S3 objects specified by a set of prefixes, with an explicit retry limit.
   *
   * For each provided prefix URI, the iterator will walk through all objects that are in the same bucket as the
   * provided URI and whose keys start with that URI's path, except for directory placeholders (which will be
   * ignored). The iterator is computed incrementally by calling {@link ServerSideEncryptingAmazonS3#listObjectsV2} for
   * each prefix in batches of {@code maxListingLength}. The first call is made at the same time the iterator is
   * constructed.
   *
   * @param s3Client         client used for listing
   * @param prefixes         prefix URIs to list under
   * @param maxListingLength batch size handed to the underlying {@link ObjectSummaryIterator}
   * @param maxRetries       retry limit handed to the underlying {@link ObjectSummaryIterator}
   *
   * @return a new {@link ObjectSummaryIterator} over the matching objects
   */
  public static Iterator<S3ObjectSummary> objectSummaryIterator(
      final ServerSideEncryptingAmazonS3 s3Client,
      final Iterable<URI> prefixes,
      final int maxListingLength,
      final int maxRetries
  )
  {
    return new ObjectSummaryIterator(s3Client, prefixes, maxListingLength, maxRetries);
  }

  /**
   * Create an {@link URI} from the given {@link S3ObjectSummary}. The result URI is composed as below.
   *
   * 
   * {@code s3://{BUCKET_NAME}/{OBJECT_KEY}}
   * 
*/ public static URI summaryToUri(S3ObjectSummary object) { return summaryToCloudObjectLocation(object).toUri(SCHEME); } public static CloudObjectLocation summaryToCloudObjectLocation(S3ObjectSummary object) { return new CloudObjectLocation(object.getBucketName(), object.getKey()); } static String constructSegmentPath(String baseKey, String storageDir) { return JOINER.join( baseKey.isEmpty() ? null : baseKey, storageDir ) + "/index.zip"; } static AccessControlList grantFullControlToBucketOwner(ServerSideEncryptingAmazonS3 s3Client, String bucket) { final AccessControlList acl = s3Client.getBucketAcl(bucket); acl.grantAllPermissions(new Grant(new CanonicalGrantee(acl.getOwner().getId()), Permission.FullControl)); return acl; } public static String extractS3Key(URI uri) { return StringUtils.maybeRemoveLeadingSlash(uri.getPath()); } public static URI checkURI(URI uri) { if (uri.getScheme().equalsIgnoreCase(S3StorageDruidModule.SCHEME_S3_ZIP)) { uri = URI.create(SCHEME + uri.toString().substring(S3StorageDruidModule.SCHEME_S3_ZIP.length())); } return CloudObjectLocation.validateUriScheme(SCHEME, uri); } /** * Gets a single {@link ObjectMetadata} from s3. * * @param s3Client s3 client * @param bucket s3 bucket * @param key s3 object key */ public static ObjectMetadata getSingleObjectMetadata(ServerSideEncryptingAmazonS3 s3Client, String bucket, String key) { try { return retryS3Operation(() -> s3Client.getObjectMetadata(bucket, key)); } catch (Exception e) { throw new RuntimeException(e); } } /** * Delete the files from S3 in a specified bucket, matching a specified prefix and filter * * @param s3Client s3 client * @param maxListingLength maximum number of keys to fetch and delete at a time * @param bucket s3 bucket * @param prefix the file prefix * @param filter function which returns true if the prefix file found should be deleted and false otherwise. 
* * @throws Exception in case of errors */ public static void deleteObjectsInPath( ServerSideEncryptingAmazonS3 s3Client, int maxListingLength, String bucket, String prefix, Predicate filter ) throws Exception { deleteObjectsInPath(s3Client, maxListingLength, bucket, prefix, filter, RetryUtils.DEFAULT_MAX_TRIES); } public static void deleteObjectsInPath( ServerSideEncryptingAmazonS3 s3Client, int maxListingLength, String bucket, String prefix, Predicate filter, int maxRetries ) throws Exception { log.debug("Deleting directory at bucket: [%s], path: [%s]", bucket, prefix); final List keysToDelete = new ArrayList<>(maxListingLength); final ObjectSummaryIterator iterator = new ObjectSummaryIterator( s3Client, ImmutableList.of(new CloudObjectLocation(bucket, prefix).toUri("s3")), maxListingLength ); while (iterator.hasNext()) { final S3ObjectSummary nextObject = iterator.next(); if (filter.apply(nextObject)) { keysToDelete.add(new DeleteObjectsRequest.KeyVersion(nextObject.getKey())); if (keysToDelete.size() == maxListingLength) { deleteBucketKeys(s3Client, bucket, keysToDelete, maxRetries); keysToDelete.clear(); } } } if (keysToDelete.size() > 0) { deleteBucketKeys(s3Client, bucket, keysToDelete, maxRetries); } } public static void deleteBucketKeys( ServerSideEncryptingAmazonS3 s3Client, String bucket, List keysToDelete, int retries ) throws Exception { if (keysToDelete != null && log.isDebugEnabled()) { List keys = keysToDelete.stream() .map(DeleteObjectsRequest.KeyVersion::getKey) .collect(Collectors.toList()); log.debug("Deleting keys from bucket: [%s], keys: [%s]", bucket, keys); } DeleteObjectsRequest deleteRequest = new DeleteObjectsRequest(bucket).withKeys(keysToDelete); S3Utils.retryS3Operation(() -> { s3Client.deleteObjects(deleteRequest); return null; }, retries); log.info("Deleted %d files", keysToDelete.size()); } /** * Uploads a file to S3 if possible. First trying to set ACL to give the bucket owner full control of the file before uploading. 
* * @param service S3 client * @param disableAcl true if ACL shouldn't be set for the file * @param key The key under which to store the new object. * @param file The path of the file to upload to Amazon S3. */ static void uploadFileIfPossible( ServerSideEncryptingAmazonS3 service, boolean disableAcl, String bucket, String key, File file ) { final PutObjectRequest putObjectRequest = new PutObjectRequest(bucket, key, file); if (!disableAcl) { putObjectRequest.setAccessControlList(S3Utils.grantFullControlToBucketOwner(service, bucket)); } log.info("Pushing [%s] to bucket[%s] and key[%s].", file, bucket, key); service.putObject(putObjectRequest); } @Nullable private static Protocol parseProtocol(@Nullable String protocol) { if (protocol == null) { return null; } if (protocol.equalsIgnoreCase("http")) { return Protocol.HTTP; } else if (protocol.equalsIgnoreCase("https")) { return Protocol.HTTPS; } else { throw new IAE("Unknown protocol[%s]", protocol); } } public static Protocol determineProtocol(AWSClientConfig clientConfig, AWSEndpointConfig endpointConfig) { final Protocol protocolFromClientConfig = parseProtocol(clientConfig.getProtocol()); final String endpointUrl = endpointConfig.getUrl(); if (org.apache.commons.lang.StringUtils.isNotEmpty(endpointUrl)) { //noinspection ConstantConditions final URI uri = URIs.parse(endpointUrl, protocolFromClientConfig.toString()); final Protocol protocol = parseProtocol(uri.getScheme()); if (protocol != null && (protocol != protocolFromClientConfig)) { log.warn("[%s] protocol will be used for endpoint [%s]", protocol, endpointUrl); } return protocol; } else { return protocolFromClientConfig; } } public static ClientConfiguration setProxyConfig(ClientConfiguration conf, AWSProxyConfig proxyConfig) { if (org.apache.commons.lang.StringUtils.isNotEmpty(proxyConfig.getHost())) { conf.setProxyHost(proxyConfig.getHost()); } if (proxyConfig.getPort() != -1) { conf.setProxyPort(proxyConfig.getPort()); } if 
(org.apache.commons.lang.StringUtils.isNotEmpty(proxyConfig.getUsername())) { conf.setProxyUsername(proxyConfig.getUsername()); } if (org.apache.commons.lang.StringUtils.isNotEmpty(proxyConfig.getPassword())) { conf.setProxyPassword(proxyConfig.getPassword()); } return conf; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy