org.apache.hadoop.fs.s3a.S3AFileSystem

This module contains code to support integration with Amazon Web Services. It also declares the dependencies needed to work with AWS services.

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.net.URI;
import java.nio.file.AccessDeniedException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.Objects;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import javax.annotation.Nullable;

import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.SdkBaseException;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.Headers;
import com.amazonaws.services.s3.model.CannedAccessControlList;
import com.amazonaws.services.s3.model.CopyObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.DeleteObjectsResult;
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadResult;
import com.amazonaws.services.s3.model.ListMultipartUploadsRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import com.amazonaws.services.s3.model.MultiObjectDeleteException;
import com.amazonaws.services.s3.model.MultipartUpload;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectResult;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.UploadPartRequest;
import com.amazonaws.services.s3.model.UploadPartResult;
import com.amazonaws.services.s3.transfer.Copy;
import com.amazonaws.services.s3.transfer.TransferManager;
import com.amazonaws.services.s3.transfer.TransferManagerConfiguration;
import com.amazonaws.services.s3.transfer.Upload;
import com.amazonaws.services.s3.transfer.model.CopyResult;
import com.amazonaws.services.s3.transfer.model.UploadResult;
import com.amazonaws.event.ProgressListener;

import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A;
import org.apache.hadoop.fs.s3a.impl.CopyFromLocalOperation;
import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore;
import org.apache.hadoop.fs.store.audit.ActiveThreadSpanSource;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.lang3.tuple.Triple;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonPathCapabilities;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Globber;
import org.apache.hadoop.fs.impl.OpenFileParameters;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.s3a.auth.SignerManager;
import org.apache.hadoop.fs.s3a.auth.delegation.DelegationOperations;
import org.apache.hadoop.fs.s3a.auth.delegation.DelegationTokenProvider;
import org.apache.hadoop.fs.s3a.impl.BulkDeleteRetryHandler;
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy;
import org.apache.hadoop.fs.s3a.impl.ContextAccessors;
import org.apache.hadoop.fs.s3a.impl.CopyOutcome;
import org.apache.hadoop.fs.s3a.impl.DeleteOperation;
import org.apache.hadoop.fs.s3a.impl.DirectoryPolicy;
import org.apache.hadoop.fs.s3a.impl.DirectoryPolicyImpl;
import org.apache.hadoop.fs.s3a.impl.GetContentSummaryOperation;
import org.apache.hadoop.fs.s3a.impl.HeaderProcessing;
import org.apache.hadoop.fs.s3a.impl.InternalConstants;
import org.apache.hadoop.fs.s3a.impl.ListingOperationCallbacks;
import org.apache.hadoop.fs.s3a.impl.MkdirOperation;
import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteSupport;
import org.apache.hadoop.fs.s3a.impl.OperationCallbacks;
import org.apache.hadoop.fs.s3a.impl.RenameOperation;
import org.apache.hadoop.fs.s3a.impl.RequestFactoryImpl;
import org.apache.hadoop.fs.s3a.impl.S3AMultipartUploaderBuilder;
import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum;
import org.apache.hadoop.fs.s3a.impl.StoreContext;
import org.apache.hadoop.fs.s3a.impl.StoreContextBuilder;
import org.apache.hadoop.fs.s3a.s3guard.BulkOperationState;
import org.apache.hadoop.fs.s3a.select.InternalSelectConstants;
import org.apache.hadoop.fs.s3a.tools.MarkerToolOperations;
import org.apache.hadoop.fs.s3a.tools.MarkerToolOperationsImpl;
import org.apache.hadoop.fs.statistics.DurationTracker;
import org.apache.hadoop.fs.statistics.DurationTrackerFactory;
import org.apache.hadoop.fs.statistics.IOStatistics;
import org.apache.hadoop.fs.statistics.IOStatisticsLogging;
import org.apache.hadoop.fs.statistics.IOStatisticsSource;
import org.apache.hadoop.fs.store.audit.AuditEntryPoint;
import org.apache.hadoop.fs.store.audit.AuditSpan;
import org.apache.hadoop.fs.store.audit.AuditSpanSource;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.token.DelegationTokenIssuer;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.util.DurationInfo;
import org.apache.hadoop.util.LambdaUtils;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.GlobalStorageStatistics;
import org.apache.hadoop.fs.InvalidRequestException;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.StreamCapabilities;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.s3a.api.RequestFactory;
import org.apache.hadoop.fs.s3a.audit.AuditManagerS3A;
import org.apache.hadoop.fs.s3a.audit.AuditIntegration;
import org.apache.hadoop.fs.s3a.audit.OperationAuditor;
import org.apache.hadoop.fs.s3a.auth.RoleModel;
import org.apache.hadoop.fs.s3a.auth.delegation.AWSPolicyProvider;
import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets;
import org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens;
import org.apache.hadoop.fs.s3a.auth.delegation.AbstractS3ATokenIdentifier;
import org.apache.hadoop.fs.s3a.commit.CommitConstants;
import org.apache.hadoop.fs.s3a.commit.PutTracker;
import org.apache.hadoop.fs.s3a.commit.MagicCommitIntegration;
import org.apache.hadoop.fs.s3a.impl.ChangeTracker;
import org.apache.hadoop.fs.s3a.select.SelectBinding;
import org.apache.hadoop.fs.s3a.select.SelectConstants;
import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.PathMetadata;
import org.apache.hadoop.fs.s3a.s3guard.S3Guard;
import org.apache.hadoop.fs.s3a.s3guard.ITtlTimeProvider;
import org.apache.hadoop.fs.s3a.statistics.BlockOutputStreamStatistics;
import org.apache.hadoop.fs.s3a.statistics.CommitterStatistics;
import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext;
import org.apache.hadoop.fs.s3a.statistics.impl.BondedS3AStatisticsContext;
import org.apache.hadoop.fs.s3native.S3xLoginHelper;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.fs.store.EtagChecksum;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.BlockingThreadPoolExecutorService;
import org.apache.hadoop.security.ProviderUtils;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.SemaphoredDelegatingExecutor;
import org.apache.hadoop.util.concurrent.HadoopExecutors;
import org.apache.hadoop.util.functional.CallableRaisingIOE;

import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL;
import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_DEFAULT;
import static org.apache.hadoop.fs.impl.AbstractFSBuilderImpl.rejectUnknownMandatoryKeys;
import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.Invoker.*;
import static org.apache.hadoop.fs.s3a.Listing.toLocatedFileStatusIterator;
import static org.apache.hadoop.fs.s3a.S3AUtils.*;
import static org.apache.hadoop.fs.s3a.Statistic.*;
import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.INITIALIZE_SPAN;
import static org.apache.hadoop.fs.s3a.auth.RolePolicies.STATEMENT_ALLOW_SSE_KMS_RW;
import static org.apache.hadoop.fs.s3a.auth.RolePolicies.allowS3Operations;
import static org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens.TokenIssuingPolicy.NoTokensAvailable;
import static org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens.hasDelegationTokenBinding;
import static org.apache.hadoop.fs.s3a.commit.CommitConstants.FS_S3A_COMMITTER_ABORT_PENDING_UPLOADS;
import static org.apache.hadoop.fs.s3a.commit.CommitConstants.FS_S3A_COMMITTER_STAGING_ABORT_PENDING_UPLOADS;
import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.submit;
import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletionIgnoringExceptions;
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isObjectNotFound;
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AP_INACCESSIBLE;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AP_REQUIRED_EXCEPTION;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AP_S3GUARD_INCOMPATIBLE;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.ARN_BUCKET_OPTION;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.CSE_PADDING_LENGTH;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.CSE_S3GUARD_INCOMPATIBLE;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DEFAULT_UPLOAD_PART_COUNT_LIMIT;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DELETE_CONSIDERED_IDEMPOTENT;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_403;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.UPLOAD_PART_COUNT_LIMIT;
import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion;
import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.logDnsLookup;
import static org.apache.hadoop.fs.s3a.s3guard.S3Guard.dirMetaToStatuses;
import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.logIOStatisticsAtLevel;
import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_CONTINUE_LIST_REQUEST;
import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_LIST_REQUEST;
import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.pairedTrackerFactory;
import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration;
import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfInvocation;
import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfOperation;
import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfSupplier;
import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
import static org.apache.hadoop.util.functional.RemoteIterators.typeCastingRemoteIterator;

/**
 * The core S3A Filesystem implementation.
 *
 * This subclass is marked as private as code should not be creating it
 * directly; use {@link FileSystem#get(Configuration)} and variants to
 * create one.
 *
 * If cast to {@code S3AFileSystem}, extra methods and features may be accessed.
 * Consider those private and unstable.
 *
 * Because it prints some of the state of the instrumentation,
 * the output of {@link #toString()} must also be considered unstable.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class S3AFileSystem extends FileSystem implements StreamCapabilities,
    AWSPolicyProvider, DelegationTokenProvider, IOStatisticsSource,
    AuditSpanSource, ActiveThreadSpanSource {
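
  /*
   * Usage sketch (illustrative only, not part of the upstream class): an
   * instance is obtained through the FileSystem factory methods rather than
   * constructed directly; the bucket name and path below are hypothetical.
   *
   *   Configuration conf = new Configuration();
   *   FileSystem fs = FileSystem.get(URI.create("s3a://example-bucket/"), conf);
   *   try (FSDataInputStream in = fs.open(new Path("/data/part-0000"))) {
   *     // read the S3 object through the Hadoop stream API
   *   }
   */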

  /**
   * Default blocksize as used in blocksize and FS status queries.
   */
  public static final int DEFAULT_BLOCKSIZE = 32 * 1024 * 1024;

  private URI uri;
  private Path workingDir;
  private String username;
  private AmazonS3 s3;
  // initial callback policy is fail-once; it's there just to assist
  // some mock tests and other codepaths trying to call the low level
  // APIs on an uninitialized filesystem.
  private Invoker invoker = new Invoker(RetryPolicies.TRY_ONCE_THEN_FAIL,
      Invoker.LOG_EVENT);
  // Only used for very specific code paths which behave differently for
  // S3Guard. Retries FileNotFound, so be careful if you use this.
  private Invoker s3guardInvoker = new Invoker(RetryPolicies.TRY_ONCE_THEN_FAIL,
      Invoker.LOG_EVENT);
  private final Retried onRetry = this::operationRetried;

  /**
   * Represents the bucket name used for all S3 operations. If a per-bucket
   * override for the {@link InternalConstants#ARN_BUCKET_OPTION} property is
   * set, the bucket is updated to point at the configured ARN.
   */
  private String bucket;
  private int maxKeys;
  private Listing listing;
  private long partSize;
  private boolean enableMultiObjectsDelete;
  private TransferManager transfers;
  private ExecutorService boundedThreadPool;
  private ThreadPoolExecutor unboundedThreadPool;
  private int executorCapacity;
  private long multiPartThreshold;
  public static final Logger LOG = LoggerFactory.getLogger(S3AFileSystem.class);
  private static final Logger PROGRESS =
      LoggerFactory.getLogger("org.apache.hadoop.fs.s3a.S3AFileSystem.Progress");
  private LocalDirAllocator directoryAllocator;
  private CannedAccessControlList cannedACL;
  private boolean failOnMetadataWriteError;

  /**
   * This must never be null; until initialized it just declares that there
   * is no encryption.
   */
  private EncryptionSecrets encryptionSecrets = new EncryptionSecrets();
  /** The core instrumentation. */
  private S3AInstrumentation instrumentation;
  /** Accessors to statistics for this FS. */
  private S3AStatisticsContext statisticsContext;
  /** Storage Statistics Bonded to the instrumentation. */
  private S3AStorageStatistics storageStatistics;

  private long readAhead;
  private S3AInputPolicy inputPolicy;
  private ChangeDetectionPolicy changeDetectionPolicy;
  private final AtomicBoolean closed = new AtomicBoolean(false);
  private volatile boolean isClosed = false;
  private MetadataStore metadataStore;
  private boolean allowAuthoritativeMetadataStore;
  private Collection<String> allowAuthoritativePaths;

  /** Delegation token integration; non-empty when DT support is enabled. */
  private Optional<S3ADelegationTokens> delegationTokens = Optional.empty();

  /** Principal who created the FS; recorded during initialization. */
  private UserGroupInformation owner;

  private String blockOutputBuffer;
  private S3ADataBlocks.BlockFactory blockFactory;
  private int blockOutputActiveBlocks;
  private WriteOperationHelper writeHelper;
  private boolean useListV1;
  private MagicCommitIntegration committerIntegration;

  private AWSCredentialProviderList credentials;
  private SignerManager signerManager;

  private ITtlTimeProvider ttlTimeProvider;

  /**
   * Page size for deletions.
   */
  private int pageSize;

  private final ListingOperationCallbacks listingOperationCallbacks =
          new ListingOperationCallbacksImpl();
  /**
   * Directory policy.
   */
  private DirectoryPolicy directoryPolicy;

  /**
   * Context accessors for re-use.
   */
  private final ContextAccessors contextAccessors = new ContextAccessorsImpl();

  /**
   * Factory for AWS requests.
   */
  private RequestFactory requestFactory;

  /**
   * Audit manager (service lifecycle).
   * Creates the audit service and manages the binding of different audit spans
   * to different threads.
   * Initially this is a no-op manager; once the service is initialized it will
   * be replaced with a configured one.
   */
  private AuditManagerS3A auditManager =
      AuditIntegration.stubAuditManager();

  /**
   * Is this S3A FS instance using S3 client side encryption?
   */
  private boolean isCSEEnabled;

  /**
   * Bucket AccessPoint.
   */
  private ArnResource accessPoint;

  /** Add any deprecated keys. */
  @SuppressWarnings("deprecation")
  private static void addDeprecatedKeys() {
    Configuration.DeprecationDelta[] deltas = {
        new Configuration.DeprecationDelta(
            FS_S3A_COMMITTER_STAGING_ABORT_PENDING_UPLOADS,
            FS_S3A_COMMITTER_ABORT_PENDING_UPLOADS),
        new Configuration.DeprecationDelta(
            SERVER_SIDE_ENCRYPTION_ALGORITHM,
            S3_ENCRYPTION_ALGORITHM),
        new Configuration.DeprecationDelta(
            SERVER_SIDE_ENCRYPTION_KEY,
            S3_ENCRYPTION_KEY)
    };

    if (deltas.length > 0) {
      Configuration.addDeprecations(deltas);
      Configuration.reloadExistingConfigurations();
    }
  }

  static {
    addDeprecatedKeys();
  }

  /** Called after a new FileSystem instance is constructed.
   * @param name a uri whose authority section names the host, port, etc.
   *   for this FileSystem
   * @param originalConf the configuration to use for the FS. The
   * bucket-specific options are patched over the base ones before any use is
   * made of the config.
   */
  public void initialize(URI name, Configuration originalConf)
      throws IOException {
    // get the host; this is guaranteed to be non-null, non-empty
    bucket = name.getHost();
    AuditSpan span = null;
    try {
      LOG.debug("Initializing S3AFileSystem for {}", bucket);
      // clone the configuration into one with propagated bucket options
      Configuration conf = propagateBucketOptions(originalConf, bucket);
      // HADOOP-17894. remove references to s3a stores in JCEKS credentials.
      conf = ProviderUtils.excludeIncompatibleCredentialProviders(
          conf, S3AFileSystem.class);
      String arn = String.format(ARN_BUCKET_OPTION, bucket);
      String configuredArn = conf.getTrimmed(arn, "");
      if (!configuredArn.isEmpty()) {
        accessPoint = ArnResource.accessPointFromArn(configuredArn);
        LOG.info("Using AccessPoint ARN \"{}\" for bucket {}", configuredArn, bucket);
        bucket = accessPoint.getFullArn();
      } else if (conf.getBoolean(AWS_S3_ACCESSPOINT_REQUIRED, false)) {
        LOG.warn("Access Point usage is required because \"{}\" is enabled," +
            " but not configured for the bucket: {}", AWS_S3_ACCESSPOINT_REQUIRED, bucket);
        throw new PathIOException(bucket, AP_REQUIRED_EXCEPTION);
      }

      // fix up the classloader of the configuration to be whatever
      // classloader loaded this filesystem.
      // See: HADOOP-17372
      conf.setClassLoader(this.getClass().getClassLoader());

      // patch the Hadoop security providers
      patchSecurityCredentialProviders(conf);
      // look for delegation token support early.
      boolean delegationTokensEnabled = hasDelegationTokenBinding(conf);
      if (delegationTokensEnabled) {
        LOG.debug("Using delegation tokens");
      }
      // set the URI, this will do any fixup of the URI to remove secrets,
      // canonicalize.
      setUri(name, delegationTokensEnabled);
      super.initialize(uri, conf);
      setConf(conf);

      // look for encryption data
      // DT Bindings may override this
      setEncryptionSecrets(
          buildEncryptionSecrets(bucket, conf));

      invoker = new Invoker(new S3ARetryPolicy(getConf()), onRetry);
      instrumentation = new S3AInstrumentation(uri);
      initializeStatisticsBinding();
      // If CSE-KMS method is set then CSE is enabled.
      isCSEEnabled = S3AEncryptionMethods.CSE_KMS.getMethod()
          .equals(getS3EncryptionAlgorithm().getMethod());
      LOG.debug("Client Side Encryption enabled: {}", isCSEEnabled);
      setCSEGauge();
      // Username is the current user at the time the FS was instantiated.
      owner = UserGroupInformation.getCurrentUser();
      username = owner.getShortUserName();
      workingDir = new Path("/user", username)
          .makeQualified(this.uri, this.getWorkingDirectory());

      s3guardInvoker = new Invoker(new S3GuardExistsRetryPolicy(getConf()),
          onRetry);

      failOnMetadataWriteError = conf.getBoolean(FAIL_ON_METADATA_WRITE_ERROR,
          FAIL_ON_METADATA_WRITE_ERROR_DEFAULT);

      maxKeys = intOption(conf, MAX_PAGING_KEYS, DEFAULT_MAX_PAGING_KEYS, 1);
      partSize = getMultipartSizeProperty(conf,
          MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE);
      multiPartThreshold = getMultipartSizeProperty(conf,
          MIN_MULTIPART_THRESHOLD, DEFAULT_MIN_MULTIPART_THRESHOLD);

      //check but do not store the block size
      longBytesOption(conf, FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE, 1);
      enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true);

      readAhead = longBytesOption(conf, READAHEAD_RANGE,
          DEFAULT_READAHEAD_RANGE, 0);

      initThreadPools(conf);

      int listVersion = conf.getInt(LIST_VERSION, DEFAULT_LIST_VERSION);
      if (listVersion < 1 || listVersion > 2) {
        LOG.warn("Configured fs.s3a.list.version {} is invalid, forcing " +
            "version 2", listVersion);
      }
      useListV1 = (listVersion == 1);
      if (accessPoint != null && useListV1) {
        LOG.warn("V1 list configured in fs.s3a.list.version. This is not supported in by" +
            " access points. Upgrading to V2");
        useListV1 = false;
      }

      signerManager = new SignerManager(bucket, this, conf, owner);
      signerManager.initCustomSigners();

      // start auditing
      initializeAuditService();

      // create the requestFactory.
      // requires the audit manager to be initialized.
      requestFactory = createRequestFactory();

      // create the static write operation helper.
      // this doesn't have a short-lived span; auditors which
      // require one may reject usages.
      writeHelper = createWriteOperationHelper(getActiveAuditSpan());

      // create an initial span for all other operations.
      span = createSpan(INITIALIZE_SPAN, bucket, null);

      // creates the AWS client, including overriding auth chain if
      // the FS came with a DT
      // this may do some patching of the configuration (e.g. setting
      // the encryption algorithms)
      bindAWSClient(name, delegationTokensEnabled);

      initTransferManager();


      // This initiates a probe against S3 for the bucket's existence.
      doBucketProbing();

      inputPolicy = S3AInputPolicy.getPolicy(
          conf.getTrimmed(INPUT_FADVISE, INPUT_FADV_NORMAL));
      LOG.debug("Input fadvise policy = {}", inputPolicy);
      changeDetectionPolicy = ChangeDetectionPolicy.getPolicy(conf);
      LOG.debug("Change detection policy = {}", changeDetectionPolicy);
      boolean magicCommitterEnabled = conf.getBoolean(
          CommitConstants.MAGIC_COMMITTER_ENABLED,
          CommitConstants.DEFAULT_MAGIC_COMMITTER_ENABLED);
      LOG.debug("Filesystem support for magic committers {} enabled",
          magicCommitterEnabled ? "is" : "is not");
      committerIntegration = new MagicCommitIntegration(
          this, magicCommitterEnabled);

      boolean blockUploadEnabled = conf.getBoolean(FAST_UPLOAD, true);

      if (!blockUploadEnabled) {
        LOG.warn("The \"slow\" output stream is no longer supported");
      }
      blockOutputBuffer = conf.getTrimmed(FAST_UPLOAD_BUFFER,
          DEFAULT_FAST_UPLOAD_BUFFER);
      partSize = ensureOutputParameterInRange(MULTIPART_SIZE, partSize);
      blockFactory = S3ADataBlocks.createFactory(this, blockOutputBuffer);
      blockOutputActiveBlocks = intOption(conf,
          FAST_UPLOAD_ACTIVE_BLOCKS, DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS, 1);
      // If CSE is enabled, do multipart uploads serially.
      if (isCSEEnabled) {
        blockOutputActiveBlocks = 1;
      }
      LOG.debug("Using S3ABlockOutputStream with buffer = {}; block={};" +
              " queue limit={}",
          blockOutputBuffer, partSize, blockOutputActiveBlocks);
      long authDirTtl = conf.getTimeDuration(METADATASTORE_METADATA_TTL,
          DEFAULT_METADATASTORE_METADATA_TTL, TimeUnit.MILLISECONDS);
      ttlTimeProvider = new S3Guard.TtlTimeProvider(authDirTtl);

      setMetadataStore(S3Guard.getMetadataStore(this, ttlTimeProvider));
      allowAuthoritativeMetadataStore = conf.getBoolean(METADATASTORE_AUTHORITATIVE,
          DEFAULT_METADATASTORE_AUTHORITATIVE);
      allowAuthoritativePaths = S3Guard.getAuthoritativePaths(this);

      if (hasMetadataStore()) {
        LOG.debug("Using metadata store {}, authoritative store={}, authoritative path={}",
            getMetadataStore(), allowAuthoritativeMetadataStore, allowAuthoritativePaths);
        if (isCSEEnabled) {
          throw new PathIOException(uri.toString(), CSE_S3GUARD_INCOMPATIBLE);
        }
        if (accessPoint != null) {
          throw new PathIOException(uri.toString(), AP_S3GUARD_INCOMPATIBLE);
        }
      }

      // log if S3Guard is disabled, at the warn level set in the config
      if (!hasMetadataStore()) {
        String warnLevel = conf.getTrimmed(S3GUARD_DISABLED_WARN_LEVEL,
            DEFAULT_S3GUARD_DISABLED_WARN_LEVEL);
        S3Guard.logS3GuardDisabled(LOG, warnLevel, bucket);
      }
      // directory policy, which may look at authoritative paths
      directoryPolicy = DirectoryPolicyImpl.getDirectoryPolicy(conf,
          this::allowAuthoritative);
      LOG.debug("Directory marker retention policy is {}", directoryPolicy);

      initMultipartUploads(conf);

      pageSize = intOption(getConf(), BULK_DELETE_PAGE_SIZE,
          BULK_DELETE_PAGE_SIZE_DEFAULT, 0);
      listing = new Listing(listingOperationCallbacks, createStoreContext());
    } catch (AmazonClientException e) {
      // amazon client exception: stop all services then throw the translation
      cleanupWithLogger(LOG, span);
      stopAllServices();
      throw translateException("initializing ", new Path(name), e);
    } catch (IOException | RuntimeException e) {
      // other exceptions: stop the services.
      cleanupWithLogger(LOG, span);
      stopAllServices();
      throw e;
    }
  }

  /**
   * Set the client-side encryption gauge to 1 or 0, indicating whether
   * CSE is enabled or not.
   */
  private void setCSEGauge() {
    IOStatisticsStore ioStatisticsStore =
        (IOStatisticsStore) getIOStatistics();
    if (isCSEEnabled) {
      ioStatisticsStore
          .setGauge(CLIENT_SIDE_ENCRYPTION_ENABLED.getSymbol(), 1L);
    } else {
      ioStatisticsStore
          .setGauge(CLIENT_SIDE_ENCRYPTION_ENABLED.getSymbol(), 0L);
    }
  }

  /**
   * Test bucket existence in S3.
   * When the value of {@link Constants#S3A_BUCKET_PROBE} is set to 0,
   * the bucket existence check is skipped to speed up
   * S3AFileSystem initialization. When set to 1 or 2, the bucket existence
   * check is performed, which is potentially slow.
   * If 3 or higher: warn and use the v2 check.
   * The DNS address of the S3 endpoint is also logged if the bucket probe
   * value is greater than 0; otherwise that lookup is skipped for performance.
   * @throws UnknownStoreException the bucket is absent
   * @throws IOException any other problem talking to S3
   */
  @Retries.RetryTranslated
  private void doBucketProbing() throws IOException {
    int bucketProbe = getConf()
            .getInt(S3A_BUCKET_PROBE, S3A_BUCKET_PROBE_DEFAULT);
    Preconditions.checkArgument(bucketProbe >= 0,
            "Value of " + S3A_BUCKET_PROBE + " should be >= 0");
    switch (bucketProbe) {
    case 0:
      LOG.debug("skipping check for bucket existence");
      break;
    case 1:
      logDnsLookup(getConf());
      verifyBucketExists();
      break;
    case 2:
      logDnsLookup(getConf());
      verifyBucketExistsV2();
      break;
    default:
      // we have no idea what this is, assume it is from a later release.
      LOG.warn("Unknown bucket probe option {}: {}; falling back to check #2",
          S3A_BUCKET_PROBE, bucketProbe);
      verifyBucketExistsV2();
      break;
    }
  }
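
  /*
   * Configuration sketch (illustrative): skipping the bucket existence probe
   * to speed up initialization. The key name is assumed to be the value of
   * Constants#S3A_BUCKET_PROBE.
   *
   *   Configuration conf = new Configuration();
   *   conf.setInt("fs.s3a.bucket.probe", 0);  // 0 = skip, 1 or 2 = probe variants
   *   FileSystem fs = FileSystem.get(URI.create("s3a://example-bucket/"), conf);
   */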

  /**
   * Initialize the statistics binding.
   * This is done by creating an {@code IntegratedS3AStatisticsContext}
   * with callbacks to get the FS's instrumentation and FileSystem.statistics
   * field; the latter may change after {@link #initialize(URI, Configuration)},
   * so needs to be dynamically adapted.
   * Protected so that (mock) subclasses can replace it with a
   * different statistics binding, if desired.
   */
  protected void initializeStatisticsBinding() {
    storageStatistics = createStorageStatistics(
        requireNonNull(getIOStatistics()));
    statisticsContext = new BondedS3AStatisticsContext(
        new BondedS3AStatisticsContext.S3AFSStatisticsSource() {

          @Override
          public S3AInstrumentation getInstrumentation() {
            return S3AFileSystem.this.getInstrumentation();
          }

          @Override
          public Statistics getInstanceStatistics() {
            return S3AFileSystem.this.statistics;
          }
        });
  }

  /**
   * Initialize the thread pools.
   * This must be re-invoked after replacing the S3Client during test
   * runs.
   * @param conf configuration.
   */
  private void initThreadPools(Configuration conf) {
    final String name = "s3a-transfer-" + getBucket();
    int maxThreads = conf.getInt(MAX_THREADS, DEFAULT_MAX_THREADS);
    if (maxThreads < 2) {
      LOG.warn(MAX_THREADS + " must be at least 2: forcing to 2.");
      maxThreads = 2;
    }
    int totalTasks = intOption(conf,
        MAX_TOTAL_TASKS, DEFAULT_MAX_TOTAL_TASKS, 1);
    long keepAliveTime = longOption(conf, KEEPALIVE_TIME,
        DEFAULT_KEEPALIVE_TIME, 0);
    boundedThreadPool = BlockingThreadPoolExecutorService.newInstance(
        maxThreads,
        maxThreads + totalTasks,
        keepAliveTime, TimeUnit.SECONDS,
        name + "-bounded");
    unboundedThreadPool = new ThreadPoolExecutor(
        maxThreads, Integer.MAX_VALUE,
        keepAliveTime, TimeUnit.SECONDS,
        new LinkedBlockingQueue<>(),
        BlockingThreadPoolExecutorService.newDaemonThreadFactory(
            name + "-unbounded"));
    unboundedThreadPool.allowCoreThreadTimeOut(true);
    executorCapacity = intOption(conf,
        EXECUTOR_CAPACITY, DEFAULT_EXECUTOR_CAPACITY, 1);
  }
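
  /*
   * Configuration sketch (illustrative): the pool limits read above. The key
   * names are assumed to be the values of MAX_THREADS, MAX_TOTAL_TASKS and
   * KEEPALIVE_TIME.
   *
   *   conf.setInt("fs.s3a.threads.max", 64);            // bounded pool size
   *   conf.setInt("fs.s3a.max.total.tasks", 32);        // extra queued work
   *   conf.setInt("fs.s3a.threads.keepalivetime", 60);  // idle timeout, seconds
   */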

  /**
   * Create the storage statistics or bind to an existing one.
   * @param ioStatistics IOStatistics to build the storage statistics from.
   * @return a storage statistics instance; expected to be that of the FS.
   */
  protected static S3AStorageStatistics createStorageStatistics(
      final IOStatistics ioStatistics) {
    return (S3AStorageStatistics)
        GlobalStorageStatistics.INSTANCE
            .put(S3AStorageStatistics.NAME,
                () -> new S3AStorageStatistics(ioStatistics));
  }

  /**
   * Verify that the bucket exists. This does not check permissions,
   * not even read access.
   * Retry policy: retrying, translated.
   * @throws UnknownStoreException the bucket is absent
   * @throws IOException any other problem talking to S3
   */
  @Retries.RetryTranslated
  protected void verifyBucketExists()
      throws UnknownStoreException, IOException {
    if (!invoker.retry("doesBucketExist", bucket, true,
        trackDurationOfOperation(getDurationTrackerFactory(),
            STORE_EXISTS_PROBE.getSymbol(),
            () -> s3.doesBucketExist(bucket)))) {
      throw new UnknownStoreException("s3a://" + bucket + "/", " Bucket does "
          + "not exist");
    }
  }

  /**
   * Verify that the bucket exists. This will correctly throw an exception
   * when credentials are invalid.
   * Retry policy: retrying, translated.
   * @throws UnknownStoreException the bucket is absent
   * @throws IOException any other problem talking to S3
   */
  @Retries.RetryTranslated
  protected void verifyBucketExistsV2()
      throws UnknownStoreException, IOException {
    if (!invoker.retry("doesBucketExistV2", bucket, true,
        trackDurationOfOperation(getDurationTrackerFactory(),
            STORE_EXISTS_PROBE.getSymbol(),
            () -> {
              // Bug in the SDK: doesBucketExistV2() always returns `true` for AccessPoint ARNs,
              // so use getBucketAcl(), which handles both ARNs and bucket names correctly.
              try {
                s3.getBucketAcl(bucket);
              } catch (AmazonServiceException ex) {
                int statusCode = ex.getStatusCode();
                if (statusCode == SC_404 ||
                    (statusCode == SC_403 && ex.getMessage().contains(AP_INACCESSIBLE))) {
                  return false;
                }
              }

              return true;
            }))) {
      throw new UnknownStoreException("s3a://" + bucket + "/", " Bucket does "
          + "not exist");
    }
  }

  /**
   * Get S3A Instrumentation. For test purposes.
   * @return this instance's instrumentation.
   */
  @VisibleForTesting
  public S3AInstrumentation getInstrumentation() {
    return instrumentation;
  }

  /**
   * Get FS Statistic for this S3AFS instance.
   *
   * @return FS statistic instance.
   */
  @VisibleForTesting
  public FileSystem.Statistics getFsStatistics() {
    return statistics;
  }

  /**
   * Get current listing instance.
   * @return this instance's listing.
   */
  public Listing getListing() {
    return listing;
  }

  /**
   * Set up the client bindings.
   * If delegation tokens are enabled, the FS first looks for a DT
   * ahead of any other bindings.
   * If there is a DT it uses it for authentication
   * and switches to the DT authenticator automatically (and exclusively).
   * @param name URI of the FS
   * @param dtEnabled are delegation tokens enabled?
   * @throws IOException failure.
   */
  private void bindAWSClient(URI name, boolean dtEnabled) throws IOException {
    Configuration conf = getConf();
    credentials = null;
    String uaSuffix = "";

    if (dtEnabled) {
      // Delegation support.
      // Create and start the DT integration.
      // Then look for an existing DT for this bucket, switch to authenticating
      // with it if so.

      LOG.debug("Using delegation tokens");
      S3ADelegationTokens tokens = new S3ADelegationTokens();
      this.delegationTokens = Optional.of(tokens);
      tokens.bindToFileSystem(getCanonicalUri(),
          createStoreContext(),
          createDelegationOperations());
      tokens.init(conf);
      tokens.start();
      // switch to the DT provider and bypass all other configured
      // providers.
      if (tokens.isBoundToDT()) {
        // A DT was retrieved.
        LOG.debug("Using existing delegation token");
        // and use the encryption settings from that client, whatever they were
      } else {
        LOG.debug("No delegation token for this instance");
      }
      // Get new credential chain
      credentials = tokens.getCredentialProviders();
      // and any encryption secrets which came from a DT
      tokens.getEncryptionSecrets()
          .ifPresent(this::setEncryptionSecrets);
      // and update the UA field with any diagnostics provided by
      // the DT binding.
      uaSuffix = tokens.getUserAgentField();
    } else {
      // DT support is disabled, so create the normal credential chain
      credentials = createAWSCredentialProviderSet(name, conf);
    }
    LOG.debug("Using credential provider {}", credentials);
    Class<? extends S3ClientFactory> s3ClientFactoryClass = conf.getClass(
        S3_CLIENT_FACTORY_IMPL, DEFAULT_S3_CLIENT_FACTORY_IMPL,
        S3ClientFactory.class);

    String endpoint = accessPoint == null
        ? conf.getTrimmed(ENDPOINT, DEFAULT_ENDPOINT)
        : accessPoint.getEndpoint();

    S3ClientFactory.S3ClientCreationParameters parameters =
        new S3ClientFactory.S3ClientCreationParameters()
        .withCredentialSet(credentials)
        .withEndpoint(endpoint)
        .withMetrics(statisticsContext.newStatisticsFromAwsSdk())
        .withPathStyleAccess(conf.getBoolean(PATH_STYLE_ACCESS, false))
        .withUserAgentSuffix(uaSuffix)
        .withRequestHandlers(auditManager.createRequestHandlers());

    s3 = ReflectionUtils.newInstance(s3ClientFactoryClass, conf)
        .createS3Client(getUri(),
            parameters);
  }

  /**
   * Initialize and launch the audit manager and service.
   * As this takes the FS IOStatistics store, it must be invoked
   * after instrumentation is initialized.
   * @throws IOException failure to instantiate/initialize.
   */
  protected void initializeAuditService() throws IOException {
    auditManager = AuditIntegration.createAndStartAuditManager(
        getConf(),
        instrumentation.createMetricsUpdatingStore());
  }

  /**
   * The audit manager.
   * @return the audit manager
   */
  @InterfaceAudience.Private
  public AuditManagerS3A getAuditManager() {
    return auditManager;
  }

  /**
   * Get the auditor; valid once initialized.
   * @return the auditor.
   */
  @InterfaceAudience.Private
  public OperationAuditor getAuditor() {
    return getAuditManager().getAuditor();
  }

  /**
   * Get the active audit span.
   * @return the span.
   */
  @InterfaceAudience.Private
  @Override
  public AuditSpanS3A getActiveAuditSpan() {
    return getAuditManager().getActiveAuditSpan();
  }

  /**
   * Get the audit span source; allows for components like the committers
   * to have a source of spans without being hard coded to the FS only.
   * @return the source of spans - the base implementation is this instance.
   */
  @InterfaceAudience.Private
  public AuditSpanSource getAuditSpanSource() {
    return this;
  }

  /**
   * Start an operation; this informs the audit service of the event
   * and then sets it as the active span.
   * @param operation operation name.
   * @param path1 first path of operation
   * @param path2 second path of operation
   * @return a span for the audit
   * @throws IOException failure
   */
  public AuditSpanS3A createSpan(String operation,
      @Nullable String path1,
      @Nullable String path2)
      throws IOException {

    return getAuditManager().createSpan(operation, path1, path2);
  }

  /**
   * Build the request factory.
   * MUST be called after reading encryption secrets from settings/
   * delegation token.
   * Protected, in case test/mock classes want to implement their
   * own variants.
   * @return request factory.
   */
  protected RequestFactory createRequestFactory() {
    long partCountLimit = longOption(getConf(),
        UPLOAD_PART_COUNT_LIMIT,
        DEFAULT_UPLOAD_PART_COUNT_LIMIT,
        1);
    if (partCountLimit != DEFAULT_UPLOAD_PART_COUNT_LIMIT) {
      LOG.warn("Configuration property {} shouldn't be overridden by client",
          UPLOAD_PART_COUNT_LIMIT);
    }

    // ACLs; this is passed to the
    // request factory.
    initCannedAcls(getConf());

    return RequestFactoryImpl.builder()
        .withBucket(requireNonNull(bucket))
        .withCannedACL(getCannedACL())
        .withEncryptionSecrets(requireNonNull(encryptionSecrets))
        .withMultipartPartCountLimit(partCountLimit)
        .withRequestPreparer(getAuditManager()::requestCreated)
        .build();
  }

  /**
   * Get the request factory which uses this store's audit span.
   * @return the request factory.
   */
  @VisibleForTesting
  public RequestFactory getRequestFactory() {
    return requestFactory;
  }

  /**
   * Implementation of all operations used by delegation tokens.
   */
  private class DelegationOperationsImpl implements DelegationOperations {

    @Override
    public List<RoleModel.Statement> listAWSPolicyRules(
        final Set<AWSPolicyProvider.AccessLevel> access) {
      return S3AFileSystem.this.listAWSPolicyRules(access);
    }
  }

  /**
   * Create an instance of the delegation operations.
   * @return callbacks for DT support.
   */
  @VisibleForTesting
  public DelegationOperations createDelegationOperations() {
    return new DelegationOperationsImpl();
  }

  /**
   * Set the encryption secrets for requests.
   * @param secrets secrets
   */
  protected void setEncryptionSecrets(final EncryptionSecrets secrets) {
    this.encryptionSecrets = secrets;
    if (requestFactory != null) {
      requestFactory.setEncryptionSecrets(secrets);
    }
  }

  /**
   * Get the encryption secrets.
   * This is potentially sensitive information and must be treated with care.
   * @return the current encryption secrets.
   */
  public EncryptionSecrets getEncryptionSecrets() {
    return encryptionSecrets;
  }

  private void initTransferManager() {
    TransferManagerConfiguration transferConfiguration =
        new TransferManagerConfiguration();
    transferConfiguration.setMinimumUploadPartSize(partSize);
    transferConfiguration.setMultipartUploadThreshold(multiPartThreshold);
    transferConfiguration.setMultipartCopyPartSize(partSize);
    transferConfiguration.setMultipartCopyThreshold(multiPartThreshold);

    transfers = new TransferManager(s3, unboundedThreadPool);
    transfers.setConfiguration(transferConfiguration);
  }

  private void initCannedAcls(Configuration conf) {
    String cannedACLName = conf.get(CANNED_ACL, DEFAULT_CANNED_ACL);
    if (!cannedACLName.isEmpty()) {
      cannedACL = CannedAccessControlList.valueOf(cannedACLName);
    } else {
      cannedACL = null;
    }
  }

  @Retries.RetryTranslated
  private void initMultipartUploads(Configuration conf) throws IOException {
    boolean purgeExistingMultipart = conf.getBoolean(PURGE_EXISTING_MULTIPART,
        DEFAULT_PURGE_EXISTING_MULTIPART);
    long purgeExistingMultipartAge = longOption(conf,
        PURGE_EXISTING_MULTIPART_AGE, DEFAULT_PURGE_EXISTING_MULTIPART_AGE, 0);

    if (purgeExistingMultipart) {
      try {
        abortOutstandingMultipartUploads(purgeExistingMultipartAge);
      } catch (AccessDeniedException e) {
        instrumentation.errorIgnored();
        LOG.debug("Failed to purge multipart uploads against {}," +
            " FS may be read only", bucket);
      }
    }
  }

  /**
   * Abort all outstanding MPUs older than a given age.
   * @param seconds time in seconds
   * @throws IOException on any failure, other than 403 "permission denied"
   */
  @Retries.RetryTranslated
  public void abortOutstandingMultipartUploads(long seconds)
      throws IOException {
    Preconditions.checkArgument(seconds >= 0);
    Date purgeBefore =
        new Date(new Date().getTime() - seconds * 1000);
    LOG.debug("Purging outstanding multipart uploads older than {}",
        purgeBefore);
    invoker.retry("Purging multipart uploads", bucket, true,
        () -> transfers.abortMultipartUploads(bucket, purgeBefore));
  }
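
  /*
   * Configuration sketch (illustrative): purging old multipart uploads during
   * initialization. The key names are assumed to be the values of
   * PURGE_EXISTING_MULTIPART and PURGE_EXISTING_MULTIPART_AGE.
   *
   *   conf.setBoolean("fs.s3a.multipart.purge", true);
   *   conf.setLong("fs.s3a.multipart.purge.age", 86400);  // age in seconds
   */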

  /**
   * Return the protocol scheme for the FileSystem.
   *
   * @return "s3a"
   */
  @Override
  public String getScheme() {
    return "s3a";
  }

  /**
   * Returns a URI whose scheme and authority identify this FileSystem.
   */
  @Override
  public URI getUri() {
    return uri;
  }

  /**
   * Set the URI field through {@link S3xLoginHelper} and
   * optionally {@link #canonicalizeUri(URI)}
   * Exported for testing.
   * @param fsUri filesystem URI.
   * @param canonicalize true if the URI should be canonicalized.
   */
  @VisibleForTesting
  protected void setUri(URI fsUri, boolean canonicalize) {
    URI u = S3xLoginHelper.buildFSURI(fsUri);
    this.uri = canonicalize ? u : canonicalizeUri(u);
  }

  /**
   * Get the canonical URI.
   * @return the canonical URI of this FS.
   */
  public URI getCanonicalUri() {
    return uri;
  }

  @VisibleForTesting
  @Override
  public int getDefaultPort() {
    return 0;
  }

  /**
   * Returns the S3 client used by this filesystem.
   * This is for internal use within the S3A code itself.
   * @return AmazonS3Client
   */
  AmazonS3 getAmazonS3Client() {
    return s3;
  }

  /**
   * Returns the S3 client used by this filesystem.
   * Warning: this must only be used for testing, as it bypasses core
   * S3A operations. 
   * @param reason a justification for requesting access.
   * @return AmazonS3Client
   */
  @VisibleForTesting
  public AmazonS3 getAmazonS3ClientForTesting(String reason) {
    LOG.warn("Access to S3A client requested, reason {}", reason);
    return s3;
  }

  /**
   * Set the client - used in mocking tests to force in a different client.
   * @param client client.
   */
  protected void setAmazonS3Client(AmazonS3 client) {
    Preconditions.checkNotNull(client, "client");
    LOG.debug("Setting S3 client to {}", client);
    s3 = client;

    // Need to use a new TransferManager that uses the new client.
    // Also, using a new TransferManager requires a new threadpool as the old
    // TransferManager will shut the thread pool down when it is garbage
    // collected.
    initThreadPools(getConf());
    initTransferManager();
  }

  /**
   * Get the region of a bucket.
   * Invoked from StoreContext; consider an entry point.
   * @return the region in which a bucket is located
   * @throws AccessDeniedException if the caller lacks permission.
   * @throws IOException on any failure.
   */
  @Retries.RetryTranslated
  @InterfaceAudience.LimitedPrivate("diagnostics")
  public String getBucketLocation() throws IOException {
    return getBucketLocation(bucket);
  }

  /**
   * Get the region of a bucket; fixing up the region so it can be used
   * in the builders of other AWS clients.
   * Requires the caller to have the AWS role permission
   * {@code s3:GetBucketLocation}.
   * Retry policy: retrying, translated.
   * @param bucketName the name of the bucket
   * @return the region in which a bucket is located
   * @throws AccessDeniedException if the caller lacks permission.
   * @throws IOException on any failure.
   */
  @VisibleForTesting
  @AuditEntryPoint
  @Retries.RetryTranslated
  public String getBucketLocation(String bucketName) throws IOException {
    final String region = trackDurationAndSpan(
        STORE_EXISTS_PROBE, bucketName, null, () ->
            invoker.retry("getBucketLocation()", bucketName, true, () ->
                // If an access point is configured, the region is known from its ARN
                accessPoint != null
                    ? accessPoint.getRegion()
                    : s3.getBucketLocation(bucketName)));
    return fixBucketRegion(region);
  }

  /**
   * Returns the read ahead range value used by this filesystem.
   * @return the readahead range
   */
  @VisibleForTesting
  long getReadAheadRange() {
    return readAhead;
  }

  /**
   * Get the input policy for this FS instance.
   * @return the input policy
   */
  @InterfaceStability.Unstable
  public S3AInputPolicy getInputPolicy() {
    return inputPolicy;
  }

  /**
   * Get the change detection policy for this FS instance.
   * Only public to allow access in tests in other packages.
   * @return the change detection policy
   */
  @VisibleForTesting
  public ChangeDetectionPolicy getChangeDetectionPolicy() {
    return changeDetectionPolicy;
  }

  /**
   * Get the encryption algorithm of this endpoint.
   * @return the encryption algorithm.
   */
  public S3AEncryptionMethods getS3EncryptionAlgorithm() {
    return encryptionSecrets.getEncryptionMethod();
  }

  /**
   * Demand create the directory allocator, then create a temporary file.
   * This does not mark the file for deletion when a process exits.
   * {@link LocalDirAllocator#createTmpFileForWrite(String, long, Configuration)}.
   * @param pathStr prefix for the temporary file
   * @param size the size of the file that is going to be written
   * @param conf the Configuration object
   * @return a unique temporary file
   * @throws IOException IO problems
   */
  File createTmpFileForWrite(String pathStr, long size,
      Configuration conf) throws IOException {
    if (directoryAllocator == null) {
      synchronized (this) {
        String bufferDir = conf.get(BUFFER_DIR) != null
            ? BUFFER_DIR : HADOOP_TMP_DIR;
        directoryAllocator = new LocalDirAllocator(bufferDir);
      }
    }
    Path path = directoryAllocator.getLocalPathForWrite(pathStr,
        size, conf);
    File dir = new File(path.getParent().toUri().getPath());
    String prefix = path.getName();
    // create a temp file on this directory
    return File.createTempFile(prefix, null, dir);
  }
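
  /*
   * Configuration sketch (illustrative): pointing the upload buffer directory
   * used above at local storage with enough capacity. The key name is assumed
   * to be the value of BUFFER_DIR.
   *
   *   conf.set("fs.s3a.buffer.dir", "/tmp/s3a");
   */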

  /**
   * Get the bucket of this filesystem.
   * @return the bucket
   */
  public String getBucket() {
    return bucket;
  }

  /**
   * Set the bucket.
   * @param bucket the bucket
   */
  @VisibleForTesting
  protected void setBucket(String bucket) {
    this.bucket = bucket;
  }

  /**
   * Get the canned ACL of this FS.
   * @return an ACL, if any
   */
  CannedAccessControlList getCannedACL() {
    return cannedACL;
  }

  /**
   * Change the input policy for this FS.
   * @param inputPolicy new policy
   */
  @InterfaceStability.Unstable
  public void setInputPolicy(S3AInputPolicy inputPolicy) {
    Objects.requireNonNull(inputPolicy, "Null inputStrategy");
    LOG.debug("Setting input strategy: {}", inputPolicy);
    this.inputPolicy = inputPolicy;
  }

  /**
   * Turns a path (relative or otherwise) into an S3 key.
   *
   * @param path input path, may be relative to the working dir
   * @return a key excluding the leading "/", or, if it is the root path, ""
   */
  @VisibleForTesting
  public String pathToKey(Path path) {
    if (!path.isAbsolute()) {
      path = new Path(workingDir, path);
    }

    if (path.toUri().getScheme() != null && path.toUri().getPath().isEmpty()) {
      return "";
    }

    return path.toUri().getPath().substring(1);
  }
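
  /*
   * Worked examples of the mapping performed by pathToKey(); the bucket name
   * is hypothetical.
   *
   *   pathToKey(new Path("s3a://example-bucket/dir/file.txt"))  -> "dir/file.txt"
   *   pathToKey(new Path("s3a://example-bucket/"))              -> ""
   */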

  /**
   * Add a trailing "/" to an S3 key if it is not the root key ("") and does
   * not already end with "/".
   *
   * @param key s3 key or ""
   * @return the key with a trailing "/", or, if it is the root key, ""
   */
  @InterfaceAudience.Private
  public String maybeAddTrailingSlash(String key) {
    return S3AUtils.maybeAddTrailingSlash(key);
  }

  /**
   * Convert a key back to a path.
   * @param key input key
   * @return the path from this key
   */
  Path keyToPath(String key) {
    return new Path("/" + key);
  }

  /**
   * Convert a key to a fully qualified path.
   * This includes fixing up the URI so that if it ends with a trailing slash,
   * that is corrected, similar to {@code Path.normalizePath()}.
   * @param key input key
   * @return the fully qualified path including URI scheme and bucket name.
   */
  public Path keyToQualifiedPath(String key) {
    return qualify(keyToPath(key));
  }

  @Override
  public Path makeQualified(final Path path) {
    Path q = super.makeQualified(path);
    if (!q.isRoot()) {
      String urlString = q.toUri().toString();
      if (urlString.endsWith(Path.SEPARATOR)) {
        // this is a path which needs root stripping off to avoid
        // confusion, See HADOOP-15430
        LOG.debug("Stripping trailing '/' from {}", q);
        // deal with an empty "/" at the end by mapping to the parent and
        // creating a new path from it
        q = new Path(urlString.substring(0, urlString.length() - 1));
      }
    }
    if (!q.isRoot() && q.getName().isEmpty()) {
      q = q.getParent();
    }
    return q;
  }

  /**
   * Qualify a path.
   * This includes fixing up the URI so that if it ends with a trailing slash,
   * that is corrected, similar to {@code Path.normalizePath()}.
   * @param path path to qualify
   * @return a qualified path.
   */
  public Path qualify(Path path) {
    return makeQualified(path);
  }

  /**
   * Check that a Path belongs to this FileSystem.
   * Unlike the superclass, this version does not look at authority,
   * only hostnames.
   * @param path to check
   * @throws IllegalArgumentException if there is an FS mismatch
   */
  @Override
  public void checkPath(Path path) {
    S3xLoginHelper.checkPath(getConf(), getUri(), path, getDefaultPort());
  }

  /**
   * Override the base canonicalization logic and relay to
   * {@link S3xLoginHelper#canonicalizeUri(URI, int)}.
   * This allows for the option of changing this logic for better DT handling.
   * @param rawUri raw URI.
   * @return the canonical URI to use in delegation tokens and file context.
   */
  @Override
  protected URI canonicalizeUri(URI rawUri) {
    return S3xLoginHelper.canonicalizeUri(rawUri, getDefaultPort());
  }

  /**
   * Opens an FSDataInputStream at the indicated Path.
   * @param f the file name to open
   * @param bufferSize the size of the buffer to be used.
   */
  @Retries.RetryTranslated
  public FSDataInputStream open(Path f, int bufferSize)
      throws IOException {
    return open(f, Optional.empty(), Optional.empty());
  }

  /**
   * Opens an FSDataInputStream at the indicated Path.
   * If the status parameter contains an S3AFileStatus reference, it is
   * used, so a HEAD request to the store is avoided.
   *
   * @param file the file to open
   * @param options configuration options if opened with the builder API.
   * @param providedStatus optional file status.
   * @throws IOException IO failure.
   */
  @Retries.RetryTranslated
  @AuditEntryPoint
  private FSDataInputStream open(
      final Path file,
      final Optional<Configuration> options,
      final Optional<S3AFileStatus> providedStatus)
      throws IOException {

    final Path path = qualify(file);
    // this span is passed into the stream.
    final AuditSpan auditSpan = entryPoint(INVOCATION_OPEN, path);
    S3AFileStatus fileStatus = extractOrFetchSimpleFileStatus(path,
        providedStatus);

    S3AReadOpContext readContext;
    if (options.isPresent()) {
      Configuration o = options.get();
      // normal path. Open the file with the chosen seek policy, if different
      // from the normal one.
      // and readahead.
      S3AInputPolicy policy = S3AInputPolicy.getPolicy(
          o.get(INPUT_FADVISE, inputPolicy.toString()));
      long readAheadRange2 = o.getLong(READAHEAD_RANGE, readAhead);
      // TODO support change detection policy from options?
      readContext = createReadContext(
          fileStatus,
          policy,
          changeDetectionPolicy,
          readAheadRange2,
          auditSpan);
    } else {
      readContext = createReadContext(
          fileStatus,
          inputPolicy,
          changeDetectionPolicy,
          readAhead,
          auditSpan);
    }
    LOG.debug("Opening '{}'", readContext);

    return new FSDataInputStream(
        new S3AInputStream(
            readContext,
            createObjectAttributes(fileStatus),
            createInputStreamCallbacks(auditSpan)));
  }
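
  // Non-normative caller-side sketch: the options Optional above is populated
  // when a client opens the file through the openFile() builder API. The
  // option names below are assumptions matching the INPUT_FADVISE and
  // READAHEAD_RANGE keys referenced in this method.
  //
  //   FSDataInputStream in = fs.openFile(path)
  //       .opt("fs.s3a.experimental.input.fadvise", "random")
  //       .opt("fs.s3a.readahead.range", "262144")
  //       .build()
  //       .get();   // or awaitFuture(...) to unwrap the CompletableFuture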

  /**
   * Override point: create the callbacks for S3AInputStream.
   * @param auditSpan the audit span to associate with the callbacks.
   * @return an implementation of the InputStreamCallbacks.
   */
  private S3AInputStream.InputStreamCallbacks createInputStreamCallbacks(
      final AuditSpan auditSpan) {
    return new InputStreamCallbacksImpl(auditSpan);
  }

  /**
   * Operations needed by S3AInputStream to read data.
   */
  private final class InputStreamCallbacksImpl implements
      S3AInputStream.InputStreamCallbacks {

    /**
     * Audit span to activate before each call.
     */
    private final AuditSpan auditSpan;

    /**
     * Create.
     * @param auditSpan Audit span to activate before each call.
     */
    private InputStreamCallbacksImpl(final AuditSpan auditSpan) {
      this.auditSpan = requireNonNull(auditSpan);
    }

    /**
     * Closes the audit span.
     */
    @Override
    public void close()  {
      auditSpan.close();
    }

    @Override
    public GetObjectRequest newGetRequest(final String key) {
      // activate the audit span used for the operation
      try (AuditSpan span = auditSpan.activate()) {
        return getRequestFactory().newGetObjectRequest(key);
      }
    }

    @Override
    public S3Object getObject(GetObjectRequest request) {
      // activate the audit span used for the operation
      try (AuditSpan span = auditSpan.activate()) {
        return s3.getObject(request);
      }
    }
  }

  /**
   * Create the read context for reading from the referenced file,
   * using FS state as well as the status.
   * @param fileStatus file status.
   * @param seekPolicy input policy for this operation
   * @param changePolicy change policy for this operation.
   * @param readAheadRange readahead value.
   * @param auditSpan audit span.
   * @return a context for read and select operations.
   */
  @VisibleForTesting
  protected S3AReadOpContext createReadContext(
      final FileStatus fileStatus,
      final S3AInputPolicy seekPolicy,
      final ChangeDetectionPolicy changePolicy,
      final long readAheadRange,
      final AuditSpan auditSpan) {
    return new S3AReadOpContext(fileStatus.getPath(),
        hasMetadataStore(),
        invoker,
        s3guardInvoker,
        statistics,
        statisticsContext,
        fileStatus,
        seekPolicy,
        changePolicy,
        readAheadRange,
        auditSpan);
  }

  /**
   * Create the attributes of an object for subsequent use.
   * @param f path of the request.
   * @param eTag the eTag of the S3 object
   * @param versionId S3 object version ID
   * @param len length of the file
   * @return attributes to use when building the query.
   */
  private S3ObjectAttributes createObjectAttributes(
      final Path f,
      final String eTag,
      final String versionId,
      final long len) {
    return new S3ObjectAttributes(bucket,
        f,
        pathToKey(f),
        getS3EncryptionAlgorithm(),
        encryptionSecrets.getEncryptionKey(),
        eTag,
        versionId,
        len);
  }

  /**
   * Create the attributes of an object for subsequent use.
   * @param fileStatus file status to build from.
   * @return attributes to use when building the query.
   */
  private S3ObjectAttributes createObjectAttributes(
      final S3AFileStatus fileStatus) {
    return createObjectAttributes(
        fileStatus.getPath(),
        fileStatus.getEtag(),
        fileStatus.getVersionId(),
        fileStatus.getLen());
  }

  /**
   * Create an FSDataOutputStream at the indicated Path with write-progress
   * reporting.
   * Retry policy: retrying, translated on the getFileStatus() probe.
   * No data is uploaded to S3 in this call, so there are no retry issues
   * related to that.
   * @param f the file name to open
   * @param permission the permission to set.
   * @param overwrite if a file with this name already exists, then if true,
   *   the file will be overwritten, and if false an error will be thrown.
   * @param bufferSize the size of the buffer to be used.
   * @param replication required block replication for the file.
   * @param blockSize the requested block size.
   * @param progress the progress reporter.
   * @throws IOException in the event of IO related errors.
   * @see #setPermission(Path, FsPermission)
   */
  @Override
  @AuditEntryPoint
  @SuppressWarnings("IOResourceOpenedButNotSafelyClosed")
  public FSDataOutputStream create(Path f, FsPermission permission,
      boolean overwrite, int bufferSize, short replication, long blockSize,
      Progressable progress) throws IOException {
    final Path path = qualify(f);
    // the span will be picked up inside the output stream
    return trackDurationAndSpan(INVOCATION_CREATE, path, () ->
        innerCreateFile(path, permission, overwrite, bufferSize, replication,
            blockSize, progress));
  }
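
  // Minimal, non-normative usage sketch of this entry point (path is a
  // placeholder). As noted above, the create() call itself uploads no data;
  // the object only becomes visible in S3 once the stream is closed.
  //
  //   try (FSDataOutputStream out =
  //            fs.create(new Path("s3a://example-bucket/tmp/data.bin"), true)) {
  //     out.write(payload);
  //   }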

  /**
   * Create an FSDataOutputStream at the indicated Path with write-progress
   * reporting; in the active span.
   * Retry policy: retrying, translated on the getFileStatus() probe.
   * No data is uploaded to S3 in this call, so no retry issues related to that.
   * @param path the file name to open
   * @param permission the permission to set.
   * @param overwrite if a file with this name already exists, then if true,
   *   the file will be overwritten, and if false an error will be thrown.
   * @param bufferSize the size of the buffer to be used.
   * @param replication required block replication for the file.
   * @param blockSize the requested block size.
   * @param progress the progress reporter.
   * @throws IOException in the event of IO related errors.
   * @see #setPermission(Path, FsPermission)
   */
  @SuppressWarnings("IOResourceOpenedButNotSafelyClosed")
  @Retries.RetryTranslated
  private FSDataOutputStream innerCreateFile(Path path,
      FsPermission permission,
      boolean overwrite,
      int bufferSize,
      short replication,
      long blockSize,
      Progressable progress) throws IOException {
    String key = pathToKey(path);
    FileStatus status = null;
    try {
      // get the status or throw an FNFE.
      // when overwriting, there is no need to look for any existing file,
      // and attempting to do so can poison the load balancers with 404
      // entries.
      status = innerGetFileStatus(path, false,
          overwrite
              ? StatusProbeEnum.DIRECTORIES
              : StatusProbeEnum.ALL);

      // if the thread reaches here, there is something at the path
      if (status.isDirectory()) {
        // path references a directory: automatic error
        throw new FileAlreadyExistsException(path + " is a directory");
      }
      if (!overwrite) {
        // path references a file and overwrite is disabled
        throw new FileAlreadyExistsException(path + " already exists");
      }
      LOG.debug("Overwriting file {}", path);
    } catch (FileNotFoundException e) {
      // nothing exists at the path: there is nothing to overwrite,
      // so proceed with the creation.
    }
    instrumentation.fileCreated();
    PutTracker putTracker =
        committerIntegration.createTracker(path, key);
    String destKey = putTracker.getDestKey();
    final BlockOutputStreamStatistics outputStreamStatistics
        = statisticsContext.newOutputStreamStatistics();
    final S3ABlockOutputStream.BlockOutputStreamBuilder builder =
        S3ABlockOutputStream.builder()
        .withKey(destKey)
        .withBlockFactory(blockFactory)
        .withBlockSize(partSize)
        .withStatistics(outputStreamStatistics)
        .withProgress(progress)
        .withPutTracker(putTracker)
        .withWriteOperations(
            createWriteOperationHelper(getActiveAuditSpan()))
        .withExecutorService(
            new SemaphoredDelegatingExecutor(
                boundedThreadPool,
                blockOutputActiveBlocks,
                true))
        .withDowngradeSyncableExceptions(
            getConf().getBoolean(
                DOWNGRADE_SYNCABLE_EXCEPTIONS,
                DOWNGRADE_SYNCABLE_EXCEPTIONS_DEFAULT))
        .withCSEEnabled(isCSEEnabled);
    return new FSDataOutputStream(
        new S3ABlockOutputStream(builder),
        null);
  }

  /**
   * Create a Write Operation Helper with the current active span.
   * All operations made through this helper will activate the
   * span before execution.
   *
   * This class permits other low-level operations against the store.
   * It is unstable and
   * only intended for code with intimate knowledge of the object store.
   * If using this, be prepared for changes even on minor point releases.
   * @return a new helper.
   */
  @InterfaceAudience.Private
  public WriteOperationHelper getWriteOperationHelper() {
    return createWriteOperationHelper(getActiveAuditSpan());
  }

  /**
   * Create a Write Operation Helper with the given span.
   * All operations made through this helper will activate the
   * span before execution.
   * @param auditSpan audit span
   * @return a new helper.
   */
  @InterfaceAudience.Private
  public WriteOperationHelper createWriteOperationHelper(AuditSpan auditSpan) {
    return new WriteOperationHelper(this,
        getConf(),
        statisticsContext,
        getAuditSpanSource(),
        auditSpan);
  }

  /**
   * {@inheritDoc}
   * @throws FileNotFoundException if the parent directory is not present -or
   * is not a directory.
   */
  @Override
  @AuditEntryPoint
  public FSDataOutputStream createNonRecursive(Path p,
      FsPermission permission,
      EnumSet<CreateFlag> flags,
      int bufferSize,
      short replication,
      long blockSize,
      Progressable progress) throws IOException {
    final Path path = makeQualified(p);
    // this span is passed into the stream.
    try (AuditSpan span = entryPoint(INVOCATION_CREATE_NON_RECURSIVE, path)) {
      Path parent = path.getParent();
      // expect this to raise an exception if there is no parent dir
      if (parent != null && !parent.isRoot()) {
        S3AFileStatus status;
        try {
          // optimize for the directory existing: Call list first
          status = innerGetFileStatus(parent, false,
              StatusProbeEnum.DIRECTORIES);
        } catch (FileNotFoundException e) {
          // no dir, fall back to looking for a file
          // (failure condition if true)
          status = innerGetFileStatus(parent, false,
              StatusProbeEnum.HEAD_ONLY);
        }
        if (!status.isDirectory()) {
          throw new FileAlreadyExistsException("Not a directory: " + parent);
        }
      }
      return innerCreateFile(path, permission,
          flags.contains(CreateFlag.OVERWRITE), bufferSize,
          replication, blockSize, progress);
    }
  }
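
  // Non-normative sketch of a createNonRecursive() call; unlike create(),
  // this fails if the parent directory is absent or is a file.
  // All values below are placeholders.
  //
  //   Path dest = new Path("s3a://example-bucket/existing-dir/part-0000");
  //   FSDataOutputStream out = fs.createNonRecursive(dest,
  //       FsPermission.getFileDefault(),
  //       EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
  //       4096,                            // buffer size
  //       (short) 1,                       // replication (ignored by S3A)
  //       fs.getDefaultBlockSize(dest),
  //       null);                           // no progress callback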

  /**
   * Append to an existing file (optional operation).
   * @param f the existing file to be appended.
   * @param bufferSize the size of the buffer to be used.
   * @param progress for reporting progress if it is not null.
   * @throws UnsupportedOperationException always, as append is not supported.
   */
  public FSDataOutputStream append(Path f, int bufferSize,
      Progressable progress) throws IOException {
    throw new UnsupportedOperationException("Append is not supported "
        + "by S3AFileSystem");
  }
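
  // Hedged caller-side sketch: code which must run against multiple
  // filesystems can probe for append support before calling it (assumes the
  // Hadoop 3.3+ path-capabilities API):
  //
  //   if (fs.hasPathCapability(path, CommonPathCapabilities.FS_APPEND)) {
  //     out = fs.append(path);
  //   } else {
  //     // fall back to rewriting the object
  //   }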


  /**
   * Renames Path src to Path dst.  Can take place on local fs
   * or remote DFS.
   *
   * Warning: S3 does not support renames. This method does a copy which can
   * take S3 some time to execute with large files and directories. Since
   * there is no Progressable passed in, this can time out jobs.
   *
   * Note: This implementation differs from other S3 drivers. Specifically:
   * <ul>
   *   <li>Fails if src is a file and dst is a directory.</li>
   *   <li>Fails if src is a directory and dst is a file.</li>
   *   <li>Fails if the parent of dst does not exist or is a file.</li>
   *   <li>Fails if dst is a directory that is not empty.</li>
   * </ul>
* * @param src path to be renamed * @param dst new path after rename * @throws IOException on IO failure * @return true if rename is successful */ @AuditEntryPoint @Retries.RetryTranslated public boolean rename(Path src, Path dst) throws IOException { try { long bytesCopied = trackDurationAndSpan( INVOCATION_RENAME, src.toString(), dst.toString(), () -> innerRename(src, dst)); LOG.debug("Copied {} bytes", bytesCopied); return true; } catch (AmazonClientException e) { throw translateException("rename(" + src +", " + dst + ")", src, e); } catch (RenameFailedException e) { LOG.info("{}", e.getMessage()); LOG.debug("rename failure", e); return e.getExitCode(); } } /** * Validate the rename parameters and status of the filesystem; * returns the source and any destination File Status. * @param src qualified path to be renamed * @param dst qualified path after rename * @return the source and (possibly null) destination status entries. * @throws RenameFailedException if some criteria for a state changing * rename was not met. This means work didn't happen; it's not something * which is reported upstream to the FileSystem APIs, for which the semantics * of "false" are pretty vague. * @throws FileNotFoundException there's no source file. * @throws IOException on IO failure. */ @Retries.RetryTranslated private Pair initiateRename( final Path src, final Path dst) throws IOException { String srcKey = pathToKey(src); String dstKey = pathToKey(dst); if (srcKey.isEmpty()) { throw new RenameFailedException(src, dst, "source is root directory"); } if (dstKey.isEmpty()) { throw new RenameFailedException(src, dst, "dest is root directory"); } // get the source file status; this raises a FNFE if there is no source // file. S3AFileStatus srcStatus = innerGetFileStatus(src, true, StatusProbeEnum.ALL); if (srcKey.equals(dstKey)) { LOG.debug("rename: src and dest refer to the same file or directory: {}", dst); throw new RenameFailedException(src, dst, "source and dest refer to the same file or directory") .withExitCode(srcStatus.isFile()); } S3AFileStatus dstStatus = null; try { dstStatus = innerGetFileStatus(dst, true, StatusProbeEnum.ALL); // if there is no destination entry, an exception is raised. // hence this code sequence can assume that there is something // at the end of the path; the only detail being what it is and // whether or not it can be the destination of the rename. if (srcStatus.isDirectory()) { if (dstStatus.isFile()) { throw new FileAlreadyExistsException( "Failed to rename " + src + " to " + dst +"; source is a directory and dest is a file"); } else if (dstStatus.isEmptyDirectory() != Tristate.TRUE) { throw new RenameFailedException(src, dst, "Destination is a non-empty directory") .withExitCode(false); } // at this point the destination is an empty directory } else { // source is a file. The destination must be a directory, // empty or not if (dstStatus.isFile()) { throw new FileAlreadyExistsException( "Failed to rename " + src + " to " + dst + "; destination file exists"); } } } catch (FileNotFoundException e) { LOG.debug("rename: destination path {} not found", dst); // Parent must exist Path parent = dst.getParent(); if (!pathToKey(parent).isEmpty() && !parent.equals(src.getParent())) { try { // make sure parent isn't a file. // don't look for parent being a dir as there is a risk // of a race between dest dir cleanup and rename in different // threads. 
S3AFileStatus dstParentStatus = innerGetFileStatus(parent, false, StatusProbeEnum.FILE); // if this doesn't raise an exception then it's one of // raw S3: parent is a file: error // guarded S3: parent is a file or a dir. if (!dstParentStatus.isDirectory()) { throw new RenameFailedException(src, dst, "destination parent is not a directory"); } } catch (FileNotFoundException expected) { // nothing was found. Don't worry about it; // expect rename to implicitly create the parent dir (raw S3) // or the s3guard parents (guarded) } } } return Pair.of(srcStatus, dstStatus); } /** * The inner rename operation. See {@link #rename(Path, Path)} for * the description of the operation. * This operation throws an exception on any failure which needs to be * reported and downgraded to a failure. * Retries: retry translated, assuming all operations it is called do * so. For safely, consider catch and handle AmazonClientException * because this is such a complex method there's a risk it could surface. * @param source path to be renamed * @param dest new path after rename * @throws RenameFailedException if some criteria for a state changing * rename was not met. This means work didn't happen; it's not something * which is reported upstream to the FileSystem APIs, for which the semantics * of "false" are pretty vague. * @return the number of bytes copied. * @throws FileNotFoundException there's no source file. * @throws IOException on IO failure. * @throws AmazonClientException on failures inside the AWS SDK */ @Retries.RetryMixed private long innerRename(Path source, Path dest) throws RenameFailedException, FileNotFoundException, IOException, AmazonClientException { Path src = qualify(source); Path dst = qualify(dest); LOG.debug("Rename path {} to {}", src, dst); String srcKey = pathToKey(src); String dstKey = pathToKey(dst); Pair p = initiateRename(src, dst); // Initiate the rename. // this will call back into this class via the rename callbacks // and interact directly with any metastore. RenameOperation renameOperation = new RenameOperation( createStoreContext(), src, srcKey, p.getLeft(), dst, dstKey, p.getRight(), new OperationCallbacksImpl(), pageSize); return renameOperation.execute(); } @Override public Token getFsDelegationToken() throws IOException { return getDelegationToken(null); } /** * The callbacks made by the rename and delete operations. * This separation allows the operation to be factored out and * still avoid knowledge of the S3AFilesystem implementation. * The Audit span active at the time of creation is cached and activated * before every call. */ private final class OperationCallbacksImpl implements OperationCallbacks { /** Audit Span at time of creation. 
*/ private final AuditSpan auditSpan; private OperationCallbacksImpl() { auditSpan = getActiveAuditSpan(); } @Override public S3ObjectAttributes createObjectAttributes(final Path path, final String eTag, final String versionId, final long len) { return S3AFileSystem.this.createObjectAttributes(path, eTag, versionId, len); } @Override public S3ObjectAttributes createObjectAttributes( final S3AFileStatus fileStatus) { return S3AFileSystem.this.createObjectAttributes(fileStatus); } @Override public S3AReadOpContext createReadContext(final FileStatus fileStatus) { return S3AFileSystem.this.createReadContext(fileStatus, inputPolicy, changeDetectionPolicy, readAhead, auditSpan); } @Override @Retries.RetryTranslated public void deleteObjectAtPath(final Path path, final String key, final boolean isFile, final BulkOperationState operationState) throws IOException { auditSpan.activate(); once("delete", path.toString(), () -> S3AFileSystem.this.deleteObjectAtPath(path, key, isFile, operationState)); } @Override @Retries.RetryTranslated public RemoteIterator listFilesAndDirectoryMarkers( final Path path, final S3AFileStatus status, final boolean collectTombstones, final boolean includeSelf) throws IOException { auditSpan.activate(); return innerListFiles( path, true, includeSelf ? Listing.ACCEPT_ALL_BUT_S3N : new Listing.AcceptAllButSelfAndS3nDirs(path), status, collectTombstones, true); } @Override public CopyResult copyFile(final String srcKey, final String destKey, final S3ObjectAttributes srcAttributes, final S3AReadOpContext readContext) throws IOException { auditSpan.activate(); return S3AFileSystem.this.copyFile(srcKey, destKey, srcAttributes.getLen(), srcAttributes, readContext); } @Override public DeleteObjectsResult removeKeys( final List keysToDelete, final boolean deleteFakeDir, final List undeletedObjectsOnFailure, final BulkOperationState operationState, final boolean quiet) throws MultiObjectDeleteException, AmazonClientException, IOException { auditSpan.activate(); return S3AFileSystem.this.removeKeys(keysToDelete, deleteFakeDir, undeletedObjectsOnFailure, operationState, quiet); } @Override public void finishRename(final Path sourceRenamed, final Path destCreated) throws IOException { auditSpan.activate(); Path destParent = destCreated.getParent(); if (!sourceRenamed.getParent().equals(destParent)) { LOG.debug("source & dest parents are different; fix up dir markers"); if (!keepDirectoryMarkers(destParent)) { deleteUnnecessaryFakeDirectories(destParent, null); } maybeCreateFakeParentDirectory(sourceRenamed); } } @Override public boolean allowAuthoritative(final Path p) { return S3AFileSystem.this.allowAuthoritative(p); } @Override @Retries.RetryTranslated public RemoteIterator listObjects( final Path path, final String key) throws IOException { return once("listObjects", key, () -> listing.createFileStatusListingIterator(path, createListObjectsRequest(key, null), ACCEPT_ALL, Listing.ACCEPT_ALL_BUT_S3N, null, auditSpan)); } } /** * Callbacks from {@link Listing}. * Auditing: the listing object is long-lived; the audit span * for a single listing is passed in from the listing * method calls and then down to the callbacks. 
*/ protected class ListingOperationCallbacksImpl implements ListingOperationCallbacks { @Override @Retries.RetryRaw public CompletableFuture listObjectsAsync( S3ListRequest request, DurationTrackerFactory trackerFactory, AuditSpan span) throws IOException { return submit(unboundedThreadPool, span, () -> listObjects(request, pairedTrackerFactory(trackerFactory, getDurationTrackerFactory()))); } @Override @Retries.RetryRaw public CompletableFuture continueListObjectsAsync( S3ListRequest request, S3ListResult prevResult, DurationTrackerFactory trackerFactory, AuditSpan span) throws IOException { return submit(unboundedThreadPool, span, () -> continueListObjects(request, prevResult, pairedTrackerFactory(trackerFactory, getDurationTrackerFactory()))); } @Override public S3ALocatedFileStatus toLocatedFileStatus( S3AFileStatus status) throws IOException { return S3AFileSystem.this.toLocatedFileStatus(status); } @Override public S3ListRequest createListObjectsRequest( String key, String delimiter, AuditSpan span) { span.activate(); return S3AFileSystem.this.createListObjectsRequest(key, delimiter); } @Override public long getDefaultBlockSize(Path path) { return S3AFileSystem.this.getDefaultBlockSize(path); } @Override public int getMaxKeys() { return S3AFileSystem.this.getMaxKeys(); } @Override public ITtlTimeProvider getUpdatedTtlTimeProvider() { return S3AFileSystem.this.ttlTimeProvider; } @Override public boolean allowAuthoritative(final Path p) { return S3AFileSystem.this.allowAuthoritative(p); } } /** * Low-level call to get at the object metadata. * This method is used in some external applications and so * must be viewed as a public entry point. * Auditing: An audit entry point. * @param path path to the object. This will be qualified. * @return metadata * @throws IOException IO and object access problems. */ @VisibleForTesting @AuditEntryPoint @InterfaceAudience.LimitedPrivate("utilities") @Retries.RetryTranslated @InterfaceStability.Evolving public ObjectMetadata getObjectMetadata(Path path) throws IOException { return trackDurationAndSpan(INVOCATION_GET_FILE_STATUS, path, () -> getObjectMetadata(makeQualified(path), null, invoker, "getObjectMetadata")); } /** * Low-level call to get at the object metadata. * @param path path to the object * @param changeTracker the change tracker to detect version inconsistencies * @param changeInvoker the invoker providing the retry policy * @param operation the operation being performed (e.g. "read" or "copy") * @return metadata * @throws IOException IO and object access problems. */ @Retries.RetryTranslated private ObjectMetadata getObjectMetadata(Path path, ChangeTracker changeTracker, Invoker changeInvoker, String operation) throws IOException { String key = pathToKey(path); return once(operation, path.toString(), () -> // this always does a full HEAD to the object getObjectMetadata( key, changeTracker, changeInvoker, operation)); } /** * Does this Filesystem have a metadata store? * @return true iff the FS has been instantiated with a metadata store */ public boolean hasMetadataStore() { return !S3Guard.isNullMetadataStore(metadataStore); } /** * Does the filesystem have an authoritative metadata store? * @return true if there is a metadata store and the authoritative flag * is set for this filesystem. */ @VisibleForTesting public boolean hasAuthoritativeMetadataStore() { return hasMetadataStore() && allowAuthoritativeMetadataStore; } /** * Get the metadata store. * This will always be non-null, but may be bound to the * {@code NullMetadataStore}. 
* @return the metadata store of this FS instance */ @VisibleForTesting public MetadataStore getMetadataStore() { return metadataStore; } /** For testing only. See ITestS3GuardEmptyDirs. */ @VisibleForTesting void setMetadataStore(MetadataStore ms) { Preconditions.checkNotNull(ms); metadataStore = ms; } /** * Entry point to an operation. * Increments the statistic; verifies the FS is active. * @param operation The operation being invoked * @param path first path of operation * @return a span for the audit * @throws IOException failure of audit service */ protected AuditSpan entryPoint(Statistic operation, Path path) throws IOException { return entryPoint(operation, (path != null ? pathToKey(path): null), null); } /** * Entry point to an operation. * Increments the statistic; verifies the FS is active. * @param operation The operation being invoked * @param path1 first path of operation * @param path2 second path of operation * @return a span for the audit * @throws IOException failure of audit service */ protected AuditSpan entryPoint(Statistic operation, @Nullable String path1, @Nullable String path2) throws IOException { checkNotClosed(); incrementStatistic(operation); return createSpan(operation.getSymbol(), path1, path2); } /** * Given an IOException raising callable/lambda expression, * execute it and update the relevant statistic within a span * of the same statistic. * @param statistic statistic key * @param path first path for span (nullable) * @param path2 second path for span * @param input input callable. * @param return type. * @return the result of the operation. * @throws IOException if raised in the callable */ private B trackDurationAndSpan( Statistic statistic, String path, String path2, CallableRaisingIOE input) throws IOException { checkNotClosed(); try (AuditSpan span = createSpan(statistic.getSymbol(), path, path2)) { return trackDuration(getDurationTrackerFactory(), statistic.getSymbol(), input); } } /** * Overloaded version of {@code trackDurationAndSpan()}. * Takes a single nullable path as the path param, * @param statistic statistic key * @param path path for span (nullable) * @param input input callable. * @param return type. * @return the result of the operation. * @throws IOException if raised in the callable */ private B trackDurationAndSpan( Statistic statistic, @Nullable Path path, CallableRaisingIOE input) throws IOException { return trackDurationAndSpan(statistic, path != null ? pathToKey(path): null, null, input); } /** * Increment a statistic by 1. * This increments both the instrumentation and storage statistics. * @param statistic The operation to increment */ protected void incrementStatistic(Statistic statistic) { incrementStatistic(statistic, 1); } /** * Increment a statistic by a specific value. * This increments both the instrumentation and storage statistics. * @param statistic The operation to increment * @param count the count to increment */ protected void incrementStatistic(Statistic statistic, long count) { statisticsContext.incrementCounter(statistic, count); } /** * Decrement a gauge by a specific value. * @param statistic The operation to decrement * @param count the count to decrement */ protected void decrementGauge(Statistic statistic, long count) { statisticsContext.decrementGauge(statistic, count); } /** * Increment a gauge by a specific value. 
* @param statistic The operation to increment * @param count the count to increment */ protected void incrementGauge(Statistic statistic, long count) { statisticsContext.incrementGauge(statistic, count); } /** * Callback when an operation was retried. * Increments the statistics of ignored errors or throttled requests, * depending up on the exception class. * @param ex exception. */ public void operationRetried(Exception ex) { if (isThrottleException(ex)) { operationThrottled(false); } else { incrementStatistic(STORE_IO_RETRY); incrementStatistic(IGNORED_ERRORS); } } /** * Callback from {@link Invoker} when an operation is retried. * @param text text of the operation * @param ex exception * @param retries number of retries * @param idempotent is the method idempotent */ public void operationRetried( String text, Exception ex, int retries, boolean idempotent) { operationRetried(ex); } /** * Callback from {@link Invoker} when an operation against a metastore * is retried. * Always increments the {@link Statistic#S3GUARD_METADATASTORE_RETRY} * statistic/counter; * if it is a throttling exception will update the associated * throttled metrics/statistics. * * @param ex exception * @param retries number of retries * @param idempotent is the method idempotent */ public void metastoreOperationRetried(Exception ex, int retries, boolean idempotent) { incrementStatistic(S3GUARD_METADATASTORE_RETRY); if (isThrottleException(ex)) { operationThrottled(true); } else { incrementStatistic(IGNORED_ERRORS); } } /** * Note that an operation was throttled -this will update * specific counters/metrics. * @param metastore was the throttling observed in the S3Guard metastore? */ private void operationThrottled(boolean metastore) { LOG.debug("Request throttled on {}", metastore ? "S3": "DynamoDB"); if (metastore) { incrementStatistic(S3GUARD_METADATASTORE_THROTTLED); statisticsContext.addValueToQuantiles(S3GUARD_METADATASTORE_THROTTLE_RATE, 1); } else { incrementStatistic(STORE_IO_THROTTLED); statisticsContext.addValueToQuantiles(STORE_IO_THROTTLE_RATE, 1); } } /** * Get the storage statistics of this filesystem. * @return the storage statistics */ @Override public S3AStorageStatistics getStorageStatistics() { return storageStatistics; } /** * Get the instrumentation's IOStatistics. * @return statistics */ @Override public IOStatistics getIOStatistics() { return instrumentation != null ? instrumentation.getIOStatistics() : null; } /** * Get the factory for duration tracking. * @return a factory from the instrumentation. */ protected DurationTrackerFactory getDurationTrackerFactory() { return instrumentation != null ? instrumentation.getDurationTrackerFactory() : null; } /** * Request object metadata; increments counters in the process. * Retry policy: retry untranslated. * This method is used in some external applications and so * must be viewed as a public entry point. * Auditing: this call does NOT initiate a new AuditSpan; the expectation * is that there is already an active span. * @param key key * @return the metadata * @throws IOException if the retry invocation raises one (it shouldn't). */ @Retries.RetryRaw @VisibleForTesting @InterfaceAudience.LimitedPrivate("external utilities") ObjectMetadata getObjectMetadata(String key) throws IOException { return getObjectMetadata(key, null, invoker, "getObjectMetadata"); } /** * Request object metadata; increments counters in the process. * Retry policy: retry untranslated. 
* Uses changeTracker to detect an unexpected file version (eTag or versionId) * @param key key * @param changeTracker the change tracker to detect unexpected object version * @param changeInvoker the invoker providing the retry policy * @param operation the operation (e.g. "read" or "copy") triggering this call * @return the metadata * @throws IOException if the retry invocation raises one (it shouldn't). * @throws RemoteFileChangedException if an unexpected version is detected */ @Retries.RetryRaw protected ObjectMetadata getObjectMetadata(String key, ChangeTracker changeTracker, Invoker changeInvoker, String operation) throws IOException { ObjectMetadata meta = changeInvoker.retryUntranslated("GET " + key, true, () -> { GetObjectMetadataRequest request = getRequestFactory().newGetObjectMetadataRequest(key); incrementStatistic(OBJECT_METADATA_REQUESTS); DurationTracker duration = getDurationTrackerFactory() .trackDuration(ACTION_HTTP_HEAD_REQUEST.getSymbol()); try { LOG.debug("HEAD {} with change tracker {}", key, changeTracker); if (changeTracker != null) { changeTracker.maybeApplyConstraint(request); } ObjectMetadata objectMetadata = s3.getObjectMetadata(request); if (changeTracker != null) { changeTracker.processMetadata(objectMetadata, operation); } return objectMetadata; } catch(AmazonServiceException ase) { if (!isObjectNotFound(ase)) { // file not found is not considered a failure of the call, // so only switch the duration tracker to update failure // metrics on other exception outcomes. duration.failed(); } throw ase; } finally { // update the tracker. duration.close(); } }); incrementReadOperations(); return meta; } /** * Initiate a {@code listObjects} operation, incrementing metrics * in the process. * * Retry policy: retry untranslated. * @param request request to initiate * @param trackerFactory duration tracking * @return the results * @throws IOException if the retry invocation raises one (it shouldn't). */ @Retries.RetryRaw protected S3ListResult listObjects(S3ListRequest request, @Nullable final DurationTrackerFactory trackerFactory) throws IOException { incrementReadOperations(); LOG.debug("LIST {}", request); validateListArguments(request); try(DurationInfo ignored = new DurationInfo(LOG, false, "LIST")) { return invoker.retryUntranslated( request.toString(), true, trackDurationOfOperation(trackerFactory, OBJECT_LIST_REQUEST, () -> { if (useListV1) { return S3ListResult.v1(s3.listObjects(request.getV1())); } else { return S3ListResult.v2(s3.listObjectsV2(request.getV2())); } })); } } /** * Validate the list arguments with this bucket's settings. * @param request the request to validate */ private void validateListArguments(S3ListRequest request) { if (useListV1) { Preconditions.checkArgument(request.isV1()); } else { Preconditions.checkArgument(!request.isV1()); } } /** * List the next set of objects. * Retry policy: retry untranslated. * @param request last list objects request to continue * @param prevResult last paged result to continue from * @param trackerFactory duration tracking * @return the next result object * @throws IOException none, just there for retryUntranslated. 
*/ @Retries.RetryRaw protected S3ListResult continueListObjects(S3ListRequest request, S3ListResult prevResult, final DurationTrackerFactory trackerFactory) throws IOException { incrementReadOperations(); validateListArguments(request); try(DurationInfo ignored = new DurationInfo(LOG, false, "LIST (continued)")) { return invoker.retryUntranslated( request.toString(), true, trackDurationOfOperation( trackerFactory, OBJECT_CONTINUE_LIST_REQUEST, () -> { if (useListV1) { return S3ListResult.v1( s3.listNextBatchOfObjects( getRequestFactory() .newListNextBatchOfObjectsRequest( prevResult.getV1()))); } else { request.getV2().setContinuationToken(prevResult.getV2() .getNextContinuationToken()); return S3ListResult.v2(s3.listObjectsV2(request.getV2())); } })); } } /** * Increment read operations. */ public void incrementReadOperations() { statistics.incrementReadOps(1); } /** * Increment the write operation counter. * This is somewhat inaccurate, as it appears to be invoked more * often than needed in progress callbacks. */ public void incrementWriteOperations() { statistics.incrementWriteOps(1); } /** * Delete an object. This is the low-level internal call which * does not update the metastore. * Increments the {@code OBJECT_DELETE_REQUESTS} and write * operation statistics. * This call does not create any mock parent entries. * * Retry policy: retry untranslated; delete considered idempotent. * @param key key to blob to delete. * @throws AmazonClientException problems working with S3 * @throws InvalidRequestException if the request was rejected due to * a mistaken attempt to delete the root directory. */ @VisibleForTesting @Retries.RetryRaw protected void deleteObject(String key) throws AmazonClientException, IOException { blockRootDelete(key); incrementWriteOperations(); try (DurationInfo ignored = new DurationInfo(LOG, false, "deleting %s", key)) { invoker.retryUntranslated(String.format("Delete %s:/%s", bucket, key), DELETE_CONSIDERED_IDEMPOTENT, ()-> { incrementStatistic(OBJECT_DELETE_OBJECTS); trackDurationOfInvocation(getDurationTrackerFactory(), OBJECT_DELETE_REQUEST.getSymbol(), () -> s3.deleteObject(getRequestFactory() .newDeleteObjectRequest(key))); return null; }); } } /** * Delete an object, also updating the metastore. * This call does not create any mock parent entries. * Retry policy: retry untranslated; delete considered idempotent. * @param f path path to delete * @param key key of entry * @param isFile is the path a file (used for instrumentation only) * @param operationState (nullable) operational state for a bulk update * @throws AmazonClientException problems working with S3 * @throws IOException IO failure in the metastore */ @Retries.RetryMixed void deleteObjectAtPath(Path f, String key, boolean isFile, @Nullable final BulkOperationState operationState) throws AmazonClientException, IOException { if (isFile) { instrumentation.fileDeleted(1); } else { instrumentation.directoryDeleted(); } deleteObject(key); metadataStore.delete(f, operationState); } /** * Reject any request to delete an object where the key is root. * @param key key to validate * @throws InvalidRequestException if the request was rejected due to * a mistaken attempt to delete the root directory. */ private void blockRootDelete(String key) throws InvalidRequestException { if (key.isEmpty() || "/".equals(key)) { throw new InvalidRequestException("Bucket "+ bucket +" cannot be deleted"); } } /** * Perform a bulk object delete operation against S3; leaves S3Guard * alone. 
   * Increments the {@code OBJECT_DELETE_REQUESTS} and write
   * operation statistics.
   * <p>
   * {@code OBJECT_DELETE_OBJECTS} is updated with the actual number
   * of objects deleted in the request.
   * <p>
* Retry policy: retry untranslated; delete considered idempotent. * If the request is throttled, this is logged in the throttle statistics, * with the counter set to the number of keys, rather than the number * of invocations of the delete operation. * This is because S3 considers each key as one mutating operation on * the store when updating its load counters on a specific partition * of an S3 bucket. * If only the request was measured, this operation would under-report. * @param deleteRequest keys to delete on the s3-backend * @return the AWS response * @throws MultiObjectDeleteException one or more of the keys could not * be deleted. * @throws AmazonClientException amazon-layer failure. */ @Retries.RetryRaw private DeleteObjectsResult deleteObjects(DeleteObjectsRequest deleteRequest) throws MultiObjectDeleteException, AmazonClientException, IOException { incrementWriteOperations(); BulkDeleteRetryHandler retryHandler = new BulkDeleteRetryHandler(createStoreContext()); int keyCount = deleteRequest.getKeys().size(); try(DurationInfo ignored = new DurationInfo(LOG, false, "DELETE %d keys", keyCount)) { return invoker.retryUntranslated("delete", DELETE_CONSIDERED_IDEMPOTENT, (text, e, r, i) -> { // handle the failure retryHandler.bulkDeleteRetried(deleteRequest, e); }, // duration is tracked in the bulk delete counters trackDurationOfOperation(getDurationTrackerFactory(), OBJECT_BULK_DELETE_REQUEST.getSymbol(), () -> { incrementStatistic(OBJECT_DELETE_OBJECTS, keyCount); return s3.deleteObjects(deleteRequest); })); } catch (MultiObjectDeleteException e) { // one or more of the keys could not be deleted. // log and rethrow List errors = e.getErrors(); LOG.debug("Partial failure of delete, {} errors", errors.size(), e); for (MultiObjectDeleteException.DeleteError error : errors) { LOG.debug("{}: \"{}\" - {}", error.getKey(), error.getCode(), error.getMessage()); } throw e; } } /** * Create a putObject request. * Adds the ACL and metadata * @param key key of object * @param metadata metadata header * @param srcfile source file * @return the request */ public PutObjectRequest newPutObjectRequest(String key, ObjectMetadata metadata, File srcfile) { return requestFactory.newPutObjectRequest(key, metadata, srcfile); } /** * Create a new object metadata instance. * Any standard metadata headers are added here, for example: * encryption. * * @param length length of data to set in header. * @return a new metadata instance */ public ObjectMetadata newObjectMetadata(long length) { return requestFactory.newObjectMetadata(length); } /** * Start a transfer-manager managed async PUT of an object, * incrementing the put requests and put bytes * counters. * It does not update the other counters, * as existing code does that as progress callbacks come in. * Byte length is calculated from the file length, or, if there is no * file, from the content length of the header. * Because the operation is async, any stream supplied in the request * must reference data (files, buffers) which stay valid until the upload * completes. * Retry policy: N/A: the transfer manager is performing the upload. * Auditing: must be inside an audit span. 
* @param putObjectRequest the request * @return the upload initiated */ @Retries.OnceRaw public UploadInfo putObject(PutObjectRequest putObjectRequest) { long len = getPutRequestLength(putObjectRequest); LOG.debug("PUT {} bytes to {} via transfer manager ", len, putObjectRequest.getKey()); incrementPutStartStatistics(len); Upload upload = transfers.upload(putObjectRequest); return new UploadInfo(upload, len); } /** * PUT an object directly (i.e. not via the transfer manager). * Byte length is calculated from the file length, or, if there is no * file, from the content length of the header. * * Retry Policy: none. * Auditing: must be inside an audit span. * Important: this call will close any input stream in the request. * @param putObjectRequest the request * @return the upload initiated * @throws AmazonClientException on problems * @throws MetadataPersistenceException if metadata about the write could * not be saved to the metadata store and * fs.s3a.metadatastore.fail.on.write.error=true */ @VisibleForTesting @Retries.OnceRaw("For PUT; post-PUT actions are RetryTranslated") PutObjectResult putObjectDirect(PutObjectRequest putObjectRequest) throws AmazonClientException, MetadataPersistenceException { long len = getPutRequestLength(putObjectRequest); LOG.debug("PUT {} bytes to {}", len, putObjectRequest.getKey()); incrementPutStartStatistics(len); try { PutObjectResult result = trackDurationOfSupplier( getDurationTrackerFactory(), OBJECT_PUT_REQUESTS.getSymbol(), () -> s3.putObject(putObjectRequest)); incrementPutCompletedStatistics(true, len); // update metadata finishedWrite(putObjectRequest.getKey(), len, result.getETag(), result.getVersionId(), null); return result; } catch (SdkBaseException e) { incrementPutCompletedStatistics(false, len); throw e; } } /** * Get the length of the PUT, verifying that the length is known. * @param putObjectRequest a request bound to a file or a stream. * @return the request length * @throws IllegalArgumentException if the length is negative */ private long getPutRequestLength(PutObjectRequest putObjectRequest) { long len; if (putObjectRequest.getFile() != null) { len = putObjectRequest.getFile().length(); } else { len = putObjectRequest.getMetadata().getContentLength(); } Preconditions.checkState(len >= 0, "Cannot PUT object of unknown length"); return len; } /** * Upload part of a multi-partition file. * Increments the write and put counters. * Important: this call does not close any input stream in the request. * * Retry Policy: none. * @param request request * @return the result of the operation. * @throws AmazonClientException on problems */ @Retries.OnceRaw UploadPartResult uploadPart(UploadPartRequest request) throws AmazonClientException { long len = request.getPartSize(); incrementPutStartStatistics(len); try { UploadPartResult uploadPartResult = s3.uploadPart(request); incrementPutCompletedStatistics(true, len); return uploadPartResult; } catch (AmazonClientException e) { incrementPutCompletedStatistics(false, len); throw e; } } /** * At the start of a put/multipart upload operation, update the * relevant counters. * * @param bytes bytes in the request. */ public void incrementPutStartStatistics(long bytes) { LOG.debug("PUT start {} bytes", bytes); incrementWriteOperations(); incrementGauge(OBJECT_PUT_REQUESTS_ACTIVE, 1); if (bytes > 0) { incrementGauge(OBJECT_PUT_BYTES_PENDING, bytes); } } /** * At the end of a put/multipart upload operation, update the * relevant counters and gauges. * * @param success did the operation succeed? 
* @param bytes bytes in the request. */ public void incrementPutCompletedStatistics(boolean success, long bytes) { LOG.debug("PUT completed success={}; {} bytes", success, bytes); if (bytes > 0) { incrementStatistic(OBJECT_PUT_BYTES, bytes); decrementGauge(OBJECT_PUT_BYTES_PENDING, bytes); } incrementStatistic(OBJECT_PUT_REQUESTS_COMPLETED); decrementGauge(OBJECT_PUT_REQUESTS_ACTIVE, 1); } /** * Callback for use in progress callbacks from put/multipart upload events. * Increments those statistics which are expected to be updated during * the ongoing upload operation. * @param key key to file that is being written (for logging) * @param bytes bytes successfully uploaded. */ public void incrementPutProgressStatistics(String key, long bytes) { PROGRESS.debug("PUT {}: {} bytes", key, bytes); incrementWriteOperations(); if (bytes > 0) { statistics.incrementBytesWritten(bytes); } } /** * Delete a list of keys on a s3-backend. * This does not update the metastore. * Retry policy: retry untranslated; delete considered idempotent. * @param keysToDelete collection of keys to delete on the s3-backend. * if empty, no request is made of the object store. * @param deleteFakeDir indicates whether this is for deleting fake dirs * @param quiet should a bulk query be quiet, or should its result list * all deleted keys? * @return the deletion result if a multi object delete was invoked * and it returned without a failure. * @throws InvalidRequestException if the request was rejected due to * a mistaken attempt to delete the root directory. * @throws MultiObjectDeleteException one or more of the keys could not * be deleted in a multiple object delete operation. * The number of rejected objects will be added to the metric * {@link Statistic#FILES_DELETE_REJECTED}. * @throws AmazonClientException other amazon-layer failure. */ @Retries.RetryRaw private DeleteObjectsResult removeKeysS3( List keysToDelete, boolean deleteFakeDir, boolean quiet) throws MultiObjectDeleteException, AmazonClientException, IOException { if (LOG.isDebugEnabled()) { LOG.debug("Initiating delete operation for {} objects", keysToDelete.size()); for (DeleteObjectsRequest.KeyVersion key : keysToDelete) { LOG.debug(" {} {}", key.getKey(), key.getVersion() != null ? key.getVersion() : ""); } } DeleteObjectsResult result = null; if (keysToDelete.isEmpty()) { // exit fast if there are no keys to delete return result; } for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) { blockRootDelete(keyVersion.getKey()); } try { if (enableMultiObjectsDelete) { result = deleteObjects( getRequestFactory().newBulkDeleteRequest(keysToDelete, quiet)); } else { for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) { deleteObject(keyVersion.getKey()); } } } catch (MultiObjectDeleteException ex) { // partial delete. // Update the stats with the count of the actual number of successful // deletions. int rejected = ex.getErrors().size(); noteDeleted(keysToDelete.size() - rejected, deleteFakeDir); incrementStatistic(FILES_DELETE_REJECTED, rejected); throw ex; } noteDeleted(keysToDelete.size(), deleteFakeDir); return result; } /** * Note the deletion of files or fake directories deleted. * @param count count of keys deleted. * @param deleteFakeDir are the deletions fake directories? 
*/ private void noteDeleted(final int count, final boolean deleteFakeDir) { if (!deleteFakeDir) { instrumentation.fileDeleted(count); } else { instrumentation.fakeDirsDeleted(count); } } /** * Invoke {@link #removeKeysS3(List, boolean, boolean)} with handling of * {@code MultiObjectDeleteException}. * * @param keysToDelete collection of keys to delete on the s3-backend. * if empty, no request is made of the object store. * @param deleteFakeDir indicates whether this is for deleting fake dirs * @param operationState (nullable) operational state for a bulk update * @throws InvalidRequestException if the request was rejected due to * a mistaken attempt to delete the root directory. * @throws MultiObjectDeleteException one or more of the keys could not * be deleted in a multiple object delete operation. * @throws AmazonClientException amazon-layer failure. * @throws IOException other IO Exception. */ @VisibleForTesting @Retries.RetryMixed public void removeKeys( final List keysToDelete, final boolean deleteFakeDir, final BulkOperationState operationState) throws MultiObjectDeleteException, AmazonClientException, IOException { removeKeys(keysToDelete, deleteFakeDir, new ArrayList<>(), operationState, true); } /** * Invoke {@link #removeKeysS3(List, boolean, boolean)} with handling of * {@code MultiObjectDeleteException} before the exception is rethrown. * Specifically: *
   * <ol>
   *   <li>Failure and !deleteFakeDir: S3Guard is updated with all
   *    deleted entries</li>
   *   <li>Failure where deleteFakeDir == true: do nothing with S3Guard</li>
   *   <li>Success: do nothing with S3Guard</li>
   * </ol>
* @param keysToDelete collection of keys to delete on the s3-backend. * if empty, no request is made of the object store. * @param deleteFakeDir indicates whether this is for deleting fake dirs. * @param undeletedObjectsOnFailure List which will be built up of all * files that were not deleted. This happens even as an exception * is raised. * @param operationState (nullable) operational state for a bulk update * @param quiet should a bulk query be quiet, or should its result list * all deleted keys * @return the deletion result if a multi object delete was invoked * and it returned without a failure, else null. * @throws InvalidRequestException if the request was rejected due to * a mistaken attempt to delete the root directory. * @throws MultiObjectDeleteException one or more of the keys could not * be deleted in a multiple object delete operation. * @throws AmazonClientException amazon-layer failure. * @throws IOException other IO Exception. */ @Retries.RetryMixed private DeleteObjectsResult removeKeys( final List keysToDelete, final boolean deleteFakeDir, final List undeletedObjectsOnFailure, final BulkOperationState operationState, final boolean quiet) throws MultiObjectDeleteException, AmazonClientException, IOException { undeletedObjectsOnFailure.clear(); try (DurationInfo ignored = new DurationInfo(LOG, false, "Deleting %d keys", keysToDelete.size())) { return removeKeysS3(keysToDelete, deleteFakeDir, quiet); } catch (MultiObjectDeleteException ex) { LOG.debug("Partial delete failure"); // what to do if an IOE was raised? Given an exception was being // raised anyway, and the failures are logged, do nothing. if (!deleteFakeDir) { // when deleting fake directories we don't want to delete metastore // entries so we only process these failures on "real" deletes. Triple, List, List>> results = new MultiObjectDeleteSupport(createStoreContext(), operationState) .processDeleteFailure(ex, keysToDelete, new ArrayList()); undeletedObjectsOnFailure.addAll(results.getLeft()); } throw ex; } catch (AmazonClientException | IOException ex) { List paths = new MultiObjectDeleteSupport( createStoreContext(), operationState) .processDeleteFailureGenericException(ex, keysToDelete); // other failures. Assume nothing was deleted undeletedObjectsOnFailure.addAll(paths); throw ex; } } /** * Delete a Path. This operation is at least {@code O(files)}, with * added overheads to enumerate the path. It is also not atomic. * * @param f the path to delete. * @param recursive if path is a directory and set to * true, the directory is deleted else throws an exception. In * case of a file the recursive can be set to either true or false. * @return true if the path existed and then was deleted; false if there * was no path in the first place, or the corner cases of root path deletion * have surfaced. * @throws IOException due to inability to delete a directory or file. */ @Override @Retries.RetryTranslated @AuditEntryPoint public boolean delete(Path f, boolean recursive) throws IOException { checkNotClosed(); final Path path = qualify(f); // span covers delete, getFileStatus, fake directory operations. 
try (AuditSpan span = createSpan(INVOCATION_DELETE.getSymbol(), path.toString(), null)) { boolean outcome = trackDuration(getDurationTrackerFactory(), INVOCATION_DELETE.getSymbol(), new DeleteOperation( createStoreContext(), innerGetFileStatus(path, true, StatusProbeEnum.ALL), recursive, new OperationCallbacksImpl(), pageSize)); if (outcome) { try { maybeCreateFakeParentDirectory(path); } catch (AccessDeniedException e) { LOG.warn("Cannot create directory marker at {}: {}", f.getParent(), e.toString()); LOG.debug("Failed to create fake dir above {}", path, e); } } return outcome; } catch (FileNotFoundException e) { LOG.debug("Couldn't delete {} - does not exist: {}", path, e.toString()); instrumentation.errorIgnored(); return false; } catch (AmazonClientException e) { throw translateException("delete", path, e); } } /** * Create a fake directory if required. * That is: it is not the root path and the path does not exist. * Retry policy: retrying; untranslated. * @param f path to create * @throws IOException IO problem */ @Retries.RetryTranslated private void createFakeDirectoryIfNecessary(Path f) throws IOException, AmazonClientException { String key = pathToKey(f); // we only make the LIST call; the codepaths to get here should not // be reached if there is an empty dir marker -and if they do, it // is mostly harmless to create a new one. if (!key.isEmpty() && !s3Exists(f, StatusProbeEnum.DIRECTORIES)) { LOG.debug("Creating new fake directory at {}", f); createFakeDirectory(key); } } /** * Create a fake parent directory if required. * That is: it parent is not the root path and does not yet exist. * @param path whose parent is created if needed. * @throws IOException IO problem */ @Retries.RetryTranslated @VisibleForTesting protected void maybeCreateFakeParentDirectory(Path path) throws IOException, AmazonClientException { Path parent = path.getParent(); if (parent != null && !parent.isRoot()) { createFakeDirectoryIfNecessary(parent); } } /** * Override subclass such that we benefit for async listing done * in {@code S3AFileSystem}. See {@code Listing#ObjectListingIterator}. * {@inheritDoc} * */ @Override @AuditEntryPoint public RemoteIterator listStatusIterator(Path p) throws FileNotFoundException, IOException { Path path = qualify(p); return typeCastingRemoteIterator(trackDurationAndSpan( INVOCATION_LIST_STATUS, path, () -> once("listStatus", path.toString(), () -> innerListStatus(p)))); } /** * List the statuses of the files/directories in the given path if the path is * a directory. * * @param f given path * @return the statuses of the files/directories in the given patch * @throws FileNotFoundException when the path does not exist; * IOException see specific implementation */ @Override @AuditEntryPoint public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException { Path path = qualify(f); return trackDurationAndSpan(INVOCATION_LIST_STATUS, path, () -> once("listStatus", path.toString(), () -> iteratorToStatuses(innerListStatus(path), new HashSet<>()))); } /** * List the statuses of the files/directories in the given path if the path is * a directory. The returned iterator is within the current active span. * * Auditing: This method MUST be called within a span. * The span is attached to the iterator. All further S3 calls * made by the iterator will be within the span. * @param f qualified path * @return the statuses of the files/directories in the given patch * @throws FileNotFoundException when the path does not exist; * @throws IOException due to an IO problem. 
* @throws AmazonClientException on failures inside the AWS SDK */ private RemoteIterator innerListStatus(Path f) throws FileNotFoundException, IOException, AmazonClientException { Path path = qualify(f); LOG.debug("List status for path: {}", path); Triple, DirListingMetadata, Boolean> statusesAssumingNonEmptyDir = listing .getFileStatusesAssumingNonEmptyDir(path, getActiveAuditSpan()); if (!statusesAssumingNonEmptyDir.getLeft().hasNext() && statusesAssumingNonEmptyDir.getRight()) { // We are sure that this is an empty directory in auth mode. return statusesAssumingNonEmptyDir.getLeft(); } else if (!statusesAssumingNonEmptyDir.getLeft().hasNext()) { // We may have an empty dir, or may have file or may have nothing. // So we call innerGetFileStatus to get the status, this may throw // FileNotFoundException if we have nothing. // So We are guaranteed to have either a dir marker or a file. final S3AFileStatus fileStatus = innerGetFileStatus(path, false, StatusProbeEnum.ALL); // If it is a file return directly. if (fileStatus.isFile()) { LOG.debug("Adding: rd (not a dir): {}", path); S3AFileStatus[] stats = new S3AFileStatus[1]; stats[0] = fileStatus; return listing.createProvidedFileStatusIterator( stats, ACCEPT_ALL, Listing.ACCEPT_ALL_BUT_S3N); } } // Here we have a directory which may or may not be empty. // So we update the metastore and return. return S3Guard.dirListingUnion( metadataStore, path, statusesAssumingNonEmptyDir.getLeft(), statusesAssumingNonEmptyDir.getMiddle(), allowAuthoritative(path), ttlTimeProvider, p -> listing.createProvidedFileStatusIterator( dirMetaToStatuses(statusesAssumingNonEmptyDir.getMiddle()), ACCEPT_ALL, Listing.ACCEPT_ALL_BUT_S3N)); } /** * Is a path to be considered as authoritative? * True iff there is an authoritative metastore or if there * is a non-auth store with the supplied path under * one of the paths declared as authoritative. * @param path path * @return true if the path is auth */ public boolean allowAuthoritative(final Path path) { return S3Guard.allowAuthoritative(path, this, allowAuthoritativeMetadataStore, allowAuthoritativePaths); } /** * Create a {@code ListObjectsRequest} request against this bucket, * with the maximum keys returned in a query set by {@link #maxKeys}. * @param key key for request * @param delimiter any delimiter * @return the request */ @VisibleForTesting public S3ListRequest createListObjectsRequest(String key, String delimiter) { return createListObjectsRequest(key, delimiter, maxKeys); } /** * Create the List objects request appropriate for the * active list request option. * @param key key for request * @param delimiter any delimiter * @param limit limit of keys * @return the request */ private S3ListRequest createListObjectsRequest(String key, String delimiter, int limit) { if (!useListV1) { ListObjectsV2Request request = getRequestFactory().newListObjectsV2Request( key, delimiter, limit); return S3ListRequest.v2(request); } else { ListObjectsRequest request = getRequestFactory().newListObjectsV1Request( key, delimiter, limit); return S3ListRequest.v1(request); } } /** * Set the current working directory for the given file system. All relative * paths will be resolved relative to it. * * @param newDir the current working directory. */ public void setWorkingDirectory(Path newDir) { workingDir = makeQualified(newDir); } /** * Get the current working directory for the given file system. * @return the directory pathname */ public Path getWorkingDirectory() { return workingDir; } /** * Get the username of the FS. 
* @return the short name of the user who instantiated the FS */ public String getUsername() { return username; } /** * Get the owner of this FS: who created it? * @return the owner of the FS. */ public UserGroupInformation getOwner() { return owner; } /** * * Make the given path and all non-existent parents into * directories. Has the semantics of Unix {@code 'mkdir -p'}. * Existence of the directory hierarchy is not an error. * @param p path to create * @param permission to apply to path * @return true if a directory was created or already existed * @throws FileAlreadyExistsException there is a file at the path specified * or is discovered on one of its ancestors. * @throws IOException other IO problems */ @Override @AuditEntryPoint public boolean mkdirs(Path p, FsPermission permission) throws IOException, FileAlreadyExistsException { Path path = qualify(p); return trackDurationAndSpan( INVOCATION_MKDIRS, path, new MkdirOperation( createStoreContext(), path, createMkdirOperationCallbacks())); } /** * Override point: create the callbacks for Mkdir. * This does not create a new span; caller must be in one. * @return an implementation of the MkdirCallbacks, */ @VisibleForTesting public MkdirOperation.MkdirCallbacks createMkdirOperationCallbacks() { return new MkdirOperationCallbacksImpl(); } /** * Callbacks from the {@link MkdirOperation}. */ protected class MkdirOperationCallbacksImpl implements MkdirOperation.MkdirCallbacks { @Override public S3AFileStatus probePathStatus(final Path path, final Set probes) throws IOException { return S3AFileSystem.this.innerGetFileStatus(path, false, probes); } @Override public void createFakeDirectory(final String key) throws IOException { S3AFileSystem.this.createEmptyObject(key); } } /** * This is a very slow operation against object storage. * Execute it as a single span with whatever optimizations * have been implemented. * {@inheritDoc} */ @Override @Retries.RetryTranslated @AuditEntryPoint public ContentSummary getContentSummary(final Path f) throws IOException { final Path path = qualify(f); return trackDurationAndSpan( INVOCATION_GET_CONTENT_SUMMARY, path, new GetContentSummaryOperation( createStoreContext(), path, createGetContentSummaryCallbacks())); } /** * Override point: create the callbacks for getContentSummary. * This does not create a new span; caller must be in one. * @return an implementation of the GetContentSummaryCallbacksImpl */ protected GetContentSummaryOperation.GetContentSummaryCallbacks createGetContentSummaryCallbacks() { return new GetContentSummaryCallbacksImpl(); } /** * Callbacks from the {@link GetContentSummaryOperation}. */ protected class GetContentSummaryCallbacksImpl implements GetContentSummaryOperation.GetContentSummaryCallbacks { @Override public S3AFileStatus probePathStatus(final Path path, final Set probes) throws IOException { return S3AFileSystem.this.innerGetFileStatus(path, false, probes); } @Override public RemoteIterator listStatusIterator(final Path path) throws IOException { return S3AFileSystem.this.innerListStatus(path); } } /** * Soft check of access by forwarding to the audit manager * and so on to the auditor. 
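   * <p>
   * A minimal caller-side sketch (the bucket path and action are assumptions,
   * not taken from this file):
   * <pre>{@code
   * // illustrative path; throws AccessControlException if the check fails
   * fs.access(new Path("s3a://bucket/data"), FsAction.READ);
   * }</pre>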
* {@inheritDoc} */ @Override @AuditEntryPoint public void access(final Path f, final FsAction mode) throws AccessControlException, FileNotFoundException, IOException { Path path = qualify(f); LOG.debug("check access mode {} for {}", path, mode); trackDurationAndSpan( INVOCATION_ACCESS, path, () -> { final S3AFileStatus stat = innerGetFileStatus(path, false, StatusProbeEnum.ALL); if (!getAuditManager().checkAccess(path, stat, mode)) { incrementStatistic(AUDIT_ACCESS_CHECK_FAILURE); throw new AccessControlException(String.format( "Permission denied: user=%s, path=\"%s\":%s:%s:%s%s", getOwner().getUserName(), stat.getPath(), stat.getOwner(), stat.getGroup(), stat.isDirectory() ? "d" : "-", mode)); } // simply for the API binding. return true; }); } /** * Return a file status object that represents the path. * @param f The path we want information from * @return a FileStatus object * @throws FileNotFoundException when the path does not exist * @throws IOException on other problems. */ @Override @AuditEntryPoint @Retries.RetryTranslated public FileStatus getFileStatus(final Path f) throws IOException { Path path = qualify(f); return trackDurationAndSpan( INVOCATION_GET_FILE_STATUS, path, () -> innerGetFileStatus(path, false, StatusProbeEnum.ALL)); } /** * Get the status of a file or directory, first through S3Guard and then * through S3. * The S3 probes can leave 404 responses in the S3 load balancers; if * a check is only needed for a directory, declaring this saves time and * avoids creating one for the object. * When only probing for directories, if an entry for a file is found in * S3Guard it is returned, but checks for updated values are skipped. * Internal version of {@link #getFileStatus(Path)}. * @param f The path we want information from * @param needEmptyDirectoryFlag if true, implementation will calculate * a TRUE or FALSE value for {@link S3AFileStatus#isEmptyDirectory()} * @param probes probes to make. * @return a S3AFileStatus object * @throws FileNotFoundException when the path does not exist * @throws IOException on other problems. */ @VisibleForTesting @Retries.RetryTranslated S3AFileStatus innerGetFileStatus(final Path f, final boolean needEmptyDirectoryFlag, final Set probes) throws IOException { final Path path = qualify(f); String key = pathToKey(path); LOG.debug("Getting path status for {} ({}); needEmptyDirectory={}", path, key, needEmptyDirectoryFlag); boolean allowAuthoritative = allowAuthoritative(path); // Check MetadataStore, if any. PathMetadata pm = null; if (hasMetadataStore()) { pm = S3Guard.getWithTtl(metadataStore, path, ttlTimeProvider, needEmptyDirectoryFlag, allowAuthoritative); } Set tombstones = Collections.emptySet(); if (pm != null) { S3AFileStatus msStatus = pm.getFileStatus(); if (pm.isDeleted()) { OffsetDateTime deletedAt = OffsetDateTime.ofInstant( Instant.ofEpochMilli(msStatus.getModificationTime()), ZoneOffset.UTC); throw new FileNotFoundException("Path " + path + " is recorded as " + "deleted by S3Guard at " + deletedAt); } // if ms is not authoritative, check S3 if there's any recent // modification - compare the modTime to check if metadata is up to date // Skip going to s3 if the file checked is a directory. Because if the // dest is also a directory, there's no difference. 
if (!msStatus.isDirectory() && !allowAuthoritative && probes.contains(StatusProbeEnum.Head)) { // a file has been found in a non-auth path and the caller has not said // they only care about directories LOG.debug("Metadata for {} found in the non-auth metastore.", path); final long msModTime = pm.getFileStatus().getModificationTime(); S3AFileStatus s3AFileStatus; try { s3AFileStatus = s3GetFileStatus(path, key, probes, tombstones, needEmptyDirectoryFlag); } catch (FileNotFoundException fne) { LOG.trace("File Not Found from probes for {}", key, fne); s3AFileStatus = null; } if (s3AFileStatus == null) { LOG.warn("Failed to find file {}. Either it is not yet visible, or " + "it has been deleted.", path); } else { final long s3ModTime = s3AFileStatus.getModificationTime(); if(s3ModTime > msModTime) { LOG.debug("S3Guard metadata for {} is outdated;" + " s3modtime={}; msModTime={} updating metastore", path, s3ModTime, msModTime); return S3Guard.putAndReturn(metadataStore, s3AFileStatus, ttlTimeProvider); } } } if (needEmptyDirectoryFlag && msStatus.isDirectory()) { // the caller needs to know if a directory is empty, // and that this is a directory. if (pm.isEmptyDirectory() != Tristate.UNKNOWN) { // We have a definitive true / false from MetadataStore, we are done. return msStatus; } else { // execute a S3Guard listChildren command to list tombstones under the // path. // This list will be used in the forthcoming s3GetFileStatus call. DirListingMetadata children = S3Guard.listChildrenWithTtl(metadataStore, path, ttlTimeProvider, allowAuthoritative); if (children != null) { tombstones = children.listTombstones(); } LOG.debug("MetadataStore doesn't know if {} is empty, using S3.", path); } } else { // Either this is not a directory, or we don't care if it is empty return msStatus; } // now issue the S3 getFileStatus call. try { S3AFileStatus s3FileStatus = s3GetFileStatus(path, key, probes, tombstones, true); // entry was found, so save in S3Guard and return the final value. return S3Guard.putAndReturn(metadataStore, s3FileStatus, ttlTimeProvider); } catch (FileNotFoundException e) { // If the metadata store has no children for it and it's not listed in // S3 yet, we'll conclude that it is an empty directory return S3AFileStatus.fromFileStatus(msStatus, Tristate.TRUE, null, null); } } else { // there was no entry in S3Guard // retrieve the data and update the metadata store in the process. return S3Guard.putAndReturn(metadataStore, s3GetFileStatus(path, key, probes, tombstones, needEmptyDirectoryFlag), ttlTimeProvider); } } /** * Raw {@code getFileStatus} that talks direct to S3. * Used to implement {@link #innerGetFileStatus(Path, boolean, Set)}, * and for direct management of empty directory blobs. * * Checks made, in order: *
   * <ol>
   *   <li>
   *     Head: look for an object at the given key, provided that
   *     the key doesn't end in "/"
   *   </li>
   *   <li>
   *     DirMarker: look for the directory marker -the key with a trailing /
   *     if not passed in.
   *     If an object was found with size 0 bytes, a directory status entry
   *     is returned which declares that the directory is empty.
   *   </li>
   *   <li>
   *     List: issue a LIST on the key (with / if needed), require one
   *     entry to be found for the path to be considered a non-empty directory.
   *   </li>
   * </ol>
   *
   * Notes:
   * <ul>
   *   <li>
   *     Objects ending in / which are not 0-bytes long are not treated as
   *     directory markers, but instead as files.
   *   </li>
   *   <li>
   *     There's ongoing discussions about whether a dir marker
   *     should be interpreted as an empty dir.
   *   </li>
   *   <li>
   *     The HEAD requests require the permissions to read an object,
   *     including (we believe) the ability to decrypt the file.
   *     At the very least, for SSE-C markers, you need the same key on
   *     the client for the HEAD to work.
   *   </li>
   *   <li>
   *     The List probe needs list permission; it is also more prone to
   *     inconsistency, even on newly created files.
   *   </li>
   * </ul>
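   * <p>
   * A minimal illustrative internal invocation (argument values are
   * assumptions, not taken from this file): a directories-only probe which
   * skips the HEAD request entirely:
   * <pre>{@code
   * // hypothetical: probe for a directory without issuing a HEAD request
   * S3AFileStatus status = s3GetFileStatus(path, key,
   *     StatusProbeEnum.DIRECTORIES, null, false);
   * }</pre>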
* * Retry policy: retry translated. * @param path Qualified path * @param key Key string for the path * @param probes probes to make * @param tombstones tombstones to filter * @param needEmptyDirectoryFlag if true, implementation will calculate * a TRUE or FALSE value for {@link S3AFileStatus#isEmptyDirectory()} * @return Status * @throws FileNotFoundException the supplied probes failed. * @throws IOException on other problems. */ @VisibleForTesting @Retries.RetryTranslated S3AFileStatus s3GetFileStatus(final Path path, final String key, final Set probes, @Nullable final Set tombstones, final boolean needEmptyDirectoryFlag) throws IOException { LOG.debug("S3GetFileStatus {}", path); // either you aren't looking for the directory flag, or you are, // and if you are, the probe list must contain list. Preconditions.checkArgument(!needEmptyDirectoryFlag || probes.contains(StatusProbeEnum.List), "s3GetFileStatus(%s) wants to know if a directory is empty but" + " does not request a list probe", path); if (key.isEmpty() && !needEmptyDirectoryFlag) { return new S3AFileStatus(Tristate.UNKNOWN, path, username); } if (!key.isEmpty() && !key.endsWith("/") && probes.contains(StatusProbeEnum.Head)) { try { // look for the simple file ObjectMetadata meta = getObjectMetadata(key); LOG.debug("Found exact file: normal file {}", key); long contentLength = meta.getContentLength(); // check if CSE is enabled, then strip padded length. if (isCSEEnabled && meta.getUserMetaDataOf(Headers.CRYPTO_CEK_ALGORITHM) != null && contentLength >= CSE_PADDING_LENGTH) { contentLength -= CSE_PADDING_LENGTH; } return new S3AFileStatus(contentLength, dateToLong(meta.getLastModified()), path, getDefaultBlockSize(path), username, meta.getETag(), meta.getVersionId()); } catch (AmazonServiceException e) { // if the response is a 404 error, it just means that there is // no file at that path...the remaining checks will be needed. if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) { throw translateException("getFileStatus", path, e); } } catch (AmazonClientException e) { throw translateException("getFileStatus", path, e); } } // execute the list if (probes.contains(StatusProbeEnum.List)) { try { // this will find a marker dir / as well as an entry. // When making a simple "is this a dir check" all is good. // but when looking for an empty dir, we need to verify there are no // children, so ask for two entries, so as to find // a child String dirKey = maybeAddTrailingSlash(key); // list size is dir marker + at least one non-tombstone entry // there's a corner case: more tombstones than you have in a // single page list. We assume that if you have been deleting // that many files, then the AWS listing will have purged some // by the time of listing so that the response includes some // which have not. int listSize; if (tombstones == null) { // no tombstones so look for a marker and at least one child. listSize = 2; } else { // build a listing > tombstones. If the caller has many thousands // of tombstones this won't work properly, which is why pruning // of expired tombstones matters. listSize = Math.min(2 + tombstones.size(), Math.max(2, maxKeys)); } S3ListRequest request = createListObjectsRequest(dirKey, "/", listSize); // execute the request S3ListResult listResult = listObjects(request, getDurationTrackerFactory()); if (listResult.hasPrefixesOrObjects(contextAccessors, tombstones)) { if (LOG.isDebugEnabled()) { LOG.debug("Found path as directory (with /)"); listResult.logAtDebug(LOG); } // At least one entry has been found. 
// If looking for an empty directory, the marker must exist but no // children. // So the listing must contain the marker entry only. if (needEmptyDirectoryFlag && listResult.representsEmptyDirectory( contextAccessors, dirKey, tombstones)) { return new S3AFileStatus(Tristate.TRUE, path, username); } // either an empty directory is not needed, or the // listing does not meet the requirements. return new S3AFileStatus(Tristate.FALSE, path, username); } else if (key.isEmpty()) { LOG.debug("Found root directory"); return new S3AFileStatus(Tristate.TRUE, path, username); } } catch (AmazonServiceException e) { if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) { throw translateException("getFileStatus", path, e); } } catch (AmazonClientException e) { throw translateException("getFileStatus", path, e); } } LOG.debug("Not Found: {}", path); throw new FileNotFoundException("No such file or directory: " + path); } /** * Raw version of {@link FileSystem#exists(Path)} which uses S3 only: * S3Guard MetadataStore, if any, will be skipped. * Retry policy: retrying; translated. * @param path qualified path to look for * @param probes probes to make * @return true if path exists in S3 * @throws IOException IO failure */ @Retries.RetryTranslated private boolean s3Exists(final Path path, final Set probes) throws IOException { String key = pathToKey(path); try { s3GetFileStatus(path, key, probes, null, false); return true; } catch (FileNotFoundException e) { return false; } } /** * The src file is on the local disk. Add it to FS at * the given dst name. * * This version doesn't need to create a temporary file to calculate the md5. * Sadly this doesn't seem to be used by the shell cp :( * * delSrc indicates if the source should be removed * @param delSrc whether to delete the src * @param overwrite whether to overwrite an existing file * @param src path * @param dst path * @throws IOException IO problem * @throws FileAlreadyExistsException the destination file exists and * overwrite==false * @throws AmazonClientException failure in the AWS SDK */ @Override @AuditEntryPoint public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst) throws IOException { checkNotClosed(); LOG.debug("Copying local file from {} to {}", src, dst); trackDurationAndSpan(INVOCATION_COPY_FROM_LOCAL_FILE, dst, () -> new CopyFromLocalOperation( createStoreContext(), src, dst, delSrc, overwrite, createCopyFromLocalCallbacks()).execute()); } protected CopyFromLocalOperation.CopyFromLocalOperationCallbacks createCopyFromLocalCallbacks() throws IOException { LocalFileSystem local = getLocal(getConf()); return new CopyFromLocalCallbacksImpl(local); } protected class CopyFromLocalCallbacksImpl implements CopyFromLocalOperation.CopyFromLocalOperationCallbacks { private final LocalFileSystem local; private CopyFromLocalCallbacksImpl(LocalFileSystem local) { this.local = local; } @Override public RemoteIterator listLocalStatusIterator( final Path path) throws IOException { return local.listLocatedStatus(path); } @Override public File pathToLocalFile(Path path) { return local.pathToFile(path); } @Override public boolean deleteLocal(Path path, boolean recursive) throws IOException { return local.delete(path, recursive); } @Override public void copyLocalFileFromTo(File file, Path from, Path to) throws IOException { trackDurationAndSpan( OBJECT_PUT_REQUESTS, to, () -> { final String key = pathToKey(to); final ObjectMetadata om = newObjectMetadata(file.length()); Progressable progress = null; PutObjectRequest 
putObjectRequest = newPutObjectRequest(key, om, file); S3AFileSystem.this.invoker.retry( "putObject(" + "" + ")", to.toString(), true, () -> executePut(putObjectRequest, progress)); return null; }); } @Override public FileStatus getFileStatus(Path f) throws IOException { return S3AFileSystem.this.getFileStatus(f); } @Override public boolean createEmptyDir(Path path, StoreContext storeContext) throws IOException { return trackDuration(getDurationTrackerFactory(), INVOCATION_MKDIRS.getSymbol(), new MkdirOperation( storeContext, path, createMkdirOperationCallbacks())); } } /** * Execute a PUT via the transfer manager, blocking for completion, * updating the metastore afterwards. * If the waiting for completion is interrupted, the upload will be * aborted before an {@code InterruptedIOException} is thrown. * @param putObjectRequest request * @param progress optional progress callback * @return the upload result * @throws InterruptedIOException if the blocking was interrupted. * @throws MetadataPersistenceException if metadata about the write could * not be saved to the metadata store and * fs.s3a.metadatastore.fail.on.write.error=true */ @Retries.OnceRaw("For PUT; post-PUT actions are RetryTranslated") UploadResult executePut(PutObjectRequest putObjectRequest, Progressable progress) throws InterruptedIOException, MetadataPersistenceException { String key = putObjectRequest.getKey(); UploadInfo info = putObject(putObjectRequest); Upload upload = info.getUpload(); ProgressableProgressListener listener = new ProgressableProgressListener( this, key, upload, progress); upload.addProgressListener(listener); UploadResult result = waitForUploadCompletion(key, info); listener.uploadCompleted(); // post-write actions finishedWrite(key, info.getLength(), result.getETag(), result.getVersionId(), null); return result; } /** * Wait for an upload to complete. * If the waiting for completion is interrupted, the upload will be * aborted before an {@code InterruptedIOException} is thrown. * If the upload (or its result collection) failed, this is where * the failure is raised as an AWS exception * @param key destination key * @param uploadInfo upload to wait for * @return the upload result * @throws InterruptedIOException if the blocking was interrupted. */ @Retries.OnceRaw UploadResult waitForUploadCompletion(String key, UploadInfo uploadInfo) throws InterruptedIOException { Upload upload = uploadInfo.getUpload(); try { UploadResult result = upload.waitForUploadResult(); incrementPutCompletedStatistics(true, uploadInfo.getLength()); return result; } catch (InterruptedException e) { LOG.info("Interrupted: aborting upload"); incrementPutCompletedStatistics(false, uploadInfo.getLength()); upload.abort(); throw (InterruptedIOException) new InterruptedIOException("Interrupted in PUT to " + keyToQualifiedPath(key)) .initCause(e); } } /** * Close the filesystem. This shuts down all transfers. * @throws IOException IO problem */ @Override public void close() throws IOException { if (closed.getAndSet(true)) { // already closed return; } isClosed = true; LOG.debug("Filesystem {} is closed", uri); if (getConf() != null) { String iostatisticsLoggingLevel = getConf().getTrimmed(IOSTATISTICS_LOGGING_LEVEL, IOSTATISTICS_LOGGING_LEVEL_DEFAULT); logIOStatisticsAtLevel(LOG, iostatisticsLoggingLevel, getIOStatistics()); } try { super.close(); } finally { stopAllServices(); } // Log IOStatistics at debug. 
if (LOG.isDebugEnabled()) { // robust extract and convert to string LOG.debug("Statistics for {}: {}", uri, IOStatisticsLogging.ioStatisticsToPrettyString(getIOStatistics())); } } /** * Stop all services. * This is invoked in close() and during failures of initialize() * -make sure that all operations here are robust to failures in * both the expected state of this FS and of failures while being stopped. */ protected synchronized void stopAllServices() { // shutting down the transfer manager also shuts // down the S3 client it is bonded to. if (transfers != null) { try { transfers.shutdownNow(true); } catch (RuntimeException e) { // catch and swallow for resilience. LOG.debug("When shutting down", e); } transfers = null; } // At this point the S3A client is shut down, // now the executor pools are closed HadoopExecutors.shutdown(boundedThreadPool, LOG, THREAD_POOL_SHUTDOWN_DELAY_SECONDS, TimeUnit.SECONDS); boundedThreadPool = null; HadoopExecutors.shutdown(unboundedThreadPool, LOG, THREAD_POOL_SHUTDOWN_DELAY_SECONDS, TimeUnit.SECONDS); unboundedThreadPool = null; // other services are shutdown. cleanupWithLogger(LOG, metadataStore, instrumentation, delegationTokens.orElse(null), signerManager, auditManager); closeAutocloseables(LOG, credentials); delegationTokens = Optional.empty(); signerManager = null; credentials = null; } /** * Verify that the input stream is open. Non blocking; this gives * the last state of the volatile {@link #closed} field. * @throws IOException if the connection is closed. */ private void checkNotClosed() throws IOException { if (isClosed) { throw new IOException(uri + ": " + E_FS_CLOSED); } } /** * Get the delegation token support for this filesystem; * not null iff delegation support is enabled. * @return the token support, or an empty option. */ @VisibleForTesting public Optional getDelegationTokens() { return delegationTokens; } /** * Return a service name iff delegation tokens are enabled and the * token binding is issuing delegation tokens. * @return the canonical service name or null */ @Override public String getCanonicalServiceName() { // this could all be done in map statements, but it'd be harder to // understand and maintain. // Essentially: no DTs, no canonical service name. if (!delegationTokens.isPresent()) { return null; } // DTs present: ask the binding if it is willing to // serve tokens (or fail noisily). S3ADelegationTokens dt = delegationTokens.get(); return dt.getTokenIssuingPolicy() != NoTokensAvailable ? dt.getCanonicalServiceName() : null; } /** * Get a delegation token if the FS is set up for them. * If the user already has a token, it is returned, * even if it has expired. * @param renewer the account name that is allowed to renew the token. * @return the delegation token or null * @throws IOException IO failure */ @Override @AuditEntryPoint public Token getDelegationToken(String renewer) throws IOException { checkNotClosed(); LOG.debug("Delegation token requested"); if (delegationTokens.isPresent()) { return trackDurationAndSpan( INVOCATION_GET_DELEGATION_TOKEN, null, () -> delegationTokens.get().getBoundOrNewDT( encryptionSecrets, (renewer != null ? new Text(renewer) : new Text()))); } else { // Delegation token support is not set up LOG.debug("Token support is not enabled"); return null; } } /** * Ask any DT plugin for any extra token issuers. * These do not get told of the encryption secrets and can * return any type of token. * This allows DT plugins to issue extra tokens for * ancillary services. 
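   * <p>
   * Illustrative sketch of how a client typically gathers tokens from this
   * filesystem and from any additional issuers (the renewer name is an
   * assumption):
   * <pre>{@code
   * // "yarn" renewer is illustrative; collects the FS token plus any
   * // tokens offered by additional token issuers
   * Credentials credentials = new Credentials();
   * Token<?>[] tokens = fs.addDelegationTokens("yarn", credentials);
   * }</pre>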
*/ @Override public DelegationTokenIssuer[] getAdditionalTokenIssuers() throws IOException { checkNotClosed(); if (delegationTokens.isPresent()) { return delegationTokens.get().getAdditionalTokenIssuers(); } else { // Delegation token support is not set up LOG.debug("Token support is not enabled"); return null; } } /** * Build the AWS policy for restricted access to the resources needed * by this bucket. * The policy generated includes S3 access, S3Guard access * if needed, and KMS operations. * @param access access level desired. * @return a policy for use in roles */ @Override @InterfaceAudience.Private public List listAWSPolicyRules( final Set access) { if (access.isEmpty()) { return Collections.emptyList(); } List statements = new ArrayList<>( allowS3Operations(bucket, access.contains(AccessLevel.WRITE) || access.contains(AccessLevel.ADMIN))); // no attempt is made to qualify KMS access; there's no // way to predict read keys, and not worried about granting // too much encryption access. statements.add(STATEMENT_ALLOW_SSE_KMS_RW); // add any metastore policies if (metadataStore instanceof AWSPolicyProvider) { statements.addAll( ((AWSPolicyProvider) metadataStore).listAWSPolicyRules(access)); } return statements; } /** * Copy a single object in the bucket via a COPY operation. * There's no update of metadata, directory markers, etc. * Callers must implement. * @param srcKey source object path * @param dstKey destination object path * @param size object size * @param srcAttributes S3 attributes of the source object * @param readContext the read context * @return the result of the copy * @throws InterruptedIOException the operation was interrupted * @throws IOException Other IO problems */ @Retries.RetryTranslated private CopyResult copyFile(String srcKey, String dstKey, long size, S3ObjectAttributes srcAttributes, S3AReadOpContext readContext) throws IOException, InterruptedIOException { LOG.debug("copyFile {} -> {} ", srcKey, dstKey); ProgressListener progressListener = progressEvent -> { switch (progressEvent.getEventType()) { case TRANSFER_PART_COMPLETED_EVENT: incrementWriteOperations(); break; default: break; } }; ChangeTracker changeTracker = new ChangeTracker( keyToQualifiedPath(srcKey).toString(), changeDetectionPolicy, readContext.getS3AStatisticsContext() .newInputStreamStatistics() .getChangeTrackerStatistics(), srcAttributes); String action = "copyFile(" + srcKey + ", " + dstKey + ")"; Invoker readInvoker = readContext.getReadInvoker(); ObjectMetadata srcom; try { srcom = once(action, srcKey, () -> getObjectMetadata(srcKey, changeTracker, readInvoker, "copy")); } catch (FileNotFoundException e) { // if rename fails at this point it means that the expected file was not // found. // The cause is believed to always be one of // - File was deleted since LIST/S3Guard metastore.list.() knew of it. // - S3Guard is asking for a specific version and it's been removed by // lifecycle rules. // - there's a 404 cached in the S3 load balancers. LOG.debug("getObjectMetadata({}) failed to find an expected file", srcKey, e); // We create an exception, but the text depends on the S3Guard state String message = hasMetadataStore() ? 
RemoteFileChangedException.FILE_NEVER_FOUND : RemoteFileChangedException.FILE_NOT_FOUND_SINGLE_ATTEMPT; throw new RemoteFileChangedException( keyToQualifiedPath(srcKey).toString(), action, message, e); } return readInvoker.retry( action, srcKey, true, () -> { CopyObjectRequest copyObjectRequest = getRequestFactory().newCopyObjectRequest(srcKey, dstKey, srcom); changeTracker.maybeApplyConstraint(copyObjectRequest); incrementStatistic(OBJECT_COPY_REQUESTS); Copy copy = transfers.copy(copyObjectRequest, getAuditManager().createStateChangeListener()); copy.addProgressListener(progressListener); CopyOutcome copyOutcome = CopyOutcome.waitForCopy(copy); InterruptedException interruptedException = copyOutcome.getInterruptedException(); if (interruptedException != null) { // copy interrupted: convert to an IOException. throw (IOException)new InterruptedIOException( "Interrupted copying " + srcKey + " to " + dstKey + ", cancelling") .initCause(interruptedException); } SdkBaseException awsException = copyOutcome.getAwsException(); if (awsException != null) { changeTracker.processException(awsException, "copy"); throw awsException; } CopyResult result = copyOutcome.getCopyResult(); changeTracker.processResponse(result); incrementWriteOperations(); instrumentation.filesCopied(1, size); return result; }); } /** * Initiate a multipart upload from the preconfigured request. * Retry policy: none + untranslated. * @param request request to initiate * @return the result of the call * @throws AmazonClientException on failures inside the AWS SDK * @throws IOException Other IO problems */ @Retries.OnceRaw InitiateMultipartUploadResult initiateMultipartUpload( InitiateMultipartUploadRequest request) throws IOException { LOG.debug("Initiate multipart upload to {}", request.getKey()); return trackDurationOfSupplier(getDurationTrackerFactory(), OBJECT_MULTIPART_UPLOAD_INITIATED.getSymbol(), () -> getAmazonS3Client().initiateMultipartUpload(request)); } /** * Perform post-write actions. *

   * <p>
   * This operation MUST be called after any PUT/multipart PUT completes
   * successfully.
   * <p>
   * The actions include:
   * <ol>
   *   <li>
   *     Calling
   *     {@link #deleteUnnecessaryFakeDirectories(Path, BulkOperationState)}
   *     if directory markers are not being retained.
   *   </li>
   *   <li>
   *     Updating any metadata store with details on the newly created
   *     object.
   *   </li>
   * </ol>
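   * <p>
   * A minimal illustrative invocation after a single-part PUT (variable
   * names are assumptions; {@code putObjectDirect} is assumed to return the
   * SDK {@code PutObjectResult}):
   * <pre>{@code
   * // putRequest and length are illustrative values
   * PutObjectResult result = putObjectDirect(putRequest);
   * finishedWrite(putRequest.getKey(), length,
   *     result.getETag(), result.getVersionId(), null);
   * }</pre>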
* @param key key written to * @param length total length of file written * @param eTag eTag of the written object * @param versionId S3 object versionId of the written object * @param operationState state of any ongoing bulk operation. * @throws MetadataPersistenceException if metadata about the write could * not be saved to the metadata store and * fs.s3a.metadatastore.fail.on.write.error=true */ @InterfaceAudience.Private @Retries.RetryTranslated("Except if failOnMetadataWriteError=false, in which" + " case RetryExceptionsSwallowed") void finishedWrite(String key, long length, String eTag, String versionId, @Nullable final BulkOperationState operationState) throws MetadataPersistenceException { LOG.debug("Finished write to {}, len {}. etag {}, version {}", key, length, eTag, versionId); Path p = keyToQualifiedPath(key); Preconditions.checkArgument(length >= 0, "content length is negative"); final boolean isDir = objectRepresentsDirectory(key); // kick off an async delete CompletableFuture deletion; if (!keepDirectoryMarkers(p)) { deletion = submit( unboundedThreadPool, getActiveAuditSpan(), () -> { deleteUnnecessaryFakeDirectories( p.getParent(), operationState); return null; }); } else { deletion = null; } // this is only set if there is a metastore to update and the // operationState parameter passed in was null. BulkOperationState stateToClose = null; // See note about failure semantics in S3Guard documentation try { if (hasMetadataStore()) { BulkOperationState activeState = operationState; if (activeState == null) { // create an operation state if there was none, so that the // information gleaned from addAncestors is preserved into the // subsequent put. stateToClose = S3Guard.initiateBulkWrite(metadataStore, isDir ? BulkOperationState.OperationType.Mkdir : BulkOperationState.OperationType.Put, keyToPath(key)); activeState = stateToClose; } S3Guard.addAncestors(metadataStore, p, ttlTimeProvider, activeState); S3AFileStatus status = createUploadFileStatus(p, isDir, length, getDefaultBlockSize(p), username, eTag, versionId); boolean authoritative = false; if (isDir) { // this is a directory marker so put it as such. status.setIsEmptyDirectory(Tristate.TRUE); // and maybe mark as auth authoritative = allowAuthoritative(p); } if (!authoritative) { // for files and non-auth directories S3Guard.putAndReturn(metadataStore, status, ttlTimeProvider, activeState); } else { // authoritative directory S3Guard.putAuthDirectoryMarker(metadataStore, status, ttlTimeProvider, activeState); } } // and catch up with any delete operation. waitForCompletionIgnoringExceptions(deletion); } catch (IOException e) { if (failOnMetadataWriteError) { throw new MetadataPersistenceException(p.toString(), e); } else { LOG.error("S3Guard: Error updating MetadataStore for write to {}", p, e); } instrumentation.errorIgnored(); } finally { // if a new operation state was created, close it. IOUtils.cleanupWithLogger(LOG, stateToClose); } } /** * Should we keep directory markers under the path being created * by mkdir/file creation/rename? * @param path path to probe * @return true if the markers MAY be retained, * false if they MUST be deleted */ private boolean keepDirectoryMarkers(Path path) { return directoryPolicy.keepDirectoryMarkers(path); } /** * Delete mock parent directories which are no longer needed. * Retry policy: retrying; exceptions swallowed. 
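   * <p>
   * Illustrative expansion (the object key is an assumption): after writing
   * {@code a/b/c/file.txt}, the candidate marker keys queued for deletion
   * are {@code a/b/c/}, {@code a/b/} and {@code a/}.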
* @param path path * @param operationState (nullable) operational state for a bulk update */ @Retries.RetryExceptionsSwallowed private void deleteUnnecessaryFakeDirectories(Path path, final BulkOperationState operationState) { List keysToRemove = new ArrayList<>(); while (!path.isRoot()) { String key = pathToKey(path); key = (key.endsWith("/")) ? key : (key + "/"); LOG.trace("To delete unnecessary fake directory {} for {}", key, path); keysToRemove.add(new DeleteObjectsRequest.KeyVersion(key)); path = path.getParent(); } try { removeKeys(keysToRemove, true, operationState); } catch(AmazonClientException | IOException e) { instrumentation.errorIgnored(); if (LOG.isDebugEnabled()) { StringBuilder sb = new StringBuilder(); for(DeleteObjectsRequest.KeyVersion kv : keysToRemove) { sb.append(kv.getKey()).append(","); } LOG.debug("While deleting keys {} ", sb.toString(), e); } } } /** * Create a fake directory, always ending in "/". * Retry policy: retrying; translated. * @param objectName name of directory object. * @throws IOException IO failure */ @Retries.RetryTranslated private void createFakeDirectory(final String objectName) throws IOException { createEmptyObject(objectName); } /** * Used to create an empty file that represents an empty directory. * Retry policy: retrying; translated. * @param objectName object to create * @throws IOException IO failure */ @Retries.RetryTranslated private void createEmptyObject(final String objectName) throws IOException { invoker.retry("PUT 0-byte object ", objectName, true, () -> putObjectDirect(getRequestFactory() .newDirectoryMarkerRequest(objectName))); incrementPutProgressStatistics(objectName, 0); instrumentation.directoryCreated(); } /** * Return the number of bytes that large input files should be optimally * be split into to minimize I/O time. * @deprecated use {@link #getDefaultBlockSize(Path)} instead */ @Deprecated public long getDefaultBlockSize() { return getConf().getLongBytes(FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE); } /** * Get the directory marker policy of this filesystem. * @return the marker policy. 
*/ public DirectoryPolicy getDirectoryMarkerPolicy() { return directoryPolicy; } @Override public String toString() { final StringBuilder sb = new StringBuilder( "S3AFileSystem{"); sb.append("uri=").append(uri); sb.append(", workingDir=").append(workingDir); sb.append(", inputPolicy=").append(inputPolicy); sb.append(", partSize=").append(partSize); sb.append(", enableMultiObjectsDelete=").append(enableMultiObjectsDelete); sb.append(", maxKeys=").append(maxKeys); if (cannedACL != null) { sb.append(", cannedACL=").append(cannedACL.toString()); } sb.append(", readAhead=").append(readAhead); if (getConf() != null) { sb.append(", blockSize=").append(getDefaultBlockSize()); } sb.append(", multiPartThreshold=").append(multiPartThreshold); if (getS3EncryptionAlgorithm() != null) { sb.append(", s3EncryptionAlgorithm='") .append(getS3EncryptionAlgorithm()) .append('\''); } if (blockFactory != null) { sb.append(", blockFactory=").append(blockFactory); } sb.append(", auditManager=").append(auditManager); sb.append(", metastore=").append(metadataStore); sb.append(", authoritativeStore=").append(allowAuthoritativeMetadataStore); sb.append(", authoritativePath=").append(allowAuthoritativePaths); sb.append(", useListV1=").append(useListV1); if (committerIntegration != null) { sb.append(", magicCommitter=").append(isMagicCommitEnabled()); } sb.append(", boundedExecutor=").append(boundedThreadPool); sb.append(", unboundedExecutor=").append(unboundedThreadPool); sb.append(", credentials=").append(credentials); sb.append(", delegation tokens=") .append(delegationTokens.map(Objects::toString).orElse("disabled")); sb.append(", ").append(directoryPolicy); // if logging at debug, toString returns the entire IOStatistics set. if (getInstrumentation() != null) { sb.append(", instrumentation {") .append(getInstrumentation().toString()) .append("}"); } sb.append(", ClientSideEncryption=").append(isCSEEnabled); if (accessPoint != null) { sb.append(", arnForBucket=").append(accessPoint.getFullArn()); } sb.append('}'); return sb.toString(); } /** * Get the partition size for multipart operations. * @return the value as set during initialization */ public long getPartitionSize() { return partSize; } /** * Get the threshold for multipart files. * @return the value as set during initialization */ public long getMultiPartThreshold() { return multiPartThreshold; } /** * Get the maximum key count. * @return a value, valid after initialization */ int getMaxKeys() { return maxKeys; } /** * Is magic commit enabled? * @return true if magic commit support is turned on. */ public boolean isMagicCommitEnabled() { return committerIntegration.isMagicCommitEnabled(); } /** * Predicate: is a path a magic commit path? * True if magic commit is enabled and the path qualifies as special. * @param path path to examine * @return true if the path is or is under a magic directory */ public boolean isMagicCommitPath(Path path) { return committerIntegration.isMagicCommitPath(path); } /** * Increments the statistic {@link Statistic#INVOCATION_GLOB_STATUS}. * Override superclass so as to disable symlink resolution as symlinks * are not supported by S3A. * {@inheritDoc} */ @Override public FileStatus[] globStatus(Path pathPattern) throws IOException { return globStatus(pathPattern, ACCEPT_ALL); } /** * Increments the statistic {@link Statistic#INVOCATION_GLOB_STATUS}. * Override superclass so as to disable symlink resolution as symlinks * are not supported by S3A. 
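   * <p>
   * Illustrative use (the glob pattern is an assumption):
   * <pre>{@code
   * // pattern is illustrative
   * FileStatus[] matches = fs.globStatus(new Path("s3a://bucket/logs/*.gz"));
   * }</pre>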
* * Although an AuditEntryPoint, the globber itself will talk do * the filesystem through the filesystem API, so its operations will * all appear part of separate operations. * {@inheritDoc} */ @Override @AuditEntryPoint public FileStatus[] globStatus( final Path pathPattern, final PathFilter filter) throws IOException { return trackDurationAndSpan( INVOCATION_GLOB_STATUS, pathPattern, () -> Globber.createGlobber(this) .withPathPattern(pathPattern) .withPathFiltern(filter) .withResolveSymlinks(false) .build() .glob()); } /** * Override superclass so as to add statistic collection. * {@inheritDoc} */ @Override @AuditEntryPoint public boolean exists(Path f) throws IOException { final Path path = qualify(f); try { trackDurationAndSpan( INVOCATION_EXISTS, path, () -> innerGetFileStatus(path, false, StatusProbeEnum.ALL)); return true; } catch (FileNotFoundException e) { return false; } } /** * Optimized probe for a path referencing a dir. * Even though it is optimized to a single HEAD, applications * should not over-use this method...it is all too common. * {@inheritDoc} */ @Override @AuditEntryPoint @SuppressWarnings("deprecation") public boolean isDirectory(Path f) throws IOException { final Path path = qualify(f); try { return trackDurationAndSpan( INVOCATION_IS_DIRECTORY, path, () -> innerGetFileStatus(path, false, StatusProbeEnum.DIRECTORIES) .isDirectory()); } catch (FileNotFoundException e) { // not found or it is a file. return false; } } /** * Optimized probe for a path referencing a file. * Even though it is optimized to a single HEAD, applications * should not over-use this method...it is all too common. * {@inheritDoc} */ @Override @AuditEntryPoint @SuppressWarnings("deprecation") public boolean isFile(Path f) throws IOException { final Path path = qualify(f); try { return trackDurationAndSpan(INVOCATION_IS_FILE, path, () -> innerGetFileStatus(path, false, StatusProbeEnum.HEAD_ONLY) .isFile()); } catch (FileNotFoundException e) { // not found or it is a dir. return false; } } /** * When enabled, get the etag of a object at the path via HEAD request and * return it as a checksum object. *
   * <ul>
   *   <li>If a tag has not changed, consider the object unchanged.</li>
   *   <li>Two tags being different does not imply the data is different.</li>
   * </ul>
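   * <p>
   * A minimal illustrative use, assuming etag checksums have been enabled
   * (the path is an assumption):
   * <pre>{@code
   * // path is illustrative; returns null when checksums are disabled
   * FileChecksum checksum =
   *     fs.getFileChecksum(new Path("s3a://bucket/data/file.csv"));
   * }</pre>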
* Different S3 implementations may offer different guarantees. * * This check is (currently) only made if * {@link Constants#ETAG_CHECKSUM_ENABLED} is set; turning it on * has caused problems with Distcp (HADOOP-15273). * * @param f The file path * @param length The length of the file range for checksum calculation * @return The EtagChecksum or null if checksums are not enabled or supported. * @throws IOException IO failure * @see Common Response Headers */ @Override @Retries.RetryTranslated @AuditEntryPoint public EtagChecksum getFileChecksum(Path f, final long length) throws IOException { Preconditions.checkArgument(length >= 0); final Path path = qualify(f); if (getConf().getBoolean(ETAG_CHECKSUM_ENABLED, ETAG_CHECKSUM_ENABLED_DEFAULT)) { return trackDurationAndSpan(INVOCATION_GET_FILE_CHECKSUM, path, () -> { LOG.debug("getFileChecksum({})", path); ObjectMetadata headers = getObjectMetadata(path, null, invoker, "getFileChecksum are"); String eTag = headers.getETag(); return eTag != null ? new EtagChecksum(eTag) : null; }); } else { // disabled return null; } } /** * Get header processing support. * @return a new header processing instance. */ private HeaderProcessing getHeaderProcessing() { return new HeaderProcessing(createStoreContext(), createHeaderProcessingCallbacks()); } @Override @AuditEntryPoint public byte[] getXAttr(final Path path, final String name) throws IOException { checkNotClosed(); try (AuditSpan span = createSpan( INVOCATION_XATTR_GET_NAMED.getSymbol(), path.toString(), null)) { return getHeaderProcessing().getXAttr(path, name); } } @Override @AuditEntryPoint public Map getXAttrs(final Path path) throws IOException { checkNotClosed(); try (AuditSpan span = createSpan( INVOCATION_XATTR_GET_MAP.getSymbol(), path.toString(), null)) { return getHeaderProcessing().getXAttrs(path); } } @Override @AuditEntryPoint public Map getXAttrs(final Path path, final List names) throws IOException { checkNotClosed(); try (AuditSpan span = createSpan( INVOCATION_XATTR_GET_NAMED_MAP.getSymbol(), path.toString(), null)) { return getHeaderProcessing().getXAttrs(path, names); } } @Override @AuditEntryPoint public List listXAttrs(final Path path) throws IOException { checkNotClosed(); try (AuditSpan span = createSpan( INVOCATION_OP_XATTR_LIST.getSymbol(), path.toString(), null)) { return getHeaderProcessing().listXAttrs(path); } } /** * Create the callbacks. * @return An implementation of the header processing * callbacks. */ protected HeaderProcessing.HeaderProcessingCallbacks createHeaderProcessingCallbacks() { return new HeaderProcessingCallbacksImpl(); } /** * Operations needed for Header Processing. */ protected final class HeaderProcessingCallbacksImpl implements HeaderProcessing.HeaderProcessingCallbacks { @Override public ObjectMetadata getObjectMetadata(final String key) throws IOException { return once("getObjectMetadata", key, () -> S3AFileSystem.this.getObjectMetadata(key)); } } /** * {@inheritDoc}. * * This implementation is optimized for S3, which can do a bulk listing * off all entries under a path in one single operation. Thus there is * no need to recursively walk the directory tree. * * Instead a {@link ListObjectsRequest} is created requesting a (windowed) * listing of all entries under the given path. This is used to construct * an {@code ObjectListingIterator} instance, iteratively returning the * sequence of lists of elements under the path. 
This is then iterated * over in a {@code FileStatusListingIterator}, which generates * {@link S3AFileStatus} instances, one per listing entry. * These are then translated into {@link LocatedFileStatus} instances. * * This is essentially a nested and wrapped set of iterators, with some * generator classes. * @param f a path * @param recursive if the subdirectories need to be traversed recursively * * @return an iterator that traverses statuses of the files/directories * in the given path * @throws FileNotFoundException if {@code path} does not exist * @throws IOException if any I/O error occurred */ @Override @Retries.RetryTranslated @AuditEntryPoint public RemoteIterator listFiles(Path f, boolean recursive) throws FileNotFoundException, IOException { final Path path = qualify(f); return toLocatedFileStatusIterator( trackDurationAndSpan(INVOCATION_LIST_FILES, path, () -> innerListFiles(path, recursive, new Listing.AcceptFilesOnly(path), null, true, false))); } /** * Recursive List of files and empty directories. * @param f path to list from * @param recursive recursive? * @return an iterator. * @throws IOException failure */ @InterfaceAudience.Private @Retries.RetryTranslated @AuditEntryPoint public RemoteIterator listFilesAndEmptyDirectories( Path f, boolean recursive) throws IOException { final Path path = qualify(f); return trackDurationAndSpan(INVOCATION_LIST_FILES, path, () -> innerListFiles(path, recursive, Listing.ACCEPT_ALL_BUT_S3N, null, true, false)); } /** * Recursive List of files and empty directories, force metadatastore * to act like it is non-authoritative. * @param f path to list from * @param recursive recursive listing? * @return an iterator. * @throws IOException failure */ @InterfaceAudience.Private @Retries.RetryTranslated @AuditEntryPoint public RemoteIterator listFilesAndEmptyDirectoriesForceNonAuth( Path f, boolean recursive) throws IOException { final Path path = qualify(f); return trackDurationAndSpan(INVOCATION_LIST_FILES, path, () -> innerListFiles(path, recursive, Listing.ACCEPT_ALL_BUT_S3N, null, true, true)); } /** * List files under the path. *
   * <ol>
   *   <li>
   *     If the path is authoritative on the client,
   *     only S3Guard will be queried.
   *   </li>
   *   <li>
   *     Otherwise, the S3Guard values are returned first, then the S3
   *     entries will be retrieved and returned if not already listed.
   *   </li>
   *   <li>
   *     When {@code collectTombstones} is true, S3Guard tombstones will
   *     be used to filter out deleted files.
   *     They MUST be used for normal listings; it is only for
   *     deletion and low-level operations that they MAY be bypassed.
   *   </li>
   *   <li>
   *     The optional {@code status} parameter will be used to skip the
   *     initial getFileStatus call.
   *   </li>
   * </ol>
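   * <p>
   * Illustrative use through the public {@link #listFiles(Path, boolean)}
   * entry point, which ultimately delegates here (the path is an
   * assumption):
   * <pre>{@code
   * // path is illustrative
   * RemoteIterator<LocatedFileStatus> it =
   *     fs.listFiles(new Path("s3a://bucket/dir"), true);
   * while (it.hasNext()) {
   *   LocatedFileStatus status = it.next();
   *   LOG.info("{} [{} bytes]", status.getPath(), status.getLen());
   * }
   * }</pre>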
* * In case of recursive listing, if any of the directories reachable from * the path are not authoritative on the client, this method will query S3 * for all the directories in the listing in addition to returning S3Guard * entries. * * @param f path * @param recursive recursive listing? * @param acceptor file status filter * @param status optional status of path to list. * @param collectTombstones should tombstones be collected from S3Guard? * @param forceNonAuthoritativeMS forces metadata store to act like non * authoritative. This is useful when * listFiles output is used by import tool. * @return an iterator over the listing. * @throws IOException failure */ @Retries.RetryTranslated private RemoteIterator innerListFiles( final Path f, final boolean recursive, final Listing.FileStatusAcceptor acceptor, final S3AFileStatus status, final boolean collectTombstones, final boolean forceNonAuthoritativeMS) throws IOException { Path path = qualify(f); LOG.debug("listFiles({}, {})", path, recursive); try { // if a status was given and it is a file. if (status != null && status.isFile()) { // simple case: File LOG.debug("Path is a file: {}", path); return listing.createSingleStatusIterator( toLocatedFileStatus(status)); } // Assuming the path to be a directory // do a bulk operation. RemoteIterator listFilesAssumingDir = listing.getListFilesAssumingDir(path, recursive, acceptor, collectTombstones, forceNonAuthoritativeMS, getActiveAuditSpan()); // If there are no list entries present, we // fallback to file existence check as the path // can be a file or empty directory. if (!listFilesAssumingDir.hasNext()) { // If file status was already passed, reuse it. final S3AFileStatus fileStatus = status != null ? status : innerGetFileStatus(path, false, StatusProbeEnum.ALL); if (fileStatus.isFile()) { return listing.createSingleStatusIterator( toLocatedFileStatus(fileStatus)); } } // If we have reached here, it means either there are files // in this directory or it is empty. return listFilesAssumingDir; } catch (AmazonClientException e) { throw translateException("listFiles", path, e); } } /** * Override superclass so as to add statistic collection. * {@inheritDoc} */ @Override public RemoteIterator listLocatedStatus(Path f) throws FileNotFoundException, IOException { return listLocatedStatus(f, ACCEPT_ALL); } /** * {@inheritDoc}. * * S3 Optimized directory listing. The initial operation performs the * first bulk listing; extra listings will take place * when all the current set of results are used up. * @param f a path * @param filter a path filter * @return an iterator that traverses statuses of the files/directories * in the given path * @throws FileNotFoundException if {@code path} does not exist * @throws IOException if any I/O error occurred */ @Override @Retries.OnceTranslated("s3guard not retrying") @AuditEntryPoint public RemoteIterator listLocatedStatus(final Path f, final PathFilter filter) throws FileNotFoundException, IOException { Path path = qualify(f); AuditSpan span = entryPoint(INVOCATION_LIST_LOCATED_STATUS, path); LOG.debug("listLocatedStatus({}, {}", path, filter); RemoteIterator iterator = once("listLocatedStatus", path.toString(), () -> { // Assuming the path to be a directory, // trigger a list call directly. final RemoteIterator locatedFileStatusIteratorForDir = listing.getLocatedFileStatusIteratorForDir(path, filter, span); // If no listing is present then path might be a file. 
if (!locatedFileStatusIteratorForDir.hasNext()) { final S3AFileStatus fileStatus = innerGetFileStatus(path, false, StatusProbeEnum.ALL); if (fileStatus.isFile()) { // simple case: File LOG.debug("Path is a file"); return listing.createSingleStatusIterator( filter.accept(path) ? toLocatedFileStatus(fileStatus) : null); } } // Either empty or non-empty directory. return locatedFileStatusIteratorForDir; }); return toLocatedFileStatusIterator(iterator); } /** * Build a {@link S3ALocatedFileStatus} from a {@link FileStatus} instance. * @param status file status * @return a located status with block locations set up from this FS. * @throws IOException IO Problems. */ S3ALocatedFileStatus toLocatedFileStatus(S3AFileStatus status) throws IOException { return new S3ALocatedFileStatus(status, status.isFile() ? getFileBlockLocations(status, 0, status.getLen()) : null); } /** * List any pending multipart uploads whose keys begin with prefix, using * an iterator that can handle an unlimited number of entries. * See {@link #listMultipartUploads(String)} for a non-iterator version of * this. * * @param prefix optional key prefix to search * @return Iterator over multipart uploads. * @throws IOException on failure */ @InterfaceAudience.Private @Retries.RetryTranslated @AuditEntryPoint public MultipartUtils.UploadIterator listUploads(@Nullable String prefix) throws IOException { // span is picked up retained in the listing. return trackDurationAndSpan(MULTIPART_UPLOAD_LIST, prefix, null, () -> MultipartUtils.listMultipartUploads( createStoreContext(), s3, prefix, maxKeys )); } /** * Listing all multipart uploads; limited to the first few hundred. * See {@link #listUploads(String)} for an iterator-based version that does * not limit the number of entries returned. * Retry policy: retry, translated. * @return a listing of multipart uploads. * @param prefix prefix to scan for, "" for none * @throws IOException IO failure, including any uprated AmazonClientException */ @InterfaceAudience.Private @Retries.RetryTranslated public List listMultipartUploads(String prefix) throws IOException { // add a trailing / if needed. if (prefix != null && !prefix.isEmpty() && !prefix.endsWith("/")) { prefix = prefix + "/"; } String p = prefix; return invoker.retry("listMultipartUploads", p, true, () -> { ListMultipartUploadsRequest request = getRequestFactory() .newListMultipartUploadsRequest(p); return s3.listMultipartUploads(request).getMultipartUploads(); }); } /** * Abort a multipart upload. * Retry policy: none. * @param destKey destination key * @param uploadId Upload ID */ @Retries.OnceRaw void abortMultipartUpload(String destKey, String uploadId) { LOG.info("Aborting multipart upload {} to {}", uploadId, destKey); getAmazonS3Client().abortMultipartUpload( getRequestFactory().newAbortMultipartUploadRequest( destKey, uploadId)); } /** * Abort a multipart upload. * Retry policy: none. * @param upload the listed upload to abort. */ @Retries.OnceRaw void abortMultipartUpload(MultipartUpload upload) { String destKey; String uploadId; destKey = upload.getKey(); uploadId = upload.getUploadId(); if (LOG.isInfoEnabled()) { DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); LOG.debug("Aborting multipart upload {} to {} initiated by {} on {}", uploadId, destKey, upload.getInitiator(), df.format(upload.getInitiated())); } getAmazonS3Client().abortMultipartUpload( getRequestFactory().newAbortMultipartUploadRequest( destKey, uploadId)); } /** * Create a new instance of the committer statistics. 
* @return a new committer statistics instance */ public CommitterStatistics newCommitterStatistics() { return statisticsContext.newCommitterStatistics(); } @SuppressWarnings("deprecation") @Override public boolean hasPathCapability(final Path path, final String capability) throws IOException { final Path p = makeQualified(path); String cap = validatePathCapabilityArgs(p, capability); switch (cap) { case CommitConstants.STORE_CAPABILITY_MAGIC_COMMITTER: case CommitConstants.STORE_CAPABILITY_MAGIC_COMMITTER_OLD: // capability depends on FS configuration return isMagicCommitEnabled(); case SelectConstants.S3_SELECT_CAPABILITY: // select is only supported if enabled and client side encryption is // disabled. return !isCSEEnabled && SelectBinding.isSelectEnabled(getConf()); case CommonPathCapabilities.FS_CHECKSUMS: // capability depends on FS configuration return getConf().getBoolean(ETAG_CHECKSUM_ENABLED, ETAG_CHECKSUM_ENABLED_DEFAULT); case CommonPathCapabilities.ABORTABLE_STREAM: return true; case CommonPathCapabilities.FS_MULTIPART_UPLOADER: // client side encryption doesn't support multipart uploader. return !isCSEEnabled; // this client is safe to use with buckets // containing directory markers anywhere in // the hierarchy case STORE_CAPABILITY_DIRECTORY_MARKER_AWARE: return true; // etags are avaialable in listings, but they // are not consistent across renames. // therefore, only availability is declared case CommonPathCapabilities.ETAGS_AVAILABLE: return true; /* * Marker policy capabilities are handed off. */ case STORE_CAPABILITY_DIRECTORY_MARKER_POLICY_KEEP: case STORE_CAPABILITY_DIRECTORY_MARKER_POLICY_DELETE: case STORE_CAPABILITY_DIRECTORY_MARKER_POLICY_AUTHORITATIVE: case STORE_CAPABILITY_DIRECTORY_MARKER_ACTION_KEEP: case STORE_CAPABILITY_DIRECTORY_MARKER_ACTION_DELETE: return getDirectoryMarkerPolicy().hasPathCapability(path, cap); default: return super.hasPathCapability(p, cap); } } /** * Return the capabilities of this filesystem instance. * * This has been supplanted by {@link #hasPathCapability(Path, String)}. * @param capability string to query the stream support for. * @return whether the FS instance has the capability. */ @Deprecated @Override public boolean hasCapability(String capability) { try { return hasPathCapability(new Path("/"), capability); } catch (IOException ex) { // should never happen, so log and downgrade. LOG.debug("Ignoring exception on hasCapability({}})", capability, ex); return false; } } /** * Get a shared copy of the AWS credentials, with its reference * counter updated. * Caller is required to call {@code close()} on this after * they have finished using it. * @param purpose what is this for? This is initially for logging * @return a reference to shared credentials. */ public AWSCredentialProviderList shareCredentials(final String purpose) { LOG.debug("Sharing credentials for: {}", purpose); return credentials.share(); } @VisibleForTesting public ITtlTimeProvider getTtlTimeProvider() { return ttlTimeProvider; } @VisibleForTesting protected void setTtlTimeProvider(ITtlTimeProvider ttlTimeProvider) { this.ttlTimeProvider = ttlTimeProvider; metadataStore.setTtlTimeProvider(ttlTimeProvider); } /** * This is a proof of concept of a select API. * @param source path to source data * @param expression select expression * @param options request configuration from the builder. 
  /**
   * This is a proof of concept of a select API.
   * @param source path to source data
   * @param expression select expression
   * @param options request configuration from the builder.
   * @param providedStatus any passed in status
   * @return the stream of the results
   * @throws IOException IO failure
   */
  @Retries.RetryTranslated
  @AuditEntryPoint
  private FSDataInputStream select(final Path source,
      final String expression,
      final Configuration options,
      final Optional<S3AFileStatus> providedStatus)
      throws IOException {
    final AuditSpan auditSpan = entryPoint(OBJECT_SELECT_REQUESTS, source);
    requireSelectSupport(source);
    final Path path = makeQualified(source);
    final S3AFileStatus fileStatus = extractOrFetchSimpleFileStatus(path,
        providedStatus);

    // readahead range can be dynamically set
    long ra = options.getLong(READAHEAD_RANGE, readAhead);
    S3ObjectAttributes objectAttributes = createObjectAttributes(fileStatus);
    S3AReadOpContext readContext = createReadContext(fileStatus, inputPolicy,
        changeDetectionPolicy, ra, auditSpan);

    if (changeDetectionPolicy.getSource() != ChangeDetectionPolicy.Source.None
        && fileStatus.getEtag() != null) {
      // if there is change detection, and the status includes at least an
      // etag,
      // check that the object metadata lines up with what is expected
      // based on the object attributes (which may contain an eTag or
      // versionId).
      // This is because the select API doesn't offer this.
      // (note: this is trouble for version checking as cannot force the old
      // version in the final read; nor can we check the etag match)
      ChangeTracker changeTracker =
          new ChangeTracker(uri.toString(),
              changeDetectionPolicy,
              readContext.getS3AStatisticsContext()
                  .newInputStreamStatistics()
                  .getChangeTrackerStatistics(),
              objectAttributes);

      // will retry internally if wrong version detected
      Invoker readInvoker = readContext.getReadInvoker();
      getObjectMetadata(path, changeTracker, readInvoker, "select");
    }

    // instantiate S3 Select support using the current span
    // as the active span for operations.
    SelectBinding selectBinding = new SelectBinding(
        createWriteOperationHelper(auditSpan));

    // build and execute the request
    return selectBinding.select(
        readContext,
        expression,
        options,
        objectAttributes);
  }

  /**
   * Verify the FS supports S3 Select.
   * @param source source file.
   * @throws UnsupportedOperationException if not.
   */
  private void requireSelectSupport(final Path source)
      throws UnsupportedOperationException {
    if (!isCSEEnabled && !SelectBinding.isSelectEnabled(getConf())) {
      throw new UnsupportedOperationException(
          SelectConstants.SELECT_UNSUPPORTED);
    }
  }

  /**
   * Extract the status from the optional parameter, querying
   * S3Guard/s3 if it is absent.
   * @param path path of the status
   * @param optStatus optional status
   * @return a file status
   * @throws FileNotFoundException if there is no normal file at that path
   * @throws IOException IO failure
   */
  private S3AFileStatus extractOrFetchSimpleFileStatus(
      final Path path,
      final Optional<S3AFileStatus> optStatus)
      throws IOException {
    S3AFileStatus fileStatus;
    if (optStatus.isPresent()) {
      fileStatus = optStatus.get();
    } else {
      // this looks at S3guard and gets any type of status back,
      // if it falls back to S3 it does a HEAD only.
      // therefore: if there is no S3Guard and there is a dir, this
      // will raise a FileNotFoundException
      fileStatus = innerGetFileStatus(path, false,
          StatusProbeEnum.HEAD_ONLY);
    }
    // we check here for the passed in status or the S3Guard value
    // for being a directory
    if (fileStatus.isDirectory()) {
      throw new FileNotFoundException(path.toString() + " is a directory");
    }
    return fileStatus;
  }
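  /*
   * Editor's sketch (not part of the original source): the private select()
   * method above is normally reached through the public openFile() builder,
   * by setting the SQL expression as a mandatory option. A hedged example,
   * assuming select is enabled on the bucket and "fs"/"path" already exist:
   *
   *   CompletableFuture<FSDataInputStream> future = fs.openFile(path)
   *       .must(SelectConstants.SELECT_SQL,
   *           "SELECT * FROM S3OBJECT s WHERE s._1 = 'foo'")
   *       .build();
   *   try (FSDataInputStream in = future.get()) {   // exception handling elided
   *     // read the results of the server-side query
   *   }
   *
   * If S3 Select is disabled or unsupported on this store,
   * requireSelectSupport() rejects the request.
   */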
  /**
   * Initiate the open or select operation.
   * This is invoked from both the FileSystem and FileContext APIs.
   * It's declared as an audit entry point but the span creation is pushed
   * down into the open/select methods it ultimately calls.
   * @param rawPath path to the file
   * @param parameters open file parameters from the builder.
   * @return a future which will evaluate to the opened/selected file.
   * @throws IOException failure to resolve the link.
   * @throws PathIOException operation is a select request but S3 select is
   * disabled
   * @throws IllegalArgumentException unknown mandatory key
   */
  @Override
  @Retries.RetryTranslated
  @AuditEntryPoint
  public CompletableFuture<FSDataInputStream> openFileWithOptions(
      final Path rawPath,
      final OpenFileParameters parameters) throws IOException {
    final Path path = qualify(rawPath);
    Configuration options = parameters.getOptions();
    Set<String> mandatoryKeys = parameters.getMandatoryKeys();
    String sql = options.get(SelectConstants.SELECT_SQL, null);
    boolean isSelect = sql != null;
    // choice of keys depends on open type
    if (isSelect) {
      rejectUnknownMandatoryKeys(
          mandatoryKeys,
          InternalSelectConstants.SELECT_OPTIONS,
          "for " + path + " in S3 Select operation");
    } else {
      rejectUnknownMandatoryKeys(
          mandatoryKeys,
          InternalConstants.STANDARD_OPENFILE_KEYS,
          "for " + path + " in non-select file I/O");
    }
    FileStatus providedStatus = parameters.getStatus();
    S3AFileStatus fileStatus;
    if (providedStatus != null) {
      Preconditions.checkArgument(path.equals(providedStatus.getPath()),
          "FileStatus parameter is not for the path %s: %s",
          path, providedStatus);
      if (providedStatus instanceof S3AFileStatus) {
        // can use this status to skip our own probes,
        // including etag and version.
        LOG.debug("File was opened with a supplied S3AFileStatus;"
            + " skipping getFileStatus call in open() operation: {}",
            providedStatus);
        fileStatus = (S3AFileStatus) providedStatus;
      } else if (providedStatus instanceof S3ALocatedFileStatus) {
        LOG.debug("File was opened with a supplied S3ALocatedFileStatus;"
            + " skipping getFileStatus call in open() operation: {}",
            providedStatus);
        fileStatus = ((S3ALocatedFileStatus) providedStatus).toS3AFileStatus();
      } else {
        LOG.debug("Ignoring file status {}", providedStatus);
        fileStatus = null;
      }
    } else {
      fileStatus = null;
    }
    Optional<S3AFileStatus> ost = Optional.ofNullable(fileStatus);
    CompletableFuture<FSDataInputStream> result = new CompletableFuture<>();
    if (!isSelect) {
      // normal path.
      unboundedThreadPool.submit(() ->
          LambdaUtils.eval(result,
              () -> open(path, Optional.of(options), ost)));
    } else {
      // it is a select statement.
      // fail fast if the operation is not available
      requireSelectSupport(path);
      // submit the query
      unboundedThreadPool.submit(() ->
          LambdaUtils.eval(result,
              () -> select(path, sql, options, ost)));
    }
    return result;
  }

  @Override
  @AuditEntryPoint
  public S3AMultipartUploaderBuilder createMultipartUploader(
      final Path basePath)
      throws IOException {
    if (isCSEEnabled) {
      throw new UnsupportedOperationException("Multi-part uploader not "
          + "supported for Client side encryption.");
    }
    final Path path = makeQualified(basePath);
    try (AuditSpan span = entryPoint(MULTIPART_UPLOAD_INSTANTIATED, path)) {
      StoreContext ctx = createStoreContext();
      return new S3AMultipartUploaderBuilder(this,
          createWriteOperationHelper(span),
          ctx,
          path,
          statisticsContext.createMultipartUploaderStatistics());
    }
  }
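  /*
   * Editor's sketch (not part of the original source): openFileWithOptions()
   * above skips its own HEAD request when the builder supplies a FileStatus,
   * which matters when many small files are opened after a listing. A hedged
   * example, assuming "fs" exists and "status" came from a prior listing:
   *
   *   FSDataInputStream in = fs.openFile(status.getPath())
   *       .withFileStatus(status)   // reuse length/etag from the listing
   *       .opt("fs.s3a.experimental.input.fadvise", "sequential")
   *       .build()
   *       .get();                   // exception handling elided
   *
   * The status must be for exactly the same path, or the
   * Preconditions.checkArgument() call in openFileWithOptions() rejects it.
   */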
  /**
   * Build an immutable store context.
   * If called while the FS is being initialized,
   * some of the context will be incomplete.
   * new store context instances should be created as appropriate.
   * @return the store context of this FS.
   */
  @InterfaceAudience.Private
  public StoreContext createStoreContext() {
    return new StoreContextBuilder().setFsURI(getUri())
        .setBucket(getBucket())
        .setConfiguration(getConf())
        .setUsername(getUsername())
        .setOwner(owner)
        .setExecutor(boundedThreadPool)
        .setExecutorCapacity(executorCapacity)
        .setInvoker(invoker)
        .setInstrumentation(statisticsContext)
        .setStorageStatistics(getStorageStatistics())
        .setInputPolicy(getInputPolicy())
        .setChangeDetectionPolicy(changeDetectionPolicy)
        .setMultiObjectDeleteEnabled(enableMultiObjectsDelete)
        .setMetadataStore(metadataStore)
        .setUseListV1(useListV1)
        .setContextAccessors(new ContextAccessorsImpl())
        .setTimeProvider(getTtlTimeProvider())
        .setAuditor(getAuditor())
        .setEnableCSE(isCSEEnabled)
        .build();
  }

  /**
   * Create a marker tools operations binding for this store.
   * Auditing:
   * @param target target path
   * @return callbacks for operations.
   * @throws IOException if raised during span creation
   */
  @AuditEntryPoint
  @InterfaceAudience.Private
  public MarkerToolOperations createMarkerToolOperations(final String target)
      throws IOException {
    createSpan("marker-tool-scan", target, null);
    return new MarkerToolOperationsImpl(new OperationCallbacksImpl());
  }

  /**
   * This is purely for testing, as it force initializes all static
   * initializers. See HADOOP-17385 for details.
   */
  @InterfaceAudience.Private
  public static void initializeClass() {
    LOG.debug("Initialize S3A class");
  }

  /**
   * The implementation of context accessors.
   */
  private class ContextAccessorsImpl implements ContextAccessors {

    @Override
    public Path keyToPath(final String key) {
      return keyToQualifiedPath(key);
    }

    @Override
    public String pathToKey(final Path path) {
      return S3AFileSystem.this.pathToKey(path);
    }

    @Override
    public File createTempFile(final String prefix, final long size)
        throws IOException {
      return createTmpFileForWrite(prefix, size, getConf());
    }

    @Override
    public String getBucketLocation() throws IOException {
      return S3AFileSystem.this.getBucketLocation();
    }

    @Override
    public Path makeQualified(final Path path) {
      return S3AFileSystem.this.makeQualified(path);
    }

    @Override
    public AuditSpan getActiveAuditSpan() {
      return S3AFileSystem.this.getActiveAuditSpan();
    }

    @Override
    public RequestFactory getRequestFactory() {
      return S3AFileSystem.this.getRequestFactory();
    }
  }

  /**
   * a method to know if Client side encryption is enabled or not.
   * @return a boolean stating if CSE is enabled.
   */
  public boolean isCSEEnabled() {
    return isCSEEnabled;
  }
}
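/*
 * Editor's sketch (not part of the original source): because
 * createMultipartUploader() rejects stores using client-side encryption,
 * callers can probe the capability first rather than catching
 * UnsupportedOperationException. Assumes an existing FileSystem "fs" and a
 * base path "basePath":
 *
 *   if (fs.hasPathCapability(basePath,
 *       CommonPathCapabilities.FS_MULTIPART_UPLOADER)) {
 *     MultipartUploader uploader =
 *         fs.createMultipartUploader(basePath).build();
 *     // ... upload parts, then complete or abort ...
 *   }
 *
 * isCSEEnabled() is the switch behind both this capability probe and the
 * multipart uploader and S3 Select restrictions in this class.
 */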