All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.fs.s3a.prefetch.S3ARemoteObject Maven / Gradle / Ivy

Go to download

This module contains code to support integration with Amazon Web Services. It also declares the dependencies needed to work with AWS services.

There is a newer version: 3.4.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hadoop.fs.s3a.prefetch;


import java.io.IOException;
import java.io.InputStream;
import java.util.IdentityHashMap;
import java.util.Map;

import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.fs.impl.prefetch.Validate;
import org.apache.hadoop.fs.s3a.Invoker;
import org.apache.hadoop.fs.s3a.S3AInputStream;
import org.apache.hadoop.fs.s3a.S3AReadOpContext;
import org.apache.hadoop.fs.s3a.S3ObjectAttributes;
import org.apache.hadoop.fs.s3a.impl.ChangeTracker;
import org.apache.hadoop.fs.s3a.impl.SDKStreamDrainer;
import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics;
import org.apache.hadoop.fs.statistics.DurationTracker;

/**
 * Encapsulates low level interactions with S3 object on AWS.
 */
public class S3ARemoteObject {

  private static final Logger LOG =
      LoggerFactory.getLogger(S3ARemoteObject.class);

  /**
   * Read-specific operation context.
   */
  private final S3AReadOpContext context;

  /**
   * S3 object attributes.
   */
  private final S3ObjectAttributes s3Attributes;

  /**
   * Callbacks used for interacting with the underlying S3 client.
   */
  private final S3AInputStream.InputStreamCallbacks client;

  /**
   * Used for reporting input stream access statistics.
   */
  private final S3AInputStreamStatistics streamStatistics;

  /**
   * Enforces change tracking related policies.
   */
  private final ChangeTracker changeTracker;

  /**
   * Maps a stream returned by openForRead() to the associated S3 object.
   * That allows us to close the object when closing the stream.
   */
  private final Map s3Objects;

  /**
   * uri of the object being read.
   */
  private final String uri;

  /**
   * size of a buffer to create when draining the stream.
   */
  private static final int DRAIN_BUFFER_SIZE = 16384;

  /**
   * Initializes a new instance of the {@code S3ARemoteObject} class.
   *
   * @param context read-specific operation context.
   * @param s3Attributes attributes of the S3 object being read.
   * @param client callbacks used for interacting with the underlying S3 client.
   * @param streamStatistics statistics about stream access.
   * @param changeTracker helps enforce change tracking policy.
   *
   * @throws IllegalArgumentException if context is null.
   * @throws IllegalArgumentException if s3Attributes is null.
   * @throws IllegalArgumentException if client is null.
   * @throws IllegalArgumentException if streamStatistics is null.
   * @throws IllegalArgumentException if changeTracker is null.
   */
  public S3ARemoteObject(
      S3AReadOpContext context,
      S3ObjectAttributes s3Attributes,
      S3AInputStream.InputStreamCallbacks client,
      S3AInputStreamStatistics streamStatistics,
      ChangeTracker changeTracker) {

    Validate.checkNotNull(context, "context");
    Validate.checkNotNull(s3Attributes, "s3Attributes");
    Validate.checkNotNull(client, "client");
    Validate.checkNotNull(streamStatistics, "streamStatistics");
    Validate.checkNotNull(changeTracker, "changeTracker");

    this.context = context;
    this.s3Attributes = s3Attributes;
    this.client = client;
    this.streamStatistics = streamStatistics;
    this.changeTracker = changeTracker;
    this.s3Objects = new IdentityHashMap<>();
    this.uri = this.getPath();
  }

  /**
   * Gets an instance of {@code Invoker} for interacting with S3 API.
   *
   * @return an instance of {@code Invoker} for interacting with S3 API.
   */
  public Invoker getReadInvoker() {
    return context.getReadInvoker();
  }

  /**
   * Gets an instance of {@code S3AInputStreamStatistics} used for reporting access metrics.
   *
   * @return an instance of {@code S3AInputStreamStatistics} used for reporting access metrics.
   */
  public S3AInputStreamStatistics getStatistics() {
    return streamStatistics;
  }

  /**
   * Gets the path of this file.
   *
   * @return the path of this file.
   */
  public String getPath() {
    return getPath(s3Attributes);
  }

  /**
   * Gets the path corresponding to the given s3Attributes.
   *
   * @param s3Attributes attributes of an S3 object.
   * @return the path corresponding to the given s3Attributes.
   */
  public static String getPath(S3ObjectAttributes s3Attributes) {
    return String.format("s3a://%s/%s", s3Attributes.getBucket(),
        s3Attributes.getKey());
  }

  /**
   * Gets the size of this file.
   * Its value is cached once obtained from AWS.
   *
   * @return the size of this file.
   */
  public long size() {
    return s3Attributes.getLen();
  }

  /**
   * Opens a section of the file for reading.
   *
   * @param offset Start offset (0 based) of the section to read.
   * @param size Size of the section to read.
   * @return an {@code InputStream} corresponding to the given section of this file.
   *
   * @throws IOException if there is an error opening this file section for reading.
   * @throws IllegalArgumentException if offset is negative.
   * @throws IllegalArgumentException if offset is greater than or equal to file size.
   * @throws IllegalArgumentException if size is greater than the remaining bytes.
   */
  public InputStream openForRead(long offset, int size) throws IOException {
    Validate.checkNotNegative(offset, "offset");
    Validate.checkLessOrEqual(offset, "offset", size(), "size()");
    Validate.checkLessOrEqual(size, "size", size() - offset, "size() - offset");

    streamStatistics.streamOpened();
    final GetObjectRequest request =
        client.newGetRequest(s3Attributes.getKey())
            .withRange(offset, offset + size - 1);
    changeTracker.maybeApplyConstraint(request);

    String operation = String.format(
        "%s %s at %d", S3AInputStream.OPERATION_OPEN, uri, offset);
    DurationTracker tracker = streamStatistics.initiateGetRequest();
    S3Object object = null;

    try {
      object = Invoker.once(operation, uri, () -> client.getObject(request));
    } catch (IOException e) {
      tracker.failed();
      throw e;
    } finally {
      tracker.close();
    }

    changeTracker.processResponse(object, operation, offset);
    InputStream stream = object.getObjectContent();
    synchronized (s3Objects) {
      s3Objects.put(stream, object);
    }

    return stream;
  }

  void close(InputStream inputStream, int numRemainingBytes) {
    S3Object obj;
    synchronized (s3Objects) {
      obj = s3Objects.remove(inputStream);
      if (obj == null) {
        throw new IllegalArgumentException("inputStream not found");
      }
    }
    SDKStreamDrainer drainer = new SDKStreamDrainer(
        uri,
        obj,
        (S3ObjectInputStream)inputStream,
        false,
        numRemainingBytes,
        streamStatistics,
        "close() operation");
    if (numRemainingBytes <= context.getAsyncDrainThreshold()) {
      // don't bother with async io.
      drainer.apply();
    } else {
      LOG.debug("initiating asynchronous drain of {} bytes", numRemainingBytes);
      // schedule an async drain/abort
      client.submit(drainer);
    }
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy