alluxio.underfs.s3.S3OutputStream
/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.underfs.s3;

import alluxio.Constants;
import alluxio.util.io.PathUtils;

import com.google.common.base.Preconditions;
import org.jets3t.service.S3Service;
import org.jets3t.service.model.S3Object;
import org.jets3t.service.model.StorageObject;
import org.jets3t.service.utils.Mimetypes;
import org.jets3t.service.utils.MultipartUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicBoolean;

import javax.annotation.concurrent.NotThreadSafe;

/**
 * A stream for writing a file into S3. The data is buffered in a temporary file on the local
 * disk and uploaded as a complete object when the {@link #close()} method is called.
 */
@NotThreadSafe
public class S3OutputStream extends OutputStream {
  private static final Logger LOG = LoggerFactory.getLogger(S3OutputStream.class);

  /** Bucket name of the Alluxio S3 bucket. */
  private final String mBucketName;

  /** Key of the file when it is uploaded to S3. */
  private final String mKey;

  /** The local file that will be uploaded when the stream is closed. */
  private final File mFile;

  /** The JetS3t client for S3 operations. */
  private final S3Service mClient;

  /** The output stream to a local file where the file will be buffered until closed. */
  private OutputStream mLocalOutputStream;

  /** The MD5 hash of the file. */
  private MessageDigest mHash;

  /** Flag to indicate this stream has been closed, to ensure close is only done once. */
  private final AtomicBoolean mClosed = new AtomicBoolean(false);

  /**
   * A {@link MultipartUtils} to upload the file to S3 using Multipart Uploads. A Multipart Upload
   * sends an object's data in parts instead of all at once, which works around S3's 5GB limit on
   * a single object PUT operation.
   *
   * AWS recommends (http://docs.aws.amazon.com/AmazonS3/latest/dev/UploadingObjects.html)
   * using Multipart Uploads for files larger than 100MB.
   */
  private static final MultipartUtils MULTIPART_UTIL = new MultipartUtils(Constants.MB * 100);

  /**
   * Constructs a new stream for writing a file.
   *
   * @param bucketName the name of the bucket
   * @param key the key of the file
   * @param client the JetS3t client
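   * @throws IOException if the temporary file cannot be created on the local disk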
   */
  public S3OutputStream(String bucketName, String key, S3Service client) throws IOException {
    Preconditions.checkArgument(bucketName != null && !bucketName.isEmpty(), "Bucket name must "
        + "not be null or empty.");
    mBucketName = bucketName;
    mKey = key;
    mClient = client;
    mFile = new File(PathUtils.concatPath("/tmp", UUID.randomUUID()));
    try {
      mHash = MessageDigest.getInstance("MD5");
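      // Wrap the local file stream in a DigestOutputStream so the MD5 digest is
      // computed incrementally as bytes are written, avoiding a second pass over
      // the file at upload time.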
      mLocalOutputStream =
          new BufferedOutputStream(new DigestOutputStream(new FileOutputStream(mFile), mHash));
    } catch (NoSuchAlgorithmException e) {
      LOG.warn("Algorithm not available for MD5 hash.", e);
      mHash = null;
      mLocalOutputStream = new BufferedOutputStream(new FileOutputStream(mFile));
    }
  }

  @Override
  public void write(int b) throws IOException {
    mLocalOutputStream.write(b);
  }

  @Override
  public void write(byte[] b) throws IOException {
    mLocalOutputStream.write(b, 0, b.length);
  }

  @Override
  public void write(byte[] b, int off, int len) throws IOException {
    mLocalOutputStream.write(b, off, len);
  }

  @Override
  public void flush() throws IOException {
    mLocalOutputStream.flush();
  }

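  /**
   * Closes this stream: the buffered file is uploaded to S3, using a multipart upload if it
   * is larger than 100MB and a single PUT otherwise; the temporary file is then deleted.
   * Subsequent calls are no-ops.
   */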
  @Override
  public void close() throws IOException {
    if (mClosed.getAndSet(true)) {
      return;
    }
    mLocalOutputStream.close();
    try {
      S3Object obj = new S3Object(mKey);
      obj.setBucketName(mBucketName);
      obj.setDataInputFile(mFile);
      obj.setContentLength(mFile.length());
      obj.setContentType(Mimetypes.MIMETYPE_BINARY_OCTET_STREAM);
      if (mHash != null) {
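        // JetS3t sends this digest as the Content-MD5 header so S3 can verify the
        // integrity of the uploaded bytes server-side.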
        obj.setMd5Hash(mHash.digest());
      } else {
        LOG.warn("MD5 was not computed for: {}", mKey);
      }
      if (MULTIPART_UTIL.isFileLargerThanMaxPartSize(mFile)) {
        // A large object is split into parts and uploaded to S3 in parallel.
        List<StorageObject> objectsToUploadAsMultipart = new ArrayList<>();
        objectsToUploadAsMultipart.add(obj);
        MULTIPART_UTIL.uploadObjects(mBucketName, mClient, objectsToUploadAsMultipart, null);
      } else {
        // Upload with a single PUT when multipart is unnecessary, avoiding the
        // extra overhead.
        mClient.putObject(mBucketName, obj);
      }
      if (!mFile.delete()) {
        LOG.error("Failed to delete temporary file @ {}", mFile.getPath());
      }
    } catch (Exception e) {
      LOG.error("Failed to upload {}. Temporary file @ {}", mKey, mFile.getPath());
      throw new IOException(e);
    }
  }
}
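
For context, here is a minimal usage sketch. It is hypothetical: the bucket name, key, and
credentials are placeholders, and it assumes JetS3t's RestS3Service and AWSCredentials for
constructing the client. Writes go to the temporary local file; the upload to S3 happens only
when close() runs, as a single PUT or, for files larger than 100MB, a multipart upload.

import alluxio.underfs.s3.S3OutputStream;

import org.jets3t.service.S3Service;
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
import org.jets3t.service.security.AWSCredentials;

import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

public class S3OutputStreamExample {
  public static void main(String[] args) throws Exception {
    // Placeholder credentials; RestS3Service is JetS3t's REST-based S3 client.
    S3Service client = new RestS3Service(new AWSCredentials("ACCESS_KEY", "SECRET_KEY"));
    // Writes are buffered to a temporary local file; nothing reaches S3 yet.
    try (OutputStream out = new S3OutputStream("my-bucket", "path/to/object", client)) {
      out.write("hello, s3".getBytes(StandardCharsets.UTF_8));
    } // close() uploads the buffered file and deletes the temporary copy
  }
}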