All Downloads are FREE. Search and download functionalities are using the official Maven repository.

alluxio.underfs.ObjectLowLevelOutputStream Maven / Gradle / Ivy

There is a newer version: 313
Show newest version
/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.underfs;

import alluxio.Constants;
import alluxio.conf.AlluxioConfiguration;
import alluxio.conf.PropertyKey;
import alluxio.retry.CountingRetry;
import alluxio.retry.RetryPolicy;
import alluxio.retry.RetryUtils;
import alluxio.util.CommonUtils;
import alluxio.util.io.PathUtils;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import org.apache.commons.codec.binary.Base64;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Supplier;
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;

/**
 * [Experimental] A stream for writing a file into object storage using streaming upload.
 * The data transfer is done using object storage low-level multipart upload.
 * 

* We upload data in partitions. When write(), the data will be persisted to * a temporary file {@link #mFile} on the local disk. When the data {@link #mPartitionOffset} * in this temporary file reaches the {@link #mPartitionSize}, the file will be submitted * to the upload executor {@link #mExecutor} and we do not wait for uploads to finish. * A new temp file will be created for the future write and the {@link #mPartitionOffset} * will be reset to zero. The process goes until all the data has been written to temp files. *

* In flush(), we upload the buffered data if they are bigger than 5MB * and wait for all uploads to finish. The temp files will be deleted after uploading successfully. *

* In close(), we upload the last part of data (if exists), wait for all uploads to finish, * and complete the multipart upload. *

* close() will not be retried, but all the multipart upload * related operations(init, upload, complete, and abort) will be retried. *

* If an error occurs and we have no way to recover, we abort the multipart uploads. * Some multipart uploads may not be completed/aborted in normal ways and need periodical cleanup * by enabling the {@link PropertyKey#UNDERFS_CLEANUP_ENABLED}. * When a leader master starts or a cleanup interval is reached, all the multipart uploads * older than clean age will be cleaned. */ @NotThreadSafe public abstract class ObjectLowLevelOutputStream extends OutputStream implements ContentHashable { protected static final Logger LOG = LoggerFactory.getLogger(ObjectLowLevelOutputStream.class); protected final List mTmpDirs; /** * Only parts bigger than 5MB could be uploaded through multipart upload, * except the last part. */ protected static final long UPLOAD_THRESHOLD = 5L * Constants.MB; /** Bucket name of the object storage bucket. */ protected final String mBucketName; /** Key of the file when it is uploaded to object storage. */ protected final String mKey; /** The retry policy of this multipart upload. */ protected final Supplier mRetryPolicy = () -> new CountingRetry(5); /** Pre-allocated byte buffer for writing single characters. */ protected final byte[] mSingleCharWrite = new byte[1]; /** The MD5 hash of the file. */ @Nullable protected MessageDigest mHash; /** Flag to indicate this stream has been closed, to ensure close is only done once. */ protected boolean mClosed = false; /** When the offset reaches the partition size, we upload the temp file. */ protected long mPartitionOffset; /** The maximum allowed size of a partition. */ protected final long mPartitionSize; /** * The local temp file that will be uploaded when reaches the partition size * or when flush() is called and this file is bigger than {@link #UPLOAD_THRESHOLD}. */ @Nullable protected File mFile; /** The output stream to the local temp file. */ @Nullable protected OutputStream mLocalOutputStream; /** * Give each upload request a unique and continuous id * so that object storage knows the part sequence to concatenate the parts to a single object. */ private final AtomicInteger mPartNumber; /** Executing the upload tasks. */ private final ListeningExecutorService mExecutor; /** Store the future of tags. */ private final List> mFutures = new ArrayList<>(); /** upload part timeout, null means no timeout. */ @Nullable private Long mUploadPartTimeoutMills; /** Whether the multi upload has been initialized. */ private boolean mMultiPartUploadInitialized = false; /** * Constructs a new stream for writing a file. * * @param bucketName the name of the bucket * @param key the key of the file * @param streamingUploadPartitionSize the size in bytes for partitions of streaming uploads * @param executor executor * @param ufsConf the object store under file system configuration */ public ObjectLowLevelOutputStream( String bucketName, String key, ListeningExecutorService executor, long streamingUploadPartitionSize, AlluxioConfiguration ufsConf) { Preconditions.checkArgument(bucketName != null && !bucketName.isEmpty(), "Bucket name must not be null or empty."); mBucketName = bucketName; mTmpDirs = ufsConf.getList(PropertyKey.TMP_DIRS); Preconditions.checkArgument(!mTmpDirs.isEmpty(), "No temporary directories available"); mExecutor = executor; mKey = key; initHash(); mPartitionSize = Math.max(UPLOAD_THRESHOLD, streamingUploadPartitionSize); mPartNumber = new AtomicInteger(1); if (ufsConf.isSet(PropertyKey.UNDERFS_OBJECT_STORE_STREAMING_UPLOAD_PART_TIMEOUT)) { mUploadPartTimeoutMills = ufsConf.getDuration(PropertyKey.UNDERFS_OBJECT_STORE_STREAMING_UPLOAD_PART_TIMEOUT) .toMillis(); } } @Override public void write(int b) throws IOException { mSingleCharWrite[0] = (byte) b; write(mSingleCharWrite); } @Override public void write(byte[] b) throws IOException { write(b, 0, b.length); } @Override public void write(byte[] b, int off, int len) throws IOException { if (b == null || len == 0) { return; } Preconditions.checkNotNull(b); Preconditions.checkArgument(off >= 0 && off <= b.length && len >= 0 && off + len <= b.length); if (mFile == null) { initNewFile(); } if (mPartitionOffset + len <= mPartitionSize) { mLocalOutputStream.write(b, off, len); mPartitionOffset += len; } else { int firstLen = (int) (mPartitionSize - mPartitionOffset); mLocalOutputStream.write(b, off, firstLen); mPartitionOffset += firstLen; uploadPart(); write(b, off + firstLen, len - firstLen); } } @Override public void flush() throws IOException { if (!mMultiPartUploadInitialized) { return; } // We try to minimize the time use to close() // because Fuse release() method which calls close() is async. // In flush(), we upload the current writing file if it is bigger than 5 MB, // and wait for all current upload to complete. if (mLocalOutputStream != null) { mLocalOutputStream.flush(); } if (mPartitionOffset > UPLOAD_THRESHOLD) { uploadPart(); } waitForAllPartsUpload(); } @Override public void close() throws IOException { if (mClosed) { return; } // Set the closed flag, we never retry close() even if exception occurs mClosed = true; // Multi-part upload has not been initialized if (!mMultiPartUploadInitialized) { if (mFile == null) { LOG.debug("Streaming upload output stream closed without uploading any data."); RetryUtils.retry("put empty object for key" + mKey, () -> createEmptyObject(mKey), mRetryPolicy.get()); } else { try { mLocalOutputStream.close(); final String md5 = mHash != null ? Base64.encodeBase64String(mHash.digest()) : null; RetryUtils.retry("put object for key" + mKey, () -> putObject(mKey, mFile, md5), mRetryPolicy.get()); } finally { if (!mFile.delete()) { LOG.error("Failed to delete temporary file @ {}", mFile.getPath()); } } } return; } try { if (mFile != null) { mLocalOutputStream.close(); int partNumber = mPartNumber.getAndIncrement(); uploadPart(mFile, partNumber, true); } waitForAllPartsUpload(); RetryUtils.retry("complete multipart upload", this::completeMultiPartUploadInternal, mRetryPolicy.get()); } catch (Exception e) { LOG.error("Failed to upload {}", mKey, e); throw new IOException(e); } } /** * Creates a new temp file to write to. */ private void initNewFile() throws IOException { mFile = new File(PathUtils.concatPath(CommonUtils.getTmpDir(mTmpDirs), UUID.randomUUID())); initHash(); if (mHash != null) { mLocalOutputStream = new BufferedOutputStream(new DigestOutputStream(new FileOutputStream(mFile), mHash)); } else { mLocalOutputStream = new BufferedOutputStream(new FileOutputStream(mFile)); } mPartitionOffset = 0; LOG.debug("Init new temp file @ {}", mFile.getPath()); } private void initHash() { try { mHash = MessageDigest.getInstance("MD5"); } catch (NoSuchAlgorithmException e) { LOG.warn("Algorithm not available for MD5 hash.", e); mHash = null; } } /** * Uploads part async. */ protected void uploadPart() throws IOException { if (mFile == null) { return; } if (!mMultiPartUploadInitialized) { RetryUtils.retry("init multipart upload", this::initMultiPartUploadInternal, mRetryPolicy.get()); mMultiPartUploadInitialized = true; } mLocalOutputStream.close(); int partNumber = mPartNumber.getAndIncrement(); uploadPart(new File(mFile.getPath()), partNumber, false); mFile = null; mLocalOutputStream = null; } protected void uploadPart(File file, int partNumber, boolean lastPart) { final String md5 = mHash != null ? Base64.encodeBase64String(mHash.digest()) : null; Callable callable = () -> { try { RetryUtils.retry("upload part for key " + mKey + " and part number " + partNumber, () -> uploadPartInternal(file, partNumber, lastPart, md5), mRetryPolicy.get()); return null; } finally { // Delete the uploaded or failed to upload file if (!file.delete()) { LOG.error("Failed to delete temporary file @ {}", file.getPath()); } } }; ListenableFuture futureTag = mExecutor.submit(callable); mFutures.add(futureTag); LOG.debug( "Submit upload part request. key={}, partNum={}, file={}, fileSize={}, lastPart={}.", mKey, partNumber, file.getPath(), file.length(), lastPart); } protected void abortMultiPartUpload() { try { RetryUtils.retry("abort multipart upload for key " + mKey, this::abortMultiPartUploadInternal, mRetryPolicy.get()); } catch (IOException e) { LOG.warn("Unable to abort multipart upload for key '{}' and id '{}' to bucket {}. " + "You may need to enable the periodical cleanup by setting property {}" + "to be true.", mKey, mBucketName, PropertyKey.UNDERFS_CLEANUP_ENABLED.getName(), e); } } protected void waitForAllPartsUpload() throws IOException { try { for (ListenableFuture future : mFutures) { if (mUploadPartTimeoutMills == null) { future.get(); } else { future.get(mUploadPartTimeoutMills, TimeUnit.MILLISECONDS); } } } catch (ExecutionException e) { // No recover ways so that we need to cancel all the upload tasks // and abort the multipart upload Futures.allAsList(mFutures).cancel(true); abortMultiPartUpload(); throw new IOException( "Part upload failed in multipart upload with to " + mKey, e); } catch (InterruptedException e) { LOG.warn("Interrupted object upload.", e); Futures.allAsList(mFutures).cancel(true); abortMultiPartUpload(); Thread.currentThread().interrupt(); } catch (TimeoutException e) { LOG.error("timeout when upload part"); Futures.allAsList(mFutures).cancel(true); abortMultiPartUpload(); throw new IOException("timeout when upload part " + mKey, e); } mFutures.clear(); } /** * Get the part number. * @return the part number */ @VisibleForTesting public int getPartNumber() { return mPartNumber.get(); } protected abstract void uploadPartInternal( File file, int partNumber, boolean isLastPart, @Nullable String md5) throws IOException; protected abstract void initMultiPartUploadInternal() throws IOException; protected abstract void completeMultiPartUploadInternal() throws IOException; protected abstract void abortMultiPartUploadInternal() throws IOException; protected abstract void createEmptyObject(String key) throws IOException; protected abstract void putObject(String key, File file, @Nullable String md5) throws IOException; }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy