
/*
 * Copyright 2016 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.google.cloud.hadoop.fs.gcs;

import com.google.cloud.hadoop.gcsio.CreateFileOptions;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageItemInfo;
import com.google.cloud.hadoop.gcsio.StorageResourceId;
import com.google.common.collect.ImmutableList;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.nio.channels.ClosedChannelException;
import java.nio.channels.WritableByteChannel;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Syncable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * GoogleHadoopSyncableOutputStream implements the {@code Syncable} interface by composing
 * objects created in separate underlying streams for each hsync() call.
 *
 * <p>Prior to the first hsync(), sync() or close() call, this channel will behave the same way as
 * a basic non-syncable channel, writing directly to the destination file.
 *
 * <p>On the first call to hsync()/sync(), the destination file is committed and a new temporary
 * file using a hidden-file prefix (underscore) is created with an additional suffix which differs
 * for each subsequent temporary file in the series; during this time readers can read the data
 * committed to the destination file, but not the bytes written to the temporary file since the
 * last hsync() call.
 *
 * <p>On each subsequent hsync()/sync() call, the temporary file is closed(), composed onto the
 * destination file, then deleted, and a new temporary file is opened under a new filename for
 * further writes.
 *
 * <p>Caveats:
 * 1. Each hsync()/sync() requires many underlying read and mutation requests occurring
 *    sequentially, so latency is expected to be fairly high.
 * 2. There is a hard limit to the number of times hsync()/sync() can be called, due to the
 *    GCS-level limit on the number of components a composite object can contain (1024). Any
 *    attempt to hsync() more than this number of times will result in an IOException, and any
 *    data written since the last hsync() should be considered lost (unless manually recovered,
 *    as long as the temporary file wasn't deleted under the hood).
 *
 * <p>If errors occur mid-stream, one or more temporary files may fail to be cleaned up,
 * requiring manual intervention to discover and delete any such unused files. Data written
 * prior to the most recent successful hsync() is persistent and safe in such a case.
 *
 * <p>If multiple writers are attempting to write to the same destination file, generation ids
 * used with low-level precondition checks will cause all but one writer to fail their
 * precondition checks during writes, and the single remaining writer will safely occupy the
 * stream.
 */
public class GoogleHadoopSyncableOutputStream extends OutputStream implements Syncable {
  // Prefix used for all temporary files created by this stream.
  public static final String TEMPFILE_PREFIX = "_GCS_SYNCABLE_TEMPFILE_";

  // Maximum number of components a composite object can have; any attempts to compose onto
  // an object already having this many components will fail. This OutputStream will enforce
  // the limit before attempting the compose operation at all, so that the stream can be
  // considered still safe to use and eventually close() without losing data even if
  // intermediate attempts to hsync() throw exceptions due to the component limit.
  public static final int MAX_COMPOSITE_COMPONENTS = 1024;

  private static final Logger LOG =
      LoggerFactory.getLogger(GoogleHadoopSyncableOutputStream.class);

  // Temporary files don't need to contain the desired attributes of the final destination file
  // since metadata settings get clobbered on final compose() anyways; additionally, due to
  // the way we pick temp file names and already ensured directories for the destination file,
  // we can optimize tempfile creation by skipping various directory checks.
  private static final CreateFileOptions TEMPFILE_CREATE_OPTIONS = new CreateFileOptions(
      /* overwriteExisting= */ false,
      CreateFileOptions.DEFAULT_CONTENT_TYPE,
      CreateFileOptions.EMPTY_ATTRIBUTES,
      /* checkNoDirectoryConflict= */ false,
      /* ensureParentDirectoriesExist= */ false,
      /* existingGenerationId= */ 0L);

  // Deletion of temporary files occurs asynchronously for performance reasons, but in-flight
  // deletions are awaited on close(), so as long as all output streams are closed, there should
  // be no remaining in-flight work occurring inside this threadpool.
  private static final ExecutorService TEMPFILE_CLEANUP_THREADPOOL =
      Executors.newCachedThreadPool(
          new ThreadFactoryBuilder()
              .setNameFormat("gcs-syncable-output-stream-cleanup-pool-%d")
              .setDaemon(true)
              .build());

  // Instance of GoogleHadoopFileSystemBase.
  private final GoogleHadoopFileSystemBase ghfs;

  // The final destination path for this stream.
  private final URI finalGcsPath;

  // Buffer size to pass through to delegate streams.
  private final int bufferSize;

  // Statistics tracker provided by the parent GoogleHadoopFileSystemBase for recording
  // numbers of bytes written.
  private final FileSystem.Statistics statistics;

  // Metadata/overwrite options to use on final file.
  private final CreateFileOptions fileOptions;

  // List of file-deletion futures accrued during the lifetime of this output stream.
  private final List<Future<Void>> deletionFutures;

  private final ExecutorService cleanupThreadpool;

  // Current GCS path pointing at the "tail" file which will be appended to the destination
  // on each hsync() call.
  private URI curGcsPath;

  // Current OutputStream pointing at the "tail" file which will be appended to the destination
  // on each hsync() call.
  private GoogleHadoopOutputStream curDelegate;

  // Stores the current component index corresponding to curGcsPath. If close() is called, the
  // total number of components in the finalGcsPath will be curComponentIndex + 1.
  private int curComponentIndex;
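
  // Illustrative lifecycle invariant: after the N-th successful hsync(), the destination object
  // holds N committed components composed together, curComponentIndex == N, and curGcsPath points
  // at the (N+1)-th, still-uncommitted temporary file. Since hsync() refuses to run once
  // curComponentIndex + 1 >= MAX_COMPOSITE_COMPONENTS, a stream permits at most
  // MAX_COMPOSITE_COMPONENTS - 1 (1023) hsync() calls, with close() committing the final
  // component.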
  // The last known generationId of the final destination file, or possibly
  // StorageResourceId.UNKNOWN_GENERATION_ID if unknown.
  private long curDestGenerationId;

  /**
   * Creates a new GoogleHadoopSyncableOutputStream with initial stream initialized and expected
   * to begin at file-offset 0. This constructor is not suitable for "appending" to already
   * existing files.
   */
  public GoogleHadoopSyncableOutputStream(
      GoogleHadoopFileSystemBase ghfs,
      URI gcsPath,
      int bufferSize,
      FileSystem.Statistics statistics,
      CreateFileOptions createFileOptions) throws IOException {
    this(ghfs, gcsPath, bufferSize, statistics, createFileOptions, TEMPFILE_CLEANUP_THREADPOOL);
  }

  GoogleHadoopSyncableOutputStream(
      GoogleHadoopFileSystemBase ghfs,
      URI gcsPath,
      int bufferSize,
      FileSystem.Statistics statistics,
      CreateFileOptions createFileOptions,
      ExecutorService cleanupThreadpool) throws IOException {
    LOG.debug("GoogleHadoopSyncableOutputStream({}, {})", gcsPath, bufferSize);
    this.ghfs = ghfs;
    this.finalGcsPath = gcsPath;
    this.bufferSize = bufferSize;
    this.statistics = statistics;
    this.fileOptions = createFileOptions;
    this.deletionFutures = new ArrayList<>();
    this.cleanupThreadpool = cleanupThreadpool;

    // The first component of the stream will go straight to the destination filename to optimize
    // the case where no hsync() or a single hsync() is called during the lifetime of the stream;
    // committing the first component thus doesn't require any compose() call under the hood.
    this.curGcsPath = gcsPath;
    this.curDelegate = new GoogleHadoopOutputStream(
        ghfs, curGcsPath, bufferSize, statistics, fileOptions);

    // TODO(user): Make sure to initialize this to the correct value if a new stream is created to
    // "append" to an existing file.
    this.curComponentIndex = 0;
    this.curDestGenerationId = StorageResourceId.UNKNOWN_GENERATION_ID;
  }

  @Override
  public void write(int b) throws IOException {
    throwIfNotOpen();
    curDelegate.write(b);
  }

  @Override
  public void write(byte[] b, int offset, int len) throws IOException {
    throwIfNotOpen();
    curDelegate.write(b, offset, len);
  }

  @Override
  public void close() throws IOException {
    LOG.debug("close(): Current tail file: {} final destination: {}", curGcsPath, finalGcsPath);
    if (!isOpen()) {
      LOG.debug("close(): Ignoring; stream already closed.");
      return;
    }
    commitCurrentFile();

    // null denotes stream closed.
    // TODO(user): Add checks which throw IOException if further operations are attempted on a
    // closed stream, except for multiple calls to close(), which should behave as no-ops.
    curGcsPath = null;
    curDelegate = null;

    LOG.debug("close(): Awaiting {} deletionFutures", deletionFutures.size());
    for (Future<Void> deletion : deletionFutures) {
      try {
        deletion.get();
      } catch (ExecutionException | InterruptedException ee) {
        if (ee.getCause() instanceof IOException) {
          throw (IOException) ee.getCause();
        } else {
          throw new IOException(ee);
        }
      }
    }
    LOG.debug("close(): done");
  }

  public void sync() throws IOException {
    hsync();
  }

  /**
   * There is no way to flush data to become available for readers without a full-fledged
   * hsync(), so this method is a no-op.
   *
   * <p>This overrides Syncable.hflush(), but is not annotated as such because the method doesn't
   * exist in Hadoop 1.
   */
  public void hflush() throws IOException {
    LOG.warn(
        "hflush() is a no-op; readers will *not* yet see flushed data for {}",
        finalGcsPath);
    throwIfNotOpen();
  }

  /**
   * This overrides Syncable.hsync(), but is not annotated as such because the method doesn't
   * exist in Hadoop 1.
   *
   * @throws CompositeLimitExceededException if this hsync() call would require any future close()
   *     call to exceed the component limit. If CompositeLimitExceededException is thrown, no
   *     actual GCS operations are taken and it's safe to subsequently call close() on this
   *     stream as normal; it just means data written since the last successful hsync() has not
   *     yet been committed.
   */
  public void hsync() throws IOException {
    LOG.debug(
        "hsync(): Committing tail file {} to final destination {}", curGcsPath, finalGcsPath);
    throwIfNotOpen();
    long startTime = System.nanoTime();

    // If we were to call close() instead of hsync() right now, the final object would have this
    // many components.
    int curNumComponents = curComponentIndex + 1;
    if (curNumComponents >= MAX_COMPOSITE_COMPONENTS) {
      throw new CompositeLimitExceededException(String.format(
          "Cannot hsync() '%s' because subsequent component count would exceed limit of %d",
          finalGcsPath, MAX_COMPOSITE_COMPONENTS));
    }

    commitCurrentFile();

    // Use a different temporary path for each temporary component to reduce the possible avenues
    // of race conditions in the face of low-level retries, etc.
    ++curComponentIndex;
    curGcsPath = getNextTemporaryPath();

    LOG.debug("hsync(): Opening next temporary tail file {} as component number {}",
        curGcsPath, curComponentIndex);
    curDelegate = new GoogleHadoopOutputStream(
        ghfs, curGcsPath, bufferSize, statistics, TEMPFILE_CREATE_OPTIONS);

    long endTime = System.nanoTime();
    LOG.debug("Took {} ns to hsync()", endTime - startTime);
  }

  private void commitCurrentFile() throws IOException {
    // TODO(user): Optimize the case where 0 bytes have been written in the current component
    // to return early.
    WritableByteChannel innerChannel = curDelegate.getInternalChannel();
    curDelegate.close();

    long generationId = StorageResourceId.UNKNOWN_GENERATION_ID;
    if (innerChannel instanceof GoogleCloudStorageItemInfo.Provider) {
      generationId = ((GoogleCloudStorageItemInfo.Provider) innerChannel)
          .getItemInfo().getContentGeneration();
      LOG.debug(
          "innerChannel is GoogleCloudStorageItemInfo.Provider; closed generationId {}.",
          generationId);
    } else {
      LOG.debug("innerChannel NOT instanceof provider: {}", innerChannel.getClass());
    }

    // On the first component, curGcsPath will equal finalGcsPath, and no compose() call is
    // necessary. Otherwise, we compose in-place into the destination object and then delete
    // the temporary object.
    if (!finalGcsPath.equals(curGcsPath)) {
      StorageResourceId destResourceId =
          StorageResourceId.fromObjectName(finalGcsPath.toString(), curDestGenerationId);
      final StorageResourceId tempResourceId =
          StorageResourceId.fromObjectName(curGcsPath.toString(), generationId);
      if (!destResourceId.getBucketName().equals(tempResourceId.getBucketName())) {
        throw new IllegalStateException(String.format(
            "Destination bucket in path '%s' doesn't match temp file bucket in path '%s'",
            finalGcsPath, curGcsPath));
      }
      GoogleCloudStorageItemInfo composedObject = ghfs.getGcsFs().getGcs().composeObjects(
          ImmutableList.of(destResourceId, tempResourceId),
          destResourceId,
          GoogleCloudStorageFileSystem.objectOptionsFromFileOptions(fileOptions));
      curDestGenerationId = composedObject.getContentGeneration();
      deletionFutures.add(cleanupThreadpool.submit(new Callable<Void>() {
        @Override
        public Void call() throws IOException {
          ghfs.getGcsFs().getGcs().deleteObjects(ImmutableList.of(tempResourceId));
          return null;
        }
      }));
    } else {
      // First commit was direct to the destination; the generationId of the object we just
      // committed will be used as the destination generation id for future compose calls.
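      // Note: per the multi-writer contract in the class javadoc, carrying this generation id
      // into destResourceId on future compose calls means a concurrent writer that has since
      // replaced the destination object will fail its precondition check rather than silently
      // clobbering this stream's data.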
      curDestGenerationId = generationId;
    }
  }

  /**
   * Returns URI to be used for the next "tail" file in the series.
   */
  private URI getNextTemporaryPath() {
    Path basePath = ghfs.getHadoopPath(finalGcsPath);
    Path baseDir = basePath.getParent();
    Path tempPath = new Path(
        baseDir,
        String.format("%s%s.%d.%s",
            TEMPFILE_PREFIX, basePath.getName(), curComponentIndex,
            UUID.randomUUID().toString()));
    return ghfs.getGcsPath(tempPath);
  }

  private boolean isOpen() {
    return curDelegate != null;
  }

  private void throwIfNotOpen() throws IOException {
    if (!isOpen()) {
      throw new ClosedChannelException();
    }
  }
}
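
A minimal caller-side sketch of how this stream is typically exercised through the Hadoop FileSystem API. This is illustrative only, not part of the original source: it assumes a Configuration whose gs:// scheme resolves to the GCS connector and is configured to return syncable output streams (e.g. fs.gs.outputstream.type=SYNCABLE_COMPOSITE in connector versions that support that option); the bucket and object names are made up.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SyncableWriteExample {
  public static void main(String[] args) throws Exception {
    // Assumption: 'conf' maps the gs:// scheme to the GCS connector and enables
    // syncable composite output streams; bucket/path names are illustrative.
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create("gs://example-bucket/"), conf);

    try (FSDataOutputStream out = fs.create(new Path("gs://example-bucket/logs/app.log"))) {
      out.write("first batch\n".getBytes("UTF-8"));
      // Commits "first batch" to the destination object; readers can now see it,
      // at the cost of the sequential compose/delete round trips described above.
      out.hsync();

      out.write("second batch\n".getBytes("UTF-8"));
      // No hsync() here: "second batch" is committed by close() at the end of this block.
    }
  }
}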




