All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.cloud.hadoop.fs.gcs.GoogleHadoopSyncableOutputStream Maven / Gradle / Ivy

/*
 * Copyright 2016 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.google.cloud.hadoop.fs.gcs;

import com.google.cloud.hadoop.gcsio.CreateFileOptions;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageItemInfo;
import com.google.cloud.hadoop.gcsio.StorageResourceId;
import com.google.cloud.hadoop.util.GoogleCloudStorageEventBus;
import com.google.cloud.hadoop.util.ITraceFactory;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.flogger.GoogleLogger;
import com.google.common.util.concurrent.RateLimiter;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.nio.channels.ClosedChannelException;
import java.nio.channels.WritableByteChannel;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Syncable;

/**
 * GoogleHadoopSyncableOutputStream implements the {@code Syncable} interface by composing objects
 * created in separate underlying streams for each hsync() call.
 *
 * 

Prior to the first hsync(), sync() or close() call, this channel will behave the same way as a * basic non-syncable channel, writing directly to the destination file. * *

On the first call to hsync()/sync(), the destination file is committed and a new temporary * file using a hidden-file prefix (underscore) is created with an additional suffix which differs * for each subsequent temporary file in the series; during this time readers can read the data * committed to the destination file, but not the bytes written to the temporary file since the last * hsync() call. * *

On each subsequent hsync()/sync() call, the temporary file closed(), composed onto the * destination file, then deleted, and a new temporary file is opened under a new filename for * further writes. * *

Caveat: each hsync()/sync() requires many underlying read and mutation requests occurring * sequentially, so latency is expected to be fairly high. * *

If errors occur mid-stream, there may be one or more temporary files failing to be cleaned up, * and require manual intervention to discover and delete any such unused files. Data written prior * to the most recent successful hsync() is persistent and safe in such a case. * *

If multiple writers are attempting to write to the same destination file, generation ids used * with low-level precondition checks will cause all but a one writer to fail their precondition * checks during writes, and a single remaining writer will safely occupy the stream. */ public class GoogleHadoopSyncableOutputStream extends OutputStream implements Syncable { private static final GoogleLogger logger = GoogleLogger.forEnclosingClass(); // Prefix used for all temporary files created by this stream. public static final String TEMPFILE_PREFIX = "_GCS_SYNCABLE_TEMPFILE_"; // Temporary files don't need to contain the desired attributes of the final destination file // since metadata settings get clobbered on final compose() anyways; additionally, due to // the way we pick temp file names and already ensured directories for the destination file, // we can optimize tempfile creation by skipping various directory checks. private static final CreateFileOptions TEMPFILE_CREATE_OPTIONS = CreateFileOptions.DEFAULT_NO_OVERWRITE .toBuilder() .setEnsureNoDirectoryConflict(false) .setOverwriteGenerationId(0) .build(); // Deletion of temporary files occurs asynchronously for performance reasons, but in-flight // deletions are awaited on close() so as long as all output streams are closed, there should // be no remaining in-flight work occurring inside this threadpool. private static final ExecutorService TEMPFILE_CLEANUP_THREADPOOL = Executors.newCachedThreadPool( new ThreadFactoryBuilder() .setNameFormat("gcs-syncable-output-stream-cleanup-pool-%d") .setDaemon(true) .build()); // Instance of GoogleHadoopFileSystemBase. private final GoogleHadoopFileSystemBase ghfs; // The final destination path for this stream. private final URI finalGcsPath; // Statistics tracker provided by the parent GoogleHadoopFileSystemBase for recording // numbers of bytes written. private final FileSystem.Statistics statistics; // Metadata/overwrite options to use on final file. private final CreateFileOptions fileOptions; // List of file-deletion futures accrued during the lifetime of this output stream. private final List> deletionFutures; private final SyncableOutputStreamOptions options; private final RateLimiter syncRateLimiter; private final ExecutorService cleanupThreadpool; private final GhfsStorageStatistics storageStatistics; private final ITraceFactory traceFactory; // Current GCS path pointing at the "tail" file which will be appended to the destination // on each hsync() call. private URI curGcsPath; // Current OutputStream pointing at the "tail" file which will be appended to the destination // on each hsync() call. private GoogleHadoopOutputStream curDelegate; // Stores the current component index corresponding curGcsPath. If close() is called, the total // number of components in the finalGcsPath will be curComponentIndex + 1. private int curComponentIndex; // The last known generationId of the final destination file, or possibly // StorageResourceId.UNKNOWN_GENERATION_ID if unknown. private long curDestGenerationId; /** Creates a new GoogleHadoopSyncableOutputStream. */ public GoogleHadoopSyncableOutputStream( GoogleHadoopFileSystemBase ghfs, URI gcsPath, FileSystem.Statistics statistics, CreateFileOptions createFileOptions, SyncableOutputStreamOptions options) throws IOException { this(ghfs, gcsPath, statistics, createFileOptions, options, TEMPFILE_CLEANUP_THREADPOOL); } @VisibleForTesting GoogleHadoopSyncableOutputStream( GoogleHadoopFileSystemBase ghfs, URI gcsPath, FileSystem.Statistics statistics, CreateFileOptions createFileOptions, SyncableOutputStreamOptions options, ExecutorService cleanupThreadpool) throws IOException { logger.atFiner().log( "GoogleHadoopSyncableOutputStream(gcsPath: %s, createFileOptions: %s, options: %s)", gcsPath, createFileOptions, options); this.ghfs = ghfs; this.finalGcsPath = gcsPath; this.statistics = statistics; this.fileOptions = createFileOptions; this.deletionFutures = new ArrayList<>(); this.cleanupThreadpool = cleanupThreadpool; this.options = options; this.syncRateLimiter = createRateLimiter(options.getMinSyncInterval()); if (options.isAppendEnabled()) { // When appending first component has to go to new temporary file. this.curGcsPath = getNextTemporaryPath(); this.curComponentIndex = 1; } else { // The first component of the stream will go straight to the destination filename to optimize // the case where no hsync() or a single hsync() is called during the lifetime of the stream; // committing the first component thus doesn't require any compose() call under the hood. this.curGcsPath = gcsPath; this.curComponentIndex = 0; } this.curDelegate = new GoogleHadoopOutputStream(ghfs, curGcsPath, statistics, fileOptions); this.curDestGenerationId = StorageResourceId.UNKNOWN_GENERATION_ID; this.storageStatistics = ghfs.getStorageStatistics(); this.traceFactory = ghfs.getTraceFactory(); } private static RateLimiter createRateLimiter(Duration minSyncInterval) { if (minSyncInterval.isNegative() || minSyncInterval.isZero()) { return null; } double permitsPerSecond = 1000.0 / minSyncInterval.toMillis(); return RateLimiter.create(permitsPerSecond); } @Override public void write(int b) throws IOException { throwIfNotOpen(); curDelegate.write(b); } @Override public void write(byte[] b, int offset, int len) throws IOException { throwIfNotOpen(); curDelegate.write(b, offset, len); } @Override public void close() throws IOException { logger.atFiner().log( "close(): Current tail file: %s final destination: %s", curGcsPath, finalGcsPath); if (!isOpen()) { logger.atFiner().log("close(): Ignoring; stream already closed."); return; } commitCurrentFile(); // null denotes stream closed. // TODO(user): Add checks which throw IOException if further operations are attempted on a // closed stream, except for multiple calls to close(), which should behave as no-ops. curGcsPath = null; curDelegate = null; logger.atFiner().log("close(): Awaiting %s deletionFutures", deletionFutures.size()); for (Future deletion : deletionFutures) { try { deletion.get(); } catch (ExecutionException | InterruptedException e) { if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); } GoogleCloudStorageEventBus.postOnException(); throw new IOException( String.format("Failed to delete files while closing stream. cause=%s", e.getMessage()), e); } } } public void sync() throws IOException { hsync(); } /** * There is no way to flush data to become available for readers without a full-fledged hsync(), * If the output stream is only syncable, this method is a no-op. If the output stream is also * flushable, this method will simply use the same implementation of hsync(). * *

If it is rate limited, unlike hsync(), which will try to acquire the permits and block, it * will do nothing. */ @Override public void hflush() throws IOException { GhfsStorageStatistics.trackDuration( storageStatistics, GhfsStatistic.INVOCATION_HFLUSH, finalGcsPath, traceFactory, () -> { long startTimeNs = System.nanoTime(); if (!options.isSyncOnFlushEnabled()) { logger.atWarning().log( "hflush(): No-op: readers will *not* yet see flushed data for %s", finalGcsPath); throwIfNotOpen(); return null; } // If rate limit not set or permit acquired than use hsync() if (syncRateLimiter == null || syncRateLimiter.tryAcquire()) { logger.atFine().log("hflush() uses hsync() for %s", finalGcsPath); hsyncInternal(startTimeNs); return null; } logger.atInfo().atMostEvery(1, TimeUnit.MINUTES).log( "hflush(): No-op due to rate limit (%s): readers will *not* yet see flushed data for %s", syncRateLimiter, finalGcsPath); throwIfNotOpen(); return null; }); } @Override public void hsync() throws IOException { GhfsStorageStatistics.trackDuration( storageStatistics, GhfsStatistic.INVOCATION_HSYNC, finalGcsPath, traceFactory, () -> { long startTimeNs = System.nanoTime(); if (syncRateLimiter != null) { logger.atFiner().log( "hsync(): Rate limited (%s) with blocking permit acquisition for %s", syncRateLimiter, finalGcsPath); syncRateLimiter.acquire(); } hsyncInternal(startTimeNs); return null; }); } /** Internal implementation of hsync, can be reused by hflush() as well. */ private void hsyncInternal(long startTimeNs) throws IOException { logger.atFiner().log( "hsync(): Committing tail file %s to final destination %s", curGcsPath, finalGcsPath); throwIfNotOpen(); commitCurrentFile(); // Use a different temporary path for each temporary component to reduce the possible avenues of // race conditions in the face of low-level retries, etc. ++curComponentIndex; curGcsPath = getNextTemporaryPath(); logger.atFiner().log( "hsync(): Opening next temporary tail file %s as component number %s", curGcsPath, curComponentIndex); curDelegate = new GoogleHadoopOutputStream(ghfs, curGcsPath, statistics, TEMPFILE_CREATE_OPTIONS); long finishTimeNs = System.nanoTime(); logger.atFiner().log("Took %d ns to sync() for %s", finishTimeNs - startTimeNs, finalGcsPath); } private void commitCurrentFile() throws IOException { // TODO(user): Optimize the case where 0 bytes have been written in the current component // to return early. WritableByteChannel innerChannel = curDelegate.getInternalChannel(); curDelegate.close(); long generationId = StorageResourceId.UNKNOWN_GENERATION_ID; if (innerChannel instanceof GoogleCloudStorageItemInfo.Provider) { generationId = ((GoogleCloudStorageItemInfo.Provider) innerChannel).getItemInfo().getContentGeneration(); logger.atFiner().log( "innerChannel is GoogleCloudStorageItemInfo.Provider; closed generationId %s.", generationId); } else { logger.atFiner().log("innerChannel NOT instanceof provider: %s", innerChannel.getClass()); } // On the first component, curGcsPath will equal finalGcsPath, and no compose() call is // necessary. Otherwise, we compose in-place into the destination object and then delete // the temporary object. if (!finalGcsPath.equals(curGcsPath)) { StorageResourceId destResourceId = StorageResourceId.fromStringPath(finalGcsPath.toString(), curDestGenerationId); final StorageResourceId tempResourceId = StorageResourceId.fromStringPath(curGcsPath.toString(), generationId); if (!destResourceId.getBucketName().equals(tempResourceId.getBucketName())) { GoogleCloudStorageEventBus.postOnException(); throw new IllegalStateException( String.format( "Destination bucket in path '%s' doesn't match temp file bucket in path '%s'", finalGcsPath, curGcsPath)); } GoogleCloudStorageItemInfo composedObject = ghfs.getGcsFs() .getGcs() .composeObjects( ImmutableList.of(destResourceId, tempResourceId), destResourceId, GoogleCloudStorageFileSystem.objectOptionsFromFileOptions(fileOptions)); curDestGenerationId = composedObject.getContentGeneration(); deletionFutures.add( cleanupThreadpool.submit( () -> { ghfs.getGcsFs().getGcs().deleteObjects(ImmutableList.of(tempResourceId)); return null; })); } else { // First commit was direct to the destination; the generationId of the object we just // committed will be used as the destination generation id for future compose calls. curDestGenerationId = generationId; } } /** Returns URI to be used for the next "tail" file in the series. */ private URI getNextTemporaryPath() { Path basePath = ghfs.getHadoopPath(finalGcsPath); Path baseDir = basePath.getParent(); Path tempPath = new Path( baseDir, String.format( "%s%s.%d.%s", TEMPFILE_PREFIX, basePath.getName(), curComponentIndex, UUID.randomUUID().toString())); return ghfs.getGcsPath(tempPath); } private boolean isOpen() { return curDelegate != null; } private void throwIfNotOpen() throws IOException { if (!isOpen()) { GoogleCloudStorageEventBus.postOnException(); throw new ClosedChannelException(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy