/*
 * Copyright 2013 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.cloud.hadoop.fs.gcs;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Strings.isNullOrEmpty;
import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration;

import com.google.cloud.hadoop.gcsio.CreateFileOptions;
import com.google.cloud.hadoop.gcsio.CreateObjectOptions;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorage;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemImpl;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageItemInfo;
import com.google.cloud.hadoop.gcsio.StorageResourceId;
import com.google.cloud.hadoop.util.GoogleCloudStorageEventBus;
import com.google.cloud.hadoop.util.ITraceFactory;
import com.google.common.base.Ascii;
import com.google.common.collect.ImmutableList;
import com.google.common.flogger.GoogleLogger;
import com.google.common.util.concurrent.RateLimiter;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.nio.channels.Channels;
import java.nio.channels.ClosedChannelException;
import java.nio.channels.WritableByteChannel;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nonnull;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.StreamCapabilities;
import org.apache.hadoop.fs.Syncable;
import org.apache.hadoop.fs.statistics.DurationTrackerFactory;
import org.apache.hadoop.fs.statistics.IOStatistics;
import org.apache.hadoop.fs.statistics.IOStatisticsSource;
import org.apache.hadoop.util.functional.CallableRaisingIOE;

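/**
 * An {@link OutputStream} for {@link GoogleHadoopFileSystem} that implements Hadoop's {@link
 * Syncable} semantics on top of Cloud Storage: bytes are written to a "tail" object, and each
 * sync commits the tail into the destination object via a GCS compose() call before opening a new
 * tail object for subsequent writes.
 *
 * <p>A sketch of a caller's view through the public Hadoop API (the {@code conf} object and the
 * bucket/path below are illustrative placeholders, not part of this class):
 *
 * <pre>{@code
 * FileSystem fs = FileSystem.get(URI.create("gs://my-bucket/"), conf);
 * try (FSDataOutputStream out = fs.create(new Path("gs://my-bucket/dir/file"))) {
 *   out.write(new byte[] {1, 2, 3});
 *   if (out.hasCapability(StreamCapabilities.HSYNC)) {
 *     out.hsync(); // durably commits everything written so far
 *   }
 * }
 * }</pre>
 */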
class GoogleHadoopOutputStream extends OutputStream
    implements IOStatisticsSource, StreamCapabilities, Syncable {

  private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();

  private final GhfsGlobalStorageStatistics storageStatistics;

  private final ITraceFactory traceFactory;

  // Prefix used for all temporary files created by this stream.
  public static final String TMP_FILE_PREFIX = "_GHFS_SYNC_TMP_FILE_";

  // Temporary files don't need to contain the desired attributes of the final destination file,
  // since metadata settings get clobbered on the final compose() anyway; additionally, because of
  // the way we pick temp file names and have already ensured that directories exist for the
  // destination file, we can optimize temp file creation by skipping various directory checks.
  private static final CreateFileOptions TMP_FILE_CREATE_OPTIONS =
      CreateFileOptions.builder().setEnsureNoDirectoryConflict(false).build();

  // Deletion of temporary files occurs asynchronously for performance reasons, but in-flight
  // deletions are awaited on close(), so as long as all output streams are closed, no in-flight
  // work should remain inside this threadpool.
  private static final ExecutorService TMP_FILE_CLEANUP_THREADPOOL =
      Executors.newCachedThreadPool(
          new ThreadFactoryBuilder()
              .setNameFormat("ghfs-output-stream-sync-cleanup-%d")
              .setDaemon(true)
              .build());

  private final GoogleHadoopFileSystem ghfs;

  private final CreateObjectOptions composeObjectOptions;

  // Path of the file to write to.
  private final URI dstGcsPath;

  /**
   * The last known generationId of the {@link #dstGcsPath} file, or possibly {@link
   * StorageResourceId#UNKNOWN_GENERATION_ID} if unknown.
   */
  private long dstGenerationId;

  // GCS path pointing at the "tail" file which will be appended to the destination
  // on hflush()/hsync() call.
  private URI tmpGcsPath;

  /**
   * Stores the component index corresponding to {@link #tmpGcsPath}. If close() is called, the
   * total number of components in the {@link #dstGcsPath} will be {@code tmpIndex + 1}.
   */
  private int tmpIndex;

  // OutputStream pointing at the "tail" file which will be appended to the destination
  // on hflush()/hsync() call.
  private OutputStream tmpOut;

  private final RateLimiter syncRateLimiter;

  // List of temporary file-deletion futures accrued during the lifetime of this output stream.
  private final List<Future<Void>> tmpDeletionFutures = new ArrayList<>();

  // Statistics tracker provided by the parent GoogleHadoopFileSystem for recording
  // numbers of bytes written.
  private final FileSystem.Statistics statistics;
  // Statistics tracker for output stream related statistics
  private final GhfsOutputStreamStatistics streamStatistics;
  // Instrumentation to track Statistics
  private final GhfsInstrumentation instrumentation;

  private final GhfsStreamStats streamStats;

  /**
   * Constructs an instance of GoogleHadoopOutputStream.
   *
   * @param ghfs Instance of {@link GoogleHadoopFileSystem}.
   * @param dstGcsPath Path of the file to write to.
   * @param createFileOptions Options for file creation.
   * @param statistics File system statistics object.
   * @throws IOException if an IO error occurs.
   */
  public GoogleHadoopOutputStream(
      GoogleHadoopFileSystem ghfs,
      URI dstGcsPath,
      CreateFileOptions createFileOptions,
      FileSystem.Statistics statistics)
      throws IOException {
    logger.atFiner().log(
        "GoogleHadoopOutputStream(gcsPath: %s, createFileOptions: %s)",
        dstGcsPath, createFileOptions);
    this.ghfs = ghfs;
    this.dstGcsPath = dstGcsPath;
    this.statistics = statistics;
    this.storageStatistics = ghfs.getGlobalGcsStorageStatistics();
    this.streamStatistics = ghfs.getInstrumentation().newOutputStreamStatistics(statistics);

    this.streamStats =
        new GhfsStreamStats(storageStatistics, GhfsStatistic.STREAM_WRITE_OPERATIONS, dstGcsPath);
    Duration minSyncInterval = createFileOptions.getMinSyncInterval();
    this.instrumentation = ghfs.getInstrumentation();

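    // When syncRateLimiter is null, syncs are not throttled. For example, a minSyncInterval of
    // 5 seconds yields 1_000.0 / 5_000 = 0.2 permits per second, i.e. at most one rate-limited
    // sync every 5 seconds.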
    this.syncRateLimiter =
        minSyncInterval.isNegative() || minSyncInterval.isZero()
            ? null
            : RateLimiter.create(/* permitsPerSecond= */ 1_000.0 / minSyncInterval.toMillis());
    this.composeObjectOptions =
        GoogleCloudStorageFileSystemImpl.objectOptionsFromFileOptions(
            createFileOptions.toBuilder()
                // Set write mode to OVERWRITE because we use compose operation to append new data
                // to an existing object
                .setWriteMode(CreateFileOptions.WriteMode.OVERWRITE)
                .build());

    if (createFileOptions.getWriteMode() == CreateFileOptions.WriteMode.APPEND) {
      // When appending, the first component has to go to a new temporary file.
      this.tmpGcsPath = getNextTmpPath();
      this.tmpIndex = 1;
    } else {
      // The first component of the stream will go straight to the destination filename to optimize
      // the case where no hsync() or a single hsync() is called during the lifetime of the stream;
      // committing the first component thus doesn't require any compose() call under the hood.
      this.tmpGcsPath = dstGcsPath;
      this.tmpIndex = 0;
    }
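    // Illustration of the resulting commit sequence in the non-append case: the first commit
    // (on hsync() or close()) needs no compose() since the tail *is* dstGcsPath; every later
    // commit composes the destination with the current tail file in-place, so after N commits
    // the destination object consists of N components.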

    this.tmpOut =
        createOutputStream(
            ghfs.getGcsFs(),
            tmpGcsPath,
            tmpIndex == 0 ? createFileOptions : TMP_FILE_CREATE_OPTIONS);
    this.dstGenerationId = StorageResourceId.UNKNOWN_GENERATION_ID;
    this.traceFactory = ghfs.getTraceFactory();
  }

  private static OutputStream createOutputStream(
      GoogleCloudStorageFileSystem gcsfs, URI gcsPath, CreateFileOptions options)
      throws IOException {
    WritableByteChannel channel;
    try {
      channel = gcsfs.create(gcsPath, options);
    } catch (java.nio.file.FileAlreadyExistsException e) {
      GoogleCloudStorageEventBus.postOnException();
      throw (FileAlreadyExistsException)
          new FileAlreadyExistsException(String.format("'%s' already exists", gcsPath))
              .initCause(e);
    }
    OutputStream outputStream = Channels.newOutputStream(channel);
    int bufferSize =
        gcsfs.getOptions().getCloudStorageOptions().getWriteChannelOptions().getBufferSize();
    return bufferSize > 0 ? new BufferedOutputStream(outputStream, bufferSize) : outputStream;
  }

  @Override
  public void write(int b) throws IOException {
    trackDuration(
        streamStatistics,
        GhfsStatistic.STREAM_WRITE_OPERATIONS.getSymbol(),
        () -> {
          long start = System.nanoTime();
          throwIfNotOpen();
          tmpOut.write(b);
          streamStatistics.writeBytes(1);
          // Using a lightweight implementation to update instrumentation: this method can be
          // called quite frequently and needs to be lightweight.
          statistics.incrementBytesWritten(1);
          statistics.incrementWriteOps(1);
          streamStats.updateWriteStreamStats(1, start);
          return null;
        });
  }

  /**
   * Tracks the duration of {@code operation}. Also sets up operation tracking using {@code
   * ThreadTrace}.
   */
  private <B> B trackDurationWithTracing(
      DurationTrackerFactory factory,
      @Nonnull GhfsGlobalStorageStatistics stats,
      GhfsStatistic statistic,
      Object context,
      ITraceFactory traceFactory,
      CallableRaisingIOE<B> operation)
      throws IOException {

    return GhfsGlobalStorageStatistics.trackDuration(
        factory, stats, statistic, context, traceFactory, operation);
  }

  @Override
  public void write(@Nonnull byte[] b, int offset, int len) throws IOException {
    trackDuration(
        streamStatistics,
        GhfsStatistic.STREAM_WRITE_OPERATIONS.getSymbol(),
        () -> {
          long start = System.nanoTime();
          throwIfNotOpen();
          tmpOut.write(b, offset, len);
          statistics.incrementBytesWritten(len);
          statistics.incrementWriteOps(1);
          streamStats.updateWriteStreamStats(len, start);
          streamStatistics.writeBytes(len);
          return null;
        });
  }

  /**
   * There is no way to flush data to become available for readers without a full-fledged hsync().
   * If the output stream is only syncable, this method is a no-op. If the output stream is also
   * flushable, this method will simply use the same implementation of hsync().
   *
   * <p>If it is rate limited, unlike hsync(), which will try to acquire the permits and block, it
   * will do nothing.
   */
  @Override
  public void hflush() throws IOException {
    trackDurationWithTracing(
        streamStatistics,
        storageStatistics,
        GhfsStatistic.INVOCATION_HFLUSH,
        dstGcsPath,
        traceFactory,
        () -> {
          logger.atFiner().log("hflush(): %s", dstGcsPath);
          long startMs = System.currentTimeMillis();
          throwIfNotOpen();
          // If the rate limit is not set or a permit was acquired, then use hsync()
          if (syncRateLimiter == null || syncRateLimiter.tryAcquire()) {
            logger.atFine().log("hflush() uses hsyncInternal() for %s", dstGcsPath);
            hsyncInternal(startMs);
            return null;
          }
          logger.atInfo().atMostEvery(1, TimeUnit.MINUTES).log(
              "hflush(): No-op due to rate limit (%s): readers will *not* yet see flushed data for %s",
              syncRateLimiter, dstGcsPath);
          return null;
        });
  }

  @Override
  public void hsync() throws IOException {
    trackDurationWithTracing(
        streamStatistics,
        storageStatistics,
        GhfsStatistic.INVOCATION_HSYNC,
        dstGcsPath,
        traceFactory,
        () -> {
          logger.atFiner().log("hsync(): %s", dstGcsPath);
          long startMs = System.currentTimeMillis();
          throwIfNotOpen();
          if (syncRateLimiter != null) {
            logger.atFiner().log(
                "hsync(): Rate limited (%s) with blocking permit acquisition for %s",
                syncRateLimiter, dstGcsPath);
            syncRateLimiter.acquire();
          }
          hsyncInternal(startMs);
          return null;
        });
  }

  /** Internal implementation of hsync(), which can be reused by hflush() as well. */
  private void hsyncInternal(long startMs) throws IOException {
    logger.atFiner().log(
        "hsyncInternal(): Committing tail file %s to final destination %s", tmpGcsPath, dstGcsPath);
    commitTempFile();

    // Use a different temporary path for each temporary component to reduce the possible avenues
    // of race conditions in the face of low-level retries, etc.
    ++tmpIndex;
    tmpGcsPath = getNextTmpPath();

    logger.atFiner().log(
        "hsync(): Opening next temporary tail file %s at %d index", tmpGcsPath, tmpIndex);
    tmpOut = createOutputStream(ghfs.getGcsFs(), tmpGcsPath, TMP_FILE_CREATE_OPTIONS);

    long finishMs = System.currentTimeMillis();
    logger.atFiner().log("Took %dms to sync() for %s", finishMs - startMs, dstGcsPath);
  }

  private void commitTempFile() throws IOException {
    // TODO(user): return early when 0 bytes have been written in the temp files
    tmpOut.close();

    long tmpGenerationId =
        tmpOut instanceof GoogleCloudStorageItemInfo.Provider
            ? ((GoogleCloudStorageItemInfo.Provider) tmpOut).getItemInfo().getContentGeneration()
            : StorageResourceId.UNKNOWN_GENERATION_ID;
    logger.atFiner().log(
        "tmpOut is an instance of %s; expected generationId %d.",
        tmpOut.getClass(), tmpGenerationId);

    // On the first component, tmpGcsPath will equal dstGcsPath, and no compose() call is
    // necessary. Otherwise, we compose in-place into the destination object and then delete
    // the temporary object.
    if (dstGcsPath.equals(tmpGcsPath)) {
      // First commit was direct to the destination; the generationId of the object we just
      // committed will be used as the destination generation id for future compose calls.
      dstGenerationId = tmpGenerationId;
    } else {
      StorageResourceId dstId =
          StorageResourceId.fromUriPath(
              dstGcsPath, /* allowEmptyObjectName= */ false, dstGenerationId);
      StorageResourceId tmpId =
          StorageResourceId.fromUriPath(
              tmpGcsPath, /* allowEmptyObjectName= */ false, tmpGenerationId);
      checkState(
          dstId.getBucketName().equals(tmpId.getBucketName()),
          "Destination bucket in path '%s' doesn't match temp file bucket in path '%s'",
          dstGcsPath,
          tmpGcsPath);
      GoogleCloudStorage gcs = ghfs.getGcsFs().getGcs();
      GoogleCloudStorageItemInfo composedObject =
          gcs.composeObjects(ImmutableList.of(dstId, tmpId), dstId, composeObjectOptions);
      dstGenerationId = composedObject.getContentGeneration();
      tmpDeletionFutures.add(
          TMP_FILE_CLEANUP_THREADPOOL.submit(
              () -> {
                gcs.deleteObjects(ImmutableList.of(tmpId));
                return null;
              }));
    }
  }

  /** Returns the URI to be used for the next temp "tail" file in the series. */
  private URI getNextTmpPath() {
    Path basePath = ghfs.getHadoopPath(dstGcsPath);
    Path tempPath =
        new Path(
            basePath.getParent(),
            String.format(
                "%s%s.%d.%s", TMP_FILE_PREFIX, basePath.getName(), tmpIndex, UUID.randomUUID()));
    return ghfs.getGcsPath(tempPath);
  }

  @Override
  public void close() throws IOException {
    boolean isClosed = tmpOut == null;
    trackDurationWithTracing(
        streamStatistics,
        storageStatistics,
        GhfsStatistic.STREAM_WRITE_CLOSE_OPERATIONS,
        dstGcsPath,
        traceFactory,
        () -> {
          logger.atFiner().log(
              "close(): temp tail file: %s final destination: %s", tmpGcsPath, dstGcsPath);
          if (tmpOut == null) {
            logger.atFiner().log("close(): Ignoring; stream already closed.");
            return null;
          }
          commitTempFile();
          try {
            tmpOut.close();
          } finally {
            tmpOut = null;
          }
          tmpGcsPath = null;
          tmpIndex = -1;

          logger.atFiner().log("close(): Awaiting %s deletionFutures", tmpDeletionFutures.size());
          for (Future<Void> deletion : tmpDeletionFutures) {
            try {
              deletion.get();
            } catch (ExecutionException | InterruptedException e) {
              GoogleCloudStorageEventBus.postOnException();
              if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
              }
              throw new IOException(
                  String.format(
                      "Failed to delete temporary files while closing stream: '%s'", dstGcsPath),
                  e);
            }
          }
          return null;
        });

    if (!isClosed) {
      streamStats.close();
      streamStatistics.close();
    }
    // TODO: do we need to make `cleanerThreadpool` an object field and close it here?
  }

  private void throwIfNotOpen() throws IOException {
    if (tmpOut == null) {
      throw new ClosedChannelException();
    }
  }

  /** Gets the current IOStatistics from the output stream. */
  @Override
  public IOStatistics getIOStatistics() {
    return streamStatistics.getIOStatistics();
  }

  @Override
  public boolean hasCapability(String capability) {
    checkArgument(!isNullOrEmpty(capability), "capability must not be null or empty string");
    switch (Ascii.toLowerCase(capability)) {
      case StreamCapabilities.HFLUSH:
      case StreamCapabilities.HSYNC:
        return syncRateLimiter != null;
      case StreamCapabilities.IOSTATISTICS:
        return true;
      default:
        return false;
    }
  }
}