/*
* Copyright 2013 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.hadoop.fs.gcs;
import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.OutputStreamType.FLUSHABLE_COMPOSITE;
import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.BLOCK_SIZE;
import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.CONFIG_KEY_PREFIXES;
import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.DELEGATION_TOKEN_BINDING_CLASS;
import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.GCS_CONFIG_PREFIX;
import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.GCS_FILE_CHECKSUM_TYPE;
import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.GCS_GLOB_ALGORITHM;
import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.GCS_LAZY_INITIALIZATION_ENABLE;
import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.GCS_OPERATION_TRACE_LOG_ENABLE;
import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.GCS_OUTPUT_STREAM_SYNC_MIN_INTERVAL_MS;
import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.GCS_OUTPUT_STREAM_TYPE;
import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.GCS_WORKING_DIRECTORY;
import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.PERMISSIONS_TO_REPORT;
import static com.google.cloud.hadoop.gcsio.CreateFileOptions.DEFAULT_OVERWRITE;
import static com.google.cloud.hadoop.util.HadoopCredentialConfiguration.GROUP_IMPERSONATION_SERVICE_ACCOUNT_SUFFIX;
import static com.google.cloud.hadoop.util.HadoopCredentialConfiguration.IMPERSONATION_SERVICE_ACCOUNT_SUFFIX;
import static com.google.cloud.hadoop.util.HadoopCredentialConfiguration.USER_IMPERSONATION_SERVICE_ACCOUNT_SUFFIX;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.flogger.LazyArgs.lazy;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.concurrent.Executors.newFixedThreadPool;
import com.google.api.client.auth.oauth2.Credential;
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.http.HttpTransport;
import com.google.cloud.hadoop.fs.gcs.auth.GcsDelegationTokens;
import com.google.cloud.hadoop.gcsio.CreateFileOptions;
import com.google.cloud.hadoop.gcsio.CreateObjectOptions;
import com.google.cloud.hadoop.gcsio.FileInfo;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorage;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorage.ListPage;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemOptions;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageItemInfo;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageOptions;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageReadOptions;
import com.google.cloud.hadoop.gcsio.ListFileOptions;
import com.google.cloud.hadoop.gcsio.StorageResourceId;
import com.google.cloud.hadoop.gcsio.UpdatableItemInfo;
import com.google.cloud.hadoop.gcsio.UriPaths;
import com.google.cloud.hadoop.util.AccessTokenProvider;
import com.google.cloud.hadoop.util.AccessTokenProvider.AccessTokenType;
import com.google.cloud.hadoop.util.ApiErrorExtractor;
import com.google.cloud.hadoop.util.CredentialFactory;
import com.google.cloud.hadoop.util.CredentialFactory.CredentialHttpRetryInitializer;
import com.google.cloud.hadoop.util.CredentialFromAccessTokenProviderClassFactory;
import com.google.cloud.hadoop.util.GoogleCloudStorageEventBus;
import com.google.cloud.hadoop.util.GoogleCredentialWithIamAccessToken;
import com.google.cloud.hadoop.util.HadoopCredentialConfiguration;
import com.google.cloud.hadoop.util.HttpTransportFactory;
import com.google.cloud.hadoop.util.ITraceFactory;
import com.google.cloud.hadoop.util.PropertyUtil;
import com.google.cloud.hadoop.util.TraceFactory;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Ascii;
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.flogger.GoogleLogger;
import com.google.common.io.BaseEncoding;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.nio.file.DirectoryNotEmptyException;
import java.security.GeneralSecurityException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadFactory;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.GlobPattern;
import org.apache.hadoop.fs.GlobalStorageStatistics;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.fs.XAttrSetFlag;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.ProviderUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.Progressable;
/**
* This class provides a Hadoop compatible File System on top of Google Cloud Storage (GCS).
*
 * <p>It is implemented as a thin abstraction layer on top of GCS. The layer hides any specific
 * characteristics of the underlying store and exposes the FileSystem interface understood by the
 * Hadoop engine.
*
 * <p>Users interact with the files in the storage using fully qualified URIs. The file system
* exposed by this class is identified using the 'gs' scheme. For example, {@code
* gs://dir1/dir2/file1.txt}.
*
 * <p>This implementation translates paths between hadoop Path and GCS URI with the convention that
* the Hadoop root directly corresponds to the GCS "root", e.g. gs:/. This is convenient for many
* reasons, such as data portability and close equivalence to gsutil paths, but imposes certain
* inherited constraints, such as files not being allowed in root (only 'directories' can be placed
* in root), and directory names inside root have a more limited set of allowed characters.
*
 * <p>One of the main goals of this implementation is to maintain compatibility with the behavior
 * of the HDFS implementation when accessed through the FileSystem interface. The HDFS
 * implementation is not very consistent about the cases when it throws versus the cases when
 * methods return false. We run GHFS tests and HDFS tests against the same test data and use that
 * as a guide to decide whether to throw or to return false.
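 *
 * <p>Example usage through the Hadoop {@link FileSystem} API (an illustrative sketch; the bucket
 * name {@code example-bucket} and object paths below are hypothetical):
 *
 * <pre>{@code
 * Configuration conf = new Configuration();
 * FileSystem fs = FileSystem.get(URI.create("gs://example-bucket/"), conf);
 * try (FSDataOutputStream out = fs.create(new Path("gs://example-bucket/dir/data.txt"))) {
 *   out.write("hello".getBytes(StandardCharsets.UTF_8));
 * }
 * FileStatus status = fs.getFileStatus(new Path("gs://example-bucket/dir/data.txt"));
 * }</pre>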
*/
public abstract class GoogleHadoopFileSystemBase extends FileSystem
implements FileSystemDescriptor {
private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();
static final String SCHEME = GoogleCloudStorageFileSystem.SCHEME;
// Request only object fields that are used in Hadoop FileStatus:
// https://cloud.google.com/storage/docs/json_api/v1/objects#resource-representations
private static final String OBJECT_FIELDS = "bucket,name,size,updated";
private static final ListFileOptions LIST_OPTIONS =
ListFileOptions.DEFAULT.toBuilder().setFields(OBJECT_FIELDS).build();
/**
* Available types for use with {@link
* GoogleHadoopFileSystemConfiguration#GCS_OUTPUT_STREAM_TYPE}.
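   *
   * <p>For example, given a Hadoop {@link Configuration} {@code conf} (an illustrative sketch):
   *
   * <pre>{@code
   * conf.setEnum(GCS_OUTPUT_STREAM_TYPE.getKey(), OutputStreamType.SYNCABLE_COMPOSITE);
   * }</pre>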
*/
public enum OutputStreamType {
BASIC,
FLUSHABLE_COMPOSITE,
SYNCABLE_COMPOSITE
}
/**
* Available GCS checksum types for use with {@link
* GoogleHadoopFileSystemConfiguration#GCS_FILE_CHECKSUM_TYPE}.
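   *
   * <p>For example, given a Hadoop {@link Configuration} {@code conf} (an illustrative sketch):
   *
   * <pre>{@code
   * conf.setEnum(GCS_FILE_CHECKSUM_TYPE.getKey(), GcsFileChecksumType.CRC32C);
   * }</pre>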
*/
public enum GcsFileChecksumType {
NONE(null, 0),
CRC32C("COMPOSITE-CRC32C", 4),
MD5("MD5", 16);
private final String algorithmName;
private final int byteLength;
GcsFileChecksumType(String algorithmName, int byteLength) {
this.algorithmName = algorithmName;
this.byteLength = byteLength;
}
public String getAlgorithmName() {
return algorithmName;
}
public int getByteLength() {
return byteLength;
}
}
/**
* Available GCS glob algorithms for use with {@link
* GoogleHadoopFileSystemConfiguration#GCS_GLOB_ALGORITHM}.
*/
public enum GlobAlgorithm {
CONCURRENT,
DEFAULT,
FLAT
}
/** Default value of replication factor. */
public static final short REPLICATION_FACTOR_DEFAULT = 3;
/** Default PathFilter that accepts all paths. */
public static final PathFilter DEFAULT_FILTER = path -> true;
/** A resource file containing GCS related build properties. */
public static final String PROPERTIES_FILE = "gcs.properties";
/** The key in the PROPERTIES_FILE that contains the version built. */
public static final String VERSION_PROPERTY = "gcs.connector.version";
/** The version returned when one cannot be found in properties. */
public static final String UNKNOWN_VERSION = "0.0.0";
/** Current version. */
public static final String VERSION;
/** Identifies this version of the GoogleHadoopFileSystemBase library. */
public static final String GHFS_ID;
static {
VERSION =
PropertyUtil.getPropertyOrDefault(
GoogleHadoopFileSystemBase.class, PROPERTIES_FILE, VERSION_PROPERTY, UNKNOWN_VERSION);
logger.atFine().log("GHFS version: %s", VERSION);
GHFS_ID = String.format("GHFS/%s", VERSION);
}
private static final String XATTR_KEY_PREFIX = "GHFS_XATTR_";
// Use empty array as null value because GCS API already uses null value to remove metadata key
private static final byte[] XATTR_NULL_VALUE = new byte[0];
private static final ThreadFactory DAEMON_THREAD_FACTORY =
new ThreadFactoryBuilder().setNameFormat("ghfs-thread-%d").setDaemon(true).build();
@VisibleForTesting GlobAlgorithm globAlgorithm = GCS_GLOB_ALGORITHM.getDefault();
private GcsFileChecksumType checksumType = GCS_FILE_CHECKSUM_TYPE.getDefault();
  /** The URI passed to this file system in {@code initialize()}. */
protected URI initUri;
/** Delegation token support */
protected GcsDelegationTokens delegationTokens = null;
/** Underlying GCS file system object. */
  private Supplier<GoogleCloudStorageFileSystem> gcsFsSupplier;
private boolean gcsFsInitialized = false;
/**
* Current working directory; overridden in initialize() if {@link
* GoogleHadoopFileSystemConfiguration#GCS_WORKING_DIRECTORY} is set.
*/
private Path workingDirectory;
/**
* Default block size. Note that this is the size that is reported to Hadoop FS clients. It does
* not modify the actual block size of an underlying GCS object, because GCS JSON API does not
* allow modifying or querying the value. Modifying this value allows one to control how many
* mappers are used to process a given file.
*/
protected long defaultBlockSize = BLOCK_SIZE.getDefault();
/** The fixed reported permission of all files. */
private FsPermission reportedPermissions;
private ITraceFactory traceFactory = TraceFactory.get(/* isEnabled */ false);
ITraceFactory getTraceFactory() {
return this.traceFactory;
}
private final GhfsStorageStatistics storageStatistics;
/**
* GCS {@link FileChecksum} which takes constructor parameters to define the return values of the
* various abstract methods of {@link FileChecksum}.
*/
private static class GcsFileChecksum extends FileChecksum {
private final GcsFileChecksumType checksumType;
private final byte[] bytes;
public GcsFileChecksum(GcsFileChecksumType checksumType, byte[] bytes) {
this.checksumType = checksumType;
this.bytes = bytes;
checkState(
bytes == null || bytes.length == checksumType.getByteLength(),
"Checksum value length (%s) should be equal to the algorithm byte length (%s)",
          bytes.length,
          checksumType.getByteLength());
}
@Override
public String getAlgorithmName() {
return checksumType.getAlgorithmName();
}
@Override
public int getLength() {
return checksumType.getByteLength();
}
@Override
public byte[] getBytes() {
return bytes;
}
@Override
public void readFields(DataInput in) throws IOException {
in.readFully(bytes);
}
@Override
public void write(DataOutput out) throws IOException {
out.write(bytes);
}
@Override
public String toString() {
return String.format(
"%s: %s", getAlgorithmName(), bytes == null ? null : BaseEncoding.base16().encode(bytes));
}
}
/**
* Constructs an instance of GoogleHadoopFileSystemBase; the internal {@link
* GoogleCloudStorageFileSystem} will be set up with config settings when initialize() is called.
*/
public GoogleHadoopFileSystemBase() {
    // Inserts into GlobalStorageStatistics. A Spark plugin, for example, can query this and
    // register it with the Spark metrics system.
StorageStatistics globalStats =
GlobalStorageStatistics.INSTANCE.put(
GhfsStorageStatistics.NAME, () -> new GhfsStorageStatistics());
if (GhfsStorageStatistics.class.isAssignableFrom(globalStats.getClass())) {
storageStatistics = (GhfsStorageStatistics) globalStats;
} else {
logger.atWarning().log(
"Encountered an error while registering to GlobalStorageStatistics. Some of the GCS connector metrics will not be reported to metrics sinks. globalStatsClassLoader=<%s>; classLoader=<%s>",
globalStats.getClass().getClassLoader(), GhfsStorageStatistics.class.getClassLoader());
storageStatistics = GhfsStorageStatistics.DUMMY_INSTANCE;
}
GoogleCloudStorageEventBus.register(storageStatistics);
}
/**
* Constructs an instance of {@link GoogleHadoopFileSystemBase} using the provided
* GoogleCloudStorageFileSystem; initialize() will not re-initialize it.
*/
// TODO(b/120887495): This @VisibleForTesting annotation was being ignored by prod code.
// Please check that removing it is correct, and remove this comment along with it.
// @VisibleForTesting
GoogleHadoopFileSystemBase(GoogleCloudStorageFileSystem gcsFs) {
this();
checkNotNull(gcsFs, "gcsFs must not be null");
setGcsFs(gcsFs);
}
private void setGcsFs(GoogleCloudStorageFileSystem gcsFs) {
this.gcsFsSupplier = Suppliers.ofInstance(gcsFs);
this.gcsFsInitialized = true;
}
/**
* Returns an unqualified path without any leading slash, relative to the filesystem root, which
* serves as the home directory of the current user; see {@code getHomeDirectory} for a
* description of what the home directory means.
*/
protected abstract String getHomeDirectorySubpath();
/**
* Gets Hadoop path corresponding to the given GCS path.
*
* @param gcsPath Fully-qualified GCS path, of the form gs://bucket/object-path.
*/
public abstract Path getHadoopPath(URI gcsPath);
/**
* Gets GCS path corresponding to the given Hadoop path, which can be relative or absolute, and
* can have either {@code gs://} or {@code gs:/} forms.
*
* @param hadoopPath Hadoop path.
*/
public abstract URI getGcsPath(Path hadoopPath);
/** Gets the default value of working directory. */
public abstract Path getDefaultWorkingDirectory();
// =================================================================
// Methods implementing FileSystemDescriptor interface; these define the way
// paths are translated between Hadoop and GCS.
// =================================================================
@Override
public abstract Path getFileSystemRoot();
@Override
public abstract String getScheme();
/**
* Overridden to make root its own parent. This is POSIX compliant, but more importantly guards
* against poor directory accounting in the PathData class of Hadoop 2's FsShell.
*/
@Override
public Path makeQualified(final Path path) {
Path qualifiedPath = super.makeQualified(path);
URI uri = qualifiedPath.toUri();
checkState(
"".equals(uri.getPath()) || qualifiedPath.isAbsolute(),
"Path '%s' must be fully qualified.",
qualifiedPath);
    // Strip initial '..'s to make root its own parent.
StringBuilder sb = new StringBuilder(uri.getPath());
while (sb.indexOf("/../") == 0) {
// Leave a preceding slash, so path is still absolute.
sb.delete(0, 3);
}
String strippedPath = sb.toString();
// Allow a Path of gs://someBucket to map to gs://someBucket/
if (strippedPath.equals("/..") || strippedPath.equals("")) {
strippedPath = "/";
}
Path result = new Path(uri.getScheme(), uri.getAuthority(), strippedPath);
logger.atFiner().log("makeQualified(path: %s): %s", path, result);
return result;
}
@Override
protected void checkPath(Path path) {
URI uri = path.toUri();
String scheme = uri.getScheme();
// Only check that the scheme matches. The authority and path will be
// validated later.
if (scheme == null || scheme.equalsIgnoreCase(getScheme())) {
return;
}
GoogleCloudStorageEventBus.postOnException();
String msg =
String.format(
"Wrong FS scheme: %s, in path: %s, expected scheme: %s", scheme, path, getScheme());
throw new IllegalArgumentException(msg);
}
/**
* Initializes this file system instance.
*
   * <p>Note: The path passed to this method can be the path of any file or directory. It does not
   * matter, because the only thing we check is whether it uses the 'gs' scheme. The rest is
   * ignored.
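   *
   * <p>Callers normally go through {@code FileSystem.get(uri, conf)}, which invokes this method;
   * a direct call looks like the following sketch (the bucket name is hypothetical):
   *
   * <pre>{@code
   * GoogleHadoopFileSystem ghfs = new GoogleHadoopFileSystem();
   * ghfs.initialize(URI.create("gs://example-bucket/"), new Configuration());
   * }</pre>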
*
* @param path URI of a file/directory within this file system.
* @param config Hadoop configuration.
*/
@Override
public void initialize(URI path, Configuration config) throws IOException {
logger.atFiner().log("initialize(path: %s, config: %s)", path, config);
checkArgument(path != null, "path must not be null");
checkArgument(config != null, "config must not be null");
checkArgument(path.getScheme() != null, "scheme of path must not be null");
checkArgument(path.getScheme().equals(getScheme()), "URI scheme not supported: %s", path);
config =
ProviderUtils.excludeIncompatibleCredentialProviders(config, GoogleHadoopFileSystem.class);
super.initialize(path, config);
initUri = path;
// Set this configuration as the default config for this instance; configure()
// will perform some file-system-specific adjustments, but the original should
// be sufficient (and is required) for the delegation token binding initialization.
setConf(config);
this.traceFactory =
TraceFactory.get(GCS_OPERATION_TRACE_LOG_ENABLE.get(config, config::getBoolean));
// Initialize the delegation token support, if it is configured
initializeDelegationTokenSupport(config, path);
configure(config);
}
/**
* Initialize the delegation token support for this filesystem.
*
* @param config The filesystem configuration
* @param path The filesystem path
* @throws IOException
*/
private void initializeDelegationTokenSupport(Configuration config, URI path) throws IOException {
logger.atFiner().log("initializeDelegationTokenSupport(config: %s, path: %s)", config, path);
// Load delegation token binding, if support is configured
if (isNullOrEmpty(DELEGATION_TOKEN_BINDING_CLASS.get(config, config::get))) {
return;
}
GcsDelegationTokens dts = new GcsDelegationTokens();
Text service = new Text(getScheme() + "://" + path.getAuthority());
dts.bindToFileSystem(this, service);
dts.init(config);
dts.start();
delegationTokens = dts;
if (delegationTokens.isBoundToDT()) {
GoogleCloudStorageEventBus.postOnException();
logger.atFine().log(
"initializeDelegationTokenSupport(config: %s, path: %s): using existing delegation token",
config, path);
}
}
private void stopDelegationTokens() {
if (delegationTokens != null) {
try {
delegationTokens.close();
} catch (IOException e) {
GoogleCloudStorageEventBus.postOnException();
logger.atSevere().withCause(e).log("Failed to stop delegation tokens support");
}
}
}
/** Returns a URI of the root of this FileSystem. */
@Override
public URI getUri() {
return getFileSystemRoot().toUri();
}
/** The default port is listed as -1 as an indication that ports are not used. */
@Override
protected int getDefaultPort() {
int result = -1;
logger.atFiner().log("getDefaultPort(): %d", result);
return result;
}
public boolean hasPathCapability(Path path, String capability) throws IOException {
switch (validatePathCapabilityArgs(path, capability)) {
// TODO: remove string literals in favor of Constants in CommonPathCapabilities.java
// from Hadoop 3 when Hadoop 2 is no longer supported
case "fs.capability.paths.append":
case "fs.capability.paths.concat":
return true;
default:
return false;
}
}
private static String validatePathCapabilityArgs(Path path, String capability) {
checkNotNull(path);
checkArgument(!isNullOrEmpty(capability), "capability parameter is empty string");
return Ascii.toLowerCase(capability);
}
/**
* Opens the given file for reading.
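   *
   * <p>For example, given a {@link FileSystem} instance {@code fs} bound to the gs scheme (an
   * illustrative sketch; the path is hypothetical):
   *
   * <pre>{@code
   * try (FSDataInputStream in = fs.open(new Path("gs://example-bucket/dir/data.txt"), 4096)) {
   *   byte[] buffer = new byte[1024];
   *   int bytesRead = in.read(buffer);
   * }
   * }</pre>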
*
* @param hadoopPath File to open.
* @param bufferSize Size of buffer to use for IO.
* @return A readable stream.
* @throws FileNotFoundException if the given path does not exist.
* @throws IOException if an error occurs.
*/
@Override
public FSDataInputStream open(Path hadoopPath, int bufferSize) throws IOException {
return GhfsStorageStatistics.trackDuration(
storageStatistics,
GhfsStatistic.INVOCATION_OPEN,
hadoopPath,
this.traceFactory,
() -> {
checkArgument(hadoopPath != null, "hadoopPath must not be null");
checkOpen();
logger.atFiner().log(
"open(hadoopPath: %s, bufferSize: %d [ignored])", hadoopPath, bufferSize);
URI gcsPath = getGcsPath(hadoopPath);
GoogleCloudStorageReadOptions readChannelOptions =
getGcsFs().getOptions().getCloudStorageOptions().getReadChannelOptions();
GoogleHadoopFSInputStream in =
new GoogleHadoopFSInputStream(this, gcsPath, readChannelOptions, statistics);
return new FSDataInputStream(in);
});
}
@FunctionalInterface
  public interface InvocationRaisingIOE<R> {
/**
* Apply the operation.
*
* @throws IOException Any IO failure
*/
R apply() throws IOException;
}
/**
* Opens the given file for writing.
*
   * <p>Note: This function overrides the given bufferSize value with a higher number unless
   * further overridden using configuration parameter {@code fs.gs.outputstream.buffer.size}.
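   *
   * <p>For example, given a {@link FileSystem} instance {@code fs} bound to the gs scheme (an
   * illustrative sketch; the path is hypothetical), overwriting any existing object:
   *
   * <pre>{@code
   * try (FSDataOutputStream out = fs.create(new Path("gs://example-bucket/out.txt"), true)) {
   *   out.writeBytes("payload");
   * }
   * }</pre>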
*
* @param hadoopPath The file to open.
* @param permission Permissions to set on the new file. Ignored.
* @param overwrite If a file with this name already exists, then if true, the file will be
* overwritten, and if false an error will be thrown.
* @param bufferSize The size of the buffer to use.
* @param replication Required block replication for the file. Ignored.
* @param blockSize The block-size to be used for the new file. Ignored.
* @param progress Progress is reported through this. Ignored.
* @return A writable stream.
* @throws IOException if an error occurs.
* @see #setPermission(Path, FsPermission)
*/
@Override
public FSDataOutputStream create(
Path hadoopPath,
FsPermission permission,
boolean overwrite,
int bufferSize,
short replication,
long blockSize,
Progressable progress)
throws IOException {
return GhfsStorageStatistics.trackDuration(
storageStatistics,
GhfsStatistic.INVOCATION_CREATE,
hadoopPath,
traceFactory,
() -> {
checkArgument(hadoopPath != null, "hadoopPath must not be null");
checkArgument(replication > 0, "replication must be a positive integer: %s", replication);
checkArgument(blockSize > 0, "blockSize must be a positive integer: %s", blockSize);
checkOpen();
logger.atFiner().log(
"create(hadoopPath: %s, overwrite: %b, bufferSize: %d [ignored])",
hadoopPath, overwrite, bufferSize);
URI gcsPath = getGcsPath(hadoopPath);
OutputStreamType type = GCS_OUTPUT_STREAM_TYPE.get(getConf(), getConf()::getEnum);
OutputStream out;
switch (type) {
case BASIC:
out =
new GoogleHadoopOutputStream(
this,
gcsPath,
statistics,
CreateFileOptions.builder().setOverwriteExisting(overwrite).build());
break;
case FLUSHABLE_COMPOSITE:
SyncableOutputStreamOptions flushableOutputStreamOptions =
SyncableOutputStreamOptions.builder()
.setMinSyncInterval(
Duration.ofMillis(
GCS_OUTPUT_STREAM_SYNC_MIN_INTERVAL_MS.get(
getConf(), getConf()::getInt)))
.setSyncOnFlushEnabled(true)
.build();
out =
new GoogleHadoopSyncableOutputStream(
this,
gcsPath,
statistics,
CreateFileOptions.builder().setOverwriteExisting(overwrite).build(),
flushableOutputStreamOptions);
break;
case SYNCABLE_COMPOSITE:
SyncableOutputStreamOptions syncableOutputStreamOptions =
SyncableOutputStreamOptions.builder()
.setMinSyncInterval(
Duration.ofMillis(
GCS_OUTPUT_STREAM_SYNC_MIN_INTERVAL_MS.get(
getConf(), getConf()::getInt)))
.build();
out =
new GoogleHadoopSyncableOutputStream(
this,
gcsPath,
statistics,
CreateFileOptions.builder().setOverwriteExisting(overwrite).build(),
syncableOutputStreamOptions);
break;
default:
GoogleCloudStorageEventBus.postOnException();
throw new IOException(
String.format(
"Unsupported output stream type given for key '%s': '%s'",
GCS_OUTPUT_STREAM_TYPE.getKey(), type));
}
storageStatistics.filesCreated();
return new FSDataOutputStream(out, /* stats= */ null);
});
}
/** {@inheritDoc} */
@Override
public FSDataOutputStream createNonRecursive(
Path hadoopPath,
FsPermission permission,
      EnumSet<org.apache.hadoop.fs.CreateFlag> flags,
int bufferSize,
short replication,
long blockSize,
Progressable progress)
throws IOException {
return GhfsStorageStatistics.trackDuration(
storageStatistics,
GhfsStatistic.INVOCATION_CREATE_NON_RECURSIVE,
hadoopPath,
traceFactory,
() -> {
URI gcsPath = getGcsPath(checkNotNull(hadoopPath, "hadoopPath must not be null"));
URI parentGcsPath = UriPaths.getParentPath(gcsPath);
if (!getGcsFs().getFileInfo(parentGcsPath).exists()) {
GoogleCloudStorageEventBus.postOnException();
throw new FileNotFoundException(
String.format(
"Can not create '%s' file, because parent folder does not exist: %s",
gcsPath, parentGcsPath));
}
return create(
hadoopPath,
permission,
flags.contains(org.apache.hadoop.fs.CreateFlag.OVERWRITE),
bufferSize,
replication,
blockSize,
progress);
});
}
/**
   * Appends to an existing file (optional operation).
*
* @param hadoopPath The existing file to be appended.
* @param bufferSize The size of the buffer to be used.
* @param progress For reporting progress if it is not null.
* @return A writable stream.
* @throws IOException if an error occurs.
*/
@Override
public FSDataOutputStream append(Path hadoopPath, int bufferSize, Progressable progress)
throws IOException {
checkArgument(hadoopPath != null, "hadoopPath must not be null");
logger.atFiner().log(
"append(hadoopPath: %s, bufferSize: %d [ignored])", hadoopPath, bufferSize);
URI filePath = getGcsPath(hadoopPath);
SyncableOutputStreamOptions syncableOutputStreamOptions =
SyncableOutputStreamOptions.builder()
.setAppendEnabled(true)
.setMinSyncInterval(
Duration.ofMillis(
GCS_OUTPUT_STREAM_SYNC_MIN_INTERVAL_MS.get(getConf(), getConf()::getInt)))
.setSyncOnFlushEnabled(
GCS_OUTPUT_STREAM_TYPE.get(getConf(), getConf()::getEnum) == FLUSHABLE_COMPOSITE)
.build();
return new FSDataOutputStream(
new GoogleHadoopSyncableOutputStream(
this, filePath, statistics, DEFAULT_OVERWRITE, syncableOutputStreamOptions),
statistics);
}
/**
* Concat existing files into one file.
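   *
   * <p>For example, given a {@link FileSystem} instance {@code fs} bound to the gs scheme (an
   * illustrative sketch; paths are hypothetical):
   *
   * <pre>{@code
   * Path target = new Path("gs://example-bucket/merged.log");
   * Path[] sources = {
   *   new Path("gs://example-bucket/part-0.log"), new Path("gs://example-bucket/part-1.log")
   * };
   * fs.concat(target, sources);
   * }</pre>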
*
* @param tgt the path to the target destination.
* @param srcs the paths to the sources to use for the concatenation.
* @throws IOException IO failure
*/
@Override
public void concat(Path tgt, Path[] srcs) throws IOException {
logger.atFiner().log("concat(tgt: %s, srcs: %s)", tgt, lazy(() -> Arrays.toString(srcs)));
checkArgument(srcs.length > 0, "srcs must have at least one source");
URI tgtPath = getGcsPath(tgt);
    List<URI> srcPaths = Arrays.stream(srcs).map(this::getGcsPath).collect(toImmutableList());
checkArgument(!srcPaths.contains(tgtPath), "target must not be contained in sources");
    List<List<URI>> partitions =
Lists.partition(srcPaths, GoogleCloudStorage.MAX_COMPOSE_OBJECTS - 1);
logger.atFiner().log("concat(tgt: %s, %d partitions: %s)", tgt, partitions.size(), partitions);
    for (List<URI> partition : partitions) {
// We need to include the target in the list of sources to compose since
// the GCS FS compose operation will overwrite the target, whereas the Hadoop
// concat operation appends to the target.
      List<URI> sources = Lists.newArrayList(tgtPath);
sources.addAll(partition);
getGcsFs().compose(sources, tgtPath, CreateObjectOptions.CONTENT_TYPE_DEFAULT);
}
}
/**
* Renames src to dst. Src must not be equal to the filesystem root.
*
* @param src Source path.
* @param dst Destination path.
* @return true if successful, or false if the old name does not exist or if the new name already
* belongs to the namespace.
* @throws IOException if an error occurs.
*/
@Override
public boolean rename(Path src, Path dst) throws IOException {
return GhfsStorageStatistics.trackDuration(
storageStatistics,
GhfsStatistic.INVOCATION_RENAME,
String.format("rename(%s -> %s)", src, dst),
this.traceFactory,
() -> {
checkArgument(src != null, "src must not be null");
checkArgument(dst != null, "dst must not be null");
          // Even though the underlying GCSFS will also throw an IAE if src is root, since our
          // filesystem root happens to equal the global root, we want to explicitly check it here
          // since derived classes may not have filesystem roots equal to the global root.
if (src.makeQualified(this).equals(getFileSystemRoot())) {
logger.atFiner().log("rename(src: %s, dst: %s): false [src is a root]", src, dst);
return false;
}
try {
renameInternal(src, dst);
} catch (IOException e) {
GoogleCloudStorageEventBus.postOnException();
if (ApiErrorExtractor.INSTANCE.requestFailure(e)) {
throw e;
}
logger.atFiner().withCause(e).log("rename(src: %s, dst: %s): false [failed]", src, dst);
return false;
}
return true;
});
}
/**
* Renames src to dst.
*
* @param src Source path.
* @param dst Destination path.
* @throws IOException if an error occurs.
*/
void renameInternal(Path src, Path dst) throws IOException {
checkArgument(src != null, "src must not be null");
checkArgument(dst != null, "dst must not be null");
checkOpen();
URI srcPath = getGcsPath(src);
URI dstPath = getGcsPath(dst);
getGcsFs().rename(srcPath, dstPath);
logger.atFiner().log("rename(src: %s, dst: %s): true", src, dst);
}
/**
* Deletes the given file or directory.
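   *
   * <p>For example, given a {@link FileSystem} instance {@code fs} bound to the gs scheme (an
   * illustrative sketch; the path is hypothetical):
   *
   * <pre>{@code
   * boolean deleted = fs.delete(new Path("gs://example-bucket/tmp"), true);
   * }</pre>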
*
* @param hadoopPath The path to delete.
* @param recursive If path is a directory and set to true, the directory is deleted, else throws
* an exception. In case of a file, the recursive parameter is ignored.
* @return true if delete is successful else false.
* @throws IOException if an error occurs.
*/
@Override
public boolean delete(Path hadoopPath, boolean recursive) throws IOException {
return GhfsStorageStatistics.trackDuration(
storageStatistics,
GhfsStatistic.INVOCATION_DELETE,
hadoopPath,
traceFactory,
() -> {
checkArgument(hadoopPath != null, "hadoopPath must not be null");
checkOpen();
URI gcsPath = getGcsPath(hadoopPath);
try {
getGcsFs().delete(gcsPath, recursive);
} catch (DirectoryNotEmptyException e) {
GoogleCloudStorageEventBus.postOnException();
throw e;
} catch (IOException e) {
if (ApiErrorExtractor.INSTANCE.requestFailure(e)) {
GoogleCloudStorageEventBus.postOnException();
throw e;
}
logger.atFiner().withCause(e).log(
"delete(hadoopPath: %s, recursive: %b): false [failed]", hadoopPath, recursive);
return false;
}
logger.atFiner().log(
"delete(hadoopPath: %s, recursive: %b): true", hadoopPath, recursive);
return true;
});
}
/**
* Lists file status. If the given path points to a directory then the status of children is
* returned, otherwise the status of the given file is returned.
*
* @param hadoopPath Given path.
   * @return File status list.
   * @throws FileNotFoundException if the given path does not exist.
   * @throws IOException if an error occurs.
*/
@Override
public FileStatus[] listStatus(Path hadoopPath) throws IOException {
checkArgument(hadoopPath != null, "hadoopPath must not be null");
checkOpen();
logger.atFiner().log("listStatus(hadoopPath: %s)", hadoopPath);
URI gcsPath = getGcsPath(hadoopPath);
    List<FileStatus> status;
try {
      List<FileInfo> fileInfos = getGcsFs().listFileInfo(gcsPath, LIST_OPTIONS);
status = new ArrayList<>(fileInfos.size());
String userName = getUgiUserName();
for (FileInfo fileInfo : fileInfos) {
status.add(getFileStatus(fileInfo, userName));
}
} catch (FileNotFoundException fnfe) {
GoogleCloudStorageEventBus.postOnException();
throw (FileNotFoundException)
new FileNotFoundException(
String.format(
"listStatus(hadoopPath: %s): '%s' does not exist.", hadoopPath, gcsPath))
.initCause(fnfe);
}
return status.toArray(new FileStatus[0]);
}
/**
* Sets the current working directory to the given path.
*
* @param hadoopPath New working directory.
*/
@Override
public void setWorkingDirectory(Path hadoopPath) {
checkArgument(hadoopPath != null, "hadoopPath must not be null");
URI gcsPath = UriPaths.toDirectory(getGcsPath(hadoopPath));
Path newPath = getHadoopPath(gcsPath);
    // Ideally we should check (as we did earlier) if the given path really points to an existing
    // directory. However, that check takes a considerable amount of time, which hurts performance.
    // Given that HDFS code does not do such checks either, we choose not to do them in favor of
    // better performance.
workingDirectory = newPath;
logger.atFiner().log("setWorkingDirectory(hadoopPath: %s): %s", hadoopPath, workingDirectory);
}
/**
* Gets the current working directory.
*
* @return The current working directory.
*/
@Override
public Path getWorkingDirectory() {
logger.atFiner().log("getWorkingDirectory(): %s", workingDirectory);
return workingDirectory;
}
/**
   * Makes the given path and all non-existent parent directories. Has the semantics of Unix
   * 'mkdir -p'.
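   *
   * <p>For example, given a {@link FileSystem} instance {@code fs} bound to the gs scheme (an
   * illustrative sketch; the path is hypothetical):
   *
   * <pre>{@code
   * fs.mkdirs(new Path("gs://example-bucket/a/b/c"));
   * }</pre>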
*
* @param hadoopPath Given path.
* @param permission Permissions to set on the given directory.
* @return true on success, false otherwise.
* @throws IOException if an error occurs.
*/
@Override
public boolean mkdirs(Path hadoopPath, FsPermission permission) throws IOException {
return GhfsStorageStatistics.trackDuration(
storageStatistics,
GhfsStatistic.INVOCATION_MKDIRS,
hadoopPath,
traceFactory,
() -> {
checkArgument(hadoopPath != null, "hadoopPath must not be null");
checkOpen();
URI gcsPath = getGcsPath(hadoopPath);
try {
getGcsFs().mkdirs(gcsPath);
} catch (java.nio.file.FileAlreadyExistsException faee) {
GoogleCloudStorageEventBus.postOnException();
// Need to convert to the Hadoop flavor of FileAlreadyExistsException.
throw (FileAlreadyExistsException)
new FileAlreadyExistsException(
String.format(
"mkdirs(hadoopPath: %s, permission: %s): failed",
hadoopPath, permission))
.initCause(faee);
}
logger.atFiner().log(
"mkdirs(hadoopPath: %s, permission: %s): true", hadoopPath, permission);
return true;
});
}
/** Gets the default replication factor. */
@Override
public short getDefaultReplication() {
return REPLICATION_FACTOR_DEFAULT;
}
/**
* Gets status of the given path item.
*
* @param hadoopPath The path we want information about.
* @return A FileStatus object for the given path.
* @throws FileNotFoundException when the path does not exist;
* @throws IOException on other errors.
*/
@Override
public FileStatus getFileStatus(Path hadoopPath) throws IOException {
return GhfsStorageStatistics.trackDuration(
storageStatistics,
GhfsStatistic.INVOCATION_GET_FILE_STATUS,
hadoopPath,
traceFactory,
() -> {
checkArgument(hadoopPath != null, "hadoopPath must not be null");
checkOpen();
URI gcsPath = getGcsPath(hadoopPath);
FileInfo fileInfo = getGcsFs().getFileInfo(gcsPath);
if (!fileInfo.exists()) {
GoogleCloudStorageEventBus.postOnException();
throw new FileNotFoundException(
String.format(
"%s not found: %s", fileInfo.isDirectory() ? "Directory" : "File", hadoopPath));
}
String userName = getUgiUserName();
return getFileStatus(fileInfo, userName);
});
}
/** Gets FileStatus corresponding to the given FileInfo value. */
private FileStatus getFileStatus(FileInfo fileInfo, String userName) {
// GCS does not provide modification time. It only provides creation time.
// It works for objects because they are immutable once created.
FileStatus status =
new FileStatus(
fileInfo.getSize(),
fileInfo.isDirectory(),
REPLICATION_FACTOR_DEFAULT,
defaultBlockSize,
/* modificationTime= */ fileInfo.getModificationTime(),
/* accessTime= */ fileInfo.getModificationTime(),
reportedPermissions,
/* owner= */ userName,
/* group= */ userName,
getHadoopPath(fileInfo.getPath()));
logger.atFiner().log(
"getFileStatus(path: %s, userName: %s): %s",
fileInfo.getPath(), userName, lazy(() -> fileStatusToString(status)));
return status;
}
/**
* Determines based on suitability of {@code fixedPath} whether to use flat globbing logic where
* we use a single large listing during globStatus to then perform the core globbing logic
* in-memory.
*/
@VisibleForTesting
boolean couldUseFlatGlob(Path fixedPath) {
// Only works for filesystems where the base Hadoop Path scheme matches the underlying URI
// scheme for GCS.
if (!getUri().getScheme().equals(SCHEME)) {
logger.atFine().log(
"Flat glob is on, but doesn't work for scheme '%s', using default behavior.",
getUri().getScheme());
return false;
}
// The full pattern should have a wildcard, otherwise there's no point doing the flat glob.
GlobPattern fullPattern = new GlobPattern(fixedPath.toString());
if (!fullPattern.hasWildcard()) {
logger.atFine().log(
"Flat glob is on, but Path '%s' has no wildcard, using default behavior.", fixedPath);
return false;
}
// To use a flat glob, there must be an authority defined.
if (isNullOrEmpty(fixedPath.toUri().getAuthority())) {
logger.atFine().log(
"Flat glob is on, but Path '%s' has a empty authority, using default behavior.",
fixedPath);
return false;
}
// And the authority must not contain a wildcard.
GlobPattern authorityPattern = new GlobPattern(fixedPath.toUri().getAuthority());
if (authorityPattern.hasWildcard()) {
logger.atFine().log(
"Flat glob is on, but Path '%s' has a wildcard authority, using default behavior.",
fixedPath);
return false;
}
return true;
}
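  /**
   * Returns the prefix of {@code path} up to (but not including) the first glob wildcard
   * character ({@code *}, {@code ?}, <code>{</code>, or {@code [}); for example (illustrative),
   * {@code "gs://bucket/foo/b*r"} is trimmed to {@code "gs://bucket/foo/b"}.
   */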
@VisibleForTesting
String trimToPrefixWithoutGlob(String path) {
char[] wildcardChars = "*?{[".toCharArray();
int trimIndex = path.length();
// Find the first occurrence of any one of the wildcard characters, or just path.length()
// if none are found.
for (char wildcard : wildcardChars) {
int wildcardIndex = path.indexOf(wildcard);
if (wildcardIndex >= 0 && wildcardIndex < trimIndex) {
trimIndex = wildcardIndex;
}
}
return path.substring(0, trimIndex);
}
/**
* Returns an array of FileStatus objects whose path names match pathPattern.
*
   * <p>Return null if pathPattern has no glob and the path does not exist. Return an empty array if
* pathPattern has a glob and no path matches it.
*
* @param pathPattern A regular expression specifying the path pattern.
* @return An array of FileStatus objects.
* @throws IOException if an error occurs.
*/
@Override
public FileStatus[] globStatus(Path pathPattern) throws IOException {
return globStatus(pathPattern, DEFAULT_FILTER);
}
/**
* Returns an array of FileStatus objects whose path names match pathPattern and is accepted by
* the user-supplied path filter. Results are sorted by their path names.
*
   * <p>Return null if pathPattern has no glob and the path does not exist. Return an empty array if
* pathPattern has a glob and no path matches it.
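   *
   * <p>For example, given a {@link FileSystem} instance {@code fs} bound to the gs scheme (an
   * illustrative sketch; the pattern is hypothetical):
   *
   * <pre>{@code
   * FileStatus[] parts = fs.globStatus(new Path("gs://example-bucket/logs/part-*"), path -> true);
   * }</pre>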
*
* @param pathPattern A regular expression specifying the path pattern.
* @param filter A user-supplied path filter.
* @return An array of FileStatus objects.
* @throws IOException if an error occurs.
*/
@Override
public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException {
return GhfsStorageStatistics.trackDuration(
storageStatistics,
GhfsStatistic.INVOCATION_GLOB_STATUS,
String.format("path=%s; pattern=%s", pathPattern, filter),
traceFactory,
() -> {
checkOpen();
logger.atFiner().log("globStatus(pathPattern: %s, filter: %s)", pathPattern, filter);
// URI does not handle glob expressions nicely, for the purpose of
// fully-qualifying a path we can URI-encode them.
// Using toString() to avoid Path(URI) constructor.
Path encodedPath = new Path(pathPattern.toUri().toString());
// We convert pathPattern to GCS path and then to Hadoop path to ensure that it ends up in
// the correct format. See note in getHadoopPath for more information.
Path encodedFixedPath = getHadoopPath(getGcsPath(encodedPath));
// Decode URI-encoded path back into a glob path.
Path fixedPath = new Path(URI.create(encodedFixedPath.toString()));
logger.atFiner().log("fixed path pattern: %s => %s", pathPattern, fixedPath);
if (globAlgorithm == GlobAlgorithm.CONCURRENT && couldUseFlatGlob(fixedPath)) {
return concurrentGlobInternal(fixedPath, filter);
}
if (globAlgorithm == GlobAlgorithm.FLAT && couldUseFlatGlob(fixedPath)) {
return flatGlobInternal(fixedPath, filter);
}
return super.globStatus(fixedPath, filter);
});
}
/**
   * Runs two glob algorithms concurrently and returns the result of whichever completes first,
   * since either one could be significantly faster than the other depending on directory layout.
*/
private FileStatus[] concurrentGlobInternal(Path fixedPath, PathFilter filter)
throws IOException {
ExecutorService globExecutor = newFixedThreadPool(2, DAEMON_THREAD_FACTORY);
try {
return globExecutor.invokeAny(
ImmutableList.of(
() -> flatGlobInternal(fixedPath, filter),
() -> super.globStatus(fixedPath, filter)));
} catch (InterruptedException e) {
GoogleCloudStorageEventBus.postOnException();
Thread.currentThread().interrupt();
throw new IOException(String.format("Concurrent glob execution failed: %s", e), e);
} catch (ExecutionException e) {
GoogleCloudStorageEventBus.postOnException();
throw new IOException(String.format("Concurrent glob execution failed: %s", e.getCause()), e);
} finally {
globExecutor.shutdownNow();
}
}
private FileStatus[] flatGlobInternal(Path fixedPath, PathFilter filter) throws IOException {
String pathString = fixedPath.toString();
String prefixString = trimToPrefixWithoutGlob(pathString);
Path prefixPath = new Path(prefixString);
URI prefixUri = getGcsPath(prefixPath);
if (prefixString.endsWith("/") && !prefixPath.toString().endsWith("/")) {
// Path strips a trailing slash unless it's the 'root' path. We want to keep the trailing
// slash so that we don't wastefully list sibling files which may match the directory-name
// as a strict prefix but would've been omitted due to not containing the '/' at the end.
prefixUri = UriPaths.toDirectory(prefixUri);
}
// Get everything matching the non-glob prefix.
logger.atFiner().log("Listing everything with '%s' prefix", prefixUri);
    List<FileStatus> matchedStatuses = null;
String pageToken = null;
do {
      ListPage<FileInfo> infoPage =
getGcsFs().listFileInfoForPrefixPage(prefixUri, LIST_OPTIONS, pageToken);
      Collection<FileStatus> statusPage =
toFileStatusesWithImplicitDirectories(infoPage.getItems());
// TODO: refactor to use GlobPattern and PathFilter directly without helper FS
FileSystem helperFileSystem =
InMemoryGlobberFileSystem.createInstance(getConf(), getWorkingDirectory(), statusPage);
FileStatus[] matchedStatusPage = helperFileSystem.globStatus(fixedPath, filter);
if (matchedStatusPage != null) {
Collections.addAll(
(matchedStatuses == null ? matchedStatuses = new ArrayList<>() : matchedStatuses),
matchedStatusPage);
}
pageToken = infoPage.getNextPageToken();
} while (pageToken != null);
if (matchedStatuses == null || matchedStatuses.isEmpty()) {
return matchedStatuses == null ? null : new FileStatus[0];
}
matchedStatuses.sort(
        ((Comparator<FileStatus>) Comparator.<FileStatus>naturalOrder())
// Place duplicate implicit directories after real directory
.thenComparingInt((FileStatus f) -> isImplicitDirectory(f) ? 1 : 0));
// Remove duplicate file statuses that could be in the matchedStatuses
// because of pagination and implicit directories
    List<FileStatus> filteredStatuses = new ArrayList<>(matchedStatuses.size());
FileStatus lastAdded = null;
for (FileStatus fileStatus : matchedStatuses) {
if (lastAdded == null || lastAdded.compareTo(fileStatus) != 0) {
filteredStatuses.add(fileStatus);
lastAdded = fileStatus;
}
}
return filteredStatuses.toArray(new FileStatus[0]);
}
private static boolean isImplicitDirectory(FileStatus curr) {
// Modification time of 0 indicates implicit directory.
return curr.isDir() && curr.getModificationTime() == 0;
}
/** Helper method that converts {@link FileInfo} collection to {@link FileStatus} collection. */
  private Collection<FileStatus> toFileStatusesWithImplicitDirectories(
      Collection<FileInfo> fileInfos) throws IOException {
    List<FileStatus> fileStatuses = new ArrayList<>(fileInfos.size());
    Set<URI> filePaths = Sets.newHashSetWithExpectedSize(fileInfos.size());
String userName = getUgiUserName();
for (FileInfo fileInfo : fileInfos) {
filePaths.add(fileInfo.getPath());
fileStatuses.add(getFileStatus(fileInfo, userName));
}
// The flow for populating this doesn't bother to populate metadata entries for parent
// directories but we know the parent directories are expected to exist, so we'll just
// populate the missing entries explicitly here. Necessary for getFileStatus(parentOfInfo)
// to work when using an instance of this class.
for (FileInfo fileInfo : fileInfos) {
URI parentPath = UriPaths.getParentPath(fileInfo.getPath());
while (parentPath != null && !parentPath.equals(GoogleCloudStorageFileSystem.GCS_ROOT)) {
if (!filePaths.contains(parentPath)) {
logger.atFiner().log("Adding fake entry for missing parent path '%s'", parentPath);
StorageResourceId id = StorageResourceId.fromUriPath(parentPath, true);
GoogleCloudStorageItemInfo fakeItemInfo =
GoogleCloudStorageItemInfo.createInferredDirectory(id);
FileInfo fakeFileInfo = FileInfo.fromItemInfo(fakeItemInfo);
filePaths.add(parentPath);
fileStatuses.add(getFileStatus(fakeFileInfo, userName));
}
parentPath = UriPaths.getParentPath(parentPath);
}
}
return fileStatuses;
}
/** Helper method to get the UGI short user name */
private static String getUgiUserName() throws IOException {
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
return ugi.getShortUserName();
}
/**
* Returns home directory of the current user.
*
   * <p>Note: This directory is only used for Hadoop purposes. It is not the same as a user's OS
* home directory.
*/
@Override
public Path getHomeDirectory() {
Path result = new Path(getFileSystemRoot(), getHomeDirectorySubpath());
logger.atFiner().log("getHomeDirectory(): %s", result);
return result;
}
/**
* Converts the given FileStatus to its string representation.
*
* @param stat FileStatus to convert.
* @return String representation of the given FileStatus.
*/
private static String fileStatusToString(FileStatus stat) {
assert stat != null;
return String.format(
"path: %s, isDir: %s, len: %d, owner: %s",
stat.getPath().toString(), stat.isDir(), stat.getLen(), stat.getOwner());
}
/**
* {@inheritDoc}
*
   * <p>Returns the service if delegation tokens are configured, otherwise, null.
*/
@Override
public String getCanonicalServiceName() {
String service = null;
if (delegationTokens != null) {
service = delegationTokens.getService().toString();
}
logger.atFiner().log("getCanonicalServiceName(): %s", service);
return service;
}
/** Gets GCS FS instance. */
public GoogleCloudStorageFileSystem getGcsFs() {
return gcsFsSupplier.get();
}
/**
   * Loads an {@link AccessTokenProvider} implementation. If the user provided an
   * AbstractDelegationTokenBinding, we get the AccessTokenProvider from it; otherwise, if a class
   * name is provided (see {@link HadoopCredentialConfiguration#ACCESS_TOKEN_PROVIDER_IMPL_SUFFIX}),
   * we use it; otherwise the result is null.
*/
private AccessTokenProvider getAccessTokenProvider(Configuration config) throws IOException {
// Check if delegation token support is configured
AccessTokenProvider accessTokenProvider =
delegationTokens != null
// If so, use the delegation token to acquire the Google credentials
? delegationTokens.getAccessTokenProvider()
// If delegation token support is not configured, check if a
// custom AccessTokenProvider implementation is configured
: HadoopCredentialConfiguration.getAccessTokenProvider(
config, ImmutableList.of(GCS_CONFIG_PREFIX));
if (accessTokenProvider != null) {
if (accessTokenProvider.getAccessTokenType() == AccessTokenType.DOWNSCOPED) {
checkArgument(
HadoopCredentialConfiguration.ENABLE_NULL_CREDENTIAL_SUFFIX
.withPrefixes(
HadoopCredentialConfiguration.getConfigKeyPrefixes(GCS_CONFIG_PREFIX))
.get(config, config::getBoolean)
&& !HadoopCredentialConfiguration.ENABLE_SERVICE_ACCOUNTS_SUFFIX
.withPrefixes(
HadoopCredentialConfiguration.getConfigKeyPrefixes(GCS_CONFIG_PREFIX))
.get(config, config::getBoolean),
"When using DOWNSCOPED access token, `fs.gs.auth.null.enabled` should"
+ " be set to true and `fs.gs.auth.service.account.enable` should be set to false");
}
accessTokenProvider.setConf(config);
}
return accessTokenProvider;
}
/**
   * Retrieves the user's Credential. If the user implemented {@link AccessTokenProvider} and
   * provided the class name (see {@link
   * HadoopCredentialConfiguration#ACCESS_TOKEN_PROVIDER_IMPL_SUFFIX}), builds a credential with
   * the access token provided by this provider; otherwise obtains the credential through {@link
   * HadoopCredentialConfiguration#getCredentialFactory(Configuration, String...)}.
*/
private Credential getCredential(
Configuration config,
GoogleCloudStorageFileSystemOptions gcsFsOptions,
AccessTokenProvider accessTokenProvider)
throws IOException, GeneralSecurityException {
Credential credential;
if (accessTokenProvider == null) {
// If delegation token support is not configured, check if a
// custom AccessTokenProvider implementation is configured, and attempt
// to acquire the Google credentials using it
credential =
CredentialFromAccessTokenProviderClassFactory.credential(
config, ImmutableList.of(GCS_CONFIG_PREFIX), CredentialFactory.DEFAULT_SCOPES);
if (credential == null) {
// Finally, if no credentials have been acquired at this point, employ
// the default mechanism.
credential =
HadoopCredentialConfiguration.getCredentialFactory(config, GCS_CONFIG_PREFIX)
.getCredential(CredentialFactory.DEFAULT_SCOPES);
}
} else {
switch (accessTokenProvider.getAccessTokenType()) {
case GENERIC:
// check if an AccessTokenProvider is configured
// if so, try to get the credentials through the access token provider
credential =
CredentialFromAccessTokenProviderClassFactory.credential(
accessTokenProvider, CredentialFactory.DEFAULT_SCOPES);
break;
case DOWNSCOPED:
          // If the AccessTokenType is set to DOWNSCOPED, Credential will be generated
// when GCS requests are created.
credential = null;
break;
default:
GoogleCloudStorageEventBus.postOnException();
throw new IllegalStateException(
String.format(
"Unknown AccessTokenType: %s", accessTokenProvider.getAccessTokenType()));
}
}
// If impersonation service account exists, then use current credential to request access token
// for the impersonating service account.
return getImpersonatedCredential(config, gcsFsOptions, credential).orElse(credential);
}
/**
* Generate a {@link Credential} from the internal access token provider based on the service
* account to impersonate.
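   *
   * <p>Impersonation is driven by configuration; for example (an illustrative sketch; the exact
   * key name is derived from the {@link HadoopCredentialConfiguration} impersonation suffixes,
   * and the service account shown is hypothetical):
   *
   * <pre>{@code
   * conf.set("fs.gs.auth.impersonation.service.account", "target-sa@my-project.iam.gserviceaccount.com");
   * }</pre>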
*/
  private static Optional<Credential> getImpersonatedCredential(
Configuration config, GoogleCloudStorageFileSystemOptions gcsFsOptions, Credential credential)
throws IOException {
    Map<String, String> userImpersonationServiceAccounts =
USER_IMPERSONATION_SERVICE_ACCOUNT_SUFFIX
.withPrefixes(CONFIG_KEY_PREFIXES)
.getPropsWithPrefix(config);
    Map<String, String> groupImpersonationServiceAccounts =
GROUP_IMPERSONATION_SERVICE_ACCOUNT_SUFFIX
.withPrefixes(CONFIG_KEY_PREFIXES)
.getPropsWithPrefix(config);
String impersonationServiceAccount =
IMPERSONATION_SERVICE_ACCOUNT_SUFFIX
.withPrefixes(CONFIG_KEY_PREFIXES)
.get(config, config::get);
// Exit early if impersonation is not configured
if (userImpersonationServiceAccounts.isEmpty()
&& groupImpersonationServiceAccounts.isEmpty()
&& isNullOrEmpty(impersonationServiceAccount)) {
return Optional.empty();
}
UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
    Optional<String> serviceAccountToImpersonate =
Stream.of(
() ->
getServiceAccountToImpersonateForUserGroup(
userImpersonationServiceAccounts,
ImmutableList.of(currentUser.getShortUserName())),
() ->
getServiceAccountToImpersonateForUserGroup(
groupImpersonationServiceAccounts,
ImmutableList.copyOf(currentUser.getGroupNames())),
                (Supplier<Optional<String>>) () -> Optional.ofNullable(impersonationServiceAccount))
.map(Supplier::get)
.filter(Optional::isPresent)
.map(Optional::get)
.filter(sa -> !isNullOrEmpty(sa))
.findFirst();
if (serviceAccountToImpersonate.isPresent()) {
GoogleCloudStorageOptions options = gcsFsOptions.getCloudStorageOptions();
HttpTransport httpTransport =
HttpTransportFactory.createHttpTransport(
options.getTransportType(),
options.getProxyAddress(),
options.getProxyUsername(),
options.getProxyPassword(),
Duration.ofMillis(options.getHttpRequestReadTimeout()));
GoogleCredential impersonatedCredential =
new GoogleCredentialWithIamAccessToken(
httpTransport,
new CredentialHttpRetryInitializer(credential),
serviceAccountToImpersonate.get(),
CredentialFactory.DEFAULT_SCOPES);
logger.atFine().log(
"Impersonating '%s' service account for '%s' user",
serviceAccountToImpersonate.get(), currentUser);
return Optional.of(impersonatedCredential.createScoped(CredentialFactory.DEFAULT_SCOPES));
}
return Optional.empty();
}
  private static Optional<String> getServiceAccountToImpersonateForUserGroup(
      Map<String, String> serviceAccountMapping, List<String> userGroups) {
return serviceAccountMapping.entrySet().stream()
.filter(e -> userGroups.contains(e.getKey()))
.map(Map.Entry::getValue)
.findFirst();
}
/**
* Configures GHFS using the supplied configuration.
*
* @param config Hadoop configuration object.
*/
private synchronized void configure(Configuration config) throws IOException {
logger.atFiner().log("GHFS_ID=%s: configure(config: %s)", GHFS_ID, config);
// Set this configuration as the default config for this instance.
setConf(config);
globAlgorithm = GCS_GLOB_ALGORITHM.get(config, config::getEnum);
checksumType = GCS_FILE_CHECKSUM_TYPE.get(config, config::getEnum);
defaultBlockSize = BLOCK_SIZE.get(config, config::getLong);
reportedPermissions = new FsPermission(PERMISSIONS_TO_REPORT.get(config, config::get));
if (gcsFsSupplier == null) {
if (GCS_LAZY_INITIALIZATION_ENABLE.get(config, config::getBoolean)) {
gcsFsSupplier =
Suppliers.memoize(
() -> {
try {
GoogleCloudStorageFileSystem gcsFs = createGcsFs(config);
configureBuckets(gcsFs);
configureWorkingDirectory(config);
gcsFsInitialized = true;
return gcsFs;
} catch (IOException e) {
GoogleCloudStorageEventBus.postOnException();
throw new RuntimeException("Failed to create GCS FS", e);
}
});
} else {
setGcsFs(createGcsFs(config));
configureBuckets(getGcsFs());
configureWorkingDirectory(config);
}
} else {
configureBuckets(getGcsFs());
configureWorkingDirectory(config);
}
}
private GoogleCloudStorageFileSystem createGcsFs(Configuration config) throws IOException {
GoogleCloudStorageFileSystemOptions gcsFsOptions =
GoogleHadoopFileSystemConfiguration.getGcsFsOptionsBuilder(config).build();
AccessTokenProvider accessTokenProvider = getAccessTokenProvider(config);
Credential credential;
try {
credential = getCredential(config, gcsFsOptions, accessTokenProvider);
} catch (GeneralSecurityException e) {
GoogleCloudStorageEventBus.postOnException();
throw new RuntimeException(e);
}
return new GoogleCloudStorageFileSystem(
credential,
accessTokenProvider != null
&& accessTokenProvider.getAccessTokenType() == AccessTokenType.DOWNSCOPED
? accessBoundaries -> accessTokenProvider.getAccessToken(accessBoundaries).getToken()
: null,
gcsFsOptions);
}
/**
* Validates and possibly creates buckets needed by subclass.
*
* @param gcsFs {@link GoogleCloudStorageFileSystem} to configure buckets
* @throws IOException if bucket name is invalid or cannot be found.
*/
@VisibleForTesting
protected abstract void configureBuckets(GoogleCloudStorageFileSystem gcsFs) throws IOException;
private void configureWorkingDirectory(Configuration config) {
// Set initial working directory to root so that any configured value gets resolved
// against file system root.
workingDirectory = getFileSystemRoot();
Path newWorkingDirectory;
String configWorkingDirectory = GCS_WORKING_DIRECTORY.get(config, config::get);
if (isNullOrEmpty(configWorkingDirectory)) {
newWorkingDirectory = getDefaultWorkingDirectory();
logger.atWarning().log(
"No working directory configured, using default: '%s'", newWorkingDirectory);
} else {
newWorkingDirectory = new Path(configWorkingDirectory);
}
// Use the public method to ensure proper behavior of normalizing and resolving the new
// working directory relative to the initial filesystem-root directory.
setWorkingDirectory(newWorkingDirectory);
logger.atFiner().log(
"Configured working directory: %s = %s",
GCS_WORKING_DIRECTORY.getKey(), getWorkingDirectory());
}
/** Assert that the FileSystem has been initialized and not close()d. */
private void checkOpen() throws IOException {
if (isClosed()) {
GoogleCloudStorageEventBus.postOnException();
throw new IOException("GoogleHadoopFileSystem has been closed or not initialized.");
}
}
private boolean isClosed() {
return gcsFsSupplier == null || gcsFsSupplier.get() == null;
}
  // =================================================================
  // Overridden functions for debug tracing. The following functions
  // do not change functionality; they only log their parameters and
  // delegate to the base class implementation.
  // =================================================================
@Override
public boolean deleteOnExit(Path f) throws IOException {
checkOpen();
boolean result = super.deleteOnExit(f);
logger.atFiner().log("deleteOnExit(path: %s): %b", f, result);
return result;
}
@Override
protected void processDeleteOnExit() {
logger.atFiner().log("processDeleteOnExit()");
super.processDeleteOnExit();
}
@Override
public ContentSummary getContentSummary(Path f) throws IOException {
ContentSummary result = super.getContentSummary(f);
logger.atFiner().log("getContentSummary(path: %s): %b", f, result);
return result;
}
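  // Returns a bound or newly issued delegation token when a delegation token binding is
  // configured (see DELEGATION_TOKEN_BINDING_CLASS); otherwise returns null.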
@Override
  public Token<?> getDelegationToken(String renewer) throws IOException {
    Token<?> result = null;
if (delegationTokens != null) {
result = delegationTokens.getBoundOrNewDT(renewer);
}
logger.atFiner().log("getDelegationToken(renewer: %s): %s", renewer, result);
return result;
}
@Override
public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path[] srcs, Path dst)
throws IOException {
logger.atFiner().log(
"copyFromLocalFile(delSrc: %b, overwrite: %b, %d srcs, dst: %s)",
delSrc, overwrite, srcs.length, dst);
super.copyFromLocalFile(delSrc, overwrite, srcs, dst);
}
@Override
public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst)
throws IOException {
logger.atFiner().log(
"copyFromLocalFile(delSrc: %b, overwrite: %b, src: %s, dst: %s)",
delSrc, overwrite, src, dst);
super.copyFromLocalFile(delSrc, overwrite, src, dst);
}
@Override
public void copyToLocalFile(boolean delSrc, Path src, Path dst) throws IOException {
logger.atFiner().log("copyToLocalFile(delSrc: %b, src: %s, dst: %s)", delSrc, src, dst);
super.copyToLocalFile(delSrc, src, dst);
}
@Override
public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) throws IOException {
Path result = super.startLocalOutput(fsOutputFile, tmpLocalFile);
logger.atFiner().log(
"startLocalOutput(fsOutputFile: %s, tmpLocalFile: %s): %s",
fsOutputFile, tmpLocalFile, result);
return result;
}
@Override
public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) throws IOException {
    logger.atFiner().log(
        "completeLocalOutput(fsOutputFile: %s, tmpLocalFile: %s)", fsOutputFile, tmpLocalFile);
super.completeLocalOutput(fsOutputFile, tmpLocalFile);
}
@Override
public void close() throws IOException {
logger.atFiner().log("close()");
super.close();
// NB: We must *first* have the superclass close() before we close the underlying gcsFsSupplier
// since the superclass may decide to perform various heavyweight cleanup operations (such as
// deleteOnExit).
if (gcsFsSupplier != null) {
if (gcsFsInitialized) {
getGcsFs().close();
}
gcsFsSupplier = null;
}
stopDelegationTokens();
}
@Override
public long getUsed() throws IOException {
long result = super.getUsed();
logger.atFiner().log("getUsed(): %s", result);
return result;
}
@Override
public long getDefaultBlockSize() {
long result = defaultBlockSize;
logger.atFiner().log("getDefaultBlockSize(): %d", result);
return result;
}
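  /**
   * Returns the checksum that Hadoop should report for the given path. The algorithm is selected
   * by {@code GCS_FILE_CHECKSUM_TYPE} (NONE, CRC32C, or MD5); NONE yields {@code null}, which
   * tells Hadoop that no checksum is available. A hypothetical configuration sketch:
   *
   * <pre>{@code
   * conf.set(GCS_FILE_CHECKSUM_TYPE.getKey(), "CRC32C");
   * }</pre>
   */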
@Override
public FileChecksum getFileChecksum(Path hadoopPath) throws IOException {
storageStatistics.getFileCheckSum();
checkArgument(hadoopPath != null, "hadoopPath must not be null");
checkOpen();
URI gcsPath = getGcsPath(hadoopPath);
final FileInfo fileInfo = getGcsFs().getFileInfo(gcsPath);
if (!fileInfo.exists()) {
GoogleCloudStorageEventBus.postOnException();
throw new FileNotFoundException(
String.format(
"%s not found: %s", fileInfo.isDirectory() ? "Directory" : "File", hadoopPath));
}
FileChecksum checksum = getFileChecksum(checksumType, fileInfo);
logger.atFiner().log(
"getFileChecksum(hadoopPath: %s [gcsPath: %s]): %s", hadoopPath, gcsPath, checksum);
return checksum;
}
private static FileChecksum getFileChecksum(GcsFileChecksumType type, FileInfo fileInfo)
throws IOException {
switch (type) {
case NONE:
return null;
case CRC32C:
return new GcsFileChecksum(type, fileInfo.getCrc32cChecksum());
case MD5:
return new GcsFileChecksum(type, fileInfo.getMd5Checksum());
}
GoogleCloudStorageEventBus.postOnException();
throw new IOException("Unrecognized GcsFileChecksumType: " + type);
}
@Override
public void setVerifyChecksum(boolean verifyChecksum) {
logger.atFiner().log("setVerifyChecksum(verifyChecksum: %s)", verifyChecksum);
super.setVerifyChecksum(verifyChecksum);
}
@Override
public void setPermission(Path p, FsPermission permission) throws IOException {
logger.atFiner().log("setPermission(path: %s, permission: %s)", p, permission);
super.setPermission(p, permission);
}
@Override
public void setOwner(Path p, String username, String groupname) throws IOException {
logger.atFiner().log("setOwner(path: %s, username: %s, groupname: %s)", p, username, groupname);
super.setOwner(p, username, groupname);
}
@Override
public void setTimes(Path p, long mtime, long atime) throws IOException {
logger.atFiner().log("setTimes(path: %s, mtime: %d, atime: %d)", p, mtime, atime);
super.setTimes(p, mtime, atime);
}
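  // The XAttr overrides below expose GCS object metadata as Hadoop extended attributes. A
  // hypothetical usage sketch from client code ("example.key" is an arbitrary attribute name):
  //   fs.setXAttr(path, "example.key", "value".getBytes(UTF_8)); // defaults to CREATE | REPLACE
  //   byte[] value = fs.getXAttr(path, "example.key");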
/** {@inheritDoc} */
@Override
public byte[] getXAttr(Path path, String name) throws IOException {
return GhfsStorageStatistics.trackDuration(
storageStatistics,
GhfsStatistic.INVOCATION_XATTR_GET_NAMED,
path,
traceFactory,
() -> {
checkNotNull(path, "path should not be null");
checkNotNull(name, "name should not be null");
          Map<String, byte[]> attributes = getGcsFs().getFileInfo(getGcsPath(path)).getAttributes();
String xAttrKey = getXAttrKey(name);
          byte[] xAttr =
              attributes.containsKey(xAttrKey) ? getXAttrValue(attributes.get(xAttrKey)) : null;
          logger.atFiner().log(
              "getXAttr(path: %s, name: %s): %s",
              path, name, lazy(() -> xAttr == null ? null : new String(xAttr, UTF_8)));
return xAttr;
});
}
/** {@inheritDoc} */
@Override
  public Map<String, byte[]> getXAttrs(Path path) throws IOException {
return GhfsStorageStatistics.trackDuration(
storageStatistics,
GhfsStatistic.INVOCATION_XATTR_GET_MAP,
path,
traceFactory,
() -> {
checkNotNull(path, "path should not be null");
FileInfo fileInfo = getGcsFs().getFileInfo(getGcsPath(path));
          Map<String, byte[]> xAttrs =
fileInfo.getAttributes().entrySet().stream()
.filter(a -> isXAttr(a.getKey()))
.collect(
HashMap::new,
(m, a) -> m.put(getXAttrName(a.getKey()), getXAttrValue(a.getValue())),
Map::putAll);
logger.atFiner().log("getXAttrs(path: %s): %s", path, xAttrs);
return xAttrs;
});
}
/** {@inheritDoc} */
@Override
  public Map<String, byte[]> getXAttrs(Path path, List<String> names) throws IOException {
return GhfsStorageStatistics.trackDuration(
storageStatistics,
GhfsStatistic.INVOCATION_XATTR_GET_NAMED_MAP,
path,
traceFactory,
() -> {
checkNotNull(path, "path should not be null");
checkNotNull(names, "names should not be null");
          Map<String, byte[]> xAttrs;
if (names.isEmpty()) {
xAttrs = new HashMap<>();
} else {
            Set<String> namesSet = new HashSet<>(names);
xAttrs =
getXAttrs(path).entrySet().stream()
.filter(a -> namesSet.contains(a.getKey()))
.collect(HashMap::new, (m, a) -> m.put(a.getKey(), a.getValue()), Map::putAll);
}
logger.atFiner().log("getXAttrs(path: %s, names: %s): %s", path, names, xAttrs);
return xAttrs;
});
}
/** {@inheritDoc} */
@Override
  public List<String> listXAttrs(Path path) throws IOException {
return GhfsStorageStatistics.trackDuration(
storageStatistics,
GhfsStatistic.INVOCATION_OP_XATTR_LIST,
path,
traceFactory,
() -> {
checkNotNull(path, "path should not be null");
FileInfo fileInfo = getGcsFs().getFileInfo(getGcsPath(path));
          List<String> xAttrs =
fileInfo.getAttributes().keySet().stream()
.filter(this::isXAttr)
.map(this::getXAttrName)
.collect(Collectors.toCollection(ArrayList::new));
logger.atFiner().log("listXAttrs(path: %s): %s", path, xAttrs);
return xAttrs;
});
}
/** {@inheritDoc} */
@Override
  public void setXAttr(Path path, String name, byte[] value, EnumSet<XAttrSetFlag> flags)
throws IOException {
    logger.atFiner().log(
        "setXAttr(path: %s, name: %s, value: %s, flags: %s)",
        path, name, lazy(() -> value == null ? null : new String(value, UTF_8)), flags);
checkNotNull(path, "path should not be null");
checkNotNull(name, "name should not be null");
checkArgument(flags != null && !flags.isEmpty(), "flags should not be null or empty");
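    // Hadoop XAttr semantics: the CREATE flag must be set to add a new attribute and REPLACE to
    // overwrite an existing one; passing both flags permits either case.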
FileInfo fileInfo = getGcsFs().getFileInfo(getGcsPath(path));
String xAttrKey = getXAttrKey(name);
    Map<String, byte[]> attributes = fileInfo.getAttributes();
if (attributes.containsKey(xAttrKey) && !flags.contains(XAttrSetFlag.REPLACE)) {
GoogleCloudStorageEventBus.postOnException();
throw new IOException(
String.format(
"REPLACE flag must be set to update XAttr (name='%s', value='%s') for '%s'",
name, new String(value, UTF_8), path));
}
if (!attributes.containsKey(xAttrKey) && !flags.contains(XAttrSetFlag.CREATE)) {
GoogleCloudStorageEventBus.postOnException();
throw new IOException(
String.format(
"CREATE flag must be set to create XAttr (name='%s', value='%s') for '%s'",
name, new String(value, UTF_8), path));
}
UpdatableItemInfo updateInfo =
new UpdatableItemInfo(
StorageResourceId.fromUriPath(fileInfo.getPath(), /* allowEmptyObjectName= */ false),
ImmutableMap.of(xAttrKey, getXAttrValue(value)));
getGcsFs().getGcs().updateItems(ImmutableList.of(updateInfo));
}
/** {@inheritDoc} */
@Override
public void removeXAttr(Path path, String name) throws IOException {
logger.atFiner().log("removeXAttr(path: %s, name: %s)", path, name);
checkNotNull(path, "path should not be null");
checkNotNull(name, "name should not be null");
FileInfo fileInfo = getGcsFs().getFileInfo(getGcsPath(path));
    Map<String, byte[]> xAttrToRemove = new HashMap<>();
xAttrToRemove.put(getXAttrKey(name), null);
UpdatableItemInfo updateInfo =
new UpdatableItemInfo(
StorageResourceId.fromUriPath(fileInfo.getPath(), /* allowEmptyObjectName= */ false),
xAttrToRemove);
getGcsFs().getGcs().updateItems(ImmutableList.of(updateInfo));
}
/**
* Get the storage statistics of this filesystem.
*
* @return the storage statistics
*/
@Override
public GhfsStorageStatistics getStorageStatistics() {
return storageStatistics;
}
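  // Extended attributes are stored as GCS object metadata entries whose keys carry
  // XATTR_KEY_PREFIX. The helpers below convert between the Hadoop-visible attribute name and
  // the stored metadata key, and substitute the XATTR_NULL_VALUE sentinel for null values.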
private boolean isXAttr(String key) {
return key != null && key.startsWith(XATTR_KEY_PREFIX);
}
private String getXAttrKey(String name) {
return XATTR_KEY_PREFIX + name;
}
private String getXAttrName(String key) {
return key.substring(XATTR_KEY_PREFIX.length());
}
private byte[] getXAttrValue(byte[] value) {
return value == null ? XATTR_NULL_VALUE : value;
}
}