com.google.cloud.hadoop.fs.gcs.GoogleHadoopGlobalRootedFileSystem Maven / Gradle / Ivy
Show all versions of gcs-connector Show documentation
/**
* Copyright 2013 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.hadoop.fs.gcs;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem;
import com.google.cloud.hadoop.gcsio.StorageResourceId;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import org.apache.hadoop.fs.Path;
import java.net.URI;
import java.net.URISyntaxException;
/**
* This class provides a Hadoop compatible File System on top of Google Cloud Storage (GCS).
*
* It is implemented as a thin abstraction layer on top of GCS. The layer hides any specific
* characteristics of the underlying store and exposes the FileSystem interface understood by the
* Hadoop engine.
*
* Users interact with the files in the storage using fully qualified URIs. The file system exposed
* by this class is identified using the 'gs' scheme. For example, {@code gs://dir1/dir2/file1.txt}.
*
* This implementation translates paths between hadoop Path and GCS URI with the convention that the
* Hadoop root directly corresponds to the GCS "root", e.g. gs:/. This is convenient for many
* reasons, such as data portability and close equivalence to gsutil paths, but imposes certain
* inherited constraints, such as files not being allowed in root (only 'directories' can be placed
* in root), and directory names inside root have a more limited set of allowed characters.
*
* One of the main goals of this implementation is to maintain compatibility with the behavior of
* the HDFS implementation when accessed through the FileSystem interface. The HDFS implementation
* is not very consistent about when it throws versus when its methods return false. We run GHFS
* tests and HDFS tests against the same test data and use that as a guide to decide whether to
* throw or to return false.
*
* @deprecated This implementation is deprecated and relies on the deprecated systemBucket property.
* Use GoogleHadoopFileSystem instead.
*/
@Deprecated
public class GoogleHadoopGlobalRootedFileSystem
extends GoogleHadoopFileSystemBase {
/**
* Constructs an instance of GoogleHadoopGlobalRootedFileSystem; the internal
* GoogleCloudStorageFileSystem will be set up with config settings when initialize() is called.
*/
public GoogleHadoopGlobalRootedFileSystem() {
super();
}
/**
 * Creates a GoogleHadoopGlobalRootedFileSystem backed by a caller-supplied
 * GoogleCloudStorageFileSystem, which is passed straight to the superclass constructor;
 * a later initialize() call will not re-initialize it.
 *
 * @param gcsfs the GoogleCloudStorageFileSystem instance this file system should use
 */
public GoogleHadoopGlobalRootedFileSystem(GoogleCloudStorageFileSystem gcsfs) {
  super(gcsfs);
}
/**
 * Builds the current user's home directory as an unqualified path, relative to the filesystem
 * root and without a leading slash; see {@code getHomeDirectory} for a description of what the
 * home directory means.
 *
 * @return the system bucket, followed by {@code /user/} and the value of the {@code user.name}
 *     system property
 */
@Override
protected String getHomeDirectorySubpath() {
  // This file system is globally-rooted, so the home directory is anchored under the system
  // bucket; otherwise bucket collisions will abound.
  final String currentUser = System.getProperty("user.name");
  return systemBucket + "/user/" + currentUser;
}
/**
* Gets Hadoop path corresponding to the given GCS path.
*
* @param gcsPath Fully-qualified GCS path, of the form {@code gs://<bucket>/<object-path>}