All downloads are free. Search and download functionality uses the official Maven repository.

com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem Maven / Gradle / Ivy

Go to download

An implementation of org.apache.hadoop.fs.FileSystem targeting Google Cloud Storage

There is a newer version: 3.0.4
Show newest version
/*
 * Copyright 2013 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.cloud.hadoop.fs.gcs;

import static com.google.common.base.Preconditions.checkArgument;

import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem;
import com.google.cloud.hadoop.gcsio.StorageResourceId;
import com.google.cloud.hadoop.gcsio.UriPaths;
import com.google.cloud.hadoop.util.GoogleCloudStorageEventBus;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.flogger.GoogleLogger;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.fs.Path;

/**
 * GoogleHadoopFileSystem is a version of GoogleHadoopFileSystemBase which is rooted in a single
 * bucket at initialization time; in this case, Hadoop paths no longer correspond directly to
 * general GCS paths, and all Hadoop operations going through this FileSystem will never touch any
 * GCS bucket other than the bucket on which this FileSystem is rooted.
 *
 * 

This implementation sacrifices a small amount of cross-bucket interoperability in favor of * more straightforward FileSystem semantics and compatibility with existing Hadoop applications. In * particular, it is not subject to bucket-naming constraints, and files are allowed to be placed in * root. */ public class GoogleHadoopFileSystem extends GoogleHadoopFileSystemBase { private static final GoogleLogger logger = GoogleLogger.forEnclosingClass(); /** URI scheme for GoogleHadoopFileSystem */ public static final String SCHEME = GoogleHadoopFileSystemBase.SCHEME; // The bucket the file system is rooted in used for default values of: // -- working directory // -- user home directories (only for Hadoop purposes). private String rootBucket; /** * Constructs an instance of GoogleHadoopFileSystem; the internal GoogleCloudStorageFileSystem * will be set up with config settings when initialize() is called. */ public GoogleHadoopFileSystem() { super(); } /** * Constructs an instance of GoogleHadoopFileSystem using the provided * GoogleCloudStorageFileSystem; initialize() will not re-initialize it. */ @VisibleForTesting GoogleHadoopFileSystem(GoogleCloudStorageFileSystem gcsfs) { super(gcsfs); } /** Sets and validates the root bucket. 
*/ @Override @VisibleForTesting protected void configureBuckets(GoogleCloudStorageFileSystem gcsFs) throws IOException { rootBucket = initUri.getAuthority(); checkArgument(rootBucket != null, "No bucket specified in GCS URI: %s", initUri); // Validate root bucket name UriPaths.fromStringPathComponents( rootBucket, /* objectName= */ null, /* allowEmptyObjectName= */ true); logger.atFiner().log( "configureBuckets: GoogleHadoopFileSystem root bucket is '%s'", rootBucket); } @Override protected void checkPath(Path path) { logger.atFiner().log("checkPath(path: %s)", path); // Validate scheme super.checkPath(path); URI uri = path.toUri(); String bucket = uri.getAuthority(); // Bucketless URIs will be qualified later if (bucket == null || bucket.equals(rootBucket)) { return; } GoogleCloudStorageEventBus.postOnException(); throw new IllegalArgumentException( String.format( "Wrong bucket: %s, in path: %s, expected bucket: %s", bucket, path, rootBucket)); } /** Get the name of the bucket in which file system is rooted. */ @VisibleForTesting String getRootBucketName() { return rootBucket; } /** Override to allow a homedir subpath which sits directly on our FileSystem root. */ @Override protected String getHomeDirectorySubpath() { return "user/" + System.getProperty("user.name"); } /** * Validates GCS Path belongs to this file system. The bucket must match the root bucket provided * at initialization time. */ @Override public Path getHadoopPath(URI gcsPath) { logger.atFiner().log("getHadoopPath(gcsPath: %s)", gcsPath); // Handle root. Delegate to getGcsPath on "gs:/" to resolve the appropriate gs:// URI. if (gcsPath.equals(getGcsPath(getFileSystemRoot()))) { return getFileSystemRoot(); } StorageResourceId resourceId = StorageResourceId.fromUriPath(gcsPath, true); // Unlike the global-rooted GHFS, gs:// has no meaning in the bucket-rooted world. 
checkArgument(!resourceId.isRoot(), "Missing authority in gcsPath '%s'", gcsPath); checkArgument( resourceId.getBucketName().equals(rootBucket), "Authority of URI '%s' doesn't match root bucket '%s'", resourceId.getBucketName(), rootBucket); Path hadoopPath = new Path(getScheme() + "://" + rootBucket + '/' + resourceId.getObjectName()); logger.atFiner().log("getHadoopPath(gcsPath: %s): %s", gcsPath, hadoopPath); return hadoopPath; } /** * Translates a "gs:/" style hadoopPath (or relative path which is not fully-qualified) into the * appropriate GCS path which is compatible with the underlying GcsFs or gsutil. */ @Override public URI getGcsPath(Path hadoopPath) { logger.atFiner().log("getGcsPath(hadoopPath: %s)", hadoopPath); // Convert to fully qualified absolute path; the Path object will callback to get our current // workingDirectory as part of fully resolving the path. Path resolvedPath = makeQualified(hadoopPath); String objectName = resolvedPath.toUri().getPath(); if (objectName != null && resolvedPath.isAbsolute()) { // Strip off leading '/' because GoogleCloudStorageFileSystem.getPath appends it explicitly // between bucket and objectName. objectName = objectName.substring(1); } // Construct GCS path uri. URI gcsPath = UriPaths.fromStringPathComponents(rootBucket, objectName, /* allowEmptyObjectName= */ true); logger.atFiner().log("getGcsPath(hadoopPath: %s): %s", hadoopPath, gcsPath); return gcsPath; } /** * As the global-rooted FileSystem, our hadoop-path "scheme" is exactly equal to the general GCS * scheme. */ @Override public String getScheme() { return GoogleCloudStorageFileSystem.SCHEME; } @Override public Path getFileSystemRoot() { return new Path(getScheme() + "://" + rootBucket + '/'); } /** Gets the default value of working directory. */ @Override public Path getDefaultWorkingDirectory() { return getFileSystemRoot(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy