com.google.cloud.hadoop.gcsio.StorageResourceId Maven / Gradle / Ivy
Show all versions of gcsio Show documentation
/**
* Copyright 2013 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.hadoop.gcsio;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.common.base.Strings;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Data struct representing either a GCS StorageObject, a GCS Bucket or the GCS root (gs://).
* If both bucketName and objectName are null, the StorageResourceId refers to GCS root (gs://).
* If bucketName is non-null, and objectName is null, then this refers to a GCS Bucket. Otherwise,
* if bucketName and objectName are both non-null, this refers to a GCS StorageObject.
*/
public class StorageResourceId {
// Pattern that parses out bucket and object names.
// Given 'gs://foo-bucket/foo/bar/baz', matcher.group(x) will return:
// 0 = gs://foo-bucket/foo/bar/baz
// 1 = foo-bucket/foo/bar/baz
// 2 = foo-bucket
// 3 = /foo/bar/baz
// 4 = foo/bar/baz
// Groups 2 and 4 can be used to create an instance.
private static final Pattern OBJECT_NAME_IN_GCS_PATTERN =
Pattern.compile("gs://(([^/]+)(/((.+))?)?)?");
// The singleton instance identifying the GCS root (gs://). Both getObjectName() and
// getBucketName() will return null.
public static final StorageResourceId ROOT = new StorageResourceId();
// Bucket name of this storage resource to be used with the Google Cloud Storage API.
private final String bucketName;
// Object name of this storage resource to be used with the Google Cloud Storage API.
private final String objectName;
// Human-readable String to be returned by toString(); kept as 'final' member for efficiency.
private final String readableString;
/**
* Constructor for a StorageResourceId that refers to the GCS root (gs://). Private because
* all external users should just use the singleton StorageResourceId.ROOT.
*/
private StorageResourceId() {
this.bucketName = null;
this.objectName = null;
this.readableString = createReadableString(bucketName, objectName);
}
/**
* Constructor for a StorageResourceId representing a Bucket; {@code getObjectName()} will return
* null for a StorageResourceId that represents a Bucket.
*
* @param bucketName The bucket name of the resource. Must be non-empty and non-null.
*/
public StorageResourceId(String bucketName) {
checkArgument(!Strings.isNullOrEmpty(bucketName),
"bucketName must not be null or empty");
this.bucketName = bucketName;
this.objectName = null;
this.readableString = createReadableString(bucketName, objectName);
}
/**
* Constructor for a StorageResourceId representing a full StorageObject, including bucketName
* and objectName.
*
* @param bucketName The bucket name of the resource. Must be non-empty and non-null.
* @param objectName The object name of the resource. Must be non-empty and non-null.
*/
public StorageResourceId(String bucketName, String objectName) {
checkArgument(!Strings.isNullOrEmpty(bucketName),
"bucketName must not be null or empty");
checkArgument(!Strings.isNullOrEmpty(objectName),
"objectName must not be null or empty");
this.bucketName = bucketName;
this.objectName = objectName;
this.readableString = createReadableString(bucketName, objectName);
}
/**
* Returns true if this StorageResourceId represents a GCS StorageObject; if true, both
* {@code getBucketName} and {@code getObjectName} will be non-empty and non-null.
*/
public boolean isStorageObject() {
return bucketName != null && objectName != null;
}
/**
* Returns true if this StorageResourceId represents a GCS Bucket; if true, then {@code
* getObjectName} will return null.
*/
public boolean isBucket() {
return bucketName != null && objectName == null;
}
/**
* Returns true if this StorageResourceId represents the GCS root (gs://); if true, then
* both {@code getBucketName} and {@code getObjectName} will be null.
*/
public boolean isRoot() {
return bucketName == null && objectName == null;
}
/**
* Indicates if this StorageResourceId corresponds to a 'directory'; similar to
* {@link FileInfo#isDirectory} except deals entirely with pathnames instead of also checking
* for exists() to be true on a corresponding GoogleCloudStorageItemInfo.
*/
public boolean isDirectory() {
return isRoot() || isBucket() || objectHasDirectoryPath(objectName);
}
/**
* Gets the bucket name component of this resource identifier.
*/
public String getBucketName() {
return bucketName;
}
/**
* Gets the object name component of this resource identifier.
*/
public String getObjectName() {
return objectName;
}
/**
* Returns a string of the form gs:///.
*/
@Override
public String toString() {
return readableString;
}
@Override
public boolean equals(Object obj) {
if (obj instanceof StorageResourceId) {
StorageResourceId other = (StorageResourceId) obj;
return Objects.equals(bucketName, other.bucketName)
&& Objects.equals(objectName, other.objectName);
}
return false;
}
@Override
public int hashCode() {
return readableString.hashCode();
}
/**
* Helper for standardizing the way various human-readable messages in logs/exceptions that refer
* to a bucket/object pair.
*/
public static String createReadableString(String bucketName, String objectName) {
if (bucketName == null && objectName == null) {
// TODO(user): Unify this method with other methods that convert bucketName/objectName
// to a URI; maybe use the single slash for compatibility.
return "gs://";
} else if (bucketName != null && objectName == null) {
return String.format("gs://%s", bucketName);
} else if (bucketName != null && objectName != null) {
return String.format("gs://%s/%s", bucketName, objectName);
}
throw new IllegalArgumentException(
String.format("Invalid bucketName/objectName pair: gs://%s/%s", bucketName, objectName));
}
/**
* Indicates whether the given object name looks like a directory path.
*
* @param objectName Name of the object to inspect.
* @return Whether the given object name looks like a directory path.
*/
static boolean objectHasDirectoryPath(String objectName) {
return !Strings.isNullOrEmpty(objectName)
&& objectName.endsWith(GoogleCloudStorage.PATH_DELIMITER);
}
/**
* Converts the given object name to look like a directory path.
* If the object name already looks like a directory path then
* this call is a no-op.
*
* If the object name is null or empty, it is returned as-is.
*
* @param objectName Name of the object to inspect.
* @return Directory path for the given path.
*/
static String convertToDirectoryPath(String objectName) {
if (!Strings.isNullOrEmpty(objectName)) {
if (!objectHasDirectoryPath(objectName)) {
objectName += GoogleCloudStorage.PATH_DELIMITER;
}
}
return objectName;
}
/**
* Parses {@link StorageResourceId} from specified string.
*/
public static StorageResourceId fromObjectName(String objectName) {
Matcher matcher = OBJECT_NAME_IN_GCS_PATTERN.matcher(objectName);
checkArgument(matcher.matches(), "'%s' is not a valid GCS object name.", objectName);
String bucketName = matcher.group(2);
String relativePath = matcher.group(4);
if (bucketName == null) {
return ROOT;
} else if (relativePath != null) {
return new StorageResourceId(bucketName, relativePath);
}
return new StorageResourceId(bucketName);
}
}