org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hadoop-aws Show documentation
Show all versions of hadoop-aws Show documentation
This module contains code to support integration with Amazon Web Services.
It also declares the dependencies needed to work with AWS services.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import com.amazonaws.services.dynamodbv2.document.Item;
import com.amazonaws.services.dynamodbv2.document.KeyAttribute;
import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
import com.amazonaws.services.dynamodbv2.model.KeyType;
import com.amazonaws.services.dynamodbv2.model.ScalarAttributeType;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.Tristate;
/**
* Defines methods for translating between domain model objects and their
* representations in the DynamoDB schema.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
@VisibleForTesting
public final class PathMetadataDynamoDBTranslation {
/** The HASH key name of each item. */
@VisibleForTesting
static final String PARENT = "parent";
/** The RANGE key name of each item. */
@VisibleForTesting
static final String CHILD = "child";
@VisibleForTesting
static final String IS_DIR = "is_dir";
@VisibleForTesting
static final String MOD_TIME = "mod_time";
@VisibleForTesting
static final String FILE_LENGTH = "file_length";
@VisibleForTesting
static final String BLOCK_SIZE = "block_size";
static final String IS_DELETED = "is_deleted";
static final String IS_AUTHORITATIVE = "is_authoritative";
static final String LAST_UPDATED = "last_updated";
static final String ETAG = "etag";
static final String VERSION_ID = "version_id";
/** Used while testing backward compatibility. */
@VisibleForTesting
static final Set IGNORED_FIELDS = new HashSet<>();
/** Table version field {@value} in version marker item. */
@VisibleForTesting
static final String TABLE_VERSION = "table_version";
/** Table creation timestampfield {@value} in version marker item. */
@VisibleForTesting
static final String TABLE_CREATED = "table_created";
/** The version marker field is invalid. */
static final String E_NOT_VERSION_MARKER = "Not a version marker: ";
/**
* Returns the key schema for the DynamoDB table.
*
* @return DynamoDB key schema
*/
static Collection keySchema() {
return Arrays.asList(
new KeySchemaElement(PARENT, KeyType.HASH),
new KeySchemaElement(CHILD, KeyType.RANGE));
}
/**
* Returns the attribute definitions for the DynamoDB table.
*
* @return DynamoDB attribute definitions
*/
static Collection attributeDefinitions() {
return Arrays.asList(
new AttributeDefinition(PARENT, ScalarAttributeType.S),
new AttributeDefinition(CHILD, ScalarAttributeType.S));
}
/**
* Converts a DynamoDB item to a {@link DDBPathMetadata}.
*
* @param item DynamoDB item to convert
* @return {@code item} converted to a {@link DDBPathMetadata}
*/
static DDBPathMetadata itemToPathMetadata(Item item, String username) {
if (item == null) {
return null;
}
String parentStr = item.getString(PARENT);
Preconditions.checkNotNull(parentStr, "No parent entry in item %s", item);
String childStr = item.getString(CHILD);
Preconditions.checkNotNull(childStr, "No child entry in item %s", item);
// Skip table version markers, which are only non-absolute paths stored.
Path rawPath = new Path(parentStr, childStr);
if (!rawPath.isAbsoluteAndSchemeAuthorityNull()) {
return null;
}
Path parent = new Path(Constants.FS_S3A + ":/" + parentStr + "/");
Path path = new Path(parent, childStr);
boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR);
boolean isAuthoritativeDir = false;
final S3AFileStatus fileStatus;
long lastUpdated = 0;
if (isDir) {
isAuthoritativeDir = !IGNORED_FIELDS.contains(IS_AUTHORITATIVE)
&& item.hasAttribute(IS_AUTHORITATIVE)
&& item.getBoolean(IS_AUTHORITATIVE);
fileStatus = DynamoDBMetadataStore.makeDirStatus(path, username);
} else {
long len = item.hasAttribute(FILE_LENGTH) ? item.getLong(FILE_LENGTH) : 0;
long modTime = item.hasAttribute(MOD_TIME) ? item.getLong(MOD_TIME) : 0;
long block = item.hasAttribute(BLOCK_SIZE) ? item.getLong(BLOCK_SIZE) : 0;
String eTag = item.getString(ETAG);
String versionId = item.getString(VERSION_ID);
fileStatus = new S3AFileStatus(
len, modTime, path, block, username, eTag, versionId);
}
lastUpdated =
!IGNORED_FIELDS.contains(LAST_UPDATED)
&& item.hasAttribute(LAST_UPDATED)
? item.getLong(LAST_UPDATED) : 0;
boolean isDeleted =
item.hasAttribute(IS_DELETED) && item.getBoolean(IS_DELETED);
return new DDBPathMetadata(fileStatus, Tristate.UNKNOWN, isDeleted,
isAuthoritativeDir, lastUpdated);
}
/**
* Converts a {@link DDBPathMetadata} to a DynamoDB item.
*
* Can ignore {@code IS_AUTHORITATIVE} flag if {@code ignoreIsAuthFlag} is
* true.
*
* @param meta {@link DDBPathMetadata} to convert
* @return {@code meta} converted to DynamoDB item
*/
static Item pathMetadataToItem(DDBPathMetadata meta) {
Preconditions.checkNotNull(meta);
final S3AFileStatus status = meta.getFileStatus();
final Item item = new Item().withPrimaryKey(pathToKey(status.getPath()));
if (status.isDirectory()) {
item.withBoolean(IS_DIR, true);
if (!IGNORED_FIELDS.contains(IS_AUTHORITATIVE)) {
item.withBoolean(IS_AUTHORITATIVE, meta.isAuthoritativeDir());
}
} else {
item.withLong(FILE_LENGTH, status.getLen())
.withLong(MOD_TIME, status.getModificationTime())
.withLong(BLOCK_SIZE, status.getBlockSize());
if (status.getETag() != null) {
item.withString(ETAG, status.getETag());
}
if (status.getVersionId() != null) {
item.withString(VERSION_ID, status.getVersionId());
}
}
item.withBoolean(IS_DELETED, meta.isDeleted());
if(!IGNORED_FIELDS.contains(LAST_UPDATED)) {
item.withLong(LAST_UPDATED, meta.getLastUpdated());
}
return item;
}
/**
* The version marker has a primary key whose PARENT is {@code name};
* this MUST NOT be a value which represents an absolute path.
* @param name name of the version marker
* @param version version number
* @param timestamp creation timestamp
* @return an item representing a version marker.
*/
static Item createVersionMarker(String name, int version, long timestamp) {
return new Item().withPrimaryKey(createVersionMarkerPrimaryKey(name))
.withInt(TABLE_VERSION, version)
.withLong(TABLE_CREATED, timestamp);
}
/**
* Create the primary key of the version marker.
* @param name key name
* @return the key to use when registering or resolving version markers
*/
static PrimaryKey createVersionMarkerPrimaryKey(String name) {
return new PrimaryKey(PARENT, name, CHILD, name);
}
/**
* Extract the version from a version marker item.
* @param marker version marker item
* @return the extracted version field
* @throws IOException if the item is not a version marker
*/
static int extractVersionFromMarker(Item marker) throws IOException {
if (marker.hasAttribute(TABLE_VERSION)) {
return marker.getInt(TABLE_VERSION);
} else {
throw new IOException(E_NOT_VERSION_MARKER + marker);
}
}
/**
* Extract the creation time, if present.
* @param marker version marker item
* @return the creation time, or null
* @throws IOException if the item is not a version marker
*/
static Long extractCreationTimeFromMarker(Item marker) {
if (marker.hasAttribute(TABLE_CREATED)) {
return marker.getLong(TABLE_CREATED);
} else {
return null;
}
}
/**
* Converts a collection {@link DDBPathMetadata} to a collection DynamoDB
* items.
*
* @see #pathMetadataToItem(DDBPathMetadata)
*/
static Item[] pathMetadataToItem(Collection metas) {
if (metas == null) {
return null;
}
final Item[] items = new Item[metas.size()];
int i = 0;
for (DDBPathMetadata meta : metas) {
items[i++] = pathMetadataToItem(meta);
}
return items;
}
/**
* Converts a {@link Path} to a DynamoDB equality condition on that path as
* parent, suitable for querying all direct children of the path.
*
* @param path the path; can not be null
* @return DynamoDB equality condition on {@code path} as parent
*/
static KeyAttribute pathToParentKeyAttribute(Path path) {
return new KeyAttribute(PARENT, pathToParentKey(path));
}
/**
* e.g. {@code pathToParentKey(s3a://bucket/path/a) -> /bucket/path/a}
* @param path path to convert
* @return string for parent key
*/
@VisibleForTesting
public static String pathToParentKey(Path path) {
Preconditions.checkNotNull(path);
Preconditions.checkArgument(path.isUriPathAbsolute(),
"Path not absolute: '%s'", path);
URI uri = path.toUri();
String bucket = uri.getHost();
Preconditions.checkArgument(!StringUtils.isEmpty(bucket),
"Path missing bucket %s", path);
String pKey = "/" + bucket + uri.getPath();
// Strip trailing slash
if (pKey.endsWith("/")) {
pKey = pKey.substring(0, pKey.length() - 1);
}
return pKey;
}
/**
* Converts a {@link Path} to a DynamoDB key, suitable for getting the item
* matching the path.
*
* @param path the path; can not be null
* @return DynamoDB key for item matching {@code path}
*/
static PrimaryKey pathToKey(Path path) {
Preconditions.checkArgument(!path.isRoot(),
"Root path is not mapped to any PrimaryKey");
String childName = path.getName();
PrimaryKey key = new PrimaryKey(PARENT,
pathToParentKey(path.getParent()), CHILD,
childName);
for (KeyAttribute attr : key.getComponents()) {
String name = attr.getName();
Object v = attr.getValue();
Preconditions.checkNotNull(v,
"Null value for DynamoDB attribute \"%s\"", name);
Preconditions.checkState(!((String)v).isEmpty(),
"Empty string value for DynamoDB attribute \"%s\"", name);
}
return key;
}
/**
* Converts a collection of {@link Path} to a collection of DynamoDB keys.
*
* @see #pathToKey(Path)
*/
static PrimaryKey[] pathToKey(Collection paths) {
if (paths == null) {
return null;
}
final PrimaryKey[] keys = new PrimaryKey[paths.size()];
int i = 0;
for (Path p : paths) {
keys[i++] = pathToKey(p);
}
return keys;
}
/**
* There is no need to instantiate this class.
*/
private PathMetadataDynamoDBTranslation() {
}
/**
* Convert a collection of metadata entries to a list
* of DDBPathMetadata entries.
* If the sources are already DDBPathMetadata instances, they
* are copied directly into the new list, otherwise new
* instances are created.
* @param pathMetadatas source data
* @return the converted list.
*/
static List pathMetaToDDBPathMeta(
Collection extends PathMetadata> pathMetadatas) {
return pathMetadatas.stream().map(p ->
(p instanceof DDBPathMetadata)
? (DDBPathMetadata) p
: new DDBPathMetadata(p))
.collect(Collectors.toList());
}
/**
* Convert an item's (parent, child) key to a string value
* for logging. There is no validation of the item.
* @param item item.
* @return an s3a:// prefixed string.
*/
static String itemPrimaryKeyToString(Item item) {
String parent = item.getString(PARENT);
String child = item.getString(CHILD);
return "s3a://" + parent + "/" + child;
}
/**
* Convert an item's (parent, child) key to a string value
* for logging. There is no validation of the item.
* @param item item.
* @return an s3a:// prefixed string.
*/
static String primaryKeyToString(PrimaryKey item) {
Collection c = item.getComponents();
String parent = "";
String child = "";
for (KeyAttribute attr : c) {
switch (attr.getName()) {
case PARENT:
parent = attr.getValue().toString();
break;
case CHILD:
child = attr.getValue().toString();
break;
default:
}
}
return "s3a://" + parent + "/" + child;
}
/**
* Create an empty dir marker which, when passed to the
* DDB metastore, is considered authoritative.
* @param status file status
* @return path metadata.
*/
static PathMetadata authoritativeEmptyDirectoryMarker(
final S3AFileStatus status) {
return new DDBPathMetadata(status, Tristate.TRUE,
false, true, 0);
}
}