Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
software.amazon.s3tables.iceberg.S3TablesCatalog Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package software.amazon.s3tables.iceberg;
import software.amazon.s3tables.iceberg.imports.FileIOTracker;
import org.apache.iceberg.BaseMetastoreCatalog;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.exceptions.AlreadyExistsException;
import org.apache.iceberg.MetadataTableType;
import org.apache.iceberg.MetadataTableUtils;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.aws.s3.S3FileIOProperties;
import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.hadoop.Configurable;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.exceptions.NamespaceNotEmptyException;
import org.apache.iceberg.exceptions.NoSuchNamespaceException;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.io.CloseableGroup;
import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.util.PropertyUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
import software.amazon.awssdk.core.exception.SdkClientException;
import software.amazon.awssdk.services.s3tables.S3TablesClient;
import software.amazon.awssdk.services.s3tables.model.ConflictException;
import software.amazon.awssdk.services.s3tables.model.CreateNamespaceRequest;
import software.amazon.awssdk.services.s3tables.model.CreateTableRequest;
import software.amazon.awssdk.services.s3tables.model.DeleteNamespaceRequest;
import software.amazon.awssdk.services.s3tables.model.DeleteTableRequest;
import software.amazon.awssdk.services.s3tables.model.GetNamespaceRequest;
import software.amazon.awssdk.services.s3tables.model.GetNamespaceResponse;
import software.amazon.awssdk.services.s3tables.model.GetTableMetadataLocationRequest;
import software.amazon.awssdk.services.s3tables.model.GetTableMetadataLocationResponse;
import software.amazon.awssdk.services.s3tables.model.ListNamespacesRequest;
import software.amazon.awssdk.services.s3tables.model.ListNamespacesResponse;
import software.amazon.awssdk.services.s3tables.model.ListTablesRequest;
import software.amazon.awssdk.services.s3tables.model.ListTablesResponse;
import software.amazon.awssdk.services.s3tables.model.NotFoundException;
import software.amazon.awssdk.services.s3tables.model.OpenTableFormat;
import software.amazon.awssdk.services.s3tables.model.RenameTableRequest;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
public class S3TablesCatalog extends BaseMetastoreCatalog
implements Closeable, SupportsNamespaces, Configurable {
private static final Logger LOG = LoggerFactory.getLogger(S3TablesCatalog.class);
private String catalogName;
private Map catalogOptions;
private CloseableGroup closeableGroup;
private FileIOTracker fileIOTracker;
private Object hadoopConf;
private S3TablesClient tablesClient;
private final S3TablesCatalogConfiguration configuration;
private static final ImmutableMap S3_TABLES_DEFAULT_PROPERTIES = ImmutableMap.of(
// S3 Tables does not support deleting objects
S3FileIOProperties.DELETE_ENABLED,
"false"
);
// must have a no-arg constructor to be dynamically loaded
// initialize(String name, Map properties) will be called to complete initialization
public S3TablesCatalog() {
configuration = new S3TablesCatalogConfiguration();
}
public S3TablesCatalog(S3TablesCatalogConfiguration configuration) {
this.configuration = configuration;
}
/**
* Overrides loadTable to return an instance of S3TablesTable rather than BaseTable. Some engines use this to detect
* the type of the table and apply S3 Tables-specific behavior.
*/
@Override
public Table loadTable(TableIdentifier identifier) {
Table result;
if (isValidIdentifier(identifier)) {
TableOperations ops = newTableOps(identifier);
if (ops.current() == null) {
// the identifier may be valid for both tables and metadata tables
if (isValidMetadataIdentifier(identifier)) {
result = loadMetadataTable(identifier);
} else {
throw new NoSuchTableException("Table does not exist: %s", identifier);
}
} else {
result = new S3TablesTable(ops, fullTableName(name(), identifier), metricsReporter());
}
} else if (isValidMetadataIdentifier(identifier)) {
result = loadMetadataTable(identifier);
} else {
throw new NoSuchTableException("Invalid table identifier: %s", identifier);
}
LOG.info("Table loaded by catalog: {}", result);
return result;
}
// Copied from BaseMetastoreCatalog, but private there
private boolean isValidMetadataIdentifier(TableIdentifier identifier) {
return MetadataTableType.from(identifier.name()) != null
&& isValidIdentifier(TableIdentifier.of(identifier.namespace().levels()));
}
// Copied from BaseMetastoreCatalog, but private there
private Table loadMetadataTable(TableIdentifier identifier) {
String tableName = identifier.name();
MetadataTableType type = MetadataTableType.from(tableName);
if (type != null) {
TableIdentifier baseTableIdentifier = TableIdentifier.of(identifier.namespace().levels());
TableOperations ops = newTableOps(baseTableIdentifier);
if (ops.current() == null) {
throw new NoSuchTableException("Table does not exist: %s", baseTableIdentifier);
}
return MetadataTableUtils.createMetadataTableInstance(
ops, name(), baseTableIdentifier, identifier, type);
} else {
throw new NoSuchTableException("Table does not exist: %s", identifier);
}
}
@Override
protected TableOperations newTableOps(TableIdentifier tableIdentifier) {
if (tableIdentifier.namespace() == null || tableIdentifier.namespace().levels().length == 0) {
throw new ValidationException("Namespace can't be null or empty");
}
validateSingleLevelNamespace(tableIdentifier.namespace());
String namespaceName = tableIdentifier.namespace().toString();
String tableName = tableIdentifier.name();
S3TablesCatalogOperations s3TablesCatalogOperations = new S3TablesCatalogOperations(
tablesClient,
configuration,
namespaceName,
tableName,
catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION),
catalogOptions,
hadoopConf
);
fileIOTracker.track(s3TablesCatalogOperations);
return s3TablesCatalogOperations;
}
/**
* TODO: Check if there is a better way to derive the ware house location
* Currently just checking with the Control Plane APIs to find if a table exists, if not then create it.
*/
@Override
protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) {
validateSingleLevelNamespace(tableIdentifier.namespace());
try {
LOG.debug("Trying to get TableMetadataLocation for namespace: {}, name: {}", tableIdentifier.namespace(), tableIdentifier.name());
GetTableMetadataLocationResponse getTableMetadataLocationResponse = tablesClient.getTableMetadataLocation(GetTableMetadataLocationRequest.builder()
.name(tableIdentifier.name())
.namespace(tableIdentifier.namespace().toString())
.tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
.build());
return getTableMetadataLocationResponse.warehouseLocation();
} catch (NotFoundException ex) {
LOG.info("Table {} does not exist, creating table to retrieve warehouse location", tableIdentifier.name());
try {
tablesClient.createTable(
CreateTableRequest.builder()
.tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
.name(tableIdentifier.name())
.format(OpenTableFormat.ICEBERG)
.namespace(tableIdentifier.namespace().toString())
.build());
} catch (Exception e) {
LOG.error("Failed to create table {}", tableIdentifier.name(), e);
throw new RuntimeException(e);
}
try {
GetTableMetadataLocationResponse getTableResponse = tablesClient.getTableMetadataLocation(
GetTableMetadataLocationRequest.builder()
.name(tableIdentifier.name())
.namespace(tableIdentifier.namespace().toString())
.tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
.build()
);
return getTableResponse.warehouseLocation();
} catch (Exception e) {
LOG.error("Failed to get table {}", tableIdentifier.name(), e);
throw new RuntimeException(e);
}
}
}
@Override
public void initialize(String name, Map properties) {
LOG.debug("initialize {}", properties);
S3TablesAwsClientFactory clientFactory = S3TablesAwsClientFactories.from(properties);
initialize(name, properties, clientFactory.s3tables());
}
@VisibleForTesting
public void initialize(String name, Map properties, S3TablesClient client) {
if (properties.get(CatalogProperties.WAREHOUSE_LOCATION) == null) {
throw new ValidationException("No Warehouse location provided. Please specify the warehouse location, which should be the table bucket ARN");
}
validateUnsupportedCatalogProperties(properties);
this.catalogOptions = ImmutableMap.builder()
.putAll(S3_TABLES_DEFAULT_PROPERTIES)
.putAll(properties)
.buildKeepingLast();
this.closeableGroup = new CloseableGroup();
this.catalogName = name;
this.tablesClient = client;
this.fileIOTracker = new FileIOTracker();
closeableGroup.addCloseable(this.tablesClient);
closeableGroup.addCloseable(fileIOTracker);
closeableGroup.setSuppressCloseFailure(true);
}
/**
* Validate some common properties that aren't supported by S3 Tables. We only log warnings rather than failing
* to preserve potential forward compatibility.
*/
private void validateUnsupportedCatalogProperties(Map properties) {
if (PropertyUtil.propertyAsBoolean(properties, S3FileIOProperties.DELETE_ENABLED, false)) {
LOG.warn("S3 Tables does not support DeleteObject requests; setting {}=true will cause failures", S3FileIOProperties.DELETE_ENABLED);
}
if (!PropertyUtil.propertiesWithPrefix(properties, S3FileIOProperties.DELETE_TAGS_PREFIX).isEmpty()) {
LOG.warn("S3 Tables does not support tagging objects; setting {} properties will cause failures", S3FileIOProperties.DELETE_TAGS_PREFIX);
}
if (!PropertyUtil.propertiesWithPrefix(properties, S3FileIOProperties.WRITE_TAGS_PREFIX).isEmpty()) {
LOG.warn("S3 Tables does not support tagging objects; setting {} properties will cause failures", S3FileIOProperties.WRITE_TAGS_PREFIX);
}
if (PropertyUtil.propertyAsBoolean(properties, S3FileIOProperties.S3_ACCESS_GRANTS_ENABLED, false)) {
LOG.warn("S3 Tables does not support S3 Access Grants; setting {}=true will cause failures", S3FileIOProperties.S3_ACCESS_GRANTS_ENABLED);
}
String sseConfig = PropertyUtil.propertyAsString(properties, S3FileIOProperties.SSE_TYPE, S3FileIOProperties.SSE_TYPE_NONE);
if (!sseConfig.equals(S3FileIOProperties.SSE_TYPE_NONE) && !sseConfig.equals(S3FileIOProperties.SSE_TYPE_S3)) {
LOG.warn("S3 Tables does not support configuring SSE other than SSE-S3; setting {}={} will cause failures", S3FileIOProperties.SSE_TYPE, sseConfig);
}
String aclConfig = properties.get(S3FileIOProperties.ACL);
if (aclConfig != null) {
LOG.warn("S3 Tables does not support ACLs; setting {}={} will cause failures", S3FileIOProperties.ACL, aclConfig);
}
String storageClassConfig = properties.get(S3FileIOProperties.WRITE_STORAGE_CLASS);
if (storageClassConfig != null && !storageClassConfig.equals("STANDARD")) {
LOG.warn("S3 Tables does not support storage classes other than STANDARD; setting {}={} will cause failures", S3FileIOProperties.WRITE_STORAGE_CLASS, storageClassConfig);
}
}
@Override
public void createNamespace(Namespace namespace, Map metadata) {
validateSingleLevelNamespace(namespace);
LOG.info("Creating namespace {} with metadata {}", namespace, metadata);
try {
tablesClient.createNamespace(
CreateNamespaceRequest.builder()
.tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
.namespace(Collections.singletonList(namespace.toString()))
.build()
);
} catch (ConflictException ex) {
LOG.debug("Received exception {}", ex.toString());
LOG.info("Namespace {} already exists", namespace);
throw new AlreadyExistsException("Namespace already exists");
}
}
@Override
public List listNamespaces(Namespace namespace) throws NoSuchNamespaceException {
LOG.debug("Listing namespaces for {}", namespace);
if (!namespace.isEmpty()) {
LOG.error("S3TablesCatalog does not support more than 1 level of namespace");
throw new IllegalArgumentException(String.format("S3TablesCatalog does not support more than 1 level of " +
"namespace, so can only list top-level namespaces, but got: %s", namespace));
}
List results = new ArrayList<>();
try {
listWithToken(continuationToken -> {
ListNamespacesResponse response = tablesClient.listNamespaces(
ListNamespacesRequest.builder()
.tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
.build()
);
results.addAll(
response.namespaces().stream()
.map(namespaceSummary -> Namespace.of(namespaceSummary.namespace().get(0)))
.collect(Collectors.toList())
);
return response.continuationToken();
});
} catch (Exception e) {
LOG.error("Failed to list namespaces", e);
throw new RuntimeException(e);
}
LOG.debug("Namespace results: {}", results);
return results;
}
@Override
public Map loadNamespaceMetadata(Namespace namespace) throws NoSuchNamespaceException {
validateSingleLevelNamespace(namespace);
try {
LOG.debug("Loading metadata for {}", namespace);
GetNamespaceResponse getNamespaceResponse = tablesClient.getNamespace(
GetNamespaceRequest.builder()
.namespace(namespace.toString())
.tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
.build()
);
LOG.debug("Loaded metadata {}", getNamespaceResponse.toString());
return ImmutableMap.of("namespaceName", getNamespaceResponse.toString());
} catch (NotFoundException ex) {
throw new NoSuchNamespaceException(ex, "Namespace not found!");
} catch(Exception ex) {
LOG.error("Failed to load namespace metadata", ex);
throw new RuntimeException(String.format("Failed to load namespace metadata for %s: %s", ex.getClass().getName(), ex.getMessage()));
}
}
@Override
public boolean dropNamespace(Namespace namespace) throws NamespaceNotEmptyException {
try {
LOG.debug("Loading namespaces for {} inorder to drop them", namespace);
GetNamespaceResponse getNamespaceResponse = this.tablesClient.getNamespace(GetNamespaceRequest.builder()
.namespace(namespace.toString()).tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
.build());
getNamespaceResponse.namespace().forEach(name -> {
LOG.debug("Deleting namespace {}", Namespace.of(name));
this.tablesClient.deleteNamespace(DeleteNamespaceRequest.builder()
.namespace(namespace.toString()).tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
.build());
});
return true;
} catch (ConflictException | NamespaceNotEmptyException ex) {
LOG.error("Failed to delete namespace because it is not empty", ex);
throw ex;
} catch (NotFoundException ex) {
LOG.debug("Namespace: {} not found", namespace);
return false;
}
catch (Exception ex) {
LOG.error("Failed to delete namespace", ex);
throw ex;
}
}
@Override
public boolean setProperties(Namespace namespace, Map properties) throws NoSuchNamespaceException {
Map newProperties = Maps.newHashMap();
newProperties.putAll(loadNamespaceMetadata(namespace));
newProperties.putAll(properties);
// Always successful, otherwise exception is thrown
return true;
}
@Override
public boolean removeProperties(Namespace namespace, Set properties) throws NoSuchNamespaceException {
Map metadata = Maps.newHashMap(loadNamespaceMetadata(namespace));
for (String property : properties) {
metadata.remove(property);
}
// Always successful, otherwise exception is thrown
return true;
}
@Override
public List listTables(Namespace namespace) {
LOG.debug("Listing tables for {}", namespace);
namespaceExists(namespace);
List results = new ArrayList<>();
listWithToken(continuationToken -> {
ListTablesResponse response = tablesClient.listTables(
ListTablesRequest.builder()
.tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
.namespace(namespace.level(0))
.maxTables(100)
.build()
);
results.addAll(
response.tables().stream()
.map(tableSummary -> TableIdentifier.of(namespace, tableSummary.name()))
.collect(Collectors.toList())
);
return response.continuationToken();
});
LOG.debug("Found {} tables", results.size());
return results;
}
@Override
public boolean dropTable(TableIdentifier identifier, boolean purge) {
LOG.debug("Trying to delete table: {}", identifier);
if (!purge) {
LOG.error("not allowing drop table with purge=false");
throw new UnsupportedOperationException("S3 Tables does not support the dropTable operation with purge=false. Some versions of Spark always set this flag to false even when running DROP TABLE PURGE commands." +
" You can retry with DROP TABLE PURGE or use the S3 Tables DeleteTable API to delete a table.");
}
try {
validateSingleLevelNamespace(identifier.namespace());
tablesClient.deleteTable(
DeleteTableRequest.builder()
.name(identifier.name())
.tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
.namespace(identifier.namespace().toString())
.build()
);
LOG.info("Successfully deleted {}", identifier);
return true;
} catch (NotFoundException ex) {
LOG.info("Table not found" + identifier);
return false;
}
catch (Exception e) {
LOG.error("Failed to drop table {}", identifier, e);
throw new RuntimeException(e);
}
}
@Override
public void renameTable(TableIdentifier from, TableIdentifier to) {
/*
To comes in with the full namespace which is expected to be 2 levels.
e.g. it comes in as: ice_catalog.namespace_name instead of just namespace_name.
This throws off our normal validateSingleLevelNamespace which normally just accounts for namespace_name.
*/
validateSingleLevelNamespace(to.namespace(), 2);
validateSingleLevelNamespace(from.namespace());
LOG.info("Renaming table from {} to {}", from, to);
String sourceNamespaceName = from.namespace().toString();
String targetNamespaceName = null;
/* Since Iceberg supports multiple namespace levels (noticed that for target namespace it considered ice_catalog.namespace_name)
instead of just namespace_name. For now deriving the namespace level from the source TableIdentifier,
and choosing the target namespace at the same level.
*/
int sourceNamespaceLevel = from.namespace().levels().length;
int targetNamespaceLevel = to.namespace().levels().length;
if (targetNamespaceLevel > sourceNamespaceLevel) {
targetNamespaceName = to.namespace().level(sourceNamespaceLevel);
} else {
targetNamespaceName = to.namespace().toString();
}
try {
tablesClient.renameTable(
RenameTableRequest.builder()
.name(from.name())
.newName(to.name())
.namespace(sourceNamespaceName)
.newNamespaceName(targetNamespaceName)
.tableBucketARN(catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION))
.build());
LOG.info("Successfully renamed table from {} to {}", from.name(), to.name());
} catch (AwsServiceException | SdkClientException e ) {
LOG.error("Failed to rename table {}", from, e);
throw new RuntimeException(e);
}
}
@Override
public void close() throws IOException {
closeableGroup.close();
}
@Override
public void setConf(Object configuration) {
hadoopConf = configuration;
}
@Override
public String name() {
return catalogName;
}
@VisibleForTesting
static void validateSingleLevelNamespace(Namespace namespace, int maxLength) {
if (namespace != null && namespace.levels().length > maxLength) {
LOG.error("Namespace {} has {} levels and S3 Tables only supports one", namespace, namespace.levels());
throw new ValidationException(
"S3TablesCatalog does not support more than 1 level of namespace");
}
}
@VisibleForTesting
static void validateSingleLevelNamespace(Namespace namespace) {
validateSingleLevelNamespace(namespace, 1);
}
private static void listWithToken(Function continuationTokenGenerator) {
String continuationToken = null;
do {
continuationToken = continuationTokenGenerator.apply(continuationToken);
} while (continuationToken != null);
}
@VisibleForTesting
S3TablesClient getS3TablesClient() {
return this.tablesClient;
}
}