Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
org.apache.iceberg.ksyun.ks3.Ks3Catalog Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.ksyun.ks3;
import com.ksyun.ks3.dto.GetObjectResult;
import com.ksyun.ks3.dto.HeadObjectResult;
import com.ksyun.ks3.dto.Ks3ObjectSummary;
import com.ksyun.ks3.dto.ObjectListing;
import com.ksyun.ks3.dto.ObjectMetadata;
import com.ksyun.ks3.exception.Ks3ServiceException;
import com.ksyun.ks3.service.Ks3;
import com.ksyun.ks3.service.request.ListObjectsRequest;
import com.ksyun.ks3.service.request.PutObjectRequest;
import org.apache.iceberg.BaseMetastoreCatalog;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.CatalogUtil;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.exceptions.AlreadyExistsException;
import org.apache.iceberg.exceptions.NamespaceNotEmptyException;
import org.apache.iceberg.exceptions.NoSuchNamespaceException;
import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.hadoop.Configurable;
import org.apache.iceberg.io.CloseableGroup;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.ksyun.KsyunClientFactories;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.io.ByteStreams;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;
import java.util.stream.Collectors;
public class Ks3Catalog extends BaseMetastoreCatalog
implements Closeable, SupportsNamespaces, Configurable {
/**
* Suffix of table metadata object
*/
private static final String TABLE_OBJECT_SUFFIX = ".table";
/**
* Suffix of namespace metadata object
*/
private static final String NAMESPACE_OBJECT_SUFFIX = ".namespace";
/**
* Key of properties version in KS3 object user metadata.
*/
private static final String PROPERTIES_VERSION_USER_METADATA_KEY = "iceberg_properties_version";
private static final String KS3_HEADER_IF_MATCH = "If-Match";
private static final String KS3_HEADER_FORBID_OVERWRITE = "x-kss-forbid-overwrite";
private static final Logger LOG = LoggerFactory.getLogger(Ks3Catalog.class);
private Ks3 client;
private Object hadoopConf;
private String catalogName;
// private LockManager lockManager;
/**
* Warehouse location, kept without a trailing delimiter (slash) to be consistent with other catalogs.
*/
private Ks3URI warehouseLocation;
private FileIO fileIO;
private CloseableGroup closeableGroup;
/**
 * Creates an empty catalog instance so that the catalog can be loaded dynamically.
 *
 * <p>Nothing is configured here; the fields are populated later by
 * {@link Ks3Catalog#initialize(String, Map)}.
 */
public Ks3Catalog() {
  super();
}
/**
 * Configures this catalog from the given properties.
 *
 * <p>Reads the warehouse location (required), creates the KS3 client and the {@link FileIO},
 * and registers the {@link FileIO} with a {@link CloseableGroup} so it can be closed later.
 *
 * @param name catalog name
 * @param properties catalog properties; must contain {@link CatalogProperties#WAREHOUSE_LOCATION}
 * @throws IllegalArgumentException if the warehouse location is null or empty
 */
@Override
public void initialize(String name, Map<String, String> properties) {
  String inputWarehouseLocation = properties.get(CatalogProperties.WAREHOUSE_LOCATION);
  Preconditions.checkArgument(inputWarehouseLocation != null && !inputWarehouseLocation.isEmpty(),
      "Cannot initialize Ks3Catalog because warehousePath must not be null or empty");

  this.catalogName = name;
  this.warehouseLocation = stripTrailingSlash(inputWarehouseLocation);
  this.client = KsyunClientFactories.from(properties).ks3Client();
  this.fileIO = initializeFileIO(properties);

  // Only the FileIO is tracked for closing; the KS3 client is not registered here.
  this.closeableGroup = new CloseableGroup();
  closeableGroup.addCloseable(fileIO);
  closeableGroup.setSuppressCloseFailure(true);
}
/**
 * Builds a {@link Ks3URI} from the given path after dropping every trailing {@code "/"}.
 *
 * @param path location string; must not be null or empty
 * @return URI created from the path without trailing slashes
 */
private Ks3URI stripTrailingSlash(String path) {
  Preconditions.checkArgument(
      path != null && path.length() > 0, "path must not be null or empty");

  // Walk back from the end until the last character is no longer a slash.
  int end = path.length();
  while (end > 0 && path.charAt(end - 1) == '/') {
    end--;
  }
  return new Ks3URI(path.substring(0, end));
}
/**
 * Creates the {@link FileIO} used by this catalog.
 *
 * <p>If {@link CatalogProperties#FILE_IO_IMPL} is set, that implementation is loaded through
 * {@link CatalogUtil#loadFileIO}; otherwise a {@link Ks3FileIO} is created and initialized
 * with the same properties.
 *
 * @param properties catalog properties
 * @return an initialized {@link FileIO}
 */
private FileIO initializeFileIO(Map<String, String> properties) {
  String fileIOImpl = properties.get(CatalogProperties.FILE_IO_IMPL);
  if (fileIOImpl != null) {
    return CatalogUtil.loadFileIO(fileIOImpl, properties, hadoopConf);
  }

  FileIO io = new Ks3FileIO();
  io.initialize(properties);
  return io;
}
/**
 * Creates table operations for the given identifier.
 *
 * <p>The operations are named {@code <catalogName>.<tableIdentifier>} and point at the
 * table object returned by {@code tableURI}.
 */
@Override
protected TableOperations newTableOps(TableIdentifier tableIdentifier) {
  String fullTableName = String.format("%s.%s", catalogName, tableIdentifier);
  return new Ks3TableOperations(fullTableName, tableURI(tableIdentifier), fileIO, this);
}
/**
 * Returns the default location of a table: the namespace prefix followed by {@code "/"}
 * and the table name.
 */
@Override
protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) {
  String parentPrefix = namespacePrefix(tableIdentifier.namespace());
  String tableName = tableIdentifier.name();
  return String.format("%s/%s", parentPrefix, tableName);
}
/**
 * Lists the tables directly under a namespace by iterating all objects with the namespace
 * prefix and keeping those whose key ends with the table object suffix.
 *
 * @param namespace namespace to list; an empty namespace lists from the warehouse root
 * @return identifiers of the tables found
 * @throws NoSuchNamespaceException if the namespace is not empty and does not exist
 */
@Override
public List<TableIdentifier> listTables(Namespace namespace) {
  if (!namespace.isEmpty() && !namespaceExists(namespace)) {
    throw new NoSuchNamespaceException("Namespace %s does not exist", namespace);
  }

  List<TableIdentifier> results = Lists.newArrayList();
  // Add the end slash when delimiter listing
  Ks3URI prefix = new Ks3URI(String.format("%s/", namespacePrefix(namespace)));
  ListObjectsRequest listObjectsRequest = new ListObjectsRequest(prefix.bucket());
  listObjectsRequest.setDelimiter("/");
  listObjectsRequest.setPrefix(prefix.name());

  String marker = null;
  ObjectListing listObjectsResult;
  do {
    // The first request carries a null marker; later requests continue from the previous page.
    listObjectsRequest.setMarker(marker);
    listObjectsResult = client.listObjects(listObjectsRequest);
    marker = listObjectsResult.getNextMarker();
    results.addAll(listObjectsResult.getObjectSummaries().stream()
        .filter(s3Object -> s3Object.getKey().endsWith(TABLE_OBJECT_SUFFIX))
        .map(object -> parseTableId(namespace, prefix, object))
        .collect(Collectors.toList()));
  } while (listObjectsResult.isTruncated());

  LOG.debug("Listing of namespace: {} resulted in the following tables: {}", namespace, results);
  return results;
}
/**
 * Returns the object prefix of a namespace, without a trailing slash.
 *
 * <p>For an empty namespace this is the warehouse location; otherwise the namespace levels
 * are joined with {@code "/"} and appended to the warehouse location.
 */
private String namespacePrefix(Namespace namespace) {
  String root = warehouseLocation.location();
  if (namespace.isEmpty()) {
    return root;
  }

  String relativePath = String.join("/", namespace.levels());
  return String.format("%s/%s", root, relativePath);
}
/**
 * Converts a listed table object into a table identifier.
 *
 * <p>The table name is the part of the object key between the listing prefix and the
 * table object suffix.
 *
 * @param namespace namespace the table belongs to
 * @param prefix prefix that was used for the listing
 * @param ks3Object listed object whose key ends with the table object suffix
 * @throws IllegalArgumentException if the key does not start with the prefix name
 */
private TableIdentifier parseTableId(Namespace namespace, Ks3URI prefix, Ks3ObjectSummary ks3Object) {
  String objectKey = ks3Object.getKey();
  String keyPrefix = prefix.name();
  Preconditions.checkArgument(objectKey.startsWith(keyPrefix),
      "List result should have same prefix", objectKey, prefix);

  int nameStart = keyPrefix.length();
  int nameEnd = objectKey.length() - TABLE_OBJECT_SUFFIX.length();
  return TableIdentifier.of(namespace, objectKey.substring(nameStart, nameEnd));
}
/**
 * Drops a table by deleting its table object; with {@code purge} the table data is
 * dropped first.
 *
 * @param identifier table to drop
 * @param purge whether to also drop the table data
 * @return {@code false} if the table does not exist, or if purging was requested and no
 *     current metadata could be loaded; {@code true} once the table object was deleted
 */
@Override
public boolean dropTable(TableIdentifier identifier, boolean purge) {
  boolean exists = tableExists(identifier);
  if (!exists) {
    return false;
  }

  Ks3URI tableObjectURI = tableURI(identifier);
  if (purge) {
    // if re-use the same instance, current() will throw exception.
    TableOperations freshOps = newTableOps(identifier);
    TableMetadata metadata = freshOps.current();
    if (metadata == null) {
      return false;
    }

    CatalogUtil.dropTableData(freshOps.io(), metadata);
  }

  client.deleteObject(tableObjectURI.bucket(), tableObjectURI.name());
  return true;
}
/**
 * Returns the URI of the table object: namespace prefix, {@code "/"}, table name and the
 * table object suffix.
 */
private Ks3URI tableURI(TableIdentifier id) {
  String location =
      String.format("%s/%s%s", namespacePrefix(id.namespace()), id.name(), TABLE_OBJECT_SUFFIX);
  return new Ks3URI(location);
}
/**
 * Renames a table by writing its properties to the new table object and deleting the old one.
 * Only the table object is moved; the data objects stay in place.
 *
 * @param from identifier of the table to rename
 * @param to new table name
 * @throws NoSuchNamespaceException if the namespace of {@code to} does not exist
 * @throws AlreadyExistsException if {@code to} exists or its table object cannot be created
 * @throws NoSuchTableException if {@code from} does not exist
 */
@Override
public void renameTable(TableIdentifier from, TableIdentifier to) {
  if (!namespaceExists(to.namespace())) {
    throw new NoSuchNamespaceException("Cannot rename %s to %s because namespace %s does not exist",
        from, to, to.namespace());
  }

  if (tableExists(to)) {
    throw new AlreadyExistsException("Cannot rename %s because destination table %s exists", from, to);
  }

  Ks3URI sourceObject = tableURI(from);
  boolean sourcePresent = objectMetadata(sourceObject).isPresent();
  if (!sourcePresent) {
    throw new NoSuchTableException("Cannot rename table because table %s does not exist", from);
  }

  // Copy the table object content to the new key, then remove the old key.
  Properties sourceProperties = loadProperties(sourceObject);
  Ks3URI targetObject = tableURI(to);
  boolean created = putNewProperties(targetObject, sourceProperties.content());
  if (!created) {
    throw new AlreadyExistsException("Cannot rename %s because destination table %s exists", from, to);
  }

  client.deleteObject(sourceObject.bucket(), sourceObject.name());
  LOG.info("Rename table {} to {}", from, to);
}
/**
 * Creates a namespace by writing its properties object.
 *
 * <p>The existence check is done explicitly because {@code putNewProperties} does not set a
 * forbid-overwrite header on its request, so relying on it alone would replace the properties
 * of an existing namespace instead of failing. The check and the write are separate requests
 * and therefore not atomic.
 *
 * @param namespace namespace to create
 * @param properties properties to store for the namespace
 * @throws AlreadyExistsException if the namespace properties object already exists
 */
@Override
public void createNamespace(Namespace namespace, Map<String, String> properties) {
  Ks3URI namespaceObject = namespaceURI(namespace);
  if (objectMetadata(namespaceObject).isPresent()) {
    throw new AlreadyExistsException("namespace %s(%s) has already existed", namespace, namespaceObject);
  }

  if (!putNewProperties(namespaceObject, properties)) {
    throw new AlreadyExistsException("namespace %s(%s) has already existed", namespace, namespaceObject);
  }
}
/**
 * Returns the URI of the namespace properties object: the namespace prefix followed by the
 * namespace object suffix.
 */
private Ks3URI namespaceURI(Namespace namespace) {
  String location = String.format("%s%s", namespacePrefix(namespace), NAMESPACE_OBJECT_SUFFIX);
  return new Ks3URI(location);
}
/**
 * Lists the child namespaces directly under a namespace by iterating all objects with the
 * namespace prefix and keeping those whose key ends with the namespace object suffix.
 *
 * @param namespace parent namespace; an empty namespace lists from the warehouse root
 * @return the child namespaces found
 * @throws NoSuchNamespaceException if the namespace is not empty and does not exist
 */
@Override
public List<Namespace> listNamespaces(Namespace namespace) throws NoSuchNamespaceException {
  if (!namespace.isEmpty() && !namespaceExists(namespace)) {
    throw new NoSuchNamespaceException("Namespace %s does not exist", namespace);
  }

  List<Namespace> results = Lists.newArrayList();
  // Add the end slash when delimiter listing
  Ks3URI prefix = new Ks3URI(String.format("%s/", namespacePrefix(namespace)));
  ListObjectsRequest listObjectsRequest = new ListObjectsRequest(prefix.bucket());
  listObjectsRequest.setDelimiter("/");
  listObjectsRequest.setPrefix(prefix.name());

  String marker = null;
  ObjectListing listObjectsResult;
  do {
    // The first request carries a null marker; later requests continue from the previous page.
    listObjectsRequest.setMarker(marker);
    listObjectsResult = client.listObjects(listObjectsRequest);
    marker = listObjectsResult.getNextMarker();
    results.addAll(listObjectsResult.getObjectSummaries().stream()
        .filter(s3Object -> s3Object.getKey().endsWith(NAMESPACE_OBJECT_SUFFIX))
        .map(object -> parseNamespace(namespace, prefix, object))
        .collect(Collectors.toList()));
  } while (listObjectsResult.isTruncated());

  LOG.debug("Listing namespace {} returned namespaces: {}", namespace, results);
  return results;
}
/**
 * Converts a listed namespace object into a namespace nested under {@code parent}.
 *
 * <p>The child level name is the part of the object key between the listing prefix and the
 * namespace object suffix; it is appended to the parent's levels.
 *
 * @param parent namespace that was listed
 * @param prefix prefix that was used for the listing
 * @param ks3ObjectSummary listed object whose key ends with the namespace object suffix
 * @throws IllegalArgumentException if the key does not start with the prefix name
 */
private Namespace parseNamespace(Namespace parent, Ks3URI prefix, Ks3ObjectSummary ks3ObjectSummary) {
  String objectKey = ks3ObjectSummary.getKey();
  String keyPrefix = prefix.name();
  Preconditions.checkArgument(objectKey.startsWith(keyPrefix),
      "List result should have same prefix", objectKey, prefix);

  int nameStart = keyPrefix.length();
  int nameEnd = objectKey.length() - NAMESPACE_OBJECT_SUFFIX.length();
  String childName = objectKey.substring(nameStart, nameEnd);

  // Copy the parent levels into an array with one extra slot for the child level.
  String[] parentLevels = parent.levels();
  String[] levels = Arrays.copyOf(parentLevels, parentLevels.length + 1);
  levels[parentLevels.length] = childName;
  return Namespace.of(levels);
}
/**
 * Loads the properties stored in the namespace properties object.
 *
 * @param namespace namespace to load
 * @return the namespace properties
 * @throws NoSuchNamespaceException if the namespace properties object is absent
 */
@Override
public Map<String, String> loadNamespaceMetadata(Namespace namespace) throws NoSuchNamespaceException {
  Ks3URI namespaceObject = namespaceURI(namespace);
  if (!objectMetadata(namespaceObject).isPresent()) {
    throw new NoSuchNamespaceException("Namespace %s(%s) properties object is absent", namespace, namespaceObject);
  }

  Map<String, String> result = loadProperties(namespaceObject).content();
  LOG.debug("Loaded metadata for namespace {} found {}", namespace, result);
  return result;
}
/**
 * Drops a namespace by deleting its properties object.
 *
 * @param namespace namespace to drop
 * @return {@code true} after the namespace object was deleted
 * @throws NoSuchNamespaceException if the namespace is not empty and does not exist
 * @throws NamespaceNotEmptyException if the namespace still contains namespaces or tables
 */
@Override
public boolean dropNamespace(Namespace namespace) throws NamespaceNotEmptyException {
  boolean missing = !namespace.isEmpty() && !namespaceExists(namespace);
  if (missing) {
    throw new NoSuchNamespaceException("Namespace %s does not exist", namespace);
  }

  boolean hasChildren = !listNamespaces(namespace).isEmpty() || !listTables(namespace).isEmpty();
  if (hasChildren) {
    throw new NamespaceNotEmptyException("Namespace %s is not empty", namespace);
  }

  Ks3URI target = namespaceURI(namespace);
  client.deleteObject(target.bucket(), target.name());
  LOG.info("Dropped namespace: {}", namespace);
  return true;
}
/**
 * Adds or overwrites the given entries in the namespace properties.
 *
 * @param namespace namespace to update
 * @param properties entries to put into the namespace properties
 * @return the result of {@code updateProperties}
 */
@Override
public boolean setProperties(Namespace namespace, Map<String, String> properties)
    throws NoSuchNamespaceException {
  return updateProperties(namespace, current -> current.putAll(properties));
}
/**
 * Removes the given keys from the namespace properties.
 *
 * @param namespace namespace to update
 * @param properties keys to remove from the namespace properties
 * @return the result of {@code updateProperties}
 */
@Override
public boolean removeProperties(Namespace namespace, Set<String> properties)
    throws NoSuchNamespaceException {
  return updateProperties(namespace, current -> current.keySet().removeAll(properties));
}
public boolean updateProperties(Namespace namespace, Consumer> propertiesFn)
throws NoSuchNamespaceException {
// Load old properties
Properties oldProperties = loadProperties(namespaceURI(namespace));
// Put new properties
Map newProperties = new LinkedHashMap<>(oldProperties.content());
propertiesFn.accept(newProperties);
LOG.debug("Successfully set properties {} for {}", newProperties.keySet(), namespace);
return updatePropertiesObject(namespaceURI(namespace), oldProperties.eTag(), newProperties);
}
/**
 * Reports whether the namespace properties object is present.
 */
@Override
public boolean namespaceExists(Namespace namespace) {
  boolean present = objectMetadata(namespaceURI(namespace)).isPresent();
  return present;
}
/**
 * Reports whether the table object is present.
 */
@Override
public boolean tableExists(TableIdentifier identifier) {
  boolean present = objectMetadata(tableURI(identifier)).isPresent();
  return present;
}
/**
 * Verifies that a properties object URI lies inside the warehouse: it must use the warehouse
 * bucket and its name must start with the warehouse name.
 *
 * @throws IllegalArgumentException if either condition does not hold
 */
private void checkURI(Ks3URI uri) {
  boolean sameBucket = uri.bucket().equals(warehouseLocation.bucket());
  Preconditions.checkArgument(sameBucket,
      "Properties object %s should be in same bucket %s",
      uri.location(), warehouseLocation.bucket());

  boolean underWarehouse = uri.name().startsWith(warehouseLocation.name());
  Preconditions.checkArgument(underWarehouse,
      "Properties object %s should have the expected prefix %s",
      uri.location(), warehouseLocation.name());
}
/**
 * Gets the KS3 object metadata (E-Tag, user metadata and so on) with a HEAD request.
 *
 * @param uri object to inspect; must be located inside the warehouse
 * @return the object metadata, or an empty {@link Optional} if the service answers with 404
 * @throws Ks3ServiceException for any service error other than 404
 */
public Optional<ObjectMetadata> objectMetadata(Ks3URI uri) {
  checkURI(uri);
  try {
    HeadObjectResult headObjectResult = client.headObject(uri.bucket(), uri.name());
    return Optional.of(headObjectResult.getObjectMetadata());
  } catch (Ks3ServiceException e) {
    // A 404 means the object is absent; every other failure is propagated.
    if (e.getStatueCode() == 404) {
      return Optional.empty();
    }

    throw e;
  }
}
/**
 * Record class holding the content of a properties object together with its E-Tag.
 */
static class Properties {
  private final String eTag;
  private final Map<String, String> content;

  /**
   * @param eTag E-Tag of the properties object
   * @param content property entries; stored as given, without copying
   */
  Properties(String eTag, Map<String, String> content) {
    this.eTag = eTag;
    this.content = content;
  }

  /** Returns the E-Tag of the properties object. */
  public String eTag() {
    return eTag;
  }

  /** Returns the property entries passed to the constructor (same instance, not a copy). */
  public Map<String, String> content() {
    return content;
  }
}
/**
 * Reads a properties object and parses its content and metadata as properties.
 *
 * <p>The serialization version is taken from the object's user metadata and passed to
 * {@code PropertiesSerDesUtil.read} together with the object bytes.
 *
 * @param uri properties object to read; must be located inside the warehouse
 * @return the parsed properties together with the object's E-Tag
 * @throws UncheckedIOException if reading the object content fails
 */
Properties loadProperties(Ks3URI uri) {
  checkURI(uri);
  GetObjectResult result = client.getObject(uri.bucket(), uri.name());
  ObjectMetadata objectMetadata = result.getObject().getObjectMetadata();
  String version = objectMetadata.getUserMeta(PROPERTIES_VERSION_USER_METADATA_KEY);

  Map<String, String> content;
  try (InputStream input = result.getObject().getObjectContent()) {
    content = PropertiesSerDesUtil.read(ByteStreams.toByteArray(input), version);
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }

  return new Properties(objectMetadata.getETag(), content);
}
/**
 * Writes a properties object at the given URI.
 *
 * <p>The request carries the content length and the current serialization version as user
 * metadata. No forbid-overwrite header is set, so the put uses the service's default
 * behaviour (NOTE(review): presumably an existing object is overwritten; confirm against
 * the KS3 documentation before relying on the {@code false} return for conflict detection).
 *
 * @param uri target object; must be located inside the warehouse
 * @param properties properties to serialize into the object
 * @return {@code true} if the object was written, {@code false} if the service answered 412
 * @throws Ks3ServiceException for any service error other than 412
 */
boolean putNewProperties(Ks3URI uri, Map<String, String> properties) {
  checkURI(uri);
  byte[] bytes = PropertiesSerDesUtil.toBytes(properties);

  ObjectMetadata meta = new ObjectMetadata();
  meta.setContentLength(bytes.length);
  meta.setUserMeta(PROPERTIES_VERSION_USER_METADATA_KEY, PropertiesSerDesUtil.currentVersion());

  PutObjectRequest request = new PutObjectRequest(uri.bucket(), uri.name(), new ByteArrayInputStream(bytes), meta);
  try {
    client.putObject(request);
    return true;
  } catch (Ks3ServiceException e) {
    // 412 is reported to the caller as "not written"; everything else is propagated.
    if (e.getStatueCode() == 412) {
      return false;
    }

    throw e;
  }
}
/**
 * Replaces an existing properties object, conditioned on its E-Tag.
 *
 * <p>The put request carries an {@code If-Match} header with the given E-Tag; a 412 answer
 * from the service is reported as {@code false}.
 *
 * @param uri properties object to replace; must be located inside the warehouse
 * @param eTag E-Tag sent in the {@code If-Match} header
 * @param properties new properties to serialize into the object
 * @return {@code true} if the object was replaced, {@code false} if the service answered 412
 * @throws Ks3ServiceException for any service error other than 412
 */
boolean updatePropertiesObject(Ks3URI uri, String eTag, Map<String, String> properties) {
  checkURI(uri);

  // Serialize from a private copy of the caller's map
  Map<String, String> newProperties = new LinkedHashMap<>(properties);
  byte[] bytes = PropertiesSerDesUtil.toBytes(newProperties);

  ObjectMetadata meta = new ObjectMetadata();
  meta.setContentLength(bytes.length);
  meta.setUserMeta(PROPERTIES_VERSION_USER_METADATA_KEY, PropertiesSerDesUtil.currentVersion());
  meta.setHeader(KS3_HEADER_IF_MATCH, eTag);

  // Replace properties object
  PutObjectRequest request = new PutObjectRequest(uri.bucket(), uri.name(), new ByteArrayInputStream(bytes), meta);
  try {
    client.putObject(request);
    return true;
  } catch (Ks3ServiceException e) {
    if (e.getStatueCode() == 412) {
      LOG.debug("Update properties object {} failed : {}", uri.name(), e.getErrorMessage(), e);
      return false;
    }

    throw e;
  }
}
/**
 * Returns the catalog name that was passed to {@code initialize}.
 */
@Override
public String name() {
  return this.catalogName;
}
/**
 * Closes the resources registered with the closeable group.
 *
 * <p>The group is only created in {@code initialize}, so closing a catalog that was never
 * initialized is a no-op instead of failing with a {@link NullPointerException}.
 *
 * @throws IOException if closing the group fails
 */
@Override
public void close() throws IOException {
  if (closeableGroup != null) {
    closeableGroup.close();
  }
}
/**
 * Stores the given configuration object; it is later handed to
 * {@code CatalogUtil.loadFileIO} when a custom FileIO implementation is configured.
 */
@Override
public void setConf(Object conf) {
  hadoopConf = conf;
}
}