All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.iceberg.ksyun.ks3.Ks3Catalog Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg.ksyun.ks3;

import com.ksyun.ks3.dto.GetObjectResult;
import com.ksyun.ks3.dto.HeadObjectResult;
import com.ksyun.ks3.dto.Ks3ObjectSummary;
import com.ksyun.ks3.dto.ObjectListing;
import com.ksyun.ks3.dto.ObjectMetadata;
import com.ksyun.ks3.exception.Ks3ServiceException;
import com.ksyun.ks3.service.Ks3;
import com.ksyun.ks3.service.request.ListObjectsRequest;
import com.ksyun.ks3.service.request.PutObjectRequest;
import org.apache.iceberg.BaseMetastoreCatalog;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.CatalogUtil;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.exceptions.AlreadyExistsException;
import org.apache.iceberg.exceptions.NamespaceNotEmptyException;
import org.apache.iceberg.exceptions.NoSuchNamespaceException;
import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.hadoop.Configurable;
import org.apache.iceberg.io.CloseableGroup;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.ksyun.KsyunClientFactories;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.io.ByteStreams;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;
import java.util.stream.Collectors;

public class Ks3Catalog extends BaseMetastoreCatalog
    implements Closeable, SupportsNamespaces, Configurable {

  /**
   * Suffix of table metadata object
   */
  private static final String TABLE_OBJECT_SUFFIX = ".table";

  /**
   * Suffix of namespace metadata object
   */
  private static final String NAMESPACE_OBJECT_SUFFIX = ".namespace";

  /**
   * Key of properties version in KS3 object user metadata.
   */
  private static final String PROPERTIES_VERSION_USER_METADATA_KEY = "iceberg_properties_version";

  private static final String KS3_HEADER_IF_MATCH = "If-Match";
  private static final String KS3_HEADER_FORBID_OVERWRITE = "x-kss-forbid-overwrite";


  private static final Logger LOG = LoggerFactory.getLogger(Ks3Catalog.class);

  private Ks3 client;
  private Object hadoopConf;
  private String catalogName;
//  private LockManager lockManager;

  /**
   * Warehouse is unified with other catalog that without delimiter.
   */
  private Ks3URI warehouseLocation;
  private FileIO fileIO;
  private CloseableGroup closeableGroup;

  /**
   * No-arg constructor to load the catalog dynamically.
   * 

* All fields are initialized by calling {@link Ks3Catalog#initialize(String, Map)} later. */ public Ks3Catalog() { } @Override public void initialize(String name, Map properties) { String inputWarehouseLocation = properties.get(CatalogProperties.WAREHOUSE_LOCATION); Preconditions.checkArgument(inputWarehouseLocation != null && inputWarehouseLocation.length() > 0, "Cannot initialize Ks3Catalog because warehousePath must not be null or empty"); this.catalogName = name; // this.warehouseLocation = new Ks3URI(LocationUtil.stripTrailingSlash(inputWarehouseLocation)); this.warehouseLocation = stripTrailingSlash(inputWarehouseLocation); this.client = KsyunClientFactories.from(properties).ks3Client(); //TODO this.fileIO = initializeFileIO(properties); this.closeableGroup = new CloseableGroup(); // closeableGroup.addCloseable(client); closeableGroup.addCloseable(fileIO); closeableGroup.setSuppressCloseFailure(true); } private Ks3URI stripTrailingSlash(String path) { Preconditions.checkArgument( path != null && path.length() > 0, "path must not be null or empty"); String result = path; while (result.endsWith("/")) { result = result.substring(0, result.length() - 1); } return new Ks3URI(result); } private FileIO initializeFileIO(Map properties) { String fileIOImpl = properties.get(CatalogProperties.FILE_IO_IMPL); if (fileIOImpl == null) { FileIO io = new Ks3FileIO(); io.initialize(properties); return io; } else { return CatalogUtil.loadFileIO(fileIOImpl, properties, hadoopConf); } } @Override protected TableOperations newTableOps(TableIdentifier tableIdentifier) { // System.out.println("-------- newTableOps tableURI(tableIdentifier): " + tableURI(tableIdentifier)); return new Ks3TableOperations(String.format("%s.%s", catalogName, tableIdentifier), tableURI(tableIdentifier), fileIO, this); } @Override protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) { return String.format("%s/%s", namespacePrefix(tableIdentifier.namespace()), tableIdentifier.name()); 
} /** * Iterate all table objects with the namespace prefix. */ @Override public List listTables(Namespace namespace) { if (!namespace.isEmpty() && !namespaceExists(namespace)) { throw new NoSuchNamespaceException("Namespace %s does not exist", namespace); } String marker = null; List results = Lists.newArrayList(); // Add the end slash when delimiter listing Ks3URI prefix = new Ks3URI(String.format("%s/", namespacePrefix(namespace))); ListObjectsRequest listObjectsRequest = new ListObjectsRequest(prefix.bucket()); listObjectsRequest.setDelimiter("/"); listObjectsRequest.setPrefix(prefix.name()); ObjectListing listObjectsResult = null; do{ listObjectsRequest.setMarker(marker); listObjectsResult = client.listObjects(listObjectsRequest); marker = listObjectsResult.getNextMarker(); results.addAll(listObjectsResult.getObjectSummaries().stream() .filter(s3Object -> s3Object.getKey().endsWith(TABLE_OBJECT_SUFFIX)) .map(object -> parseTableId(namespace, prefix, object)) .collect(Collectors.toList())); } while (listObjectsResult.isTruncated()); LOG.debug("Listing of namespace: {} resulted in the following tables: {}", namespace, results); return results; } /** * Get object prefix of namespace without the end slash. */ private String namespacePrefix(Namespace namespace) { if (namespace.isEmpty()) { return warehouseLocation.location(); } else { // If the warehouseLocation.name is empty, the leading slash will be ignored return String.format("%s/%s", warehouseLocation.location(), String.join("/", namespace.levels())); } } private TableIdentifier parseTableId(Namespace namespace, Ks3URI prefix, Ks3ObjectSummary ks3Object) { String key = ks3Object.getKey(); Preconditions.checkArgument(key.startsWith(prefix.name()), "List result should have same prefix", key, prefix); String tableName = key.substring( prefix.name().length(), key.length() - TABLE_OBJECT_SUFFIX.length()); return TableIdentifier.of(namespace, tableName); } /** * Remove table object. 
If the purge flag is set, remove all data objects. */ @Override public boolean dropTable(TableIdentifier identifier, boolean purge) { if (!tableExists(identifier)) { return false; } Ks3URI tableObjectURI = tableURI(identifier); if (purge) { // if re-use the same instance, current() will throw exception. TableOperations ops = newTableOps(identifier); TableMetadata current = ops.current(); if (current == null) { return false; } CatalogUtil.dropTableData(ops.io(), current); } client.deleteObject(tableObjectURI.bucket(), tableObjectURI.name()); return true; } private Ks3URI tableURI(TableIdentifier id) { return new Ks3URI(String.format("%s/%s%s", namespacePrefix(id.namespace()), id.name(), TABLE_OBJECT_SUFFIX)); } /** * Table rename will only move table object, the data objects will still be in-place. * * @param from identifier of the table to rename * @param to new table name */ @Override public void renameTable(TableIdentifier from, TableIdentifier to) { if (!namespaceExists(to.namespace())) { throw new NoSuchNamespaceException("Cannot rename %s to %s because namespace %s does not exist", from, to, to.namespace()); } if (tableExists(to)) { throw new AlreadyExistsException("Cannot rename %s because destination table %s exists", from, to); } Ks3URI fromURI = tableURI(from); if (!objectMetadata(fromURI).isPresent()) { throw new NoSuchTableException("Cannot rename table because table %s does not exist", from); } Properties properties = loadProperties(fromURI); Ks3URI toURI = tableURI(to); if (!putNewProperties(toURI, properties.content())) { throw new AlreadyExistsException("Cannot rename %s because destination table %s exists", from, to); } client.deleteObject(fromURI.bucket(), fromURI.name()); LOG.info("Rename table {} to {}", from, to); } @Override public void createNamespace(Namespace namespace, Map properties) { Ks3URI namespaceObject = namespaceURI(namespace); if (!putNewProperties(namespaceObject, properties)) { throw new AlreadyExistsException("namespace %s(%s) 
has already existed", namespace, namespaceObject); } } private Ks3URI namespaceURI(Namespace namespace) { return new Ks3URI(String.format("%s%s", namespacePrefix(namespace), NAMESPACE_OBJECT_SUFFIX)); } @Override public List listNamespaces(Namespace namespace) throws NoSuchNamespaceException { if (!namespace.isEmpty() && !namespaceExists(namespace)) { throw new NoSuchNamespaceException("Namespace %s does not exist", namespace); } String marker = null; List results = Lists.newArrayList(); // Add the end slash when delimiter listing Ks3URI prefix = new Ks3URI(String.format("%s/", namespacePrefix(namespace))); ListObjectsRequest listObjectsRequest = new ListObjectsRequest(prefix.bucket()); listObjectsRequest.setDelimiter("/"); listObjectsRequest.setPrefix(prefix.name()); ObjectListing listObjectsResult = null; do{ listObjectsRequest.setMarker(marker); listObjectsResult = client.listObjects(listObjectsRequest); marker = listObjectsResult.getNextMarker(); results.addAll(listObjectsResult.getObjectSummaries().stream() .filter(s3Object -> s3Object.getKey().endsWith(NAMESPACE_OBJECT_SUFFIX)) .map(object -> parseNamespace(namespace, prefix, object)) .collect(Collectors.toList())); } while (listObjectsResult.isTruncated()); LOG.debug("Listing namespace {} returned namespaces: {}", namespace, results); return results; } private Namespace parseNamespace(Namespace parent, Ks3URI prefix, Ks3ObjectSummary ks3ObjectSummary) { String key = ks3ObjectSummary.getKey(); Preconditions.checkArgument(key.startsWith(prefix.name()), "List result should have same prefix", key, prefix); String namespaceName = key.substring( prefix.name().length(), key.length() - NAMESPACE_OBJECT_SUFFIX.length()); String[] namespace = Arrays.copyOf(parent.levels(), parent.levels().length + 1); namespace[namespace.length - 1] = namespaceName; return Namespace.of(namespace); } /** * Load namespace properties. 
*/ @Override public Map loadNamespaceMetadata(Namespace namespace) throws NoSuchNamespaceException { Ks3URI namespaceObject = namespaceURI(namespace); if (!objectMetadata(namespaceObject).isPresent()) { throw new NoSuchNamespaceException("Namespace %s(%s) properties object is absent", namespace, namespaceObject); } Map result = loadProperties(namespaceObject).content(); LOG.debug("Loaded metadata for namespace {} found {}", namespace, result); return result; } @Override public boolean dropNamespace(Namespace namespace) throws NamespaceNotEmptyException { if (!namespace.isEmpty() && !namespaceExists(namespace)) { throw new NoSuchNamespaceException("Namespace %s does not exist", namespace); } if (!listNamespaces(namespace).isEmpty() || !listTables(namespace).isEmpty()) { throw new NamespaceNotEmptyException("Namespace %s is not empty", namespace); } Ks3URI namespaceObject = namespaceURI(namespace); client.deleteObject(namespaceObject.bucket(), namespaceObject.name()); LOG.info("Dropped namespace: {}", namespace); return true; } @Override public boolean setProperties(Namespace namespace, Map properties) throws NoSuchNamespaceException { return updateProperties(namespace, r -> r.putAll(properties)); } @Override public boolean removeProperties(Namespace namespace, Set properties) throws NoSuchNamespaceException { return updateProperties(namespace, r -> r.keySet().removeAll(properties)); } public boolean updateProperties(Namespace namespace, Consumer> propertiesFn) throws NoSuchNamespaceException { // Load old properties Properties oldProperties = loadProperties(namespaceURI(namespace)); // Put new properties Map newProperties = new LinkedHashMap<>(oldProperties.content()); propertiesFn.accept(newProperties); LOG.debug("Successfully set properties {} for {}", newProperties.keySet(), namespace); return updatePropertiesObject(namespaceURI(namespace), oldProperties.eTag(), newProperties); } @Override public boolean namespaceExists(Namespace namespace) { return 
objectMetadata(namespaceURI(namespace)).isPresent(); } @Override public boolean tableExists(TableIdentifier identifier) { return objectMetadata(tableURI(identifier)).isPresent(); } private void checkURI(Ks3URI uri) { Preconditions.checkArgument(uri.bucket().equals(warehouseLocation.bucket()), "Properties object %s should be in same bucket %s", uri.location(), warehouseLocation.bucket()); Preconditions.checkArgument(uri.name().startsWith(warehouseLocation.name()), "Properties object %s should have the expected prefix %s", uri.location(), warehouseLocation.name()); } /** * Get S3 object metadata which include E-Tag, user metadata and so on. */ public Optional objectMetadata(Ks3URI uri) { checkURI(uri); try { HeadObjectResult headObjectResult = client.headObject(uri.bucket(), uri.name()); return Optional.of(headObjectResult.getObjectMetadata()); } catch (Ks3ServiceException e) { if (e.getStatueCode() == 404) { return Optional.empty(); } throw e; } } /** * Record class of properties content and E-Tag */ static class Properties { private final String eTag; private final Map content; Properties(String eTag, Map content) { this.eTag = eTag; this.content = content; } public String eTag() { return eTag; } public Map content() { return content; } } /** * Parse object content and metadata as properties. */ Properties loadProperties(Ks3URI uri) { checkURI(uri); GetObjectResult result = client.getObject(uri.bucket(), uri.name()); ObjectMetadata objectMetadata = result.getObject().getObjectMetadata(); String version = objectMetadata.getUserMeta(PROPERTIES_VERSION_USER_METADATA_KEY); Map content; try (InputStream input = result.getObject().getObjectContent()) { content = PropertiesSerDesUtil.read(ByteStreams.toByteArray(input), version); } catch (IOException e) { throw new UncheckedIOException(e); } return new Properties(objectMetadata.getETag(), content); } /** * Create a new object to store properties. 
*/ boolean putNewProperties(Ks3URI uri, Map properties) { checkURI(uri); byte[] bytes = PropertiesSerDesUtil.toBytes(properties); ObjectMetadata meta = new ObjectMetadata(); meta.setContentLength(bytes.length); meta.setUserMeta(PROPERTIES_VERSION_USER_METADATA_KEY, PropertiesSerDesUtil.currentVersion()); // use default : OVERWRITE // meta.setHeader(KS3_HEADER_FORBID_OVERWRITE, false); PutObjectRequest request = new PutObjectRequest(uri.bucket(), uri.name(), new ByteArrayInputStream(bytes), meta); try { client.putObject(request); return true; } catch (Ks3ServiceException e) { // TODO if (e.getStatueCode() == 412) { return false; } throw e; } } /** * Update a exist object to store properties. */ boolean updatePropertiesObject(Ks3URI uri, String eTag, Map properties) { checkURI(uri); // Exclude some keys Map newProperties = new LinkedHashMap<>(properties); byte[] bytes = PropertiesSerDesUtil.toBytes(newProperties); ObjectMetadata meta = new ObjectMetadata(); meta.setContentLength(bytes.length); meta.setUserMeta(PROPERTIES_VERSION_USER_METADATA_KEY, PropertiesSerDesUtil.currentVersion()); meta.setHeader(KS3_HEADER_IF_MATCH, eTag); // Replace properties object PutObjectRequest request = new PutObjectRequest(uri.bucket(), uri.name(), new ByteArrayInputStream(bytes), meta); try { // System.out.println("-------- updatePropertiesObject put ------" + uri.location()); client.putObject(request); return true; } catch (Ks3ServiceException e) { if (e.getStatueCode() == 412) { LOG.debug("Update properties object {} failed : {}", uri.name(), e.getErrorMessage(), e); return false; } throw e; } } @Override public String name() { return catalogName; } @Override public void close() throws IOException { closeableGroup.close(); } @Override public void setConf(Object conf) { this.hadoopConf = conf; } }