All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.iceberg.nessie.NessieCatalog Maven / Gradle / Ivy

There is a newer version: 1.7.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.nessie;

import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.CatalogUtil;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.exceptions.NamespaceNotEmptyException;
import org.apache.iceberg.exceptions.NoSuchNamespaceException;
import org.apache.iceberg.hadoop.Configurable;
import org.apache.iceberg.io.CloseableGroup;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.base.Joiner;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.util.LocationUtil;
import org.apache.iceberg.view.BaseMetastoreViewCatalog;
import org.apache.iceberg.view.ViewOperations;
import org.projectnessie.client.NessieClientBuilder;
import org.projectnessie.client.NessieConfigConstants;
import org.projectnessie.client.api.NessieApiV1;
import org.projectnessie.client.api.NessieApiV2;
import org.projectnessie.client.config.NessieClientConfigSource;
import org.projectnessie.client.config.NessieClientConfigSources;
import org.projectnessie.model.Content;
import org.projectnessie.model.ContentKey;
import org.projectnessie.model.TableReference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** Nessie implementation of Iceberg Catalog. */
public class NessieCatalog extends BaseMetastoreViewCatalog
    implements SupportsNamespaces, Configurable {

  private static final Logger LOG = LoggerFactory.getLogger(NessieCatalog.class);
  private static final Joiner SLASH = Joiner.on("/");
  private static final String NAMESPACE_LOCATION_PROPS = "location";

  private static final Map DEFAULT_CATALOG_OPTIONS =
      ImmutableMap.builder()
          .put(CatalogProperties.TABLE_DEFAULT_PREFIX + TableProperties.GC_ENABLED, "false")
          .put(
              CatalogProperties.TABLE_DEFAULT_PREFIX
                  + TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED,
              "false") // just in case METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT changes
          .build();

  private NessieIcebergClient client;
  private String warehouseLocation;
  private Object config;
  private String name;
  private FileIO fileIO;
  private Map catalogOptions = DEFAULT_CATALOG_OPTIONS;
  private CloseableGroup closeableGroup;

  public NessieCatalog() {}

  @SuppressWarnings("checkstyle:HiddenField")
  @Override
  public void initialize(String name, Map options) {
    Map catalogOptions = ImmutableMap.copyOf(options);
    String fileIOImpl =
        options.getOrDefault(
            CatalogProperties.FILE_IO_IMPL, "org.apache.iceberg.hadoop.HadoopFileIO");
    // remove nessie prefix
    final Function removePrefix =
        x -> x.replace(NessieUtil.NESSIE_CONFIG_PREFIX, "");
    final String requestedRef =
        options.get(removePrefix.apply(NessieConfigConstants.CONF_NESSIE_REF));
    String requestedHash =
        options.get(removePrefix.apply(NessieConfigConstants.CONF_NESSIE_REF_HASH));

    NessieClientConfigSource configSource =
        NessieClientConfigSources.mapConfigSource(options)
            .fallbackTo(x -> options.get(removePrefix.apply(x)));
    NessieClientBuilder nessieClientBuilder =
        NessieClientBuilder.createClientBuilderFromSystemSettings(configSource);
    // default version is inferred by uri.
    String apiVersion = options.get(removePrefix.apply(NessieUtil.CLIENT_API_VERSION));
    if (apiVersion == null) {
      apiVersion = inferVersionFromURI(options.get(CatalogProperties.URI));
    }

    NessieApiV1 api;
    switch (apiVersion) {
      case "1":
        api = nessieClientBuilder.build(NessieApiV1.class);
        break;
      case "2":
        api = nessieClientBuilder.build(NessieApiV2.class);
        break;
      default:
        throw new IllegalArgumentException(
            String.format(
                "Unsupported %s: %s. Can only be 1 or 2",
                removePrefix.apply(NessieUtil.CLIENT_API_VERSION), apiVersion));
    }

    initialize(
        name,
        new NessieIcebergClient(api, requestedRef, requestedHash, catalogOptions),
        CatalogUtil.loadFileIO(fileIOImpl, options, config),
        catalogOptions);
  }

  private static String inferVersionFromURI(String uri) {
    if (uri == null) {
      throw new IllegalArgumentException("URI is not specified in the catalog properties");
    }

    // match for uri ending with /v1, /v2 etc
    Pattern pattern = Pattern.compile("/v(\\d+)$");
    Matcher matcher = pattern.matcher(uri);
    if (matcher.find()) {
      return matcher.group(1);
    } else {
      throw new IllegalArgumentException(
          String.format(
              "URI doesn't end with the version: %s. "
                  + "Please configure `client-api-version` in the catalog properties explicitly.",
              uri));
    }
  }

  /**
   * An alternative way to initialize the catalog using a pre-configured {@link NessieIcebergClient}
   * and {@link FileIO} instance.
   *
   * @param name The name of the catalog, defaults to "nessie" if null
   * @param client The pre-configured {@link NessieIcebergClient} instance to use
   * @param fileIO The {@link FileIO} instance to use
   * @param catalogOptions The catalog options to use
   */
  @SuppressWarnings("checkstyle:HiddenField")
  public void initialize(
      String name, NessieIcebergClient client, FileIO fileIO, Map catalogOptions) {
    this.name = name == null ? "nessie" : name;
    this.client = Preconditions.checkNotNull(client, "client must be non-null");
    this.fileIO = Preconditions.checkNotNull(fileIO, "fileIO must be non-null");
    this.catalogOptions =
        ImmutableMap.builder()
            .putAll(DEFAULT_CATALOG_OPTIONS)
            .putAll(Preconditions.checkNotNull(catalogOptions, "catalogOptions must be non-null"))
            .buildKeepingLast();
    this.warehouseLocation = warehouseLocation(name, catalogOptions);
    this.closeableGroup = new CloseableGroup();
    closeableGroup.addCloseable(client);
    closeableGroup.addCloseable(fileIO);
    closeableGroup.addCloseable(metricsReporter());
    closeableGroup.setSuppressCloseFailure(true);
  }

  @SuppressWarnings("checkstyle:HiddenField")
  private String warehouseLocation(String name, Map catalogOptions) {
    String warehouseLocation = catalogOptions.get(CatalogProperties.WAREHOUSE_LOCATION);
    if (warehouseLocation == null) {
      // Explicitly log a warning, otherwise the thrown exception can get list in the "silent-ish
      // catch"
      // in o.a.i.spark.Spark3Util.catalogAndIdentifier(o.a.s.sql.SparkSession, List,
      //     o.a.s.sql.connector.catalog.CatalogPlugin)
      // in the code block
      //    Pair catalogIdentifier =
      // SparkUtil.catalogAndIdentifier(nameParts,
      //        catalogName ->  {
      //          try {
      //            return catalogManager.catalog(catalogName);
      //          } catch (Exception e) {
      //            return null;
      //          }
      //        },
      //        Identifier::of,
      //        defaultCatalog,
      //        currentNamespace
      //    );
      LOG.warn(
          "Catalog creation for inputName={} and options {} failed, because parameter "
              + "'warehouse' is not set, Nessie can't store data.",
          name,
          catalogOptions);
      throw new IllegalStateException("Parameter 'warehouse' not set, Nessie can't store data.");
    }

    return LocationUtil.stripTrailingSlash(warehouseLocation);
  }

  @Override
  public void close() throws IOException {
    if (null != closeableGroup) {
      closeableGroup.close();
    }
  }

  @Override
  public String name() {
    return name;
  }

  @Override
  protected TableOperations newTableOps(TableIdentifier tableIdentifier) {
    TableReference tr = parseTableReference(tableIdentifier);
    return new NessieTableOperations(
        ContentKey.of(
            org.projectnessie.model.Namespace.of(tableIdentifier.namespace().levels()),
            tr.getName()),
        client.withReference(tr.getReference(), tr.getHash()),
        fileIO);
  }

  @Override
  protected String defaultWarehouseLocation(TableIdentifier table) {
    String location;
    if (table.hasNamespace()) {
      String baseLocation = SLASH.join(warehouseLocation, table.namespace().toString());
      try {
        baseLocation =
            loadNamespaceMetadata(table.namespace())
                .getOrDefault(NAMESPACE_LOCATION_PROPS, baseLocation);
      } catch (NoSuchNamespaceException e) {
        // do nothing we want the same behavior that if the location is not defined
      }
      location = SLASH.join(baseLocation, table.name());
    } else {
      location = SLASH.join(warehouseLocation, table.name());
    }
    // Different tables with same table name can exist across references in Nessie.
    // To avoid sharing same table path between two tables with same name, use uuid in the table
    // path.
    return location + "_" + UUID.randomUUID();
  }

  @Override
  public List listTables(Namespace namespace) {
    return client.listTables(namespace);
  }

  @Override
  public boolean dropTable(TableIdentifier identifier, boolean purge) {
    TableReference tableReference = parseTableReference(identifier);
    return client
        .withReference(tableReference.getReference(), tableReference.getHash())
        .dropTable(identifierWithoutTableReference(identifier, tableReference), purge);
  }

  @Override
  public void renameTable(TableIdentifier from, TableIdentifier to) {
    TableReference fromTableReference = parseTableReference(from);
    TableReference toTableReference = parseTableReference(to);

    validateReferenceForRename(fromTableReference, toTableReference, Content.Type.ICEBERG_TABLE);

    TableIdentifier fromIdentifier =
        NessieUtil.removeCatalogName(
            identifierWithoutTableReference(from, fromTableReference), name());
    TableIdentifier toIdentifier =
        NessieUtil.removeCatalogName(identifierWithoutTableReference(to, toTableReference), name());
    client
        .withReference(fromTableReference.getReference(), fromTableReference.getHash())
        .renameTable(fromIdentifier, toIdentifier);
  }

  @Override
  public void createNamespace(Namespace namespace, Map metadata) {
    client.createNamespace(namespace, metadata);
  }

  @Override
  public List listNamespaces(Namespace namespace) throws NoSuchNamespaceException {
    return client.listNamespaces(namespace);
  }

  /**
   * Load the given namespace and return its properties.
   *
   * @param namespace a namespace. {@link Namespace}
   * @return a string map of properties for the given namespace
   * @throws NoSuchNamespaceException If the namespace does not exist
   */
  @Override
  public Map loadNamespaceMetadata(Namespace namespace)
      throws NoSuchNamespaceException {
    return client.loadNamespaceMetadata(namespace);
  }

  @Override
  public boolean dropNamespace(Namespace namespace) throws NamespaceNotEmptyException {
    return client.dropNamespace(namespace);
  }

  @Override
  public boolean setProperties(Namespace namespace, Map properties) {
    return client.setProperties(namespace, properties);
  }

  @Override
  public boolean removeProperties(Namespace namespace, Set properties) {
    return client.removeProperties(namespace, properties);
  }

  @Override
  public void setConf(Object conf) {
    this.config = conf;
  }

  @VisibleForTesting
  String currentHash() {
    return client.getRef().getHash();
  }

  @VisibleForTesting
  String currentRefName() {
    return client.getRef().getName();
  }

  @VisibleForTesting
  FileIO fileIO() {
    return fileIO;
  }

  private TableReference parseTableReference(TableIdentifier tableIdentifier) {
    TableReference tr = TableReference.parse(tableIdentifier.name());
    Preconditions.checkArgument(
        !tr.hasTimestamp(),
        "Invalid table name: # is only allowed for hashes (reference by "
            + "timestamp is not supported)");
    return tr;
  }

  private TableIdentifier identifierWithoutTableReference(
      TableIdentifier identifier, TableReference tableReference) {
    if (tableReference.hasReference()) {
      return TableIdentifier.of(identifier.namespace(), tableReference.getName());
    }
    return identifier;
  }

  @Override
  protected Map properties() {
    return catalogOptions;
  }

  @Override
  protected ViewOperations newViewOps(TableIdentifier identifier) {
    TableReference tr = parseTableReference(identifier);
    return new NessieViewOperations(
        ContentKey.of(
            org.projectnessie.model.Namespace.of(identifier.namespace().levels()), tr.getName()),
        client.withReference(tr.getReference(), tr.getHash()),
        fileIO);
  }

  @Override
  public List listViews(Namespace namespace) {
    return client.listViews(namespace);
  }

  @Override
  public boolean dropView(TableIdentifier identifier) {
    TableReference tableReference = parseTableReference(identifier);
    return client
        .withReference(tableReference.getReference(), tableReference.getHash())
        .dropView(identifierWithoutTableReference(identifier, tableReference), false);
  }

  @Override
  public void renameView(TableIdentifier from, TableIdentifier to) {
    TableReference fromTableReference = parseTableReference(from);
    TableReference toTableReference = parseTableReference(to);

    validateReferenceForRename(fromTableReference, toTableReference, Content.Type.ICEBERG_VIEW);

    TableIdentifier fromIdentifier =
        NessieUtil.removeCatalogName(
            identifierWithoutTableReference(from, fromTableReference), name());
    TableIdentifier toIdentifier =
        NessieUtil.removeCatalogName(identifierWithoutTableReference(to, toTableReference), name());
    client
        .withReference(fromTableReference.getReference(), fromTableReference.getHash())
        .renameView(fromIdentifier, toIdentifier);
  }

  private void validateReferenceForRename(
      TableReference fromTableReference, TableReference toTableReference, Content.Type type) {
    String fromReference =
        fromTableReference.hasReference()
            ? fromTableReference.getReference()
            : client.getRef().getName();
    String toReference =
        toTableReference.hasReference()
            ? toTableReference.getReference()
            : client.getRef().getName();
    Preconditions.checkArgument(
        fromReference.equalsIgnoreCase(toReference),
        "Cannot rename %s '%s' on reference '%s' to '%s' on reference '%s':"
            + " source and target references must be the same.",
        NessieUtil.contentTypeString(type).toLowerCase(Locale.ENGLISH),
        fromTableReference.getName(),
        fromReference,
        toTableReference.getName(),
        toReference);
  }
}