org.apache.iceberg.mr.Catalogs
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg.mr;

import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.CatalogUtil;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.Table;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.relocated.com.google.common.collect.Streams;

/**
 * Class for catalog resolution and accessing the common functions for {@link Catalog} API.
 * <p>
 * If the catalog name is provided, get the catalog type from the
 * iceberg.catalog.<code>catalogName</code>.type config.
 * <p>
 * In case the catalog name is {@link #ICEBERG_HADOOP_TABLE_NAME location_based_table},
 * the type is ignored and tables will be loaded using {@link HadoopTables}.
 * <p>
 * In case the value of the catalog type is null, the iceberg.catalog.<code>catalogName</code>.catalog-impl
 * config is used to determine the catalog implementation class.
 * <p>
 * If the catalog name is null, get the catalog type from the
 * {@link InputFormatConfig#CATALOG iceberg.mr.catalog} config:
 * <ul>
 *   <li>hive: HiveCatalog</li>
 *   <li>location: HadoopTables</li>
 *   <li>hadoop: HadoopCatalog</li>
 * </ul>
 * <p>
 * In case the value of the catalog type is null,
 * {@link InputFormatConfig#CATALOG_LOADER_CLASS iceberg.mr.catalog.loader.class} is used to determine
 * the catalog implementation class.
 * <p>
 * Note: null catalog name mode is only supported for backwards compatibility. Using this mode is NOT RECOMMENDED.
 */
public final class Catalogs {

  public static final String ICEBERG_DEFAULT_CATALOG_NAME = "default_iceberg";
  public static final String ICEBERG_HADOOP_TABLE_NAME = "location_based_table";

  public static final String NAME = "name";
  public static final String LOCATION = "location";

  private static final String NO_CATALOG_TYPE = "no catalog";
  private static final Set<String> PROPERTIES_TO_REMOVE =
      ImmutableSet.of(InputFormatConfig.TABLE_SCHEMA, InputFormatConfig.PARTITION_SPEC, LOCATION, NAME,
          InputFormatConfig.CATALOG_NAME);

  private Catalogs() {
  }

  /**
   * Load an Iceberg table using the catalog and table identifier (or table path) specified by the configuration.
   * @param conf a Hadoop conf
   * @return an Iceberg table
   */
  public static Table loadTable(Configuration conf) {
    return loadTable(conf, conf.get(InputFormatConfig.TABLE_IDENTIFIER), conf.get(InputFormatConfig.TABLE_LOCATION),
        conf.get(InputFormatConfig.CATALOG_NAME));
  }

  /**
   * Load an Iceberg table using the catalog specified by the configuration.
   * <p>
   * The table identifier ({@link Catalogs#NAME}) and the catalog name ({@link InputFormatConfig#CATALOG_NAME}),
   * or table path ({@link Catalogs#LOCATION}) should be specified by the controlling properties.
   * <p>
   * Used by HiveIcebergSerDe and HiveIcebergStorageHandler.
   * @param conf a Hadoop conf
   * @param props the controlling properties
   * @return an Iceberg table
   */
  public static Table loadTable(Configuration conf, Properties props) {
    return loadTable(conf, props.getProperty(NAME), props.getProperty(LOCATION),
        props.getProperty(InputFormatConfig.CATALOG_NAME));
  }

  private static Table loadTable(Configuration conf, String tableIdentifier, String tableLocation,
                                 String catalogName) {
    Optional<Catalog> catalog = loadCatalog(conf, catalogName);

    if (catalog.isPresent()) {
      Preconditions.checkArgument(tableIdentifier != null, "Table identifier not set");
      return catalog.get().loadTable(TableIdentifier.parse(tableIdentifier));
    }

    Preconditions.checkArgument(tableLocation != null, "Table location not set");
    return new HadoopTables(conf).load(tableLocation);
  }
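  // Illustrative usage sketch for the loaders above (not part of the original source; the
  // catalog name "my_catalog" and identifier "db.tbl" are hypothetical examples):
  //
  //   Properties props = new Properties();
  //   props.setProperty(Catalogs.NAME, "db.tbl");
  //   props.setProperty(InputFormatConfig.CATALOG_NAME, "my_catalog");
  //   Table table = Catalogs.loadTable(conf, props);
  //
  // A location based table can be loaded without a catalog by naming the special
  // location_based_table catalog, which routes the call to HadoopTables:
  //
  //   Properties hadoopProps = new Properties();
  //   hadoopProps.setProperty(Catalogs.LOCATION, "hdfs://nn:8020/warehouse/db/tbl");
  //   hadoopProps.setProperty(InputFormatConfig.CATALOG_NAME, Catalogs.ICEBERG_HADOOP_TABLE_NAME);
  //   Table hadoopTable = Catalogs.loadTable(conf, hadoopProps);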

  /**
   * Creates an Iceberg table using the catalog specified by the configuration.
   * <p>
   * The properties should contain the following values:
   * <ul>
   *   <li>Table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) is required</li>
   *   <li>Table schema ({@link InputFormatConfig#TABLE_SCHEMA}) is required</li>
   *   <li>Partition specification ({@link InputFormatConfig#PARTITION_SPEC}) is optional. The table will be
   *       unpartitioned if not provided</li>
   * </ul>
   * <p>
   * Other properties will be handed over to the table creation. The controlling properties above will not be
   * propagated.
   * @param conf a Hadoop conf
   * @param props the controlling properties
   * @return the created Iceberg table
   */
  public static Table createTable(Configuration conf, Properties props) {
    String schemaString = props.getProperty(InputFormatConfig.TABLE_SCHEMA);
    Preconditions.checkNotNull(schemaString, "Table schema not set");
    Schema schema = SchemaParser.fromJson(schemaString);

    String specString = props.getProperty(InputFormatConfig.PARTITION_SPEC);
    PartitionSpec spec = PartitionSpec.unpartitioned();
    if (specString != null) {
      spec = PartitionSpecParser.fromJson(schema, specString);
    }

    String location = props.getProperty(LOCATION);
    String catalogName = props.getProperty(InputFormatConfig.CATALOG_NAME);

    // Create a table property map without the controlling properties
    Map<String, String> map = Maps.newHashMapWithExpectedSize(props.size());
    for (Object key : props.keySet()) {
      if (!PROPERTIES_TO_REMOVE.contains(key)) {
        map.put(key.toString(), props.get(key).toString());
      }
    }

    Optional<Catalog> catalog = loadCatalog(conf, catalogName);

    if (catalog.isPresent()) {
      String name = props.getProperty(NAME);
      Preconditions.checkNotNull(name, "Table identifier not set");
      return catalog.get().createTable(TableIdentifier.parse(name), schema, spec, location, map);
    }

    Preconditions.checkNotNull(location, "Table location not set");
    return new HadoopTables(conf).create(schema, spec, map, location);
  }
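  // Illustrative createTable sketch (not part of the original source; the schema and names are
  // hypothetical, and Types comes from org.apache.iceberg.types.Types):
  //
  //   Schema schema = new Schema(Types.NestedField.required(1, "id", Types.LongType.get()));
  //   Properties props = new Properties();
  //   props.setProperty(Catalogs.NAME, "db.tbl");
  //   props.setProperty(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(schema));
  //   props.setProperty(InputFormatConfig.CATALOG_NAME, "my_catalog");
  //   Table created = Catalogs.createTable(conf, props);
  //
  // Omitting InputFormatConfig.PARTITION_SPEC yields an unpartitioned table; a spec serialized
  // with PartitionSpecParser.toJson(spec) may be supplied to partition it.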

  /**
   * Drops an Iceberg table using the catalog specified by the configuration.
   * <p>
   * The table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) should be specified by
   * the controlling properties.
   * @param conf a Hadoop conf
   * @param props the controlling properties
   * @return true if the table was dropped
   */
  public static boolean dropTable(Configuration conf, Properties props) {
    String location = props.getProperty(LOCATION);
    String catalogName = props.getProperty(InputFormatConfig.CATALOG_NAME);

    Optional<Catalog> catalog = loadCatalog(conf, catalogName);

    if (catalog.isPresent()) {
      String name = props.getProperty(NAME);
      Preconditions.checkNotNull(name, "Table identifier not set");
      return catalog.get().dropTable(TableIdentifier.parse(name));
    }

    Preconditions.checkNotNull(location, "Table location not set");
    return new HadoopTables(conf).dropTable(location);
  }

  /**
   * Returns true if HiveCatalog is used.
   * @param conf a Hadoop conf
   * @param props the controlling properties
   * @return true if the Catalog is HiveCatalog
   */
  public static boolean hiveCatalog(Configuration conf, Properties props) {
    String catalogName = props.getProperty(InputFormatConfig.CATALOG_NAME);
    String catalogType = getCatalogType(conf, catalogName);
    if (catalogType != null) {
      return CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE.equalsIgnoreCase(catalogType);
    }
    catalogType = getCatalogType(conf, ICEBERG_DEFAULT_CATALOG_NAME);
    if (catalogType != null) {
      return CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE.equalsIgnoreCase(catalogType);
    }
    return getCatalogProperties(conf, catalogName, catalogType).get(CatalogProperties.CATALOG_IMPL) == null;
  }

  @VisibleForTesting
  static Optional<Catalog> loadCatalog(Configuration conf, String catalogName) {
    String catalogType = getCatalogType(conf, catalogName);
    if (NO_CATALOG_TYPE.equalsIgnoreCase(catalogType)) {
      return Optional.empty();
    } else {
      String name = catalogName == null ? ICEBERG_DEFAULT_CATALOG_NAME : catalogName;
      return Optional.of(CatalogUtil.buildIcebergCatalog(name,
          getCatalogProperties(conf, name, catalogType), conf));
    }
  }

  /**
   * Collect all the catalog specific configuration from the global Hive configuration.
   * @param conf a Hadoop configuration
   * @param catalogName name of the catalog
   * @param catalogType type of the catalog
   * @return complete map of catalog properties
   */
  private static Map<String, String> getCatalogProperties(Configuration conf, String catalogName,
                                                          String catalogType) {
    String keyPrefix = InputFormatConfig.CATALOG_CONFIG_PREFIX + catalogName;
    Map<String, String> catalogProperties = Streams.stream(conf.iterator())
        .filter(e -> e.getKey().startsWith(keyPrefix))
        .collect(Collectors.toMap(e -> e.getKey().substring(keyPrefix.length() + 1), Map.Entry::getValue));

    return addCatalogPropertiesIfMissing(conf, catalogType, catalogProperties);
  }
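  // Illustrative sketch of the key-prefix stripping performed above (not part of the original
  // source; the catalog name "prod" and its settings are hypothetical):
  //
  //   conf.set("iceberg.catalog.prod.type", "hive");
  //   conf.set("iceberg.catalog.prod.uri", "thrift://metastore:9083");
  //
  // getCatalogProperties(conf, "prod", "hive") drops the "iceberg.catalog.prod." prefix and
  // returns {type=hive, uri=thrift://metastore:9083}, which buildIcebergCatalog consumes.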
  /**
   * This method is used for backward-compatible catalog configuration.
   * Collects all the catalog specific configuration from the global Hive configuration.
   * Note: this should be removed when the old catalog configuration is deprecated.
   * @param conf global Hive configuration
   * @param catalogType type of the catalog
   * @param catalogProperties pre-populated catalog properties
   * @return complete map of catalog properties
   */
  private static Map<String, String> addCatalogPropertiesIfMissing(Configuration conf, String catalogType,
                                                                   Map<String, String> catalogProperties) {
    if (catalogType != null) {
      catalogProperties.putIfAbsent(CatalogUtil.ICEBERG_CATALOG_TYPE, catalogType);
    }

    String legacyCatalogImpl = conf.get(InputFormatConfig.CATALOG_LOADER_CLASS);
    if (legacyCatalogImpl != null) {
      catalogProperties.putIfAbsent(CatalogProperties.CATALOG_IMPL, legacyCatalogImpl);
    }

    String legacyWarehouseLocation = conf.get(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION);
    if (legacyWarehouseLocation != null) {
      catalogProperties.putIfAbsent(CatalogProperties.WAREHOUSE_LOCATION, legacyWarehouseLocation);
    }
    return catalogProperties;
  }

  /**
   * Return the catalog type based on the catalog name.
   * <p>
   * See {@link Catalogs} documentation for the catalog type resolution strategy.
   * @param conf global Hive configuration
   * @param catalogName name of the catalog
   * @return type of the catalog, can be null
   */
  private static String getCatalogType(Configuration conf, String catalogName) {
    if (catalogName != null) {
      String catalogType = conf.get(InputFormatConfig.catalogPropertyConfigKey(
          catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE));
      if (catalogName.equals(ICEBERG_HADOOP_TABLE_NAME)) {
        return NO_CATALOG_TYPE;
      } else {
        return catalogType;
      }
    } else {
      String catalogType = conf.get(InputFormatConfig.CATALOG);
      if (catalogType != null && catalogType.equals(LOCATION)) {
        return NO_CATALOG_TYPE;
      } else {
        return catalogType;
      }
    }
  }
}
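// Illustrative sketch of the deprecated null-catalog-name mode described in the class Javadoc
// (not part of the original source; the identifier and warehouse path are hypothetical):
//
//   Configuration conf = new Configuration();
//   conf.set(InputFormatConfig.CATALOG, "hadoop");
//   conf.set(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION, "hdfs://nn:8020/warehouse");
//   conf.set(InputFormatConfig.TABLE_IDENTIFIER, "db.tbl");
//   Table table = Catalogs.loadTable(conf);  // resolves to a HadoopCatalog under these settings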




