All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.kitesdk.data.spi.hive.HiveAbstractMetadataProvider Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.kitesdk.data.spi.hive;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

import java.io.IOException;
import java.net.URI;
import java.util.Collection;
import java.util.List;
import javax.annotation.Nullable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.Table;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetException;
import org.kitesdk.data.DatasetIOException;
import org.kitesdk.data.DatasetNotFoundException;
import org.kitesdk.data.URIBuilder;
import org.kitesdk.data.spi.AbstractMetadataProvider;
import org.kitesdk.data.spi.Compatibility;
import org.kitesdk.data.spi.PartitionListener;
import org.kitesdk.data.spi.filesystem.SchemaManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

abstract class HiveAbstractMetadataProvider extends AbstractMetadataProvider implements
    PartitionListener {

  static final String SCHEMA_DIRECTORY = ".metadata/schemas";

  private static final Logger LOG = LoggerFactory
      .getLogger(HiveAbstractMetadataProvider.class);

  protected final Configuration conf;
  private MetaStoreUtil metastore;

  HiveAbstractMetadataProvider(Configuration conf) {
    Preconditions.checkNotNull(conf, "Configuration cannot be null");
    this.conf = conf;
  }

  protected MetaStoreUtil getMetaStoreUtil() {
    if (metastore == null) {
      metastore = new MetaStoreUtil(conf);
    }
    return metastore;
  }

  protected abstract URI expectedLocation(String namespace, String name);

  /**
   * Returns whether the table is a managed hive table.
   * @param name a Table name
   * @return true if the table is managed, false otherwise
   * @throws DatasetNotFoundException If the table does not exist in Hive
   */
  protected boolean isManaged(String namespace, String name) {
    String resolved = resolveNamespace(namespace, name);
    if (resolved != null) {
      return isManaged(getMetaStoreUtil().getTable(resolved, name));
    }
    return false;
  }

  /**
   * Returns whether the table is a managed hive table.
   * @param name a Table name
   * @return true if the table is managed, false otherwise
   * @throws DatasetNotFoundException If the table does not exist in Hive
   */
  protected boolean isExternal(String namespace, String name) {
    String resolved = resolveNamespace(namespace, name);
    if (resolved != null) {
      return isExternal(getMetaStoreUtil().getTable(resolved, name));
    }
    return false;
  }

  @Override
  public DatasetDescriptor load(String namespace, String name) {
    Compatibility.checkDatasetName(namespace, name);

    String resolved = resolveNamespace(namespace, name);
    if (resolved != null) {
      return HiveUtils.descriptorForTable(
          conf, getMetaStoreUtil().getTable(resolved, name));
    }
    throw new DatasetNotFoundException(
        "Hive table not found: " + namespace + "." + name);
  }

  @Override
  public DatasetDescriptor update(String namespace, String name, DatasetDescriptor descriptor) {
    Compatibility.checkDatasetName(namespace, name);
    Compatibility.checkDescriptor(descriptor);

    String resolved = resolveNamespace(namespace, name);
    if (resolved != null) {
      Table table = getMetaStoreUtil().getTable(resolved, name);

      Path managerPath = new Path(new Path(table.getSd().getLocation()),
          SCHEMA_DIRECTORY);

      SchemaManager manager = SchemaManager.create(conf, managerPath);

      DatasetDescriptor newDescriptor;

      try {
        URI schemaURI = manager.writeSchema(descriptor.getSchema());

        newDescriptor = new DatasetDescriptor.Builder(descriptor)
            .schemaUri(schemaURI).build();

      } catch (IOException e) {
        throw new DatasetIOException("Unable to create schema", e);
      }

      HiveUtils.updateTableSchema(table, newDescriptor);
      getMetaStoreUtil().alterTable(table);
      return descriptor;
    }
    throw new DatasetNotFoundException(
        "Hive table not found: " + namespace + "." + name);
  }

  @Override
  public boolean delete(String namespace, String name) {
    Compatibility.checkDatasetName(namespace, name);
    String resolved = resolveNamespace(namespace, name);
    if (resolved != null) {
      getMetaStoreUtil().dropTable(resolved, name);
      return true;
    }
    return false;
  }

  @Override
  public boolean exists(String namespace, String name) {
    Compatibility.checkDatasetName(namespace, name);
    return (resolveNamespace(namespace, name) != null);
  }

  @Override
  public Collection namespaces() {
    Collection databases = getMetaStoreUtil().getAllDatabases();
    List databasesWithDatasets = Lists.newArrayList();
    for (String db : databases) {
      if (isNamespace(db)) {
        databasesWithDatasets.add(db);
      }
    }
    return databasesWithDatasets;
  }

  @Override
  public Collection datasets(String namespace) {
    Collection tables = getMetaStoreUtil().getAllTables(namespace);
    List readableTables = Lists.newArrayList();
    for (String name : tables) {
      if (isReadable(namespace, name)) {
        readableTables.add(name);
      }
    }
    return readableTables;
  }

  /**
   * Returns true if there is at least one table in the give database that can
   * be read.
   *
   * @param database a Hive database name
   * @return {@code true} if there is at least one readable table in database
   * @see {@link #isReadable(String, String)}
   */
  private boolean isNamespace(String database) {
    Collection tables = getMetaStoreUtil().getAllTables(database);
    for (String name : tables) {
      if (isReadable(database, name)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns true if the given table exists and can be read by this library.
   *
   * @param namespace a Hive database name
   * @param name a table name
   * @return {@code true} if the table exists and is supported
   */
  private boolean isReadable(String namespace, String name) {
    Table table = getMetaStoreUtil().getTable(namespace, name);
    if (isManaged(table) || isExternal(table)) { // readable table types
      try {
        // get a descriptor for the table. if this succeeds, it is readable
        HiveUtils.descriptorForTable(conf, table);
        return true;
      } catch (DatasetException e) {
        // not a readable table
      } catch (IllegalStateException e) {
        // not a readable table
      } catch (IllegalArgumentException e) {
        // not a readable table
      } catch (UnsupportedOperationException e) {
        // not a readable table
      }
    }
    return false;
  }

  @Override
  @SuppressWarnings("unchecked")
  public void partitionAdded(String namespace, String name, String path) {
    getMetaStoreUtil().addPartition(namespace, name, path);
  }


  @Override
  @SuppressWarnings("unchecked")
  public void partitionDeleted(String namespace, String name, String path) {

    getMetaStoreUtil().dropPartition(namespace, name, path);
  }

  /**
   * Checks whether the Hive table {@code namespace.name} exists or if
   * {@code default.name} exists and should be used.
   *
   * @param namespace the requested namespace
   * @param name the table name
   * @return if namespace.name exists, namespace. if not and default.name
   *          exists, then default. {@code null} otherwise.
   */
  protected String resolveNamespace(String namespace, String name) {
    return resolveNamespace(namespace, name, null);
  }

  /**
   * Checks whether the Hive table {@code namespace.name} exists or if
   * {@code default.name} exists and should be used.
   *
   * @param namespace the requested namespace
   * @param name the table name
   * @param location location that should match or null to check the default
   * @return if namespace.name exists, namespace. if not and default.name
   *          exists, then default. {@code null} otherwise.
   */
  protected String resolveNamespace(String namespace, String name,
                                    @Nullable URI location) {
    if (getMetaStoreUtil().exists(namespace, name)) {
      return namespace;
    }
    try {
      DatasetDescriptor descriptor = HiveUtils.descriptorForTable(
          conf, getMetaStoreUtil().getTable(URIBuilder.NAMESPACE_DEFAULT, name));
      URI expectedLocation = location;
      if (location == null) {
        expectedLocation = expectedLocation(namespace, name);
      }
      if ((expectedLocation == null) ||
          pathsEquivalent(expectedLocation, descriptor.getLocation())) {
        // table in the default db has the location that would have been used
        return URIBuilder.NAMESPACE_DEFAULT;
      }
      // fall through and return null
    } catch (DatasetNotFoundException e) {
      // fall through and return null
    }
    return null;
  }

  private static boolean pathsEquivalent(URI left, @Nullable URI right) {
    if (right == null) {
      return false;
    }
    String leftAuth = left.getAuthority();
    String rightAuth = right.getAuthority();
    if (leftAuth != null && rightAuth != null && !leftAuth.equals(rightAuth)) {
      // but authority sections are set, but do not match
      return false;
    }
    return (Objects.equal(left.getScheme(), right.getScheme()) &&
        Objects.equal(left.getPath(), right.getPath()));
  }

  @VisibleForTesting
  static boolean isManaged(Table table) {
    return TableType.MANAGED_TABLE.toString().equals(table.getTableType());
  }

  @VisibleForTesting
  static boolean isExternal(Table table) {
    return TableType.EXTERNAL_TABLE.toString().equals(table.getTableType());
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy