All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.metastore.Warehouse Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.metastore;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.facebook.presto.hive.$internal.org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.metastore.api.Catalog;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
import org.apache.hadoop.hive.metastore.utils.FileUtils;
import org.apache.hadoop.hive.metastore.utils.HdfsUtils;
import org.apache.hadoop.hive.metastore.utils.JavaUtils;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import com.facebook.presto.hive.$internal.org.slf4j.Logger;
import com.facebook.presto.hive.$internal.org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hive.metastore.ReplChangeManager.RecycleType;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * This class represents a warehouse where data of Hive tables is stored
 */
public class Warehouse {
  // Name and comment of the default catalog.
  public static final String DEFAULT_CATALOG_NAME = "hive";
  public static final String DEFAULT_CATALOG_COMMENT = "Default catalog, for Hive";
  // Name and comment of the default database; this database maps to the warehouse root itself.
  public static final String DEFAULT_DATABASE_NAME = "default";
  public static final String DEFAULT_DATABASE_COMMENT = "Default Hive database";
  public static final String DEFAULT_SERIALIZATION_FORMAT = "1";
  // Appended to the lower-cased database name to form the database directory name.
  public static final String DATABASE_WAREHOUSE_SUFFIX = ".db";
  // Separator placed between catalog, database and table names in qualified names.
  private static final String CAT_DB_TABLE_SEPARATOR = ".";

  // Resolved warehouse root; stays null until getWhRoot() is first called.
  private Path whRoot;
  private final Configuration conf;
  // Warehouse root exactly as read from the configuration (ConfVars.WAREHOUSE).
  private final String whRootString;

  public static final Logger LOG = LoggerFactory.getLogger("hive.metastore.warehouse");

  // File system handler that performs the actual directory deletion.
  private MetaStoreFS fsHandler = null;
  // When false, isWritable() reports every path as writable without checking.
  private boolean storageAuthCheck = false;
  // Change manager that is asked to recycle paths before they are renamed or deleted.
  private ReplChangeManager cm = null;

  public Warehouse(Configuration conf) throws MetaException {
    this.conf = conf;
    whRootString = MetastoreConf.getVar(conf, ConfVars.WAREHOUSE);
    if (StringUtils.isBlank(whRootString)) {
      throw new MetaException(ConfVars.WAREHOUSE.getVarname()
          + " is not set in the config or blank");
    }
    fsHandler = getMetaStoreFsHandler(conf);
    cm = ReplChangeManager.getInstance(conf);
    storageAuthCheck = MetastoreConf.getBoolVar(conf, ConfVars.AUTHORIZATION_STORAGE_AUTH_CHECKS);
  }

  /**
   * Instantiates the {@link MetaStoreFS} implementation whose class name is configured under
   * {@code ConfVars.FS_HANDLER_CLS}.
   *
   * @param conf configuration holding the handler class name; also passed to the new instance
   * @return a new handler instance
   * @throws MetaException if the configured class cannot be found; the original
   *         {@link ClassNotFoundException} is attached as the cause
   */
  private MetaStoreFS getMetaStoreFsHandler(Configuration conf)
      throws MetaException {
    String handlerClassStr = MetastoreConf.getVar(conf, ConfVars.FS_HANDLER_CLS);
    try {
      // asSubclass gives a properly typed Class object (no raw type, no unchecked cast) and
      // rejects a class that is not a MetaStoreFS before it is instantiated.
      Class<? extends MetaStoreFS> handlerClass = Class
          .forName(handlerClassStr, true, JavaUtils.getClassLoader())
          .asSubclass(MetaStoreFS.class);
      return ReflectionUtils.newInstance(handlerClass, conf);
    } catch (ClassNotFoundException e) {
      MetaException failure = new MetaException("Error in loading MetaStoreFS handler."
          + e.getMessage());
      // Keep the original stack trace available to whoever handles the MetaException.
      failure.initCause(e);
      throw failure;
    }
  }


  /**
   * Looks up the file system that owns the given path.
   *
   * <p>An {@link IOException} raised by the lookup is handed to
   * {@code MetaStoreUtils.logAndThrowMetaException}.
   *
   * @param f path whose file system is wanted
   * @param conf configuration used for the lookup
   * @return the file system for {@code f}; {@code null} only if the error helper returns normally
   * @throws MetaException declared for the converted I/O failure
   */
  public static FileSystem getFs(Path f, Configuration conf) throws MetaException {
    FileSystem resolved = null;
    try {
      resolved = f.getFileSystem(conf);
    } catch (IOException ioe) {
      MetaStoreUtils.logAndThrowMetaException(ioe);
    }
    return resolved;
  }

  /**
   * Looks up the file system that owns the given path, using this warehouse's configuration.
   *
   * @param f path whose file system is wanted
   * @return the file system for {@code f}
   * @throws MetaException if the lookup fails
   */
  public FileSystem getFs(Path f) throws MetaException {
    final FileSystem owner = Warehouse.getFs(f, this.conf);
    return owner;
  }


  /**
   * Rewrites a path so that it carries the scheme and authority of the file system it maps to.
   *
   * <p>Hadoop file system URIs always contain the canonical DNS name of the Namenode, so
   * operations on paths that use a raw IP address fail because they do not match the file
   * system URI. Replacing scheme and authority with those reported by the file system avoids
   * that mismatch.
   *
   * @param path path to be canonicalized
   * @param conf configuration used to look up the file system
   * @return path with canonical scheme and authority
   * @throws MetaException if the file system cannot be obtained
   */
  public static Path getDnsPath(Path path, Configuration conf) throws MetaException {
    final FileSystem owner = getFs(path, conf);
    final String scheme = owner.getUri().getScheme();
    final String authority = owner.getUri().getAuthority();
    final String pathPart = path.toUri().getPath();
    return new Path(scheme, authority, pathPart);
  }

  /**
   * Canonicalizes the scheme and authority of a path using this warehouse's configuration.
   *
   * @param path path to be canonicalized
   * @return path with canonical scheme and authority
   * @throws MetaException if the file system cannot be obtained
   */
  public Path getDnsPath(Path path) throws MetaException {
    final Path canonical = Warehouse.getDnsPath(path, this.conf);
    return canonical;
  }

  /**
   * Returns the warehouse root directory, resolving it on first use.
   *
   * <p>Resolution opens the file system that corresponds to the configured root (that should
   * be acceptable because this is only called during DDL statements for non-external tables).
   * The resolved path is cached and returned directly on later calls.
   *
   * @return the resolved warehouse root
   * @throws MetaException if the root cannot be resolved
   */
  public Path getWhRoot() throws MetaException {
    if (whRoot == null) {
      final Path configuredRoot = new Path(whRootString);
      whRoot = getDnsPath(configuredRoot);
    }
    return whRoot;
  }

  /**
   * Works out the directory of a database from its catalog and name. Intended for use when a
   * database is being created or altered; to find the path an existing database already uses,
   * call {@link #getDatabasePath(Database)}.
   *
   * <p>Resolution order:
   * <ol>
   *   <li>a location already set on the database wins;</li>
   *   <li>with no catalog, or the default catalog, the default database maps to the warehouse
   *       root and any other database to a sub-directory of it;</li>
   *   <li>otherwise the database directory is placed under the catalog's location.</li>
   * </ol>
   *
   * @param cat catalog the database is in; may be {@code null}
   * @param db database object
   * @return path representing the directory for the database
   * @throws MetaException when the file path cannot be properly determined from the configured
   * file system.
   */
  public Path determineDatabasePath(Catalog cat, Database db) throws MetaException {
    if (db.isSetLocationUri()) {
      final Path explicitLocation = new Path(db.getLocationUri());
      return getDnsPath(explicitLocation);
    }

    final boolean inDefaultCatalog =
        cat == null || cat.getName().equalsIgnoreCase(DEFAULT_CATALOG_NAME);
    if (!inDefaultCatalog) {
      final Path catalogRoot = getDnsPath(new Path(cat.getLocationUri()));
      return new Path(catalogRoot, dbDirFromDbName(db));
    }

    final boolean isDefaultDb = db.getName().equalsIgnoreCase(DEFAULT_DATABASE_NAME);
    if (isDefaultDb) {
      return getWhRoot();
    }
    return new Path(getWhRoot(), dbDirFromDbName(db));
  }

  /**
   * Builds the directory name of a database: its lower-cased name followed by
   * {@link #DATABASE_WAREHOUSE_SUFFIX}.
   *
   * @param db database whose directory name is wanted
   * @return directory name (not a full path)
   */
  private String dbDirFromDbName(Database db) throws MetaException {
    final String loweredName = db.getName().toLowerCase();
    return loweredName + DATABASE_WAREHOUSE_SUFFIX;
  }

  /**
   * Returns the directory of a database. The default database of the default catalog lives at
   * the warehouse root; every other database uses the location stored on the database object.
   *
   * @param db database to get the path of
   * @return path to the database directory
   * @throws MetaException when the file path cannot be properly determined from the configured
   * file system.
   */
  public Path getDatabasePath(Database db) throws MetaException {
    final boolean isDefaultDatabase =
        db.getCatalogName().equalsIgnoreCase(DEFAULT_CATALOG_NAME)
            && db.getName().equalsIgnoreCase(DEFAULT_DATABASE_NAME);
    return isDefaultDatabase ? getWhRoot() : new Path(db.getLocationUri());
  }

  /**
   * Returns the location a database gets by default: the warehouse root for the default
   * database, otherwise a directory under the root named after the lower-cased database name
   * plus {@link #DATABASE_WAREHOUSE_SUFFIX}.
   *
   * @param dbName database name
   * @return default directory for the database
   * @throws MetaException if the warehouse root cannot be resolved
   */
  public Path getDefaultDatabasePath(String dbName) throws MetaException {
    // TODO CAT - Most callers are probably wrong to use this.  It is only appropriate while the
    // database location is unset, i.e. while a new database is being created.  Once that is
    // confirmed, switch those callers to getDatabasePath() and merge this method with
    // determineDatabasePath(), which duplicates much of the logic.
    final boolean isDefaultDb = dbName.equalsIgnoreCase(DEFAULT_DATABASE_NAME);
    if (!isDefaultDb) {
      final String dirName = dbName.toLowerCase() + DATABASE_WAREHOUSE_SUFFIX;
      return new Path(getWhRoot(), dirName);
    }
    return getWhRoot();
  }

  /**
   * Returns the default location of a table, derived from the location of its parent database.
   *
   * @param db database where the table is created
   * @param tableName table name; it is lower-cased and encoded before being used as a directory
   * @return canonicalized path of the table directory
   * @throws MetaException if the database path or the file system cannot be resolved
   */
  public Path getDefaultTablePath(Database db, String tableName)
      throws MetaException {
    final Path dbDir = getDatabasePath(db);
    final String tableDir = MetaStoreUtils.encodeTableName(tableName.toLowerCase());
    return getDnsPath(new Path(dbDir, tableDir));
  }

  /**
   * Returns the name of a table in db.table format.
   *
   * @param table table object
   * @return database-qualified table name
   */
  public static String getQualifiedName(Table table) {
    final String dbName = table.getDbName();
    final String tableName = table.getTableName();
    return getQualifiedName(dbName, tableName);
  }

  /**
   * Joins a database name and a table name into db.table format.
   *
   * @param dbName database name
   * @param tableName table name
   * @return database-qualified table name
   */
  public static String getQualifiedName(String dbName, String tableName) {
    final StringBuilder qualified = new StringBuilder();
    qualified.append(dbName);
    qualified.append(CAT_DB_TABLE_SEPARATOR);
    qualified.append(tableName);
    return qualified.toString();
  }

  /**
   * Returns a printable name for a partition: db.table immediately followed by the string form
   * of the partition's value list.
   *
   * @param partition partition object
   * @return qualified partition name
   */
  public static String getQualifiedName(Partition partition) {
    final StringBuilder qualified = new StringBuilder();
    qualified.append(partition.getDbName());
    qualified.append(".");
    qualified.append(partition.getTableName());
    qualified.append(partition.getValues());
    return qualified.toString();
  }

  /**
   * Returns the name of a table in cat.db.table format.
   *
   * @param table table object
   * @return fully qualified name.
   */
  public static String getCatalogQualifiedTableName(Table table) {
    final String catName = table.getCatName();
    final String dbName = table.getDbName();
    final String tableName = table.getTableName();
    return getCatalogQualifiedTableName(catName, dbName, tableName);
  }

  /**
   * Joins catalog, database and table names into cat.db.table format.
   *
   * @param catName catalog name
   * @param dbName database name
   * @param tableName table name
   * @return fully qualified name.
   */
  public static String getCatalogQualifiedTableName(String catName, String dbName, String tableName) {
    final StringBuilder qualified = new StringBuilder();
    qualified.append(catName).append(CAT_DB_TABLE_SEPARATOR);
    qualified.append(dbName).append(CAT_DB_TABLE_SEPARATOR);
    qualified.append(tableName);
    return qualified.toString();
  }

  /**
   * Joins a catalog name and a database name into cat.db format.
   *
   * @param catName catalog name
   * @param dbName database name
   * @return catalog-qualified database name
   */
  public static String getCatalogQualifiedDbName(String catName, String dbName) {
    final StringBuilder qualified = new StringBuilder();
    qualified.append(catName);
    qualified.append(CAT_DB_TABLE_SEPARATOR);
    qualified.append(dbName);
    return qualified.toString();
  }

  /**
   * Creates the given directory through {@code FileUtils.mkdir}.
   *
   * @param f directory to create
   * @return the result reported by {@code FileUtils.mkdir}; {@code false} only if the error
   *         helper returns normally after an I/O failure
   * @throws MetaException if the file system cannot be obtained or the creation fails
   */
  public boolean mkdirs(Path f) throws MetaException {
    boolean created = false;
    try {
      final FileSystem owner = getFs(f);
      created = FileUtils.mkdir(owner, f);
    } catch (IOException ioe) {
      MetaStoreUtils.logAndThrowMetaException(ioe);
    }
    return created;
  }

  /**
   * Renames a directory, optionally handing the source to the change manager first.
   *
   * @param sourcePath directory to rename
   * @param destPath new location
   * @param needCmRecycle whether the source must be copied to the change-management root
   *        before the rename
   * @return the result reported by {@code FileUtils.rename}; {@code false} only if the error
   *         helper returns normally after a failure
   * @throws MetaException if any step fails
   */
  public boolean renameDir(Path sourcePath, Path destPath, boolean needCmRecycle) throws MetaException {
    boolean renamed = false;
    try {
      if (needCmRecycle) {
        // The client is going to move the source files elsewhere, so cmroot must receive a
        // copy of them rather than the files themselves.
        cm.recycle(sourcePath, RecycleType.COPY, true);
      }
      final FileSystem sourceFs = getFs(sourcePath);
      final FileSystem targetFs = getFs(destPath);
      renamed = FileUtils.rename(sourceFs, targetFs, sourcePath, destPath);
    } catch (Exception failure) {
      MetaStoreUtils.logAndThrowMetaException(failure);
    }
    return renamed;
  }

  /**
   * Asks the change manager to recycle the given file using {@code RecycleType.COPY}.
   *
   * @param file file to hand to the change manager
   * @throws MetaException carrying the stringified stack trace if the recycle fails with an
   *         {@link IOException}
   */
  void addToChangeManagement(Path file) throws MetaException {
    try {
      cm.recycle(file, RecycleType.COPY, true);
    } catch (IOException ioe) {
      final String details = org.apache.hadoop.util.StringUtils.stringifyException(ioe);
      throw new MetaException(details);
    }
  }

  /**
   * Deletes a directory without purge, with change-management recycling enabled.
   *
   * @param f directory to delete
   * @param recursive whether to delete recursively
   * @return result of the deletion
   * @throws MetaException if recycling or the file system lookup fails
   */
  public boolean deleteDir(Path f, boolean recursive) throws MetaException {
    final boolean ifPurge = false;
    return deleteDir(f, recursive, ifPurge);
  }

  /**
   * Deletes a directory with change-management recycling enabled.
   *
   * @param f directory to delete
   * @param recursive whether to delete recursively
   * @param ifPurge purge flag forwarded to the change manager and the file system handler
   * @return result of the deletion
   * @throws MetaException if recycling or the file system lookup fails
   */
  public boolean deleteDir(Path f, boolean recursive, boolean ifPurge) throws MetaException {
    final boolean needCmRecycle = true;
    return deleteDir(f, recursive, ifPurge, needCmRecycle);
  }

  /**
   * Deletes a directory through the configured file system handler, optionally handing it to
   * the change manager first.
   *
   * @param f directory to delete
   * @param recursive whether to delete recursively
   * @param ifPurge purge flag forwarded to the change manager and the file system handler
   * @param needCmRecycle whether the change manager must recycle the path before deletion
   * @return result reported by the file system handler
   * @throws MetaException if recycling fails or the file system cannot be obtained
   */
  public boolean deleteDir(Path f, boolean recursive, boolean ifPurge, boolean needCmRecycle) throws MetaException {
    // Temporary tables do not need a CM recycle file, so callers can switch recycling off.
    if (needCmRecycle) {
      try {
        cm.recycle(f, RecycleType.MOVE, ifPurge);
      } catch (IOException ioe) {
        final String details = org.apache.hadoop.util.StringUtils.stringifyException(ioe);
        throw new MetaException(details);
      }
    }
    final FileSystem owner = getFs(f);
    return fsHandler.deleteDir(owner, f, recursive, ifPurge, conf);
  }

  /**
   * Asks the change manager to recycle the given path using {@code RecycleType.MOVE}.
   *
   * @param f path to recycle
   * @param ifPurge purge flag forwarded to the change manager
   * @throws MetaException carrying the stringified stack trace if the recycle fails with an
   *         {@link IOException}
   */
  public void recycleDirToCmPath(Path f, boolean ifPurge) throws MetaException {
    try {
      cm.recycle(f, RecycleType.MOVE, ifPurge);
    } catch (IOException ioe) {
      final String details = org.apache.hadoop.util.StringUtils.stringifyException(ioe);
      throw new MetaException(details);
    }
  }

  /**
   * Tells whether a path is empty, judged by its content summary: no files and a directory
   * count of exactly one.
   *
   * @param path path to inspect
   * @return {@code true} if the content summary reports zero files and one directory
   * @throws IOException if the content summary cannot be read
   * @throws MetaException if the file system cannot be obtained
   */
  public boolean isEmpty(Path path) throws IOException, MetaException {
    final ContentSummary summary = getFs(path).getContentSummary(path);
    return summary != null
        && summary.getFileCount() == 0
        && summary.getDirectoryCount() == 1;
  }

  /**
   * Checks whether the given path is writable.
   *
   * <p>When storage authorization checks are disabled every path is reported as writable. A
   * path that does not exist is also reported as writable because there is nothing to
   * validate. Any other failure during the check is treated as missing authorization.
   *
   * @param path path to check; {@code null} is reported as not writable when checks are enabled
   * @return {@code true} if the path is considered writable
   * @throws IOException declared for callers; failures during the check itself are caught and
   *         reported as {@code false}
   */
  public boolean isWritable(Path path) throws IOException {
    if (!storageAuthCheck) {
      // no checks for non-secure hadoop installations
      return true;
    }
    if (path == null) {
      return false;
    }
    try {
      final FileSystem fs = getFs(path);
      final FileStatus stat = fs.getFileStatus(path);
      HdfsUtils.checkFileAccess(fs, stat, FsAction.WRITE);
      return true;
    } catch (FileNotFoundException fnfe) {
      // File named by path doesn't exist; nothing to validate.
      return true;
    } catch (Exception e) {
      // all other exceptions are considered as emanating from
      // unauthorized accesses
      if (LOG.isDebugEnabled()) {
        // The message used to stop after the path; it now says what was being checked.
        LOG.debug("Exception when checking if path (" + path + ") is writable", e);
      }
      return false;
    }
  }

  /**
   * Escapes a string for use as a path component; delegates to
   * {@code FileUtils.escapePathName}.
   *
   * @param path raw value
   * @return escaped value
   */
  private static String escapePathName(String path) {
    final String escaped = FileUtils.escapePathName(path);
    return escaped;
  }

  /**
   * Reverses {@link #escapePathName(String)}; delegates to {@code FileUtils.unescapePathName}.
   *
   * @param path escaped value
   * @return unescaped value
   */
  private static String unescapePathName(String path) {
    final String unescaped = FileUtils.unescapePathName(path);
    return unescaped;
  }

  /**
   * Given a partition specification, return the path corresponding to the
   * partition spec, including a trailing separator. By default, the specification does not
   * include dynamic partitions: every value must be non-empty.
   *
   * @param spec partition column names mapped to their values, in partition order
   * @return string representation of the partition specification.
   * @throws MetaException if any value in the spec is null or empty
   */
  public static String makePartPath(Map<String, String> spec)
      throws MetaException {
    return makePartName(spec, true);
  }

  /**
   * Makes a partition name from a specification. Each entry becomes an escaped
   * {@code key=value} component and components are joined with the path separator.
   *
   * @param spec partition column names mapped to their values, in partition order
   * @param addTrailingSeperator if true, adds a trailing separator e.g. 'ds=1/'
   * @return partition name
   * @throws MetaException if any value in the spec is null or empty
   */
  public static String makePartName(Map<String, String> spec,
      boolean addTrailingSeperator)
      throws MetaException {
    StringBuilder suffixBuf = new StringBuilder();
    boolean first = true;
    for (Entry<String, String> e : spec.entrySet()) {
      String value = e.getValue();
      if (value == null || value.length() == 0) {
        throw new MetaException("Partition spec is incorrect. " + spec);
      }
      if (!first) {
        suffixBuf.append(Path.SEPARATOR);
      }
      suffixBuf.append(escapePathName(e.getKey()));
      suffixBuf.append('=');
      suffixBuf.append(escapePathName(value));
      first = false;
    }
    if (addTrailingSeperator) {
      suffixBuf.append(Path.SEPARATOR);
    }
    return suffixBuf.toString();
  }
  /**
   * Given a dynamic partition specification, return the path corresponding to the
   * static part of partition specification. This is basically a copy of makePartName
   * but we get rid of MetaException since it is not serializable.
   *
   * <p>Entries are consumed in iteration order until the first null or empty value; every
   * consumed entry contributes an escaped {@code key=value} component followed by a separator.
   *
   * @param spec partition column names mapped to their values; a null or empty value marks a
   *        dynamic partition column
   * @return string representation of the static part of the partition specification.
   */
  public static String makeDynamicPartName(Map<String, String> spec) {
    StringBuilder suffixBuf = new StringBuilder();
    for (Entry<String, String> e : spec.entrySet()) {
      String value = e.getValue();
      if (value == null || value.length() == 0) {
        // stop once we see a dynamic partition
        break;
      }
      suffixBuf.append(escapePathName(e.getKey()));
      suffixBuf.append('=');
      suffixBuf.append(escapePathName(value));
      suffixBuf.append(Path.SEPARATOR);
    }
    return suffixBuf.toString();
  }

  // Matches one "key=value" path component; neither side may be empty or contain '/'.
  static final Pattern pat = Pattern.compile("([^/]+)=([^/]+)");

  // Splits a partition name into its '/'-separated components.
  private static final Pattern slash = Pattern.compile("/");

  /**
   * Extracts values from partition name without the column names.
   *
   * <p>The name is split on '/', and for each {@code key=value} component the unescaped text
   * after the first '=' is stored at the matching index of the result.
   *
   * @param name Partition name; must not be null.
   * @param result The result. Must be pre-sized to the expected number of columns. If null, a
   *        new list with one slot per component is allocated.
   * @return the list holding the values ({@code result} itself when it was not null)
   * @throws MetaException if the number of components does not match the size of a supplied
   *         result list, or a component has no '=' or nothing before it
   */
  public static AbstractList<String> makeValsFromName(
      String name, AbstractList<String> result) throws MetaException {
    assert name != null;
    String[] parts = slash.split(name, 0);
    if (result == null) {
      result = new ArrayList<>(parts.length);
      for (int i = 0; i < parts.length; ++i) {
        result.add(null);
      }
    } else if (parts.length != result.size()) {
      throw new MetaException(
          "Expected " + result.size() + " components, got " + parts.length + " (" + name + ")");
    }
    for (int i = 0; i < parts.length; ++i) {
      int eq = parts[i].indexOf('=');
      if (eq <= 0) {
        throw new MetaException("Unexpected component " + parts[i]);
      }
      result.set(i, unescapePathName(parts[i].substring(eq + 1)));
    }
    return result;
  }

  /**
   * Parses a partition name such as {@code a=1/b=2} into an ordered map of unescaped column
   * names to unescaped values.
   *
   * @param name partition name
   * @return partition spec, ordered from the outermost to the innermost component
   * @throws MetaException if the name is null or empty
   */
  public static LinkedHashMap<String, String> makeSpecFromName(String name)
      throws MetaException {
    if (name == null || name.isEmpty()) {
      throw new MetaException("Partition name is invalid. " + name);
    }
    LinkedHashMap<String, String> partSpec = new LinkedHashMap<>();
    makeSpecFromName(partSpec, new Path(name), null);
    return partSpec;
  }

  /**
   * Fills a partition spec from the {@code key=value} components of a path.
   *
   * <p>The path is walked from the leaf towards its parents until a parent is null or has an
   * empty name. Components that do not look like {@code key=value} are skipped. The collected
   * pairs are then added to {@code partSpec} in outermost-first order, with keys and values
   * unescaped.
   *
   * @param partSpec map that receives the key/value pairs
   * @param currPath path to parse
   * @param requiredKeys keys that must be found; every key found is removed from this set. May
   *        be null when nothing is required.
   * @return {@code true} if {@code requiredKeys} is null or empty after parsing, {@code false}
   *         (after logging a warning) if required keys are still missing
   */
  public static boolean makeSpecFromName(Map<String, String> partSpec, Path currPath,
      Set<String> requiredKeys) {
    // Remember where the walk started: currPath is moved up to the root (or null) by the loop,
    // so it is useless for the warning below.
    final Path startPath = currPath;
    List<String[]> kvs = new ArrayList<>();
    do {
      String component = currPath.getName();
      Matcher m = pat.matcher(component);
      if (m.matches()) {
        String k = unescapePathName(m.group(1));
        String v = unescapePathName(m.group(2));
        kvs.add(new String[] {k, v});
      }
      currPath = currPath.getParent();
    } while (currPath != null && !currPath.getName().isEmpty());

    // reverse the list since we checked the part from leaf dir to table's base dir
    for (int i = kvs.size(); i > 0; i--) {
      String[] kv = kvs.get(i - 1);
      if (requiredKeys != null) {
        requiredKeys.remove(kv[0]);
      }
      partSpec.put(kv[0], kv[1]);
    }
    if (requiredKeys == null || requiredKeys.isEmpty()) {
      return true;
    }
    LOG.warn("Cannot create partition spec from " + startPath + "; missing keys " + requiredKeys);
    return false;
  }

  /**
   * Parses a partition name such as {@code a=1/b=2} into an ordered map of column names to
   * values, leaving both keys and values in their escaped form.
   *
   * <p>The name is walked from the last component towards the first; components that do not
   * look like {@code key=value} are skipped.
   *
   * @param name partition name
   * @return partition spec with escaped keys and values, ordered from the outermost to the
   *         innermost component
   * @throws MetaException if the name is null or empty
   */
  public static Map<String, String> makeEscSpecFromName(String name) throws MetaException {

    if (name == null || name.isEmpty()) {
      throw new MetaException("Partition name is invalid. " + name);
    }
    LinkedHashMap<String, String> partSpec = new LinkedHashMap<>();

    Path currPath = new Path(name);

    List<String[]> kvs = new ArrayList<>();
    do {
      String component = currPath.getName();
      Matcher m = pat.matcher(component);
      if (m.matches()) {
        // Deliberately no unescaping here, unlike makeSpecFromName.
        kvs.add(new String[] {m.group(1), m.group(2)});
      }
      currPath = currPath.getParent();
    } while (currPath != null && !currPath.getName().isEmpty());

    // reverse the list since we checked the part from leaf dir to table's base dir
    for (int i = kvs.size(); i > 0; i--) {
      String[] kv = kvs.get(i - 1);
      partSpec.put(kv[0], kv[1]);
    }

    return partSpec;
  }

  /**
   * Returns the default partition path of a table within a given database and partition key value
   * pairs. It uses the database location and appends to it the table name and the partition
   * key,value pairs to create the Path for the partition directory
   *
   * @param db - parent database which is used to get the base location of the partition directory
   * @param tableName - table name for the partitions
   * @param pm - Partition key value pairs, in partition order
   * @return path of the partition directory under the table's default location
   * @throws MetaException if the table path cannot be resolved or a partition value is null or
   *         empty
   */
  public Path getDefaultPartitionPath(Database db, String tableName,
      Map<String, String> pm) throws MetaException {
    return getPartitionPath(getDefaultTablePath(db, tableName), pm);
  }

  /**
   * Returns the path object for the given partition key-value pairs and the base location
   *
   * @param tblPath - the base location for the partitions. Typically the table location
   * @param pm - Partition key value pairs, in partition order
   * @return path of the partition directory below {@code tblPath}
   * @throws MetaException if a partition value is null or empty
   */
  public Path getPartitionPath(Path tblPath, Map<String, String> pm)
      throws MetaException {
    return new Path(tblPath, makePartPath(pm));
  }

  /**
   * Given a database, a table and the partition key value pairs this method returns the Path object
   * corresponding to the partition key value pairs. It uses the table location if available else
   * uses the database location for constructing the path corresponding to the partition key-value
   * pairs
   *
   * @param db - Parent database of the given table
   * @param table - Table for which the partition key-values are given
   * @param vals - List of values for the partition keys, in the order of the table's partition
   *        keys
   * @return Path corresponding to the partition key-value pairs
   * @throws MetaException if the table has no partition keys, the number of values differs from
   *         the number of partition keys, or the path cannot be built
   */
  public Path getPartitionPath(Database db, Table table, List<String> vals)
      throws MetaException {
    List<FieldSchema> partKeys = table.getPartitionKeys();
    if (partKeys == null || (partKeys.size() != vals.size())) {
      throw new MetaException("Invalid number of partition keys found for " + table.getTableName());
    }
    // LinkedHashMap keeps the partition keys in declaration order, which fixes the order of
    // the directory levels in the resulting path.
    Map<String, String> pm = new LinkedHashMap<>(vals.size());
    int i = 0;
    for (FieldSchema key : partKeys) {
      pm.put(key.getName(), vals.get(i));
      i++;
    }

    if (table.getSd().getLocation() != null) {
      return getPartitionPath(getDnsPath(new Path(table.getSd().getLocation())), pm);
    } else {
      return getDefaultPartitionPath(db, table.getTableName(), pm);
    }
  }

  /**
   * Tells whether the given path exists and is a directory.
   *
   * @param f path to check
   * @return {@code false} if the path does not exist or is not a directory, {@code true} if it
   *         is a directory
   * @throws MetaException if the file system cannot be obtained or the status lookup fails
   */
  public boolean isDir(Path f) throws MetaException {
    try {
      FileSystem fs = getFs(f);
      FileStatus fstatus = fs.getFileStatus(f);
      // isDirectory() is the non-deprecated replacement for FileStatus.isDir().
      if (!fstatus.isDirectory()) {
        return false;
      }
    } catch (FileNotFoundException e) {
      return false;
    } catch (IOException e) {
      MetaStoreUtils.logAndThrowMetaException(e);
    }
    return true;
  }

  /**
   * Makes a valid partition name from partition columns and values, with no default string for
   * empty or null values.
   *
   * @param partCols The partition columns
   * @param vals The partition values
   * @return An escaped, valid partition name.
   * @throws MetaException if the lists differ in size or are empty
   */
  public static String makePartName(List<FieldSchema> partCols,
      List<String> vals) throws MetaException {
    return makePartName(partCols, vals, null);
  }

  /**
   * Lists the files found under the location of a storage descriptor.
   *
   * @param desc storage descriptor whose location is listed
   * @return list of FileStatus objects corresponding to the files
   * making up the passed storage description
   * @throws MetaException if the listing fails
   */
  public List<FileStatus> getFileStatusesForSD(StorageDescriptor desc)
      throws MetaException {
    return getFileStatusesForLocation(desc.getLocation());
  }

  /**
   * Lists the files found under a location, via {@code FileUtils.getFileStatusRecurse}.
   *
   * @param location location to list
   * @return list of FileStatus objects corresponding to the files
   * found under the passed location; {@code null} only if the error helper returns normally
   * after an I/O failure
   * @throws MetaException if the listing fails
   */
  public List<FileStatus> getFileStatusesForLocation(String location)
      throws MetaException {
    try {
      Path path = new Path(location);
      FileSystem fileSys = path.getFileSystem(conf);
      return FileUtils.getFileStatusRecurse(path, -1, fileSys);
    } catch (IOException ioe) {
      MetaStoreUtils.logAndThrowMetaException(ioe);
    }
    return null;
  }

  /**
   * Lists the files found under the location of an unpartitioned table, via
   * {@code FileUtils.getFileStatusRecurse}.
   *
   * @param db database (not used by this method)
   * @param table table whose storage descriptor location is listed
   * @return list of FileStatus objects corresponding to the files making up the passed
   * unpartitioned table; {@code null} only if the error helper returns normally after an I/O
   * failure
   * @throws MetaException if the table path cannot be resolved or the listing fails
   */
  public List<FileStatus> getFileStatusesForUnpartitionedTable(Database db, Table table)
      throws MetaException {
    Path tablePath = getDnsPath(new Path(table.getSd().getLocation()));
    try {
      FileSystem fileSys = tablePath.getFileSystem(conf);
      return FileUtils.getFileStatusRecurse(tablePath, -1, fileSys);
    } catch (IOException ioe) {
      MetaStoreUtils.logAndThrowMetaException(ioe);
    }
    return null;
  }

  /**
   * Makes a valid partition name.
   *
   * @param partCols The partition columns
   * @param vals The partition values
   * @param defaultStr
   *    The default name given to a partition value if the respective value is empty or null.
   * @return An escaped, valid partition name.
   * @throws MetaException if the two lists differ in size or are empty; the message lists the
   *         column names and values that were passed
   */
  public static String makePartName(List<FieldSchema> partCols,
      List<String> vals, String defaultStr) throws MetaException {
    if ((partCols.size() != vals.size()) || (partCols.size() == 0)) {
      // Built with a StringBuilder instead of repeated string concatenation; the resulting
      // message text is unchanged.
      StringBuilder errorStr = new StringBuilder("Invalid partition key & values; keys [");
      for (FieldSchema fs : partCols) {
        errorStr.append(fs.getName()).append(", ");
      }
      errorStr.append("], values [");
      for (String val : vals) {
        errorStr.append(val).append(", ");
      }
      errorStr.append("]");
      throw new MetaException(errorStr.toString());
    }
    List<String> colNames = new ArrayList<>(partCols.size());
    for (FieldSchema col : partCols) {
      colNames.add(col.getName());
    }
    return FileUtils.makePartName(colNames, vals, defaultStr);
  }

  /**
   * Extracts the unescaped partition values from a partition name such as {@code a=1/b=2}.
   *
   * @param partName partition name
   * @return the values, ordered from the outermost to the innermost component
   * @throws MetaException if the name is null or empty
   */
  public static List<String> getPartValuesFromPartName(String partName)
      throws MetaException {
    LinkedHashMap<String, String> partSpec = Warehouse.makeSpecFromName(partName);
    List<String> values = new ArrayList<>(partSpec.values());
    return values;
  }

  /**
   * Pairs partition columns with values, by position, into an ordered partition spec.
   *
   * @param partCols partition columns; must have at least as many entries as {@code values}
   * @param values partition values
   * @return map from column name to value, in the order of {@code values}
   */
  public static Map<String, String> makeSpecFromValues(List<FieldSchema> partCols,
      List<String> values) {
    Map<String, String> spec = new LinkedHashMap<>();
    for (int i = 0; i < values.size(); i++) {
      spec.put(partCols.get(i).getName(), values.get(i));
    }
    return spec;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy