// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it as often as you want.
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.metadata;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
import static org.apache.hadoop.hive.serde.serdeConstants.COLLECTION_DELIM;
import static org.apache.hadoop.hive.serde.serdeConstants.ESCAPE_CHAR;
import static org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM;
import static org.apache.hadoop.hive.serde.serdeConstants.LINE_DELIM;
import static org.apache.hadoop.hive.serde.serdeConstants.MAPKEY_DELIM;
import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT;
import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.ConcurrentHashMap;
import com.google.common.collect.ImmutableMap;
import javax.jdo.JDODataStoreException;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.HiveStatsUtils;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.io.HdfsUtils;
import org.apache.hadoop.hive.metastore.HiveMetaException;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.HiveMetaHookLoader;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.PartitionDropOptions;
import org.apache.hadoop.hive.metastore.RetryingMetaStoreClient;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.CompactionResponse;
import org.apache.hadoop.hive.metastore.api.CompactionType;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.FireEventRequest;
import org.apache.hadoop.hive.metastore.api.FireEventRequestData;
import org.apache.hadoop.hive.metastore.api.ForeignKeysRequest;
import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalRequest;
import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalResponse;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
import org.apache.hadoop.hive.metastore.api.HiveObjectRef;
import org.apache.hadoop.hive.metastore.api.HiveObjectType;
import org.apache.hadoop.hive.metastore.api.Index;
import org.apache.hadoop.hive.metastore.api.InsertEventRequestData;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.MetadataPpdResult;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest;
import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
import org.apache.hadoop.hive.metastore.api.PrincipalType;
import org.apache.hadoop.hive.metastore.api.PrivilegeBag;
import org.apache.hadoop.hive.metastore.api.Role;
import org.apache.hadoop.hive.metastore.api.RolePrincipalGrant;
import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.SkewedInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.FunctionTask;
import org.apache.hadoop.hive.ql.exec.FunctionUtils;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.index.HiveIndexHandler;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.common.log.InPlaceUpdate;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.metastore.SynchronizedMetaStoreClient;
import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
import org.apache.hadoop.hive.ql.plan.DropTableDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.session.CreateTableAutomaticGrant;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
* This class has functions that implement meta data/DDL operations using calls
* to the metastore.
* It has a metastore client instance it uses to communicate with the metastore.
*
* It is a thread local variable, and the instance is accessed using static
* get methods in this class.
*/
@SuppressWarnings({"deprecation", "rawtypes"})
public class Hive {
/** Logger for this class; the category name is given as a literal string. */
private static final Logger LOG = LoggerFactory.getLogger("hive.ql.metadata.Hive");
/** Configuration currently associated with this instance. */
private HiveConf conf;
/** Metastore client used by this instance; reset to null by close(). */
private IMetaStoreClient metaStoreClient;
/** Synchronized wrapper client (populated outside the visible part of this file). */
private SynchronizedMetaStoreClient syncMetaStoreClient;
/** User that owns this instance; null means "no owner recorded". */
private UserGroupInformation owner;
/** Timing information for metastore calls. */
private final ConcurrentHashMap metaCallTimeMap = new ConcurrentHashMap<>();
/**
 * Holds the Hive instance of each thread. The slot starts out as null, and removing the
 * slot closes the stored instance (if any) before the value is discarded.
 */
private static ThreadLocal<Hive> hiveDB = new ThreadLocal<Hive>() {
  @Override
  protected Hive initialValue() {
    return null;
  }

  @Override
  public synchronized void remove() {
    // Close the instance bound to the calling thread before dropping it.
    Hive current = this.get();
    if (current != null) {
      current.close();
    }
    super.remove();
  }
};
// Process-wide registration state for permanent functions. This is better than static
// initialization but still not strictly valid: nothing prevents this process from talking
// to several metastores, and only the functions of the first one will be loaded.
private static final AtomicInteger didRegisterAllFuncs = new AtomicInteger(0);
/** Registration has not been started (or the last attempt failed). */
private static final int REG_FUNCS_NO = 0;
/** Some thread is currently registering the functions. */
private static final int REG_FUNCS_PENDING = 1;
/** Registration has completed successfully. */
private static final int REG_FUNCS_DONE = 2;
/**
 * Registers all permanent functions, letting only one thread at a time do the work.
 *
 * The shared state {@code didRegisterAllFuncs} moves REG_FUNCS_NO -> REG_FUNCS_PENDING ->
 * REG_FUNCS_DONE. The thread that wins the compare-and-set out of REG_FUNCS_NO performs
 * {@link #reloadFunctions()}; threads that observe REG_FUNCS_PENDING wait on the state
 * object in 100 ms slices and re-check; threads that observe REG_FUNCS_DONE return at once.
 * If the reload fails the state is put back to REG_FUNCS_NO so a later call can retry.
 * Known limitation (from the original note): needs improvement.
 *
 * @throws HiveException wrapping any exception thrown while reloading the functions
 */
private void registerAllFunctionsOnce() throws HiveException {
boolean breakLoop = false;
while (!breakLoop) {
int val = didRegisterAllFuncs.get();
switch (val) {
case REG_FUNCS_NO: {
// Try to claim the registration; on a lost race re-read the state.
if (didRegisterAllFuncs.compareAndSet(val, REG_FUNCS_PENDING)) {
breakLoop = true;
break;
}
continue;
}
case REG_FUNCS_PENDING: {
// Another thread is registering: wait (bounded) for its notifyAll, then re-check.
synchronized (didRegisterAllFuncs) {
try {
didRegisterAllFuncs.wait(100);
} catch (InterruptedException e) {
// Restore the interrupt flag and give up without registering anything.
Thread.currentThread().interrupt();
return;
}
}
continue;
}
case REG_FUNCS_DONE: return;
default: throw new AssertionError(val);
}
}
// Only the thread that set the state to REG_FUNCS_PENDING gets here.
try {
reloadFunctions();
didRegisterAllFuncs.compareAndSet(REG_FUNCS_PENDING, REG_FUNCS_DONE);
} catch (Exception e) {
LOG.warn("Failed to register all functions.", e);
// Roll back so that a subsequent caller can attempt the registration again.
didRegisterAllFuncs.compareAndSet(REG_FUNCS_PENDING, REG_FUNCS_NO);
throw new HiveException(e);
} finally {
// Wake up every thread parked in the REG_FUNCS_PENDING branch above.
synchronized (didRegisterAllFuncs) {
didRegisterAllFuncs.notifyAll();
}
}
}
/**
 * Brings the local function registry in line with the functions returned by
 * {@code getAllFunctions()}: every returned function is (re)registered as a permanent
 * function, and every registry entry matching {@code ".+\\..+"} that was not returned is
 * unregistered. Failures for individual functions are logged and skipped.
 *
 * @throws HiveException if the list of functions cannot be obtained
 */
public void reloadFunctions() throws HiveException {
  // Names currently in the registry that match the pattern; whatever is still in this set
  // after the registration loop was not reported by getAllFunctions().
  Set<String> registryFunctions = new HashSet<String>(
      FunctionRegistry.getFunctionNames(".+\\..+"));
  for (Function function : getAllFunctions()) {
    String functionName = function.getFunctionName();
    try {
      LOG.info("Registering function " + functionName + " " + function.getClassName());
      String qualFunc = FunctionUtils.qualifyFunctionName(functionName, function.getDbName());
      FunctionRegistry.registerPermanentFunction(qualFunc, function.getClassName(), false,
          FunctionTask.toFunctionResource(function.getResourceUris()));
      registryFunctions.remove(qualFunc);
    } catch (Exception e) {
      // Best effort: keep going, but keep the cause in the log so the failure is diagnosable.
      LOG.warn("Failed to register persistent function " +
          functionName + ":" + function.getClassName() + ". Ignore and continue.", e);
    }
  }
  // unregister functions from local system registry that are not in getAllFunctions()
  for (String functionName : registryFunctions) {
    try {
      FunctionRegistry.unregisterPermanentFunction(functionName);
    } catch (Exception e) {
      LOG.warn("Failed to unregister persistent function " +
          functionName + " on reload. Ignore and continue.", e);
    }
  }
}
public static Hive get(Configuration c, Class> clazz) throws HiveException {
return get(c instanceof HiveConf ? (HiveConf)c : new HiveConf(c, clazz));
}
/**
 * Returns the Hive object of the calling thread, creating a new one when none exists yet,
 * when the metadata-related configuration differs, or when the owner differs.
 * No refresh is forced, the full compatibility check is used, and permanent functions
 * are registered.
 *
 * @param c
 *          new Hive Configuration
 * @return Hive object for current thread
 * @throws HiveException
 */
public static Hive get(HiveConf c) throws HiveException {
  final boolean needsRefresh = false;
  final boolean isFastCheck = false;
  final boolean doRegisterAllFns = true;
  return getInternal(c, needsRefresh, isFastCheck, doRegisterAllFns);
}
/**
 * Variant of {@link #get(HiveConf)} that only compares the identity of the conf object
 * held by the existing MS client, on the assumption that relevant settings do not change
 * within one conf object. Permanent functions are registered.
 */
public static Hive getWithFastCheck(HiveConf c) throws HiveException {
  final boolean doRegisterAllFns = true;
  return getWithFastCheck(c, doRegisterAllFns);
}
/**
 * Variant of {@link #get(HiveConf)} that only compares the identity of the conf object
 * held by the existing MS client, on the assumption that relevant settings do not change
 * within one conf object.
 *
 * @param doRegisterAllFns whether a newly created instance registers permanent functions
 */
public static Hive getWithFastCheck(HiveConf c, boolean doRegisterAllFns) throws HiveException {
  final boolean needsRefresh = false;
  final boolean isFastCheck = true;
  return getInternal(c, needsRefresh, isFastCheck, doRegisterAllFns);
}
/**
 * Returns the calling thread's Hive object, replacing it when it cannot be reused.
 *
 * A replacement is created when no instance is stored, when the stored instance is not
 * owned by the current user, when {@code needsRefresh} is set, or when a non-null conf is
 * not compatible with the stored instance's existing metastore client. Otherwise the
 * stored instance is returned, with its conf replaced by {@code c} when {@code c} is
 * non-null.
 *
 * @param c conf to use; may be null
 * @param needsRefresh if true a new instance is always created
 * @param isFastCheck selects the identity-only compatibility check
 * @param doRegisterAllFns whether a newly created instance registers permanent functions
 * @return Hive object for current thread
 * @throws HiveException if the current user cannot be determined or creation fails
 */
private static Hive getInternal(HiveConf c, boolean needsRefresh, boolean isFastCheck,
    boolean doRegisterAllFns) throws HiveException {
  Hive db = hiveDB.get();
  if (db == null || !db.isCurrentUserOwner() || needsRefresh
      || (c != null && db.metaStoreClient != null && !isCompatible(db, c, isFastCheck))) {
    // Pass the real flag through: create() only uses it in its debug message, which
    // previously always reported "needsRefresh = false".
    return create(c, needsRefresh, db, doRegisterAllFns);
  }
  if (c != null) {
    db.conf = c;
  }
  return db;
}
/**
 * Closes the given (old) instance and the thread's current instance, then builds a new
 * Hive object and stores it for the calling thread.
 *
 * When {@code c} is null a conf is obtained from {@code createHiveConf()}. The conf that
 * is used gets {@code fs.scheme.class} set to {@code dfs} before the instance is built.
 *
 * @param c conf for the new instance; may be null
 * @param needsRefresh only reported in the debug message
 * @param db previous instance to close; may be null
 * @param doRegisterAllFns whether the new instance registers permanent functions
 * @return the newly created instance
 * @throws HiveException propagated from the ownership check or the constructor
 */
private static Hive create(HiveConf c, boolean needsRefresh, Hive db, boolean doRegisterAllFns)
    throws HiveException {
  if (db != null) {
    LOG.debug("Creating new db. db = " + db + ", needsRefresh = " + needsRefresh +
        ", db.isCurrentUserOwner = " + db.isCurrentUserOwner());
    db.close();
  }
  closeCurrent();
  HiveConf effectiveConf = (c != null) ? c : createHiveConf();
  effectiveConf.set("fs.scheme.class", "dfs");
  Hive replacement = new Hive(effectiveConf, doRegisterAllFns);
  hiveDB.set(replacement);
  return replacement;
}
/**
 * Supplies a conf when the caller gave none: the conf of the current session if there is
 * one, otherwise a fresh {@code HiveConf} created for this class.
 */
private static HiveConf createHiveConf() {
  SessionState session = SessionState.get();
  if (session != null) {
    return session.getConf();
  }
  return new HiveConf(Hive.class);
}
/**
 * Asks the metastore client of {@code db} whether it can keep serving conf {@code c}.
 *
 * @param isFastCheck true to use {@code isSameConfObj}, false to use {@code isCompatibleWith}
 */
private static boolean isCompatible(Hive db, HiveConf c, boolean isFastCheck) {
  if (isFastCheck) {
    return db.metaStoreClient.isSameConfObj(c);
  }
  return db.metaStoreClient.isCompatibleWith(c);
}
/**
 * Returns the calling thread's Hive object without supplying a conf; permanent functions
 * are registered if a new instance has to be created.
 */
public static Hive get() throws HiveException {
  final boolean doRegisterAllFns = true;
  return get(doRegisterAllFns);
}
/**
 * Returns the calling thread's Hive object without supplying a conf, with no forced
 * refresh and the full compatibility check.
 *
 * @param doRegisterAllFns whether a newly created instance registers permanent functions
 */
public static Hive get(boolean doRegisterAllFns) throws HiveException {
  final HiveConf noConf = null;
  final boolean needsRefresh = false;
  final boolean isFastCheck = false;
  return getInternal(noConf, needsRefresh, isFastCheck, doRegisterAllFns);
}
/**
 * Gets a connection to the metastore; see {@link #get(HiveConf)} for the reuse rules.
 * The full compatibility check is used and permanent functions are registered.
 *
 * @param c
 *          new conf
 * @param needsRefresh
 *          if true then creates a new one
 * @return The connection to the metastore
 * @throws HiveException
 */
public static Hive get(HiveConf c, boolean needsRefresh) throws HiveException {
  final boolean isFastCheck = false;
  final boolean doRegisterAllFns = true;
  return getInternal(c, needsRefresh, isFastCheck, doRegisterAllFns);
}
/**
 * Stores the given instance as the Hive object of the calling thread.
 * Only the thread-local value is replaced; this method does not close anything.
 *
 * @param hive instance to store for the calling thread
 */
public static void set(Hive hive) {
hiveDB.set(hive);
}
/**
 * Removes the Hive object of the calling thread. The thread-local's overridden
 * {@code remove()} closes the stored instance, if any, before discarding it.
 */
public static void closeCurrent() {
hiveDB.remove();
}
/**
 * Builds a Hive object around the given conf.
 *
 * @param c
 *          conf kept by this instance
 * @param doRegisterAllFns
 *          if true, permanent functions are registered (once) as part of construction
 * @throws HiveException if registering the functions fails
 */
private Hive(HiveConf c, boolean doRegisterAllFns) throws HiveException {
  this.conf = c;
  if (!doRegisterAllFns) {
    return;
  }
  registerAllFunctionsOnce();
}
/**
 * Tells whether this instance may be used by the current user: true when no owner is
 * recorded or when the recorded owner equals the current user.
 *
 * @throws HiveException if the current user cannot be determined
 */
private boolean isCurrentUserOwner() throws HiveException {
  if (owner == null) {
    return true;
  }
  try {
    return owner.equals(UserGroupInformation.getCurrentUser());
  } catch (IOException ioe) {
    throw new HiveException("Error getting current user: " + ioe.getMessage(), ioe);
  }
}
/**
 * Closes this instance's metastore client, if one exists, and forgets the recorded owner.
 */
private void close() {
  LOG.debug("Closing current thread's connection to Hive Metastore.");
  if (metaStoreClient != null) {
    metaStoreClient.close();
    metaStoreClient = null;
  }
  // Clearing unconditionally is equivalent to clearing only when non-null.
  owner = null;
}
/**
 * Creates a database through the metastore client.
 *
 * @param db database to create
 * @param ifNotExist if true, an AlreadyExistsException from the metastore is ignored
 * @throws AlreadyExistsException if the database exists and {@code ifNotExist} is false
 * @throws HiveException wrapping any other failure
 */
public void createDatabase(Database db, boolean ifNotExist)
    throws AlreadyExistsException, HiveException {
  try {
    getMSC().createDatabase(db);
  } catch (AlreadyExistsException alreadyThere) {
    if (ifNotExist) {
      return;
    }
    throw alreadyThere;
  } catch (Exception other) {
    throw new HiveException(other);
  }
}
/**
 * Creates a database and fails if one with the same name already exists.
 *
 * @param db database to create
 * @throws AlreadyExistsException if the database already exists
 * @throws HiveException wrapping any other failure
 */
public void createDatabase(Database db) throws AlreadyExistsException, HiveException {
  final boolean ifNotExist = false;
  createDatabase(db, ifNotExist);
}
/**
 * Drops a database, deleting its data, failing on an unknown database, and without
 * cascading to its tables.
 *
 * @param name database name
 * @throws NoSuchObjectException
 * @throws HiveException
 * @see org.apache.hadoop.hive.metastore.HiveMetaStoreClient#dropDatabase(java.lang.String)
 */
public void dropDatabase(String name) throws HiveException, NoSuchObjectException {
  final boolean deleteData = true;
  final boolean ignoreUnknownDb = false;
  final boolean cascade = false;
  dropDatabase(name, deleteData, ignoreUnknownDb, cascade);
}
/**
 * Drops a database without cascading to its tables.
 *
 * @param name database name
 * @param deleteData passed through to the metastore client
 * @param ignoreUnknownDb if true, will ignore NoSuchObjectException
 * @throws HiveException
 * @throws NoSuchObjectException
 */
public void dropDatabase(String name, boolean deleteData, boolean ignoreUnknownDb)
    throws HiveException, NoSuchObjectException {
  final boolean cascade = false;
  dropDatabase(name, deleteData, ignoreUnknownDb, cascade);
}
/**
 * Drops a database through the metastore client.
 *
 * @param name database name
 * @param deleteData passed through to the metastore client
 * @param ignoreUnknownDb if true, will ignore NoSuchObjectException
 * @param cascade if true, delete all tables on the DB if exists. Otherwise, the query
 *          will fail if table still exists.
 * @throws HiveException wrapping any failure other than NoSuchObjectException
 * @throws NoSuchObjectException rethrown unchanged from the metastore client
 */
public void dropDatabase(String name, boolean deleteData, boolean ignoreUnknownDb, boolean cascade)
    throws HiveException, NoSuchObjectException {
  try {
    getMSC().dropDatabase(name, deleteData, ignoreUnknownDb, cascade);
  } catch (NoSuchObjectException missing) {
    // Surface "no such database" as is instead of wrapping it.
    throw missing;
  } catch (Exception other) {
    throw new HiveException(other);
  }
}
/**
* Creates a table metadata and the directory for the table data
*
* @param tableName
* name of the table
* @param columns
* list of fields of the table
* @param partCols
* partition keys of the table
* @param fileInputFormat
* Class of the input format of the table data file
* @param fileOutputFormat
* Class of the output format of the table data file
* @throws HiveException
* thrown if the args are invalid or if the metadata or the data
* directory couldn't be created
*/
public void createTable(String tableName, List columns,
List partCols, Class extends InputFormat> fileInputFormat,
Class> fileOutputFormat) throws HiveException {
this.createTable(tableName, columns, partCols, fileInputFormat,
fileOutputFormat, -1, null);
}
/**
* Creates a table metadata and the directory for the table data
*
* @param tableName
* name of the table
* @param columns
* list of fields of the table
* @param partCols
* partition keys of the table
* @param fileInputFormat
* Class of the input format of the table data file
* @param fileOutputFormat
* Class of the output format of the table data file
* @param bucketCount
* number of buckets that each partition (or the table itself) should
* be divided into
* @throws HiveException
* thrown if the args are invalid or if the metadata or the data
* directory couldn't be created
*/
public void createTable(String tableName, List columns,
List partCols, Class extends InputFormat> fileInputFormat,
Class> fileOutputFormat, int bucketCount, List bucketCols)
throws HiveException {
createTable(tableName, columns, partCols, fileInputFormat, fileOutputFormat, bucketCount,
bucketCols, null);
}
/**
* Create a table metadata and the directory for the table data
* @param tableName table name
* @param columns list of fields of the table
* @param partCols partition keys of the table
* @param fileInputFormat Class of the input format of the table data file
* @param fileOutputFormat Class of the output format of the table data file
* @param bucketCount number of buckets that each partition (or the table itself) should be
* divided into
* @param bucketCols Bucket columns
* @param parameters Parameters for the table
* @throws HiveException
*/
public void createTable(String tableName, List columns, List partCols,
Class extends InputFormat> fileInputFormat,
Class> fileOutputFormat, int bucketCount, List bucketCols,
Map parameters) throws HiveException {
if (columns == null) {
throw new HiveException("columns not specified for table " + tableName);
}
Table tbl = newTable(tableName);
tbl.setInputFormatClass(fileInputFormat.getName());
tbl.setOutputFormatClass(fileOutputFormat.getName());
for (String col : columns) {
FieldSchema field = new FieldSchema(col, STRING_TYPE_NAME, "default");
tbl.getCols().add(field);
}
if (partCols != null) {
for (String partCol : partCols) {
FieldSchema part = new FieldSchema();
part.setName(partCol);
part.setType(STRING_TYPE_NAME); // default partition key
tbl.getPartCols().add(part);
}
}
tbl.setSerializationLib(LazySimpleSerDe.class.getName());
tbl.setNumBuckets(bucketCount);
tbl.setBucketCols(bucketCols);
if (parameters != null) {
tbl.setParameters(parameters);
}
createTable(tbl);
}
/**
 * Updates the existing table metadata with the new metadata, without cascading.
 *
 * @param tblName
 *          name of the existing table
 * @param newTbl
 *          table object carrying the new metadata
 * @param environmentContext
 *          context passed to the metastore; may be null
 * @throws InvalidOperationException
 *           if the changes in metadata is not acceptable
 * @throws HiveException
 */
public void alterTable(String tblName, Table newTbl, EnvironmentContext environmentContext)
    throws InvalidOperationException, HiveException {
  final boolean cascade = false;
  alterTable(tblName, newTbl, cascade, environmentContext);
}
/**
 * Updates the existing table metadata with the new metadata.
 *
 * The DDL time parameter is removed from {@code newTbl} so that it gets refreshed, the
 * table is validated, and the change is sent to the metastore together with an
 * environment context (a new one is created when none is supplied).
 *
 * @param tblName name of the existing table, split into database and table name
 * @param newTbl table object carrying the new metadata
 * @param cascade if true, the cascade property is set on the environment context
 * @param environmentContext context passed to the metastore; may be null
 * @throws InvalidOperationException declared for callers
 * @throws HiveException wrapping MetaException / TException from the metastore call
 */
public void alterTable(String tblName, Table newTbl, boolean cascade, EnvironmentContext environmentContext)
    throws InvalidOperationException, HiveException {
  String[] names = Utilities.getDbTableName(tblName);
  try {
    // Remove the DDL_TIME so it gets refreshed
    boolean hasParameters = newTbl.getParameters() != null;
    if (hasParameters) {
      newTbl.getParameters().remove(hive_metastoreConstants.DDL_TIME);
    }
    newTbl.checkValidity(conf);
    EnvironmentContext context =
        (environmentContext != null) ? environmentContext : new EnvironmentContext();
    if (cascade) {
      context.putToProperties(StatsSetupConst.CASCADE, StatsSetupConst.TRUE);
    }
    getMSC().alter_table_with_environmentContext(names[0], names[1], newTbl.getTTable(), context);
  } catch (MetaException metaFailure) {
    throw new HiveException("Unable to alter table. " + metaFailure.getMessage(), metaFailure);
  } catch (TException thriftFailure) {
    throw new HiveException("Unable to alter table. " + thriftFailure.getMessage(), thriftFailure);
  }
}
/**
 * Updates an index, taking the base table as a single (optionally qualified) name that is
 * split into database and table name.
 *
 * @param baseTableName name of the table the index belongs to
 * @param indexName name of the existing index
 * @param newIdx index object carrying the new metadata
 */
public void alterIndex(String baseTableName, String indexName, Index newIdx)
    throws InvalidOperationException, HiveException {
  String[] dbAndTable = Utilities.getDbTableName(baseTableName);
  String dbName = dbAndTable[0];
  String baseTblName = dbAndTable[1];
  alterIndex(dbName, baseTblName, indexName, newIdx);
}
/**
 * Updates the existing index metadata with the new metadata.
 *
 * @param dbName
 *          database of the base table
 * @param baseTblName
 *          name of the base table
 * @param idxName
 *          name of the existing index
 * @param newIdx
 *          index object carrying the new metadata
 * @throws InvalidOperationException
 *           if the changes in metadata is not acceptable
 * @throws HiveException wrapping MetaException / TException from the metastore call
 */
public void alterIndex(String dbName, String baseTblName, String idxName, Index newIdx)
    throws InvalidOperationException, HiveException {
  try {
    getMSC().alter_index(dbName, baseTblName, idxName, newIdx);
  } catch (MetaException metaFailure) {
    throw new HiveException("Unable to alter index. " + metaFailure.getMessage(), metaFailure);
  } catch (TException thriftFailure) {
    throw new HiveException("Unable to alter index. " + thriftFailure.getMessage(), thriftFailure);
  }
}
/**
 * Updates the existing partition metadata with the new metadata, taking the table as a
 * single (optionally qualified) name that is split into database and table name.
 *
 * @param tblName
 *          name of the existing table
 * @param newPart
 *          new partition
 * @param environmentContext
 *          context passed to the metastore
 * @throws InvalidOperationException
 *           if the changes in metadata is not acceptable
 * @throws HiveException
 */
public void alterPartition(String tblName, Partition newPart, EnvironmentContext environmentContext)
    throws InvalidOperationException, HiveException {
  String[] dbAndTable = Utilities.getDbTableName(tblName);
  String dbName = dbAndTable[0];
  String tableName = dbAndTable[1];
  alterPartition(dbName, tableName, newPart, environmentContext);
}
/**
 * Updates the existing partition metadata with the new metadata. The partition is
 * validated (and its DDL time removed) before the change is sent to the metastore.
 *
 * @param dbName
 *          name of the existing table's database
 * @param tblName
 *          name of the existing table
 * @param newPart
 *          new partition
 * @param environmentContext
 *          context passed to the metastore
 * @throws InvalidOperationException
 *           if the changes in metadata is not acceptable
 * @throws HiveException wrapping MetaException / TException from the metastore call
 */
public void alterPartition(String dbName, String tblName, Partition newPart, EnvironmentContext environmentContext)
    throws InvalidOperationException, HiveException {
  try {
    validatePartition(newPart);
    getMSC().alter_partition(dbName, tblName, newPart.getTPartition(), environmentContext);
  } catch (MetaException metaFailure) {
    throw new HiveException("Unable to alter partition. " + metaFailure.getMessage(), metaFailure);
  } catch (TException thriftFailure) {
    throw new HiveException("Unable to alter partition. " + thriftFailure.getMessage(), thriftFailure);
  }
}
/**
 * Prepares a partition for an alter call: drops its DDL time parameter so that it gets
 * refreshed, then runs the partition's own validity check.
 *
 * @throws HiveException propagated from the validity check
 */
private void validatePartition(Partition newPart) throws HiveException {
  boolean hasParameters = newPart.getParameters() != null;
  if (hasParameters) {
    newPart.getParameters().remove(hive_metastoreConstants.DDL_TIME);
  }
  newPart.checkValidity();
}
/**
 * Updates several partitions of one table with new metadata in a single metastore call.
 *
 * The DDL time parameter of each partition is removed so that it gets refreshed. Unlike
 * {@code alterPartition}, this method does not run the partitions' validity check.
 *
 * @param tblName
 *          name of the existing table
 * @param newParts
 *          new partitions
 * @param environmentContext
 *          context passed to the metastore
 * @throws InvalidOperationException
 *           if the changes in metadata is not acceptable
 * @throws HiveException wrapping MetaException / TException from the metastore call
 */
public void alterPartitions(String tblName, List<Partition> newParts, EnvironmentContext environmentContext)
    throws InvalidOperationException, HiveException {
  String[] names = Utilities.getDbTableName(tblName);
  List<org.apache.hadoop.hive.metastore.api.Partition> newTParts =
      new ArrayList<org.apache.hadoop.hive.metastore.api.Partition>();
  try {
    // Remove the DDL time so that it gets refreshed
    for (Partition tmpPart : newParts) {
      if (tmpPart.getParameters() != null) {
        tmpPart.getParameters().remove(hive_metastoreConstants.DDL_TIME);
      }
      newTParts.add(tmpPart.getTPartition());
    }
    getMSC().alter_partitions(names[0], names[1], newTParts, environmentContext);
  } catch (MetaException e) {
    throw new HiveException("Unable to alter partition. " + e.getMessage(), e);
  } catch (TException e) {
    throw new HiveException("Unable to alter partition. " + e.getMessage(), e);
  }
}
/**
 * Rename a old partition to new partition.
 *
 * Both partition specs must name exactly the table's partition columns. The old
 * partition is identified by its values, listed in the table's partition-column order.
 *
 * @param tbl
 *          existing table
 * @param oldPartSpec
 *          spec of old partition
 * @param newPart
 *          new partition
 * @throws HiveException
 *           if the specs do not match the table's partition columns, a value of the old
 *           spec is null or empty, or the metastore call fails
 */
public void renamePartition(Table tbl, Map<String, String> oldPartSpec, Partition newPart)
    throws HiveException {
  try {
    Map<String, String> newPartSpec = newPart.getSpec();
    if (oldPartSpec.keySet().size() != tbl.getPartCols().size()
        || newPartSpec.keySet().size() != tbl.getPartCols().size()) {
      throw new HiveException("Unable to rename partition to the same name: number of partition cols don't match. ");
    }
    if (!oldPartSpec.keySet().equals(newPartSpec.keySet())){
      throw new HiveException("Unable to rename partition to the same name: old and new partition cols don't match. ");
    }
    List<String> pvals = new ArrayList<String>();
    for (FieldSchema field : tbl.getPartCols()) {
      String val = oldPartSpec.get(field.getName());
      if (val == null || val.length() == 0) {
        throw new HiveException("get partition: Value for key "
            + field.getName() + " is null or empty");
      }
      // val is known to be non-null and non-empty here.
      pvals.add(val);
    }
    getMSC().renamePartition(tbl.getDbName(), tbl.getTableName(), pvals,
        newPart.getTPartition());
  } catch (InvalidOperationException e){
    throw new HiveException("Unable to rename partition. " + e.getMessage(), e);
  } catch (MetaException e) {
    throw new HiveException("Unable to rename partition. " + e.getMessage(), e);
  } catch (TException e) {
    throw new HiveException("Unable to rename partition. " + e.getMessage(), e);
  }
}
/**
 * Replaces the metadata of an existing database.
 *
 * @param dbName name of the database to alter
 * @param db database object carrying the new metadata
 * @throws HiveException if the database does not exist or the metastore call fails
 */
public void alterDatabase(String dbName, Database db)
    throws HiveException {
  try {
    getMSC().alterDatabase(dbName, db);
  } catch (MetaException metaFailure) {
    throw new HiveException(
        "Unable to alter database " + dbName + ". " + metaFailure.getMessage(), metaFailure);
  } catch (NoSuchObjectException missing) {
    throw new HiveException("Database " + dbName + " does not exists.", missing);
  } catch (TException thriftFailure) {
    throw new HiveException(
        "Unable to alter database " + dbName + ". " + thriftFailure.getMessage(), thriftFailure);
  }
}
/**
 * Creates the table described by the given object; an already existing table is an error.
 *
 * @param tbl
 *          a table object
 * @throws HiveException
 */
public void createTable(Table tbl) throws HiveException {
  final boolean ifNotExists = false;
  createTable(tbl, ifNotExists);
}
/**
 * Creates the table with the given objects. It takes additional arguments for
 * primary keys and foreign keys associated with the table.
 *
 * A missing database name is filled in from the current session, missing columns are
 * derived from the table's deserializer, the DDL time parameter is removed, and the
 * session's automatic create-table grants (if any) are attached before the table is sent
 * to the metastore.
 *
 * @param tbl
 *          a table object
 * @param ifNotExists
 *          if true, ignore AlreadyExistsException
 * @param primaryKeys
 *          primary key columns associated with the table
 * @param foreignKeys
 *          foreign key columns associated with the table
 * @throws HiveException wrapping any failure, including an existing table when
 *           {@code ifNotExists} is false
 */
public void createTable(Table tbl, boolean ifNotExists,
    List<SQLPrimaryKey> primaryKeys, List<SQLForeignKey> foreignKeys) throws HiveException {
  try {
    if (tbl.getDbName() == null || "".equals(tbl.getDbName().trim())) {
      // NOTE(review): SessionState.get() is dereferenced without a null check here, unlike
      // further below; a missing session surfaces as a wrapped NullPointerException.
      tbl.setDbName(SessionState.get().getCurrentDatabase());
    }
    if (tbl.getCols().size() == 0 || tbl.getSd().getColsSize() == 0) {
      tbl.setFields(MetaStoreUtils.getFieldsFromDeserializer(tbl.getTableName(),
          tbl.getDeserializer()));
    }
    tbl.checkValidity(conf);
    if (tbl.getParameters() != null) {
      tbl.getParameters().remove(hive_metastoreConstants.DDL_TIME);
    }
    org.apache.hadoop.hive.metastore.api.Table tTbl = tbl.getTTable();
    PrincipalPrivilegeSet principalPrivs = new PrincipalPrivilegeSet();
    SessionState ss = SessionState.get();
    if (ss != null) {
      CreateTableAutomaticGrant grants = ss.getCreateTableGrants();
      if (grants != null) {
        principalPrivs.setUserPrivileges(grants.getUserGrants());
        principalPrivs.setGroupPrivileges(grants.getGroupGrants());
        principalPrivs.setRolePrivileges(grants.getRoleGrants());
        tTbl.setPrivileges(principalPrivs);
      }
    }
    // Only use the constraint-aware call when at least one kind of key was supplied.
    if (primaryKeys == null && foreignKeys == null) {
      getMSC().createTable(tTbl);
    } else {
      getMSC().createTableWithConstraints(tTbl, primaryKeys, foreignKeys);
    }
  } catch (AlreadyExistsException e) {
    if (!ifNotExists) {
      throw new HiveException(e);
    }
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
/**
 * Creates the table with the given object and no primary-key or foreign-key constraints.
 *
 * @param tbl
 *          a table object
 * @param ifNotExists
 *          if true, ignore AlreadyExistsException
 * @throws HiveException
 */
public void createTable(Table tbl, boolean ifNotExists) throws HiveException {
createTable(tbl, ifNotExists, null, null);
}
/**
 * Derives the table's fields from the given deserializer and overwrites the type of every
 * field with {@code MetaStoreUtils.TYPE_FROM_DESERIALIZER}.
 *
 * @param tbl table whose name is passed to the field lookup
 * @param deserializer deserializer the fields are read from
 * @return the fields, each with its type replaced by the marker constant
 * @throws SerDeException propagated from the field lookup
 * @throws MetaException propagated from the field lookup
 */
public static List<FieldSchema> getFieldsFromDeserializerForMsStorage(
    Table tbl, Deserializer deserializer) throws SerDeException, MetaException {
  List<FieldSchema> schema = MetaStoreUtils.getFieldsFromDeserializer(
      tbl.getTableName(), deserializer);
  for (FieldSchema field : schema) {
    field.setType(MetaStoreUtils.TYPE_FROM_DESERIALIZER);
  }
  return schema;
}
/**
*
* @param tableName
* table name
* @param indexName
* index name
* @param indexHandlerClass
* index handler class
* @param indexedCols
* index columns
* @param indexTblName
* index table's name
* @param deferredRebuild
* referred build index table's data
* @param inputFormat
* input format
* @param outputFormat
* output format
* @param serde
* @param storageHandler
* index table's storage handler
* @param location
* location
* @param idxProps
* idx
* @param serdeProps
* serde properties
* @param collItemDelim
* @param fieldDelim
* @param fieldEscape
* @param lineDelim
* @param mapKeyDelim
* @throws HiveException
*/
public void createIndex(String tableName, String indexName, String indexHandlerClass,
List indexedCols, String indexTblName, boolean deferredRebuild,
String inputFormat, String outputFormat, String serde,
String storageHandler, String location,
Map idxProps, Map tblProps, Map serdeProps,
String collItemDelim, String fieldDelim, String fieldEscape,
String lineDelim, String mapKeyDelim, String indexComment)
throws HiveException {
try {
String tdname = Utilities.getDatabaseName(tableName);
String idname = Utilities.getDatabaseName(indexTblName);
if (!idname.equals(tdname)) {
throw new HiveException("Index on different database (" + idname
+ ") from base table (" + tdname + ") is not supported.");
}
Index old_index = null;
try {
old_index = getIndex(tableName, indexName);
} catch (Exception e) {
}
if (old_index != null) {
throw new HiveException("Index " + indexName + " already exists on table " + tableName);
}
org.apache.hadoop.hive.metastore.api.Table baseTbl = getTable(tableName).getTTable();
if (TableType.VIRTUAL_VIEW.toString().equals(baseTbl.getTableType())) {
throw new HiveException("tableName="+ tableName +" is a VIRTUAL VIEW. Index on VIRTUAL VIEW is not supported.");
}
if (baseTbl.isTemporary()) {
throw new HiveException("tableName=" + tableName
+ " is a TEMPORARY TABLE. Index on TEMPORARY TABLE is not supported.");
}
org.apache.hadoop.hive.metastore.api.Table temp = null;
try {
temp = getTable(indexTblName).getTTable();
} catch (Exception e) {
}
if (temp != null) {
throw new HiveException("Table name " + indexTblName + " already exists. Choose another name.");
}
SerDeInfo serdeInfo = new SerDeInfo();
serdeInfo.setName(indexTblName);
if(serde != null) {
serdeInfo.setSerializationLib(serde);
} else {
if (storageHandler == null) {
serdeInfo.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
} else {
HiveStorageHandler sh = HiveUtils.getStorageHandler(getConf(), storageHandler);
String serDeClassName = sh.getSerDeClass().getName();
serdeInfo.setSerializationLib(serDeClassName);
}
}
serdeInfo.setParameters(new HashMap());
if (fieldDelim != null) {
serdeInfo.getParameters().put(FIELD_DELIM, fieldDelim);
serdeInfo.getParameters().put(SERIALIZATION_FORMAT, fieldDelim);
}
if (fieldEscape != null) {
serdeInfo.getParameters().put(ESCAPE_CHAR, fieldEscape);
}
if (collItemDelim != null) {
serdeInfo.getParameters().put(COLLECTION_DELIM, collItemDelim);
}
if (mapKeyDelim != null) {
serdeInfo.getParameters().put(MAPKEY_DELIM, mapKeyDelim);
}
if (lineDelim != null) {
serdeInfo.getParameters().put(LINE_DELIM, lineDelim);
}
if (serdeProps != null) {
Iterator> iter = serdeProps.entrySet()
.iterator();
while (iter.hasNext()) {
Entry m = iter.next();
serdeInfo.getParameters().put(m.getKey(), m.getValue());
}
}
List indexTblCols = new ArrayList();
List sortCols = new ArrayList();
int k = 0;
Table metaBaseTbl = new Table(baseTbl);
// Even though we are storing these in metastore, get regular columns. Indexes on lengthy
// types from e.g. Avro schema will just fail to create the index table (by design).
List cols = metaBaseTbl.getCols();
for (int i = 0; i < cols.size(); i++) {
FieldSchema col = cols.get(i);
if (indexedCols.contains(col.getName())) {
indexTblCols.add(col);
sortCols.add(new Order(col.getName(), 1));
k++;
}
}
if (k != indexedCols.size()) {
throw new RuntimeException(
"Check the index columns, they should appear in the table being indexed.");
}
int time = (int) (System.currentTimeMillis() / 1000);
org.apache.hadoop.hive.metastore.api.Table tt = null;
HiveIndexHandler indexHandler = HiveUtils.getIndexHandler(this.getConf(), indexHandlerClass);
String itname = Utilities.getTableName(indexTblName);
if (indexHandler.usesIndexTable()) {
tt = new org.apache.hadoop.hive.ql.metadata.Table(idname, itname).getTTable();
List partKeys = baseTbl.getPartitionKeys();
tt.setPartitionKeys(partKeys);
tt.setTableType(TableType.INDEX_TABLE.toString());
if (tblProps != null) {
for (Entry prop : tblProps.entrySet()) {
tt.putToParameters(prop.getKey(), prop.getValue());
}
}
SessionState ss = SessionState.get();
CreateTableAutomaticGrant grants;
if (ss != null && ((grants = ss.getCreateTableGrants()) != null)) {
PrincipalPrivilegeSet principalPrivs = new PrincipalPrivilegeSet();
principalPrivs.setUserPrivileges(grants.getUserGrants());
principalPrivs.setGroupPrivileges(grants.getGroupGrants());
principalPrivs.setRolePrivileges(grants.getRoleGrants());
tt.setPrivileges(principalPrivs);
}
}
if(!deferredRebuild) {
throw new RuntimeException("Please specify deferred rebuild using \" WITH DEFERRED REBUILD \".");
}
StorageDescriptor indexSd = new StorageDescriptor(
indexTblCols,
location,
inputFormat,
outputFormat,
false/*compressed - not used*/,
-1/*numBuckets - default is -1 when the table has no buckets*/,
serdeInfo,
null/*bucketCols*/,
sortCols,
null/*parameters*/);
String ttname = Utilities.getTableName(tableName);
Index indexDesc = new Index(indexName, indexHandlerClass, tdname, ttname, time, time, itname,
indexSd, new HashMap(), deferredRebuild);
if (indexComment != null) {
indexDesc.getParameters().put("comment", indexComment);
}
if (idxProps != null)
{
indexDesc.getParameters().putAll(idxProps);
}
indexHandler.analyzeIndexDefinition(baseTbl, indexDesc, tt);
this.getMSC().createIndex(indexDesc, tt);
} catch (Exception e) {
throw new HiveException(e);
}
}
/**
 * Looks up an index using the name of the table it is built on.
 *
 * @param baseTableName
 *          name of the indexed table; it is resolved through
 *          {@code Utilities.getDbTableName} into the database and table
 *          names passed on to the three-argument overload
 * @param indexName
 *          name of the index to fetch
 * @return the index returned by the three-argument overload
 * @throws HiveException
 *           if the name cannot be resolved or the lookup fails
 */
public Index getIndex(String baseTableName, String indexName) throws HiveException {
  final String[] dbAndTable = Utilities.getDbTableName(baseTableName);
  final String dbName = dbAndTable[0];
  final String tblName = dbAndTable[1];
  return getIndex(dbName, tblName, indexName);
}
/**
 * Fetches an index from the metastore client.
 *
 * @param dbName
 *          database name handed to the metastore client
 * @param baseTableName
 *          name of the table the index is built on
 * @param indexName
 *          name of the index to fetch
 * @return the index returned by the metastore client
 * @throws HiveException
 *           wrapping any exception raised while obtaining the client or
 *           performing the lookup
 */
public Index getIndex(String dbName, String baseTableName,
    String indexName) throws HiveException {
  Index found;
  try {
    found = getMSC().getIndex(dbName, baseTableName, indexName);
  } catch (Exception cause) {
    throw new HiveException(cause);
  }
  return found;
}
/**
 * Drops an index identified by the name of its base table.
 *
 * @param baseTableName
 *          name of the indexed table; resolved through
 *          {@code Utilities.getDbTableName} into database and table names
 * @param index_name
 *          name of the index to drop
 * @param throwException
 *          forwarded to the five-argument overload
 * @param deleteData
 *          forwarded to the five-argument overload
 * @return the result of the five-argument overload
 * @throws HiveException
 *           if the name cannot be resolved or the drop fails
 */
public boolean dropIndex(String baseTableName, String index_name,
    boolean throwException, boolean deleteData) throws HiveException {
  final String[] dbAndTable = Utilities.getDbTableName(baseTableName);
  final String dbName = dbAndTable[0];
  final String tblName = dbAndTable[1];
  return dropIndex(dbName, tblName, index_name, throwException, deleteData);
}
/**
 * Drops an index through the metastore client.
 *
 * @param db_name
 *          database name handed to the metastore client
 * @param tbl_name
 *          name of the table the index is built on
 * @param index_name
 *          name of the index to drop
 * @param throwException
 *          when true a missing index is reported as a HiveException; when
 *          false a missing index makes this method return false
 * @param deleteData
 *          forwarded unchanged to the metastore client's dropIndex call
 * @return the value returned by the metastore client, or false when the
 *         index does not exist and throwException is false
 * @throws HiveException
 *           if the index is missing and throwException is true, or if any
 *           other exception is raised
 */
public boolean dropIndex(String db_name, String tbl_name, String index_name,
    boolean throwException, boolean deleteData) throws HiveException {
  try {
    return getMSC().dropIndex(db_name, tbl_name, index_name, deleteData);
  } catch (NoSuchObjectException missing) {
    if (!throwException) {
      return false;
    }
    throw new HiveException("Index " + index_name + " doesn't exist. ", missing);
  } catch (Exception failure) {
    throw new HiveException(failure.getMessage(), failure);
  }
}
/**
 * Drops a table together with its data. A table that does not exist is
 * silently ignored, because the call is delegated with deleteData and
 * ignoreUnknownTab both set to true.
 *
 * @param tableName
 *          table to drop; resolved through {@code Utilities.getDbTableName}
 *          into database and table names
 * @param ifPurge
 *          completely purge the table (skipping trash) while removing data from warehouse
 * @throws HiveException
 *           thrown if the drop fails
 */
public void dropTable(String tableName, boolean ifPurge) throws HiveException {
  final String[] dbAndTable = Utilities.getDbTableName(tableName);
  final String dbName = dbAndTable[0];
  final String tblName = dbAndTable[1];
  dropTable(dbName, tblName, true, true, ifPurge);
}
/**
 * Drops a table together with its data, without requesting a purge.
 * Delegates to {@code dropTable(tableName, false)}, which ignores a table
 * that does not exist.
 *
 * @param tableName
 *          table to drop
 * @throws HiveException
 *           thrown if the drop fails
 */
public void dropTable(String tableName) throws HiveException {
  final boolean ifPurge = false;
  dropTable(tableName, ifPurge);
}
/**
 * Drops a table in the given database together with its data. A table that
 * does not exist is ignored, and no purge is requested.
 *
 * @param dbName
 *          database where the table lives
 * @param tableName
 *          table to drop
 * @throws HiveException
 *           thrown if the drop fails
 */
public void dropTable(String dbName, String tableName) throws HiveException {
  final boolean deleteData = true;
  final boolean ignoreUnknownTab = true;
  final boolean ifPurge = false;
  dropTable(dbName, tableName, deleteData, ignoreUnknownTab, ifPurge);
}
/**
 * Drops the table without requesting a purge.
 *
 * @param dbName
 *          database where the table lives
 * @param tableName
 *          table to drop
 * @param deleteData
 *          deletes the underlying data along with metadata
 * @param ignoreUnknownTab
 *          an exception is thrown if this is false and the table doesn't exist
 * @throws HiveException
 *           thrown if the drop fails
 */
public void dropTable(String dbName, String tableName, boolean deleteData,
    boolean ignoreUnknownTab) throws HiveException {
  final boolean ifPurge = false;
  dropTable(dbName, tableName, deleteData, ignoreUnknownTab, ifPurge);
}
/**
 * Drops the table through the metastore client.
 *
 * @param dbName
 *          database where the table lives
 * @param tableName
 *          table to drop
 * @param deleteData
 *          deletes the underlying data along with metadata
 * @param ignoreUnknownTab
 *          an exception is thrown if this is false and the table doesn't exist
 * @param ifPurge
 *          completely purge the table skipping trash while removing data from warehouse
 * @throws HiveException
 *           if the table is missing and ignoreUnknownTab is false, or if any
 *           other exception is raised
 */
public void dropTable(String dbName, String tableName, boolean deleteData,
    boolean ignoreUnknownTab, boolean ifPurge) throws HiveException {
  try {
    getMSC().dropTable(dbName, tableName, deleteData, ignoreUnknownTab, ifPurge);
  } catch (NoSuchObjectException missing) {
    // A missing table is only an error when the caller asked for it to be.
    if (ignoreUnknownTab) {
      return;
    }
    throw new HiveException(missing);
  } catch (Exception failure) {
    throw new HiveException(failure);
  }
}
/**
 * Returns the configuration held by this object.
 *
 * @return the value of the {@code conf} field
 */
public HiveConf getConf() {
  return conf;
}
/**
 * Returns metadata for the table named tableName. Delegates to
 * {@code getTable(tableName, true)}, so a missing table is reported as an
 * exception rather than as null.
 *
 * @param tableName the name of the table
 * @return the table metadata
 * @throws HiveException if there's an internal error or if the
 * table doesn't exist
 */
public Table getTable(final String tableName) throws HiveException {
  final boolean throwException = true;
  return getTable(tableName, throwException);
}
/**
 * Returns metadata for the table named tableName.
 *
 * @param tableName the name of the table; resolved through
 * {@code Utilities.getDbTableName} into database and table names
 * @param throwException controls whether a missing table raises an exception
 * or yields null
 * @return the table metadata, or null if the table is missing and
 * throwException is false
 * @throws HiveException if there's an internal error, or if the table
 * doesn't exist and throwException is true
 */
public Table getTable(final String tableName, boolean throwException) throws HiveException {
  final String[] dbAndTable = Utilities.getDbTableName(tableName);
  final String dbName = dbAndTable[0];
  final String tblName = dbAndTable[1];
  return getTable(dbName, tblName, throwException);
}
/**
 * Returns metadata of the table. When tableName contains a '.', the
 * database and table names are taken from
 * {@code Utilities.getDbTableName(tableName)} and the dbName argument is
 * not used.
 *
 * @param dbName
 *          the name of the database
 * @param tableName
 *          the name of the table
 * @return the table
 * @exception HiveException
 *              if there's an internal error or if the table doesn't exist
 */
public Table getTable(final String dbName, final String tableName) throws HiveException {
  if (!tableName.contains(".")) {
    return getTable(dbName, tableName, true);
  }
  final String[] qualified = Utilities.getDbTableName(tableName);
  return getTable(qualified[0], qualified[1], true);
}
/**
 * Returns metadata of the table, fetched from the metastore client and
 * wrapped in a {@link Table}.
 *
 * For anything that is not a virtual view, two adjustments are applied to
 * the fetched object before it is wrapped:
 * a single-character serialization format below code point 10 is rewritten
 * as its decimal number, and MetadataTypedColumnsetSerDe is replaced by
 * LazySimpleSerDe unless the first column's type contains a '&lt;'.
 *
 * @param dbName
 *          the name of the database
 * @param tableName
 *          the name of the table
 * @param throwException
 *          controls whether a missing table raises an exception or yields
 *          null
 * @return the table or if throwException is false a null value.
 * @throws HiveException
 *           if tableName is null or empty, if the table does not exist and
 *           throwException is true (an InvalidTableException is thrown), or
 *           if the metastore call fails for any other reason
 */
public Table getTable(final String dbName, final String tableName,
    boolean throwException) throws HiveException {
  if (tableName == null || tableName.isEmpty()) {
    throw new HiveException("empty table creation??");
  }
  // Get the table from metastore
  org.apache.hadoop.hive.metastore.api.Table tTable = null;
  try {
    tTable = getMSC().getTable(dbName, tableName);
  } catch (NoSuchObjectException e) {
    if (throwException) {
      LOG.error("Table " + tableName + " not found: " + e.getMessage());
      throw new InvalidTableException(tableName);
    }
    return null;
  } catch (Exception e) {
    throw new HiveException("Unable to fetch table " + tableName + ". " + e.getMessage(), e);
  }
  // For non-views, we need to do some extra fixes
  if (!TableType.VIRTUAL_VIEW.toString().equals(tTable.getTableType())) {
    // Fix the non-printable chars. The map must be typed: with a raw Map the
    // lookup below yields Object and cannot be assigned to a String.
    Map<String, String> parameters = tTable.getSd().getParameters();
    String sf = parameters != null ? parameters.get(SERIALIZATION_FORMAT) : null;
    if (sf != null) {
      char[] b = sf.toCharArray();
      if ((b.length == 1) && (b[0] < 10)) { // ^A, ^B, ^C, ^D, \t
        // Store the control character as its decimal code point.
        parameters.put(SERIALIZATION_FORMAT, Integer.toString(b[0]));
      }
    }
    // Use LazySimpleSerDe for MetadataTypedColumnsetSerDe.
    // NOTE: LazySimpleSerDe does not support tables with a single column of
    // type "array<string>". This happens when the table is created using an
    // earlier version of Hive, so such tables (first column type containing
    // '<') keep their original SerDe.
    if (org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe.class
        .getName().equals(
            tTable.getSd().getSerdeInfo().getSerializationLib())
        && tTable.getSd().getColsSize() > 0
        && tTable.getSd().getCols().get(0).getType().indexOf('<') == -1) {
      tTable.getSd().getSerdeInfo().setSerializationLib(
          org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
    }
  }
  return new Table(tTable);
}
/**
 * Get all table names for the current database, i.e. the database reported
 * by the current {@code SessionState}.
 *
 * @return List of table names
 * @throws HiveException
 */
public List getAllTables() throws HiveException {
  final String currentDb = SessionState.get().getCurrentDatabase();
  return getAllTables(currentDb);
}
/**
 * Get all table names for the specified database by matching every name
 * against the pattern ".*".
 *
 * @param dbName
 *          database whose tables are listed
 * @return List of table names
 * @throws HiveException
 */
public List getAllTables(String dbName) throws HiveException {
  final String matchEverything = ".*";
  return getTablesByPattern(dbName, matchEverything);
}
/**
 * Returns all existing tables from the session's current database which
 * match the given pattern. The matching occurs as per Java regular
 * expressions
 *
 * @param tablePattern
 *          java re pattern
 * @return list of table names
 * @throws HiveException
 */
public List getTablesByPattern(String tablePattern) throws HiveException {
  final String currentDb = SessionState.get().getCurrentDatabase();
  return getTablesByPattern(currentDb, tablePattern);
}
/**
 * Returns all existing tables from the specified database which match the
 * given pattern. The matching occurs as per Java regular expressions.
 *
 * @param dbName
 *          database to search
 * @param tablePattern
 *          java re pattern
 * @return list of table names as returned by the metastore client
 * @throws HiveException
 *           wrapping any exception raised by the lookup
 */
public List getTablesByPattern(String dbName, String tablePattern) throws HiveException {
  List matches;
  try {
    matches = getMSC().getTables(dbName, tablePattern);
  } catch (Exception cause) {
    throw new HiveException(cause);
  }
  return matches;
}
/**
 * Returns all existing tables from the given database which match the given
 * pattern. The matching occurs as per Java regular expressions
 *
 * @param database
 *          the database name
 * @param tablePattern
 *          java re pattern
 * @return list of table names as returned by the metastore client
 * @throws HiveException
 *           wrapping any exception raised by the lookup
 */
public List getTablesForDb(String database, String tablePattern)
    throws HiveException {
  List tableNames;
  try {
    tableNames = getMSC().getTables(database, tablePattern);
  } catch (Exception cause) {
    throw new HiveException(cause);
  }
  return tableNames;
}
/**
 * Get all existing database names.
 *
 * @return List of database names as returned by the metastore client.
 * @throws HiveException
 *           wrapping any exception raised by the lookup
 */
public List getAllDatabases() throws HiveException {
  List databaseNames;
  try {
    databaseNames = getMSC().getAllDatabases();
  } catch (Exception cause) {
    throw new HiveException(cause);
  }
  return databaseNames;
}
/**
 * Get all existing databases that match the given pattern. The matching
 * occurs as per Java regular expressions
 *
 * @param databasePattern
 *          java re pattern
 * @return list of database names as returned by the metastore client
 * @throws HiveException
 *           wrapping any exception raised by the lookup
 */
public List getDatabasesByPattern(String databasePattern) throws HiveException {
  List databaseNames;
  try {
    databaseNames = getMSC().getDatabases(databasePattern);
  } catch (Exception cause) {
    throw new HiveException(cause);
  }
  return databaseNames;
}
/**
 * Grants a bag of privileges through the metastore client.
 *
 * @param privileges
 *          a bag of privileges
 * @return the value returned by the metastore client's grant_privileges call
 * @throws HiveException
 *           wrapping any exception raised by the call
 */
public boolean grantPrivileges(PrivilegeBag privileges)
    throws HiveException {
  boolean granted;
  try {
    granted = getMSC().grant_privileges(privileges);
  } catch (Exception cause) {
    throw new HiveException(cause);
  }
  return granted;
}
/**
 * Revokes a bag of privileges through the metastore client.
 *
 * @param privileges
 *          a bag of privileges
 * @param grantOption
 *          forwarded unchanged to the metastore client's revoke_privileges
 *          call
 * @return the value returned by the metastore client's revoke_privileges call
 * @throws HiveException
 *           wrapping any exception raised by the call
 */
public boolean revokePrivileges(PrivilegeBag privileges, boolean grantOption)
    throws HiveException {
  boolean revoked;
  try {
    revoked = getMSC().revoke_privileges(privileges, grantOption);
  } catch (Exception cause) {
    throw new HiveException(cause);
  }
  return revoked;
}
/**
 * Query metadata to see if a database with the given name already exists.
 * The answer is derived from whether {@code getDatabase(dbName)} returns a
 * non-null value.
 *
 * @param dbName
 *          name of the database to look for
 * @return true if a database with the given name already exists, false if
 *         does not exist.
 * @throws HiveException
 */
public boolean databaseExists(String dbName) throws HiveException {
  final Database found = getDatabase(dbName);
  return found != null;
}
/**
 * Get the database by name.
 *
 * @param dbName the name of the database.
 * @return a Database object if this database exists, null otherwise (a
 * NoSuchObjectException from the metastore client is turned into null).
 * @throws HiveException wrapping any other exception raised by the lookup
 */
public Database getDatabase(String dbName) throws HiveException {
  Database found;
  try {
    found = getMSC().getDatabase(dbName);
  } catch (NoSuchObjectException missing) {
    found = null;
  } catch (Exception cause) {
    throw new HiveException(cause);
  }
  return found;
}
/**
 * Get the Database object for the current database, i.e. the one reported
 * by the current {@code SessionState}.
 *
 * @return a Database object if this database exists, null otherwise.
 * @throws HiveException
 */
public Database getDatabaseCurrent() throws HiveException {
  return getDatabase(SessionState.get().getCurrentDatabase());
}
/**
 * Loads a directory into a partition of the table with the given name.
 * The table is looked up with {@code getTable(tableName)} and all other
 * arguments are forwarded unchanged to the overload taking a {@code Table};
 * the partition returned by that overload is discarded.
 *
 * @param loadPath
 *          directory containing files to load
 * @param tableName
 *          name of the table to be loaded
 * @param partSpec
 *          defines which partition needs to be loaded
 * @param replace
 *          forwarded to the Table overload
 * @param inheritTableSpecs
 *          forwarded to the Table overload
 * @param isSkewedStoreAsSubdir
 *          forwarded to the Table overload
 * @param isSrcLocal
 *          forwarded to the Table overload
 * @param isAcid
 *          forwarded to the Table overload
 * @param hasFollowingStatsTask
 *          forwarded to the Table overload
 * @throws HiveException
 *           if the table lookup or the load fails
 */
public void loadPartition(Path loadPath, String tableName,
    Map partSpec, boolean replace,
    boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir,
    boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask) throws HiveException {
  loadPartition(loadPath, getTable(tableName), partSpec, replace, inheritTableSpecs,
      isSkewedStoreAsSubdir, isSrcLocal, isAcid, hasFollowingStatsTask);
}
/**
* Load a directory into a Hive Table Partition - Alters existing content of
* the partition with the contents of loadPath. - If the partition does not
* exist - one is created - files in loadPath are moved into Hive. But the
* directory itself is not removed.
*
* @param loadPath
* Directory containing files to load into Table
* @param tbl
* name of table to be loaded.
* @param partSpec
* defines which partition needs to be loaded
* @param replace
* if true - replace files in the partition, otherwise add files to
* the partition
* @param inheritTableSpecs if true, on [re]creating the partition, take the
* location/inputformat/outputformat/serde details from table spec
* @param isSrcLocal
* If the source directory is LOCAL
* @param isAcid true if this is an ACID operation
*/
public Partition loadPartition(Path loadPath, Table tbl,
Map partSpec, boolean replace,
boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir,
boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask) throws HiveException {
Path tblDataLocationPath = tbl.getDataLocation();
try {
Partition oldPart = getPartition(tbl, partSpec, false);
/**
* Move files before creating the partition since down stream processes
* check for existence of partition in metadata before accessing the data.
* If partition is created before data is moved, downstream waiting
* processes might move forward with partial data
*/
Path oldPartPath = (oldPart != null) ? oldPart.getDataLocation() : null;
Path newPartPath = null;
if (inheritTableSpecs) {
Path partPath = new Path(tbl.getDataLocation(),
Warehouse.makePartPath(partSpec));
newPartPath = new Path(tblDataLocationPath.toUri().getScheme(), tblDataLocationPath.toUri().getAuthority(),
partPath.toUri().getPath());
if(oldPart != null) {
/*
* If we are moving the partition across filesystem boundaries
* inherit from the table properties. Otherwise (same filesystem) use the
* original partition location.
*
* See: HIVE-1707 and HIVE-2117 for background
*/
FileSystem oldPartPathFS = oldPartPath.getFileSystem(getConf());
FileSystem loadPathFS = loadPath.getFileSystem(getConf());
if (FileUtils.equalsFileSystem(oldPartPathFS,loadPathFS)) {
newPartPath = oldPartPath;
}
}
} else {
newPartPath = oldPartPath;
}
List newFiles = null;
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin("MoveTask", "FileMoves");
if (replace || (oldPart == null && !isAcid)) {
replaceFiles(tbl.getPath(), loadPath, newPartPath, oldPartPath, getConf(),
isSrcLocal);
} else {
if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && oldPart != null) {
newFiles = Collections.synchronizedList(new ArrayList());
}
FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
Hive.copyFiles(conf, loadPath, newPartPath, fs, isSrcLocal, isAcid, newFiles);
}
perfLogger.PerfLogEnd("MoveTask", "FileMoves");
Partition newTPart = oldPart != null ? oldPart : new Partition(tbl, partSpec, newPartPath);
alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), inheritTableSpecs, newPartPath.toString());
validatePartition(newTPart);
if ((null != newFiles) || replace) {
fireInsertEvent(tbl, partSpec, newFiles);
} else {
LOG.debug("No new files were created, and is not a replace. Skipping generating INSERT event.");
}
//column stats will be inaccurate
StatsSetupConst.clearColumnStatsState(newTPart.getParameters());
// recreate the partition if it existed before
if (isSkewedStoreAsSubdir) {
org.apache.hadoop.hive.metastore.api.Partition newCreatedTpart = newTPart.getTPartition();
SkewedInfo skewedInfo = newCreatedTpart.getSd().getSkewedInfo();
/* Construct list bucketing location mappings from sub-directory name. */
Map, String> skewedColValueLocationMaps = constructListBucketingLocationMap(
newPartPath, skewedInfo);
/* Add list bucketing location mappings. */
skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps);
newCreatedTpart.getSd().setSkewedInfo(skewedInfo);
}
if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
StatsSetupConst.setBasicStatsState(newTPart.getParameters(), StatsSetupConst.FALSE);
}
if (oldPart == null) {
newTPart.getTPartition().setParameters(new HashMap());
if (this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
StatsSetupConst.setBasicStatsStateForCreateTable(newTPart.getParameters(),
StatsSetupConst.TRUE);
}
MetaStoreUtils.populateQuickStats(HiveStatsUtils.getFileStatusRecurse(newPartPath, -1, newPartPath.getFileSystem(conf)), newTPart.getParameters());
try {
LOG.debug("Adding new partition " + newTPart.getSpec());
getSychronizedMSC().add_partition(newTPart.getTPartition());
} catch (AlreadyExistsException aee) {
// With multiple users concurrently issuing insert statements on the same partition has
// a side effect that some queries may not see a partition at the time when they're issued,
// but will realize the partition is actually there when it is trying to add such partition
// to the metastore and thus get AlreadyExistsException, because some earlier query just created it (race condition).
// For example, imagine such a table is created:
// create table T (name char(50)) partitioned by (ds string);
// and the following two queries are launched at the same time, from different sessions:
// insert into table T partition (ds) values ('Bob', 'today'); -- creates the partition 'today'
// insert into table T partition (ds) values ('Joe', 'today'); -- will fail with AlreadyExistsException
// In that case, we want to retry with alterPartition.
LOG.debug("Caught AlreadyExistsException, trying to alter partition instead");
setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
}
} else {
setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
}
return newTPart;
} catch (IOException e) {
LOG.error(StringUtils.stringifyException(e));
throw new HiveException(e);
} catch (MetaException e) {
LOG.error(StringUtils.stringifyException(e));
throw new HiveException(e);
} catch (InvalidOperationException e) {
LOG.error(StringUtils.stringifyException(e));
throw new HiveException(e);
} catch (TException e) {
LOG.error(StringUtils.stringifyException(e));
throw new HiveException(e);
}
}
/**
 * Alters an existing partition in the metastore.
 *
 * @param hasFollowingStatsTask
 *          when true, the alter call carries an environment context whose
 *          DO_NOT_UPDATE_STATS property is set to TRUE; when false a null
 *          context is passed
 * @param tbl
 *          table supplying the database and table names for the call
 * @param newTPart
 *          partition whose underlying metastore object is sent
 * @throws MetaException
 * @throws TException
 */
private void setStatsPropAndAlterPartition(boolean hasFollowingStatsTask, Table tbl,
    Partition newTPart) throws MetaException, TException {
  final EnvironmentContext alterContext =
      hasFollowingStatsTask ? new EnvironmentContext() : null;
  if (alterContext != null) {
    alterContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
  }
  LOG.debug("Altering existing partition " + newTPart.getSpec());
  getSychronizedMSC().alter_partition(tbl.getDbName(), tbl.getTableName(),
      newTPart.getTPartition(), alterContext);
}
/**
* Walk through sub-directory tree to construct list bucketing location map.
*
* @param fSta
* @param fSys
* @param skewedColValueLocationMaps
* @param newPartPath
* @param skewedInfo
* @throws IOException
*/
private void walkDirTree(FileStatus fSta, FileSystem fSys,
Map, String> skewedColValueLocationMaps, Path newPartPath, SkewedInfo skewedInfo)
throws IOException {
/* Base Case. It's leaf. */
if (!fSta.isDir()) {
/* construct one location map if not exists. */
constructOneLBLocationMap(fSta, skewedColValueLocationMaps, newPartPath, skewedInfo);
return;
}
/* dfs. */
FileStatus[] children = fSys.listStatus(fSta.getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
if (children != null) {
for (FileStatus child : children) {
walkDirTree(child, fSys, skewedColValueLocationMaps, newPartPath, skewedInfo);
}
}
}
/**
* Construct a list bucketing location map
* @param fSta
* @param skewedColValueLocationMaps
* @param newPartPath
* @param skewedInfo
*/
private void constructOneLBLocationMap(FileStatus fSta,
Map, String> skewedColValueLocationMaps,
Path newPartPath, SkewedInfo skewedInfo) {
Path lbdPath = fSta.getPath().getParent();
List skewedValue = new ArrayList();
String lbDirName = FileUtils.unescapePathName(lbdPath.toString());
String partDirName = FileUtils.unescapePathName(newPartPath.toString());
String lbDirSuffix = lbDirName.replace(partDirName, "");
String[] dirNames = lbDirSuffix.split(Path.SEPARATOR);
for (String dirName : dirNames) {
if ((dirName != null) && (dirName.length() > 0)) {
// Construct skewed-value to location map except default directory.
// why? query logic knows default-dir structure and don't need to get from map
if (!dirName
.equalsIgnoreCase(ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME)) {
String[] kv = dirName.split("=");
if (kv.length == 2) {
skewedValue.add(kv[1]);
}
}
}
}
if ((skewedValue.size() > 0) && (skewedValue.size() == skewedInfo.getSkewedColNames().size())
&& !skewedColValueLocationMaps.containsKey(skewedValue)) {
skewedColValueLocationMaps.put(skewedValue, lbdPath.toString());
}
}
/**
* Construct location map from path
*
* @param newPartPath
* @param skewedInfo
* @return
* @throws IOException
* @throws FileNotFoundException
*/
private Map, String> constructListBucketingLocationMap(Path newPartPath,
SkewedInfo skewedInfo) throws IOException, FileNotFoundException {
Map, String> skewedColValueLocationMaps = new HashMap, String>();
FileSystem fSys = newPartPath.getFileSystem(conf);
walkDirTree(fSys.getFileStatus(newPartPath), fSys, skewedColValueLocationMaps, newPartPath,
skewedInfo);
return skewedColValueLocationMaps;
}
/**
 * Get the valid partitions from the path.
 *
 * Every entry returned by {@code HiveStatsUtils.getFileStatusRecurse} for
 * the load path must be a directory; its path is collected. A warning is
 * logged when nothing is found, and the load is rejected when the count
 * exceeds the configured DYNAMICPARTITIONMAXPARTS limit.
 *
 * @param numDP number of dynamic partitions
 * @param loadPath
 *          directory to scan
 * @return Set of valid partitions
 * @throws HiveException
 *           if an entry is not a directory, if the file system access fails,
 *           or if too many partitions were generated
 */
private Set getValidPartitionsInPath(int numDP, Path loadPath) throws HiveException {
  final Set partitionDirs = new HashSet();
  try {
    final FileSystem loadFs = loadPath.getFileSystem(conf);
    // Check for empty partitions
    for (FileStatus leaf : HiveStatsUtils.getFileStatusRecurse(loadPath, numDP, loadFs)) {
      if (leaf.isDirectory()) {
        partitionDirs.add(leaf.getPath());
        continue;
      }
      throw new HiveException("partition " + leaf.getPath() + " is not a directory!");
    }
  } catch (IOException ioe) {
    throw new HiveException(ioe);
  }
  final int found = partitionDirs.size();
  if (found == 0) {
    LOG.warn("No partition is generated by dynamic partitioning");
  }
  final int maxParts = conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS);
  if (found > maxParts) {
    throw new HiveException("Number of dynamic partitions created is " + found
        + ", which is more than "
        + maxParts
        + ". To solve this try to set " + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
        + " to at least " + found + '.');
  }
  return partitionDirs;
}
/**
* Given a source directory name of the load path, load all dynamically generated partitions
* into the specified table and return a list of strings that represent the dynamic partition
* paths.
* @param loadPath
* @param tableName
* @param partSpec
* @param replace
* @param numDP number of dynamic partitions
* @param listBucketingEnabled
* @param isAcid true if this is an ACID operation
* @param txnId txnId, can be 0 unless isAcid == true
* @return partition map details (PartitionSpec and Partition)
* @throws HiveException
*/
public Map