Downloading a project consumes considerable server resources. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
You can buy this project and download or modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.metadata;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import static org.apache.hadoop.hive.conf.Constants.MATERIALIZED_VIEW_REWRITING_TIME_WINDOW;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog;
import static org.apache.hadoop.hive.ql.parse.DDLSemanticAnalyzer.makeBinaryPredicate;
import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT;
import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import javax.jdo.JDODataStoreException;
import com.google.common.collect.ImmutableList;
import org.apache.calcite.plan.RelOptMaterialization;
import org.apache.calcite.plan.hep.HepPlanner;
import org.apache.calcite.plan.hep.HepProgramBuilder;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelVisitor;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rex.RexBuilder;
import org.apache.commons.io.FilenameUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.HiveStatsUtils;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable;
import org.apache.hadoop.hive.common.log.InPlaceUpdate;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.io.HdfsUtils;
import org.apache.hadoop.hive.metastore.HiveMetaException;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.HiveMetaHookLoader;
import org.apache.hadoop.hive.metastore.HiveMetaStore;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.PartitionDropOptions;
import org.apache.hadoop.hive.metastore.RawStore;
import org.apache.hadoop.hive.metastore.RetryingMetaStoreClient;
import org.apache.hadoop.hive.metastore.SynchronizedMetaStoreClient;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.ReplChangeManager;
import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.CheckConstraintsRequest;
import org.apache.hadoop.hive.metastore.api.CmRecycleRequest;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.CompactionResponse;
import org.apache.hadoop.hive.metastore.api.CompactionType;
import org.apache.hadoop.hive.metastore.api.CreationMetadata;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.DefaultConstraintsRequest;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.FireEventRequest;
import org.apache.hadoop.hive.metastore.api.FireEventRequestData;
import org.apache.hadoop.hive.metastore.api.ForeignKeysRequest;
import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalRequest;
import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalResponse;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
import org.apache.hadoop.hive.metastore.api.HiveObjectRef;
import org.apache.hadoop.hive.metastore.api.HiveObjectType;
import org.apache.hadoop.hive.metastore.api.InsertEventRequestData;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.Materialization;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.MetadataPpdResult;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.NotNullConstraintsRequest;
import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest;
import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
import org.apache.hadoop.hive.metastore.api.PrincipalType;
import org.apache.hadoop.hive.metastore.api.PrivilegeBag;
import org.apache.hadoop.hive.metastore.api.Role;
import org.apache.hadoop.hive.metastore.api.RolePrincipalGrant;
import org.apache.hadoop.hive.metastore.api.SQLCheckConstraint;
import org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint;
import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
import org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint;
import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint;
import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.SkewedInfo;
import org.apache.hadoop.hive.metastore.api.UniqueConstraintsRequest;
import org.apache.hadoop.hive.metastore.api.WMFullResourcePlan;
import org.apache.hadoop.hive.metastore.api.WMMapping;
import org.apache.hadoop.hive.metastore.api.WMNullablePool;
import org.apache.hadoop.hive.metastore.api.WMNullableResourcePlan;
import org.apache.hadoop.hive.metastore.api.WMPool;
import org.apache.hadoop.hive.metastore.api.WMResourcePlan;
import org.apache.hadoop.hive.metastore.api.WMTrigger;
import org.apache.hadoop.hive.metastore.api.WMValidateResourcePlanResponse;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.AbstractFileMergeOperator;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.FunctionTask;
import org.apache.hadoop.hive.ql.exec.FunctionUtils;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
import org.apache.hadoop.hive.ql.lockmgr.LockException;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAugmentMaterializationRule;
import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
import org.apache.hadoop.hive.ql.plan.DropTableDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType;
import org.apache.hadoop.hive.ql.session.CreateTableAutomaticGrant;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.hive.common.util.TxnIdUtils;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class has functions that implement meta data/DDL operations using calls
* to the metastore.
* It has a metastore client instance it uses to communicate with the metastore.
*
* Instances of this class are thread-local and are obtained through the static
* get methods of this class.
*/
@SuppressWarnings({"deprecation", "rawtypes"})
public class Hive {
// Logger shared by all instances; registered under the fixed name "hive.ql.metadata.Hive".
static final private Logger LOG = LoggerFactory.getLogger("hive.ql.metadata.Hive");
// Configuration of this instance: set by the constructor and replaced by getInternal()
// whenever a caller passes a non-null conf.
private HiveConf conf = null;
// Metastore client of this instance; close() closes it and resets it to null.
private IMetaStoreClient metaStoreClient;
// Synchronized metastore client; close() closes it as well.
// NOTE(review): presumably the object returned by getSynchronizedMSC() - confirm.
private SynchronizedMetaStoreClient syncMetaStoreClient;
// User this instance is bound to; isCurrentUserOwner() compares it with the current user
// and treats null as "owned by anyone".
private UserGroupInformation owner;
// metastore calls timing information
private final ConcurrentHashMap metaCallTimeMap = new ConcurrentHashMap<>();
/**
 * Per-thread holder of the current {@link Hive} instance.
 *
 * A thread starts without an instance ({@code initialValue()} returns null).
 * Removing the value first closes the instance held by the calling thread, if any,
 * so its metastore connections are released before the reference is dropped.
 */
private static ThreadLocal<Hive> hiveDB = new ThreadLocal<Hive>() {
  @Override
  protected Hive initialValue() {
    return null;
  }

  @Override
  public synchronized void remove() {
    // Read the thread's value once, then close it before discarding the slot.
    Hive current = this.get();
    if (current != null) {
      current.close();
    }
    super.remove();
  }
};
// Process-wide state of the one-time registration of permanent functions
// (see registerAllFunctionsOnce()).
// Note that while this is an improvement over static initialization, it is still not,
// technically, valid, because nothing prevents us from connecting to several metastores in
// the same process. This will still only get the functions from the first metastore.
private final static AtomicInteger didRegisterAllFuncs = new AtomicInteger(0);
// Values stored in didRegisterAllFuncs: NO = not registered yet (initial value 0),
// PENDING = a thread is currently registering, DONE = registration completed.
private final static int REG_FUNCS_NO = 0, REG_FUNCS_DONE = 2, REG_FUNCS_PENDING = 1;
/**
 * Registers all permanent functions by calling {@link #reloadFunctions()}, making sure
 * that only one thread in the process performs the registration at a time and that a
 * successful registration is not repeated.
 *
 * The state is kept in the static {@code didRegisterAllFuncs}:
 * the thread that moves it from REG_FUNCS_NO to REG_FUNCS_PENDING does the work;
 * threads that observe REG_FUNCS_PENDING wait on the monitor of {@code didRegisterAllFuncs}
 * (at most 100 ms per wait) and then re-read the state; REG_FUNCS_DONE returns immediately.
 * If a waiting thread is interrupted, its interrupt flag is restored and the method
 * returns without the registration having been confirmed.
 *
 * @throws HiveException if reloadFunctions() fails; the state is reset to REG_FUNCS_NO
 *           in that case, so a later call attempts the registration again
 */
private void registerAllFunctionsOnce() throws HiveException {
boolean breakLoop = false;
while (!breakLoop) {
int val = didRegisterAllFuncs.get();
switch (val) {
case REG_FUNCS_NO: {
// Try to become the registering thread; if the CAS is lost, re-read the state.
if (didRegisterAllFuncs.compareAndSet(val, REG_FUNCS_PENDING)) {
breakLoop = true;
break;
}
continue;
}
case REG_FUNCS_PENDING: {
// Another thread is registering: wait for its notifyAll() or for the 100 ms timeout.
synchronized (didRegisterAllFuncs) {
try {
didRegisterAllFuncs.wait(100);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return;
}
}
continue;
}
case REG_FUNCS_DONE: return;
default: throw new AssertionError(val);
}
}
// Only the thread that won the NO -> PENDING transition gets here.
try {
reloadFunctions();
didRegisterAllFuncs.compareAndSet(REG_FUNCS_PENDING, REG_FUNCS_DONE);
} catch (Exception e) {
LOG.warn("Failed to register all functions.", e);
// Roll back to NO so that another caller can retry.
didRegisterAllFuncs.compareAndSet(REG_FUNCS_PENDING, REG_FUNCS_NO);
throw new HiveException(e);
} finally {
// Wake up the threads waiting in the PENDING branch, whatever the outcome.
synchronized (didRegisterAllFuncs) {
didRegisterAllFuncs.notifyAll();
}
}
}
/**
 * Brings the permanent functions of the local {@link FunctionRegistry} in line with the
 * functions returned by {@code getAllFunctions()}.
 *
 * Every returned function is (re-)registered under its qualified name. Registry entries
 * whose name matches {@code .+\..+} (i.e. contains a dot) and that were not successfully
 * re-registered in this pass are unregistered afterwards. A failure for a single function
 * is logged and does not stop the reload.
 *
 * @throws HiveException if the list of functions cannot be obtained
 */
public void reloadFunctions() throws HiveException {
  // Names currently in the registry; whatever is still in this set after the loop is stale.
  Set<String> registryFunctions = new HashSet<String>(
      FunctionRegistry.getFunctionNames(".+\\..+"));
  for (Function function : getAllFunctions()) {
    String functionName = function.getFunctionName();
    try {
      LOG.info("Registering function " + functionName + " " + function.getClassName());
      String qualFunc = FunctionUtils.qualifyFunctionName(functionName, function.getDbName());
      FunctionRegistry.registerPermanentFunction(qualFunc, function.getClassName(), false,
          FunctionTask.toFunctionResource(function.getResourceUris()));
      // Only reached when the registration succeeded.
      registryFunctions.remove(qualFunc);
    } catch (Exception e) {
      // Best effort: keep going, but keep the cause in the log.
      LOG.warn("Failed to register persistent function " +
          functionName + ":" + function.getClassName() + ". Ignore and continue.", e);
    }
  }
  // unregister functions from local system registry that are not in getAllFunctions()
  for (String functionName : registryFunctions) {
    try {
      FunctionRegistry.unregisterPermanentFunction(functionName);
    } catch (Exception e) {
      LOG.warn("Failed to unregister persistent function " +
          functionName + " on reload. Ignore and continue.", e);
    }
  }
}
public static Hive get(Configuration c, Class> clazz) throws HiveException {
return get(c instanceof HiveConf ? (HiveConf)c : new HiveConf(c, clazz));
}
/**
 * Returns the Hive object of the calling thread, creating one if none exists yet.
 * The existing object is replaced by a new one when it is not compatible with the
 * given configuration or when it belongs to a different user.
 * Permanent functions are registered if a new object has to be created.
 *
 * @param c
 *          new Hive Configuration
 * @return Hive object for current thread
 * @throws HiveException
 */
public static Hive get(HiveConf c) throws HiveException {
  final boolean needsRefresh = false;
  final boolean isFastCheck = false;
  final boolean doRegisterAllFns = true;
  return getInternal(c, needsRefresh, isFastCheck, doRegisterAllFns);
}
/**
 * Variant of {@link #get(HiveConf)} that only checks whether the existing metastore
 * clients were built from this very conf object, assuming the relevant settings do not
 * change within one conf object. Function registration is requested.
 */
public static Hive getWithFastCheck(HiveConf c) throws HiveException {
  final boolean doRegisterAllFns = true;
  return getWithFastCheck(c, doRegisterAllFns);
}
/**
 * Variant of {@link #get(HiveConf)} that only checks whether the existing metastore
 * clients were built from this very conf object, assuming the relevant settings do not
 * change within one conf object.
 *
 * @param c
 *          Hive configuration
 * @param doRegisterAllFns
 *          whether a newly created Hive object should register all permanent functions
 */
public static Hive getWithFastCheck(HiveConf c, boolean doRegisterAllFns) throws HiveException {
  final boolean needsRefresh = false;
  final boolean isFastCheck = true;
  return getInternal(c, needsRefresh, isFastCheck, doRegisterAllFns);
}
/**
 * Common implementation of the static getters.
 *
 * Returns the Hive object of the calling thread. A new object is created (and the old
 * one, if any, closed) when there is none yet, when the existing one is not owned by the
 * current user, when a refresh is requested, or when {@code c} is non-null and the
 * existing object is not compatible with it. A non-null {@code c} always becomes the
 * conf of the returned object.
 *
 * @param c conf to use; may be null
 * @param needsRefresh if true, a new object is always created
 * @param isFastCheck selects the conf identity check instead of the full compatibility check
 * @param doRegisterAllFns whether a newly created object registers all permanent functions
 * @return Hive object for current thread
 * @throws HiveException
 */
private static Hive getInternal(HiveConf c, boolean needsRefresh, boolean isFastCheck,
    boolean doRegisterAllFns) throws HiveException {
  Hive db = hiveDB.get();
  if (db == null || !db.isCurrentUserOwner() || needsRefresh
      || (c != null && !isCompatible(db, c, isFastCheck))) {
    // Hand over the real needsRefresh value: create() only uses it for its debug message,
    // which would otherwise always report "needsRefresh = false".
    db = create(c, needsRefresh, db, doRegisterAllFns);
  }
  if (c != null) {
    db.conf = c;
  }
  return db;
}
/**
 * Replaces the Hive object of the calling thread with a freshly constructed one.
 *
 * The given previous object (if any) is closed, the thread-local slot is cleared, and a
 * new object is built and stored in the slot. When no conf is given, one is obtained
 * from createHiveConf(). The conf used gets "fs.scheme.class" set to "dfs".
 *
 * @param c conf for the new object; may be null
 * @param needsRefresh only reported in the debug message
 * @param db previous object to close; may be null
 * @param doRegisterAllFns whether the new object registers all permanent functions
 * @return the new Hive object
 * @throws HiveException
 */
private static Hive create(HiveConf c, boolean needsRefresh, Hive db, boolean doRegisterAllFns)
    throws HiveException {
  final boolean replacingExisting = (db != null);
  if (replacingExisting) {
    final String message = "Creating new db. db = " + db + ", needsRefresh = " + needsRefresh +
        ", db.isCurrentUserOwner = " + db.isCurrentUserOwner();
    LOG.debug(message);
    db.close();
  }
  closeCurrent();

  final HiveConf effectiveConf = (c != null) ? c : createHiveConf();
  effectiveConf.set("fs.scheme.class", "dfs");

  final Hive created = new Hive(effectiveConf, doRegisterAllFns);
  hiveDB.set(created);
  return created;
}
/**
 * Supplies a conf when the caller gave none: the conf of the current session if there
 * is a session, otherwise a new HiveConf.
 */
private static HiveConf createHiveConf() {
  final SessionState current = SessionState.get();
  if (current != null) {
    return current.getConf();
  }
  return new HiveConf(Hive.class);
}
/**
 * Tells whether the metastore clients held by {@code db} can be used with conf {@code c}.
 * A client that does not exist yet counts as compatible.
 *
 * @param db Hive object whose clients are checked
 * @param c conf to check against
 * @param isFastCheck if true the clients are asked isSameConfObj(c), otherwise
 *          isCompatibleWith(c)
 * @return true if both clients (as far as they exist) accept the conf
 */
private static boolean isCompatible(Hive db, HiveConf c, boolean isFastCheck) {
  final IMetaStoreClient client = db.metaStoreClient;
  if (client != null) {
    final boolean clientOk = isFastCheck ? client.isSameConfObj(c) : client.isCompatibleWith(c);
    if (!clientOk) {
      // Same short-circuit as an && chain: the synchronized client is not consulted.
      return false;
    }
  }
  final SynchronizedMetaStoreClient syncClient = db.syncMetaStoreClient;
  if (syncClient == null) {
    return true;
  }
  return isFastCheck ? syncClient.isSameConfObj(c) : syncClient.isCompatibleWith(c);
}
/**
 * Returns the Hive object of the calling thread without supplying a conf;
 * function registration is requested.
 */
public static Hive get() throws HiveException {
  final boolean doRegisterAllFns = true;
  return get(doRegisterAllFns);
}
/**
 * Returns the Hive object of the calling thread without supplying a conf.
 *
 * @param doRegisterAllFns
 *          whether a newly created Hive object should register all permanent functions
 */
public static Hive get(boolean doRegisterAllFns) throws HiveException {
  final HiveConf noConf = null;
  final boolean needsRefresh = false;
  final boolean isFastCheck = false;
  return getInternal(noConf, needsRefresh, isFastCheck, doRegisterAllFns);
}
/**
 * Returns the Hive object of the calling thread, optionally forcing a new one.
 * See {@link #get(HiveConf)} for the conditions under which an existing object is replaced.
 *
 * @param c
 *          new conf
 * @param needsRefresh
 *          if true then a new object is created even if the existing one is usable
 * @return Hive object for current thread
 * @throws HiveException
 */
public static Hive get(HiveConf c, boolean needsRefresh) throws HiveException {
  final boolean isFastCheck = false;
  final boolean doRegisterAllFns = true;
  return getInternal(c, needsRefresh, isFastCheck, doRegisterAllFns);
}
/**
 * Makes the given object the Hive object of the calling thread.
 * The object previously stored for the thread, if any, is not closed here.
 *
 * @param hive object to store in the thread-local slot
 */
public static void set(Hive hive) {
hiveDB.set(hive);
}
/**
 * Discards the Hive object of the calling thread.
 * The thread-local's remove() closes the stored object (if there is one) before
 * clearing the slot.
 */
public static void closeCurrent() {
hiveDB.remove();
}
/**
 * Creates a Hive object for the given configuration. No metastore client is created here.
 *
 * @param c
 *          configuration of the new object
 * @param doRegisterAllFns
 *          if true, the one-time registration of all permanent functions is triggered
 * @throws HiveException if the function registration fails
 */
private Hive(HiveConf c, boolean doRegisterAllFns) throws HiveException {
  this.conf = c;
  if (!doRegisterAllFns) {
    return;
  }
  registerAllFunctionsOnce();
}
/**
 * Tells whether this object may be used by the current user.
 *
 * @return true if no owner is recorded or the owner equals the current user
 * @throws HiveException if the current user cannot be determined
 */
private boolean isCurrentUserOwner() throws HiveException {
  if (owner == null) {
    // Nothing recorded yet: the current user is not looked up at all.
    return true;
  }
  try {
    final UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
    return owner.equals(currentUser);
  } catch(IOException e) {
    throw new HiveException("Error getting current user: " + e.getMessage(), e);
  }
}
/**
 * Closes the metastore connections of this object and forgets its owner.
 *
 * Both client references are reset to null after closing, which makes the method safe to
 * call repeatedly: create() closes the previous object directly and then again through
 * closeCurrent(), and a client must not be closed twice or kept around once closed.
 */
private void close() {
  LOG.debug("Closing current thread's connection to Hive Metastore.");
  if (metaStoreClient != null) {
    metaStoreClient.close();
    metaStoreClient = null;
  }
  if (syncMetaStoreClient != null) {
    syncMetaStoreClient.close();
    // Same treatment as metaStoreClient: drop the closed client.
    syncMetaStoreClient = null;
  }
  owner = null;
}
/**
 * Creates a database in the metastore.
 *
 * @param db
 *          database to create
 * @param ifNotExist
 *          if true, an already existing database is silently accepted
 * @throws AlreadyExistsException
 *           if the database exists and {@code ifNotExist} is false
 * @throws HiveException
 *           wrapping any other failure
 */
public void createDatabase(Database db, boolean ifNotExist)
    throws AlreadyExistsException, HiveException {
  try {
    getMSC().createDatabase(db);
  } catch (AlreadyExistsException e) {
    if (ifNotExist) {
      return;
    }
    throw e;
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
/**
 * Creates a database and fails if a database with the same name already exists.
 *
 * @param db
 *          database to create
 * @throws AlreadyExistsException
 *           if the database already exists
 * @throws HiveException
 */
public void createDatabase(Database db) throws AlreadyExistsException, HiveException {
  final boolean ifNotExist = false;
  createDatabase(db, ifNotExist);
}
/**
 * Drops a database including its data. An unknown database is reported as an error and
 * the drop does not cascade to tables.
 *
 * @param name
 *          name of the database
 * @throws NoSuchObjectException
 * @throws HiveException
 * @see org.apache.hadoop.hive.metastore.HiveMetaStoreClient#dropDatabase(java.lang.String)
 */
public void dropDatabase(String name) throws HiveException, NoSuchObjectException {
  final boolean deleteData = true;
  final boolean ignoreUnknownDb = false;
  final boolean cascade = false;
  dropDatabase(name, deleteData, ignoreUnknownDb, cascade);
}
/**
 * Drops a database without cascading to its tables.
 *
 * @param name
 *          name of the database
 * @param deleteData
 *          passed on to the metastore client
 * @param ignoreUnknownDb
 *          if true, will ignore NoSuchObjectException
 * @throws HiveException
 * @throws NoSuchObjectException
 */
public void dropDatabase(String name, boolean deleteData, boolean ignoreUnknownDb)
    throws HiveException, NoSuchObjectException {
  final boolean cascade = false;
  dropDatabase(name, deleteData, ignoreUnknownDb, cascade);
}
/**
 * Drops a database through the metastore client.
 *
 * @param name
 *          name of the database
 * @param deleteData
 *          passed on to the metastore client
 * @param ignoreUnknownDb
 *          if true, will ignore NoSuchObjectException
 * @param cascade
 *          if true, delete all tables of the database if there are any. Otherwise the
 *          query will fail if a table still exists.
 * @throws NoSuchObjectException
 *           passed through unchanged from the metastore client
 * @throws HiveException
 *           wrapping any other failure
 */
public void dropDatabase(String name, boolean deleteData, boolean ignoreUnknownDb, boolean cascade)
    throws HiveException, NoSuchObjectException {
  try {
    getMSC().dropDatabase(name, deleteData, ignoreUnknownDb, cascade);
  } catch (Exception e) {
    if (e instanceof NoSuchObjectException) {
      // Callers get this one as is; everything else is wrapped.
      throw (NoSuchObjectException) e;
    }
    throw new HiveException(e);
  }
}
/**
* Creates a table metadata and the directory for the table data
*
* @param tableName
* name of the table
* @param columns
* list of fields of the table
* @param partCols
* partition keys of the table
* @param fileInputFormat
* Class of the input format of the table data file
* @param fileOutputFormat
* Class of the output format of the table data file
* @throws HiveException
* thrown if the args are invalid or if the metadata or the data
* directory couldn't be created
*/
public void createTable(String tableName, List columns,
List partCols, Class extends InputFormat> fileInputFormat,
Class> fileOutputFormat) throws HiveException {
this.createTable(tableName, columns, partCols, fileInputFormat,
fileOutputFormat, -1, null);
}
/**
* Creates a table metadata and the directory for the table data
*
* @param tableName
* name of the table
* @param columns
* list of fields of the table
* @param partCols
* partition keys of the table
* @param fileInputFormat
* Class of the input format of the table data file
* @param fileOutputFormat
* Class of the output format of the table data file
* @param bucketCount
* number of buckets that each partition (or the table itself) should
* be divided into
* @throws HiveException
* thrown if the args are invalid or if the metadata or the data
* directory couldn't be created
*/
public void createTable(String tableName, List columns,
List partCols, Class extends InputFormat> fileInputFormat,
Class> fileOutputFormat, int bucketCount, List bucketCols)
throws HiveException {
createTable(tableName, columns, partCols, fileInputFormat, fileOutputFormat, bucketCount,
bucketCols, null);
}
/**
* Create a table metadata and the directory for the table data
* @param tableName table name
* @param columns list of fields of the table
* @param partCols partition keys of the table
* @param fileInputFormat Class of the input format of the table data file
* @param fileOutputFormat Class of the output format of the table data file
* @param bucketCount number of buckets that each partition (or the table itself) should be
* divided into
* @param bucketCols Bucket columns
* @param parameters Parameters for the table
* @throws HiveException
*/
public void createTable(String tableName, List columns, List partCols,
Class extends InputFormat> fileInputFormat,
Class> fileOutputFormat, int bucketCount, List bucketCols,
Map parameters) throws HiveException {
if (columns == null) {
throw new HiveException("columns not specified for table " + tableName);
}
Table tbl = newTable(tableName);
tbl.setInputFormatClass(fileInputFormat.getName());
tbl.setOutputFormatClass(fileOutputFormat.getName());
for (String col : columns) {
FieldSchema field = new FieldSchema(col, STRING_TYPE_NAME, "default");
tbl.getCols().add(field);
}
if (partCols != null) {
for (String partCol : partCols) {
FieldSchema part = new FieldSchema();
part.setName(partCol);
part.setType(STRING_TYPE_NAME); // default partition key
tbl.getPartCols().add(part);
}
}
tbl.setSerializationLib(LazySimpleSerDe.class.getName());
tbl.setNumBuckets(bucketCount);
tbl.setBucketCols(bucketCols);
if (parameters != null) {
tbl.setParameters(parameters);
}
createTable(tbl);
}
/**
 * Updates the metadata of the table identified by the database and table name stored
 * in {@code newTbl}, without cascading.
 *
 * @param newTbl
 *          new table metadata
 * @param environmentContext
 *          environment context handed to the metastore; may be null
 * @throws HiveException
 */
public void alterTable(Table newTbl, EnvironmentContext environmentContext)
    throws HiveException {
  final String dbName = newTbl.getDbName();
  final String tblName = newTbl.getTableName();
  final boolean cascade = false;
  alterTable(dbName, tblName, newTbl, cascade, environmentContext);
}
/**
 * Updates the existing table metadata with the new metadata, without cascading.
 *
 * @param fullyQlfdTblName
 *          name of the existing table
 * @param newTbl
 *          new table metadata; its name may be the old name
 * @param environmentContext
 *          environment context handed to the metastore; may be null
 * @throws HiveException
 *           if the table cannot be altered
 */
public void alterTable(String fullyQlfdTblName, Table newTbl, EnvironmentContext environmentContext)
    throws HiveException {
  final boolean cascade = false;
  alterTable(fullyQlfdTblName, newTbl, cascade, environmentContext);
}
/**
 * Updates the existing table metadata with the new metadata.
 * The table name is split by {@code Utilities.getDbTableName} into database and table name.
 *
 * @param fullyQlfdTblName
 *          name of the existing table
 * @param newTbl
 *          new table metadata
 * @param cascade
 *          whether the change is flagged as cascading in the environment context
 * @param environmentContext
 *          environment context handed to the metastore; may be null
 * @throws HiveException
 */
public void alterTable(String fullyQlfdTblName, Table newTbl, boolean cascade, EnvironmentContext environmentContext)
    throws HiveException {
  final String[] dbAndTable = Utilities.getDbTableName(fullyQlfdTblName);
  final String dbName = dbAndTable[0];
  final String tblName = dbAndTable[1];
  alterTable(dbName, tblName, newTbl, cascade, environmentContext);
}
/**
 * Updates the metadata of table {@code dbName.tblName} with {@code newTbl}.
 *
 * The DDL_TIME parameter is removed from {@code newTbl} so that it gets refreshed, and
 * the new table is validated before the metastore is called.
 *
 * @param dbName
 *          database of the existing table
 * @param tblName
 *          name of the existing table
 * @param newTbl
 *          new table metadata
 * @param cascade
 *          if true, StatsSetupConst.CASCADE is set to true in the environment context
 * @param environmentContext
 *          environment context handed to the metastore; a new one is used when null
 * @throws HiveException
 *           if the new table is invalid or the metastore call fails
 */
public void alterTable(String dbName, String tblName, Table newTbl, boolean cascade,
    EnvironmentContext environmentContext)
    throws HiveException {
  try {
    // Remove the DDL_TIME so it gets refreshed
    final boolean hasParameters = newTbl.getParameters() != null;
    if (hasParameters) {
      newTbl.getParameters().remove(hive_metastoreConstants.DDL_TIME);
    }
    newTbl.checkValidity(conf);

    final EnvironmentContext context =
        (environmentContext != null) ? environmentContext : new EnvironmentContext();
    if (cascade) {
      context.putToProperties(StatsSetupConst.CASCADE, StatsSetupConst.TRUE);
    }
    getMSC().alter_table_with_environmentContext(dbName, tblName, newTbl.getTTable(), context);
  } catch (TException e) {
    // MetaException is a TException as well, so this single handler covers both.
    throw new HiveException("Unable to alter table. " + e.getMessage(), e);
  }
}
/**
 * Stores new creation metadata for the given table in the metastore.
 *
 * @param dbName
 *          database of the table
 * @param tableName
 *          name of the table
 * @param cm
 *          creation metadata to store
 * @throws HiveException
 *           if the metastore call fails
 */
public void updateCreationMetadata(String dbName, String tableName, CreationMetadata cm)
    throws HiveException {
  try {
    getMSC().updateCreationMetadata(dbName, tableName, cm);
  } catch (TException cause) {
    final String message = "Unable to update creation metadata " + cause.getMessage();
    throw new HiveException(message, cause);
  }
}
/**
 * Updates the existing partition metadata with the new metadata.
 * The table name is split by {@code Utilities.getDbTableName} into database and table name.
 *
 * @param tblName
 *          name of the existing table
 * @param newPart
 *          new partition
 * @param environmentContext
 *          environment context handed to the metastore
 * @throws InvalidOperationException
 *           if the changes in metadata is not acceptable
 * @throws HiveException
 */
public void alterPartition(String tblName, Partition newPart, EnvironmentContext environmentContext)
    throws InvalidOperationException, HiveException {
  final String[] dbAndTable = Utilities.getDbTableName(tblName);
  final String dbName = dbAndTable[0];
  final String tableName = dbAndTable[1];
  alterPartition(dbName, tableName, newPart, environmentContext);
}
/**
 * Updates the existing partition metadata with the new metadata.
 *
 * The partition is validated first, and a non-null location is replaced by its
 * qualified form before the synchronized metastore client is called.
 *
 * @param dbName
 *          name of the existing table's database
 * @param tblName
 *          name of the existing table
 * @param newPart
 *          new partition
 * @param environmentContext
 *          environment context handed to the metastore
 * @throws InvalidOperationException
 *           if the changes in metadata is not acceptable
 * @throws HiveException
 *           if the partition is invalid or the metastore call fails
 */
public void alterPartition(String dbName, String tblName, Partition newPart, EnvironmentContext environmentContext)
    throws InvalidOperationException, HiveException {
  try {
    validatePartition(newPart);
    final String rawLocation = newPart.getLocation();
    if (rawLocation != null) {
      final String qualified = Utilities.getQualifiedPath(conf, new Path(rawLocation));
      newPart.setLocation(qualified);
    }
    getSynchronizedMSC().alter_partition(dbName, tblName, newPart.getTPartition(), environmentContext);
  } catch (TException e) {
    // MetaException is a TException as well, so this single handler covers both.
    throw new HiveException("Unable to alter partition. " + e.getMessage(), e);
  }
}
/**
 * Prepares a partition for an alter call: drops its DDL_TIME parameter so that the
 * value gets refreshed, then checks the partition's validity.
 *
 * @param newPart
 *          partition to prepare
 * @throws HiveException
 *           if the validity check fails
 */
private void validatePartition(Partition newPart) throws HiveException {
  final boolean hasParameters = newPart.getParameters() != null;
  if (hasParameters) {
    newPart.getParameters().remove(hive_metastoreConstants.DDL_TIME);
  }
  newPart.checkValidity();
}
/**
 * Updates the metadata of several partitions of one table.
 *
 * For every partition the DDL_TIME parameter is removed so that it gets refreshed, and a
 * non-null location is replaced by its qualified form. Unlike alterPartition, the
 * partitions are not passed through checkValidity() here.
 *
 * @param tblName
 *          name of the existing table
 * @param newParts
 *          new partitions
 * @param environmentContext
 *          environment context handed to the metastore
 * @throws InvalidOperationException
 *           if the changes in metadata is not acceptable
 * @throws HiveException
 *           if the metastore call fails
 */
public void alterPartitions(String tblName, List<Partition> newParts, EnvironmentContext environmentContext)
    throws InvalidOperationException, HiveException {
  String[] names = Utilities.getDbTableName(tblName);
  // Metastore (thrift) representation of the partitions, in the order given.
  List<org.apache.hadoop.hive.metastore.api.Partition> newTParts =
      new ArrayList<org.apache.hadoop.hive.metastore.api.Partition>();
  try {
    // Remove the DDL time so that it gets refreshed
    for (Partition tmpPart: newParts) {
      if (tmpPart.getParameters() != null) {
        tmpPart.getParameters().remove(hive_metastoreConstants.DDL_TIME);
      }
      String location = tmpPart.getLocation();
      if (location != null) {
        location = Utilities.getQualifiedPath(conf, new Path(location));
        tmpPart.setLocation(location);
      }
      newTParts.add(tmpPart.getTPartition());
    }
    getMSC().alter_partitions(names[0], names[1], newTParts, environmentContext);
  } catch (MetaException e) {
    throw new HiveException("Unable to alter partition. " + e.getMessage(), e);
  } catch (TException e) {
    throw new HiveException("Unable to alter partition. " + e.getMessage(), e);
  }
}
/**
 * Renames an old partition to a new partition.
 *
 * Both partition specs must name exactly the partition columns of the table. The values
 * of the old spec are sent to the metastore in the order of the table's partition columns.
 *
 * @param tbl
 *          existing table
 * @param oldPartSpec
 *          spec of old partition (partition column name to value)
 * @param newPart
 *          new partition
 * @throws HiveException
 *           if the specs do not match the partition columns, a value of the old spec is
 *           null or empty, or the metastore call fails
 */
public void renamePartition(Table tbl, Map<String, String> oldPartSpec, Partition newPart)
    throws HiveException {
  try {
    Map<String, String> newPartSpec = newPart.getSpec();
    int partColCount = tbl.getPartCols().size();
    if (oldPartSpec.size() != partColCount || newPartSpec.size() != partColCount) {
      throw new HiveException("Unable to rename partition to the same name: number of partition cols don't match. ");
    }
    if (!oldPartSpec.keySet().equals(newPartSpec.keySet())){
      throw new HiveException("Unable to rename partition to the same name: old and new partition cols don't match. ");
    }
    List<String> pvals = new ArrayList<String>();
    for (FieldSchema field : tbl.getPartCols()) {
      String val = oldPartSpec.get(field.getName());
      if (val == null || val.length() == 0) {
        throw new HiveException("get partition: Value for key "
            + field.getName() + " is null or empty");
      }
      pvals.add(val);
    }
    getMSC().renamePartition(tbl.getDbName(), tbl.getTableName(), pvals,
        newPart.getTPartition());
  } catch (TException e) {
    // InvalidOperationException and MetaException are TExceptions too and were handled
    // identically, so a single handler is enough.
    throw new HiveException("Unable to rename partition. " + e.getMessage(), e);
  }
}
/**
 * Updates the metadata of a database.
 *
 * @param dbName
 *          name of the existing database
 * @param db
 *          new database metadata
 * @throws HiveException
 *           if the database does not exist or the metastore call fails
 */
public void alterDatabase(String dbName, Database db)
    throws HiveException {
  try {
    getMSC().alterDatabase(dbName, db);
  } catch (NoSuchObjectException e) {
    throw new HiveException("Database " + dbName + " does not exists.", e);
  } catch (TException e) {
    // Also covers MetaException, which is a TException and had the same message.
    throw new HiveException("Unable to alter database " + dbName + ". " + e.getMessage(), e);
  }
}
/**
 * Creates the table with the given objects; an already existing table is an error.
 *
 * @param tbl
 *          a table object
 * @throws HiveException
 */
public void createTable(Table tbl) throws HiveException {
  final boolean ifNotExists = false;
  createTable(tbl, ifNotExists);
}
/**
 * Creates the table with the given objects. It takes additional arguments for
 * the constraints associated with the table.
 *
 * A table without database name is put into the current database of the session. A
 * table without columns gets its fields from its deserializer. The table is validated,
 * its DDL_TIME parameter is removed, and the automatic grants of the session (if any)
 * are attached before the metastore is called. If all constraint lists are null the
 * plain createTable call of the metastore client is used.
 *
 * @param tbl
 *          a table object
 * @param ifNotExists
 *          if true, ignore AlreadyExistsException
 * @param primaryKeys
 *          primary key columns associated with the table
 * @param foreignKeys
 *          foreign key columns associated with the table
 * @param uniqueConstraints
 *          UNIQUE constraints associated with the table
 * @param notNullConstraints
 *          NOT NULL constraints associated with the table
 * @param defaultConstraints
 *          DEFAULT constraints associated with the table
 * @param checkConstraints
 *          CHECK constraints associated with the table
 * @throws HiveException
 *           wrapping any failure, including AlreadyExistsException when
 *           {@code ifNotExists} is false
 */
public void createTable(Table tbl, boolean ifNotExists,
    List<SQLPrimaryKey> primaryKeys,
    List<SQLForeignKey> foreignKeys,
    List<SQLUniqueConstraint> uniqueConstraints,
    List<SQLNotNullConstraint> notNullConstraints,
    List<SQLDefaultConstraint> defaultConstraints,
    List<SQLCheckConstraint> checkConstraints)
    throws HiveException {
  try {
    if (tbl.getDbName() == null || "".equals(tbl.getDbName().trim())) {
      tbl.setDbName(SessionState.get().getCurrentDatabase());
    }
    if (tbl.getCols().size() == 0 || tbl.getSd().getColsSize() == 0) {
      tbl.setFields(HiveMetaStoreUtils.getFieldsFromDeserializer(tbl.getTableName(),
          tbl.getDeserializer()));
    }
    tbl.checkValidity(conf);
    // Remove the DDL_TIME so it gets refreshed
    if (tbl.getParameters() != null) {
      tbl.getParameters().remove(hive_metastoreConstants.DDL_TIME);
    }
    org.apache.hadoop.hive.metastore.api.Table tTbl = tbl.getTTable();
    PrincipalPrivilegeSet principalPrivs = new PrincipalPrivilegeSet();
    SessionState ss = SessionState.get();
    if (ss != null) {
      CreateTableAutomaticGrant grants = ss.getCreateTableGrants();
      if (grants != null) {
        principalPrivs.setUserPrivileges(grants.getUserGrants());
        principalPrivs.setGroupPrivileges(grants.getGroupGrants());
        principalPrivs.setRolePrivileges(grants.getRoleGrants());
        tTbl.setPrivileges(principalPrivs);
      }
    }
    if (primaryKeys == null && foreignKeys == null
        && uniqueConstraints == null && notNullConstraints == null && defaultConstraints == null
        && checkConstraints == null) {
      getMSC().createTable(tTbl);
    } else {
      getMSC().createTableWithConstraints(tTbl, primaryKeys, foreignKeys,
          uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints);
    }
  } catch (AlreadyExistsException e) {
    if (!ifNotExists) {
      throw new HiveException(e);
    }
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
/**
 * Creates the table described by the given object without any constraints;
 * all six constraint lists are passed as null.
 *
 * @param tbl
 *          a table object
 * @param ifNotExists
 *          if true, ignore AlreadyExistsException
 * @throws HiveException
 */
public void createTable(Table tbl, boolean ifNotExists) throws HiveException {
  // No primary key, foreign key, unique, not-null, default or check constraints.
  createTable(tbl, ifNotExists, null, null, null, null, null, null);
}
/**
 * Obtains the fields of the table from the given deserializer and overwrites
 * the type of every field with MetaStoreUtils.TYPE_FROM_DESERIALIZER.
 *
 * @param tbl
 *          table whose name is passed to the schema lookup
 * @param deserializer
 *          deserializer the fields are read from
 * @return the fields, each with its type replaced by the marker value
 * @throws SerDeException
 * @throws MetaException
 */
public static List getFieldsFromDeserializerForMsStorage(
    Table tbl, Deserializer deserializer) throws SerDeException, MetaException {
  List fields = HiveMetaStoreUtils.getFieldsFromDeserializer(tbl.getTableName(), deserializer);
  for (FieldSchema column : fields) {
    column.setType(MetaStoreUtils.TYPE_FROM_DESERIALIZER);
  }
  return fields;
}
/**
 * Drops the table together with its data. A missing table is not an error
 * (ignoreUnknownTab is passed as true). The purge flag is forwarded to the
 * five-argument overload.
 *
 * @param tableName
 *          table to drop; split into database and table name by
 *          Utilities.getDbTableName
 * @param ifPurge
 *          completely purge the table (skipping trash) while removing data from warehouse
 * @throws HiveException
 *           thrown if the drop fails
 */
public void dropTable(String tableName, boolean ifPurge) throws HiveException {
  String[] dbAndTable = Utilities.getDbTableName(tableName);
  String db = dbAndTable[0];
  String table = dbAndTable[1];
  dropTable(db, table, true, true, ifPurge);
}
/**
 * Drops the table together with its data, without purging. A missing table
 * is a no-op.
 *
 * @param tableName
 *          table to drop
 * @throws HiveException
 *           thrown if the drop fails
 */
public void dropTable(String tableName) throws HiveException {
  final boolean ifPurge = false;
  dropTable(tableName, ifPurge);
}
/**
 * Drops the table together with its data, without purging. A missing table
 * is a no-op.
 *
 * @param dbName
 *          database where the table lives
 * @param tableName
 *          table to drop
 * @throws HiveException
 *           thrown if the drop fails
 */
public void dropTable(String dbName, String tableName) throws HiveException {
  final boolean deleteData = true;
  final boolean ignoreUnknownTab = true;
  final boolean ifPurge = false;
  dropTable(dbName, tableName, deleteData, ignoreUnknownTab, ifPurge);
}
/**
 * Drops the table without purging (ifPurge is passed as false).
 *
 * @param dbName
 *          database where the table lives
 * @param tableName
 *          table to drop
 * @param deleteData
 *          deletes the underlying data along with metadata
 * @param ignoreUnknownTab
 *          an exception is thrown if this is false and the table doesn't exist
 * @throws HiveException
 */
public void dropTable(String dbName, String tableName, boolean deleteData,
    boolean ignoreUnknownTab) throws HiveException {
  final boolean ifPurge = false;
  dropTable(dbName, tableName, deleteData, ignoreUnknownTab, ifPurge);
}
/**
 * Drops the table through the metastore client.
 *
 * @param dbName
 *          database where the table lives
 * @param tableName
 *          table to drop
 * @param deleteData
 *          deletes the underlying data along with metadata
 * @param ignoreUnknownTab
 *          an exception is thrown if this is false and the table doesn't exist
 * @param ifPurge
 *          completely purge the table skipping trash while removing data from warehouse
 * @throws HiveException
 *           wrapping any failure, except a NoSuchObjectException when
 *           ignoreUnknownTab is true
 */
public void dropTable(String dbName, String tableName, boolean deleteData,
    boolean ignoreUnknownTab, boolean ifPurge) throws HiveException {
  try {
    getMSC().dropTable(dbName, tableName, deleteData, ignoreUnknownTab, ifPurge);
  } catch (NoSuchObjectException e) {
    if (ignoreUnknownTab) {
      // Caller asked for a missing table to be tolerated.
      return;
    }
    throw new HiveException(e);
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
/**
 * Truncates the table, or the partitions matching the given specification.
 *
 * @param dbDotTableName
 *          name of the table
 * @param partSpec
 *          partition specification; when null no partition names are resolved
 *          and null is passed to the metastore client instead
 * @throws HiveException
 *           wrapping any failure while looking up the table or truncating
 */
public void truncateTable(String dbDotTableName, Map partSpec) throws HiveException {
  try {
    Table table = getTable(dbDotTableName, true);
    List partNames = null;
    if (partSpec != null) {
      // (short) -1 is forwarded unchanged as the last argument of getPartitionNames.
      partNames = getPartitionNames(table.getDbName(), table.getTableName(), partSpec, (short) -1);
    }
    getMSC().truncateTable(table.getDbName(), table.getTableName(), partNames);
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
/**
 * @return the configuration object held in the conf field
 */
public HiveConf getConf() {
  return conf;
}
/**
 * Returns metadata for the table named tableName. A missing table is
 * reported with an exception (throwException is passed as true).
 *
 * @param tableName the name of the table
 * @return the table metadata
 * @throws HiveException if there's an internal error or if the
 * table doesn't exist
 */
public Table getTable(final String tableName) throws HiveException {
  final boolean throwException = true;
  return this.getTable(tableName, throwException);
}
/**
 * Returns metadata for the table named tableName. The name is split into
 * database and table name by Utilities.getDbTableName.
 *
 * @param tableName the name of the table
 * @param throwException controls whether an exception is thrown or null is
 * returned when the table is not found
 * @return the table metadata, or null if the table is not found and
 * throwException is false
 * @throws HiveException if there's an internal error or if the
 * table doesn't exist and throwException is true
 */
public Table getTable(final String tableName, boolean throwException) throws HiveException {
  String[] dbAndTable = Utilities.getDbTableName(tableName);
  String db = dbAndTable[0];
  String table = dbAndTable[1];
  return this.getTable(db, table, throwException);
}
/**
 * Returns metadata of the table. If tableName contains a '.', it is split
 * by Utilities.getDbTableName and the database part of the name is used
 * instead of dbName.
 *
 * @param dbName
 *          the name of the database
 * @param tableName
 *          the name of the table
 * @return the table
 * @exception HiveException
 *              if there's an internal error, if the table doesn't exist, or
 *              if tableName is null or empty
 */
public Table getTable(final String dbName, final String tableName) throws HiveException {
  // Null check first: a null name is passed on to the three-argument overload,
  // which reports it as a HiveException instead of failing here with a
  // NullPointerException.
  if (tableName != null && tableName.contains(".")) {
    String[] names = Utilities.getDbTableName(tableName);
    return this.getTable(names[0], names[1], true);
  }
  return this.getTable(dbName, tableName, true);
}
/**
 * Returns metadata of the table, fetched through the metastore client.
 * For anything that is not a virtual view, two fix-ups are applied to the
 * fetched object before it is wrapped: a single-character serialization
 * format below 10 is replaced by its decimal string, and the
 * MetadataTypedColumnsetSerDe library is swapped for LazySimpleSerDe when
 * the first column's type contains no '<'.
 *
 * @param dbName
 *          the name of the database
 * @param tableName
 *          the name of the table
 * @param throwException
 *          controls whether an exception is thrown or null is returned when
 *          the table is not found
 * @return the table or if throwException is false a null value.
 * @throws HiveException
 *           if tableName is null or empty, or the table cannot be fetched
 */
public Table getTable(final String dbName, final String tableName,
    boolean throwException) throws HiveException {
  if (tableName == null || tableName.isEmpty()) {
    throw new HiveException("empty table creation??");
  }

  // Get the table from metastore
  org.apache.hadoop.hive.metastore.api.Table tTable;
  try {
    tTable = getMSC().getTable(dbName, tableName);
  } catch (NoSuchObjectException e) {
    if (!throwException) {
      return null;
    }
    LOG.error("Table " + dbName + "." + tableName + " not found: " + e.getMessage());
    throw new InvalidTableException(tableName);
  } catch (Exception e) {
    throw new HiveException("Unable to fetch table " + tableName + ". " + e.getMessage(), e);
  }

  // For non-views, we need to do some extra fixes
  boolean isVirtualView = TableType.VIRTUAL_VIEW.toString().equals(tTable.getTableType());
  if (!isVirtualView) {
    // Fix the non-printable chars: ^A, ^B, ^C, ^D, \t are stored as their numeric code.
    Map parameters = tTable.getSd().getParameters();
    String sf = null;
    if (parameters != null) {
      sf = parameters.get(SERIALIZATION_FORMAT);
    }
    if (sf != null && sf.length() == 1 && sf.charAt(0) < 10) {
      parameters.put(SERIALIZATION_FORMAT, Integer.toString(sf.charAt(0)));
    }

    // Use LazySimpleSerDe for MetadataTypedColumnsetSerDe.
    // NOTE: the swap is skipped when the first column has a type containing '<';
    // per the original note, LazySimpleSerDe does not support such single-column
    // tables, which happens when the table was created by an earlier version of Hive.
    String legacySerde = org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe.class.getName();
    if (legacySerde.equals(tTable.getSd().getSerdeInfo().getSerializationLib())
        && tTable.getSd().getColsSize() > 0
        && tTable.getSd().getCols().get(0).getType().indexOf('<') < 0) {
      String replacementSerde = org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName();
      tTable.getSd().getSerdeInfo().setSerializationLib(replacementSerde);
    }
  }

  return new Table(tTable);
}
/**
 * Get all table names for the session's current database. No pattern and
 * no table type are passed on.
 *
 * @return List of table names
 * @throws HiveException
 */
public List getAllTables() throws HiveException {
  String currentDb = SessionState.get().getCurrentDatabase();
  return getTablesByType(currentDb, null, null);
}
/**
 * Get all table names for the specified database, using the match-all
 * pattern ".*" and no table type filter.
 *
 * @param dbName
 *          database to list tables from
 * @return List of table names
 * @throws HiveException
 */
public List getAllTables(String dbName) throws HiveException {
  final String matchAll = ".*";
  return getTablesByType(dbName, matchAll, null);
}
/**
 * Get all table objects for the specified database, using the match-all
 * pattern ".*" and no table type filter.
 *
 * @param dbName
 *          database to list tables from
 * @return List of all tables
 * @throws HiveException
 */
public List getAllTableObjects(String dbName) throws HiveException {
  final String matchAll = ".*";
  return getTableObjects(dbName, matchAll, null);
}
/**
 * Get all materialized view names for the specified database, i.e. every
 * name matching ".*" whose type is TableType.MATERIALIZED_VIEW.
 *
 * @param dbName
 *          database to list materialized views from
 * @return List of materialized view table names
 * @throws HiveException
 */
public List getAllMaterializedViews(String dbName) throws HiveException {
  final String matchAll = ".*";
  return getTablesByType(dbName, matchAll, TableType.MATERIALIZED_VIEW);
}
/**
 * Get the table objects of the specified database whose names match the
 * given pattern and table type. The names are resolved with
 * getTablesByType and then loaded by the two-argument getTableObjects
 * overload, so a failure while resolving the names surfaces as that
 * HiveException directly instead of being wrapped a second time.
 *
 * @param dbName
 *          database to look in; handled as in getTablesByType
 * @param pattern
 *          pattern for the table names; handled as in getTablesByType
 * @param tableType
 *          type of tables to return; handled as in getTablesByType
 * @return List of matching table objects
 * @throws HiveException
 *           if the names or the table objects cannot be fetched
 */
public List getTableObjects(String dbName, String pattern, TableType tableType)
    throws HiveException {
  // Reuse the by-name overload rather than duplicating its transform logic.
  return getTableObjects(dbName, getTablesByType(dbName, pattern, tableType));
}
/**
 * Fetches the metastore table objects with the given names and exposes each
 * of them wrapped in a Table.
 *
 * @param dbName
 *          database the tables belong to
 * @param tableNames
 *          names of the tables to fetch
 * @return list produced by Lists.transform over the fetched metastore objects
 * @throws HiveException
 *           wrapping any failure of the metastore call
 */
private List getTableObjects(String dbName, List tableNames) throws HiveException {
  // Wraps a metastore API table into the ql-level Table.
  com.google.common.base.Function toHiveTable =
      new com.google.common.base.Function() {
        @Override
        public Table apply(org.apache.hadoop.hive.metastore.api.Table table) {
          return new Table(table);
        }
      };
  try {
    return Lists.transform(getMSC().getTableObjectsByName(dbName, tableNames), toHiveTable);
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
/**
 * Returns all existing tables from the session's current database which
 * match the given pattern. The matching occurs as per Java regular expressions
 *
 * @param tablePattern
 *          java re pattern
 * @return list of table names
 * @throws HiveException
 */
public List getTablesByPattern(String tablePattern) throws HiveException {
  String currentDb = SessionState.get().getCurrentDatabase();
  return getTablesByType(currentDb, tablePattern, null);
}
/**
 * Returns all existing tables from the specified database which match the given
 * pattern, regardless of table type. The matching occurs as per Java regular
 * expressions.
 *
 * @param dbName
 *          database to look in
 * @param tablePattern
 *          java re pattern
 * @return list of table names
 * @throws HiveException
 */
public List getTablesByPattern(String dbName, String tablePattern) throws HiveException {
  final TableType anyType = null;
  return getTablesByType(dbName, tablePattern, anyType);
}
/**
 * Returns all existing tables from the given database which match the given
 * pattern, regardless of table type. The matching occurs as per Java regular
 * expressions
 *
 * @param database
 *          the database name
 * @param tablePattern
 *          java re pattern
 * @return list of table names
 * @throws HiveException
 */
public List getTablesForDb(String database, String tablePattern)
    throws HiveException {
  final TableType anyType = null;
  return getTablesByType(database, tablePattern, anyType);
}
/**
 * Returns all existing tables of a type (VIRTUAL_VIEW|EXTERNAL_TABLE|MANAGED_TABLE) from the
 * specified database which match the given pattern. The matching occurs as per Java regular
 * expressions.
 *
 * @param dbName Database name to find the tables in. if null, uses the current database in
 *          this session.
 * @param pattern A pattern to match for the table names. If null, ".*" is used, i.e. all
 *          names from this DB are returned.
 * @param type The type of tables to return. VIRTUAL_VIEWS for views. If null, returns all
 *          tables and views.
 * @return list of table names that match the pattern.
 * @throws HiveException
 *           wrapping any failure of the metastore call
 */
public List getTablesByType(String dbName, String pattern, TableType type)
    throws HiveException {
  if (dbName == null) {
    dbName = SessionState.get().getCurrentDatabase();
  }
  // A missing pattern means "match everything".
  final String effectivePattern = (pattern == null) ? ".*" : pattern;
  try {
    if (type == null) {
      return getMSC().getTables(dbName, effectivePattern);
    }
    return getMSC().getTables(dbName, effectivePattern, type);
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
/**
 * Get the materialized views that have been enabled for rewriting from the
 * metastore. If the materialized view is in the cache, we do not need to
 * parse it to generate a logical plan for the rewriting. Instead, we
 * return the version present in the cache. Further, information provided
 * by the invalidation cache is useful to know whether a materialized view
 * can be used for rewriting or not.
 *
 * Every database returned by the metastore client is visited; databases
 * without materialized views enabled for rewriting contribute nothing.
 *
 * @param tablesUsed
 *          tables used by the query, forwarded to getValidMaterializedViews
 * @param forceMVContentsUpToDate
 *          forwarded to getValidMaterializedViews
 * @return the list of materialized views available for rewriting
 * @throws HiveException
 */
public List getAllValidMaterializedViews(List tablesUsed, boolean forceMVContentsUpToDate)
    throws HiveException {
  List validViews = new ArrayList<>();
  try {
    for (String dbName : getMSC().getAllDatabases()) {
      // From metastore (for security)
      List materializedViewNames = getMaterializedViewsForRewriting(dbName);
      if (!materializedViewNames.isEmpty()) {
        validViews.addAll(getValidMaterializedViews(
            dbName, materializedViewNames, tablesUsed, forceMVContentsUpToDate));
      }
    }
    return validViews;
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
/**
 * Single-view variant of getValidMaterializedViews: wraps the given name in
 * a one-element list and delegates.
 *
 * @param dbName
 *          database of the materialized view
 * @param materializedViewName
 *          name of the materialized view to check
 * @param tablesUsed
 *          tables used by the query
 * @param forceMVContentsUpToDate
 *          forwarded to getValidMaterializedViews
 * @return the result of getValidMaterializedViews for that single name
 * @throws HiveException
 */
public List getValidMaterializedView(String dbName, String materializedViewName,
    List tablesUsed, boolean forceMVContentsUpToDate) throws HiveException {
  List singleName = ImmutableList.of(materializedViewName);
  return getValidMaterializedViews(dbName, singleName, tablesUsed, forceMVContentsUpToDate);
}
/**
 * Returns the materializations of the named materialized views that may be
 * used, after checking each view against its rewriting time window and the
 * current transaction write ids.
 *
 * For every view:
 * - the time window comes from the view's own property if set, otherwise
 *   from configuration;
 * - with a negative window the view counts as outdated only when
 *   forceMVContentsUpToDate is true;
 * - otherwise, if forced, if the window is 0, or if the materialization time
 *   is older than the window, the write ids of every used table that the view
 *   also uses are compared; the view is skipped when write ids are
 *   unavailable, and marked outdated when they differ;
 * - an outdated view is kept only if the matching incremental option
 *   (rebuild when forced, rewriting otherwise) is enabled and the metastore
 *   reports invalidation info without update/delete changes on source tables;
 * - the materialization is taken from the registry cache when its create time
 *   matches, created through the registry when the registry is initialized,
 *   and skipped otherwise. Outdated views are augmented with transaction
 *   filters first.
 *
 * @param dbName
 *          database of the materialized views
 * @param materializedViewNames
 *          names of the materialized views to check
 * @param tablesUsed
 *          tables used by the query
 * @param forceMVContentsUpToDate
 *          true to always run the write-id check (the code comments describe
 *          this as the rebuild case)
 * @return the materializations that passed the checks
 * @throws HiveException
 *           wrapping any failure inside the check loop
 */
private List getValidMaterializedViews(String dbName, List materializedViewNames,
    List tablesUsed, boolean forceMVContentsUpToDate) throws HiveException {
  final String validTxnsList = conf.get(ValidTxnList.VALID_TXNS_KEY);
  final ValidTxnWriteIdList currentTxnWriteIds =
      SessionState.get().getTxnMgr().getValidWriteIds(tablesUsed, validTxnsList);
  final boolean tryIncrementalRewriting =
      HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_INCREMENTAL);
  final boolean tryIncrementalRebuild =
      HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REBUILD_INCREMENTAL);
  final long defaultDiff =
      HiveConf.getTimeVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW,
          TimeUnit.MILLISECONDS);
  final long currentTime = System.currentTimeMillis();
  try {
    // Final result
    List result = new ArrayList<>();
    List materializedViewTables = getTableObjects(dbName, materializedViewNames);
    for (Table materializedViewTable : materializedViewTables) {
      // Check if materialization defined its own invalidation time window
      String timeWindowString = materializedViewTable.getProperty(MATERIALIZED_VIEW_REWRITING_TIME_WINDOW);
      long diff = org.apache.commons.lang.StringUtils.isEmpty(timeWindowString) ? defaultDiff :
          HiveConf.toTime(timeWindowString,
              HiveConf.getDefaultTimeUnit(HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW),
              TimeUnit.MILLISECONDS);
      CreationMetadata creationMetadata = materializedViewTable.getCreationMetadata();
      boolean outdated = false;
      if (diff < 0L) {
        // We only consider the materialized view to be outdated if
        // forceMVContentsUpToDate = true, i.e., if it is a rebuild.
        // Otherwise, it passed the test and we use it as it is.
        outdated = forceMVContentsUpToDate;
      } else {
        // Check whether the materialized view is invalidated
        if (forceMVContentsUpToDate || diff == 0L || creationMetadata.getMaterializationTime() < currentTime - diff) {
          if (currentTxnWriteIds == null) {
            LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
                " ignored for rewriting as we could not obtain current txn ids");
            continue;
          }
          if (creationMetadata.getValidTxnList() == null ||
              creationMetadata.getValidTxnList().isEmpty()) {
            LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
                " ignored for rewriting as we could not obtain materialization txn ids");
            continue;
          }
          boolean ignore = false;
          ValidTxnWriteIdList mvTxnWriteIds = new ValidTxnWriteIdList(
              creationMetadata.getValidTxnList());
          for (String qName : tablesUsed) {
            // Note. If the materialized view does not contain a table that is contained in the query,
            // we do not need to check whether that specific table is outdated or not. If a rewriting
            // is produced in those cases, it is because that additional table is joined with the
            // existing tables with an append-columns only join, i.e., PK-FK + not null.
            if (!creationMetadata.getTablesUsed().contains(qName)) {
              continue;
            }
            ValidWriteIdList tableCurrentWriteIds = currentTxnWriteIds.getTableValidWriteIdList(qName);
            if (tableCurrentWriteIds == null) {
              // Uses non-transactional table, cannot be considered
              LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
                  " ignored for rewriting as it is outdated and cannot be considered for " +
                  " rewriting because it uses non-transactional table " + qName);
              ignore = true;
              break;
            }
            ValidWriteIdList tableWriteIds = mvTxnWriteIds.getTableValidWriteIdList(qName);
            if (tableWriteIds == null) {
              // This should not happen, but we ignore for safety
              LOG.warn("Materialized view " + materializedViewTable.getFullyQualifiedName() +
                  " ignored for rewriting as details about txn ids for table " + qName +
                  " could not be found in " + mvTxnWriteIds);
              ignore = true;
              break;
            }
            if (!outdated && !TxnIdUtils.checkEquivalentWriteIds(tableCurrentWriteIds, tableWriteIds)) {
              LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
                  " contents are outdated");
              outdated = true;
            }
          }
          if (ignore) {
            continue;
          }
        }
      }
      if (outdated) {
        // The MV is outdated, see whether we should consider it for rewriting or not
        boolean ignore = false;
        if (forceMVContentsUpToDate && !tryIncrementalRebuild) {
          // We will not try partial rewriting for rebuild if incremental rebuild is disabled
          ignore = true;
        } else if (!forceMVContentsUpToDate && !tryIncrementalRewriting) {
          // We will not try partial rewriting for non-rebuild if incremental rewriting is disabled
          ignore = true;
        } else {
          // Obtain additional information if we should try incremental rewriting / rebuild
          // We will not try partial rewriting if there were update/delete operations on source tables
          Materialization invalidationInfo = getMSC().getMaterializationInvalidationInfo(
              creationMetadata, conf.get(ValidTxnList.VALID_TXNS_KEY));
          ignore = invalidationInfo == null || invalidationInfo.isSourceTablesUpdateDeleteModified();
        }
        if (ignore) {
          LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
              " ignored for rewriting as its contents are outdated");
          continue;
        }
      }
      // It passed the test, load
      RelOptMaterialization materialization =
          HiveMaterializedViewsRegistry.get().getRewritingMaterializedView(
              dbName, materializedViewTable.getTableName());
      if (materialization != null) {
        RelNode viewScan = materialization.tableRel;
        RelOptHiveTable cachedMaterializedViewTable;
        if (viewScan instanceof Project) {
          // There is a Project on top (due to nullability)
          cachedMaterializedViewTable = (RelOptHiveTable) viewScan.getInput(0).getTable();
        } else {
          cachedMaterializedViewTable = (RelOptHiveTable) viewScan.getTable();
        }
        if (cachedMaterializedViewTable.getHiveTableMD().getCreateTime() ==
            materializedViewTable.getCreateTime()) {
          // It is in the cache and up to date
          if (outdated) {
            // We will rewrite it to include the filters on transaction list
            // so we can produce partial rewritings
            materialization = augmentMaterializationWithTimeInformation(
                materialization, validTxnsList, new ValidTxnWriteIdList(
                    creationMetadata.getValidTxnList()));
          }
          result.add(materialization);
          continue;
        }
      }
      // It was not present in the cache (maybe because it was added by another HS2)
      // or it is not up to date.
      if (HiveMaterializedViewsRegistry.get().isInitialized()) {
        // But the registry was fully initialized, thus we need to add it
        if (LOG.isDebugEnabled()) {
          LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
              " was not in the cache");
        }
        materialization = HiveMaterializedViewsRegistry.get().createMaterializedView(
            conf, materializedViewTable);
        if (materialization != null) {
          if (outdated) {
            // We will rewrite it to include the filters on transaction list
            // so we can produce partial rewritings
            materialization = augmentMaterializationWithTimeInformation(
                materialization, validTxnsList, new ValidTxnWriteIdList(
                    creationMetadata.getValidTxnList()));
          }
          result.add(materialization);
        }
      } else {
        // Otherwise the registry has not been initialized, skip for the time being.
        // The guard checks the same level the message is logged at.
        if (LOG.isInfoEnabled()) {
          LOG.info("Materialized view " + materializedViewTable.getFullyQualifiedName() + " was skipped "
              + "because cache has not been loaded yet");
        }
      }
    }
    return result;
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
/**
 * Method to enrich the materialization query contained in the input with
 * its invalidation. The query plan is run through a HepPlanner holding a
 * single HiveAugmentMaterializationRule, and a new materialization is built
 * from the original table plan, the rewritten query plan and the original
 * qualified table name.
 *
 * @param materialization
 *          materialization whose query plan is rewritten
 * @param validTxnsList
 *          valid transactions string used to obtain the current write ids
 * @param materializationTxnList
 *          write ids handed to the augmentation rule together with the
 *          current ones
 * @return a new materialization carrying the rewritten query plan
 * @throws LockException
 */
private static RelOptMaterialization augmentMaterializationWithTimeInformation(
    RelOptMaterialization materialization, String validTxnsList,
    ValidTxnWriteIdList materializationTxnList) throws LockException {
  // Collect the fully qualified names of all tables scanned by the query;
  // they are used to generate the corresponding txn write ids.
  List tablesUsed = new ArrayList<>();
  RelVisitor tableCollector = new RelVisitor() {
    @Override
    public void visit(RelNode node, int ordinal, RelNode parent) {
      if (node instanceof TableScan) {
        RelOptHiveTable scanned = (RelOptHiveTable) ((TableScan) node).getTable();
        tablesUsed.add(scanned.getHiveTableMD().getFullyQualifiedName());
      }
      super.visit(node, ordinal, parent);
    }
  };
  tableCollector.go(materialization.queryRel);
  ValidTxnWriteIdList currentTxnList =
      SessionState.get().getTxnMgr().getValidWriteIds(tablesUsed, validTxnsList);

  // Augment
  final RexBuilder rexBuilder = materialization.queryRel.getCluster().getRexBuilder();
  final HiveAugmentMaterializationRule augmentRule =
      new HiveAugmentMaterializationRule(rexBuilder, currentTxnList, materializationTxnList);
  final HepProgramBuilder programBuilder = new HepProgramBuilder().addRuleInstance(augmentRule);
  final HepPlanner planner = new HepPlanner(programBuilder.build());
  planner.setRoot(materialization.queryRel);
  final RelNode augmentedQueryRel = planner.findBestExp();
  return new RelOptMaterialization(materialization.tableRel, augmentedQueryRel,
      null, materialization.qualifiedTableName);
}
/**
 * Get materialized views for the specified database that have enabled rewriting,
 * as reported by the metastore client.
 *
 * @param dbName
 *          database to look in
 * @return list returned by the metastore client (callers in this class use
 *         the entries as materialized view names)
 * @throws HiveException
 *           wrapping any failure of the metastore call
 */
private List getMaterializedViewsForRewriting(String dbName) throws HiveException {
  final List rewritingViews;
  try {
    rewritingViews = getMSC().getMaterializedViewsForRewriting(dbName);
  } catch (Exception e) {
    throw new HiveException(e);
  }
  return rewritingViews;
}
/**
 * Get all existing database names from the metastore client.
 *
 * @return List of database names.
 * @throws HiveException
 *           wrapping any failure of the metastore call
 */
public List getAllDatabases() throws HiveException {
  final List databaseNames;
  try {
    databaseNames = getMSC().getAllDatabases();
  } catch (Exception e) {
    throw new HiveException(e);
  }
  return databaseNames;
}
/**
 * Get all existing databases that match the given
 * pattern. The matching occurs as per Java regular expressions
 *
 * @param databasePattern
 *          java re pattern
 * @return list of database names
 * @throws HiveException
 *           wrapping any failure of the metastore call
 */
public List getDatabasesByPattern(String databasePattern) throws HiveException {
  final List matchingDatabases;
  try {
    matchingDatabases = getMSC().getDatabases(databasePattern);
  } catch (Exception e) {
    throw new HiveException(e);
  }
  return matchingDatabases;
}
/**
 * Grants the given privileges through the metastore client.
 *
 * @param privileges
 *          a bag of privileges
 * @return the value returned by the metastore client's grant_privileges call
 * @throws HiveException
 *           wrapping any failure of the metastore call
 */
public boolean grantPrivileges(PrivilegeBag privileges)
    throws HiveException {
  final boolean granted;
  try {
    granted = getMSC().grant_privileges(privileges);
  } catch (Exception e) {
    throw new HiveException(e);
  }
  return granted;
}
/**
 * Revokes the given privileges through the metastore client.
 *
 * @param privileges
 *          a bag of privileges
 * @param grantOption
 *          forwarded unchanged to the metastore client's revoke_privileges call
 * @return true on success
 * @throws HiveException
 *           wrapping any failure of the metastore call
 */
public boolean revokePrivileges(PrivilegeBag privileges, boolean grantOption)
    throws HiveException {
  final boolean revoked;
  try {
    revoked = getMSC().revoke_privileges(privileges, grantOption);
  } catch (Exception e) {
    throw new HiveException(e);
  }
  return revoked;
}
/**
 * Query metadata to see if a database with the given name already exists.
 *
 * @param dbName
 *          name of the database to look up
 * @return true if getDatabase returns a database for the name, false if it
 *         returns null
 * @throws HiveException
 */
public boolean databaseExists(String dbName) throws HiveException {
  Database found = getDatabase(dbName);
  return found != null;
}
/**
 * Get the database by name.
 *
 * @param dbName the name of the database.
 * @return a Database object if this database exists, null otherwise.
 * @throws HiveException
 *           wrapping any failure other than NoSuchObjectException
 */
public Database getDatabase(String dbName) throws HiveException {
  Database database;
  try {
    database = getMSC().getDatabase(dbName);
  } catch (NoSuchObjectException e) {
    // An unknown database is reported as null rather than as an error.
    database = null;
  } catch (Exception e) {
    throw new HiveException(e);
  }
  return database;
}
/**
 * Get the database by catalog and name.
 *
 * @param catName catalog name
 * @param dbName the name of the database.
 * @return a Database object if this database exists, null otherwise.
 * @throws HiveException
 *           wrapping any failure other than NoSuchObjectException
 */
public Database getDatabase(String catName, String dbName) throws HiveException {
  Database database;
  try {
    database = getMSC().getDatabase(catName, dbName);
  } catch (NoSuchObjectException e) {
    // An unknown database is reported as null rather than as an error.
    database = null;
  } catch (Exception e) {
    throw new HiveException(e);
  }
  return database;
}
/**
 * Get the Database object for the session's current database.
 *
 * @return a Database object if this database exists, null otherwise.
 * @throws HiveException
 */
public Database getDatabaseCurrent() throws HiveException {
  return getDatabase(SessionState.get().getCurrentDatabase());
}
/**
* Load a directory into a Hive Table Partition - Alters existing content of
* the partition with the contents of loadPath. - If the partition does not
* exist - one is created - files in loadPath are moved into Hive. But the
* directory itself is not removed.
*
* @param loadPath
* Directory containing files to load into Table
* @param tbl
* name of table to be loaded.
* @param partSpec
* defines which partition needs to be loaded
* @param loadFileType
* if REPLACE_ALL - replace files in the table,
* otherwise add files to table (KEEP_EXISTING, OVERWRITE_EXISTING)
* @param inheritTableSpecs if true, on [re]creating the partition, take the
* location/inputformat/outputformat/serde details from table spec
* @param isSrcLocal
* If the source directory is LOCAL
* @param isAcidIUDoperation
* true if this is an ACID operation Insert/Update/Delete operation
* @param hasFollowingStatsTask
* true if there is a following task which updates the stats, so, this method need not update.
* @param writeId write ID allocated for the current load operation
* @param stmtId statement ID of the current load statement
* @param isInsertOverwrite
* @return Partition object being loaded with data
*/
public Partition loadPartition(Path loadPath, Table tbl, Map partSpec,
LoadFileType loadFileType, boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir,
boolean isSrcLocal, boolean isAcidIUDoperation, boolean hasFollowingStatsTask, Long writeId,
int stmtId, boolean isInsertOverwrite) throws HiveException {
Path tblDataLocationPath = tbl.getDataLocation();
boolean isMmTableWrite = AcidUtils.isInsertOnlyTable(tbl.getParameters());
assert tbl.getPath() != null : "null==getPath() for " + tbl.getTableName();
boolean isFullAcidTable = AcidUtils.isFullAcidTable(tbl);
boolean isTxnTable = AcidUtils.isTransactionalTable(tbl);
try {
// Get the partition object if it already exists
Partition oldPart = getPartition(tbl, partSpec, false);
/**
* Move files before creating the partition since down stream processes
* check for existence of partition in metadata before accessing the data.
* If partition is created before data is moved, downstream waiting
* processes might move forward with partial data
*/
Path oldPartPath = (oldPart != null) ? oldPart.getDataLocation() : null;
Path newPartPath = null;
if (inheritTableSpecs) {
Path partPath = new Path(tbl.getDataLocation(), Warehouse.makePartPath(partSpec));
newPartPath = new Path(tblDataLocationPath.toUri().getScheme(),
tblDataLocationPath.toUri().getAuthority(), partPath.toUri().getPath());
if(oldPart != null) {
/*
* If we are moving the partition across filesystem boundaries
* inherit from the table properties. Otherwise (same filesystem) use the
* original partition location.
*
* See: HIVE-1707 and HIVE-2117 for background
*/
FileSystem oldPartPathFS = oldPartPath.getFileSystem(getConf());
FileSystem loadPathFS = loadPath.getFileSystem(getConf());
if (FileUtils.equalsFileSystem(oldPartPathFS,loadPathFS)) {
newPartPath = oldPartPath;
}
}
} else {
newPartPath = oldPartPath;
}
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin("MoveTask", "FileMoves");
List newFiles = Collections.synchronizedList(new ArrayList());
// Note: the stats for ACID tables do not have any coordination with either Hive ACID logic
// like txn commits, time outs, etc.; nor the lower level sync in metastore pertaining
// to ACID updates. So the are not themselves ACID.
// Note: this assumes both paths are qualified; which they are, currently.
if ((isMmTableWrite || isFullAcidTable) && loadPath.equals(newPartPath)) {
// MM insert query, move itself is a no-op.
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("not moving " + loadPath + " to " + newPartPath + " (MM)");
}
assert !isAcidIUDoperation;
if (areEventsForDmlNeeded(tbl, oldPart)) {
newFiles = listFilesCreatedByQuery(loadPath, writeId, stmtId);
}
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("maybe deleting stuff from " + oldPartPath
+ " (new " + newPartPath + ") for replace");
}
} else {
// Either a non-MM query, or a load into MM table from an external source.
Path destPath = newPartPath;
if (isMmTableWrite) {
assert !isAcidIUDoperation;
// We will load into MM directory, and hide previous directories if needed.
destPath = new Path(destPath, isInsertOverwrite
? AcidUtils.baseDir(writeId) : AcidUtils.deltaSubdir(writeId, writeId, stmtId));
}
if (!isAcidIUDoperation && isFullAcidTable) {
destPath = fixFullAcidPathForLoadData(loadFileType, destPath, writeId, stmtId, tbl);
}
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("moving " + loadPath + " to " + destPath);
}
boolean isManaged = tbl.getTableType() == TableType.MANAGED_TABLE;
// TODO: why is "&& !isAcidIUDoperation" needed here?
if (!isTxnTable && ((loadFileType == LoadFileType.REPLACE_ALL) || (oldPart == null && !isAcidIUDoperation))) {
//for fullAcid tables we don't delete files for commands with OVERWRITE - we create a new
// base_x. (there is Insert Overwrite and Load Data Overwrite)
boolean isAutoPurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge"));
boolean needRecycle = !tbl.isTemporary()
&& ReplChangeManager.isSourceOfReplication(Hive.get().getDatabase(tbl.getDbName()));
replaceFiles(tbl.getPath(), loadPath, destPath, oldPartPath, getConf(), isSrcLocal,
isAutoPurge, newFiles, FileUtils.HIDDEN_FILES_PATH_FILTER, needRecycle, isManaged);
} else {
FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
copyFiles(conf, loadPath, destPath, fs, isSrcLocal, isAcidIUDoperation,
(loadFileType == LoadFileType.OVERWRITE_EXISTING), newFiles,
tbl.getNumBuckets() > 0, isFullAcidTable, isManaged);
}
}
perfLogger.PerfLogEnd("MoveTask", "FileMoves");
Partition newTPart = oldPart != null ? oldPart : new Partition(tbl, partSpec, newPartPath);
alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), inheritTableSpecs, newPartPath.toString());
validatePartition(newTPart);
// If config is set, table is not temporary and partition being inserted exists, capture
// the list of files added. For not yet existing partitions (insert overwrite to new partition
// or dynamic partition inserts), the add partition event will capture the list of files added.
// Generate an insert event only if inserting into an existing partition
// When inserting into a new partition, the add partition event takes care of insert event
if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && (null != oldPart)) {
fireInsertEvent(tbl, partSpec, (loadFileType == LoadFileType.REPLACE_ALL), newFiles);
}
// column stats will be inaccurate
if (!hasFollowingStatsTask) {
StatsSetupConst.clearColumnStatsState(newTPart.getParameters());
}
// recreate the partition if it existed before
if (isSkewedStoreAsSubdir) {
org.apache.hadoop.hive.metastore.api.Partition newCreatedTpart = newTPart.getTPartition();
SkewedInfo skewedInfo = newCreatedTpart.getSd().getSkewedInfo();
/* Construct list bucketing location mappings from sub-directory name. */
Map, String> skewedColValueLocationMaps = constructListBucketingLocationMap(
newPartPath, skewedInfo);
/* Add list bucketing location mappings. */
skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps);
newCreatedTpart.getSd().setSkewedInfo(skewedInfo);
}
if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
StatsSetupConst.setBasicStatsState(newTPart.getParameters(), StatsSetupConst.FALSE);
}
if (oldPart == null) {
newTPart.getTPartition().setParameters(new HashMap());
if (this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
StatsSetupConst.setStatsStateForCreateTable(newTPart.getParameters(),
MetaStoreUtils.getColumnNames(tbl.getCols()), StatsSetupConst.TRUE);
}
// Note: we are creating a brand new partition, so this is going to be valid for ACID.
List filesForStats = null;
if (isTxnTable) {
filesForStats = AcidUtils.getAcidFilesForStats(
newTPart.getTable(), newPartPath, conf, null);
} else {
filesForStats = HiveStatsUtils.getFileStatusRecurse(
newPartPath, -1, newPartPath.getFileSystem(conf));
}
if (filesForStats != null) {
MetaStoreUtils.populateQuickStats(filesForStats, newTPart.getParameters());
} else {
// The ACID state is probably absent. Warning is logged in the get method.
MetaStoreUtils.clearQuickStats(newTPart.getParameters());
}
try {
LOG.debug("Adding new partition " + newTPart.getSpec());
getSynchronizedMSC().add_partition(newTPart.getTPartition());
} catch (AlreadyExistsException aee) {
// With multiple users concurrently issuing insert statements on the same partition has
// a side effect that some queries may not see a partition at the time when they're issued,
// but will realize the partition is actually there when it is trying to add such partition
// to the metastore and thus get AlreadyExistsException, because some earlier query just created it (race condition).
// For example, imagine such a table is created:
// create table T (name char(50)) partitioned by (ds string);
// and the following two queries are launched at the same time, from different sessions:
// insert into table T partition (ds) values ('Bob', 'today'); -- creates the partition 'today'
// insert into table T partition (ds) values ('Joe', 'today'); -- will fail with AlreadyExistsException
// In that case, we want to retry with alterPartition.
LOG.debug("Caught AlreadyExistsException, trying to alter partition instead");
setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
} catch (Exception e) {
try {
final FileSystem newPathFileSystem = newPartPath.getFileSystem(this.getConf());
boolean isAutoPurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge"));
final FileStatus status = newPathFileSystem.getFileStatus(newPartPath);
Hive.trashFiles(newPathFileSystem, new FileStatus[] {status}, this.getConf(), isAutoPurge);
} catch (IOException io) {
LOG.error("Could not delete partition directory contents after failed partition creation: ", io);
}
throw e;
}
} else {
setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
}
return newTPart;
} catch (IOException e) {
LOG.error(StringUtils.stringifyException(e));
throw new HiveException(e);
} catch (MetaException e) {
LOG.error(StringUtils.stringifyException(e));
throw new HiveException(e);
} catch (InvalidOperationException e) {
LOG.error(StringUtils.stringifyException(e));
throw new HiveException(e);
} catch (TException e) {
LOG.error(StringUtils.stringifyException(e));
throw new HiveException(e);
}
}
/**
 * Appends the ACID sub-directory that a Load Data command on a full-ACID table must write
 * into, and creates that directory: {@code base_x} when the load replaces everything,
 * {@code delta_x_x} when existing data is kept - the same layout as any other ACID write.
 *
 * @param loadFileType how the load treats existing data; only {@code REPLACE_ALL} and
 *                     {@code KEEP_EXISTING} are accepted
 * @param destPath directory under which the ACID sub-directory is placed
 * @param writeId - write id of the operated table from current transaction (in which this operation is running)
 * @param stmtId - see {@link DbTxnManager#getStmtIdAndIncrement()}
 * @param tbl table whose data location supplies the file system used for the mkdir
 * @return appropriately modified path
 * @throws HiveException if an {@link IOException} is raised while creating the directory
 * @throws IllegalArgumentException for {@code OVERWRITE_EXISTING} or any other load type
 */
private Path fixFullAcidPathForLoadData(LoadFileType loadFileType, Path destPath, long writeId, int stmtId, Table tbl) throws HiveException {
  // Pick the name of the ACID directory first; the path is assembled once below.
  final String acidDirName;
  switch (loadFileType) {
    case REPLACE_ALL:
      acidDirName = AcidUtils.baseDir(writeId);
      break;
    case KEEP_EXISTING:
      acidDirName = AcidUtils.deltaSubdir(writeId, writeId, stmtId);
      break;
    case OVERWRITE_EXISTING:
      // should not happen here - this is for replication
    default:
      throw new IllegalArgumentException("Unexpected " + LoadFileType.class.getName() + " " + loadFileType);
  }
  final Path acidPath = new Path(destPath, acidDirName);
  try {
    FileSystem targetFs = tbl.getDataLocation().getFileSystem(SessionState.getSessionConf());
    boolean created = FileUtils.mkdir(targetFs, acidPath, conf);
    if (!created) {
      LOG.warn(acidPath + " already exists?!?!");
    }
  } catch (IOException e) {
    throw new HiveException("load: error while creating " + acidPath + ";loadFileType=" + loadFileType, e);
  }
  return acidPath;
}
/**
 * Tells whether a DML insert event should be fired for this load.
 *
 * @param tbl table being written to
 * @param oldPart the partition as it existed before the load, or {@code null} if there was none
 * @return {@code true} only when {@code FIRE_EVENTS_FOR_DML} is enabled, the table is not
 *         temporary, and {@code oldPart} is not {@code null}
 */
private boolean areEventsForDmlNeeded(Table tbl, Partition oldPart) {
  // Checks are kept in the same order as the original short-circuit expression.
  if (!conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML)) {
    return false;
  }
  if (tbl.isTemporary()) {
    return false;
  }
  return oldPart != null;
}
/**
 * Collects the paths under {@code loadPath} whose names start with the delta directory name
 * built from {@code writeId} and {@code stmtId}.
 *
 * Entries directly under {@code loadPath} are checked by name; for each sub-directory of
 * {@code loadPath} its immediate children are checked as well (one level only).
 *
 * @param loadPath directory to scan
 * @param writeId write id used for both ends of the delta directory name
 * @param stmtId statement id used in the delta directory name
 * @return the matching paths; empty when the listing of {@code loadPath} is {@code null}
 *         or nothing matches
 * @throws HiveException wrapping any {@link IOException} raised while listing
 */
private List<Path> listFilesCreatedByQuery(Path loadPath, long writeId, int stmtId) throws HiveException {
  List<Path> newFiles = new ArrayList<Path>();
  final String filePrefix = AcidUtils.deltaSubdir(writeId, writeId, stmtId);
  FileStatus[] srcs;
  FileSystem srcFs;
  try {
    srcFs = loadPath.getFileSystem(conf);
    srcs = srcFs.listStatus(loadPath);
  } catch (IOException e) {
    LOG.error("Error listing files", e);
    throw new HiveException(e);
  }
  if (srcs == null) {
    LOG.info("No sources specified: " + loadPath);
    return newFiles;
  }
  // Created lazily: only needed if at least one sub-directory is encountered.
  PathFilter subdirFilter = null;
  // Note: just like the move path, we only do one level of recursion.
  for (FileStatus src : srcs) {
    if (src.isDirectory()) {
      if (subdirFilter == null) {
        subdirFilter = new PathFilter() {
          @Override
          public boolean accept(Path path) {
            return path.getName().startsWith(filePrefix);
          }
        };
      }
      try {
        for (FileStatus srcFile : srcFs.listStatus(src.getPath(), subdirFilter)) {
          newFiles.add(srcFile.getPath());
        }
      } catch (IOException e) {
        throw new HiveException(e);
      }
    } else if (src.getPath().getName().startsWith(filePrefix)) {
      newFiles.add(src.getPath());
    }
  }
  return newFiles;
}
/**
 * Alters an existing partition in the metastore through the synchronized metastore client.
 *
 * @param hasFollowingStatsTask when {@code true}, an {@link EnvironmentContext} carrying
 *        {@code DO_NOT_UPDATE_STATS=TRUE} is sent with the call; otherwise a {@code null}
 *        context is passed
 * @param tbl table that owns the partition (supplies database and table name)
 * @param newTPart partition whose underlying metastore object is submitted
 * @throws MetaException propagated from the metastore client
 * @throws TException propagated from the metastore client
 */
private void setStatsPropAndAlterPartition(boolean hasFollowingStatsTask, Table tbl,
    Partition newTPart) throws MetaException, TException {
  final EnvironmentContext envCtx;
  if (!hasFollowingStatsTask) {
    envCtx = null;
  } else {
    envCtx = new EnvironmentContext();
    envCtx.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
  }
  LOG.debug("Altering existing partition " + newTPart.getSpec());
  getSynchronizedMSC().alter_partition(
      tbl.getDbName(), tbl.getTableName(), newTPart.getTPartition(), envCtx);
}
/**
* Walk through sub-directory tree to construct list bucketing location map.
*
* @param fSta
* @param fSys
* @param skewedColValueLocationMaps
* @param newPartPath
* @param skewedInfo
* @throws IOException
*/
private void walkDirTree(FileStatus fSta, FileSystem fSys,
Map, String> skewedColValueLocationMaps, Path newPartPath, SkewedInfo skewedInfo)
throws IOException {
/* Base Case. It's leaf. */
if (!fSta.isDir()) {
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("Processing LB leaf " + fSta.getPath());
}
/* construct one location map if not exists. */
constructOneLBLocationMap(fSta, skewedColValueLocationMaps, newPartPath, skewedInfo);
return;
}
/* dfs. */
FileStatus[] children = fSys.listStatus(fSta.getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
if (children != null) {
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("Processing LB dir " + fSta.getPath());
}
for (FileStatus child : children) {
walkDirTree(child, fSys, skewedColValueLocationMaps, newPartPath, skewedInfo);
}
}
}
/**
* Construct a list bucketing location map
* @param fSta
* @param skewedColValueLocationMaps
* @param newPartPath
* @param skewedInfo
*/
private void constructOneLBLocationMap(FileStatus fSta,
Map, String> skewedColValueLocationMaps,
Path newPartPath, SkewedInfo skewedInfo) {
Path lbdPath = fSta.getPath().getParent();
List skewedValue = new ArrayList();
String lbDirName = FileUtils.unescapePathName(lbdPath.toString());
String partDirName = FileUtils.unescapePathName(newPartPath.toString());
String lbDirSuffix = lbDirName.replace(partDirName, ""); // TODO: should it rather do a prefix?
if (lbDirSuffix.startsWith(Path.SEPARATOR)) {
lbDirSuffix = lbDirSuffix.substring(1);
}
String[] dirNames = lbDirSuffix.split(Path.SEPARATOR);
int keysFound = 0, dirsToTake = 0;
int colCount = skewedInfo.getSkewedColNames().size();
while (dirsToTake < dirNames.length && keysFound < colCount) {
String dirName = dirNames[dirsToTake++];
// Construct skewed-value to location map except default directory.
// why? query logic knows default-dir structure and don't need to get from map
if (dirName.equalsIgnoreCase(ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME)) {
++keysFound;
} else {
String[] kv = dirName.split("=");
if (kv.length == 2) {
skewedValue.add(kv[1]);
++keysFound;
} else {
// TODO: we should really probably throw. Keep the existing logic for now.
LOG.warn("Skipping unknown directory " + dirName
+ " when expecting LB keys or default directory (from " + lbDirName + ")");
}
}
}
for (int i = 0; i < (dirNames.length - dirsToTake); ++i) {
lbdPath = lbdPath.getParent();
}
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("Saving LB location " + lbdPath + " based on "
+ colCount + " keys and " + fSta.getPath());
}
if ((skewedValue.size() > 0) && (skewedValue.size() == colCount)
&& !skewedColValueLocationMaps.containsKey(skewedValue)) {
skewedColValueLocationMaps.put(skewedValue, lbdPath.toString());
}
}
/**
* Construct location map from path
*
* @param newPartPath
* @param skewedInfo
* @return
* @throws IOException
* @throws FileNotFoundException
*/
private Map, String> constructListBucketingLocationMap(Path newPartPath,
SkewedInfo skewedInfo) throws IOException, FileNotFoundException {
Map, String> skewedColValueLocationMaps = new HashMap, String>();
FileSystem fSys = newPartPath.getFileSystem(conf);
walkDirTree(fSys.getFileStatus(newPartPath),
fSys, skewedColValueLocationMaps, newPartPath, skewedInfo);
return skewedColValueLocationMaps;
}
/**
 * Get the valid partitions from the path.
 *
 * For non-MM tables the partition directories are the entries found {@code numDP} levels
 * below {@code loadPath}; each must be a directory. For MM tables the MM directory
 * candidates for {@code writeId} are looked up and their parent directories are used.
 *
 * @param numDP number of dynamic partitions
 * @param numLB not used by this method
 * @param loadPath directory to search for dynamic partition directories
 * @param writeId write id passed to the MM directory lookup (MM tables only)
 * @param stmtId not used by this method; the MM lookup is always done with {@code -1}
 * @param isMmTable whether to use the MM directory lookup instead of the plain listing
 * @param isInsertOverwrite forwarded to the MM directory lookup
 * @return Set of valid partitions
 * @throws HiveException if a non-directory is found where a partition is expected, if
 *         listing fails, or if more partitions are found than
 *         {@code DYNAMICPARTITIONMAXPARTS} allows
 */
private Set<Path> getValidPartitionsInPath(
    int numDP, int numLB, Path loadPath, Long writeId, int stmtId,
    boolean isMmTable, boolean isInsertOverwrite) throws HiveException {
  Set<Path> validPartitions = new HashSet<Path>();
  try {
    FileSystem fs = loadPath.getFileSystem(conf);
    if (!isMmTable) {
      List<FileStatus> leafStatus = HiveStatsUtils.getFileStatusRecurse(loadPath, numDP, fs);
      // Every entry at the partition level must itself be a directory.
      for (FileStatus s : leafStatus) {
        if (!s.isDirectory()) {
          throw new HiveException("partition " + s.getPath() + " is not a directory!");
        }
        Path dpPath = s.getPath();
        validPartitions.add(dpPath);
      }
    } else {
      // The non-MM path only finds new partitions, as it is looking at the temp path.
      // To produce the same effect, we will find all the partitions affected by this txn ID.
      // Note: we ignore the statement ID here, because it's currently irrelevant for MoveTask
      //       where this is used; we always want to load everything; also the only case where
      //       we have multiple statements anyway is union.
      Utilities.FILE_OP_LOGGER.trace(
          "Looking for dynamic partitions in {} ({} levels)", loadPath, numDP);
      Path[] leafStatus = Utilities.getMmDirectoryCandidates(
          fs, loadPath, numDP, null, writeId, -1, conf, isInsertOverwrite);
      for (Path p : leafStatus) {
        Path dpPath = p.getParent(); // Skip the MM directory that we have found.
        if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
          Utilities.FILE_OP_LOGGER.trace("Found DP " + dpPath);
        }
        validPartitions.add(dpPath);
      }
    }
  } catch (IOException e) {
    throw new HiveException(e);
  }

  int partsToLoad = validPartitions.size();
  if (partsToLoad == 0) {
    LOG.warn("No partition is generated by dynamic partitioning");
  }

  // Read the limit once; it is used both for the check and for the error message.
  int maxParts = conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS);
  if (partsToLoad > maxParts) {
    throw new HiveException("Number of dynamic partitions created is " + partsToLoad
        + ", which is more than "
        + maxParts
        +". To solve this try to set " + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
        + " to at least " + partsToLoad + '.');
  }
  return validPartitions;
}
/**
* Given a source directory name of the load path, load all dynamically generated partitions
* into the specified table and return a list of strings that represent the dynamic partition
* paths.
* @param loadPath
* @param tableName
* @param partSpec
* @param loadFileType
* @param numDP number of dynamic partitions
* @param isAcid true if this is an ACID operation
* @param writeId writeId, can be 0 unless isAcid == true
* @return partition map details (PartitionSpec and Partition)
* @throws HiveException
*/
public Map