/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveVariableSource;
import org.apache.hadoop.hive.conf.VariableSubstitution;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Schema;
import org.apache.hadoop.hive.metastore.api.TxnType;
import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;
import org.apache.hadoop.hive.ql.exec.ExplainTask;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils;
import org.apache.hadoop.hive.ql.hooks.HookUtils;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
import org.apache.hadoop.hive.ql.lockmgr.LockException;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl;
import org.apache.hadoop.hive.ql.parse.ParseException;
import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
import org.apache.hadoop.hive.ql.reexec.ReCompileException;
import org.apache.hadoop.hive.ql.security.authorization.command.CommandAuthorizer;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.util.StringUtils;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.ImmutableMap;
/**
* The compiler compiles the command by creating a QueryPlan from a String command.
* It also opens a transaction if necessary.
*/
public class Compiler {
private static final String CLASS_NAME = Driver.class.getName();
private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
private static final LogHelper CONSOLE = new LogHelper(LOG);
private final Context context;
private final DriverContext driverContext;
private final DriverState driverState;
private final PerfLogger perfLogger = SessionState.getPerfLogger();
private ASTNode tree;
public Compiler(Context context, DriverContext driverContext, DriverState driverState) {
this.context = context;
this.driverContext = driverContext;
this.driverState = driverState;
}
/**
* @param rawCommand the command to compile, before variable substitution and redaction
* @param deferClose indicates whether the close/destroy should be deferred when the process has been interrupted;
* it should be set to true if the compile method is called within another method like runInternal,
* which defers the close to the caller of that method.
*/
public QueryPlan compile(String rawCommand, boolean deferClose) throws CommandProcessorException {
initialize(rawCommand);
Throwable compileException = null;
boolean parsed = false;
QueryPlan plan = null;
try {
DriverUtils.checkInterrupted(driverState, driverContext, "before parsing and analysing the query", null, null);
parse();
parsed = true;
BaseSemanticAnalyzer sem = analyze();
DriverUtils.checkInterrupted(driverState, driverContext, "after analyzing query.", null, null);
plan = createPlan(sem);
initializeFetchTask(plan);
authorize(sem);
explainOutput(sem, plan);
} catch (CommandProcessorException cpe) {
compileException = cpe.getCause();
throw cpe;
} catch (Exception e) {
compileException = e;
DriverUtils.checkInterrupted(driverState, driverContext, "during query compilation: " + e.getMessage(), null,
null);
handleException(e);
} finally {
cleanUp(compileException, parsed, deferClose);
}
return plan;
}
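/**
 * Prepares the compilation: starts the COMPILE perf timer, moves the driver state to compiling,
 * applies Hive variable substitution to the raw command, records the substituted command on the
 * context, and stores a redacted form of it on the query display and the configuration for logging.
 */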
private void initialize(String rawCommand) throws CommandProcessorException {
perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.COMPILE);
driverState.compilingWithLocking();
VariableSubstitution variableSubstitution = new VariableSubstitution(new HiveVariableSource() {
@Override
public Map<String, String> getHiveVariable() {
return SessionState.get().getHiveVariables();
}
});
String command = variableSubstitution.substitute(driverContext.getConf(), rawCommand);
String queryStr = command;
try {
// the command should be redacted to avoid logging sensitive data
queryStr = HookUtils.redactLogString(driverContext.getConf(), command);
} catch (Exception e) {
LOG.warn("WARNING! Query command could not be redacted." + e);
}
DriverUtils.checkInterrupted(driverState, driverContext, "at beginning of compilation.", null, null);
context.setCmd(command);
driverContext.getQueryDisplay().setQueryStr(queryStr);
LOG.info("Compiling command(queryId=" + driverContext.getQueryId() + "): " + queryStr);
driverContext.getConf().setQueryString(queryStr);
// FIXME: side effect will leave the last query set at the session level
if (SessionState.get() != null) {
SessionState.get().getConf().setQueryString(queryStr);
SessionState.get().setupQueryCurrentTimestamp();
}
}
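/**
 * Parses the substituted command into an ASTNode, surrounded by the before/after parse hooks and
 * timed under the PARSE perf logger stage.
 */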
private void parse() throws ParseException {
perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.PARSE);
// Trigger query hook before compilation
driverContext.getHookRunner().runBeforeParseHook(context.getCmd());
boolean success = false;
try {
tree = ParseUtils.parse(context.getCmd(), context);
success = true;
} finally {
driverContext.getHookRunner().runAfterParseHook(context.getCmd(), !success);
}
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.PARSE);
}
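/**
 * Runs semantic analysis: fires the before-compile and pre/post analyze hooks, flushes the metastore
 * client cache, opens a transaction and generates the valid txn list on the first attempt (not on
 * retrial), then analyzes the AST and validates the resulting plan.
 */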
private BaseSemanticAnalyzer analyze() throws Exception {
perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.ANALYZE);
driverContext.getHookRunner().runBeforeCompileHook(context.getCmd());
// clear CurrentFunctionsInUse set, to capture new set of functions
// that SemanticAnalyzer finds are in use
SessionState.get().getCurrentFunctionsInUse().clear();
// Flush the metastore cache. This assures that we don't pick up objects from a previous
// query running in this same thread. This has to be done after we get our semantic
// analyzer (this is when the connection to the metastore is made) but before we analyze,
// because at that point we need access to the objects.
Hive.get().getMSC().flushCache();
boolean executeHooks = driverContext.getHookRunner().hasPreAnalyzeHooks();
HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
if (executeHooks) {
hookCtx.setConf(driverContext.getConf());
hookCtx.setUserName(SessionState.get().getUserName());
hookCtx.setIpAddress(SessionState.get().getUserIpAddress());
hookCtx.setCommand(context.getCmd());
hookCtx.setHiveOperation(driverContext.getQueryState().getHiveOperation());
tree = driverContext.getHookRunner().runPreAnalyzeHooks(hookCtx, tree);
}
// SemanticAnalyzerFactory also sets the hive operation in query state
BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(driverContext.getQueryState(), tree);
if (!driverContext.isRetrial()) {
if (HiveOperation.REPLDUMP.equals(driverContext.getQueryState().getHiveOperation())) {
setLastReplIdForDump(driverContext.getQueryState().getConf());
}
driverContext.setTxnType(AcidUtils.getTxnType(driverContext.getConf(), tree));
openTransaction(driverContext.getTxnType());
generateValidTxnList();
}
// Do semantic analysis and plan generation
try {
sem.startAnalysis();
sem.analyze(tree, context);
} finally {
sem.endAnalysis();
}
if (executeHooks) {
hookCtx.update(sem);
driverContext.getHookRunner().runPostAnalyzeHooks(hookCtx, sem.getAllRootTasks());
}
LOG.info("Semantic Analysis Completed (retrial = {})", driverContext.isRetrial());
// Retrieve information about cache usage for the query.
if (driverContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_ENABLED)) {
driverContext.setCacheUsage(sem.getCacheUsage());
}
// validate the plan
sem.validate();
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.ANALYZE);
return sem;
}
/**
* The last repl id should be captured before the current REPL DUMP operation opens a txn.
* This is needed to avoid losing data that is added or modified by concurrent txns while a bootstrap
* dump is in progress.
* @param conf Query configurations
* @throws HiveException
* @throws TException
*/
private void setLastReplIdForDump(HiveConf conf) throws HiveException, TException {
// The last logged notification event id is used as the last repl id for the current REPL DUMP.
Hive hiveDb = Hive.get();
Long lastReplId = hiveDb.getMSC().getCurrentNotificationEventId().getEventId();
conf.setLong(ReplUtils.LAST_REPL_ID_KEY, lastReplId);
LOG.debug("Setting " + ReplUtils.LAST_REPL_ID_KEY + " = " + lastReplId);
}
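/**
 * Opens a transaction through the transaction manager when concurrency support is enabled, the
 * operation requires an implicit transaction, no transaction is open yet, and this is not a
 * compaction transaction. For REPL DUMP/LOAD the replication policy is also recorded on the context.
 */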
private void openTransaction(TxnType txnType) throws LockException, CommandProcessorException {
if (DriverUtils.checkConcurrency(driverContext) && startImplicitTxn(driverContext.getTxnManager()) &&
!driverContext.getTxnManager().isTxnOpen() && !MetaStoreServerUtils.isCompactionTxn(txnType)) {
String userFromUGI = DriverUtils.getUserFromUGI(driverContext);
if (HiveOperation.REPLDUMP.equals(driverContext.getQueryState().getHiveOperation())
|| HiveOperation.REPLLOAD.equals(driverContext.getQueryState().getHiveOperation())) {
context.setReplPolicy(PlanUtils.stripQuotes(tree.getChild(0).getText()));
}
driverContext.getTxnManager().openTxn(context, userFromUGI, txnType);
}
}
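/**
 * Decides whether the current operation should open an implicit transaction: COMMIT/ROLLBACK require
 * an already open transaction, SHOW-style and other metadata commands never need one, and everything
 * else needs one unless it is an EXPLAIN.
 */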
private boolean startImplicitTxn(HiveTxnManager txnManager) throws LockException {
// HiveOperation is not always set; see HIVE-16447/HIVE-16443
HiveOperation hiveOperation = driverContext.getQueryState().getHiveOperation();
switch (hiveOperation == null ? HiveOperation.QUERY : hiveOperation) {
case COMMIT:
case ROLLBACK:
if (!txnManager.isTxnOpen()) {
throw new LockException(null, ErrorMsg.OP_NOT_ALLOWED_WITHOUT_TXN, hiveOperation.getOperationName());
}
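// intentional fall-through: COMMIT/ROLLBACK with an open transaction needs no new implicit txn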
case SWITCHDATABASE:
case SET_AUTOCOMMIT:
/**
* autocommit is listed here for completeness; the transaction manager does not use it. If we want to
* support JDBC semantics (or any other definition of autocommit) it should be done at the session level.
*/
case SHOWDATABASES:
case SHOWTABLES:
case SHOW_TABLESTATUS:
case SHOW_TBLPROPERTIES:
case SHOWCOLUMNS:
case SHOWFUNCTIONS:
case SHOWPARTITIONS:
case SHOWLOCKS:
case SHOWVIEWS:
case SHOW_ROLES:
case SHOW_ROLE_PRINCIPALS:
case SHOW_COMPACTIONS:
case SHOW_TRANSACTIONS:
case ABORT_TRANSACTIONS:
case KILL_QUERY:
return false;
//this implies that no locks are needed for such a command
default:
return !context.isExplainPlan();
}
}
private void generateValidTxnList() throws LockException {
// Record current valid txn list that will be used throughout the query
// compilation and processing. We only do this if 1) a transaction
// was already opened and 2) the list has not been recorded yet,
// e.g., by an explicit open transaction command.
driverContext.setValidTxnListsGenerated(false);
String currentTxnString = driverContext.getConf().get(ValidTxnList.VALID_TXNS_KEY);
if (driverContext.getTxnManager().isTxnOpen() && (currentTxnString == null || currentTxnString.isEmpty())) {
try {
recordValidTxns(driverContext.getTxnManager());
driverContext.setValidTxnListsGenerated(true);
} catch (LockException e) {
LOG.error("Exception while acquiring valid txn list", e);
throw e;
}
}
}
// Write the current set of valid transactions into the configuration.
private void recordValidTxns(HiveTxnManager txnMgr) throws LockException {
String oldTxnString = driverContext.getConf().get(ValidTxnList.VALID_TXNS_KEY);
if ((oldTxnString != null) && (oldTxnString.length() > 0)) {
throw new IllegalStateException("calling recordValidTxn() more than once in the same " +
JavaUtils.txnIdToString(txnMgr.getCurrentTxnId()));
}
ValidTxnList txnList = txnMgr.getValidTxns();
String txnStr = txnList.toString();
driverContext.getConf().set(ValidTxnList.VALID_TXNS_KEY, txnStr);
LOG.debug("Encoding valid txns info " + txnStr + " txnid:" + txnMgr.getCurrentTxnId());
}
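/**
 * Builds the QueryPlan from the analyzer output, attaching the output schema, the optimized CBO plan
 * and the optimized SQL text used by EXPLAIN, and flags prepare queries so execution can be skipped.
 */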
private QueryPlan createPlan(BaseSemanticAnalyzer sem) {
// get the output schema
setSchema(sem);
QueryPlan plan = new QueryPlan(driverContext.getQueryString(), sem,
driverContext.getQueryDisplay().getQueryStartTime(), driverContext.getQueryId(),
driverContext.getQueryState().getHiveOperation(), driverContext.getSchema());
// save the optimized plan and sql for the explain
plan.setOptimizedCBOPlan(context.getCalcitePlan());
plan.setOptimizedQueryString(context.getOptimizedSql());
// this is required so that the driver can later skip executing prepare queries
if (sem.isPrepareQuery()) {
plan.setPrepareQuery(true);
}
return plan;
}
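/**
 * Initializes the FetchTask of the plan, except for PREPARE queries where operator initialization is
 * skipped.
 */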
protected void initializeFetchTask(QueryPlan plan) {
// for PREPARE statements we should avoid initializing operators
if (plan.isPrepareQuery()) {
return;
}
// initialize FetchTask right here
if (plan.getFetchTask() != null) {
plan.getFetchTask().initialize(driverContext.getQueryState(), plan, null, context);
}
}
/**
* Get a Schema with fields represented with native Hive types.
*/
private void setSchema(BaseSemanticAnalyzer sem) {
Schema schema = new Schema();
// If we have a semantic analyzer, prefer its result schema if available; otherwise, try digging a
// table description out of its fetch task; failing that, give up.
if (sem == null) {
LOG.info("No semantic analyzer, using empty schema.");
} else if (sem.getResultSchema() != null) {
List<FieldSchema> lst = sem.getResultSchema();
schema = new Schema(lst, null);
} else if (sem.getFetchTask() != null) {
FetchTask ft = sem.getFetchTask();
TableDesc td = ft.getTblDesc();
// partitioned tables don't have a tableDesc set on the FetchTask. Instead they have a list of PartitionDesc
// objects, each with a table desc. Let's try to fetch the desc for the first partition and use its deserializer.
if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) {
if (ft.getWork().getPartDesc().size() > 0) {
td = ft.getWork().getPartDesc().get(0).getTableDesc();
}
}
if (td == null) {
LOG.info("No returning schema, using empty schema");
} else {
String tableName = "result";
List<FieldSchema> lst = null;
try {
lst = HiveMetaStoreUtils.getFieldsFromDeserializer(tableName, td.getDeserializer(driverContext.getConf()),
driverContext.getConf());
} catch (Exception e) {
LOG.warn("Error getting schema", e);
}
if (lst != null) {
schema = new Schema(lst, null);
}
}
}
LOG.info("Created Hive schema: " + schema);
driverContext.setSchema(schema);
}
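/**
 * Performs the command level authorization check unless the analyzer opts out of it; KILL QUERY is
 * authorized separately in KillQueryImpl.
 */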
private void authorize(BaseSemanticAnalyzer sem) throws HiveException, CommandProcessorException {
// do the authorization check
if (!sem.skipAuthorization()) {
try {
perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
// The authorization check for KILL QUERY is done in KillQueryImpl, because either an admin or the
// operation owner can perform the operation, which is not directly supported by the authorizer.
if (driverContext.getQueryState().getHiveOperation() != HiveOperation.KILL_QUERY) {
CommandAuthorizer.doAuthorization(driverContext.getQueryState().getHiveOperation(), sem, context.getCmd());
}
} catch (AuthorizationException authExp) {
CONSOLE.printError("Authorization failed:" + authExp.getMessage() + ". Use SHOW GRANT to get more details.");
throw DriverUtils.createProcessorException(driverContext, 403, authExp.getMessage(), "42000", null);
} finally {
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
}
}
}
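/**
 * When the corresponding configuration flags are set, generates the EXPLAIN output of the plan and
 * logs it (to the console or the log file) and/or stores it for the HiveServer2 web UI.
 */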
private void explainOutput(BaseSemanticAnalyzer sem, QueryPlan plan) throws IOException {
if (driverContext.getConf().getBoolVar(ConfVars.HIVE_LOG_EXPLAIN_OUTPUT) ||
driverContext.getConf().getBoolVar(ConfVars.HIVE_SERVER2_WEBUI_EXPLAIN_OUTPUT)) {
String explainOutput = ExplainTask.getExplainOutput(sem, plan, tree, driverContext.getQueryState(),
context, driverContext.getConf());
if (explainOutput != null) {
if (driverContext.getConf().getBoolVar(ConfVars.HIVE_LOG_EXPLAIN_OUTPUT)) {
if (driverContext.getConf().getBoolVar(ConfVars.HIVE_LOG_EXPLAIN_OUTPUT_TO_CONSOLE)) {
CONSOLE.printInfo("EXPLAIN output for queryid " + driverContext.getQueryId() + " : " + explainOutput);
} else {
LOG.info("EXPLAIN output for queryid " + driverContext.getQueryId() + " : " + explainOutput);
}
}
if (driverContext.getConf().isWebUiQueryInfoCacheEnabled() &&
driverContext.getConf().getBoolVar(ConfVars.HIVE_SERVER2_WEBUI_EXPLAIN_OUTPUT)) {
driverContext.getQueryDisplay().setExplainPlan(explainOutput);
}
}
}
}
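/**
 * Converts a compilation failure into a CommandProcessorException carrying the matching ErrorMsg
 * code and SQLState, logging the error unless it is a recompilation request.
 */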
private void handleException(Exception e) throws CommandProcessorException {
ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage());
String errorMessage = "FAILED: " + e.getClass().getSimpleName();
if (error != ErrorMsg.GENERIC_ERROR) {
errorMessage += " [Error " + error.getErrorCode() + "]:";
}
// HIVE-4889
if ((e instanceof IllegalArgumentException) && e.getMessage() == null && e.getCause() != null) {
errorMessage += " " + e.getCause().getMessage();
} else {
errorMessage += " " + e.getMessage();
}
if (error == ErrorMsg.TXNMGR_NOT_ACID) {
errorMessage += ". Failed command: " + driverContext.getQueryString();
}
if (!(e instanceof ReCompileException)) {
CONSOLE.printError(errorMessage, "\n" + StringUtils.stringifyException(e));
}
throw DriverUtils.createProcessorException(driverContext, error.getErrorCode(), errorMessage, error.getSQLState(),
e);
}
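/**
 * Finishes compilation: runs the after-compilation hook if parsing succeeded, closes the COMPILE perf
 * timer, records metastore call timings, and moves the driver state to interrupted or finished.
 */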
private void cleanUp(Throwable compileException, boolean parsed, boolean deferClose) {
// Trigger the post-compilation hook. Note that if the compilation fails here, the
// before/after execution hooks will never be executed.
if (parsed) {
try {
driverContext.getHookRunner().runAfterCompilationHook(driverContext, context, compileException);
} catch (Exception e) {
LOG.warn("Failed when invoking query after-compilation hook.", e);
}
}
double duration = perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.COMPILE) / 1000.00;
ImmutableMap<String, Long> compileHMSTimings = Hive.dumpMetaCallTimingWithoutEx("compilation");
driverContext.getQueryDisplay().setHmsTimings(QueryDisplay.Phase.COMPILATION, compileHMSTimings);
if (driverState.isAborted()) {
driverState.compilationInterruptedWithLocking(deferClose);
LOG.info("Compiling command(queryId={}) has been interrupted after {} seconds", driverContext.getQueryId(),
duration);
} else {
driverState.compilationFinishedWithLocking(compileException != null);
LOG.info("Completed compiling command(queryId={}); Time taken: {} seconds", driverContext.getQueryId(),
duration);
}
}
}