/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec;
import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME;
import org.apache.commons.lang3.tuple.ImmutablePair;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.Serializable;
import java.lang.annotation.Annotation;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.common.jsonexplain.JsonParser;
import org.apache.hadoop.hive.common.jsonexplain.JsonParserFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.Validator.StringSet;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.Explain;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExplainWork;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SparkWork;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo;
import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.hive.ql.security.authorization.AuthorizationFactory;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.hive.common.util.AnnotationUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* ExplainTask implementation.
*
**/
public class ExplainTask extends Task<ExplainWork> implements Serializable {
private static final long serialVersionUID = 1L;
public static final String EXPL_COLUMN_NAME = "Explain";
public static final String OUTPUT_OPERATORS = "OutputOperators:";
  private final Set<Operator<?>> visitedOps = new HashSet<Operator<?>>();
private boolean isLogical = false;
protected final Logger LOG;
public ExplainTask() {
super();
LOG = LoggerFactory.getLogger(this.getClass().getName());
}
  /*
   * Returns the dependencies for the query passed to EXPLAIN.
   * The dependencies are the set of input tables and partitions, and are
   * provided back as JSON output for the EXPLAIN command.
   * Example output:
   * {"input_tables":[{"tablename": "default@test_sambavi_v1", "tabletype": "TABLE"}],
   *  "input_partitions":["default@srcpart@ds=2008-04-08/hr=11"]}
   */
private static JSONObject getJSONDependencies(ExplainWork work)
throws Exception {
assert(work.getDependency());
JSONObject outJSONObject = new JSONObject(new LinkedHashMap<>());
JSONArray inputTableInfo = new JSONArray();
JSONArray inputPartitionInfo = new JSONArray();
for (ReadEntity input: work.getInputs()) {
switch (input.getType()) {
case TABLE:
Table table = input.getTable();
JSONObject tableInfo = new JSONObject();
tableInfo.put("tablename", table.getCompleteName());
tableInfo.put("tabletype", table.getTableType().toString());
if ((input.getParents() != null) && (!input.getParents().isEmpty())) {
tableInfo.put("tableParents", input.getParents().toString());
}
inputTableInfo.put(tableInfo);
break;
case PARTITION:
JSONObject partitionInfo = new JSONObject();
partitionInfo.put("partitionName", input.getPartition().getCompleteName());
if ((input.getParents() != null) && (!input.getParents().isEmpty())) {
partitionInfo.put("partitionParents", input.getParents().toString());
}
inputPartitionInfo.put(partitionInfo);
break;
default:
break;
}
}
outJSONObject.put("input_tables", inputTableInfo);
outJSONObject.put("input_partitions", inputPartitionInfo);
return outJSONObject;
}
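  /*
   * Illustrative sketch (not part of the original class): one way a caller might read the
   * dependencies JSON built above, using only the org.json types already imported by this
   * file. The helper name below is hypothetical.
   *
   *   static List<String> tableNamesFromDependencies(JSONObject deps) throws JSONException {
   *     List<String> names = new ArrayList<String>();
   *     // "input_tables" is the array populated for TABLE inputs above.
   *     JSONArray tables = deps.getJSONArray("input_tables");
   *     for (int i = 0; i < tables.length(); i++) {
   *       names.add(tables.getJSONObject(i).getString("tablename"));
   *     }
   *     return names;
   *   }
   */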
public JSONObject getJSONLogicalPlan(PrintStream out, ExplainWork work) throws Exception {
isLogical = true;
JSONObject outJSONObject = new JSONObject(new LinkedHashMap<>());
boolean jsonOutput = work.isFormatted();
if (jsonOutput) {
out = null;
}
if (work.getParseContext() != null) {
if (out != null) {
out.print("LOGICAL PLAN:");
}
JSONObject jsonPlan = outputMap(work.getParseContext().getTopOps(), true,
out, work.getExtended(), jsonOutput, 0);
if (out != null) {
out.println();
}
if (jsonOutput) {
outJSONObject.put("LOGICAL PLAN", jsonPlan);
}
} else {
System.err.println("No parse context!");
}
return outJSONObject;
}
private static String trueCondNameVectorizationEnabled =
HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname + " IS true";
private static String falseCondNameVectorizationEnabled =
HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname + " IS false";
  private ImmutablePair<Boolean, JSONObject> outputPlanVectorization(PrintStream out, boolean jsonOutput)
throws Exception {
if (out != null) {
out.println("PLAN VECTORIZATION:");
}
JSONObject json = jsonOutput ? new JSONObject(new LinkedHashMap<>()) : null;
HiveConf hiveConf = queryState.getConf();
boolean isVectorizationEnabled = HiveConf.getBoolVar(hiveConf,
HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
String isVectorizationEnabledCondName =
(isVectorizationEnabled ?
trueCondNameVectorizationEnabled :
falseCondNameVectorizationEnabled);
    List<String> isVectorizationEnabledCondList = Arrays.asList(isVectorizationEnabledCondName);
if (out != null) {
out.print(indentString(2));
out.print("enabled: ");
out.println(isVectorizationEnabled);
out.print(indentString(2));
if (!isVectorizationEnabled) {
out.print("enabledConditionsNotMet: ");
} else {
out.print("enabledConditionsMet: ");
}
out.println(isVectorizationEnabledCondList);
}
if (jsonOutput) {
json.put("enabled", isVectorizationEnabled);
JSONArray jsonArray = new JSONArray(Arrays.asList(isVectorizationEnabledCondName));
if (!isVectorizationEnabled) {
json.put("enabledConditionsNotMet", jsonArray);
} else {
json.put("enabledConditionsMet", jsonArray);
}
}
    return new ImmutablePair<Boolean, JSONObject>(isVectorizationEnabled, jsonOutput ? json : null);
}
public JSONObject getJSONPlan(PrintStream out, ExplainWork work)
throws Exception {
return getJSONPlan(out, work.getRootTasks(), work.getFetchTask(),
work.isFormatted(), work.getExtended(), work.isAppendTaskType());
}
  public JSONObject getJSONPlan(PrintStream out, List<Task<?>> tasks, Task<?> fetchTask,
boolean jsonOutput, boolean isExtended, boolean appendTaskType) throws Exception {
// If the user asked for a formatted output, dump the json output
// in the output stream
JSONObject outJSONObject = new JSONObject(new LinkedHashMap<>());
if (jsonOutput) {
out = null;
}
    List<Task> ordered = StageIDsRearranger.getExplainOrder(conf, tasks);
if (fetchTask != null) {
fetchTask.setParentTasks((List)StageIDsRearranger.getFetchSources(tasks));
if (fetchTask.getNumParent() == 0) {
fetchTask.setRootTask(true);
}
ordered.add(fetchTask);
}
boolean suppressOthersForVectorization = false;
if (this.work != null && this.work.isVectorization()) {
      ImmutablePair<Boolean, JSONObject> planVecPair = outputPlanVectorization(out, jsonOutput);
if (this.work.isVectorizationOnly()) {
// Suppress the STAGES if vectorization is off.
suppressOthersForVectorization = !planVecPair.left;
}
if (out != null) {
out.println();
}
if (jsonOutput) {
outJSONObject.put("PLAN VECTORIZATION", planVecPair.right);
}
}
if (!suppressOthersForVectorization) {
JSONObject jsonDependencies = outputDependencies(out, jsonOutput, appendTaskType, ordered);
if (out != null) {
out.println();
}
if (jsonOutput) {
outJSONObject.put("STAGE DEPENDENCIES", jsonDependencies);
}
// Go over all the tasks and dump out the plans
JSONObject jsonPlan = outputStagePlans(out, ordered,
jsonOutput, isExtended);
if (jsonOutput) {
outJSONObject.put("STAGE PLANS", jsonPlan);
}
if (fetchTask != null) {
fetchTask.setParentTasks(null);
}
}
return jsonOutput ? outJSONObject : null;
}
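  /*
   * For orientation, a sketch of the top-level object assembled above when jsonOutput is
   * true. The keys come from the puts in this method; nested contents are elided, and the
   * "PLAN VECTORIZATION" entry appears only when vectorization detail was requested:
   *
   *   {
   *     "PLAN VECTORIZATION": {"enabled": true,
   *                            "enabledConditionsMet": ["hive.vectorized.execution.enabled IS true"]},
   *     "STAGE DEPENDENCIES": { ... },
   *     "STAGE PLANS": { ... }
   *   }
   */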
  private List<String> toString(Collection<?> objects) {
    List<String> list = new ArrayList<String>();
for (Object object : objects) {
list.add(String.valueOf(object));
}
return list;
}
private Object toJson(String header, String message, PrintStream out, ExplainWork work)
throws Exception {
if (work.isFormatted()) {
return message;
}
out.print(header);
out.println(": ");
out.print(indentString(2));
out.println(message);
return null;
}
  private Object toJson(String header, List<String> messages, PrintStream out, ExplainWork work)
throws Exception {
if (work.isFormatted()) {
return new JSONArray(messages);
}
out.print(header);
out.println(": ");
for (String message : messages) {
out.print(indentString(2));
out.print(message);
out.println();
}
return null;
}
@Override
public int execute(DriverContext driverContext) {
PrintStream out = null;
try {
Path resFile = work.getResFile();
OutputStream outS = resFile.getFileSystem(conf).create(resFile);
out = new PrintStream(outS);
if (work.isLogical()) {
JSONObject jsonLogicalPlan = getJSONLogicalPlan(out, work);
if (work.isFormatted()) {
out.print(jsonLogicalPlan);
}
} else if (work.isAuthorize()) {
JSONObject jsonAuth = collectAuthRelatedEntities(out, work);
if (work.isFormatted()) {
out.print(jsonAuth);
}
} else if (work.getDependency()) {
JSONObject jsonDependencies = getJSONDependencies(work);
out.print(jsonDependencies);
} else {
if (work.isUserLevelExplain()) {
          // Because of how JsonParserFactory is implemented, we are guaranteed
          // to get a TezJsonParser here.
JsonParser jsonParser = JsonParserFactory.getParser(conf);
work.getConfig().setFormatted(true);
JSONObject jsonPlan = getJSONPlan(out, work);
if (work.getCboInfo() != null) {
jsonPlan.put("cboInfo", work.getCboInfo());
}
try {
jsonParser.print(jsonPlan, out);
} catch (Exception e) {
            // If anything goes wrong, bail out and fall back to the normal explain.
LOG.error("Running explain user level has problem: " + e.toString()
+ ". Falling back to normal explain");
work.getConfig().setFormatted(false);
work.getConfig().setUserLevelExplain(false);
jsonPlan = getJSONPlan(out, work);
}
} else {
JSONObject jsonPlan = getJSONPlan(out, work);
if (work.isFormatted()) {
out.print(jsonPlan);
}
}
}
out.close();
out = null;
return (0);
}
catch (Exception e) {
console.printError("Failed with exception " + e.getMessage(),
"\n" + StringUtils.stringifyException(e));
return (1);
}
finally {
IOUtils.closeStream(out);
}
}
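  /*
   * A minimal sketch (hypothetical helper, not part of this class) of the try-then-fall-back
   * pattern used by the user-level explain branch in execute(): JsonParserFactory.getParser(conf)
   * is expected to hand back a TezJsonParser, and any failure while pretty-printing falls back
   * to plainer output. In execute() itself the fallback re-runs getJSONPlan() with formatting
   * and user-level mode disabled.
   *
   *   static void printUserLevel(JsonParser parser, JSONObject plan, PrintStream out) {
   *     try {
   *       parser.print(plan, out);   // user-level rendering of the formatted plan
   *     } catch (Exception e) {
   *       out.print(plan);           // simplified fallback: emit the raw JSON plan
   *     }
   *   }
   */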
private JSONObject collectAuthRelatedEntities(PrintStream out, ExplainWork work)
throws Exception {
BaseSemanticAnalyzer analyzer = work.getAnalyzer();
HiveOperation operation = queryState.getHiveOperation();
JSONObject object = new JSONObject(new LinkedHashMap<>());
Object jsonInput = toJson("INPUTS", toString(analyzer.getInputs()), out, work);
if (work.isFormatted()) {
object.put("INPUTS", jsonInput);
}
Object jsonOutput = toJson("OUTPUTS", toString(analyzer.getOutputs()), out, work);
if (work.isFormatted()) {
object.put("OUTPUTS", jsonOutput);
}
String userName = SessionState.get().getAuthenticator().getUserName();
Object jsonUser = toJson("CURRENT_USER", userName, out, work);
if (work.isFormatted()) {
object.put("CURRENT_USER", jsonUser);
}
Object jsonOperation = toJson("OPERATION", operation.name(), out, work);
if (work.isFormatted()) {
object.put("OPERATION", jsonOperation);
}
if (analyzer.skipAuthorization()) {
return object;
}
    final List<String> exceptions = new ArrayList<String>();
Object delegate = SessionState.get().getActiveAuthorizer();
if (delegate != null) {
Class itface = SessionState.get().getAuthorizerInterface();
Object authorizer = AuthorizationFactory.create(delegate, itface,
new AuthorizationFactory.AuthorizationExceptionHandler() {
public void exception(Exception exception) {
exceptions.add(exception.getMessage());
}
});
SessionState.get().setActiveAuthorizer(authorizer);
try {
Driver.doAuthorization(queryState.getHiveOperation(), analyzer, "");
} finally {
SessionState.get().setActiveAuthorizer(delegate);
}
}
if (!exceptions.isEmpty()) {
Object jsonFails = toJson("AUTHORIZATION_FAILURES", exceptions, out, work);
if (work.isFormatted()) {
object.put("AUTHORIZATION_FAILURES", jsonFails);
}
}
return object;
}
private static String indentString(int indent) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < indent; ++i) {
sb.append(" ");
}
return sb.toString();
}
  private JSONObject outputMap(Map<?, ?> mp, boolean hasHeader, PrintStream out,
boolean extended, boolean jsonOutput, int indent) throws Exception {
TreeMap