org.apache.hadoop.hive.ql.parse.SemanticAnalyzer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hive-exec Show documentation
Show all versions of hive-exec Show documentation
Hive is a data warehouse infrastructure built on top of Hadoop see
http://wiki.apache.org/hadoop/Hive
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.parse;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.antlr.runtime.tree.BaseTree;
import org.antlr.runtime.tree.Tree;
import org.antlr.runtime.tree.TreeWizard;
import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.ArchiveUtils;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.ColumnStatsTask;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.ExecDriver;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RecordReader;
import org.apache.hadoop.hive.ql.exec.RecordWriter;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.StatsTask;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.WindowFunctionInfo;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.metadata.DummyPartition;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1;
import org.apache.hadoop.hive.ql.optimizer.GenMROperator;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1;
import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2;
import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3;
import org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1;
import org.apache.hadoop.hive.ql.optimizer.GenMRUnion1;
import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
import org.apache.hadoop.hive.ql.optimizer.MapJoinFactory;
import org.apache.hadoop.hive.ql.optimizer.Optimizer;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalOptimizer;
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFInputSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputType;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionedTableFunctionSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.CurrentRowSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.RangeBoundarySpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.ValueBoundarySpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFrameSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc;
import org.apache.hadoop.hive.ql.plan.ColumnStatsWork;
import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc;
import org.apache.hadoop.hive.ql.plan.CreateViewDesc;
import org.apache.hadoop.hive.ql.plan.DDLWork;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
import org.apache.hadoop.hive.ql.plan.ExtractDesc;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
import org.apache.hadoop.hive.ql.plan.ForwardDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc;
import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc;
import org.apache.hadoop.hive.ql.plan.LimitDesc;
import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PTFDesc;
import org.apache.hadoop.hive.ql.plan.PTFDesc.OrderExpressionDef;
import org.apache.hadoop.hive.ql.plan.PTFDesc.PTFExpressionDef;
import org.apache.hadoop.hive.ql.plan.PTFDesc.PartitionedTableFunctionDef;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.ScriptDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.UDTFDesc;
import org.apache.hadoop.hive.ql.plan.UnionDesc;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapred.InputFormat;
/**
* Implementation of the semantic analyzer.
*/
public class SemanticAnalyzer extends BaseSemanticAnalyzer {
private HashMap opToPartPruner;
private HashMap opToPartList;
private HashMap> topOps;
private HashMap> topSelOps;
private LinkedHashMap, OpParseContext> opParseCtx;
private List loadTableWork;
private List loadFileWork;
private Map joinContext;
private Map smbMapJoinContext;
private final HashMap topToTable;
private final Map fsopToTable;
private final List reduceSinkOperatorsAddedByEnforceBucketingSorting;
private QB qb;
private ASTNode ast;
private int destTableId;
private UnionProcContext uCtx;
List> listMapJoinOpsNoReducer;
private HashMap opToSamplePruner;
private final Map> opToPartToSkewedPruner;
/**
* a map for the split sampling, from ailias to an instance of SplitSample
* that describes percentage and number.
*/
private final HashMap nameToSplitSample;
Map> groupOpToInputTables;
Map prunedPartitions;
private List resultSchema;
private CreateViewDesc createVwDesc;
private ArrayList viewsExpanded;
private ASTNode viewSelect;
private final UnparseTranslator unparseTranslator;
private final GlobalLimitCtx globalLimitCtx = new GlobalLimitCtx();
// prefix for column names auto generated by hive
private final String autogenColAliasPrfxLbl;
private final boolean autogenColAliasPrfxIncludeFuncName;
// Keep track of view alias to read entity corresponding to the view
// For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T
// keeps track of aliases for V3, V3:V2, V3:V2:V1.
// This is used when T is added as an input for the query, the parents of T is
// derived from the alias V3:V2:V1:T
private final Map viewAliasToInput = new HashMap();
// Max characters when auto generating the column name with func name
private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
// flag for no scan during analyze ... compute statistics
protected boolean noscan = false;
//flag for partial scan during analyze ... compute statistics
protected boolean partialscan = false;
private static class Phase1Ctx {
String dest;
int nextNum;
}
public SemanticAnalyzer(HiveConf conf) throws SemanticException {
super(conf);
opToPartPruner = new HashMap();
opToPartList = new HashMap();
opToSamplePruner = new HashMap();
nameToSplitSample = new HashMap();
topOps = new HashMap>();
topSelOps = new HashMap>();
loadTableWork = new ArrayList();
loadFileWork = new ArrayList();
opParseCtx = new LinkedHashMap, OpParseContext>();
joinContext = new HashMap();
smbMapJoinContext = new HashMap();
topToTable = new HashMap();
fsopToTable = new HashMap();
reduceSinkOperatorsAddedByEnforceBucketingSorting = new ArrayList();
destTableId = 1;
uCtx = null;
listMapJoinOpsNoReducer = new ArrayList>();
groupOpToInputTables = new HashMap>();
prunedPartitions = new HashMap();
unparseTranslator = new UnparseTranslator();
autogenColAliasPrfxLbl = HiveConf.getVar(conf,
HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL);
autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME);
queryProperties = new QueryProperties();
opToPartToSkewedPruner = new HashMap>();
}
@Override
protected void reset() {
super.reset();
loadTableWork.clear();
loadFileWork.clear();
topOps.clear();
topSelOps.clear();
destTableId = 1;
idToTableNameMap.clear();
qb = null;
ast = null;
uCtx = null;
joinContext.clear();
smbMapJoinContext.clear();
opParseCtx.clear();
groupOpToInputTables.clear();
prunedPartitions.clear();
}
public void initParseCtx(ParseContext pctx) {
opToPartPruner = pctx.getOpToPartPruner();
opToPartList = pctx.getOpToPartList();
opToSamplePruner = pctx.getOpToSamplePruner();
topOps = pctx.getTopOps();
topSelOps = pctx.getTopSelOps();
opParseCtx = pctx.getOpParseCtx();
loadTableWork = pctx.getLoadTableWork();
loadFileWork = pctx.getLoadFileWork();
joinContext = pctx.getJoinContext();
smbMapJoinContext = pctx.getSmbMapJoinContext();
ctx = pctx.getContext();
destTableId = pctx.getDestTableId();
idToTableNameMap = pctx.getIdToTableNameMap();
uCtx = pctx.getUCtx();
listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer();
qb = pctx.getQB();
groupOpToInputTables = pctx.getGroupOpToInputTables();
prunedPartitions = pctx.getPrunedPartitions();
fetchTask = pctx.getFetchTask();
setLineageInfo(pctx.getLineageInfo());
}
public ParseContext getParseContext() {
return new ParseContext(conf, qb, ast, opToPartPruner, opToPartList, topOps,
topSelOps, opParseCtx, joinContext, smbMapJoinContext, topToTable,
fsopToTable, loadTableWork,
loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
opToPartToSkewedPruner, viewAliasToInput,
reduceSinkOperatorsAddedByEnforceBucketingSorting,
queryProperties);
}
@SuppressWarnings("nls")
public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias)
throws SemanticException {
assert (ast.getToken() != null);
switch (ast.getToken().getType()) {
case HiveParser.TOK_QUERY: {
QB qb = new QB(id, alias, true);
Phase1Ctx ctx_1 = initPhase1Ctx();
doPhase1(ast, qb, ctx_1);
qbexpr.setOpcode(QBExpr.Opcode.NULLOP);
qbexpr.setQB(qb);
}
break;
case HiveParser.TOK_UNION: {
qbexpr.setOpcode(QBExpr.Opcode.UNION);
// query 1
assert (ast.getChild(0) != null);
QBExpr qbexpr1 = new QBExpr(alias + "-subquery1");
doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id + "-subquery1",
alias + "-subquery1");
qbexpr.setQBExpr1(qbexpr1);
// query 2
assert (ast.getChild(0) != null);
QBExpr qbexpr2 = new QBExpr(alias + "-subquery2");
doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + "-subquery2",
alias + "-subquery2");
qbexpr.setQBExpr2(qbexpr2);
}
break;
}
}
private LinkedHashMap doPhase1GetAggregationsFromSelect(
ASTNode selExpr, QB qb, String dest) {
// Iterate over the selects search for aggregation Trees.
// Use String as keys to eliminate duplicate trees.
LinkedHashMap aggregationTrees = new LinkedHashMap();
for (int i = 0; i < selExpr.getChildCount(); ++i) {
ASTNode sel = (ASTNode) selExpr.getChild(i);
doPhase1GetAllAggregations((ASTNode) sel.getChild(0), aggregationTrees);
}
/*
* remove any aggregation to be handled by Windowing.
*/
if ( queryProperties.hasWindowing() && qb.getWindowingSpec(dest) != null ) {
HashMap aliasToWdwExprs = qb.getParseInfo().getWindowingExprsForClause(dest);
LinkedHashMap aggTreesMinusWindowing = new LinkedHashMap();
for(Map.Entry entry : aggregationTrees.entrySet()) {
if ( !aliasToWdwExprs.containsKey(entry.getKey())) {
aggTreesMinusWindowing.put(entry.getKey(), entry.getValue());
}
}
aggregationTrees = aggTreesMinusWindowing;
}
return aggregationTrees;
}
private void doPhase1GetColumnAliasesFromSelect(
ASTNode selectExpr, QBParseInfo qbp) {
for (int i = 0; i < selectExpr.getChildCount(); ++i) {
ASTNode selExpr = (ASTNode) selectExpr.getChild(i);
if ((selExpr.getToken().getType() == HiveParser.TOK_SELEXPR)
&& (selExpr.getChildCount() == 2)) {
String columnAlias = unescapeIdentifier(selExpr.getChild(1).getText());
qbp.setExprToColumnAlias((ASTNode) selExpr.getChild(0), columnAlias);
}
}
}
/**
* DFS-scan the expressionTree to find all aggregation subtrees and put them
* in aggregations.
*
* @param expressionTree
* @param aggregations
* the key to the HashTable is the toStringTree() representation of
* the aggregation subtree.
*/
private void doPhase1GetAllAggregations(ASTNode expressionTree,
HashMap aggregations) {
int exprTokenType = expressionTree.getToken().getType();
if (exprTokenType == HiveParser.TOK_FUNCTION
|| exprTokenType == HiveParser.TOK_FUNCTIONDI
|| exprTokenType == HiveParser.TOK_FUNCTIONSTAR) {
assert (expressionTree.getChildCount() != 0);
if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {
String functionName = unescapeIdentifier(expressionTree.getChild(0)
.getText());
if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) {
aggregations.put(expressionTree.toStringTree(), expressionTree);
FunctionInfo fi = FunctionRegistry.getFunctionInfo(functionName);
if (!fi.isNative()) {
unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree
.getChild(0));
}
return;
}
}
}
for (int i = 0; i < expressionTree.getChildCount(); i++) {
doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i),
aggregations);
}
}
private List doPhase1GetDistinctFuncExprs(
HashMap aggregationTrees) throws SemanticException {
List exprs = new ArrayList();
for (Map.Entry entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
assert (value != null);
if (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI) {
exprs.add(value);
}
}
return exprs;
}
public static String generateErrorMessage(ASTNode ast, String message) {
StringBuilder sb = new StringBuilder();
sb.append(ast.getLine());
sb.append(":");
sb.append(ast.getCharPositionInLine());
sb.append(" ");
sb.append(message);
sb.append(". Error encountered near token '");
sb.append(ErrorMsg.getText(ast));
sb.append("'");
return sb.toString();
}
/**
* Goes though the tabref tree and finds the alias for the table. Once found,
* it records the table name-> alias association in aliasToTabs. It also makes
* an association from the alias to the table AST in parse info.
*
* @return the alias of the table
*/
private String processTable(QB qb, ASTNode tabref) throws SemanticException {
// For each table reference get the table name
// and the alias (if alias is not present, the table name
// is used as an alias)
boolean tableSamplePresent = false;
boolean splitSamplePresent = false;
int aliasIndex = 0;
if (tabref.getChildCount() == 2) {
// tablename tablesample
// OR
// tablename alias
ASTNode ct = (ASTNode) tabref.getChild(1);
if (ct.getToken().getType() == HiveParser.TOK_TABLEBUCKETSAMPLE) {
tableSamplePresent = true;
} else if (ct.getToken().getType() == HiveParser.TOK_TABLESPLITSAMPLE) {
splitSamplePresent = true;
} else {
aliasIndex = 1;
}
} else if (tabref.getChildCount() == 3) {
// table name table sample alias
aliasIndex = 2;
ASTNode ct = (ASTNode) tabref.getChild(1);
if (ct.getToken().getType() == HiveParser.TOK_TABLEBUCKETSAMPLE) {
tableSamplePresent = true;
} else if (ct.getToken().getType() == HiveParser.TOK_TABLESPLITSAMPLE) {
splitSamplePresent = true;
}
}
ASTNode tableTree = (ASTNode) (tabref.getChild(0));
String tabIdName = getUnescapedName(tableTree);
String alias;
if (aliasIndex != 0) {
alias = unescapeIdentifier(tabref.getChild(aliasIndex).getText());
}
else {
alias = getUnescapedUnqualifiedTableName(tableTree);
}
// If the alias is already there then we have a conflict
if (qb.exists(alias)) {
throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref
.getChild(aliasIndex)));
}
if (tableSamplePresent) {
ASTNode sampleClause = (ASTNode) tabref.getChild(1);
ArrayList sampleCols = new ArrayList();
if (sampleClause.getChildCount() > 2) {
for (int i = 2; i < sampleClause.getChildCount(); i++) {
sampleCols.add((ASTNode) sampleClause.getChild(i));
}
}
// TODO: For now only support sampling on up to two columns
// Need to change it to list of columns
if (sampleCols.size() > 2) {
throw new SemanticException(generateErrorMessage(
(ASTNode) tabref.getChild(0),
ErrorMsg.SAMPLE_RESTRICTION.getMsg()));
}
qb.getParseInfo().setTabSample(
alias,
new TableSample(
unescapeIdentifier(sampleClause.getChild(0).getText()),
unescapeIdentifier(sampleClause.getChild(1).getText()),
sampleCols));
if (unparseTranslator.isEnabled()) {
for (ASTNode sampleCol : sampleCols) {
unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol
.getChild(0));
}
}
} else if (splitSamplePresent) {
ASTNode sampleClause = (ASTNode) tabref.getChild(1);
Tree type = sampleClause.getChild(0);
Tree numerator = sampleClause.getChild(1);
String value = unescapeIdentifier(numerator.getText());
SplitSample sample;
if (type.getType() == HiveParser.TOK_PERCENT) {
assertCombineInputFormat(numerator, "Percentage");
Double percent = Double.valueOf(value).doubleValue();
if (percent < 0 || percent > 100) {
throw new SemanticException(generateErrorMessage((ASTNode) numerator,
"Sampling percentage should be between 0 and 100"));
}
int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
sample = new SplitSample(percent, seedNum);
} else if (type.getType() == HiveParser.TOK_ROWCOUNT) {
sample = new SplitSample(Integer.valueOf(value));
} else {
assert type.getType() == HiveParser.TOK_LENGTH;
assertCombineInputFormat(numerator, "Total Length");
long length = Integer.valueOf(value.substring(0, value.length() - 1));
char last = value.charAt(value.length() - 1);
if (last == 'k' || last == 'K') {
length <<= 10;
} else if (last == 'm' || last == 'M') {
length <<= 20;
} else if (last == 'g' || last == 'G') {
length <<= 30;
}
int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
sample = new SplitSample(length, seedNum);
}
String alias_id = getAliasId(alias, qb);
nameToSplitSample.put(alias_id, sample);
}
// Insert this map into the stats
qb.setTabAlias(alias, tabIdName);
qb.addAlias(alias);
qb.getParseInfo().setSrcForAlias(alias, tableTree);
unparseTranslator.addTableNameTranslation(tableTree, db.getCurrentDatabase());
if (aliasIndex != 0) {
unparseTranslator.addIdentifierTranslation((ASTNode) tabref
.getChild(aliasIndex));
}
return alias;
}
private void assertCombineInputFormat(Tree numerator, String message) throws SemanticException {
String inputFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT);
if (!inputFormat.equals(CombineHiveInputFormat.class.getName())) {
throw new SemanticException(generateErrorMessage((ASTNode) numerator,
message + " sampling is not supported in " + inputFormat));
}
}
private String processSubQuery(QB qb, ASTNode subq) throws SemanticException {
// This is a subquery and must have an alias
if (subq.getChildCount() != 2) {
throw new SemanticException(ErrorMsg.NO_SUBQUERY_ALIAS.getMsg(subq));
}
ASTNode subqref = (ASTNode) subq.getChild(0);
String alias = unescapeIdentifier(subq.getChild(1).getText());
// Recursively do the first phase of semantic analysis for the subquery
QBExpr qbexpr = new QBExpr(alias);
doPhase1QBExpr(subqref, qbexpr, qb.getId(), alias);
// If the alias is already there then we have a conflict
if (qb.exists(alias)) {
throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq
.getChild(1)));
}
// Insert this map into the stats
qb.setSubqAlias(alias, qbexpr);
qb.addAlias(alias);
unparseTranslator.addIdentifierTranslation((ASTNode) subq.getChild(1));
return alias;
}
private boolean isJoinToken(ASTNode node) {
if ((node.getToken().getType() == HiveParser.TOK_JOIN)
|| (node.getToken().getType() == HiveParser.TOK_CROSSJOIN)
|| (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
|| (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
|| (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN)
|| (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN)
|| (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) {
return true;
}
return false;
}
/**
* Given the AST with TOK_JOIN as the root, get all the aliases for the tables
* or subqueries in the join.
*
* @param qb
* @param join
* @throws SemanticException
*/
@SuppressWarnings("nls")
private void processJoin(QB qb, ASTNode join) throws SemanticException {
int numChildren = join.getChildCount();
if ((numChildren != 2) && (numChildren != 3)
&& join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) {
throw new SemanticException(generateErrorMessage(join,
"Join with multiple children"));
}
for (int num = 0; num < numChildren; num++) {
ASTNode child = (ASTNode) join.getChild(num);
if (child.getToken().getType() == HiveParser.TOK_TABREF) {
processTable(qb, child);
} else if (child.getToken().getType() == HiveParser.TOK_SUBQUERY) {
processSubQuery(qb, child);
} else if (child.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
queryProperties.setHasPTF(true);
processPTF(qb, child);
PTFInvocationSpec ptfInvocationSpec = qb.getPTFInvocationSpec(child);
String inputAlias = ptfInvocationSpec == null ? null :
((PartitionedTableFunctionSpec)ptfInvocationSpec.getFunction()).getAlias();;
if ( inputAlias == null ) {
throw new SemanticException(generateErrorMessage(child,
"PTF invocation in a Join must have an alias"));
}
} else if (child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) {
// SELECT * FROM src1 LATERAL VIEW udtf() AS myTable JOIN src2 ...
// is not supported. Instead, the lateral view must be in a subquery
// SELECT * FROM (SELECT * FROM src1 LATERAL VIEW udtf() AS myTable) a
// JOIN src2 ...
throw new SemanticException(ErrorMsg.LATERAL_VIEW_WITH_JOIN
.getMsg(join));
} else if (isJoinToken(child)) {
processJoin(qb, child);
}
}
}
/**
* Given the AST with TOK_LATERAL_VIEW as the root, get the alias for the
* table or subquery in the lateral view and also make a mapping from the
* alias to all the lateral view AST's.
*
* @param qb
* @param lateralView
* @return the alias for the table/subquery
* @throws SemanticException
*/
private String processLateralView(QB qb, ASTNode lateralView)
throws SemanticException {
int numChildren = lateralView.getChildCount();
assert (numChildren == 2);
ASTNode next = (ASTNode) lateralView.getChild(1);
String alias = null;
switch (next.getToken().getType()) {
case HiveParser.TOK_TABREF:
alias = processTable(qb, next);
break;
case HiveParser.TOK_SUBQUERY:
alias = processSubQuery(qb, next);
break;
case HiveParser.TOK_LATERAL_VIEW:
alias = processLateralView(qb, next);
break;
default:
throw new SemanticException(ErrorMsg.LATERAL_VIEW_INVALID_CHILD
.getMsg(lateralView));
}
alias = alias.toLowerCase();
qb.getParseInfo().addLateralViewForAlias(alias, lateralView);
qb.addAlias(alias);
return alias;
}
/**
* Phase 1: (including, but not limited to):
*
* 1. Gets all the aliases for all the tables / subqueries and makes the
* appropriate mapping in aliasToTabs, aliasToSubq 2. Gets the location of the
* destination and names the clase "inclause" + i 3. Creates a map from a
* string representation of an aggregation tree to the actual aggregation AST
* 4. Creates a mapping from the clause name to the select expression AST in
* destToSelExpr 5. Creates a mapping from a table alias to the lateral view
* AST's in aliasToLateralViews
*
* @param ast
* @param qb
* @param ctx_1
* @throws SemanticException
*/
@SuppressWarnings({"fallthrough", "nls"})
public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1)
throws SemanticException {
boolean phase1Result = true;
QBParseInfo qbp = qb.getParseInfo();
boolean skipRecursion = false;
if (ast.getToken() != null) {
skipRecursion = true;
switch (ast.getToken().getType()) {
case HiveParser.TOK_SELECTDI:
qb.countSelDi();
// fall through
case HiveParser.TOK_SELECT:
qb.countSel();
qbp.setSelExprForClause(ctx_1.dest, ast);
if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) {
qbp.setHints((ASTNode) ast.getChild(0));
}
handleWindowingExprsInSelectList(qb, ctx_1.dest, ast);
LinkedHashMap aggregations = doPhase1GetAggregationsFromSelect(ast,
qb, ctx_1.dest);
doPhase1GetColumnAliasesFromSelect(ast, qbp);
qbp.setAggregationExprsForClause(ctx_1.dest, aggregations);
qbp.setDistinctFuncExprsForClause(ctx_1.dest,
doPhase1GetDistinctFuncExprs(aggregations));
break;
case HiveParser.TOK_WHERE:
qbp.setWhrExprForClause(ctx_1.dest, ast);
break;
case HiveParser.TOK_INSERT_INTO:
String currentDatabase = db.getCurrentDatabase();
String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase);
qbp.addInsertIntoTable(tab_name);
case HiveParser.TOK_DESTINATION:
ctx_1.dest = "insclause-" + ctx_1.nextNum;
ctx_1.nextNum++;
// is there a insert in the subquery
if (qbp.getIsSubQ()) {
ASTNode ch = (ASTNode) ast.getChild(0);
if ((ch.getToken().getType() != HiveParser.TOK_DIR)
|| (((ASTNode) ch.getChild(0)).getToken().getType() != HiveParser.TOK_TMP_FILE)) {
throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY
.getMsg(ast));
}
}
qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
break;
case HiveParser.TOK_FROM:
int child_count = ast.getChildCount();
if (child_count != 1) {
throw new SemanticException(generateErrorMessage(ast,
"Multiple Children " + child_count));
}
// Check if this is a subquery / lateral view
ASTNode frm = (ASTNode) ast.getChild(0);
if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
processTable(qb, frm);
} else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
processSubQuery(qb, frm);
} else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) {
processLateralView(qb, frm);
} else if (isJoinToken(frm)) {
queryProperties.setHasJoin(true);
processJoin(qb, frm);
qbp.setJoinExpr(frm);
}else if(frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION){
queryProperties.setHasPTF(true);
processPTF(qb, frm);
}
break;
case HiveParser.TOK_CLUSTERBY:
// Get the clusterby aliases - these are aliased to the entries in the
// select list
queryProperties.setHasClusterBy(true);
qbp.setClusterByExprForClause(ctx_1.dest, ast);
break;
case HiveParser.TOK_DISTRIBUTEBY:
// Get the distribute by aliases - these are aliased to the entries in
// the
// select list
queryProperties.setHasDistributeBy(true);
qbp.setDistributeByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
} else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
}
break;
case HiveParser.TOK_SORTBY:
// Get the sort by aliases - these are aliased to the entries in the
// select list
queryProperties.setHasSortBy(true);
qbp.setSortByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg()));
} else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg()));
}
break;
case HiveParser.TOK_ORDERBY:
// Get the order by aliases - these are aliased to the entries in the
// select list
queryProperties.setHasOrderBy(true);
qbp.setOrderByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg()));
}
break;
case HiveParser.TOK_GROUPBY:
case HiveParser.TOK_ROLLUP_GROUPBY:
case HiveParser.TOK_CUBE_GROUPBY:
case HiveParser.TOK_GROUPING_SETS:
// Get the groupby aliases - these are aliased to the entries in the
// select list
queryProperties.setHasGroupBy(true);
if (qbp.getJoinExpr() != null) {
queryProperties.setHasJoinFollowedByGroupBy(true);
}
if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
}
qbp.setGroupByExprForClause(ctx_1.dest, ast);
skipRecursion = true;
// Rollup and Cubes are syntactic sugar on top of grouping sets
if (ast.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) {
qbp.getDestRollups().add(ctx_1.dest);
} else if (ast.getToken().getType() == HiveParser.TOK_CUBE_GROUPBY) {
qbp.getDestCubes().add(ctx_1.dest);
} else if (ast.getToken().getType() == HiveParser.TOK_GROUPING_SETS) {
qbp.getDestGroupingSets().add(ctx_1.dest);
}
break;
case HiveParser.TOK_HAVING:
qbp.setHavingExprForClause(ctx_1.dest, ast);
qbp.addAggregationExprsForClause(ctx_1.dest,
doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
break;
case HiveParser.KW_WINDOW:
if (!qb.hasWindowingSpec(ctx_1.dest) ) {
throw new SemanticException(generateErrorMessage(ast,
"Query has no Cluster/Distribute By; but has a Window definition"));
}
handleQueryWindowClauses(qb, ctx_1, ast);
break;
case HiveParser.TOK_LIMIT:
qbp.setDestLimit(ctx_1.dest, new Integer(ast.getChild(0).getText()));
break;
case HiveParser.TOK_ANALYZE:
// Case of analyze command
String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0));
qb.setTabAlias(table_name, table_name);
qb.addAlias(table_name);
qb.getParseInfo().setIsAnalyzeCommand(true);
qb.getParseInfo().setNoScanAnalyzeCommand(this.noscan);
qb.getParseInfo().setPartialScanAnalyzeCommand(this.partialscan);
// Allow analyze the whole table and dynamic partitions
HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
break;
case HiveParser.TOK_UNION:
// currently, we dont support subq1 union subq2 - the user has to
// explicitly say:
// select * from (subq1 union subq2) subqalias
if (!qbp.getIsSubQ()) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
}
case HiveParser.TOK_INSERT:
ASTNode destination = (ASTNode) ast.getChild(0);
Tree tab = destination.getChild(0);
// Proceed if AST contains partition & If Not Exists
if (destination.getChildCount() == 2 &&
tab.getChildCount() == 2 &&
destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
String tableName = tab.getChild(0).getChild(0).getText();
Tree partitions = tab.getChild(1);
int childCount = partitions.getChildCount();
HashMap partition = new HashMap();
for (int i = 0; i < childCount; i++) {
String partitionName = partitions.getChild(i).getChild(0).getText();
Tree pvalue = partitions.getChild(i).getChild(1);
if (pvalue == null) {
break;
}
String partitionVal = stripQuotes(pvalue.getText());
partition.put(partitionName, partitionVal);
}
// if it is a dynamic partition throw the exception
if (childCount == partition.size()) {
try {
Table table = db.getTable(tableName);
Partition parMetaData = db.getPartition(table, partition, false);
// Check partition exists if it exists skip the overwrite
if (parMetaData != null) {
phase1Result = false;
skipRecursion = true;
LOG.info("Partition already exists so insert into overwrite " +
"skipped for partition : " + parMetaData.toString());
break;
}
} catch (HiveException e) {
LOG.info("Error while getting metadata : ", e);
}
} else {
throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS
.getMsg(partition.toString()));
}
}
default:
skipRecursion = false;
break;
}
}
if (!skipRecursion) {
// Iterate over the rest of the children
int child_count = ast.getChildCount();
for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) {
// Recurse
phase1Result = phase1Result && doPhase1((ASTNode) ast.getChild(child_pos), qb, ctx_1);
}
}
return phase1Result;
}
private void getMetaData(QBExpr qbexpr) throws SemanticException {
getMetaData(qbexpr, null);
}
private void getMetaData(QBExpr qbexpr, ReadEntity parentInput)
throws SemanticException {
if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
getMetaData(qbexpr.getQB(), parentInput);
} else {
getMetaData(qbexpr.getQBExpr1(), parentInput);
getMetaData(qbexpr.getQBExpr2(), parentInput);
}
}
public void getMetaData(QB qb) throws SemanticException {
getMetaData(qb, null);
}
@SuppressWarnings("nls")
public void getMetaData(QB qb, ReadEntity parentInput) throws SemanticException {
try {
LOG.info("Get metadata for source tables");
// Go over the tables and populate the related structures.
// We have to materialize the table alias list since we might
// modify it in the middle for view rewrite.
List tabAliases = new ArrayList(qb.getTabAliases());
// Keep track of view alias to view name and read entity
// For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T
// keeps track of full view name and read entity corresponding to alias V3, V3:V2, V3:V2:V1.
// This is needed for tracking the dependencies for inputs, along with their parents.
Map> aliasToViewInfo =
new HashMap>();
for (String alias : tabAliases) {
String tab_name = qb.getTabNameForAlias(alias);
Table tab = null;
try {
tab = db.getTable(tab_name);
} catch (InvalidTableException ite) {
throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(qb
.getParseInfo().getSrcForAlias(alias)));
}
// Disallow INSERT INTO on bucketized tables
if (qb.getParseInfo().isInsertIntoTable(tab.getDbName(), tab.getTableName()) &&
tab.getNumBuckets() > 0) {
throw new SemanticException(ErrorMsg.INSERT_INTO_BUCKETIZED_TABLE.
getMsg("Table: " + tab_name));
}
// We check offline of the table, as if people only select from an
// non-existing partition of an offline table, the partition won't
// be added to inputs and validate() won't have the information to
// check the table's offline status.
// TODO: Modify the code to remove the checking here and consolidate
// it in validate()
//
if (tab.isOffline()) {
throw new SemanticException(ErrorMsg.OFFLINE_TABLE_OR_PARTITION.
getMsg("Table " + getUnescapedName(qb.getParseInfo().getSrcForAlias(alias))));
}
if (tab.isView()) {
if (qb.getParseInfo().isAnalyzeCommand()) {
throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg());
}
String fullViewName = tab.getDbName() + "." + tab.getTableName();
// Prevent view cycles
if (viewsExpanded.contains(fullViewName)) {
throw new SemanticException("Recursive view " + fullViewName +
" detected (cycle: " + StringUtils.join(viewsExpanded, " -> ") +
" -> " + fullViewName + ").");
}
replaceViewReferenceWithDefinition(qb, tab, tab_name, alias);
// This is the last time we'll see the Table objects for views, so add it to the inputs
// now
ReadEntity viewInput = new ReadEntity(tab, parentInput);
viewInput = PlanUtils.addInput(inputs, viewInput);
aliasToViewInfo.put(alias, new ObjectPair(fullViewName, viewInput));
viewAliasToInput.put(getAliasId(alias, qb), viewInput);
continue;
}
if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
throw new SemanticException(generateErrorMessage(
qb.getParseInfo().getSrcForAlias(alias),
ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg()));
}
qb.getMetaData().setSrcForAlias(alias, tab);
if (qb.getParseInfo().isAnalyzeCommand()) {
// allow partial partition specification for nonscan since noscan is fast.
tableSpec ts = new tableSpec(db, conf, (ASTNode) ast.getChild(0), true, this.noscan);
if (ts.specType == SpecType.DYNAMIC_PARTITION) { // dynamic partitions
try {
ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec);
} catch (HiveException e) {
throw new SemanticException(generateErrorMessage(
qb.getParseInfo().getSrcForAlias(alias),
"Cannot get partitions for " + ts.partSpec), e);
}
}
// validate partial scan command
QBParseInfo qbpi = qb.getParseInfo();
if (qbpi.isPartialScanAnalyzeCommand()) {
Class extends InputFormat> inputFormatClass = null;
switch (ts.specType) {
case TABLE_ONLY:
inputFormatClass = ts.tableHandle.getInputFormatClass();
break;
case STATIC_PARTITION:
inputFormatClass = ts.partHandle.getInputFormatClass();
break;
default:
assert false;
}
// throw a HiveException for non-rcfile.
if (!inputFormatClass.equals(RCFileInputFormat.class)) {
throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_NON_RCFILE.getMsg());
}
}
qb.getParseInfo().addTableSpec(alias, ts);
}
}
LOG.info("Get metadata for subqueries");
// Go over the subqueries and getMetaData for these
for (String alias : qb.getSubqAliases()) {
boolean wasView = aliasToViewInfo.containsKey(alias);
ReadEntity newParentInput = null;
if (wasView) {
viewsExpanded.add(aliasToViewInfo.get(alias).getFirst());
newParentInput = aliasToViewInfo.get(alias).getSecond();
}
QBExpr qbexpr = qb.getSubqForAlias(alias);
getMetaData(qbexpr, newParentInput);
if (wasView) {
viewsExpanded.remove(viewsExpanded.size() - 1);
}
}
RowFormatParams rowFormatParams = new RowFormatParams();
AnalyzeCreateCommonVars shared = new AnalyzeCreateCommonVars();
StorageFormat storageFormat = new StorageFormat();
LOG.info("Get metadata for destination tables");
// Go over all the destination structures and populate the related
// metadata
QBParseInfo qbp = qb.getParseInfo();
for (String name : qbp.getClauseNamesForDest()) {
ASTNode ast = qbp.getDestForClause(name);
switch (ast.getToken().getType()) {
case HiveParser.TOK_TAB: {
tableSpec ts = new tableSpec(db, conf, ast);
if (ts.tableHandle.isView()) {
throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
}
Class> outputFormatClass = ts.tableHandle.getOutputFormatClass();
if (!HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) {
throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE
.getMsg(ast, "The class is " + outputFormatClass.toString()));
}
// tableSpec ts is got from the query (user specified),
// which means the user didn't specify partitions in their query,
// but whether the table itself is partitioned is not know.
if (ts.specType != SpecType.STATIC_PARTITION) {
// This is a table or dynamic partition
qb.getMetaData().setDestForAlias(name, ts.tableHandle);
// has dynamic as well as static partitions
if (ts.partSpec != null && ts.partSpec.size() > 0) {
qb.getMetaData().setPartSpecForAlias(name, ts.partSpec);
}
} else {
// This is a partition
qb.getMetaData().setDestForAlias(name, ts.partHandle);
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
// Set that variable to automatically collect stats during the MapReduce job
qb.getParseInfo().setIsInsertToTable(true);
// Add the table spec for the destination table.
qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
}
break;
}
case HiveParser.TOK_LOCAL_DIR:
case HiveParser.TOK_DIR: {
// This is a dfs file
String fname = stripQuotes(ast.getChild(0).getText());
if ((!qb.getParseInfo().getIsSubQ())
&& (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) {
if (qb.isCTAS()) {
qb.setIsQuery(false);
ctx.setResDir(null);
ctx.setResFile(null);
// allocate a temporary output dir on the location of the table
String tableName = getUnescapedName((ASTNode) ast.getChild(0));
Table newTable = db.newTable(tableName);
Path location;
try {
Warehouse wh = new Warehouse(conf);
location = wh.getDatabasePath(db.getDatabase(newTable.getDbName()));
} catch (MetaException e) {
throw new SemanticException(e);
}
try {
fname = ctx.getExternalTmpFileURI(
FileUtils.makeQualified(location, conf).toUri());
} catch (Exception e) {
throw new SemanticException(generateErrorMessage(ast,
"Error creating temporary folder on: " + location.toString()), e);
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
tableSpec ts = new tableSpec(db, conf, this.ast);
// Set that variable to automatically collect stats during the MapReduce job
qb.getParseInfo().setIsInsertToTable(true);
// Add the table spec for the destination table.
qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
}
} else {
qb.setIsQuery(true);
fname = ctx.getMRTmpFileURI();
ctx.setResDir(new Path(fname));
}
}
qb.getMetaData().setDestForAlias(name, fname,
(ast.getToken().getType() == HiveParser.TOK_DIR));
CreateTableDesc localDirectoryDesc = new CreateTableDesc();
boolean localDirectoryDescIsSet = false;
int numCh = ast.getChildCount();
for (int num = 1; num < numCh ; num++){
ASTNode child = (ASTNode) ast.getChild(num);
if (ast.getChild(num) != null){
switch (child.getToken().getType()) {
case HiveParser.TOK_TABLEROWFORMAT:
rowFormatParams.analyzeRowFormat(shared, child);
localDirectoryDesc.setFieldDelim(rowFormatParams.fieldDelim);
localDirectoryDesc.setLineDelim(rowFormatParams.lineDelim);
localDirectoryDesc.setCollItemDelim(rowFormatParams.collItemDelim);
localDirectoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim);
localDirectoryDesc.setFieldEscape(rowFormatParams.fieldEscape);
localDirectoryDescIsSet=true;
break;
case HiveParser.TOK_TABLESERIALIZER:
ASTNode serdeChild = (ASTNode) child.getChild(0);
shared.serde = unescapeSQLString(serdeChild.getChild(0).getText());
localDirectoryDesc.setSerName(shared.serde);
localDirectoryDescIsSet=true;
break;
case HiveParser.TOK_TBLSEQUENCEFILE:
case HiveParser.TOK_TBLTEXTFILE:
case HiveParser.TOK_TBLRCFILE:
case HiveParser.TOK_TBLORCFILE:
case HiveParser.TOK_TABLEFILEFORMAT:
storageFormat.fillStorageFormat(child, shared);
localDirectoryDesc.setOutputFormat(storageFormat.outputFormat);
localDirectoryDesc.setSerName(shared.serde);
localDirectoryDescIsSet=true;
break;
}
}
}
if (localDirectoryDescIsSet){
qb.setLocalDirectoryDesc(localDirectoryDesc);
}
break;
}
default:
throw new SemanticException(generateErrorMessage(ast,
"Unknown Token Type " + ast.getToken().getType()));
}
}
} catch (HiveException e) {
// Has to use full name to make sure it does not conflict with
// org.apache.commons.lang.StringUtils
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
throw new SemanticException(e.getMessage(), e);
}
}
private void replaceViewReferenceWithDefinition(QB qb, Table tab,
String tab_name, String alias) throws SemanticException {
ParseDriver pd = new ParseDriver();
ASTNode viewTree;
final ASTNodeOrigin viewOrigin = new ASTNodeOrigin("VIEW", tab.getTableName(),
tab.getViewExpandedText(), alias, qb.getParseInfo().getSrcForAlias(
alias));
try {
String viewText = tab.getViewExpandedText();
// Reparse text, passing null for context to avoid clobbering
// the top-level token stream.
ASTNode tree = pd.parse(viewText, null);
tree = ParseUtils.findRootNonNullToken(tree);
viewTree = tree;
Dispatcher nodeOriginDispatcher = new Dispatcher() {
public Object dispatch(Node nd, java.util.Stack stack,
Object... nodeOutputs) {
((ASTNode) nd).setOrigin(viewOrigin);
return null;
}
};
GraphWalker nodeOriginTagger = new DefaultGraphWalker(
nodeOriginDispatcher);
nodeOriginTagger.startWalking(java.util.Collections
. singleton(viewTree), null);
} catch (ParseException e) {
// A user could encounter this if a stored view definition contains
// an old SQL construct which has been eliminated in a later Hive
// version, so we need to provide full debugging info to help
// with fixing the view definition.
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
StringBuilder sb = new StringBuilder();
sb.append(e.getMessage());
ErrorMsg.renderOrigin(sb, viewOrigin);
throw new SemanticException(sb.toString(), e);
}
QBExpr qbexpr = new QBExpr(alias);
doPhase1QBExpr(viewTree, qbexpr, qb.getId(), alias);
qb.rewriteViewToSubq(alias, tab_name, qbexpr);
}
private boolean isPresent(String[] list, String elem) {
for (String s : list) {
if (s.toLowerCase().equals(elem)) {
return true;
}
}
return false;
}
@SuppressWarnings("nls")
private void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn,
ArrayList leftAliases, ArrayList rightAliases,
ArrayList fields) throws SemanticException {
// String[] allAliases = joinTree.getAllAliases();
switch (condn.getToken().getType()) {
case HiveParser.TOK_TABLE_OR_COL:
String tableOrCol = unescapeIdentifier(condn.getChild(0).getText()
.toLowerCase());
unparseTranslator.addIdentifierTranslation((ASTNode) condn.getChild(0));
if (isPresent(joinTree.getLeftAliases(), tableOrCol)) {
if (!leftAliases.contains(tableOrCol)) {
leftAliases.add(tableOrCol);
}
} else if (isPresent(joinTree.getRightAliases(), tableOrCol)) {
if (!rightAliases.contains(tableOrCol)) {
rightAliases.add(tableOrCol);
}
} else {
// We don't support columns without table prefix in JOIN condition right
// now.
// We need to pass Metadata here to know which table the column belongs
// to.
throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(condn
.getChild(0)));
}
break;
case HiveParser.Identifier:
// it may be a field name, return the identifier and let the caller decide
// whether it is or not
if (fields != null) {
fields
.add(unescapeIdentifier(condn.getToken().getText().toLowerCase()));
}
unparseTranslator.addIdentifierTranslation(condn);
break;
case HiveParser.Number:
case HiveParser.StringLiteral:
case HiveParser.TOK_STRINGLITERALSEQUENCE:
case HiveParser.TOK_CHARSETLITERAL:
case HiveParser.KW_TRUE:
case HiveParser.KW_FALSE:
break;
case HiveParser.TOK_FUNCTION:
// check all the arguments
for (int i = 1; i < condn.getChildCount(); i++) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(i),
leftAliases, rightAliases, null);
}
break;
default:
// This is an operator - so check whether it is unary or binary operator
if (condn.getChildCount() == 1) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null);
} else if (condn.getChildCount() == 2) {
ArrayList fields1 = null;
// if it is a dot operator, remember the field name of the rhs of the
// left semijoin
if (joinTree.getNoSemiJoin() == false
&& condn.getToken().getType() == HiveParser.DOT) {
// get the semijoin rhs table name and field name
fields1 = new ArrayList();
int rhssize = rightAliases.size();
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null);
String rhsAlias = null;
if (rightAliases.size() > rhssize) { // the new table is rhs table
rhsAlias = rightAliases.get(rightAliases.size() - 1);
}
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
leftAliases, rightAliases, fields1);
if (rhsAlias != null && fields1.size() > 0) {
joinTree.addRHSSemijoinColumns(rhsAlias, condn);
}
} else {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null);
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
leftAliases, rightAliases, fields1);
}
} else {
throw new SemanticException(condn.toStringTree() + " encountered with "
+ condn.getChildCount() + " children");
}
break;
}
}
private void populateAliases(List leftAliases,
List rightAliases, ASTNode condn, QBJoinTree joinTree,
List leftSrc) throws SemanticException {
if ((leftAliases.size() != 0) && (rightAliases.size() != 0)) {
throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
.getMsg(condn));
}
if (rightAliases.size() != 0) {
assert rightAliases.size() == 1;
joinTree.getExpressions().get(1).add(condn);
} else if (leftAliases.size() != 0) {
joinTree.getExpressions().get(0).add(condn);
for (String s : leftAliases) {
if (!leftSrc.contains(s)) {
leftSrc.add(s);
}
}
} else {
throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_2
.getMsg(condn));
}
}
private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, List leftSrc)
throws SemanticException {
if (joinCond == null) {
return;
}
JoinCond cond = joinTree.getJoinCond()[0];
JoinType type = cond.getJoinType();
parseJoinCondition(joinTree, joinCond, leftSrc, type);
List> filters = joinTree.getFilters();
if (type == JoinType.LEFTOUTER || type == JoinType.FULLOUTER) {
joinTree.addFilterMapping(cond.getLeft(), cond.getRight(), filters.get(0).size());
}
if (type == JoinType.RIGHTOUTER || type == JoinType.FULLOUTER) {
joinTree.addFilterMapping(cond.getRight(), cond.getLeft(), filters.get(1).size());
}
}
/**
* Parse the join condition. If the condition is a join condition, throw an
* error if it is not an equality. Otherwise, break it into left and right
* expressions and store in the join tree. If the condition is a join filter,
* add it to the filter list of join tree. The join condition can contains
* conditions on both the left and tree trees and filters on either.
* Currently, we only support equi-joins, so we throw an error if the
* condition involves both subtrees and is not a equality. Also, we only
* support AND i.e ORs are not supported currently as their semantics are not
* very clear, may lead to data explosion and there is no usecase.
*
* @param joinTree
* jointree to be populated
* @param joinCond
* join condition
* @param leftSrc
* left sources
* @throws SemanticException
*/
private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond,
List leftSrc, JoinType type) throws SemanticException {
if (joinCond == null) {
return;
}
switch (joinCond.getToken().getType()) {
case HiveParser.KW_OR:
throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3
.getMsg(joinCond));
case HiveParser.KW_AND:
parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(0), leftSrc, type);
parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(1), leftSrc, type);
break;
case HiveParser.EQUAL_NS:
case HiveParser.EQUAL:
ASTNode leftCondn = (ASTNode) joinCond.getChild(0);
ArrayList leftCondAl1 = new ArrayList();
ArrayList leftCondAl2 = new ArrayList();
parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2,
null);
ASTNode rightCondn = (ASTNode) joinCond.getChild(1);
ArrayList rightCondAl1 = new ArrayList();
ArrayList rightCondAl2 = new ArrayList();
parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1,
rightCondAl2, null);
// is it a filter or a join condition
// if it is filter see if it can be pushed above the join
// filter cannot be pushed if
// * join is full outer or
// * join is left outer and filter is on left alias or
// * join is right outer and filter is on right alias
if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0))
|| ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) {
throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
.getMsg(joinCond));
}
if (leftCondAl1.size() != 0) {
if ((rightCondAl1.size() != 0)
|| ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
if (type.equals(JoinType.LEFTOUTER) ||
type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(0).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else if (rightCondAl2.size() != 0) {
populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
leftSrc);
populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
leftSrc);
boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
joinTree.getNullSafes().add(nullsafe);
}
} else if (leftCondAl2.size() != 0) {
if ((rightCondAl2.size() != 0)
|| ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
if (type.equals(JoinType.RIGHTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(1).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else if (rightCondAl1.size() != 0) {
populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
leftSrc);
populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
leftSrc);
boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
joinTree.getNullSafes().add(nullsafe);
}
} else if (rightCondAl1.size() != 0) {
if (type.equals(JoinType.LEFTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(0).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
if (type.equals(JoinType.RIGHTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(1).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
}
break;
default:
boolean isFunction = (joinCond.getType() == HiveParser.TOK_FUNCTION);
// Create all children
int childrenBegin = (isFunction ? 1 : 0);
ArrayList> leftAlias = new ArrayList>(
joinCond.getChildCount() - childrenBegin);
ArrayList> rightAlias = new ArrayList>(
joinCond.getChildCount() - childrenBegin);
for (int ci = 0; ci < joinCond.getChildCount() - childrenBegin; ci++) {
ArrayList left = new ArrayList();
ArrayList right = new ArrayList();
leftAlias.add(left);
rightAlias.add(right);
}
for (int ci = childrenBegin; ci < joinCond.getChildCount(); ci++) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) joinCond.getChild(ci),
leftAlias.get(ci - childrenBegin), rightAlias.get(ci
- childrenBegin), null);
}
boolean leftAliasNull = true;
for (ArrayList left : leftAlias) {
if (left.size() != 0) {
leftAliasNull = false;
break;
}
}
boolean rightAliasNull = true;
for (ArrayList right : rightAlias) {
if (right.size() != 0) {
rightAliasNull = false;
break;
}
}
if (!leftAliasNull && !rightAliasNull) {
throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
.getMsg(joinCond));
}
if (!leftAliasNull) {
if (type.equals(JoinType.LEFTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(0).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
if (type.equals(JoinType.RIGHTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(1).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
}
break;
}
}
@SuppressWarnings("nls")
public Operator putOpInsertMap(Operator op,
RowResolver rr) {
OpParseContext ctx = new OpParseContext(rr);
opParseCtx.put(op, ctx);
op.augmentPlan();
return op;
}
@SuppressWarnings("nls")
private Operator genHavingPlan(String dest, QB qb, Operator input)
throws SemanticException {
ASTNode havingExpr = qb.getParseInfo().getHavingForClause(dest);
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
Map exprToColumnAlias = qb.getParseInfo().getAllExprToColumnAlias();
for (ASTNode astNode : exprToColumnAlias.keySet()) {
if (inputRR.getExpression(astNode) != null) {
inputRR.put("", exprToColumnAlias.get(astNode), inputRR.getExpression(astNode));
}
}
ASTNode condn = (ASTNode) havingExpr.getChild(0);
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new FilterDesc(genExprNodeDesc(condn, inputRR), false), new RowSchema(
inputRR.getColumnInfos()), input), inputRR);
return output;
}
@SuppressWarnings("nls")
private Operator genFilterPlan(String dest, QB qb, Operator input)
throws SemanticException {
ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest);
return genFilterPlan(qb, (ASTNode) whereExpr.getChild(0), input);
}
/**
* create a filter plan. The condition and the inputs are specified.
*
* @param qb
* current query block
* @param condn
* The condition to be resolved
* @param input
* the input operator
*/
@SuppressWarnings("nls")
private Operator genFilterPlan(QB qb, ASTNode condn, Operator input)
throws SemanticException {
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new FilterDesc(genExprNodeDesc(condn, inputRR), false), new RowSchema(
inputRR.getColumnInfos()), input), inputRR);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: "
+ inputRR.toString());
}
return output;
}
@SuppressWarnings("nls")
private Integer genColListRegex(String colRegex, String tabAlias,
ASTNode sel, ArrayList col_list,
RowResolver input, Integer pos, RowResolver output, List aliases, boolean subQuery)
throws SemanticException {
// The table alias should exist
if (tabAlias != null && !input.hasTableAlias(tabAlias)) {
throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(sel));
}
// TODO: Have to put in the support for AS clause
Pattern regex = null;
try {
regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE);
} catch (PatternSyntaxException e) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel, e
.getMessage()));
}
StringBuilder replacementText = new StringBuilder();
int matched = 0;
// add empty string to the list of aliases. Some operators (ex. GroupBy) add
// ColumnInfos for table alias "".
if (!aliases.contains("")) {
aliases.add("");
}
// For expr "*", aliases should be iterated in the order they are specified
// in the query.
for (String alias : aliases) {
HashMap fMap = input.getFieldMap(alias);
if (fMap == null) {
continue;
}
// For the tab.* case, add all the columns to the fieldList
// from the input schema
for (Map.Entry entry : fMap.entrySet()) {
ColumnInfo colInfo = entry.getValue();
String name = colInfo.getInternalName();
String[] tmp = input.reverseLookup(name);
// Skip the colinfos which are not for this particular alias
if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
continue;
}
if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
continue;
}
// Not matching the regex?
if (!regex.matcher(tmp[1]).matches()) {
continue;
}
ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(),
name, colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isSkewedCol());
if (subQuery) {
output.checkColumn(tmp[0], tmp[1]);
}
col_list.add(expr);
output.put(tmp[0], tmp[1],
new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
colInfo.getTabAlias(), colInfo.getIsVirtualCol(),
colInfo.isHiddenVirtualCol()));
pos = Integer.valueOf(pos.intValue() + 1);
matched++;
if (unparseTranslator.isEnabled()) {
if (replacementText.length() > 0) {
replacementText.append(", ");
}
replacementText.append(HiveUtils.unparseIdentifier(tmp[0]));
replacementText.append(".");
replacementText.append(HiveUtils.unparseIdentifier(tmp[1]));
}
}
}
if (matched == 0) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel));
}
if (unparseTranslator.isEnabled()) {
unparseTranslator.addTranslation(sel, replacementText.toString());
}
return pos;
}
public static String getColumnInternalName(int pos) {
return HiveConf.getColumnInternalName(pos);
}
private String getScriptProgName(String cmd) {
int end = cmd.indexOf(" ");
return (end == -1) ? cmd : cmd.substring(0, end);
}
private String getScriptArgs(String cmd) {
int end = cmd.indexOf(" ");
return (end == -1) ? "" : cmd.substring(end, cmd.length());
}
private static int getPositionFromInternalName(String internalName) {
return HiveConf.getPositionFromInternalName(internalName);
}
private String fetchFilesNotInLocalFilesystem(String cmd) {
SessionState ss = SessionState.get();
String progName = getScriptProgName(cmd);
if (SessionState.canDownloadResource(progName)) {
String filePath = ss.add_resource(ResourceType.FILE, progName, true);
if (filePath == null) {
throw new RuntimeException("Could not download the resource: " + progName);
}
Path p = new Path(filePath);
String fileName = p.getName();
String scriptArgs = getScriptArgs(cmd);
String finalCmd = fileName + scriptArgs;
return finalCmd;
}
return cmd;
}
private TableDesc getTableDescFromSerDe(ASTNode child, String cols,
String colTypes, boolean defaultCols) throws SemanticException {
if (child.getType() == HiveParser.TOK_SERDENAME) {
String serdeName = unescapeSQLString(child.getChild(0).getText());
Class extends Deserializer> serdeClass = null;
try {
serdeClass = (Class extends Deserializer>) Class.forName(serdeName,
true, JavaUtils.getClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
TableDesc tblDesc = PlanUtils.getTableDesc(serdeClass, Integer
.toString(Utilities.tabCode), cols, colTypes, defaultCols);
// copy all the properties
if (child.getChildCount() == 2) {
ASTNode prop = (ASTNode) ((ASTNode) child.getChild(1)).getChild(0);
for (int propChild = 0; propChild < prop.getChildCount(); propChild++) {
String key = unescapeSQLString(prop.getChild(propChild).getChild(0)
.getText());
String value = unescapeSQLString(prop.getChild(propChild).getChild(1)
.getText());
tblDesc.getProperties().setProperty(key, value);
}
}
return tblDesc;
} else if (child.getType() == HiveParser.TOK_SERDEPROPS) {
TableDesc tblDesc = PlanUtils.getDefaultTableDesc(Integer
.toString(Utilities.ctrlaCode), cols, colTypes, defaultCols);
int numChildRowFormat = child.getChildCount();
for (int numC = 0; numC < numChildRowFormat; numC++) {
ASTNode rowChild = (ASTNode) child.getChild(numC);
switch (rowChild.getToken().getType()) {
case HiveParser.TOK_TABLEROWFORMATFIELD:
String fieldDelim = unescapeSQLString(rowChild.getChild(0).getText());
tblDesc.getProperties()
.setProperty(serdeConstants.FIELD_DELIM, fieldDelim);
tblDesc.getProperties().setProperty(serdeConstants.SERIALIZATION_FORMAT,
fieldDelim);
if (rowChild.getChildCount() >= 2) {
String fieldEscape = unescapeSQLString(rowChild.getChild(1)
.getText());
tblDesc.getProperties().setProperty(serdeConstants.ESCAPE_CHAR,
fieldEscape);
}
break;
case HiveParser.TOK_TABLEROWFORMATCOLLITEMS:
tblDesc.getProperties().setProperty(serdeConstants.COLLECTION_DELIM,
unescapeSQLString(rowChild.getChild(0).getText()));
break;
case HiveParser.TOK_TABLEROWFORMATMAPKEYS:
tblDesc.getProperties().setProperty(serdeConstants.MAPKEY_DELIM,
unescapeSQLString(rowChild.getChild(0).getText()));
break;
case HiveParser.TOK_TABLEROWFORMATLINES:
String lineDelim = unescapeSQLString(rowChild.getChild(0).getText());
tblDesc.getProperties().setProperty(serdeConstants.LINE_DELIM, lineDelim);
if (!lineDelim.equals("\n") && !lineDelim.equals("10")) {
throw new SemanticException(generateErrorMessage(rowChild,
ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg()));
}
break;
default:
assert false;
}
}
return tblDesc;
}
// should never come here
return null;
}
private void failIfColAliasExists(Set nameSet, String name)
throws SemanticException {
if (nameSet.contains(name)) {
throw new SemanticException(ErrorMsg.COLUMN_ALIAS_ALREADY_EXISTS
.getMsg(name));
}
nameSet.add(name);
}
@SuppressWarnings("nls")
private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input)
throws SemanticException {
// If there is no "AS" clause, the output schema will be "key,value"
ArrayList outputCols = new ArrayList();
int inputSerDeNum = 1, inputRecordWriterNum = 2;
int outputSerDeNum = 4, outputRecordReaderNum = 5;
int outputColsNum = 6;
boolean outputColNames = false, outputColSchemas = false;
int execPos = 3;
boolean defaultOutputCols = false;
// Go over all the children
if (trfm.getChildCount() > outputColsNum) {
ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum);
if (outCols.getType() == HiveParser.TOK_ALIASLIST) {
outputColNames = true;
} else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) {
outputColSchemas = true;
}
}
// If column type is not specified, use a string
if (!outputColNames && !outputColSchemas) {
String intName = getColumnInternalName(0);
ColumnInfo colInfo = new ColumnInfo(intName,
TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias("key");
outputCols.add(colInfo);
intName = getColumnInternalName(1);
colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null,
false);
colInfo.setAlias("value");
outputCols.add(colInfo);
defaultOutputCols = true;
} else {
ASTNode collist = (ASTNode) trfm.getChild(outputColsNum);
int ccount = collist.getChildCount();
Set colAliasNamesDuplicateCheck = new HashSet();
if (outputColNames) {
for (int i = 0; i < ccount; ++i) {
String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i))
.getText());
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName,
TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
} else {
for (int i = 0; i < ccount; ++i) {
ASTNode child = (ASTNode) collist.getChild(i);
assert child.getType() == HiveParser.TOK_TABCOL;
String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0))
.getText());
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils
.getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child
.getChild(1))), null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
}
}
RowResolver out_rwsch = new RowResolver();
StringBuilder columns = new StringBuilder();
StringBuilder columnTypes = new StringBuilder();
for (int i = 0; i < outputCols.size(); ++i) {
if (i != 0) {
columns.append(",");
columnTypes.append(",");
}
columns.append(outputCols.get(i).getInternalName());
columnTypes.append(outputCols.get(i).getType().getTypeName());
out_rwsch.put(qb.getParseInfo().getAlias(), outputCols.get(i).getAlias(),
outputCols.get(i));
}
StringBuilder inpColumns = new StringBuilder();
StringBuilder inpColumnTypes = new StringBuilder();
ArrayList inputSchema = opParseCtx.get(input).getRowResolver()
.getColumnInfos();
for (int i = 0; i < inputSchema.size(); ++i) {
if (i != 0) {
inpColumns.append(",");
inpColumnTypes.append(",");
}
inpColumns.append(inputSchema.get(i).getInternalName());
inpColumnTypes.append(inputSchema.get(i).getType().getTypeName());
}
TableDesc outInfo;
TableDesc errInfo;
TableDesc inInfo;
String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE);
Class extends Deserializer> serde;
try {
serde = (Class extends Deserializer>) Class.forName(defaultSerdeName,
true, JavaUtils.getClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
int fieldSeparator = Utilities.tabCode;
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESCRIPTESCAPE)) {
fieldSeparator = Utilities.ctrlaCode;
}
// Input and Output Serdes
if (trfm.getChild(inputSerDeNum).getChildCount() > 0) {
inInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm
.getChild(inputSerDeNum))).getChild(0), inpColumns.toString(),
inpColumnTypes.toString(), false);
} else {
inInfo = PlanUtils.getTableDesc(serde, Integer
.toString(fieldSeparator), inpColumns.toString(), inpColumnTypes
.toString(), false, true);
}
if (trfm.getChild(outputSerDeNum).getChildCount() > 0) {
outInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm
.getChild(outputSerDeNum))).getChild(0), columns.toString(),
columnTypes.toString(), false);
// This is for backward compatibility. If the user did not specify the
// output column list, we assume that there are 2 columns: key and value.
// However, if the script outputs: col1, col2, col3 seperated by TAB, the
// requirement is: key is col and value is (col2 TAB col3)
} else {
outInfo = PlanUtils.getTableDesc(serde, Integer
.toString(fieldSeparator), columns.toString(), columnTypes
.toString(), defaultOutputCols);
}
// Error stream always uses the default serde with a single column
errInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), "KEY");
// Output record readers
Class extends RecordReader> outRecordReader = getRecordReader((ASTNode) trfm
.getChild(outputRecordReaderNum));
Class extends RecordWriter> inRecordWriter = getRecordWriter((ASTNode) trfm
.getChild(inputRecordWriterNum));
Class extends RecordReader> errRecordReader = getDefaultRecordReader();
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new ScriptDesc(
fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())),
inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo),
new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
return output;
}
private Class extends RecordReader> getRecordReader(ASTNode node)
throws SemanticException {
String name;
if (node.getChildCount() == 0) {
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER);
} else {
name = unescapeSQLString(node.getChild(0).getText());
}
try {
return (Class extends RecordReader>) Class.forName(name, true,
JavaUtils.getClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
private Class extends RecordReader> getDefaultRecordReader()
throws SemanticException {
String name;
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER);
try {
return (Class extends RecordReader>) Class.forName(name, true,
JavaUtils.getClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
private Class extends RecordWriter> getRecordWriter(ASTNode node)
throws SemanticException {
String name;
if (node.getChildCount() == 0) {
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDWRITER);
} else {
name = unescapeSQLString(node.getChild(0).getText());
}
try {
return (Class extends RecordWriter>) Class.forName(name, true,
JavaUtils.getClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
private List getGroupingSetsForRollup(int size) {
List groupingSetKeys = new ArrayList();
for (int i = 0; i <= size; i++) {
groupingSetKeys.add((1 << i) - 1);
}
return groupingSetKeys;
}
private List getGroupingSetsForCube(int size) {
int count = 1 << size;
List results = new ArrayList(count);
for (int i = 0; i < count; ++i) {
results.add(i);
}
return results;
}
// This function returns the grouping sets along with the grouping expressions
// Even if rollups and cubes are present in the query, they are converted to
// grouping sets at this point
private ObjectPair, List> getGroupByGroupingSetsForClause(
QBParseInfo parseInfo, String dest) throws SemanticException {
List groupingSets = new ArrayList();
List groupByExprs = getGroupByForClause(parseInfo, dest);
if (parseInfo.getDestRollups().contains(dest)) {
groupingSets = getGroupingSetsForRollup(groupByExprs.size());
} else if (parseInfo.getDestCubes().contains(dest)) {
groupingSets = getGroupingSetsForCube(groupByExprs.size());
} else if (parseInfo.getDestGroupingSets().contains(dest)) {
groupingSets = getGroupingSets(groupByExprs, parseInfo, dest);
}
return new ObjectPair, List>(groupByExprs, groupingSets);
}
private List getGroupingSets(List groupByExpr, QBParseInfo parseInfo,
String dest) throws SemanticException {
Map exprPos = new HashMap();
for (int i = 0; i < groupByExpr.size(); ++i) {
ASTNode node = groupByExpr.get(i);
exprPos.put(node.toStringTree(), i);
}
ASTNode root = parseInfo.getGroupByForClause(dest);
List result = new ArrayList(root == null ? 0 : root.getChildCount());
if (root != null) {
for (int i = 0; i < root.getChildCount(); ++i) {
ASTNode child = (ASTNode) root.getChild(i);
if (child.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) {
continue;
}
int bitmap = 0;
for (int j = 0; j < child.getChildCount(); ++j) {
String treeAsString = child.getChild(j).toStringTree();
Integer pos = exprPos.get(treeAsString);
if (pos == null) {
throw new SemanticException(
generateErrorMessage((ASTNode) child.getChild(j),
ErrorMsg.HIVE_GROUPING_SETS_EXPR_NOT_IN_GROUPBY.getErrorCodedMsg()));
}
bitmap = setBit(bitmap, pos);
}
result.add(bitmap);
}
}
if (checkForNoAggr(result)) {
throw new SemanticException(
ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOFUNC.getMsg());
}
return result;
}
private boolean checkForNoAggr(List bitmaps) {
boolean ret = true;
for (int mask : bitmaps) {
ret &= mask == 0;
}
return ret;
}
private int setBit(int bitmap, int bitIdx) {
return bitmap | (1 << bitIdx);
}
/**
* This function is a wrapper of parseInfo.getGroupByForClause which
* automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY
* a,b,c.
*/
static List getGroupByForClause(QBParseInfo parseInfo, String dest) {
if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
ASTNode selectExprs = parseInfo.getSelForClause(dest);
List result = new ArrayList(selectExprs == null ? 0
: selectExprs.getChildCount());
if (selectExprs != null) {
HashMap windowingExprs = parseInfo.getWindowingExprsForClause(dest);
for (int i = 0; i < selectExprs.getChildCount(); ++i) {
if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.TOK_HINTLIST) {
continue;
}
// table.column AS alias
ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
/*
* If this is handled by Windowing then ignore it.
*/
if (windowingExprs != null && windowingExprs.containsKey(grpbyExpr.toStringTree())) {
continue;
}
result.add(grpbyExpr);
}
}
return result;
} else {
ASTNode grpByExprs = parseInfo.getGroupByForClause(dest);
List result = new ArrayList(grpByExprs == null ? 0
: grpByExprs.getChildCount());
if (grpByExprs != null) {
for (int i = 0; i < grpByExprs.getChildCount(); ++i) {
ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i);
if (grpbyExpr.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) {
result.add(grpbyExpr);
}
}
}
return result;
}
}
private static String[] getColAlias(ASTNode selExpr, String defaultName,
RowResolver inputRR, boolean includeFuncName, int colNum) {
String colAlias = null;
String tabAlias = null;
String[] colRef = new String[2];
//for queries with a windowing expressions, the selexpr may have a third child
if (selExpr.getChildCount() == 2 ||
(selExpr.getChildCount() == 3 &&
selExpr.getChild(2).getType() == HiveParser.TOK_WINDOWSPEC)) {
// return zz for "xx + yy AS zz"
colAlias = unescapeIdentifier(selExpr.getChild(1).getText());
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
}
ASTNode root = (ASTNode) selExpr.getChild(0);
if (root.getType() == HiveParser.TOK_TABLE_OR_COL) {
colAlias =
BaseSemanticAnalyzer.unescapeIdentifier(root.getChild(0).getText());
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
}
if (root.getType() == HiveParser.DOT) {
ASTNode tab = (ASTNode) root.getChild(0);
if (tab.getType() == HiveParser.TOK_TABLE_OR_COL) {
String t = unescapeIdentifier(tab.getChild(0).getText());
if (inputRR.hasTableAlias(t)) {
tabAlias = t;
}
}
// Return zz for "xx.zz" and "xx.yy.zz"
ASTNode col = (ASTNode) root.getChild(1);
if (col.getType() == HiveParser.Identifier) {
colAlias = unescapeIdentifier(col.getText());
}
}
// if specified generate alias using func name
if (includeFuncName && (root.getType() == HiveParser.TOK_FUNCTION)) {
String expr_flattened = root.toStringTree();
// remove all TOK tokens
String expr_no_tok = expr_flattened.replaceAll("TOK_\\S+", "");
// remove all non alphanumeric letters, replace whitespace spans with underscore
String expr_formatted = expr_no_tok.replaceAll("\\W", " ").trim().replaceAll("\\s+", "_");
// limit length to 20 chars
if (expr_formatted.length() > AUTOGEN_COLALIAS_PRFX_MAXLENGTH) {
expr_formatted = expr_formatted.substring(0, AUTOGEN_COLALIAS_PRFX_MAXLENGTH);
}
// append colnum to make it unique
colAlias = expr_formatted.concat("_" + colNum);
}
if (colAlias == null) {
// Return defaultName if selExpr is not a simple xx.yy.zz
colAlias = defaultName + colNum;
}
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
}
/**
* Returns whether the pattern is a regex expression (instead of a normal
* string). Normal string is a string with all alphabets/digits and "_".
*/
private static boolean isRegex(String pattern) {
for (int i = 0; i < pattern.length(); i++) {
if (!Character.isLetterOrDigit(pattern.charAt(i))
&& pattern.charAt(i) != '_') {
return true;
}
}
return false;
}
private Operator> genSelectPlan(String dest, QB qb, Operator> input)
throws SemanticException {
ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
Operator> op = genSelectPlan(selExprList, qb, input);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Select Plan for clause: " + dest);
}
return op;
}
@SuppressWarnings("nls")
private Operator> genSelectPlan(ASTNode selExprList, QB qb,
Operator> input) throws SemanticException {
if (LOG.isDebugEnabled()) {
LOG.debug("tree: " + selExprList.toStringTree());
}
ArrayList col_list = new ArrayList();
RowResolver out_rwsch = new RowResolver();
ASTNode trfm = null;
Integer pos = Integer.valueOf(0);
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
// SELECT * or SELECT TRANSFORM(*)
boolean selectStar = false;
int posn = 0;
boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST);
if (hintPresent) {
posn++;
}
boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() ==
HiveParser.TOK_TRANSFORM);
if (isInTransform) {
queryProperties.setUsesScript(true);
globalLimitCtx.setHasTransformOrUDTF(true);
trfm = (ASTNode) selExprList.getChild(posn).getChild(0);
}
// Detect queries of the form SELECT udtf(col) AS ...
// by looking for a function as the first child, and then checking to see
// if the function is a Generic UDTF. It's not as clean as TRANSFORM due to
// the lack of a special token.
boolean isUDTF = false;
String udtfTableAlias = null;
ArrayList udtfColAliases = new ArrayList();
ASTNode udtfExpr = (ASTNode) selExprList.getChild(posn).getChild(0);
GenericUDTF genericUDTF = null;
int udtfExprType = udtfExpr.getType();
if (udtfExprType == HiveParser.TOK_FUNCTION
|| udtfExprType == HiveParser.TOK_FUNCTIONSTAR) {
String funcName = TypeCheckProcFactory.DefaultExprProcessor
.getFunctionText(udtfExpr, true);
FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
if (fi != null) {
genericUDTF = fi.getGenericUDTF();
}
isUDTF = (genericUDTF != null);
if (isUDTF) {
globalLimitCtx.setHasTransformOrUDTF(true);
}
if (isUDTF && !fi.isNative()) {
unparseTranslator.addIdentifierTranslation((ASTNode) udtfExpr
.getChild(0));
}
}
if (isUDTF) {
// Only support a single expression when it's a UDTF
if (selExprList.getChildCount() > 1) {
throw new SemanticException(generateErrorMessage(
(ASTNode) selExprList.getChild(1),
ErrorMsg.UDTF_MULTIPLE_EXPR.getMsg()));
}
// Require an AS for UDTFs for column aliases
ASTNode selExpr = (ASTNode) selExprList.getChild(posn);
if (selExpr.getChildCount() < 2) {
throw new SemanticException(generateErrorMessage(udtfExpr,
ErrorMsg.UDTF_REQUIRE_AS.getMsg()));
}
// Get the column / table aliases from the expression. Start from 1 as
// 0 is the TOK_FUNCTION
for (int i = 1; i < selExpr.getChildCount(); i++) {
ASTNode selExprChild = (ASTNode) selExpr.getChild(i);
switch (selExprChild.getType()) {
case HiveParser.Identifier:
udtfColAliases.add(unescapeIdentifier(selExprChild.getText()));
unparseTranslator.addIdentifierTranslation(selExprChild);
break;
case HiveParser.TOK_TABALIAS:
assert (selExprChild.getChildCount() == 1);
udtfTableAlias = unescapeIdentifier(selExprChild.getChild(0)
.getText());
qb.addAlias(udtfTableAlias);
unparseTranslator.addIdentifierTranslation((ASTNode) selExprChild
.getChild(0));
break;
default:
assert (false);
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("UDTF table alias is " + udtfTableAlias);
LOG.debug("UDTF col aliases are " + udtfColAliases);
}
}
// The list of expressions after SELECT or SELECT TRANSFORM.
ASTNode exprList;
if (isInTransform) {
exprList = (ASTNode) trfm.getChild(0);
} else if (isUDTF) {
exprList = udtfExpr;
} else {
exprList = selExprList;
}
if (LOG.isDebugEnabled()) {
LOG.debug("genSelectPlan: input = " + inputRR.toString());
}
// For UDTF's, skip the function name to get the expressions
int startPosn = isUDTF ? posn + 1 : posn;
if (isInTransform) {
startPosn = 0;
}
// Iterate over all expression (either after SELECT, or in SELECT TRANSFORM)
for (int i = startPosn; i < exprList.getChildCount(); ++i) {
// child can be EXPR AS ALIAS, or EXPR.
ASTNode child = (ASTNode) exprList.getChild(i);
boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2);
boolean isWindowSpec = child.getChildCount() == 3 ?
(child.getChild(2).getType() == HiveParser.TOK_WINDOWSPEC) :
false;
// EXPR AS (ALIAS,...) parses, but is only allowed for UDTF's
// This check is not needed and invalid when there is a transform b/c the
// AST's are slightly different.
if (!isWindowSpec && !isInTransform && !isUDTF && child.getChildCount() > 2) {
throw new SemanticException(generateErrorMessage(
(ASTNode) child.getChild(2),
ErrorMsg.INVALID_AS.getMsg()));
}
// The real expression
ASTNode expr;
String tabAlias;
String colAlias;
if (isInTransform || isUDTF) {
tabAlias = null;
colAlias = autogenColAliasPrfxLbl + i;
expr = child;
} else {
// Get rid of TOK_SELEXPR
expr = (ASTNode) child.getChild(0);
String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR,
autogenColAliasPrfxIncludeFuncName, i);
tabAlias = colRef[0];
colAlias = colRef[1];
if (hasAsClause) {
unparseTranslator.addIdentifierTranslation((ASTNode) child
.getChild(1));
}
}
boolean subQuery = qb.getParseInfo().getIsSubQ();
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
pos = genColListRegex(".*", expr.getChildCount() == 0 ? null
: getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(),
expr, col_list, inputRR, pos, out_rwsch, qb.getAliases(), subQuery);
selectStar = true;
} else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
&& !inputRR.getIsExprResolver()
&& isRegex(unescapeIdentifier(expr.getChild(0).getText()))) {
// In case the expression is a regex COL.
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()),
null, expr, col_list, inputRR, pos, out_rwsch, qb.getAliases(), subQuery);
} else if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
&& inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0)
.getChild(0).getText().toLowerCase())) && !hasAsClause
&& !inputRR.getIsExprResolver()
&& isRegex(unescapeIdentifier(expr.getChild(1).getText()))) {
// In case the expression is TABLE.COL (col can be regex).
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
unescapeIdentifier(expr.getChild(0).getChild(0).getText()
.toLowerCase()), expr, col_list, inputRR, pos, out_rwsch,
qb.getAliases(), subQuery);
} else {
// Case when this is an expression
TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
// We allow stateful functions in the SELECT list (but nowhere else)
tcCtx.setAllowStatefulFunctions(true);
ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
String recommended = recommendName(exp, colAlias);
if (recommended != null && out_rwsch.get(null, recommended) == null) {
colAlias = recommended;
}
col_list.add(exp);
if (subQuery) {
out_rwsch.checkColumn(tabAlias, colAlias);
}
ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos),
exp.getWritableObjectInspector(), tabAlias, false);
colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc) exp)
.isSkewedCol() : false);
out_rwsch.put(tabAlias, colAlias, colInfo);
pos = Integer.valueOf(pos.intValue() + 1);
}
}
selectStar = selectStar && exprList.getChildCount() == posn + 1;
ArrayList columnNames = new ArrayList();
Map colExprMap = new HashMap();
for (int i = 0; i < col_list.size(); i++) {
// Replace NULL with CAST(NULL AS STRING)
if (col_list.get(i) instanceof ExprNodeNullDesc) {
col_list.set(i, new ExprNodeConstantDesc(
TypeInfoFactory.stringTypeInfo, null));
}
String outputCol = getColumnInternalName(i);
colExprMap.put(outputCol, col_list.get(i));
columnNames.add(outputCol);
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new SelectDesc(col_list, columnNames, selectStar), new RowSchema(
out_rwsch.getColumnInfos()), input), out_rwsch);
output.setColumnExprMap(colExprMap);
if (isInTransform) {
output = genScriptPlan(trfm, qb, output);
}
if (isUDTF) {
output = genUDTFPlan(genericUDTF, udtfTableAlias, udtfColAliases, qb,
output);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Created Select Plan row schema: " + out_rwsch.toString());
}
return output;
}
private String recommendName(ExprNodeDesc exp, String colAlias) {
if (!colAlias.startsWith(autogenColAliasPrfxLbl)) {
return null;
}
String column = ExprNodeDescUtils.recommendInputName(exp);
if (column != null && !column.startsWith(autogenColAliasPrfxLbl)) {
return column;
}
return null;
}
/**
* Class to store GenericUDAF related information.
*/
static class GenericUDAFInfo {
ArrayList convertedParameters;
GenericUDAFEvaluator genericUDAFEvaluator;
TypeInfo returnType;
}
/**
* Convert exprNodeDesc array to Typeinfo array.
*/
static ArrayList getTypeInfo(ArrayList exprs) {
ArrayList result = new ArrayList();
for (ExprNodeDesc expr : exprs) {
result.add(expr.getTypeInfo());
}
return result;
}
/**
* Convert exprNodeDesc array to ObjectInspector array.
*/
static ArrayList getWritableObjectInspector(ArrayList exprs) {
ArrayList result = new ArrayList();
for (ExprNodeDesc expr : exprs) {
result.add(expr.getWritableObjectInspector());
}
return result;
}
/**
* Convert exprNodeDesc array to Typeinfo array.
*/
static ObjectInspector[] getStandardObjectInspector(ArrayList exprs) {
ObjectInspector[] result = new ObjectInspector[exprs.size()];
for (int i = 0; i < exprs.size(); i++) {
result[i] = TypeInfoUtils
.getStandardWritableObjectInspectorFromTypeInfo(exprs.get(i));
}
return result;
}
/**
* Returns the GenericUDAFEvaluator for the aggregation. This is called once
* for each GroupBy aggregation.
*/
static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName,
ArrayList aggParameters, ASTNode aggTree,
boolean isDistinct, boolean isAllColumns)
throws SemanticException {
ArrayList originalParameterTypeInfos =
getWritableObjectInspector(aggParameters);
GenericUDAFEvaluator result = FunctionRegistry.getGenericUDAFEvaluator(
aggName, originalParameterTypeInfos, isDistinct, isAllColumns);
if (null == result) {
String reason = "Looking for UDAF Evaluator\"" + aggName
+ "\" with parameters " + originalParameterTypeInfos;
throw new SemanticException(ErrorMsg.INVALID_FUNCTION_SIGNATURE.getMsg(
(ASTNode) aggTree.getChild(0), reason));
}
return result;
}
/**
* Returns the GenericUDAFInfo struct for the aggregation.
*
* @param aggName
* The name of the UDAF.
* @param aggParameters
* The exprNodeDesc of the original parameters
* @param aggTree
* The ASTNode node of the UDAF in the query.
* @return GenericUDAFInfo
* @throws SemanticException
* when the UDAF is not found or has problems.
*/
static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator,
GenericUDAFEvaluator.Mode emode, ArrayList aggParameters)
throws SemanticException {
GenericUDAFInfo r = new GenericUDAFInfo();
// set r.genericUDAFEvaluator
r.genericUDAFEvaluator = evaluator;
// set r.returnType
ObjectInspector returnOI = null;
try {
ArrayList aggOIs = getWritableObjectInspector(aggParameters);
ObjectInspector[] aggOIArray = new ObjectInspector[aggOIs.size()];
for (int ii = 0; ii < aggOIs.size(); ++ii) {
aggOIArray[ii] = aggOIs.get(ii);
}
returnOI = r.genericUDAFEvaluator.init(emode, aggOIArray);
r.returnType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI);
} catch (HiveException e) {
throw new SemanticException(e);
}
// set r.convertedParameters
// TODO: type conversion
r.convertedParameters = aggParameters;
return r;
}
private static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode(
GroupByDesc.Mode mode, boolean isDistinct) {
switch (mode) {
case COMPLETE:
return GenericUDAFEvaluator.Mode.COMPLETE;
case PARTIAL1:
return GenericUDAFEvaluator.Mode.PARTIAL1;
case PARTIAL2:
return GenericUDAFEvaluator.Mode.PARTIAL2;
case PARTIALS:
return isDistinct ? GenericUDAFEvaluator.Mode.PARTIAL1
: GenericUDAFEvaluator.Mode.PARTIAL2;
case FINAL:
return GenericUDAFEvaluator.Mode.FINAL;
case HASH:
return GenericUDAFEvaluator.Mode.PARTIAL1;
case MERGEPARTIAL:
return isDistinct ? GenericUDAFEvaluator.Mode.COMPLETE
: GenericUDAFEvaluator.Mode.FINAL;
default:
throw new RuntimeException("internal error in groupByDescModeToUDAFMode");
}
}
/**
* Check if the given internalName represents a constant parameter in aggregation parameters
* of an aggregation tree.
* This method is only invoked when map-side aggregation is not involved. In this case,
* every parameter in every aggregation tree should already have a corresponding ColumnInfo,
* which is generated when the corresponding ReduceSinkOperator of the GroupByOperator being
* generating is generated. If we find that this parameter is a constant parameter,
* we will return the corresponding ExprNodeDesc in reduceValues, and we will not need to
* use a new ExprNodeColumnDesc, which can not be treated as a constant parameter, for this
* parameter (since the writableObjectInspector of a ExprNodeColumnDesc will not be
* a instance of ConstantObjectInspector).
*
* @param reduceValues
* value columns of the corresponding ReduceSinkOperator
* @param internalName
* the internal name of this parameter
* @return the ExprNodeDesc of the constant parameter if the given internalName represents
* a constant parameter; otherwise, return null
*/
private ExprNodeDesc isConstantParameterInAggregationParameters(String internalName,
List reduceValues) {
// only the pattern of "VALUE._col([0-9]+)" should be handled.
String[] terms = internalName.split("\\.");
if (terms.length != 2 || reduceValues == null) {
return null;
}
if (Utilities.ReduceField.VALUE.toString().equals(terms[0])) {
int pos = getPositionFromInternalName(terms[1]);
if (pos >= 0 && pos < reduceValues.size()) {
ExprNodeDesc reduceValue = reduceValues.get(pos);
if (reduceValue != null) {
if (reduceValue.getWritableObjectInspector() instanceof ConstantObjectInspector) {
// this internalName represents a constant parameter in aggregation parameters
return reduceValue;
}
}
}
}
return null;
}
/**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
*
* @param mode
* The mode of the aggregation (PARTIAL1 or COMPLETE)
* @param genericUDAFEvaluators
* If not null, this function will store the mapping from Aggregation
* StringTree to the genericUDAFEvaluator in this parameter, so it
* can be used in the next-stage GroupBy aggregations.
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo,
String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode,
Map genericUDAFEvaluators)
throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx
.get(reduceSinkOperatorInfo).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList groupByKeys = new ArrayList();
ArrayList aggregations = new ArrayList();
ArrayList outputColumnNames = new ArrayList();
Map colExprMap = new HashMap();
List grpByExprs = getGroupByForClause(parseInfo, dest);
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), exprInfo
.getInternalName(), "", false));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(grpbyExpr,
new ColumnInfo(field, exprInfo.getType(), null, false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// For each aggregation
HashMap aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
assert (aggregationTrees != null);
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
List reduceValues = null;
if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
List inputKeyCols = ((ReduceSinkDesc)
reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
if (inputKeyCols.size() > 0) {
lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
}
reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols();
}
int numDistinctUDFs = 0;
for (Map.Entry entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
// This is the GenericUDAF name
String aggName = unescapeIdentifier(value.getChild(0).getText());
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
// Convert children to aggParameters
ArrayList aggParameters = new ArrayList();
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ColumnInfo paraExprInfo =
groupByInputRowResolver.getExpression(paraExpr);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
if (isDistinct && lastKeyColName != null) {
// if aggr is distinct, the parameter is name is constructed as
// KEY.lastKeyColName:._colx
paraExpression = Utilities.ReduceField.KEY.name() + "." +
lastKeyColName + ":" + numDistinctUDFs + "." +
getColumnInternalName(i - 1);
}
ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(),
paraExprInfo.getIsVirtualCol());
ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(
paraExprInfo.getInternalName(), reduceValues);
if (reduceValue != null) {
// this parameter is a constant
expr = reduceValue;
}
aggParameters.add(expr);
}
if (isDistinct) {
numDistinctUDFs++;
}
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(
aggName, aggParameters, value, isDistinct, isAllColumns);
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(),
udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct,
amode));
String field = getColumnInternalName(groupByKeys.size()
+ aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(value, new ColumnInfo(
field, udaf.returnType, "", false));
// Save the evaluator so that it can be used by the next-stage
// GroupByOperators
if (genericUDAFEvaluators != null) {
genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
}
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf
.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
false, groupByMemoryUsage, memoryThreshold, null, false, 0, numDistinctUDFs > 0),
new RowSchema(groupByOutputRowResolver.getColumnInfos()),
reduceSinkOperatorInfo), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
// Add the grouping set key to the group by operator.
// This is not the first group by operator, but it is a subsequent group by operator
// which is forwarding the grouping keys introduced by the grouping sets.
// For eg: consider: select key, value, count(1) from T group by key, value with rollup.
// Assuming map-side aggregation and no skew, the plan would look like:
//
// TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink
//
// This function is called for GroupBy2 to pass the additional grouping keys introduced by
// GroupBy1 for the grouping set (corresponding to the rollup).
private void addGroupingSetKey(List groupByKeys,
RowResolver groupByInputRowResolver,
RowResolver groupByOutputRowResolver,
List outputColumnNames,
Map colExprMap) throws SemanticException {
// For grouping sets, add a dummy grouping key
String groupingSetColumnName =
groupByInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName();
ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
groupingSetColumnName, null, false);
groupByKeys.add(inputExpr);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(),
new ColumnInfo(
field,
TypeInfoFactory.stringTypeInfo,
null,
true));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// Process grouping set for the reduce sink operator
// For eg: consider: select key, value, count(1) from T group by key, value with rollup.
// Assuming map-side aggregation and no skew, the plan would look like:
//
// TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink
//
// This function is called for ReduceSink to add the additional grouping keys introduced by
// GroupBy1 into the reduce keys.
private void processGroupingSetReduceSinkOperator(RowResolver reduceSinkInputRowResolver,
RowResolver reduceSinkOutputRowResolver,
List reduceKeys,
List outputKeyColumnNames,
Map colExprMap) throws SemanticException {
// add a key for reduce sink
String groupingSetColumnName =
reduceSinkInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName();
ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
groupingSetColumnName, null, false);
reduceKeys.add(inputExpr);
outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1));
String field = Utilities.ReduceField.KEY.toString() + "."
+ getColumnInternalName(reduceKeys.size() - 1);
ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(
reduceKeys.size() - 1).getTypeInfo(), null, true);
reduceSinkOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), colInfo);
colExprMap.put(colInfo.getInternalName(), inputExpr);
}
/**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
*
* @param mode
* The mode of the aggregation (MERGEPARTIAL, PARTIAL2)
* @param genericUDAFEvaluators
* The mapping from Aggregation StringTree to the
* genericUDAFEvaluator.
* @param distPartAggr
* partial aggregation for distincts
* @param groupingSets
* list of grouping sets
* @param groupingSetsPresent
* whether grouping sets are present in this query
* @param groupingSetsConsumedCurrentMR
* whether grouping sets are consumed by this group by
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo,
String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode,
Map genericUDAFEvaluators,
boolean distPartAgg,
List groupingSets,
boolean groupingSetsPresent,
boolean groupingSetsNeedAdditionalMRJob) throws SemanticException {
ArrayList outputColumnNames = new ArrayList();
RowResolver groupByInputRowResolver = opParseCtx
.get(reduceSinkOperatorInfo).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList groupByKeys = new ArrayList();
ArrayList aggregations = new ArrayList();
List grpByExprs = getGroupByForClause(parseInfo, dest);
Map colExprMap = new HashMap();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), exprInfo
.getInternalName(), exprInfo.getTabAlias(), exprInfo
.getIsVirtualCol()));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(grpbyExpr,
new ColumnInfo(field, exprInfo.getType(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// This is only needed if a new grouping set key is being created
int groupingSetsPosition = 0;
// For grouping sets, add a dummy grouping key
if (groupingSetsPresent) {
// Consider the query: select a,b, count(1) from T group by a,b with cube;
// where it is being executed in a single map-reduce job
// The plan is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
// GroupBy1 already added the grouping id as part of the row
// This function is called for GroupBy2 to add grouping id as part of the groupby keys
if (!groupingSetsNeedAdditionalMRJob) {
addGroupingSetKey(
groupByKeys,
groupByInputRowResolver,
groupByOutputRowResolver,
outputColumnNames,
colExprMap);
}
else {
groupingSetsPosition = groupByKeys.size();
// The grouping set has not yet been processed. Create a new grouping key
// Consider the query: select a,b, count(1) from T group by a,b with cube;
// where it is being executed in 2 map-reduce jobs
// The plan for 1st MR is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
// GroupBy1/ReduceSink worked as if grouping sets were not present
// This function is called for GroupBy2 to create new rows for grouping sets
// For each input row (a,b), 4 rows are created for the example above:
// (a,b), (a,null), (null, b), (null, null)
createNewGroupingKey(groupByKeys,
outputColumnNames,
groupByOutputRowResolver,
colExprMap);
}
}
HashMap aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
List reduceValues = null;
if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
List inputKeyCols = ((ReduceSinkDesc)
reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
if (inputKeyCols.size() > 0) {
lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
}
reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols();
}
int numDistinctUDFs = 0;
boolean containsDistinctAggr = false;
for (Map.Entry entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
String aggName = unescapeIdentifier(value.getChild(0).getText());
ArrayList aggParameters = new ArrayList();
boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI);
containsDistinctAggr = containsDistinctAggr || isDistinct;
// If the function is distinct, partial aggregation has not been done on
// the client side.
// If distPartAgg is set, the client is letting us know that partial
// aggregation has not been done.
// For eg: select a, count(b+c), count(distinct d+e) group by a
// For count(b+c), if partial aggregation has been performed, then we
// directly look for count(b+c).
// Otherwise, we look for b+c.
// For distincts, partial aggregation is never performed on the client
// side, so always look for the parameters: d+e
boolean partialAggDone = !(distPartAgg || isDistinct);
if (!partialAggDone) {
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ColumnInfo paraExprInfo =
groupByInputRowResolver.getExpression(paraExpr);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN
.getMsg(paraExpr));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
if (isDistinct && lastKeyColName != null) {
// if aggr is distinct, the parameter is name is constructed as
// KEY.lastKeyColName:._colx
paraExpression = Utilities.ReduceField.KEY.name() + "." +
lastKeyColName + ":" + numDistinctUDFs + "."
+ getColumnInternalName(i - 1);
}
ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(),
paraExprInfo.getIsVirtualCol());
ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(
paraExprInfo.getInternalName(), reduceValues);
if (reduceValue != null) {
// this parameter is a constant
expr = reduceValue;
}
aggParameters.add(expr);
}
} else {
ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(), paraExprInfo
.getIsVirtualCol()));
}
if (isDistinct) {
numDistinctUDFs++;
}
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = null;
// For distincts, partial aggregations have not been done
if (distPartAgg) {
genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters,
value, isDistinct, isAllColumns);
assert (genericUDAFEvaluator != null);
genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
} else {
genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
assert (genericUDAFEvaluator != null);
}
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(),
udaf.genericUDAFEvaluator, udaf.convertedParameters,
(mode != GroupByDesc.Mode.FINAL && isDistinct), amode));
String field = getColumnInternalName(groupByKeys.size()
+ aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(value, new ColumnInfo(
field, udaf.returnType, "", false));
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf
.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
// Nothing special needs to be done for grouping sets if
// this is the final group by operator, and multiple rows corresponding to the
// grouping sets have been generated upstream.
// However, if an addition MR job has been created to handle grouping sets,
// additional rows corresponding to grouping sets need to be created here.
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
distPartAgg, groupByMemoryUsage, memoryThreshold,
groupingSets,
groupingSetsPresent && groupingSetsNeedAdditionalMRJob,
groupingSetsPosition, containsDistinctAggr),
new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo),
groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
/*
* Create a new grouping key for grouping id.
* A dummy grouping id. is added. At runtime, the group by operator
* creates 'n' rows per input row, where 'n' is the number of grouping sets.
*/
private void createNewGroupingKey(List groupByKeys,
List outputColumnNames,
RowResolver groupByOutputRowResolver,
Map colExprMap) {
// The value for the constant does not matter. It is replaced by the grouping set
// value for the actual implementation
ExprNodeConstantDesc constant = new ExprNodeConstantDesc("0");
groupByKeys.add(constant);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(),
new ColumnInfo(
field,
TypeInfoFactory.stringTypeInfo,
null,
true));
colExprMap.put(field, constant);
}
/**
* Generate the map-side GroupByOperator for the Query Block
* (qb.getParseInfo().getXXX(dest)). The new GroupByOperator will be a child
* of the inputOperatorInfo.
*
* @param mode
* The mode of the aggregation (HASH)
* @param genericUDAFEvaluators
* If not null, this function will store the mapping from Aggregation
* StringTree to the genericUDAFEvaluator in this parameter, so it
* can be used in the next-stage GroupBy aggregations.
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanMapGroupByOperator(QB qb,
String dest,
List grpByExprs,
Operator inputOperatorInfo,
GroupByDesc.Mode mode,
Map genericUDAFEvaluators,
List groupingSetKeys,
boolean groupingSetsPresent) throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo)
.getRowResolver();
QBParseInfo parseInfo = qb.getParseInfo();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList groupByKeys = new ArrayList();
ArrayList outputColumnNames = new ArrayList();
ArrayList aggregations = new ArrayList();
Map colExprMap = new HashMap();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr,
groupByInputRowResolver);
groupByKeys.add(grpByExprNode);
String field = getColumnInternalName(i);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(grpbyExpr,
new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// The grouping set key is present after the grouping keys, before the distinct keys
int groupingSetsPosition = groupByKeys.size();
// For grouping sets, add a dummy grouping key
// This dummy key needs to be added as a reduce key
// For eg: consider: select key, value, count(1) from T group by key, value with rollup.
// Assuming map-side aggregation and no skew, the plan would look like:
//
// TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink
//
// This function is called for GroupBy1 to create an additional grouping key
// for the grouping set (corresponding to the rollup).
if (groupingSetsPresent) {
createNewGroupingKey(groupByKeys,
outputColumnNames,
groupByOutputRowResolver,
colExprMap);
}
// If there is a distinctFuncExp, add all parameters to the reduceKeys.
if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) {
List list = parseInfo.getDistinctFuncExprsForClause(dest);
for (ASTNode value : list) {
// 0 is function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode parameter = (ASTNode) value.getChild(i);
if (groupByOutputRowResolver.getExpression(parameter) == null) {
ExprNodeDesc distExprNode = genExprNodeDesc(parameter,
groupByInputRowResolver);
groupByKeys.add(distExprNode);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(parameter, new ColumnInfo(
field, distExprNode.getTypeInfo(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
}
}
}
// For each aggregation
HashMap aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
assert (aggregationTrees != null);
boolean containsDistinctAggr = false;
for (Map.Entry entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
String aggName = unescapeIdentifier(value.getChild(0).getText());
ArrayList aggParameters = new ArrayList();
new ArrayList>();
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr,
groupByInputRowResolver);
aggParameters.add(paraExprNode);
}
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
containsDistinctAggr = containsDistinctAggr || isDistinct;
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(
aggName, aggParameters, value, isDistinct, isAllColumns);
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(),
udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct,
amode));
String field = getColumnInternalName(groupByKeys.size()
+ aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(value, new ColumnInfo(
field, udaf.returnType, "", false));
// Save the evaluator so that it can be used by the next-stage
// GroupByOperators
if (genericUDAFEvaluators != null) {
genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
}
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf
.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
false, groupByMemoryUsage, memoryThreshold,
groupingSetKeys, groupingSetsPresent, groupingSetsPosition, containsDistinctAggr),
new RowSchema(groupByOutputRowResolver.getColumnInfos()),
inputOperatorInfo), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
/**
* Generate the ReduceSinkOperator for the Group By Query Block
* (qb.getPartInfo().getXXX(dest)). The new ReduceSinkOperator will be a child
* of inputOperatorInfo.
*
* It will put all Group By keys and the distinct field (if any) in the
* map-reduce sort key, and all other fields in the map-reduce value.
*
* @param numPartitionFields
* the number of fields for map-reduce partitioning. This is usually
* the number of fields in the Group By keys.
* @return the new ReduceSinkOperator.
* @throws SemanticException
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanReduceSinkOperator(QB qb,
String dest,
Operator inputOperatorInfo,
List grpByExprs,
int numPartitionFields,
boolean changeNumPartitionFields,
int numReducers,
boolean mapAggrDone,
boolean groupingSetsPresent,
boolean optimizeSkew) throws SemanticException {
RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo)
.getRowResolver();
QBParseInfo parseInfo = qb.getParseInfo();
RowResolver reduceSinkOutputRowResolver = new RowResolver();
reduceSinkOutputRowResolver.setIsExprResolver(true);
Map colExprMap = new HashMap();
int numPartitionCols = -1;
// Pre-compute group-by keys and store in reduceKeys
List outputKeyColumnNames = new ArrayList();
List outputValueColumnNames = new ArrayList();
ArrayList reduceKeys = getReduceKeysForReduceSink(grpByExprs, dest,
reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames,
colExprMap);
// add a key for reduce sink
if (groupingSetsPresent) {
// Process grouping set for the reduce sink operator
processGroupingSetReduceSinkOperator(
reduceSinkInputRowResolver,
reduceSinkOutputRowResolver,
reduceKeys,
outputKeyColumnNames,
colExprMap);
if (changeNumPartitionFields) {
numPartitionFields++;
}
}
List> distinctColIndices = getDistinctColIndicesForReduceSink(parseInfo, dest,
reduceKeys, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames);
if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) {
numPartitionCols = grpByExprs.size();
}
ArrayList reduceValues = new ArrayList();
HashMap aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
if (!mapAggrDone) {
getReduceValuesForReduceSinkNoMapAgg(parseInfo, dest, reduceSinkInputRowResolver,
reduceSinkOutputRowResolver, outputValueColumnNames, reduceValues);
} else {
// Put partial aggregation results in reduceValues
int inputField = reduceKeys.size();
for (Map.Entry entry : aggregationTrees.entrySet()) {
TypeInfo type = reduceSinkInputRowResolver.getColumnInfos().get(
inputField).getType();
reduceValues.add(new ExprNodeColumnDesc(type,
getColumnInternalName(inputField), "", false));
inputField++;
outputValueColumnNames.add(getColumnInternalName(reduceValues.size() - 1));
String field = Utilities.ReduceField.VALUE.toString() + "."
+ getColumnInternalName(reduceValues.size() - 1);
reduceSinkOutputRowResolver.putExpression(entry.getValue(),
new ColumnInfo(field, type, null, false));
}
}
if (!optimizeSkew) {
numPartitionCols = numPartitionFields;
}
ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
OperatorFactory.getAndMakeChild(
PlanUtils.getReduceSinkDesc(reduceKeys,
groupingSetsPresent ? grpByExprs.size() + 1 : grpByExprs.size(),
reduceValues, distinctColIndices,
outputKeyColumnNames, outputValueColumnNames, true, -1, numPartitionCols,
numReducers,optimizeSkew),
new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), inputOperatorInfo),
reduceSinkOutputRowResolver);
rsOp.setColumnExprMap(colExprMap);
return rsOp;
}
private ArrayList getReduceKeysForReduceSink(List grpByExprs, String dest,
RowResolver reduceSinkInputRowResolver, RowResolver reduceSinkOutputRowResolver,
List outputKeyColumnNames, Map colExprMap)
throws SemanticException {
ArrayList reduceKeys = new ArrayList();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ExprNodeDesc inputExpr = genExprNodeDesc(grpbyExpr,
reduceSinkInputRowResolver);
reduceKeys.add(inputExpr);
if (reduceSinkOutputRowResolver.getExpression(grpbyExpr) == null) {
outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1));
String field = Utilities.ReduceField.KEY.toString() + "."
+ getColumnInternalName(reduceKeys.size() - 1);
ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(
reduceKeys.size() - 1).getTypeInfo(), null, false);
reduceSinkOutputRowResolver.putExpression(grpbyExpr, colInfo);
colExprMap.put(colInfo.getInternalName(), inputExpr);
} else {
throw new SemanticException(ErrorMsg.DUPLICATE_GROUPBY_KEY
.getMsg(grpbyExpr));
}
}
return reduceKeys;
}
private List> getDistinctColIndicesForReduceSink(QBParseInfo parseInfo,
String dest,
List reduceKeys, RowResolver reduceSinkInputRowResolver,
RowResolver reduceSinkOutputRowResolver, List outputKeyColumnNames)
throws SemanticException {
List> distinctColIndices = new ArrayList>();
// If there is a distinctFuncExp, add all parameters to the reduceKeys.
if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) {
List distFuncs = parseInfo.getDistinctFuncExprsForClause(dest);
String colName = getColumnInternalName(reduceKeys.size());
outputKeyColumnNames.add(colName);
for (int i = 0; i < distFuncs.size(); i++) {
ASTNode value = distFuncs.get(i);
int numExprs = 0;
List distinctIndices = new ArrayList();
// 0 is function name
for (int j = 1; j < value.getChildCount(); j++) {
ASTNode parameter = (ASTNode) value.getChild(j);
ExprNodeDesc expr = genExprNodeDesc(parameter, reduceSinkInputRowResolver);
// see if expr is already present in reduceKeys.
// get index of expr in reduceKeys
int ri;
for (ri = 0; ri < reduceKeys.size(); ri++) {
if (reduceKeys.get(ri).getExprString().equals(expr.getExprString())) {
break;
}
}
// add the expr to reduceKeys if it is not present
if (ri == reduceKeys.size()) {
reduceKeys.add(expr);
}
// add the index of expr in reduceKeys to distinctIndices
distinctIndices.add(ri);
String name = getColumnInternalName(numExprs);
String field = Utilities.ReduceField.KEY.toString() + "." + colName
+ ":" + i
+ "." + name;
ColumnInfo colInfo = new ColumnInfo(field, expr.getTypeInfo(), null, false);
reduceSinkOutputRowResolver.putExpression(parameter, colInfo);
numExprs++;
}
distinctColIndices.add(distinctIndices);
}
}
return distinctColIndices;
}
private void getReduceValuesForReduceSinkNoMapAgg(QBParseInfo parseInfo, String dest,
RowResolver reduceSinkInputRowResolver, RowResolver reduceSinkOutputRowResolver,
List outputValueColumnNames, ArrayList reduceValues)
throws SemanticException {
HashMap aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
// Put parameters to aggregations in reduceValues
for (Map.Entry entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
// 0 is function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode parameter = (ASTNode) value.getChild(i);
if (reduceSinkOutputRowResolver.getExpression(parameter) == null) {
reduceValues.add(genExprNodeDesc(parameter,
reduceSinkInputRowResolver));
outputValueColumnNames
.add(getColumnInternalName(reduceValues.size() - 1));
String field = Utilities.ReduceField.VALUE.toString() + "."
+ getColumnInternalName(reduceValues.size() - 1);
reduceSinkOutputRowResolver.putExpression(parameter, new ColumnInfo(field,
reduceValues.get(reduceValues.size() - 1).getTypeInfo(), null,
false));
}
}
}
}
@SuppressWarnings("nls")
private Operator genCommonGroupByPlanReduceSinkOperator(QB qb, List dests,
Operator inputOperatorInfo, boolean optimizeSkew) throws SemanticException {
RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo)
.getRowResolver();
QBParseInfo parseInfo = qb.getParseInfo();
RowResolver reduceSinkOutputRowResolver = new RowResolver();
reduceSinkOutputRowResolver.setIsExprResolver(true);
Map