/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.parse;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Serializable;
import java.security.AccessControlException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.antlr.runtime.ClassicToken;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.Tree;
import org.antlr.runtime.tree.TreeVisitor;
import org.antlr.runtime.tree.TreeVisitorAction;
import org.antlr.runtime.tree.TreeWizard;
import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
import org.apache.calcite.rel.RelNode;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.mutable.MutableBoolean;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.conf.HiveConf.StrictChecks;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.AddDynamicPartitions;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.ArchiveUtils;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RecordReader;
import org.apache.hadoop.hive.ql.exec.RecordWriter;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.AcidUtils.Operation;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.NullRowsInputFormat;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.metadata.DummyPartition;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.Optimizer;
import org.apache.hadoop.hive.ql.optimizer.Transform;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc;
import org.apache.hadoop.hive.ql.optimizer.lineage.Generator;
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec.SpecType;
import org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher;
import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFInputSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputType;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionedTableFunctionSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec;
import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType;
import org.apache.hadoop.hive.ql.parse.SubQueryUtils.ISubQueryJoinInfo;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.CurrentRowSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.RangeBoundarySpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.ValueBoundarySpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFrameSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc;
import org.apache.hadoop.hive.ql.plan.CreateViewDesc;
import org.apache.hadoop.hive.ql.plan.DDLWork;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc;
import org.apache.hadoop.hive.ql.plan.ForwardDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.plan.InsertTableDesc;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc;
import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc;
import org.apache.hadoop.hive.ql.plan.LimitDesc;
import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PTFDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.ScriptDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.UDTFDesc;
import org.apache.hadoop.hive.ql.plan.UnionDesc;
import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef;
import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.ql.util.ResourceDownloader;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
import org.apache.hadoop.hive.serde2.NoOpFetchFormatter;
import org.apache.hadoop.hive.serde2.NullStructSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.Utils;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.security.UserGroupInformation;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.Sets;
import com.google.common.math.IntMath;
/**
* Implementation of the semantic analyzer. It generates the query plan.
* There are other specific semantic analyzers for some hive operations such as
* DDLSemanticAnalyzer for ddl operations.
*/
public class SemanticAnalyzer extends BaseSemanticAnalyzer {
public static final String DUMMY_DATABASE = "_dummy_database";
public static final String DUMMY_TABLE = "_dummy_table";
public static final String SUBQUERY_TAG_1 = "-subquery1";
public static final String SUBQUERY_TAG_2 = "-subquery2";
// Max characters when auto generating the column name with func name
private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
private static final String VALUES_TMP_TABLE_NAME_PREFIX = "Values__Tmp__Table__";
static final String MATERIALIZATION_MARKER = "$MATERIALIZATION";
private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner;
private HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
protected HashMap<String, TableScanOperator> topOps;
protected LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx;
private List<LoadTableDesc> loadTableWork;
private List<LoadFileDesc> loadFileWork;
private List<ColumnStatsAutoGatherContext> columnStatsAutoGatherContexts;
private final Map<JoinOperator, QBJoinTree> joinContext;
private final Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext;
private final HashMap<TableScanOperator, Table> topToTable;
private final Map<FileSinkOperator, Table> fsopToTable;
private final List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting;
private final HashMap<TableScanOperator, Map<String, String>> topToTableProps;
private QB qb;
private ASTNode ast;
private int destTableId;
private UnionProcContext uCtx;
List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer;
private HashMap<TableScanOperator, SampleDesc> opToSamplePruner;
private final Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner;
private Map<SelectOperator, Table> viewProjectToTableSchema;
/**
* a map for the split sampling, from alias to an instance of SplitSample
* that describes percentage and number.
*/
private final HashMap<String, SplitSample> nameToSplitSample;
Map<GroupByOperator, Set<String>> groupOpToInputTables;
Map<String, PrunedPartitionList> prunedPartitions;
protected List<FieldSchema> resultSchema;
protected CreateViewDesc createVwDesc;
private ArrayList<String> viewsExpanded;
private ASTNode viewSelect;
protected final UnparseTranslator unparseTranslator;
private final GlobalLimitCtx globalLimitCtx;
// prefix for column names auto generated by hive
private final String autogenColAliasPrfxLbl;
private final boolean autogenColAliasPrfxIncludeFuncName;
// Keep track of view alias to read entity corresponding to the view
// For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T
// keeps track of aliases for V3, V3:V2, V3:V2:V1.
// This is used when T is added as an input for the query; the parents of T are
// derived from the alias V3:V2:V1:T
private final Map<String, ReadEntity> viewAliasToInput;
// need to merge the isDirect flag into the input even if the newInput does not have a parent
private boolean mergeIsDirect;
// flag for no scan during analyze ... compute statistics
protected boolean noscan;
//flag for partial scan during analyze ... compute statistics
protected boolean partialscan;
protected volatile boolean disableJoinMerge = false;
protected final boolean defaultJoinMerge;
/*
* Capture the CTE definitions in a Query.
*/
final Map<String, CTEClause> aliasToCTEs;
/*
* Used to check recursive CTE invocations. Similar to viewsExpanded
*/
ArrayList<String> ctesExpanded;
/*
* Whether root tasks after materialized CTE linkage have been resolved
*/
boolean rootTasksResolved;
protected TableMask tableMask;
CreateTableDesc tableDesc;
/** Not thread-safe. */
final ASTSearcher astSearcher = new ASTSearcher();
protected AnalyzeRewriteContext analyzeRewrite;
// A mapping from a tableName to a table object in metastore.
Map<String, Table> tabNameToTabObject;
// The tokens we should ignore when we are trying to do table masking.
private final Set<Integer> ignoredTokens = Sets.newHashSet(HiveParser.TOK_GROUPBY,
HiveParser.TOK_ORDERBY, HiveParser.TOK_WINDOWSPEC, HiveParser.TOK_CLUSTERBY,
HiveParser.TOK_DISTRIBUTEBY, HiveParser.TOK_SORTBY);
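// Carries phase 1 walk state: the name of the current destination clause and a
// running counter used to generate unique clause names.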
static class Phase1Ctx {
String dest;
int nextNum;
}
public SemanticAnalyzer(QueryState queryState) throws SemanticException {
super(queryState);
opToPartPruner = new HashMap<TableScanOperator, ExprNodeDesc>();
opToPartList = new HashMap<TableScanOperator, PrunedPartitionList>();
opToSamplePruner = new HashMap<TableScanOperator, SampleDesc>();
nameToSplitSample = new HashMap<String, SplitSample>();
// Must be deterministic order maps - see HIVE-8707
topOps = new LinkedHashMap<String, TableScanOperator>();
loadTableWork = new ArrayList<LoadTableDesc>();
loadFileWork = new ArrayList<LoadFileDesc>();
columnStatsAutoGatherContexts = new ArrayList<ColumnStatsAutoGatherContext>();
opParseCtx = new LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext>();
joinContext = new HashMap<JoinOperator, QBJoinTree>();
smbMapJoinContext = new HashMap<SMBMapJoinOperator, QBJoinTree>();
// Must be deterministic order map for consistent q-test output across Java versions
topToTable = new LinkedHashMap<TableScanOperator, Table>();
fsopToTable = new HashMap<FileSinkOperator, Table>();
reduceSinkOperatorsAddedByEnforceBucketingSorting = new ArrayList<ReduceSinkOperator>();
topToTableProps = new HashMap<TableScanOperator, Map<String, String>>();
destTableId = 1;
uCtx = null;
listMapJoinOpsNoReducer = new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>();
groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>();
prunedPartitions = new HashMap<String, PrunedPartitionList>();
tabNameToTabObject = new HashMap<String, Table>();
unparseTranslator = new UnparseTranslator(conf);
autogenColAliasPrfxLbl = HiveConf.getVar(conf,
HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL);
autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME);
queryProperties = new QueryProperties();
opToPartToSkewedPruner = new HashMap<TableScanOperator, Map<String, ExprNodeDesc>>();
aliasToCTEs = new HashMap<String, CTEClause>();
globalLimitCtx = new GlobalLimitCtx();
viewAliasToInput = new HashMap<String, ReadEntity>();
mergeIsDirect = true;
noscan = partialscan = false;
tabNameToTabObject = new HashMap<>();
defaultJoinMerge = false == HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MERGE_NWAY_JOINS);
disableJoinMerge = defaultJoinMerge;
}
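/**
* Clears per-query state so this analyzer instance can be reused; the pruned
* partition cache is only cleared when clearPartsCache is true.
*/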
@Override
protected void reset(boolean clearPartsCache) {
super.reset(true);
if(clearPartsCache) {
prunedPartitions.clear();
//When init(true) is combined with genResolvedParseTree, a resolved parse tree is generated from the syntax tree.
//ReadEntities created under these conditions are all relevant to the syntax tree, even the ones without parents,
//so set mergeIsDirect to true here.
mergeIsDirect = true;
} else {
mergeIsDirect = false;
}
tabNameToTabObject.clear();
loadTableWork.clear();
loadFileWork.clear();
columnStatsAutoGatherContexts.clear();
topOps.clear();
destTableId = 1;
idToTableNameMap.clear();
qb = null;
ast = null;
uCtx = null;
joinContext.clear();
smbMapJoinContext.clear();
opParseCtx.clear();
groupOpToInputTables.clear();
disableJoinMerge = defaultJoinMerge;
aliasToCTEs.clear();
topToTable.clear();
opToPartPruner.clear();
opToPartList.clear();
opToPartToSkewedPruner.clear();
opToSamplePruner.clear();
nameToSplitSample.clear();
fsopToTable.clear();
resultSchema = null;
createVwDesc = null;
viewsExpanded = null;
viewSelect = null;
ctesExpanded = null;
globalLimitCtx.disableOpt();
viewAliasToInput.clear();
reduceSinkOperatorsAddedByEnforceBucketingSorting.clear();
topToTableProps.clear();
listMapJoinOpsNoReducer.clear();
unparseTranslator.clear();
queryProperties.clear();
outputs.clear();
}
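/** Re-initializes this analyzer's state from an existing ParseContext. */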
public void initParseCtx(ParseContext pctx) {
opToPartPruner = pctx.getOpToPartPruner();
opToPartList = pctx.getOpToPartList();
opToSamplePruner = pctx.getOpToSamplePruner();
topOps = pctx.getTopOps();
loadTableWork = pctx.getLoadTableWork();
loadFileWork = pctx.getLoadFileWork();
ctx = pctx.getContext();
destTableId = pctx.getDestTableId();
idToTableNameMap = pctx.getIdToTableNameMap();
uCtx = pctx.getUCtx();
listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer();
prunedPartitions = pctx.getPrunedPartitions();
tabNameToTabObject = pctx.getTabNameToTabObject();
fetchTask = pctx.getFetchTask();
setLineageInfo(pctx.getLineageInfo());
}
public ParseContext getParseContext() {
// Make sure the basic query properties are initialized
copyInfoToQueryProperties(queryProperties);
return new ParseContext(queryState, opToPartPruner, opToPartList, topOps,
new HashSet<JoinOperator>(joinContext.keySet()),
new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()),
loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx,
listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject,
opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting,
analyzeRewrite, tableDesc, queryProperties, viewProjectToTableSchema, acidFileSinks);
}
public CompilationOpContext getOpContext() {
return ctx.getOpContext();
}
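/** Convenience overload for query expressions that are not inside a view. */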
public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias)
throws SemanticException {
doPhase1QBExpr(ast, qbexpr, id, alias, false);
}
@SuppressWarnings("nls")
public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias, boolean insideView)
throws SemanticException {
assert (ast.getToken() != null);
if (ast.getToken().getType() == HiveParser.TOK_QUERY) {
QB qb = new QB(id, alias, true);
qb.setInsideView(insideView);
Phase1Ctx ctx_1 = initPhase1Ctx();
doPhase1(ast, qb, ctx_1, null);
qbexpr.setOpcode(QBExpr.Opcode.NULLOP);
qbexpr.setQB(qb);
}
// setop
else {
switch (ast.getToken().getType()) {
case HiveParser.TOK_UNIONALL:
qbexpr.setOpcode(QBExpr.Opcode.UNION);
break;
case HiveParser.TOK_INTERSECTALL:
qbexpr.setOpcode(QBExpr.Opcode.INTERSECTALL);
break;
case HiveParser.TOK_INTERSECTDISTINCT:
qbexpr.setOpcode(QBExpr.Opcode.INTERSECT);
break;
case HiveParser.TOK_EXCEPTALL:
qbexpr.setOpcode(QBExpr.Opcode.EXCEPTALL);
break;
case HiveParser.TOK_EXCEPTDISTINCT:
qbexpr.setOpcode(QBExpr.Opcode.EXCEPT);
break;
default:
throw new SemanticException(ErrorMsg.UNSUPPORTED_SET_OPERATOR.getMsg("Type "
+ ast.getToken().getType()));
}
// query 1
assert (ast.getChild(0) != null);
QBExpr qbexpr1 = new QBExpr(alias + SUBQUERY_TAG_1);
doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id + SUBQUERY_TAG_1, alias
+ SUBQUERY_TAG_1, insideView);
qbexpr.setQBExpr1(qbexpr1);
// query 2
assert (ast.getChild(1) != null);
QBExpr qbexpr2 = new QBExpr(alias + SUBQUERY_TAG_2);
doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + SUBQUERY_TAG_2, alias
+ SUBQUERY_TAG_2, insideView);
qbexpr.setQBExpr2(qbexpr2);
}
}
private LinkedHashMap<String, ASTNode> doPhase1GetAggregationsFromSelect(
ASTNode selExpr, QB qb, String dest) throws SemanticException {
// Iterate over the select expressions and search for aggregation trees.
// Use String as keys to eliminate duplicate trees.
LinkedHashMap<String, ASTNode> aggregationTrees = new LinkedHashMap<String, ASTNode>();
List<ASTNode> wdwFns = new ArrayList<ASTNode>();
for (int i = 0; i < selExpr.getChildCount(); ++i) {
ASTNode function = (ASTNode) selExpr.getChild(i);
if (function.getType() == HiveParser.TOK_SELEXPR ||
function.getType() == HiveParser.TOK_SUBQUERY_EXPR) {
function = (ASTNode)function.getChild(0);
}
doPhase1GetAllAggregations(function, aggregationTrees, wdwFns);
}
// window based aggregations are handled differently
for (ASTNode wdwFn : wdwFns) {
WindowingSpec spec = qb.getWindowingSpec(dest);
if(spec == null) {
queryProperties.setHasWindowing(true);
spec = new WindowingSpec();
qb.addDestToWindowingSpec(dest, spec);
}
HashMap<String, ASTNode> wExprsInDest = qb.getParseInfo().getWindowingExprsForClause(dest);
int wColIdx = spec.getWindowExpressions() == null ? 0 : spec.getWindowExpressions().size();
WindowFunctionSpec wFnSpec = processWindowFunction(wdwFn,
(ASTNode)wdwFn.getChild(wdwFn.getChildCount()-1));
// If this is a duplicate invocation of a function; don't add to WindowingSpec.
if ( wExprsInDest != null &&
wExprsInDest.containsKey(wFnSpec.getExpression().toStringTree())) {
continue;
}
wFnSpec.setAlias(wFnSpec.getName() + "_window_" + wColIdx);
spec.addWindowFunction(wFnSpec);
qb.getParseInfo().addWindowingExprToClause(dest, wFnSpec.getExpression());
}
return aggregationTrees;
}
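/**
* Records explicit column aliases (expr AS alias) from the select list into the
* QB parse info.
*/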
private void doPhase1GetColumnAliasesFromSelect(
ASTNode selectExpr, QBParseInfo qbp) {
for (int i = 0; i < selectExpr.getChildCount(); ++i) {
ASTNode selExpr = (ASTNode) selectExpr.getChild(i);
if ((selExpr.getToken().getType() == HiveParser.TOK_SELEXPR)
&& (selExpr.getChildCount() == 2)) {
String columnAlias = unescapeIdentifier(selExpr.getChild(1).getText());
qbp.setExprToColumnAlias((ASTNode) selExpr.getChild(0), columnAlias);
}
}
}
/**
* DFS-scan the expressionTree to find all aggregation subtrees and put them
* in aggregations.
*
* @param expressionTree
* @param aggregations
* the key to the HashTable is the toStringTree() representation of
* the aggregation subtree.
* @throws SemanticException
*/
private void doPhase1GetAllAggregations(ASTNode expressionTree,
HashMap<String, ASTNode> aggregations, List<ASTNode> wdwFns) throws SemanticException {
int exprTokenType = expressionTree.getToken().getType();
if(exprTokenType == HiveParser.TOK_SUBQUERY_EXPR) {
//since now we have scalar subqueries we can get subquery expression in having
// we don't want to include aggregate from within subquery
return;
}
if (exprTokenType == HiveParser.TOK_FUNCTION
|| exprTokenType == HiveParser.TOK_FUNCTIONDI
|| exprTokenType == HiveParser.TOK_FUNCTIONSTAR) {
assert (expressionTree.getChildCount() != 0);
if (expressionTree.getChild(expressionTree.getChildCount()-1).getType()
== HiveParser.TOK_WINDOWSPEC) {
// If it is a windowing spec, we include it in the list
// Further, we will examine its children AST nodes to check whether
// there are aggregation functions within
wdwFns.add(expressionTree);
doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(expressionTree.getChildCount()-1),
aggregations, wdwFns);
return;
}
if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {
String functionName = unescapeIdentifier(expressionTree.getChild(0)
.getText());
// Validate the function name
if (FunctionRegistry.getFunctionInfo(functionName) == null) {
throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(functionName));
}
if(FunctionRegistry.impliesOrder(functionName)) {
throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
}
if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) {
if(containsLeadLagUDF(expressionTree)) {
throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
}
aggregations.put(expressionTree.toStringTree(), expressionTree);
FunctionInfo fi = FunctionRegistry.getFunctionInfo(functionName);
if (!fi.isNative()) {
unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree
.getChild(0));
}
return;
}
}
}
for (int i = 0; i < expressionTree.getChildCount(); i++) {
doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i),
aggregations, wdwFns);
}
}
private List<ASTNode> doPhase1GetDistinctFuncExprs(
HashMap<String, ASTNode> aggregationTrees) throws SemanticException {
List<ASTNode> exprs = new ArrayList<ASTNode>();
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
assert (value != null);
if (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI) {
exprs.add(value);
}
}
return exprs;
}
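/**
* Builds an error message prefixed, when the AST node is available, with its line
* and column and the text near the offending token.
*/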
public static String generateErrorMessage(ASTNode ast, String message) {
StringBuilder sb = new StringBuilder();
if (ast == null) {
sb.append(message).append(". Cannot tell the position of null AST.");
return sb.toString();
}
sb.append(ast.getLine());
sb.append(":");
sb.append(ast.getCharPositionInLine());
sb.append(" ");
sb.append(message);
sb.append(". Error encountered near token '");
sb.append(ErrorMsg.getText(ast));
sb.append("'");
return sb.toString();
}
ASTNode getAST() {
return this.ast;
}
protected void setAST(ASTNode newAST) {
this.ast = newAST;
}
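/**
* Returns the child indexes within a TOK_TABREF of the alias, table properties,
* bucket sample and split sample clauses (alias defaults to 0, the others to -1
* when absent).
*/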
int[] findTabRefIdxs(ASTNode tabref) {
assert tabref.getType() == HiveParser.TOK_TABREF;
int aliasIndex = 0;
int propsIndex = -1;
int tsampleIndex = -1;
int ssampleIndex = -1;
for (int index = 1; index < tabref.getChildCount(); index++) {
ASTNode ct = (ASTNode) tabref.getChild(index);
if (ct.getToken().getType() == HiveParser.TOK_TABLEBUCKETSAMPLE) {
tsampleIndex = index;
} else if (ct.getToken().getType() == HiveParser.TOK_TABLESPLITSAMPLE) {
ssampleIndex = index;
} else if (ct.getToken().getType() == HiveParser.TOK_TABLEPROPERTIES) {
propsIndex = index;
} else {
aliasIndex = index;
}
}
return new int[] {aliasIndex, propsIndex, tsampleIndex, ssampleIndex};
}
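/** Returns the explicit table alias if one is present, otherwise the unqualified table name. */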
String findSimpleTableName(ASTNode tabref, int aliasIndex) {
assert tabref.getType() == HiveParser.TOK_TABREF;
ASTNode tableTree = (ASTNode) (tabref.getChild(0));
String alias;
if (aliasIndex != 0) {
alias = unescapeIdentifier(tabref.getChild(aliasIndex).getText());
}
else {
alias = getUnescapedUnqualifiedTableName(tableTree);
}
return alias;
}
/**
* Goes though the tabref tree and finds the alias for the table. Once found,
* it records the table name-> alias association in aliasToTabs. It also makes
* an association from the alias to the table AST in parse info.
*
* @return the alias of the table
*/
private String processTable(QB qb, ASTNode tabref) throws SemanticException {
// For each table reference get the table name
// and the alias (if alias is not present, the table name
// is used as an alias)
int[] indexes = findTabRefIdxs(tabref);
int aliasIndex = indexes[0];
int propsIndex = indexes[1];
int tsampleIndex = indexes[2];
int ssampleIndex = indexes[3];
ASTNode tableTree = (ASTNode) (tabref.getChild(0));
String tabIdName = getUnescapedName(tableTree).toLowerCase();
String alias = findSimpleTableName(tabref, aliasIndex);
if (propsIndex >= 0) {
Tree propsAST = tabref.getChild(propsIndex);
Map<String, String> props = DDLSemanticAnalyzer.getProps((ASTNode) propsAST.getChild(0));
// We get the information from Calcite.
if ("TRUE".equals(props.get("insideView"))) {
qb.getAliasInsideView().add(alias.toLowerCase());
}
qb.setTabProps(alias, props);
}
// If the alias is already there then we have a conflict
if (qb.exists(alias)) {
throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref
.getChild(aliasIndex)));
}
if (tsampleIndex >= 0) {
ASTNode sampleClause = (ASTNode) tabref.getChild(tsampleIndex);
ArrayList<ASTNode> sampleCols = new ArrayList<ASTNode>();
if (sampleClause.getChildCount() > 2) {
for (int i = 2; i < sampleClause.getChildCount(); i++) {
sampleCols.add((ASTNode) sampleClause.getChild(i));
}
}
// TODO: For now only support sampling on up to two columns
// Need to change it to list of columns
if (sampleCols.size() > 2) {
throw new SemanticException(generateErrorMessage(
(ASTNode) tabref.getChild(0),
ErrorMsg.SAMPLE_RESTRICTION.getMsg()));
}
TableSample tabSample = new TableSample(
unescapeIdentifier(sampleClause.getChild(0).getText()),
unescapeIdentifier(sampleClause.getChild(1).getText()),
sampleCols);
qb.getParseInfo().setTabSample(alias, tabSample);
if (unparseTranslator.isEnabled()) {
for (ASTNode sampleCol : sampleCols) {
unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol
.getChild(0));
}
}
} else if (ssampleIndex >= 0) {
ASTNode sampleClause = (ASTNode) tabref.getChild(ssampleIndex);
Tree type = sampleClause.getChild(0);
Tree numerator = sampleClause.getChild(1);
String value = unescapeIdentifier(numerator.getText());
SplitSample sample;
if (type.getType() == HiveParser.TOK_PERCENT) {
assertCombineInputFormat(numerator, "Percentage");
Double percent = Double.valueOf(value).doubleValue();
if (percent < 0 || percent > 100) {
throw new SemanticException(generateErrorMessage((ASTNode) numerator,
"Sampling percentage should be between 0 and 100"));
}
int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
sample = new SplitSample(percent, seedNum);
} else if (type.getType() == HiveParser.TOK_ROWCOUNT) {
sample = new SplitSample(Integer.parseInt(value));
} else {
assert type.getType() == HiveParser.TOK_LENGTH;
assertCombineInputFormat(numerator, "Total Length");
long length = Integer.parseInt(value.substring(0, value.length() - 1));
char last = value.charAt(value.length() - 1);
if (last == 'k' || last == 'K') {
length <<= 10;
} else if (last == 'm' || last == 'M') {
length <<= 20;
} else if (last == 'g' || last == 'G') {
length <<= 30;
}
int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
sample = new SplitSample(length, seedNum);
}
String alias_id = getAliasId(alias, qb);
nameToSplitSample.put(alias_id, sample);
}
// Insert this map into the stats
qb.setTabAlias(alias, tabIdName);
if (qb.isInsideView()) {
qb.getAliasInsideView().add(alias.toLowerCase());
}
qb.addAlias(alias);
qb.getParseInfo().setSrcForAlias(alias, tableTree);
// if alias to CTE contains the alias, we do not do the translation because
// cte is actually a subquery.
if (!this.aliasToCTEs.containsKey(alias)) {
unparseTranslator.addTableNameTranslation(tableTree, SessionState.get().getCurrentDatabase());
if (aliasIndex != 0) {
unparseTranslator.addIdentifierTranslation((ASTNode) tabref.getChild(aliasIndex));
}
}
return alias;
}
Map<String, SplitSample> getNameToSplitSampleMap() {
return this.nameToSplitSample;
}
/**
* Convert a string to Text format and write its bytes in the same way TextOutputFormat would do.
* This is needed to properly encode non-ascii characters.
*/
private static void writeAsText(String text, FSDataOutputStream out) throws IOException {
Text to = new Text(text);
out.write(to.getBytes(), 0, to.getLength());
}
/**
* Generate a temp table out of a values clause
* See also {@link #preProcessForInsert(ASTNode, QB)}
*/
private ASTNode genValuesTempTable(ASTNode originalFrom, QB qb) throws SemanticException {
Path dataDir = null;
if(!qb.getEncryptedTargetTablePaths().isEmpty()) {
//currently only Insert into T values(...) is supported thus only 1 values clause
//and only 1 target table are possible. If/when support for
//select ... from values(...) is added an insert statement may have multiple
//encrypted target tables.
dataDir = ctx.getMRTmpPath(qb.getEncryptedTargetTablePaths().get(0).toUri());
}
// Pick a name for the table
SessionState ss = SessionState.get();
String tableName = VALUES_TMP_TABLE_NAME_PREFIX + ss.getNextValuesTempTableSuffix();
// Step 1, parse the values clause we were handed
List<? extends Node> fromChildren = originalFrom.getChildren();
// First child should be the virtual table ref
ASTNode virtualTableRef = (ASTNode)fromChildren.get(0);
assert virtualTableRef.getToken().getType() == HiveParser.TOK_VIRTUAL_TABREF :
"Expected first child of TOK_VIRTUAL_TABLE to be TOK_VIRTUAL_TABREF but was " +
virtualTableRef.getName();
List<? extends Node> virtualTableRefChildren = virtualTableRef.getChildren();
// First child of this should be the table name. If it's anonymous,
// then we don't have a table name.
ASTNode tabName = (ASTNode)virtualTableRefChildren.get(0);
if (tabName.getToken().getType() != HiveParser.TOK_ANONYMOUS) {
// TODO, if you want to make select ... from (values(...) as foo(...) work,
// you need to parse this list of columns names and build it into the table
throw new SemanticException(ErrorMsg.VALUES_TABLE_CONSTRUCTOR_NOT_SUPPORTED.getMsg());
}
// The second child of the TOK_VIRTUAL_TABLE should be TOK_VALUES_TABLE
ASTNode valuesTable = (ASTNode)fromChildren.get(1);
assert valuesTable.getToken().getType() == HiveParser.TOK_VALUES_TABLE :
"Expected second child of TOK_VIRTUAL_TABLE to be TOK_VALUE_TABLE but was " +
valuesTable.getName();
// Each of the children of TOK_VALUES_TABLE will be a TOK_VALUE_ROW
List<? extends Node> valuesTableChildren = valuesTable.getChildren();
// Now that we're going to start reading through the rows, open a file to write the rows to.
// If we leave this method before creating the temporary table we need to be sure to clean up
// this file.
Path tablePath = null;
FileSystem fs = null;
FSDataOutputStream out = null;
try {
if(dataDir == null) {
tablePath = Warehouse.getDnsPath(new Path(ss.getTempTableSpace(), tableName), conf);
}
else {
//if target table of insert is encrypted, make sure temporary table data is stored
//similarly encrypted
tablePath = Warehouse.getDnsPath(new Path(dataDir, tableName), conf);
}
fs = tablePath.getFileSystem(conf);
fs.mkdirs(tablePath);
Path dataFile = new Path(tablePath, "data_file");
out = fs.create(dataFile);
List<FieldSchema> fields = new ArrayList<FieldSchema>();
boolean firstRow = true;
for (Node n : valuesTableChildren) {
ASTNode valuesRow = (ASTNode) n;
assert valuesRow.getToken().getType() == HiveParser.TOK_VALUE_ROW :
"Expected child of TOK_VALUE_TABLE to be TOK_VALUE_ROW but was " + valuesRow.getName();
// Each of the children of this should be a literal
List<? extends Node> valuesRowChildren = valuesRow.getChildren();
boolean isFirst = true;
int nextColNum = 1;
for (Node n1 : valuesRowChildren) {
ASTNode value = (ASTNode) n1;
if (firstRow) {
fields.add(new FieldSchema("tmp_values_col" + nextColNum++, "string", ""));
}
if (isFirst) isFirst = false;
else writeAsText("\u0001", out);
writeAsText(unparseExprForValuesClause(value), out);
}
writeAsText("\n", out);
firstRow = false;
}
// Step 2, create a temp table, using the created file as the data
StorageFormat format = new StorageFormat(conf);
format.processStorageFormat("TextFile");
Table table = db.newTable(tableName);
table.setSerializationLib(format.getSerde());
table.setFields(fields);
table.setDataLocation(tablePath);
table.getTTable().setTemporary(true);
table.setStoredAsSubDirectories(false);
table.setInputFormatClass(format.getInputFormat());
table.setOutputFormatClass(format.getOutputFormat());
db.createTable(table, false);
} catch (Exception e) {
String errMsg = ErrorMsg.INSERT_CANNOT_CREATE_TEMP_FILE.getMsg() + e.getMessage();
LOG.error(errMsg);
// Try to delete the file
if (fs != null && tablePath != null) {
try {
fs.delete(tablePath, false);
} catch (IOException swallowIt) {}
}
throw new SemanticException(errMsg, e);
} finally {
IOUtils.closeStream(out);
}
// Step 3, return a new subtree with a from clause built around that temp table
// The form of the tree is TOK_TABREF->TOK_TABNAME->identifier(tablename)
Token t = new ClassicToken(HiveParser.TOK_TABREF);
ASTNode tabRef = new ASTNode(t);
t = new ClassicToken(HiveParser.TOK_TABNAME);
ASTNode tabNameNode = new ASTNode(t);
tabRef.addChild(tabNameNode);
t = new ClassicToken(HiveParser.Identifier, tableName);
ASTNode identifier = new ASTNode(t);
tabNameNode.addChild(identifier);
return tabRef;
}
// Take an expression in the values clause and turn it back into a string. This is far from
// comprehensive. At the moment it only supports:
// * literals (all types)
// * unary negatives
// * true/false
private String unparseExprForValuesClause(ASTNode expr) throws SemanticException {
switch (expr.getToken().getType()) {
case HiveParser.Number:
return expr.getText();
case HiveParser.StringLiteral:
return BaseSemanticAnalyzer.unescapeSQLString(expr.getText());
case HiveParser.KW_FALSE:
// UDFToBoolean casts any non-empty string to true, so set this to false
return "";
case HiveParser.KW_TRUE:
return "TRUE";
case HiveParser.MINUS:
return "-" + unparseExprForValuesClause((ASTNode)expr.getChildren().get(0));
case HiveParser.TOK_NULL:
// Hive's text input will translate this as a null
return "\\N";
default:
throw new SemanticException("Expression of type " + expr.getText() +
" not supported in insert/values");
}
}
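/**
* Split sampling (by percentage or total length) is only supported with
* CombineHiveInputFormat; check the configured input format and fail early otherwise.
*/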
private void assertCombineInputFormat(Tree numerator, String message) throws SemanticException {
String inputFormat = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ?
HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT):
HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT);
if (!inputFormat.equals(CombineHiveInputFormat.class.getName())) {
throw new SemanticException(generateErrorMessage((ASTNode) numerator,
message + " sampling is not supported in " + inputFormat));
}
}
private String processSubQuery(QB qb, ASTNode subq) throws SemanticException {
// This is a subquery and must have an alias
if (subq.getChildCount() != 2) {
throw new SemanticException(ErrorMsg.NO_SUBQUERY_ALIAS.getMsg(subq));
}
ASTNode subqref = (ASTNode) subq.getChild(0);
String alias = unescapeIdentifier(subq.getChild(1).getText());
// Recursively do the first phase of semantic analysis for the subquery
QBExpr qbexpr = new QBExpr(alias);
doPhase1QBExpr(subqref, qbexpr, qb.getId(), alias, qb.isInsideView());
// If the alias is already there then we have a conflict
if (qb.exists(alias)) {
throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq
.getChild(1)));
}
// Insert this map into the stats
qb.setSubqAlias(alias, qbexpr);
qb.addAlias(alias);
unparseTranslator.addIdentifierTranslation((ASTNode) subq.getChild(1));
return alias;
}
/*
* Phase1: hold onto any CTE definitions in aliasToCTE.
* CTE definitions are global to the Query.
*/
private void processCTE(QB qb, ASTNode ctes) throws SemanticException {
int numCTEs = ctes.getChildCount();
for (int i = 0; i < numCTEs; i++) {
ASTNode cte = (ASTNode) ctes.getChild(i);
ASTNode cteQry = (ASTNode) cte.getChild(0);
String alias = unescapeIdentifier(cte.getChild(1).getText());
String qName = qb.getId() == null ? "" : qb.getId() + ":";
qName += alias.toLowerCase();
if (aliasToCTEs.containsKey(qName)) {
throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(cte.getChild(1)));
}
aliasToCTEs.put(qName, new CTEClause(qName, cteQry));
}
}
/*
* We allow CTE definitions in views, so we can end up with a hierarchy of CTE definitions:
* - at the top level of a query statement
* - where a view is referenced
* - views may refer to other views
*
* The scoping rule is to search for a CTE from the current QB outwards. To disambiguate
* between CTEs at different levels, they are qualified (prefixed) with the id of the QB
* they appear in when added to the aliasToCTEs map.
*
*/
private CTEClause findCTEFromName(QB qb, String cteName) {
StringBuilder qId = new StringBuilder();
if (qb.getId() != null) {
qId.append(qb.getId());
}
while (qId.length() > 0) {
String nm = qId + ":" + cteName;
CTEClause cte = aliasToCTEs.get(nm);
if (cte != null) {
return cte;
}
int lastIndex = qId.lastIndexOf(":");
lastIndex = lastIndex < 0 ? 0 : lastIndex;
qId.setLength(lastIndex);
}
return aliasToCTEs.get(cteName);
}
/*
* If a CTE is referenced in a QueryBlock:
* - add it as a SubQuery for now.
* - SQ.alias is the alias used in QB. (if no alias is specified,
* it uses the CTE name. Works just like table references)
* - Adding SQ done by:
* - copying AST of CTE
* - setting ASTOrigin on cloned AST.
* - trigger phase 1 on new QBExpr.
* - update QB data structs: remove this as a table reference, move it to a SQ invocation.
*/
private void addCTEAsSubQuery(QB qb, String cteName, String cteAlias)
throws SemanticException {
cteAlias = cteAlias == null ? cteName : cteAlias;
CTEClause cte = findCTEFromName(qb, cteName);
ASTNode cteQryNode = cte.cteNode;
QBExpr cteQBExpr = new QBExpr(cteAlias);
doPhase1QBExpr(cteQryNode, cteQBExpr, qb.getId(), cteAlias);
qb.rewriteCTEToSubq(cteAlias, cteName, cteQBExpr);
}
private final CTEClause rootClause = new CTEClause(null, null);
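// Root tasks are resolved lazily: on the first call, tasks for materialized CTEs are
// linked ahead of the main query's root tasks.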
@Override
public List<Task<? extends Serializable>> getAllRootTasks() {
if (!rootTasksResolved) {
rootTasks = toRealRootTasks(rootClause.asExecutionOrder());
rootTasksResolved = true;
}
return rootTasks;
}
@Override
public HashSet<ReadEntity> getAllInputs() {
HashSet<ReadEntity> readEntities = new HashSet<ReadEntity>(getInputs());
for (CTEClause cte : rootClause.asExecutionOrder()) {
if (cte.source != null) {
readEntities.addAll(cte.source.getInputs());
}
}
return readEntities;
}
@Override
public HashSet<WriteEntity> getAllOutputs() {
HashSet<WriteEntity> writeEntities = new HashSet<WriteEntity>(getOutputs());
for (CTEClause cte : rootClause.asExecutionOrder()) {
if (cte.source != null) {
writeEntities.addAll(cte.source.getOutputs());
}
}
return writeEntities;
}
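/**
* Represents one WITH-clause definition: its alias and AST, whether it will be
* materialized, its reference count, and the CTEs it depends on (parents).
*/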
class CTEClause {
CTEClause(String alias, ASTNode cteNode) {
this.alias = alias;
this.cteNode = cteNode;
}
String alias;
ASTNode cteNode;
boolean materialize;
int reference;
QBExpr qbExpr;
List<CTEClause> parents = new ArrayList<CTEClause>();
// materialized
Table table;
SemanticAnalyzer source;
List<Task<? extends Serializable>> getTasks() {
return source == null ? null : source.rootTasks;
}
List<CTEClause> asExecutionOrder() {
List<CTEClause> execution = new ArrayList<CTEClause>();
asExecutionOrder(new HashSet<CTEClause>(), execution);
return execution;
}
void asExecutionOrder(Set<CTEClause> visited, List<CTEClause> execution) {
for (CTEClause parent : parents) {
if (visited.add(parent)) {
parent.asExecutionOrder(visited, execution);
}
}
execution.add(this);
}
@Override
public String toString() {
return alias == null ? "" : alias;
}
}
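/**
* Wires the tasks of materialized CTEs, in execution order, in front of the main
* query's root tasks so each CTE is populated before anything that reads it runs.
*/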
private List<Task<? extends Serializable>> toRealRootTasks(List<CTEClause> execution) {
List<Task<? extends Serializable>> cteRoots = new ArrayList<>();
List<Task<? extends Serializable>> cteLeafs = new ArrayList<>();
List<Task<? extends Serializable>> curTopRoots = null;
List<Task<? extends Serializable>> curBottomLeafs = null;
for (int i = 0; i < execution.size(); i++) {
CTEClause current = execution.get(i);
if (current.parents.isEmpty() && curTopRoots != null) {
cteRoots.addAll(curTopRoots);
cteLeafs.addAll(curBottomLeafs);
curTopRoots = curBottomLeafs = null;
}
List<Task<? extends Serializable>> curTasks = current.getTasks();
if (curTasks == null) {
continue;
}
if (curTopRoots == null) {
curTopRoots = curTasks;
}
if (curBottomLeafs != null) {
for (Task<?> topLeafTask : curBottomLeafs) {
for (Task<?> currentRootTask : curTasks) {
topLeafTask.addDependentTask(currentRootTask);
}
}
}
curBottomLeafs = Task.findLeafs(curTasks);
}
if (curTopRoots != null) {
cteRoots.addAll(curTopRoots);
cteLeafs.addAll(curBottomLeafs);
}
if (cteRoots.isEmpty()) {
return rootTasks;
}
for (Task<?> cteLeafTask : cteLeafs) {
for (Task<?> mainRootTask : rootTasks) {
cteLeafTask.addDependentTask(mainRootTask);
}
}
return cteRoots;
}
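/**
* Materializes a CTE by compiling an equivalent temporary-table creation with a
* nested SemanticAnalyzer and registering the resulting table with the context.
*/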
Table materializeCTE(String cteName, CTEClause cte) throws HiveException {
ASTNode createTable = new ASTNode(new ClassicToken(HiveParser.TOK_CREATETABLE));
ASTNode tableName = new ASTNode(new ClassicToken(HiveParser.TOK_TABNAME));
tableName.addChild(new ASTNode(new ClassicToken(HiveParser.Identifier, cteName)));
ASTNode temporary = new ASTNode(new ClassicToken(HiveParser.KW_TEMPORARY, MATERIALIZATION_MARKER));
createTable.addChild(tableName);
createTable.addChild(temporary);
createTable.addChild(cte.cteNode);
SemanticAnalyzer analyzer = new SemanticAnalyzer(queryState);
analyzer.initCtx(ctx);
analyzer.init(false);
// should share cte contexts
analyzer.aliasToCTEs.putAll(aliasToCTEs);
HiveOperation operation = queryState.getHiveOperation();
try {
analyzer.analyzeInternal(createTable);
} finally {
queryState.setCommandType(operation);
}
Table table = analyzer.tableDesc.toTable(conf);
Path location = table.getDataLocation();
try {
location.getFileSystem(conf).mkdirs(location);
} catch (IOException e) {
throw new HiveException(e);
}
table.setMaterializedTable(true);
LOG.info(cteName + " will be materialized into " + location);
cte.table = table;
cte.source = analyzer;
ctx.addMaterializedTable(cteName, table);
return table;
}
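/** Returns true if the node is any join token: inner, cross, outer, left semi or unique join. */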
static boolean isJoinToken(ASTNode node) {
if ((node.getToken().getType() == HiveParser.TOK_JOIN)
|| (node.getToken().getType() == HiveParser.TOK_CROSSJOIN)
|| isOuterJoinToken(node)
|| (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN)
|| (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) {
return true;
}
return false;
}
static private boolean isOuterJoinToken(ASTNode node) {
return (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
|| (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
|| (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN);
}
/**
* Given the AST with TOK_JOIN as the root, get all the aliases for the tables
* or subqueries in the join.
*
* @param qb
* @param join
* @throws SemanticException
*/
@SuppressWarnings("nls")
private void processJoin(QB qb, ASTNode join) throws SemanticException {
int numChildren = join.getChildCount();
if ((numChildren != 2) && (numChildren != 3)
&& join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) {
throw new SemanticException(generateErrorMessage(join,
"Join with multiple children"));
}
queryProperties.incrementJoinCount(isOuterJoinToken(join));
for (int num = 0; num < numChildren; num++) {
ASTNode child = (ASTNode) join.getChild(num);
if (child.getToken().getType() == HiveParser.TOK_TABREF) {
processTable(qb, child);
} else if (child.getToken().getType() == HiveParser.TOK_SUBQUERY) {
processSubQuery(qb, child);
} else if (child.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
queryProperties.setHasPTF(true);
processPTF(qb, child);
PTFInvocationSpec ptfInvocationSpec = qb.getPTFInvocationSpec(child);
String inputAlias = ptfInvocationSpec == null ? null :
ptfInvocationSpec.getFunction().getAlias();
if ( inputAlias == null ) {
throw new SemanticException(generateErrorMessage(child,
"PTF invocation in a Join must have an alias"));
}
} else if (child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
// SELECT * FROM src1 LATERAL VIEW udtf() AS myTable JOIN src2 ...
// is not supported. Instead, the lateral view must be in a subquery
// SELECT * FROM (SELECT * FROM src1 LATERAL VIEW udtf() AS myTable) a
// JOIN src2 ...
throw new SemanticException(ErrorMsg.LATERAL_VIEW_WITH_JOIN
.getMsg(join));
} else if (isJoinToken(child)) {
processJoin(qb, child);
}
}
}
/**
* Given the AST with TOK_LATERAL_VIEW as the root, get the alias for the
* table or subquery in the lateral view and also make a mapping from the
* alias to all the lateral view AST's.
*
* @param qb
* @param lateralView
* @return the alias for the table/subquery
* @throws SemanticException
*/
private String processLateralView(QB qb, ASTNode lateralView)
throws SemanticException {
int numChildren = lateralView.getChildCount();
assert (numChildren == 2);
ASTNode next = (ASTNode) lateralView.getChild(1);
String alias = null;
switch (next.getToken().getType()) {
case HiveParser.TOK_TABREF:
alias = processTable(qb, next);
break;
case HiveParser.TOK_SUBQUERY:
alias = processSubQuery(qb, next);
break;
case HiveParser.TOK_LATERAL_VIEW:
case HiveParser.TOK_LATERAL_VIEW_OUTER:
alias = processLateralView(qb, next);
break;
default:
throw new SemanticException(ErrorMsg.LATERAL_VIEW_INVALID_CHILD
.getMsg(lateralView));
}
alias = alias.toLowerCase();
qb.getParseInfo().addLateralViewForAlias(alias, lateralView);
qb.addAlias(alias);
return alias;
}
/**
* Phase 1: (including, but not limited to):
*
* 1. Gets all the aliases for all the tables / subqueries and makes the
* appropriate mapping in aliasToTabs, aliasToSubq 2. Gets the location of the
* destination and names the clause "inclause" + i 3. Creates a map from a
* string representation of an aggregation tree to the actual aggregation AST
* 4. Creates a mapping from the clause name to the select expression AST in
* destToSelExpr 5. Creates a mapping from a table alias to the lateral view
* AST's in aliasToLateralViews
*
* @param ast
* @param qb
* @param ctx_1
* @throws SemanticException
*/
@SuppressWarnings({"fallthrough", "nls"})
public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx)
throws SemanticException {
boolean phase1Result = true;
QBParseInfo qbp = qb.getParseInfo();
boolean skipRecursion = false;
if (ast.getToken() != null) {
skipRecursion = true;
switch (ast.getToken().getType()) {
case HiveParser.TOK_SELECTDI:
qb.countSelDi();
// fall through
case HiveParser.TOK_SELECT:
qb.countSel();
qbp.setSelExprForClause(ctx_1.dest, ast);
int posn = 0;
if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) {
qbp.setHints((ASTNode) ast.getChild(0));
posn++;
}
if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM))
queryProperties.setUsesScript(true);
LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast,
qb, ctx_1.dest);
doPhase1GetColumnAliasesFromSelect(ast, qbp);
qbp.setAggregationExprsForClause(ctx_1.dest, aggregations);
qbp.setDistinctFuncExprsForClause(ctx_1.dest,
doPhase1GetDistinctFuncExprs(aggregations));
break;
case HiveParser.TOK_WHERE:
qbp.setWhrExprForClause(ctx_1.dest, ast);
if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty())
queryProperties.setFilterWithSubQuery(true);
break;
case HiveParser.TOK_INSERT_INTO:
String currentDatabase = SessionState.get().getCurrentDatabase();
String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase);
qbp.addInsertIntoTable(tab_name, ast);
case HiveParser.TOK_DESTINATION:
ctx_1.dest = this.ctx.getDestNamePrefix(ast).toString() + ctx_1.nextNum;
ctx_1.nextNum++;
boolean isTmpFileDest = false;
if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) {
ASTNode ch = (ASTNode) ast.getChild(0);
if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0
&& ch.getChild(0) instanceof ASTNode) {
ch = (ASTNode) ch.getChild(0);
isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE;
} else {
if (ast.getToken().getType() == HiveParser.TOK_DESTINATION
&& ast.getChild(0).getType() == HiveParser.TOK_TAB) {
String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
SessionState.get().getCurrentDatabase());
qbp.getInsertOverwriteTables().put(fullTableName, ast);
}
}
}
// is there a insert in the subquery
if (qbp.getIsSubQ() && !isTmpFileDest) {
throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast));
}
qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
handleInsertStatementSpecPhase1(ast, qbp, ctx_1);
if (qbp.getClauseNamesForDest().size() == 2) {
// From the moment that we have two destination clauses,
// we know that this is a multi-insert query.
// Thus, set property to right value.
// Using qbp.getClauseNamesForDest().size() >= 2 would be
// equivalent, but we use == to avoid setting the property
// multiple times
queryProperties.setMultiDestQuery(true);
}
if (plannerCtx != null && !queryProperties.hasMultiDestQuery()) {
plannerCtx.setInsertToken(ast, isTmpFileDest);
} else if (plannerCtx != null && qbp.getClauseNamesForDest().size() == 2) {
// For multi-insert query, currently we only optimize the FROM clause.
// Hence, introduce multi-insert token on top of it.
// However, first we need to reset existing token (insert).
// Using qbp.getClauseNamesForDest().size() >= 2 would be
// equivalent, but we use == to avoid setting the property
// multiple times
plannerCtx.resetToken();
plannerCtx.setMultiInsertToken((ASTNode) qbp.getQueryFrom().getChild(0));
}
break;
case HiveParser.TOK_FROM:
int child_count = ast.getChildCount();
if (child_count != 1) {
throw new SemanticException(generateErrorMessage(ast,
"Multiple Children " + child_count));
}
if (!qbp.getIsSubQ()) {
qbp.setQueryFromExpr(ast);
}
// Check if this is a subquery / lateral view
ASTNode frm = (ASTNode) ast.getChild(0);
if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
processTable(qb, frm);
} else if (frm.getToken().getType() == HiveParser.TOK_VIRTUAL_TABLE) {
// Create a temp table with the passed values in it then rewrite this portion of the
// tree to be from that table.
ASTNode newFrom = genValuesTempTable(frm, qb);
ast.setChild(0, newFrom);
processTable(qb, newFrom);
} else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
processSubQuery(qb, frm);
} else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
queryProperties.setHasLateralViews(true);
processLateralView(qb, frm);
} else if (isJoinToken(frm)) {
processJoin(qb, frm);
qbp.setJoinExpr(frm);
}else if(frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION){
queryProperties.setHasPTF(true);
processPTF(qb, frm);
}
break;
case HiveParser.TOK_CLUSTERBY:
// Get the clusterby aliases - these are aliased to the entries in the
// select list
queryProperties.setHasClusterBy(true);
qbp.setClusterByExprForClause(ctx_1.dest, ast);
break;
case HiveParser.TOK_DISTRIBUTEBY:
// Get the distribute by aliases - these are aliased to the entries in
// the
// select list
queryProperties.setHasDistributeBy(true);
qbp.setDistributeByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
} else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
}
break;
case HiveParser.TOK_SORTBY:
// Get the sort by aliases - these are aliased to the entries in the
// select list
queryProperties.setHasSortBy(true);
qbp.setSortByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg()));
} else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg()));
}
break;
case HiveParser.TOK_ORDERBY:
// Get the order by aliases - these are aliased to the entries in the
// select list
queryProperties.setHasOrderBy(true);
qbp.setOrderByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg()));
}
break;
case HiveParser.TOK_GROUPBY:
case HiveParser.TOK_ROLLUP_GROUPBY:
case HiveParser.TOK_CUBE_GROUPBY:
case HiveParser.TOK_GROUPING_SETS:
// Get the groupby aliases - these are aliased to the entries in the
// select list
queryProperties.setHasGroupBy(true);
if (qbp.getJoinExpr() != null) {
queryProperties.setHasJoinFollowedByGroupBy(true);
}
if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
}
qbp.setGroupByExprForClause(ctx_1.dest, ast);
skipRecursion = true;
// Rollup and Cubes are syntactic sugar on top of grouping sets
if (ast.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) {
qbp.getDestRollups().add(ctx_1.dest);
} else if (ast.getToken().getType() == HiveParser.TOK_CUBE_GROUPBY) {
qbp.getDestCubes().add(ctx_1.dest);
} else if (ast.getToken().getType() == HiveParser.TOK_GROUPING_SETS) {
qbp.getDestGroupingSets().add(ctx_1.dest);
}
break;
case HiveParser.TOK_HAVING:
qbp.setHavingExprForClause(ctx_1.dest, ast);
qbp.addAggregationExprsForClause(ctx_1.dest,
doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
break;
case HiveParser.KW_WINDOW:
if (!qb.hasWindowingSpec(ctx_1.dest) ) {
throw new SemanticException(generateErrorMessage(ast,
"Query has no Cluster/Distribute By; but has a Window definition"));
}
handleQueryWindowClauses(qb, ctx_1, ast);
break;
case HiveParser.TOK_LIMIT:
if (ast.getChildCount() == 2) {
qbp.setDestLimit(ctx_1.dest,
new Integer(ast.getChild(0).getText()),
new Integer(ast.getChild(1).getText()));
} else {
qbp.setDestLimit(ctx_1.dest, new Integer(0),
new Integer(ast.getChild(0).getText()));
}
break;
case HiveParser.TOK_ANALYZE:
// Case of analyze command
String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0)).toLowerCase();
qb.setTabAlias(table_name, table_name);
qb.addAlias(table_name);
qb.getParseInfo().setIsAnalyzeCommand(true);
qb.getParseInfo().setNoScanAnalyzeCommand(this.noscan);
qb.getParseInfo().setPartialScanAnalyzeCommand(this.partialscan);
// Allow analyze the whole table and dynamic partitions
HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
break;
case HiveParser.TOK_UNIONALL:
if (!qbp.getIsSubQ()) {
// this shouldn't happen. The parser should have converted the union to be
// contained in a subquery. Just in case, we keep the error as a fallback.
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
}
skipRecursion = false;
break;
case HiveParser.TOK_INSERT:
ASTNode destination = (ASTNode) ast.getChild(0);
Tree tab = destination.getChild(0);
// Proceed if AST contains partition & If Not Exists
if (destination.getChildCount() == 2 &&
tab.getChildCount() == 2 &&
destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
String tableName = tab.getChild(0).getChild(0).getText();
Tree partitions = tab.getChild(1);
int childCount = partitions.getChildCount();
HashMap<String, String> partition = new HashMap<String, String>();
for (int i = 0; i < childCount; i++) {
String partitionName = partitions.getChild(i).getChild(0).getText();
Tree pvalue = partitions.getChild(i).getChild(1);
if (pvalue == null) {
break;
}
String partitionVal = stripQuotes(pvalue.getText());
partition.put(partitionName, partitionVal);
}
// if it is a dynamic partition throw the exception
if (childCount != partition.size()) {
throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS
.getMsg(partition.toString()));
}
Table table = null;
try {
table = this.getTableObjectByName(tableName);
} catch (HiveException ex) {
throw new SemanticException(ex);
}
try {
Partition parMetaData = db.getPartition(table, partition, false);
// Check if the partition exists; if it does, skip the overwrite
if (parMetaData != null) {
phase1Result = false;
skipRecursion = true;
LOG.info("Partition already exists so insert into overwrite " +
"skipped for partition : " + parMetaData.toString());
break;
}
} catch (HiveException e) {
LOG.info("Error while getting metadata : ", e);
}
validatePartSpec(table, partition, (ASTNode)tab, conf, false);
}
skipRecursion = false;
break;
case HiveParser.TOK_LATERAL_VIEW:
case HiveParser.TOK_LATERAL_VIEW_OUTER:
// todo: nested LV
assert ast.getChildCount() == 1;
qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast);
break;
case HiveParser.TOK_CTE:
processCTE(qb, ast);
break;
default:
skipRecursion = false;
break;
}
}
if (!skipRecursion) {
// Iterate over the rest of the children
int child_count = ast.getChildCount();
for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) {
// Recurse
phase1Result = phase1Result && doPhase1(
(ASTNode)ast.getChild(child_pos), qb, ctx_1, plannerCtx);
}
}
return phase1Result;
}
/**
* This is phase1 of supporting specifying schema in insert statement
* insert into foo(z,y) select a,b from bar;
* @see #handleInsertStatementSpec(java.util.List, String, RowResolver, RowResolver, QB, ASTNode)
* @throws SemanticException
*/
private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase1Ctx ctx_1) throws SemanticException {
ASTNode tabColName = (ASTNode)ast.getChild(1);
if(ast.getType() == HiveParser.TOK_INSERT_INTO && tabColName != null && tabColName.getType() == HiveParser.TOK_TABCOLNAME) {
//we have "insert into foo(a,b)..."; parser will enforce that 1+ columns are listed if TOK_TABCOLNAME is present
List<String> targetColNames = new ArrayList<String>();
for(Node col : tabColName.getChildren()) {
assert ((ASTNode)col).getType() == HiveParser.Identifier :
"expected token " + HiveParser.Identifier + " found " + ((ASTNode)col).getType();
targetColNames.add(((ASTNode)col).getText());
}
String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
SessionState.get().getCurrentDatabase());
qbp.setDestSchemaForClause(ctx_1.dest, targetColNames);
Set<String> targetColumns = new HashSet<String>();
targetColumns.addAll(targetColNames);
if(targetColNames.size() != targetColumns.size()) {
throw new SemanticException(generateErrorMessage(tabColName,
"Duplicate column name detected in " + fullTableName + " table schema specification"));
}
Table targetTable = null;
try {
targetTable = db.getTable(fullTableName, false);
}
catch (HiveException ex) {
LOG.error("Error processing HiveParser.TOK_DESTINATION: " + ex.getMessage(), ex);
throw new SemanticException(ex);
}
if(targetTable == null) {
throw new SemanticException(generateErrorMessage(ast,
"Unable to access metadata for table " + fullTableName));
}
for(FieldSchema f : targetTable.getCols()) {
//parser only allows foo(a,b), not foo(foo.a, foo.b)
targetColumns.remove(f.getName());
}
if(!targetColumns.isEmpty()) {//here we need to see if remaining columns are dynamic partition columns
/* We just checked the user-specified schema columns among the regular table columns and found some which are not
'regular'. Now check if they are dynamic partition columns.
For dynamic partitioning,
Given "create table multipart(a int, b int) partitioned by (c int, d int);"
for "insert into multipart partition(c='1',d)(d,a) values(2,3);" we expect parse tree to look like this
(TOK_INSERT_INTO
(TOK_TAB
(TOK_TABNAME multipart)
(TOK_PARTSPEC
(TOK_PARTVAL c '1')
(TOK_PARTVAL d)
)
)
(TOK_TABCOLNAME d a)
)*/
List<String> dynamicPartitionColumns = new ArrayList<String>();
if(ast.getChild(0) != null && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
ASTNode tokTab = (ASTNode)ast.getChild(0);
ASTNode tokPartSpec = (ASTNode)tokTab.getFirstChildWithType(HiveParser.TOK_PARTSPEC);
if(tokPartSpec != null) {
for(Node n : tokPartSpec.getChildren()) {
ASTNode tokPartVal = null;
if(n instanceof ASTNode) {
tokPartVal = (ASTNode)n;
}
if(tokPartVal != null && tokPartVal.getType() == HiveParser.TOK_PARTVAL && tokPartVal.getChildCount() == 1) {
assert tokPartVal.getChild(0).getType() == HiveParser.Identifier :
"Expected column name; found tokType=" + tokPartVal.getType();
dynamicPartitionColumns.add(tokPartVal.getChild(0).getText());
}
}
}
}
for(String colName : dynamicPartitionColumns) {
targetColumns.remove(colName);
}
if(!targetColumns.isEmpty()) {
//Found some columns in the user-specified schema which are neither regular nor dynamic partition columns
throw new SemanticException(generateErrorMessage(tabColName,
"'" + (targetColumns.size() == 1 ? targetColumns.iterator().next() : targetColumns) +
"' in insert schema specification " + (targetColumns.size() == 1 ? "is" : "are") +
" not found among regular columns of " +
fullTableName + " nor dynamic partition columns."));
}
}
}
}
public void getMaterializationMetadata(QB qb) throws SemanticException {
try {
gatherCTEReferences(qb, rootClause);
int threshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD);
for (CTEClause cte : Sets.newHashSet(aliasToCTEs.values())) {
if (threshold >= 0 && cte.reference >= threshold) {
cte.materialize = true;
}
}
} catch (HiveException e) {
// Has to use full name to make sure it does not conflict with
// org.apache.commons.lang.StringUtils
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
if (e instanceof SemanticException) {
throw (SemanticException)e;
}
throw new SemanticException(e.getMessage(), e);
}
}
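// Recursively walks a QB expression: a plain query block is visited directly, while a set
// operation (e.g. UNION) descends into both of its sub-expressions to collect CTE references.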
private void gatherCTEReferences(QBExpr qbexpr, CTEClause parent) throws HiveException {
if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
gatherCTEReferences(qbexpr.getQB(), parent);
} else {
gatherCTEReferences(qbexpr.getQBExpr1(), parent);
gatherCTEReferences(qbexpr.getQBExpr2(), parent);
}
}
// TODO: check view references, too
private void gatherCTEReferences(QB qb, CTEClause current) throws HiveException {
for (String alias : qb.getTabAliases()) {
String tabName = qb.getTabNameForAlias(alias);
String cteName = tabName.toLowerCase();
CTEClause cte = findCTEFromName(qb, cteName);
if (cte != null) {
if (ctesExpanded.contains(cteName)) {
throw new SemanticException("Recursive cte " + cteName +
" detected (cycle: " + StringUtils.join(ctesExpanded, " -> ") +
" -> " + cteName + ").");
}
cte.reference++;
current.parents.add(cte);
if (cte.qbExpr != null) {
continue;
}
cte.qbExpr = new QBExpr(cteName);
doPhase1QBExpr(cte.cteNode, cte.qbExpr, qb.getId(), cteName);
ctesExpanded.add(cteName);
gatherCTEReferences(cte.qbExpr, cte);
ctesExpanded.remove(ctesExpanded.size() - 1);
}
}
for (String alias : qb.getSubqAliases()) {
gatherCTEReferences(qb.getSubqForAlias(alias), current);
}
}
public void getMetaData(QB qb) throws SemanticException {
getMetaData(qb, false);
}
public void getMetaData(QB qb, boolean enableMaterialization) throws SemanticException {
try {
if (enableMaterialization) {
getMaterializationMetadata(qb);
}
getMetaData(qb, null);
} catch (HiveException e) {
// Has to use full name to make sure it does not conflict with
// org.apache.commons.lang.StringUtils
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
if (e instanceof SemanticException) {
throw (SemanticException)e;
}
throw new SemanticException(e.getMessage(), e);
}
}
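// Resolves metadata for a QB expression tree: a plain query block is handled directly,
// otherwise both branches of the set operation are resolved with the same parent input.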
private void getMetaData(QBExpr qbexpr, ReadEntity parentInput)
throws HiveException {
if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
getMetaData(qbexpr.getQB(), parentInput);
} else {
getMetaData(qbexpr.getQBExpr1(), parentInput);
getMetaData(qbexpr.getQBExpr2(), parentInput);
}
}
@SuppressWarnings("nls")
private void getMetaData(QB qb, ReadEntity parentInput)
throws HiveException {
LOG.info("Get metadata for source tables");
// Go over the tables and populate the related structures.
// We have to materialize the table alias list since we might
// modify it in the middle for view rewrite.
List<String> tabAliases = new ArrayList<String>(qb.getTabAliases());
// Keep track of view alias to view name and read entity
// For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T
// keeps track of full view name and read entity corresponding to alias V3, V3:V2, V3:V2:V1.
// This is needed for tracking the dependencies for inputs, along with their parents.
Map<String, ObjectPair<String, ReadEntity>> aliasToViewInfo =
new HashMap<String, ObjectPair<String, ReadEntity>>();
/*
* used to capture view to SQ conversions. This is used to check for
* recursive CTE invocations.
*/
Map<String, String> sqAliasToCTEName = new HashMap<String, String>();
for (String alias : tabAliases) {
String tabName = qb.getTabNameForAlias(alias);
String cteName = tabName.toLowerCase();
Table tab = db.getTable(tabName, false);
if (tab == null ||
tab.getDbName().equals(SessionState.get().getCurrentDatabase())) {
Table materializedTab = ctx.getMaterializedTable(cteName);
if (materializedTab == null) {
// we first look for this alias from CTE, and then from catalog.
CTEClause cte = findCTEFromName(qb, cteName);
if (cte != null) {
if (!cte.materialize) {
addCTEAsSubQuery(qb, cteName, alias);
sqAliasToCTEName.put(alias, cteName);
continue;
}
tab = materializeCTE(cteName, cte);
}
} else {
tab = materializedTab;
}
}
if (tab == null) {
ASTNode src = qb.getParseInfo().getSrcForAlias(alias);
if (null != src) {
throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(src));
} else {
throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(alias));
}
}
if (tab.isView()) {
if (qb.getParseInfo().isAnalyzeCommand()) {
throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg());
}
String fullViewName = tab.getDbName() + "." + tab.getTableName();
// Prevent view cycles
if (viewsExpanded.contains(fullViewName)) {
throw new SemanticException("Recursive view " + fullViewName +
" detected (cycle: " + StringUtils.join(viewsExpanded, " -> ") +
" -> " + fullViewName + ").");
}
replaceViewReferenceWithDefinition(qb, tab, tabName, alias);
// This is the last time we'll see the Table objects for views, so add it to the inputs
// now. isInsideView will tell if this view is embedded in another view.
ReadEntity viewInput = new ReadEntity(tab, parentInput, !qb.isInsideView());
viewInput = PlanUtils.addInput(inputs, viewInput);
aliasToViewInfo.put(alias, new ObjectPair<String, ReadEntity>(fullViewName, viewInput));
String aliasId = getAliasId(alias, qb);
if (aliasId != null) {
aliasId = aliasId.replace(SemanticAnalyzer.SUBQUERY_TAG_1, "")
.replace(SemanticAnalyzer.SUBQUERY_TAG_2, "");
}
viewAliasToInput.put(aliasId, viewInput);
continue;
}
if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
throw new SemanticException(generateErrorMessage(
qb.getParseInfo().getSrcForAlias(alias),
ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg()));
}
qb.getMetaData().setSrcForAlias(alias, tab);
if (qb.getParseInfo().isAnalyzeCommand()) {
// allow partial partition specification for nonscan since noscan is fast.
TableSpec ts = new TableSpec(db, conf, (ASTNode) ast.getChild(0), true, this.noscan);
if (ts.specType == SpecType.DYNAMIC_PARTITION) { // dynamic partitions
try {
ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec);
} catch (HiveException e) {
throw new SemanticException(generateErrorMessage(
qb.getParseInfo().getSrcForAlias(alias),
"Cannot get partitions for " + ts.partSpec), e);
}
}
// validate partial scan command
QBParseInfo qbpi = qb.getParseInfo();
if (qbpi.isPartialScanAnalyzeCommand()) {
Class<? extends InputFormat> inputFormatClass = null;
switch (ts.specType) {
case TABLE_ONLY:
case DYNAMIC_PARTITION:
inputFormatClass = ts.tableHandle.getInputFormatClass();
break;
case STATIC_PARTITION:
inputFormatClass = ts.partHandle.getInputFormatClass();
break;
default:
assert false;
}
// throw a HiveException for formats other than rcfile or orcfile.
if (!(inputFormatClass.equals(RCFileInputFormat.class) || inputFormatClass
.equals(OrcInputFormat.class))) {
throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_NON_RCFILE.getMsg());
}
}
tab.setTableSpec(ts);
qb.getParseInfo().addTableSpec(alias, ts);
}
ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
PlanUtils.addInput(inputs,
new ReadEntity(tab, parentViewInfo, parentViewInfo == null),mergeIsDirect);
}
LOG.info("Get metadata for subqueries");
// Go over the subqueries and getMetaData for these
for (String alias : qb.getSubqAliases()) {
boolean wasView = aliasToViewInfo.containsKey(alias);
boolean wasCTE = sqAliasToCTEName.containsKey(alias);
ReadEntity newParentInput = null;
if (wasView) {
viewsExpanded.add(aliasToViewInfo.get(alias).getFirst());
newParentInput = aliasToViewInfo.get(alias).getSecond();
} else if (wasCTE) {
ctesExpanded.add(sqAliasToCTEName.get(alias));
}
QBExpr qbexpr = qb.getSubqForAlias(alias);
getMetaData(qbexpr, newParentInput);
if (wasView) {
viewsExpanded.remove(viewsExpanded.size() - 1);
} else if (wasCTE) {
ctesExpanded.remove(ctesExpanded.size() - 1);
}
}
RowFormatParams rowFormatParams = new RowFormatParams();
StorageFormat storageFormat = new StorageFormat(conf);
LOG.info("Get metadata for destination tables");
// Go over all the destination structures and populate the related
// metadata
QBParseInfo qbp = qb.getParseInfo();
for (String name : qbp.getClauseNamesForDest()) {
ASTNode ast = qbp.getDestForClause(name);
switch (ast.getToken().getType()) {
case HiveParser.TOK_TAB: {
TableSpec ts = new TableSpec(db, conf, ast);
if (ts.tableHandle.isView()) {
throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
}
Class<?> outputFormatClass = ts.tableHandle.getOutputFormatClass();
if (!ts.tableHandle.isNonNative() &&
!HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) {
throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE
.getMsg(ast, "The class is " + outputFormatClass.toString()));
}
boolean isTableWrittenTo = qb.getParseInfo().isInsertIntoTable(ts.tableHandle.getDbName(),
ts.tableHandle.getTableName());
isTableWrittenTo |= (qb.getParseInfo().getInsertOverwriteTables().
get(getUnescapedName((ASTNode) ast.getChild(0), ts.tableHandle.getDbName())) != null);
assert isTableWrittenTo :
"Inconsistent data structure detected: we are writing to " + ts.tableHandle + " in " +
name + " but it's not in isInsertIntoTable() or getInsertOverwriteTables()";
// Disallow update and delete on non-acid tables
boolean isAcid = AcidUtils.isAcidTable(ts.tableHandle);
if ((updating(name) || deleting(name)) && !isAcid) {
// Whether we are using an acid compliant transaction manager has already been caught in
// UpdateDeleteSemanticAnalyzer, so if we are updating or deleting and getting nonAcid
// here, it means the table itself doesn't support it.
throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, ts.tableName);
}
// TableSpec ts is obtained from the query (user specified),
// which means the user didn't specify partitions in their query,
// but whether the table itself is partitioned is not known.
if (ts.specType != SpecType.STATIC_PARTITION) {
// This is a table or dynamic partition
qb.getMetaData().setDestForAlias(name, ts.tableHandle);
// has dynamic as well as static partitions
if (ts.partSpec != null && ts.partSpec.size() > 0) {
qb.getMetaData().setPartSpecForAlias(name, ts.partSpec);
}
} else {
// This is a partition
qb.getMetaData().setDestForAlias(name, ts.partHandle);
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
// Add the table spec for the destination table.
qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
}
break;
}
case HiveParser.TOK_DIR: {
// This is a dfs file
String fname = stripQuotes(ast.getChild(0).getText());
if ((!qb.getParseInfo().getIsSubQ())
&& (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) {
if (qb.isCTAS()) {
qb.setIsQuery(false);
ctx.setResDir(null);
ctx.setResFile(null);
// allocate a temporary output dir on the location of the table
String tableName = getUnescapedName((ASTNode) ast.getChild(0));
String[] names = Utilities.getDbTableName(tableName);
Path location;
try {
Warehouse wh = new Warehouse(conf);
//Use destination table's db location.
String destTableDb = qb.getTableDesc() != null? qb.getTableDesc().getDatabaseName(): null;
if (destTableDb == null) {
destTableDb = names[0];
}
location = wh.getDatabasePath(db.getDatabase(destTableDb));
} catch (MetaException e) {
throw new SemanticException(e);
}
try {
fname = ctx.getExtTmpPathRelTo(
FileUtils.makeQualified(location, conf)).toString();
} catch (Exception e) {
throw new SemanticException(generateErrorMessage(ast,
"Error creating temporary folder on: " + location.toString()), e);
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
TableSpec ts = new TableSpec(db, conf, this.ast);
// Add the table spec for the destination table.
qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
}
} else {
// This is the only place where isQuery is set to true; it defaults to false.
qb.setIsQuery(true);
Path stagingPath = getStagingDirectoryPathname(qb);
fname = stagingPath.toString();
ctx.setResDir(stagingPath);
}
}
boolean isDfsFile = true;
if (ast.getChildCount() >= 2 && ast.getChild(1).getText().toLowerCase().equals("local")) {
isDfsFile = false;
}
qb.getMetaData().setDestForAlias(name, fname, isDfsFile);
CreateTableDesc directoryDesc = new CreateTableDesc();
boolean directoryDescIsSet = false;
int numCh = ast.getChildCount();
for (int num = 1; num < numCh ; num++){
ASTNode child = (ASTNode) ast.getChild(num);
if (child != null) {
if (storageFormat.fillStorageFormat(child)) {
directoryDesc.setOutputFormat(storageFormat.getOutputFormat());
directoryDesc.setSerName(storageFormat.getSerde());
directoryDescIsSet = true;
continue;
}
switch (child.getToken().getType()) {
case HiveParser.TOK_TABLEROWFORMAT:
rowFormatParams.analyzeRowFormat(child);
directoryDesc.setFieldDelim(rowFormatParams.fieldDelim);
directoryDesc.setLineDelim(rowFormatParams.lineDelim);
directoryDesc.setCollItemDelim(rowFormatParams.collItemDelim);
directoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim);
directoryDesc.setFieldEscape(rowFormatParams.fieldEscape);
directoryDesc.setNullFormat(rowFormatParams.nullFormat);
directoryDescIsSet=true;
break;
case HiveParser.TOK_TABLESERIALIZER:
ASTNode serdeChild = (ASTNode) child.getChild(0);
storageFormat.setSerde(unescapeSQLString(serdeChild.getChild(0).getText()));
directoryDesc.setSerName(storageFormat.getSerde());
if (serdeChild.getChildCount() > 1) {
directoryDesc.setSerdeProps(new HashMap<String, String>());
readProps((ASTNode) serdeChild.getChild(1).getChild(0), directoryDesc.getSerdeProps());
}
directoryDescIsSet = true;
break;
}
}
}
if (directoryDescIsSet){
qb.setDirectoryDesc(directoryDesc);
}
break;
}
default:
throw new SemanticException(generateErrorMessage(ast,
"Unknown Token Type " + ast.getToken().getType()));
}
}
}
/**
* Checks if a given path is encrypted (valid only for HDFS files)
* @param path The path to check for encryption
* @return True if the path is encrypted; False if it is not encrypted
* @throws HiveException If an error occurs while checking for encryption
*/
private boolean isPathEncrypted(Path path) throws HiveException {
try {
HadoopShims.HdfsEncryptionShim hdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim(path.getFileSystem(conf));
if (hdfsEncryptionShim != null) {
if (hdfsEncryptionShim.isPathEncrypted(path)) {
return true;
}
}
} catch (Exception e) {
throw new HiveException("Unable to determine if " + path + " is encrypted: " + e, e);
}
return false;
}
/**
* Compares two paths' key encryption strengths.
*
* @param p1 Path to an HDFS file system
* @param p2 Path to an HDFS file system
* @return -1 if p1's key strength is weaker than p2's; 0 if they are equal; 1 if p1's is stronger
* @throws HiveException If an error occurs while comparing key strengths.
*/
private int comparePathKeyStrength(Path p1, Path p2) throws HiveException {
HadoopShims.HdfsEncryptionShim hdfsEncryptionShim;
hdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim();
if (hdfsEncryptionShim != null) {
try {
return hdfsEncryptionShim.comparePathKeyStrength(p1, p2);
} catch (Exception e) {
throw new HiveException("Unable to compare key strength for " + p1 + " and " + p2 + " : " + e, e);
}
}
return 0; // Non-encrypted path (or equals strength)
}
/**
* Checks if a given path has read-only access permissions.
*
* @param path The path to check for read-only permissions.
* @return True if the path is read-only; False otherwise.
* @throws HiveException If an error occurs while checking file permissions.
*/
private boolean isPathReadOnly(Path path) throws HiveException {
HiveConf conf = SessionState.get().getConf();
try {
FileSystem fs = path.getFileSystem(conf);
UserGroupInformation ugi = Utils.getUGI();
FileStatus status = fs.getFileStatus(path);
// We just check for writing permissions. If it fails with AccessControException, then it
// means the location may be read-only.
FileUtils.checkFileAccessWithImpersonation(fs, status, FsAction.WRITE, ugi.getUserName());
// Path has writing permissions
return false;
} catch (AccessControlException e) {
// An AccessControlException may be caused for other different errors,
// but we take it as if our path is read-only
return true;
} catch (Exception e) {
throw new HiveException("Unable to determine if " + path + " is read only: " + e, e);
}
}
/**
* Gets the strongest encrypted table path.
*
* @param qb The QB object that contains a list of all table locations.
* @return The strongest encrypted path. It may return NULL if no tables are encrypted, or if none of them are HDFS tables.
* @throws HiveException if an error occurred attempting to compare the encryption strength
*/
private Path getStrongestEncryptedTablePath(QB qb) throws HiveException {
List<String> tabAliases = new ArrayList<String>(qb.getTabAliases());
Path strongestPath = null;
/* Walk through all found table locations to get the most encrypted table */
for (String alias : tabAliases) {
Table tab = qb.getMetaData().getTableForAlias(alias);
if (tab != null) {
Path tablePath = tab.getDataLocation();
if (tablePath != null) {
if ("hdfs".equalsIgnoreCase(tablePath.toUri().getScheme())) {
if (isPathEncrypted(tablePath)) {
if (strongestPath == null) {
strongestPath = tablePath;
} else if (comparePathKeyStrength(tablePath, strongestPath) > 0) {
strongestPath = tablePath;
}
}
}
}
}
}
return strongestPath;
}
/**
* Gets the staging directory where MR files will be stored temporarily.
* It walks through the QB plan to find the correct location to save temporary files. This
* temporary location (or staging directory) may be created inside encrypted table locations for
* security reasons. If the QB has read-only tables, then the older scratch directory will be used,
* or a permission error will be thrown if the requested query table is encrypted and the old scratch
* directory is not.
*
* @param qb The QB object that contains a list of all table locations.
* @return The path to the staging directory.
* @throws HiveException If an error occurs while identifying the correct staging location.
*/
private Path getStagingDirectoryPathname(QB qb) throws HiveException {
Path stagingPath = null, tablePath;
// Looks for the most encrypted table location
// It may return null if no tables are encrypted, or if none of them are on HDFS
tablePath = getStrongestEncryptedTablePath(qb);
if (tablePath != null) {
// At this point, tablePath is part of HDFS and it is encrypted
if (isPathReadOnly(tablePath)) {
Path tmpPath = ctx.getMRTmpPath();
if (comparePathKeyStrength(tablePath, tmpPath) < 0) {
throw new HiveException("Read-only encrypted tables cannot be read " +
"if the scratch directory is not encrypted (or encryption is weak)");
} else {
stagingPath = tmpPath;
}
}
if (stagingPath == null) {
stagingPath = ctx.getMRTmpPath(tablePath.toUri());
}
} else {
stagingPath = ctx.getMRTmpPath();
}
return stagingPath;
}
private void replaceViewReferenceWithDefinition(QB qb, Table tab,
String tab_name, String alias) throws SemanticException {
ASTNode viewTree;
final ASTNodeOrigin viewOrigin = new ASTNodeOrigin("VIEW", tab.getTableName(),
tab.getViewExpandedText(), alias, qb.getParseInfo().getSrcForAlias(
alias));
try {
String viewText = tab.getViewExpandedText();
// Reparse text, passing null for context to avoid clobbering
// the top-level token stream.
ASTNode tree = ParseUtils.parse(viewText, ctx, false);
viewTree = tree;
Dispatcher nodeOriginDispatcher = new Dispatcher() {
@Override
public Object dispatch(Node nd, java.util.Stack stack,
Object... nodeOutputs) {
((ASTNode) nd).setOrigin(viewOrigin);
return null;
}
};
GraphWalker nodeOriginTagger = new DefaultGraphWalker(
nodeOriginDispatcher);
nodeOriginTagger.startWalking(java.util.Collections
.<Node> singleton(viewTree), null);
} catch (ParseException e) {
// A user could encounter this if a stored view definition contains
// an old SQL construct which has been eliminated in a later Hive
// version, so we need to provide full debugging info to help
// with fixing the view definition.
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
StringBuilder sb = new StringBuilder();
sb.append(e.getMessage());
ErrorMsg.renderOrigin(sb, viewOrigin);
throw new SemanticException(sb.toString(), e);
}
QBExpr qbexpr = new QBExpr(alias);
doPhase1QBExpr(viewTree, qbexpr, qb.getId(), alias, true);
// if skip authorization, skip checking;
// if it is inside a view, skip checking;
// if authorization flag is not enabled, skip checking.
// if HIVE_STATS_COLLECT_SCANCOLS is enabled, check.
if ((!this.skipAuthorization() && !qb.isInsideView() && HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED))
|| HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
qb.rewriteViewToSubq(alias, tab_name, qbexpr, tab);
} else {
qb.rewriteViewToSubq(alias, tab_name, qbexpr, null);
}
}
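// Case-insensitive check of whether 'elem' (expected in lower case) appears in 'list'.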
private boolean isPresent(String[] list, String elem) {
for (String s : list) {
if (s.toLowerCase().equals(elem)) {
return true;
}
}
return false;
}
/*
* This method is invoked for unqualified column references in join conditions.
* This is passed in the Alias to Operator mapping in the QueryBlock so far.
* We try to resolve the unqualified column against each of the Operator Row Resolvers.
* - if the column is present in only one RowResolver, we treat this as a reference to
* that Operator.
* - if the column resolves with more than one RowResolver, we treat it as an Ambiguous
* reference.
* - if the column doesn't resolve with any RowResolver, we treat this as an Invalid
* reference.
*/
@SuppressWarnings("rawtypes")
private String findAlias(ASTNode columnRef,
Map<String, Operator> aliasToOpInfo) throws SemanticException {
String colName = unescapeIdentifier(columnRef.getChild(0).getText()
.toLowerCase());
String tabAlias = null;
if ( aliasToOpInfo != null ) {
for (Map.Entry<String, Operator> opEntry : aliasToOpInfo.entrySet()) {
Operator op = opEntry.getValue();
RowResolver rr = opParseCtx.get(op).getRowResolver();
ColumnInfo colInfo = rr.get(null, colName);
if (colInfo != null) {
if (tabAlias == null) {
tabAlias = opEntry.getKey();
} else {
throw new SemanticException(
ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(columnRef.getChild(0)));
}
}
}
}
if ( tabAlias == null ) {
throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(columnRef
.getChild(0)));
}
return tabAlias;
}
@SuppressWarnings("nls")
void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn,
ArrayList<String> leftAliases, ArrayList<String> rightAliases,
ArrayList<String> fields,
Map<String, Operator> aliasToOpInfo) throws SemanticException {
// String[] allAliases = joinTree.getAllAliases();
switch (condn.getToken().getType()) {
case HiveParser.TOK_TABLE_OR_COL:
String tableOrCol = unescapeIdentifier(condn.getChild(0).getText()
.toLowerCase());
unparseTranslator.addIdentifierTranslation((ASTNode) condn.getChild(0));
if (isPresent(joinTree.getLeftAliases(), tableOrCol)) {
if (!leftAliases.contains(tableOrCol)) {
leftAliases.add(tableOrCol);
}
} else if (isPresent(joinTree.getRightAliases(), tableOrCol)) {
if (!rightAliases.contains(tableOrCol)) {
rightAliases.add(tableOrCol);
}
} else {
tableOrCol = findAlias(condn, aliasToOpInfo);
if (isPresent(joinTree.getLeftAliases(), tableOrCol)) {
if (!leftAliases.contains(tableOrCol)) {
leftAliases.add(tableOrCol);
}
} else {
if (!rightAliases.contains(tableOrCol)) {
rightAliases.add(tableOrCol);
}
if (joinTree.getNoSemiJoin() == false) {
// if this is a semijoin, we need to add the condition
joinTree.addRHSSemijoinColumns(tableOrCol, condn);
}
}
}
break;
case HiveParser.Identifier:
// it may be a field name, return the identifier and let the caller decide
// whether it is or not
if (fields != null) {
fields
.add(unescapeIdentifier(condn.getToken().getText().toLowerCase()));
}
unparseTranslator.addIdentifierTranslation(condn);
break;
case HiveParser.Number:
case HiveParser.StringLiteral:
case HiveParser.BigintLiteral:
case HiveParser.SmallintLiteral:
case HiveParser.TinyintLiteral:
case HiveParser.DecimalLiteral:
case HiveParser.TOK_STRINGLITERALSEQUENCE:
case HiveParser.TOK_CHARSETLITERAL:
case HiveParser.KW_TRUE:
case HiveParser.KW_FALSE:
break;
case HiveParser.TOK_FUNCTION:
// check all the arguments
for (int i = 1; i < condn.getChildCount(); i++) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(i),
leftAliases, rightAliases, null, aliasToOpInfo);
}
break;
default:
// This is an operator - so check whether it is unary or binary operator
if (condn.getChildCount() == 1) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null, aliasToOpInfo);
} else if (condn.getChildCount() == 2) {
ArrayList<String> fields1 = null;
// if it is a dot operator, remember the field name of the rhs of the
// left semijoin
if (joinTree.getNoSemiJoin() == false
&& condn.getToken().getType() == HiveParser.DOT) {
// get the semijoin rhs table name and field name
fields1 = new ArrayList<String>();
int rhssize = rightAliases.size();
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null, aliasToOpInfo);
String rhsAlias = null;
if (rightAliases.size() > rhssize) { // the new table is rhs table
rhsAlias = rightAliases.get(rightAliases.size() - 1);
}
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
leftAliases, rightAliases, fields1, aliasToOpInfo);
if (rhsAlias != null && fields1.size() > 0) {
joinTree.addRHSSemijoinColumns(rhsAlias, condn);
}
} else {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null, aliasToOpInfo);
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
leftAliases, rightAliases, fields1, aliasToOpInfo);
}
} else {
throw new SemanticException(condn.toStringTree() + " encountered with "
+ condn.getChildCount() + " children");
}
break;
}
}
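/*
* Routes an already alias-resolved condition: if it references both sides it becomes a
* post-join filter; if it references only the right (or only the left) side it is added to
* that side's join expressions, and any new left-side aliases are recorded in leftSrc.
*/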
private void populateAliases(List<String> leftAliases,
List<String> rightAliases, ASTNode condn, QBJoinTree joinTree,
List<String> leftSrc) throws SemanticException {
if ((leftAliases.size() != 0) && (rightAliases.size() != 0)) {
joinTree.addPostJoinFilter(condn);
return;
}
if (rightAliases.size() != 0) {
assert rightAliases.size() == 1;
joinTree.getExpressions().get(1).add(condn);
} else if (leftAliases.size() != 0) {
joinTree.getExpressions().get(0).add(condn);
for (String s : leftAliases) {
if (!leftSrc.contains(s)) {
leftSrc.add(s);
}
}
} else {
joinTree.addPostJoinFilter(condn);
}
}
/*
* refactored out of the Equality case of parseJoinCondition
* so that this can be recursively called on its left tree in the case when
* only left sources are referenced in a Predicate
*/
void applyEqualityPredicateToQBJoinTree(QBJoinTree joinTree,
JoinType type,
List<String> leftSrc,
ASTNode joinCond,
ASTNode leftCondn,
ASTNode rightCondn,
List<String> leftCondAl1,
List<String> leftCondAl2,
List<String> rightCondAl1,
List<String> rightCondAl2) throws SemanticException {
if (leftCondAl1.size() != 0) {
if ((rightCondAl1.size() != 0)
|| ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
if (type.equals(JoinType.LEFTOUTER) ||
type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(0).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS.getErrorCodedMsg());
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
/*
* If the rhs references table sources and this QBJoinTree has a leftTree;
* hand it to the leftTree and let it recursively handle it.
* There are 3 cases of passing a condition down:
* 1. The leftSide && rightSide don't contain references to the leftTree's rightAlias
* => pass the lists down as is.
* 2. The leftSide contains refs to the leftTree's rightAlias, the rightSide doesn't
* => switch the leftCondAl1 and leftConAl2 lists and pass down.
* 3. The rightSide contains refs to the leftTree's rightAlias, the leftSide doesn't
* => switch the rightCondAl1 and rightConAl2 lists and pass down.
* 4. In case both contain references to the leftTree's rightAlias
* => we cannot push the condition down.
* 5. If either contains references to both left & right
* => we cannot push forward.
*/
if (rightCondAl1.size() != 0) {
QBJoinTree leftTree = joinTree.getJoinSrc();
List<String> leftTreeLeftSrc = new ArrayList<String>();
if (leftTree != null && leftTree.getNoOuterJoin()) {
String leftTreeRightSource = leftTree.getRightAliases() != null &&
leftTree.getRightAliases().length > 0 ?
leftTree.getRightAliases()[0] : null;
boolean leftHasRightReference = false;
for (String r : leftCondAl1) {
if (r.equals(leftTreeRightSource)) {
leftHasRightReference = true;
break;
}
}
boolean rightHasRightReference = false;
for (String r : rightCondAl1) {
if (r.equals(leftTreeRightSource)) {
rightHasRightReference = true;
break;
}
}
boolean pushedDown = false;
if ( !leftHasRightReference && !rightHasRightReference ) {
applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc,
joinCond, leftCondn, rightCondn,
leftCondAl1, leftCondAl2,
rightCondAl1, rightCondAl2);
pushedDown = true;
} else if ( !leftHasRightReference && rightHasRightReference && rightCondAl1.size() == 1 ) {
applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc,
joinCond, leftCondn, rightCondn,
leftCondAl1, leftCondAl2,
rightCondAl2, rightCondAl1);
pushedDown = true;
} else if (leftHasRightReference && !rightHasRightReference && leftCondAl1.size() == 1 ) {
applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc,
joinCond, leftCondn, rightCondn,
leftCondAl2, leftCondAl1,
rightCondAl1, rightCondAl2);
pushedDown = true;
}
if (leftTreeLeftSrc.size() == 1) {
leftTree.setLeftAlias(leftTreeLeftSrc.get(0));
}
if ( pushedDown) {
return;
}
} // leftTree != null
}
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else if (rightCondAl2.size() != 0) {
populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
leftSrc);
populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
leftSrc);
boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
joinTree.getNullSafes().add(nullsafe);
}
} else if (leftCondAl2.size() != 0) {
if ((rightCondAl2.size() != 0)
|| ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
if (type.equals(JoinType.RIGHTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(1).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS.getErrorCodedMsg());
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else if (rightCondAl1.size() != 0) {
populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
leftSrc);
populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
leftSrc);
boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
joinTree.getNullSafes().add(nullsafe);
}
} else if (rightCondAl1.size() != 0) {
if (type.equals(JoinType.LEFTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(0).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS.getErrorCodedMsg());
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
if (type.equals(JoinType.RIGHTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(1).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS.getErrorCodedMsg());
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else if (type.equals(JoinType.LEFTSEMI)) {
joinTree.getExpressions().get(0).add(leftCondn);
joinTree.getExpressions().get(1).add(rightCondn);
boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
joinTree.getNullSafes().add(nullsafe);
joinTree.getFiltersForPushing().get(1).add(joinCond);
} else {
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
}
}
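// Entry point for join-condition parsing: derives the join type from the tree's first
// JoinCond, delegates to the recursive overload below, and records filter mappings for
// outer joins.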
@SuppressWarnings("rawtypes")
private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, List leftSrc,
Map<String, Operator> aliasToOpInfo)
throws SemanticException {
if (joinCond == null) {
return;
}
JoinCond cond = joinTree.getJoinCond()[0];
JoinType type = cond.getJoinType();
parseJoinCondition(joinTree, joinCond, leftSrc, type, aliasToOpInfo);
List<ArrayList<ASTNode>> filters = joinTree.getFilters();
if (type == JoinType.LEFTOUTER || type == JoinType.FULLOUTER) {
joinTree.addFilterMapping(cond.getLeft(), cond.getRight(), filters.get(0).size());
}
if (type == JoinType.RIGHTOUTER || type == JoinType.FULLOUTER) {
joinTree.addFilterMapping(cond.getRight(), cond.getLeft(), filters.get(1).size());
}
}
/**
* Parse the join condition. If the condition is a join condition, throw an
* error if it is not an equality. Otherwise, break it into left and right
* expressions and store in the join tree. If the condition is a join filter,
* add it to the filter list of the join tree. The join condition can contain
* conditions on both the left and right trees and filters on either.
* Currently, we only support equi-joins, so we throw an error if the
* condition involves both subtrees and is not an equality. Also, we only
* support AND, i.e. ORs are not currently supported, as their semantics are not
* very clear, may lead to data explosion and there is no use case.
*
* @param joinTree
* jointree to be populated
* @param joinCond
* join condition
* @param leftSrc
* left sources
* @throws SemanticException
*/
@SuppressWarnings("rawtypes")
private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond,
List<String> leftSrc, JoinType type,
Map<String, Operator> aliasToOpInfo) throws SemanticException {
if (joinCond == null) {
return;
}
switch (joinCond.getToken().getType()) {
case HiveParser.KW_OR:
joinTree.addPostJoinFilter(joinCond);
break;
case HiveParser.KW_AND:
parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(0), leftSrc, type, aliasToOpInfo);
parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(1), leftSrc, type, aliasToOpInfo);
break;
case HiveParser.EQUAL_NS:
case HiveParser.EQUAL:
ASTNode leftCondn = (ASTNode) joinCond.getChild(0);
ArrayList<String> leftCondAl1 = new ArrayList<String>();
ArrayList<String> leftCondAl2 = new ArrayList<String>();
parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2,
null, aliasToOpInfo);
ASTNode rightCondn = (ASTNode) joinCond.getChild(1);
ArrayList<String> rightCondAl1 = new ArrayList<String>();
ArrayList<String> rightCondAl2 = new ArrayList<String>();
parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1,
rightCondAl2, null, aliasToOpInfo);
// is it a filter or a join condition
// if it is filter see if it can be pushed above the join
// filter cannot be pushed if
// * join is full outer or
// * join is left outer and filter is on left alias or
// * join is right outer and filter is on right alias
if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0))
|| ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) {
joinTree.addPostJoinFilter(joinCond);
} else {
applyEqualityPredicateToQBJoinTree(joinTree, type, leftSrc,
joinCond, leftCondn, rightCondn,
leftCondAl1, leftCondAl2,
rightCondAl1, rightCondAl2);
}
break;
default:
boolean isFunction = (joinCond.getType() == HiveParser.TOK_FUNCTION);
// Create all children
int childrenBegin = (isFunction ? 1 : 0);
ArrayList<ArrayList<String>> leftAlias = new ArrayList<ArrayList<String>>(
joinCond.getChildCount() - childrenBegin);
ArrayList<ArrayList<String>> rightAlias = new ArrayList<ArrayList<String>>(
joinCond.getChildCount() - childrenBegin);
for (int ci = 0; ci < joinCond.getChildCount() - childrenBegin; ci++) {
ArrayList<String> left = new ArrayList<String>();
ArrayList<String> right = new ArrayList<String>();
leftAlias.add(left);
rightAlias.add(right);
}
for (int ci = childrenBegin; ci < joinCond.getChildCount(); ci++) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) joinCond.getChild(ci),
leftAlias.get(ci - childrenBegin), rightAlias.get(ci
- childrenBegin), null, aliasToOpInfo);
}
boolean leftAliasNull = true;
for (ArrayList<String> left : leftAlias) {
if (left.size() != 0) {
leftAliasNull = false;
break;
}
}
boolean rightAliasNull = true;
for (ArrayList<String> right : rightAlias) {
if (right.size() != 0) {
rightAliasNull = false;
break;
}
}
if (!leftAliasNull && !rightAliasNull) {
joinTree.addPostJoinFilter(joinCond);
} else {
if (!leftAliasNull) {
if (type.equals(JoinType.LEFTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(0).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS.getErrorCodedMsg());
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
if (type.equals(JoinType.RIGHTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(1).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS.getErrorCodedMsg());
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
}
}
break;
}
}
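// Best-effort pass over the WHERE clause: equality predicates that cleanly reference one
// alias on each side are folded into the join tree as join conditions; anything ambiguous
// is left alone and handled later by normal where-clause processing / predicate pushdown.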
@SuppressWarnings("rawtypes")
private void extractJoinCondsFromWhereClause(QBJoinTree joinTree, QB qb, String dest, ASTNode predicate,
Map<String, Operator> aliasToOpInfo) throws SemanticException {
switch (predicate.getType()) {
case HiveParser.KW_AND:
extractJoinCondsFromWhereClause(joinTree, qb, dest,
(ASTNode) predicate.getChild(0), aliasToOpInfo);
extractJoinCondsFromWhereClause(joinTree, qb, dest,
(ASTNode) predicate.getChild(1), aliasToOpInfo);
break;
case HiveParser.EQUAL_NS:
case HiveParser.EQUAL:
ASTNode leftCondn = (ASTNode) predicate.getChild(0);
ArrayList<String> leftCondAl1 = new ArrayList<String>();
ArrayList<String> leftCondAl2 = new ArrayList<String>();
try {
parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2,
null, aliasToOpInfo);
} catch(SemanticException se) {
// suppress here; if it is a real issue will get caught in where clause handling.
return;
}
ASTNode rightCondn = (ASTNode) predicate.getChild(1);
ArrayList<String> rightCondAl1 = new ArrayList<String>();
ArrayList<String> rightCondAl2 = new ArrayList<String>();
try {
parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1,
rightCondAl2, null, aliasToOpInfo);
} catch(SemanticException se) {
// suppress here; if it is a real issue will get caught in where clause handling.
return;
}
if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0))
|| ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) {
// this is not a join condition.
return;
}
if (((leftCondAl1.size() == 0) && (leftCondAl2.size() == 0))
|| ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
// this is not a join condition. Will get handled by predicate pushdown.
return;
}
List<String> leftSrc = new ArrayList<String>();
JoinCond cond = joinTree.getJoinCond()[0];
JoinType type = cond.getJoinType();
applyEqualityPredicateToQBJoinTree(joinTree, type, leftSrc,
predicate, leftCondn, rightCondn,
leftCondAl1, leftCondAl2,
rightCondAl1, rightCondAl2);
if (leftSrc.size() == 1) {
joinTree.setLeftAlias(leftSrc.get(0));
}
// todo: hold onto this predicate, so that we don't add it to the Filter Operator.
break;
default:
return;
}
}
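// Associates the operator with its RowResolver in the operator parse-context map, lets the
// operator augment the plan, and returns the same operator so plan-generation calls can chain.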
@SuppressWarnings("nls")
public Operator putOpInsertMap(Operator<? extends OperatorDesc> op,
RowResolver rr) {
OpParseContext ctx = new OpParseContext(rr);
opParseCtx.put(op, ctx);
op.augmentPlan();
return op;
}
@SuppressWarnings("nls")
private Operator genHavingPlan(String dest, QB qb, Operator input,
Map<String, Operator> aliasToOpInfo)
throws SemanticException {
ASTNode havingExpr = qb.getParseInfo().getHavingForClause(dest);
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
Map<ASTNode, String> exprToColumnAlias = qb.getParseInfo().getAllExprToColumnAlias();
for (ASTNode astNode : exprToColumnAlias.keySet()) {
if (inputRR.getExpression(astNode) != null) {
inputRR.put("", exprToColumnAlias.get(astNode), inputRR.getExpression(astNode));
}
}
ASTNode condn = (ASTNode) havingExpr.getChild(0);
if (!isCBOExecuted() && !qb.getParseInfo().getDestToGroupBy().isEmpty()) {
// If CBO did not optimize the query, we might need to replace grouping function
final String destClauseName = qb.getParseInfo().getClauseNames().iterator().next();
final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty()
|| !qb.getParseInfo().getDestGroupingSets().isEmpty()
|| !qb.getParseInfo().getDestCubes().isEmpty());
// Special handling of grouping function
condn = rewriteGroupingFunctionAST(getGroupByForClause(qb.getParseInfo(), destClauseName), condn,
!cubeRollupGrpSetPresent);
}
/*
* Now a having clause can contain a SubQuery predicate;
* so we invoke genFilterPlan to handle SubQuery algebraic transformation,
* just as is done for SubQuery predicates appearing in the Where Clause.
*/
Operator output = genFilterPlan(condn, qb, input, aliasToOpInfo, true, false);
output = putOpInsertMap(output, inputRR);
return output;
}
protected static ASTNode rewriteGroupingFunctionAST(final List<ASTNode> grpByAstExprs, ASTNode targetNode,
final boolean noneSet) throws SemanticException {
final MutableBoolean visited = new MutableBoolean(false);
final MutableBoolean found = new MutableBoolean(false);
TreeVisitorAction action = new TreeVisitorAction() {
@Override
public Object pre(Object t) {
return t;
}
@Override
public Object post(Object t) {
ASTNode root = (ASTNode) t;
if (root.getType() == HiveParser.TOK_FUNCTION && root.getChildCount() == 2) {
ASTNode func = (ASTNode) ParseDriver.adaptor.getChild(root, 0);
if (func.getText().equals("grouping")) {
ASTNode c = (ASTNode) ParseDriver.adaptor.getChild(root, 1);
visited.setValue(true);
for (int i = 0; i < grpByAstExprs.size(); i++) {
ASTNode grpByExpr = grpByAstExprs.get(i);
if (grpByExpr.toStringTree().equals(c.toStringTree())) {
ASTNode child1;
if (noneSet) {
// Query does not contain CUBE, ROLLUP, or GROUPING SETS, and thus,
// grouping should return 0
child1 = (ASTNode) ParseDriver.adaptor.create(HiveParser.Number,
String.valueOf(0));
} else {
// We refer to grouping_id column
child1 = (ASTNode) ParseDriver.adaptor.create(
HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL");
ParseDriver.adaptor.addChild(child1, ParseDriver.adaptor.create(
HiveParser.Identifier, VirtualColumn.GROUPINGID.getName()));
}
ASTNode child2 = (ASTNode) ParseDriver.adaptor.create(HiveParser.Number,
String.valueOf(IntMath.mod(-i-1, grpByAstExprs.size())));
root.setChild(1, child1);
root.addChild(child2);
found.setValue(true);
break;
}
}
}
}
return t;
}
};
ASTNode newTargetNode = (ASTNode) new TreeVisitor(ParseDriver.adaptor).visit(targetNode, action);
if (visited.booleanValue() && !found.booleanValue()) {
throw new SemanticException(ErrorMsg.HIVE_GROUPING_FUNCTION_EXPR_NOT_IN_GROUPBY.getMsg());
}
return newTargetNode;
}
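// Generates the operator plan for a SubQuery predicate: runs phase 1 on the SubQuery AST,
// resolves its metadata, and returns the top operator of the generated plan.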
private Operator genPlanForSubQueryPredicate(
QB qbSQ,
ISubQueryJoinInfo subQueryPredicate) throws SemanticException {
qbSQ.setSubQueryDef(subQueryPredicate.getSubQuery());
Phase1Ctx ctx_1 = initPhase1Ctx();
doPhase1(subQueryPredicate.getSubQueryAST(), qbSQ, ctx_1, null);
getMetaData(qbSQ);
Operator op = genPlan(qbSQ);
return op;
}
@SuppressWarnings("nls")
private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input,
Map<String, Operator> aliasToOpInfo,
boolean forHavingClause, boolean forGroupByClause)
throws SemanticException {
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
/*
* Handling of SubQuery Expressions:
* if "Where clause contains no SubQuery expressions" then
* -->[true] ===CONTINUE_FILTER_PROCESSING===
* else
* -->[false] "extract SubQuery expressions\n from Where clause"
* if "this is a nested SubQuery or \nthere are more than 1 SubQuery expressions" then
* -->[yes] "throw Unsupported Error"
* else
* --> "Rewrite Search condition to \nremove SubQuery predicate"
* --> "build QBSubQuery"
* --> "extract correlated predicates \nfrom Where Clause"
* --> "add correlated Items to \nSelect List and Group By"
* --> "construct Join Predicate \nfrom correlation predicates"
* --> "Generate Plan for\n modified SubQuery"
* --> "Build the Join Condition\n for Parent Query to SubQuery join"
* --> "Build the QBJoinTree from the Join condition"
* --> "Update Parent Query Filter\n with any Post Join conditions"
* --> ===CONTINUE_FILTER_PROCESSING===
* endif
* endif
*
* Support for Sub Queries in Having Clause:
* - By and large this works the same way as SubQueries in the Where Clause.
* - The one addendum is the handling of aggregation expressions from the Outer Query
* appearing in correlation clauses.
* - So such correlating predicates are allowed:
* min(OuterQuery.x) = SubQuery.y
* - this requires special handling when converting to joins. See the QBSubQuery.rewrite
* method for detailed comments.
*/
List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
if ( subQueriesInOriginalTree.size() > 0 ) {
/*
* Restriction.9.m :: disallow nested SubQuery expressions.
*/
if (qb.getSubQueryPredicateDef() != null ) {
throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported."));
}
/*
* Restriction.8.m :: We allow only 1 SubQuery expression per Query.
*/
if (subQueriesInOriginalTree.size() > 1 ) {
throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported."));
}
/*
* Clone the Search AST; apply all rewrites on the clone.
*/
ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond);
List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond);
for(int i=0; i < subQueries.size(); i++) {
ASTNode subQueryAST = subQueries.get(i);
ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i);
int sqIdx = qb.incrNumSubQueryPredicates();
clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST);
QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(),
sqIdx, subQueryAST, originalSubQueryAST, ctx);
if ( !forHavingClause ) {
qb.setWhereClauseSubQueryPredicate(subQuery);
} else {
qb.setHavingClauseSubQueryPredicate(subQuery);
}
String havingInputAlias = null;
if ( forHavingClause ) {
havingInputAlias = "gby_sq" + sqIdx;
aliasToOpInfo.put(havingInputAlias, input);
}
subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, aliasToOpInfo.keySet());
QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true);
Operator sqPlanTopOp = genPlanForSubQueryPredicate(qbSQ, subQuery);
aliasToOpInfo.put(subQuery.getAlias(), sqPlanTopOp);
RowResolver sqRR = opParseCtx.get(sqPlanTopOp).getRowResolver();
/*
* Check.5.h :: For In and Not In the SubQuery must implicitly or
* explicitly only contain one select item.
*/
if ( subQuery.getOperator().getType() != SubQueryType.EXISTS &&
subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS &&
sqRR.getColumnInfos().size() -
subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1 ) {
throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
subQueryAST, "SubQuery can contain only 1 item in Select List."));
}
/*
* If this is a Not In SubQuery Predicate then Join in the Null Check SubQuery.
* See QBSubQuery.NotInCheck for details on why and how this is constructed.
*/
if ( subQuery.getNotInCheck() != null ) {
QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck();
notInCheck.setSQRR(sqRR);
QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true);
Operator sqnicPlanTopOp = genPlanForSubQueryPredicate(qbSQ_nic, notInCheck);
aliasToOpInfo.put(notInCheck.getAlias(), sqnicPlanTopOp);
QBJoinTree joinTree_nic = genSQJoinTree(qb, notInCheck,
input,
aliasToOpInfo);
pushJoinFilters(qb, joinTree_nic, aliasToOpInfo, false);
input = genJoinOperator(qbSQ_nic, joinTree_nic, aliasToOpInfo, input);
inputRR = opParseCtx.get(input).getRowResolver();
if ( forHavingClause ) {
aliasToOpInfo.put(havingInputAlias, input);
}
}
/*
* Gen Join between outer Operator and SQ op
*/
subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias);
QBJoinTree joinTree = genSQJoinTree(qb, subQuery,
input,
aliasToOpInfo);
/*
* push filters only for this QBJoinTree. Child QBJoinTrees have already been handled.
*/
pushJoinFilters(qb, joinTree, aliasToOpInfo, false);
input = genJoinOperator(qbSQ, joinTree, aliasToOpInfo, input);
searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond);
}
}
return genFilterPlan(qb, searchCond, input, forHavingClause || forGroupByClause);
}
/**
* create a filter plan. The condition and the inputs are specified.
*
* @param qb
* current query block
* @param condn
* The condition to be resolved
* @param input
* the input operator
*/
@SuppressWarnings("nls")
private Operator genFilterPlan(QB qb, ASTNode condn, Operator input, boolean useCaching)
throws SemanticException {
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
ExprNodeDesc filterCond = genExprNodeDesc(condn, inputRR, useCaching, isCBOExecuted());
if (filterCond instanceof ExprNodeConstantDesc) {
ExprNodeConstantDesc c = (ExprNodeConstantDesc) filterCond;
if (Boolean.TRUE.equals(c.getValue())) {
// If filter condition is TRUE, we ignore it
return input;
}
if (ExprNodeDescUtils.isNullConstant(c)) {
// If filter condition is NULL, transform to FALSE
filterCond = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, false);
}
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new FilterDesc(filterCond, false), new RowSchema(
inputRR.getColumnInfos()), input), inputRR);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: "
+ inputRR.toString());
}
return output;
}
/*
* for inner joins, push an 'is not null' predicate to the join sources for
* every non-null-safe join predicate.
*/
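// Illustrative example (not from the original source): for 'a JOIN b ON (a.key = b.key)',
// an 'a.key is not null' filter is pushed below the join on a's side and 'b.key is not null'
// on b's side; null-safe (<=>) keys and partition/virtual columns are skipped.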
private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input,
QBJoinTree joinTree, ExprNodeDesc[] joinKeys) throws SemanticException {
if (qb == null || joinTree == null) {
return input;
}
if (!joinTree.getNoOuterJoin()) {
return input;
}
if (joinKeys == null || joinKeys.length == 0) {
return input;
}
Map<Integer, ExprNodeDesc> hashes = new HashMap<Integer, ExprNodeDesc>();
if (input instanceof FilterOperator) {
ExprNodeDescUtils.getExprNodeColumnDesc(Arrays.asList(((FilterDesc)input.getConf()).getPredicate()), hashes);
}
ExprNodeDesc filterPred = null;
List<Boolean> nullSafes = joinTree.getNullSafes();
for (int i = 0; i < joinKeys.length; i++) {
if (nullSafes.get(i) || (joinKeys[i] instanceof ExprNodeColumnDesc &&
((ExprNodeColumnDesc)joinKeys[i]).getIsPartitionColOrVirtualCol())) {
// no need to generate is not null predicate for partitioning or
// virtual column, since those columns can never be null.
continue;
}
if(null != hashes.get(joinKeys[i].hashCode())) {
// there is already a predicate on this src.
continue;
}
List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
args.add(joinKeys[i]);
ExprNodeDesc nextExpr = ExprNodeGenericFuncDesc.newInstance(
FunctionRegistry.getFunctionInfo("isnotnull").getGenericUDF(), args);
filterPred = filterPred == null ? nextExpr : ExprNodeDescUtils
.mergePredicates(filterPred, nextExpr);
}
if (filterPred == null) {
return input;
}
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
if (input instanceof FilterOperator) {
FilterOperator f = (FilterOperator) input;
List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
preds.add(f.getConf().getPredicate());
preds.add(filterPred);
f.getConf().setPredicate(ExprNodeDescUtils.mergePredicates(preds));
return input;
}
FilterDesc filterDesc = new FilterDesc(filterPred, false);
filterDesc.setGenerated(true);
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(filterDesc,
new RowSchema(inputRR.getColumnInfos()), input), inputRR);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: "
+ inputRR.toString());
}
return output;
}
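// Added summary comment: genColListRegex expands a '*' or column-regex reference against
// colSrcRR/input, appending one ExprNodeColumnDesc per matched column to col_list, registering
// it in the output RowResolver, and returning the updated column position counter.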
@SuppressWarnings("nls")
// TODO: make aliases unique, otherwise needless rewriting takes place
Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel,
ArrayList<ExprNodeDesc> col_list, HashSet<ColumnInfo> excludeCols, RowResolver input,
RowResolver colSrcRR, Integer pos, RowResolver output, List<String> aliases,
boolean ensureUniqueCols) throws SemanticException {
if (colSrcRR == null) {
colSrcRR = input;
}
// The table alias should exist
if (tabAlias != null && !colSrcRR.hasTableAlias(tabAlias)) {
throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(sel));
}
// TODO: Have to put in the support for AS clause
Pattern regex = null;
try {
regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE);
} catch (PatternSyntaxException e) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel, e
.getMessage()));
}
StringBuilder replacementText = new StringBuilder();
int matched = 0;
// add empty string to the list of aliases. Some operators (ex. GroupBy) add
// ColumnInfos for table alias "".
if (!aliases.contains("")) {
aliases.add("");
}
/*
* track the input ColumnInfos that are added to the output.
* if a columnInfo has multiple mappings; then add the column only once,
* but carry the mappings forward.
*/
Map<ColumnInfo, ColumnInfo> inputColsProcessed = new HashMap<ColumnInfo, ColumnInfo>();
// For expr "*", aliases should be iterated in the order they are specified
// in the query.
for (String alias : aliases) {
HashMap<String, ColumnInfo> fMap = colSrcRR.getFieldMap(alias);
if (fMap == null) {
continue;
}
// For the tab.* case, add all the columns to the fieldList
// from the input schema
for (Map.Entry<String, ColumnInfo> entry : fMap.entrySet()) {
ColumnInfo colInfo = entry.getValue();
if (excludeCols != null && excludeCols.contains(colInfo)) {
continue; // This was added during plan generation.
}
// First, look up the column from the source against which * is to be resolved.
// We'd later translate this into the column from the proper input, if it's valid.
// TODO: excludeCols may be possible to remove using the same technique.
String name = colInfo.getInternalName();
String[] tmp = colSrcRR.reverseLookup(name);
// Skip the colinfos which are not for this particular alias
if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
continue;
}
if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
continue;
}
// Not matching the regex?
if (!regex.matcher(tmp[1]).matches()) {
continue;
}
// If input (GBY) is different than the source of columns, find the same column in input.
// TODO: This is fraught with peril.
if (input != colSrcRR) {
colInfo = input.get(tabAlias, tmp[1]);
if (colInfo == null) {
LOG.error("Cannot find colInfo for " + tabAlias + "." + tmp[1]
+ ", derived from [" + colSrcRR + "], in [" + input + "]");
throw new SemanticException(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY, tmp[1]);
}
String oldCol = null;
if (LOG.isDebugEnabled()) {
oldCol = name + " => " + (tmp == null ? "null" : (tmp[0] + "." + tmp[1]));
}
name = colInfo.getInternalName();
tmp = input.reverseLookup(name);
if (LOG.isDebugEnabled()) {
String newCol = name + " => " + (tmp == null ? "null" : (tmp[0] + "." + tmp[1]));
LOG.debug("Translated [" + oldCol + "] to [" + newCol + "]");
}
}
ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
if (oColInfo == null) {
ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(),
name, colInfo.getTabAlias(), colInfo.getIsVirtualCol(),
colInfo.isSkewedCol());
col_list.add(expr);
oColInfo = new ColumnInfo(getColumnInternalName(pos),
colInfo.getType(), colInfo.getTabAlias(),
colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
inputColsProcessed.put(colInfo, oColInfo);
}
if (ensureUniqueCols) {
if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) {
throw new CalciteSemanticException("Cannot add column to RR: " + tmp[0] + "." + tmp[1]
+ " => " + oColInfo + " due to duplication, see previous warnings",
UnsupportedFeature.Duplicates_in_RR);
}
} else {
output.put(tmp[0], tmp[1], oColInfo);
}
pos = Integer.valueOf(pos.intValue() + 1);
matched++;
if (unparseTranslator.isEnabled() || tableMask.isEnabled()) {
if (replacementText.length() > 0) {
replacementText.append(", ");
}
replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf));
replacementText.append(".");
replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf));
}
}
}
if (matched == 0) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel));
}
if (unparseTranslator.isEnabled()) {
unparseTranslator.addTranslation(sel, replacementText.toString());
} else if (tableMask.isEnabled()) {
tableMask.addTranslation(sel, replacementText.toString());
}
return pos;
}
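// e.g. (illustrative) getColumnInternalName(3) yields the internal column name "_col3".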
public static String getColumnInternalName(int pos) {
return HiveConf.getColumnInternalName(pos);
}
private String getScriptProgName(String cmd) {
int end = cmd.indexOf(" ");
return (end == -1) ? cmd : cmd.substring(0, end);
}
private String getScriptArgs(String cmd) {
int end = cmd.indexOf(" ");
return (end == -1) ? "" : cmd.substring(end, cmd.length());
}
static int getPositionFromInternalName(String internalName) {
return HiveConf.getPositionFromInternalName(internalName);
}
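// Illustrative example (not from the original source): a command whose program is a non-file:
// URI, such as "hdfs://nn/scripts/my_script.py arg1", is registered as a FILE resource and the
// command is rewritten to use just the downloaded file name, "my_script.py arg1"; commands that
// already use a file: URI are returned unchanged.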
private String fetchFilesNotInLocalFilesystem(String cmd) {
SessionState ss = SessionState.get();
String progName = getScriptProgName(cmd);
if (!ResourceDownloader.isFileUri(progName)) {
String filePath = ss.add_resource(ResourceType.FILE, progName, true);
Path p = new Path(filePath);
String fileName = p.getName();
String scriptArgs = getScriptArgs(cmd);
String finalCmd = fileName + scriptArgs;
return finalCmd;
}
return cmd;
}
private TableDesc getTableDescFromSerDe(ASTNode child, String cols,
String colTypes, boolean defaultCols) throws SemanticException {
if (child.getType() == HiveParser.TOK_SERDENAME) {
String serdeName = unescapeSQLString(child.getChild(0).getText());
Class<? extends Deserializer> serdeClass = null;
try {
serdeClass = (Class<? extends Deserializer>) Class.forName(serdeName,
true, Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
TableDesc tblDesc = PlanUtils.getTableDesc(serdeClass, Integer
.toString(Utilities.tabCode), cols, colTypes, defaultCols);
// copy all the properties
if (child.getChildCount() == 2) {
ASTNode prop = (ASTNode) ((ASTNode) child.getChild(1)).getChild(0);
for (int propChild = 0; propChild < prop.getChildCount(); propChild++) {
String key = unescapeSQLString(prop.getChild(propChild).getChild(0)
.getText());
String value = unescapeSQLString(prop.getChild(propChild).getChild(1)
.getText());
tblDesc.getProperties().setProperty(key, value);
}
}
return tblDesc;
} else if (child.getType() == HiveParser.TOK_SERDEPROPS) {
TableDesc tblDesc = PlanUtils.getDefaultTableDesc(Integer
.toString(Utilities.ctrlaCode), cols, colTypes, defaultCols);
int numChildRowFormat = child.getChildCount();
for (int numC = 0; numC < numChildRowFormat; numC++) {
ASTNode rowChild = (ASTNode) child.getChild(numC);
switch (rowChild.getToken().getType()) {
case HiveParser.TOK_TABLEROWFORMATFIELD:
String fieldDelim = unescapeSQLString(rowChild.getChild(0).getText());
tblDesc.getProperties()
.setProperty(serdeConstants.FIELD_DELIM, fieldDelim);
tblDesc.getProperties().setProperty(serdeConstants.SERIALIZATION_FORMAT,
fieldDelim);
if (rowChild.getChildCount() >= 2) {
String fieldEscape = unescapeSQLString(rowChild.getChild(1)
.getText());
tblDesc.getProperties().setProperty(serdeConstants.ESCAPE_CHAR,
fieldEscape);
}
break;
case HiveParser.TOK_TABLEROWFORMATCOLLITEMS:
tblDesc.getProperties().setProperty(serdeConstants.COLLECTION_DELIM,
unescapeSQLString(rowChild.getChild(0).getText()));
break;
case HiveParser.TOK_TABLEROWFORMATMAPKEYS:
tblDesc.getProperties().setProperty(serdeConstants.MAPKEY_DELIM,
unescapeSQLString(rowChild.getChild(0).getText()));
break;
case HiveParser.TOK_TABLEROWFORMATLINES:
String lineDelim = unescapeSQLString(rowChild.getChild(0).getText());
tblDesc.getProperties().setProperty(serdeConstants.LINE_DELIM, lineDelim);
if (!lineDelim.equals("\n") && !lineDelim.equals("10")) {
throw new SemanticException(generateErrorMessage(rowChild,
ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg()));
}
break;
case HiveParser.TOK_TABLEROWFORMATNULL:
String nullFormat = unescapeSQLString(rowChild.getChild(0).getText());
tblDesc.getProperties().setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT,
nullFormat);
break;
default:
assert false;
}
}
return tblDesc;
}
// should never come here
return null;
}
private void failIfColAliasExists(Set<String> nameSet, String name)
throws SemanticException {
if (nameSet.contains(name)) {
throw new SemanticException(ErrorMsg.COLUMN_ALIAS_ALREADY_EXISTS
.getMsg(name));
}
nameSet.add(name);
}
@SuppressWarnings("nls")
private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input)
throws SemanticException {
// If there is no "AS" clause, the output schema will be "key,value"
ArrayList<ColumnInfo> outputCols = new ArrayList<ColumnInfo>();
int inputSerDeNum = 1, inputRecordWriterNum = 2;
int outputSerDeNum = 4, outputRecordReaderNum = 5;
int outputColsNum = 6;
boolean outputColNames = false, outputColSchemas = false;
int execPos = 3;
boolean defaultOutputCols = false;
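// Added summary comment: child layout of the TRANSFORM AST implied by the indices above:
// 0 = input expression list, 1 = input SerDe, 2 = input record writer, 3 = script command,
// 4 = output SerDe, 5 = output record reader, 6 = output column aliases/schema.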
// Go over all the children
if (trfm.getChildCount() > outputColsNum) {
ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum);
if (outCols.getType() == HiveParser.TOK_ALIASLIST) {
outputColNames = true;
} else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) {
outputColSchemas = true;
}
}
// If column type is not specified, use a string
if (!outputColNames && !outputColSchemas) {
String intName = getColumnInternalName(0);
ColumnInfo colInfo = new ColumnInfo(intName,
TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias("key");
outputCols.add(colInfo);
intName = getColumnInternalName(1);
colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null,
false);
colInfo.setAlias("value");
outputCols.add(colInfo);
defaultOutputCols = true;
} else {
ASTNode collist = (ASTNode) trfm.getChild(outputColsNum);
int ccount = collist.getChildCount();
Set<String> colAliasNamesDuplicateCheck = new HashSet<String>();
if (outputColNames) {
for (int i = 0; i < ccount; ++i) {
String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i))
.getText());
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName,
TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
} else {
for (int i = 0; i < ccount; ++i) {
ASTNode child = (ASTNode) collist.getChild(i);
assert child.getType() == HiveParser.TOK_TABCOL;
String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0))
.getText());
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils
.getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child
.getChild(1))), null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
}
}
RowResolver out_rwsch = new RowResolver();
StringBuilder columns = new StringBuilder();
StringBuilder columnTypes = new StringBuilder();
for (int i = 0; i < outputCols.size(); ++i) {
if (i != 0) {
columns.append(",");
columnTypes.append(",");
}
columns.append(outputCols.get(i).getInternalName());
columnTypes.append(outputCols.get(i).getType().getTypeName());
out_rwsch.put(qb.getParseInfo().getAlias(), outputCols.get(i).getAlias(),
outputCols.get(i));
}
StringBuilder inpColumns = new StringBuilder();
StringBuilder inpColumnTypes = new StringBuilder();
ArrayList<ColumnInfo> inputSchema = opParseCtx.get(input).getRowResolver()
.getColumnInfos();
for (int i = 0; i < inputSchema.size(); ++i) {
if (i != 0) {
inpColumns.append(",");
inpColumnTypes.append(",");
}
inpColumns.append(inputSchema.get(i).getInternalName());
inpColumnTypes.append(inputSchema.get(i).getType().getTypeName());
}
TableDesc outInfo;
TableDesc errInfo;
TableDesc inInfo;
String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE);
Class<? extends Deserializer> serde;
try {
serde = (Class<? extends Deserializer>) Class.forName(defaultSerdeName,
true, Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
int fieldSeparator = Utilities.tabCode;
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESCRIPTESCAPE)) {
fieldSeparator = Utilities.ctrlaCode;
}
// Input and Output Serdes
if (trfm.getChild(inputSerDeNum).getChildCount() > 0) {
inInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm
.getChild(inputSerDeNum))).getChild(0), inpColumns.toString(),
inpColumnTypes.toString(), false);
} else {
inInfo = PlanUtils.getTableDesc(serde, Integer
.toString(fieldSeparator), inpColumns.toString(), inpColumnTypes
.toString(), false, true);
}
if (trfm.getChild(outputSerDeNum).getChildCount() > 0) {
outInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm
.getChild(outputSerDeNum))).getChild(0), columns.toString(),
columnTypes.toString(), false);
// This is for backward compatibility. If the user did not specify the
// output column list, we assume that there are 2 columns: key and value.
// However, if the script outputs: col1, col2, col3 separated by TAB, the
// requirement is: key is col1 and value is (col2 TAB col3)
} else {
outInfo = PlanUtils.getTableDesc(serde, Integer
.toString(fieldSeparator), columns.toString(), columnTypes
.toString(), defaultOutputCols);
}
// Error stream always uses the default serde with a single column
errInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), "KEY");
// Output record readers
Class<? extends RecordReader> outRecordReader = getRecordReader((ASTNode) trfm
.getChild(outputRecordReaderNum));
Class<? extends RecordWriter> inRecordWriter = getRecordWriter((ASTNode) trfm
.getChild(inputRecordWriterNum));
Class<? extends RecordReader> errRecordReader = getDefaultRecordReader();
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new ScriptDesc(
fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())),
inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo),
new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
output.setColumnExprMap(new HashMap<String, ExprNodeDesc>()); // disable backtracking
// Add URI entity for transform script. Script assumed to be local unless downloadable
if (conf.getBoolVar(ConfVars.HIVE_CAPTURE_TRANSFORM_ENTITY)) {
String scriptCmd = getScriptProgName(stripQuotes(trfm.getChild(execPos).getText()));
getInputs().add(new ReadEntity(new Path(scriptCmd),
ResourceDownloader.isFileUri(scriptCmd)));
}
return output;
}
private Class<? extends RecordReader> getRecordReader(ASTNode node)
throws SemanticException {
String name;
if (node.getChildCount() == 0) {
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER);
} else {
name = unescapeSQLString(node.getChild(0).getText());
}
try {
return (Class<? extends RecordReader>) Class.forName(name, true,
Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
private Class<? extends RecordReader> getDefaultRecordReader()
throws SemanticException {
String name;
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER);
try {
return (Class<? extends RecordReader>) Class.forName(name, true,
Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
private Class<? extends RecordWriter> getRecordWriter(ASTNode node)
throws SemanticException {
String name;
if (node.getChildCount() == 0) {
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDWRITER);
} else {
name = unescapeSQLString(node.getChild(0).getText());
}
try {
return (Class<? extends RecordWriter>) Class.forName(name, true,
Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
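// Added note (illustrative, not from the original source): grouping sets are encoded below as
// int bitmaps over the GROUP BY expressions, where bit i set means expression i participates in
// that grouping set. For example, GROUP BY a, b WITH ROLLUP -> [0, 1, 3], i.e. {(), (a), (a, b)};
// WITH CUBE -> [0, 1, 2, 3], i.e. all subsets of (a, b).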
protected List<Integer> getGroupingSetsForRollup(int size) {
List<Integer> groupingSetKeys = new ArrayList<Integer>();
for (int i = 0; i <= size; i++) {
groupingSetKeys.add((1 << i) - 1);
}
return groupingSetKeys;
}
protected List<Integer> getGroupingSetsForCube(int size) {
int count = 1 << size;
List<Integer> results = new ArrayList<Integer>(count);
for (int i = 0; i < count; ++i) {
results.add(i);
}
return results;
}
// This function returns the grouping sets along with the grouping expressions
// Even if rollups and cubes are present in the query, they are converted to
// grouping sets at this point
private ObjectPair<List<ASTNode>, List<Integer>> getGroupByGroupingSetsForClause(
QBParseInfo parseInfo, String dest) throws SemanticException {
List<Integer> groupingSets = new ArrayList<Integer>();
List<ASTNode> groupByExprs = getGroupByForClause(parseInfo, dest);
if (parseInfo.getDestRollups().contains(dest)) {
groupingSets = getGroupingSetsForRollup(groupByExprs.size());
} else if (parseInfo.getDestCubes().contains(dest)) {
groupingSets = getGroupingSetsForCube(groupByExprs.size());
} else if (parseInfo.getDestGroupingSets().contains(dest)) {
groupingSets = getGroupingSets(groupByExprs, parseInfo, dest);
}
return new ObjectPair<List<ASTNode>, List<Integer>>(groupByExprs, groupingSets);
}
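// Illustrative example (not from the original source): for GROUP BY a, b GROUPING SETS ((a), (a, b)),
// the bitmaps produced below are [1, 3].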
protected List<Integer> getGroupingSets(List<ASTNode> groupByExpr, QBParseInfo parseInfo,
String dest) throws SemanticException {
Map<String, Integer> exprPos = new HashMap<String, Integer>();
for (int i = 0; i < groupByExpr.size(); ++i) {
ASTNode node = groupByExpr.get(i);
exprPos.put(node.toStringTree(), i);
}
ASTNode root = parseInfo.getGroupByForClause(dest);
List<Integer> result = new ArrayList<Integer>(root == null ? 0 : root.getChildCount());
if (root != null) {
for (int i = 0; i < root.getChildCount(); ++i) {
ASTNode child = (ASTNode) root.getChild(i);
if (child.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) {
continue;
}
int bitmap = 0;
for (int j = 0; j < child.getChildCount(); ++j) {
String treeAsString = child.getChild(j).toStringTree();
Integer pos = exprPos.get(treeAsString);
if (pos == null) {
throw new SemanticException(
generateErrorMessage((ASTNode) child.getChild(j),
ErrorMsg.HIVE_GROUPING_SETS_EXPR_NOT_IN_GROUPBY.getErrorCodedMsg()));
}
bitmap = setBit(bitmap, pos);
}
result.add(bitmap);
}
}
if (checkForNoAggr(result)) {
throw new SemanticException(
ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOFUNC.getMsg());
}
return result;
}
private boolean checkForNoAggr(List<Integer> bitmaps) {
boolean ret = true;
for (int mask : bitmaps) {
ret &= mask == 0;
}
return ret;
}
public static int setBit(int bitmap, int bitIdx) {
return bitmap | (1 << bitIdx);
}
/**
* This function is a wrapper of parseInfo.getGroupByForClause which
* automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY
* a,b,c.
*/
List<ASTNode> getGroupByForClause(QBParseInfo parseInfo, String dest) throws SemanticException {
if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
ASTNode selectExprs = parseInfo.getSelForClause(dest);
List<ASTNode> result = new ArrayList<ASTNode>(selectExprs == null ? 0
: selectExprs.getChildCount());
if (selectExprs != null) {
for (int i = 0; i < selectExprs.getChildCount(); ++i) {
if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.TOK_HINTLIST) {
continue;
}
// table.column AS alias
ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
result.add(grpbyExpr);
}
}
return result;
} else {
ASTNode grpByExprs = parseInfo.getGroupByForClause(dest);
List<ASTNode> result = new ArrayList<ASTNode>(grpByExprs == null ? 0
: grpByExprs.getChildCount());
if (grpByExprs != null) {
for (int i = 0; i < grpByExprs.getChildCount(); ++i) {
ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i);
if (grpbyExpr.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) {
result.add(grpbyExpr);
}
}
}
return result;
}
}
static String[] getColAlias(ASTNode selExpr, String defaultName,
RowResolver inputRR, boolean includeFuncName, int colNum) {
String colAlias = null;
String tabAlias = null;
String[] colRef = new String[2];
//for queries with a windowing expressions, the selexpr may have a third child
if (selExpr.getChildCount() == 2 ||
(selExpr.getChildCount() == 3 &&
selExpr.getChild(2).getType() == HiveParser.TOK_WINDOWSPEC)) {
// return zz for "xx + yy AS zz"
colAlias = unescapeIdentifier(selExpr.getChild(1).getText().toLowerCase());
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
}
ASTNode root = (ASTNode) selExpr.getChild(0);
if (root.getType() == HiveParser.TOK_TABLE_OR_COL) {
colAlias =
BaseSemanticAnalyzer.unescapeIdentifier(root.getChild(0).getText().toLowerCase());
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
}
if (root.getType() == HiveParser.DOT) {
ASTNode tab = (ASTNode) root.getChild(0);
if (tab.getType() == HiveParser.TOK_TABLE_OR_COL) {
String t = unescapeIdentifier(tab.getChild(0).getText());
if (inputRR.hasTableAlias(t)) {
tabAlias = t;
}
}
// Return zz for "xx.zz" and "xx.yy.zz"
ASTNode col = (ASTNode) root.getChild(1);
if (col.getType() == HiveParser.Identifier) {
colAlias = unescapeIdentifier(col.getText().toLowerCase());
}
}
// if specified generate alias using func name
if (includeFuncName && (root.getType() == HiveParser.TOK_FUNCTION)) {
String expr_flattened = root.toStringTree();
// remove all TOK tokens
String expr_no_tok = expr_flattened.replaceAll("tok_\\S+", "");
// remove all non alphanumeric letters, replace whitespace spans with underscore
String expr_formatted = expr_no_tok.replaceAll("\\W", " ").trim().replaceAll("\\s+", "_");
// limit length to 20 chars
if (expr_formatted.length() > AUTOGEN_COLALIAS_PRFX_MAXLENGTH) {
expr_formatted = expr_formatted.substring(0, AUTOGEN_COLALIAS_PRFX_MAXLENGTH);
}
// append colnum to make it unique
colAlias = expr_formatted.concat("_" + colNum);
}
if (colAlias == null) {
// Return defaultName if selExpr is not a simple xx.yy.zz
colAlias = defaultName + colNum;
}
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
}
/**
* Returns whether the pattern is a regex expression (instead of a normal
* string). Normal string is a string with all alphabets/digits and "_".
*/
static boolean isRegex(String pattern, HiveConf conf) {
String qIdSupport = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT);
if ( "column".equals(qIdSupport)) {
return false;
}
for (int i = 0; i < pattern.length(); i++) {
if (!Character.isLetterOrDigit(pattern.charAt(i))
&& pattern.charAt(i) != '_') {
return true;
}
}
return false;
}
private Operator<?> genSelectPlan(String dest, QB qb, Operator<?> input,
Operator<?> inputForSelectStar) throws SemanticException {
ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
Operator<?> op = genSelectPlan(dest, selExprList, qb, input, inputForSelectStar, false);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Select Plan for clause: " + dest);
}
return op;
}
@SuppressWarnings("nls")
private Operator<?> genSelectPlan(String dest, ASTNode selExprList, QB qb, Operator<?> input,
Operator<?> inputForSelectStar, boolean outerLV) throws SemanticException {
if (LOG.isDebugEnabled()) {
LOG.debug("tree: " + selExprList.toStringTree());
}
ArrayList<ExprNodeDesc> col_list = new ArrayList<ExprNodeDesc>();
RowResolver out_rwsch = new RowResolver();
ASTNode trfm = null;
Integer pos = Integer.valueOf(0);
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
RowResolver starRR = null;
if (inputForSelectStar != null && inputForSelectStar != input) {
starRR = opParseCtx.get(inputForSelectStar).getRowResolver();
}
// SELECT * or SELECT TRANSFORM(*)
boolean selectStar = false;
int posn = 0;
boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST);
if (hintPresent) {
posn++;
}
boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() ==
HiveParser.TOK_TRANSFORM);
if (isInTransform) {
queryProperties.setUsesScript(true);
globalLimitCtx.setHasTransformOrUDTF(true);
trfm = (ASTNode) selExprList.getChild(posn).getChild(0);
}
// Detect queries of the form SELECT udtf(col) AS ...
// by looking for a function as the first child, and then checking to see
// if the function is a Generic UDTF. It's not as clean as TRANSFORM due to
// the lack of a special token.
boolean isUDTF = false;
String udtfTableAlias = null;
ArrayList<String> udtfColAliases = new ArrayList<String>();
ASTNode udtfExpr = (ASTNode) selExprList.getChild(posn).getChild(0);
GenericUDTF genericUDTF = null;
int udtfExprType = udtfExpr.getType();
if (udtfExprType == HiveParser.TOK_FUNCTION
|| udtfExprType == HiveParser.TOK_FUNCTIONSTAR) {
String funcName = TypeCheckProcFactory.DefaultExprProcessor
.getFunctionText(udtfExpr, true);
FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
if (fi != null) {
genericUDTF = fi.getGenericUDTF();
}
isUDTF = (genericUDTF != null);
if (isUDTF) {
globalLimitCtx.setHasTransformOrUDTF(true);
}
if (isUDTF && !fi.isNative()) {
unparseTranslator.addIdentifierTranslation((ASTNode) udtfExpr
.getChild(0));
}
if (isUDTF && (selectStar = udtfExprType == HiveParser.TOK_FUNCTIONSTAR)) {
genColListRegex(".*", null, (ASTNode) udtfExpr.getChild(0),
col_list, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
}
}
if (isUDTF) {
// Only support a single expression when it's a UDTF
if (selExprList.getChildCount() > 1) {
throw new SemanticException(generateErrorMessage(
(ASTNode) selExprList.getChild(1),
ErrorMsg.UDTF_MULTIPLE_EXPR.getMsg()));
}
ASTNode selExpr = (ASTNode) selExprList.getChild(posn);
// Get the column / table aliases from the expression. Start from 1 as
// 0 is the TOK_FUNCTION
// column names also can be inferred from result of UDTF
for (int i = 1; i < selExpr.getChildCount(); i++) {
ASTNode selExprChild = (ASTNode) selExpr.getChild(i);
switch (selExprChild.getType()) {
case HiveParser.Identifier:
udtfColAliases.add(unescapeIdentifier(selExprChild.getText().toLowerCase()));
unparseTranslator.addIdentifierTranslation(selExprChild);
break;
case HiveParser.TOK_TABALIAS:
assert (selExprChild.getChildCount() == 1);
udtfTableAlias = unescapeIdentifier(selExprChild.getChild(0)
.getText());
qb.addAlias(udtfTableAlias);
unparseTranslator.addIdentifierTranslation((ASTNode) selExprChild
.getChild(0));
break;
default:
assert (false);
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("UDTF table alias is " + udtfTableAlias);
LOG.debug("UDTF col aliases are " + udtfColAliases);
}
}
// The list of expressions after SELECT or SELECT TRANSFORM.
ASTNode exprList;
if (isInTransform) {
exprList = (ASTNode) trfm.getChild(0);
} else if (isUDTF) {
exprList = udtfExpr;
} else {
exprList = selExprList;
}
if (LOG.isDebugEnabled()) {
LOG.debug("genSelectPlan: input = " + inputRR + " starRr = " + starRR);
}
// For UDTF's, skip the function name to get the expressions
int startPosn = isUDTF ? posn + 1 : posn;
if (isInTransform) {
startPosn = 0;
}
final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty()
|| !qb.getParseInfo().getDestGroupingSets().isEmpty()
|| !qb.getParseInfo().getDestCubes().isEmpty());
Set<String> colAliases = new HashSet<String>();
ASTNode[] exprs = new ASTNode[exprList.getChildCount()];
String[][] aliases = new String[exprList.getChildCount()][];
boolean[] hasAsClauses = new boolean[exprList.getChildCount()];
int offset = 0;
// Iterate over all expression (either after SELECT, or in SELECT TRANSFORM)
for (int i = startPosn; i < exprList.getChildCount(); ++i) {
// child can be EXPR AS ALIAS, or EXPR.
ASTNode child = (ASTNode) exprList.getChild(i);
boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2);
boolean isWindowSpec = child.getChildCount() == 3 &&
child.getChild(2).getType() == HiveParser.TOK_WINDOWSPEC;
// EXPR AS (ALIAS,...) parses, but is only allowed for UDTF's
// This check is not needed (and is invalid) when there is a transform, because the
// ASTs are slightly different.
if (!isWindowSpec && !isInTransform && !isUDTF && child.getChildCount() > 2) {
throw new SemanticException(generateErrorMessage(
(ASTNode) child.getChild(2),
ErrorMsg.INVALID_AS.getMsg()));
}
// The real expression
ASTNode expr;
String tabAlias;
String colAlias;
if (isInTransform || isUDTF) {
tabAlias = null;
colAlias = autogenColAliasPrfxLbl + i;
expr = child;
} else {
// Get rid of TOK_SELEXPR
expr = (ASTNode) child.getChild(0);
String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR,
autogenColAliasPrfxIncludeFuncName, i + offset);
tabAlias = colRef[0];
colAlias = colRef[1];
if (hasAsClause) {
unparseTranslator.addIdentifierTranslation((ASTNode) child
.getChild(1));
}
}
exprs[i] = expr;
aliases[i] = new String[] {tabAlias, colAlias};
hasAsClauses[i] = hasAsClause;
colAliases.add(colAlias);
// The real expression
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
int initPos = pos;
pos = genColListRegex(".*", expr.getChildCount() == 0 ? null
: getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(),
expr, col_list, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
if (unparseTranslator.isEnabled()) {
offset += pos - initPos - 1;
}
selectStar = true;
} else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
&& !inputRR.getIsExprResolver()
&& isRegex(unescapeIdentifier(expr.getChild(0).getText()), conf)) {
// In case the expression is a regex COL.
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()),
null, expr, col_list, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
} else if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
&& inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0)
.getChild(0).getText().toLowerCase())) && !hasAsClause
&& !inputRR.getIsExprResolver()
&& isRegex(unescapeIdentifier(expr.getChild(1).getText()), conf)) {
// In case the expression is TABLE.COL (col can be regex).
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()),
expr, col_list, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
} else {
// Case when this is an expression
TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, true, isCBOExecuted());
// We allow stateful functions in the SELECT list (but nowhere else)
tcCtx.setAllowStatefulFunctions(true);
tcCtx.setAllowDistinctFunctions(false);
if (!isCBOExecuted() && !qb.getParseInfo().getDestToGroupBy().isEmpty()) {
// If CBO did not optimize the query, we might need to replace grouping function
// Special handling of grouping function
expr = rewriteGroupingFunctionAST(getGroupByForClause(qb.getParseInfo(), dest), expr,
!cubeRollupGrpSetPresent);
}
ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
String recommended = recommendName(exp, colAlias);
if (recommended != null && !colAliases.contains(recommended) &&
out_rwsch.get(null, recommended) == null) {
colAlias = recommended;
}
col_list.add(exp);
ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos),
exp.getWritableObjectInspector(), tabAlias, false);
colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc) exp)
.isSkewedCol() : false);
out_rwsch.put(tabAlias, colAlias, colInfo);
if ( exp instanceof ExprNodeColumnDesc ) {
ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp;
String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn());
if ( altMapping != null ) {
out_rwsch.put(altMapping[0], altMapping[1], colInfo);
}
}
pos = Integer.valueOf(pos.intValue() + 1);
}
}
selectStar = selectStar && exprList.getChildCount() == posn + 1;
out_rwsch = handleInsertStatementSpec(col_list, dest, out_rwsch, inputRR, qb, selExprList);
ArrayList<String> columnNames = new ArrayList<String>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < col_list.size(); i++) {
String outputCol = getColumnInternalName(i);
colExprMap.put(outputCol, col_list.get(i));
columnNames.add(outputCol);
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new SelectDesc(col_list, columnNames, selectStar), new RowSchema(
out_rwsch.getColumnInfos()), input), out_rwsch);
output.setColumnExprMap(colExprMap);
if (isInTransform) {
output = genScriptPlan(trfm, qb, output);
}
if (isUDTF) {
output = genUDTFPlan(genericUDTF, udtfTableAlias, udtfColAliases, qb,
output, outerLV);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Created Select Plan row schema: " + out_rwsch.toString());
}
return output;
}
/**
* This modifies the Select projections when the Select is part of an insert statement and
* the insert statement specifies a column list for the target table, e.g.
* create table source (a int, b int);
* create table target (x int, y int, z int);
* insert into target(z,x) select * from source
*
* Once the * is resolved to 'a,b', this list needs to be rewritten to 'b,null,a' so that it looks
* as if the original query was written as
* insert into target select b, null, a from source
*
* if the target schema is not specified, this is a no-op
*
* @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx)
* @throws SemanticException
*/
public RowResolver handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest,
RowResolver outputRR, RowResolver inputRR, QB qb,
ASTNode selExprList) throws SemanticException {
//(z,x)
List<String> targetTableSchema = qb.getParseInfo().getDestSchemaForClause(dest);//specified in the query
if(targetTableSchema == null) {
//no insert schema was specified
return outputRR;
}
if(targetTableSchema.size() != col_list.size()) {
Table target = qb.getMetaData().getDestTableForAlias(dest);
Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
throw new SemanticException(generateErrorMessage(selExprList,
"Expected " + targetTableSchema.size() + " columns for " + dest +
(target != null ? "/" + target.getCompleteName() : (partition != null ? "/" + partition.getCompleteName() : "")) +
"; select produces " + col_list.size() + " columns"));
}
//e.g. map z->expr for a
Map<String, ExprNodeDesc> targetCol2Projection = new HashMap<String, ExprNodeDesc>();
//e.g. map z->ColumnInfo for a
Map<String, ColumnInfo> targetCol2ColumnInfo = new HashMap<String, ColumnInfo>();
int colListPos = 0;
for(String targetCol : targetTableSchema) {
targetCol2ColumnInfo.put(targetCol, outputRR.getColumnInfos().get(colListPos));
targetCol2Projection.put(targetCol, col_list.get(colListPos++));
}
Table target = qb.getMetaData().getDestTableForAlias(dest);
Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
if(target == null && partition == null) {
throw new SemanticException(generateErrorMessage(selExprList,
"No table/partition found in QB metadata for dest='" + dest + "'"));
}
ArrayList<ExprNodeDesc> new_col_list = new ArrayList<ExprNodeDesc>();
colListPos = 0;
List<FieldSchema> targetTableCols = target != null ? target.getCols() : partition.getCols();
List<String> targetTableColNames = new ArrayList<String>();
List<TypeInfo> targetTableColTypes = new ArrayList<TypeInfo>();
for(FieldSchema fs : targetTableCols) {
targetTableColNames.add(fs.getName());
targetTableColTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()));
}
Map<String, String> partSpec = qb.getMetaData().getPartSpecForAlias(dest);
if(partSpec != null) {
//find dynamic partition columns
//relies on consistent order via LinkedHashMap
for(Map.Entry<String, String> partKeyVal : partSpec.entrySet()) {
if (partKeyVal.getValue() == null) {
targetTableColNames.add(partKeyVal.getKey());//these must be after non-partition cols
targetTableColTypes.add(TypeInfoFactory.stringTypeInfo);
}
}
}
RowResolver newOutputRR = new RowResolver();
//now make the select produce <regular columns>,<dynamic partition columns> with
//where missing columns are NULL-filled
for (int i = 0; i < targetTableColNames.size(); i++) {
String f = targetTableColNames.get(i);
if(targetCol2Projection.containsKey(f)) {
//put existing column in new list to make sure it is in the right position
new_col_list.add(targetCol2Projection.get(f));
ColumnInfo ci = targetCol2ColumnInfo.get(f);//todo: is this OK?
ci.setInternalName(getColumnInternalName(colListPos));
newOutputRR.put(ci.getTabAlias(), ci.getInternalName(), ci);
}
else {
//add new 'synthetic' columns for projections not provided by Select
ExprNodeDesc exp = new ExprNodeConstantDesc(targetTableColTypes.get(i), null);
new_col_list.add(exp);
final String tableAlias = null;//this column doesn't come from any table
ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(colListPos),
exp.getWritableObjectInspector(), tableAlias, false);
newOutputRR.put(colInfo.getTabAlias(), colInfo.getInternalName(), colInfo);
}
colListPos++;
}
col_list.clear();
col_list.addAll(new_col_list);
return newOutputRR;
}
String recommendName(ExprNodeDesc exp, String colAlias) {
if (!colAlias.startsWith(autogenColAliasPrfxLbl)) {
return null;
}
String column = ExprNodeDescUtils.recommendInputName(exp);
if (column != null && !column.startsWith(autogenColAliasPrfxLbl)) {
return column;
}
return null;
}
String getAutogenColAliasPrfxLbl() {
return this.autogenColAliasPrfxLbl;
}
boolean autogenColAliasPrfxIncludeFuncName() {
return this.autogenColAliasPrfxIncludeFuncName;
}
/**
* Class to store GenericUDAF related information.
*/
public static class GenericUDAFInfo {
public ArrayList<ExprNodeDesc> convertedParameters;
public GenericUDAFEvaluator genericUDAFEvaluator;
public TypeInfo returnType;
}
/**
* Convert exprNodeDesc array to Typeinfo array.
*/
static ArrayList<TypeInfo> getTypeInfo(ArrayList<ExprNodeDesc> exprs) {
ArrayList<TypeInfo> result = new ArrayList<TypeInfo>();
for (ExprNodeDesc expr : exprs) {
result.add(expr.getTypeInfo());
}
return result;
}
/**
* Convert exprNodeDesc array to ObjectInspector array.
*/
static ArrayList<ObjectInspector> getWritableObjectInspector(ArrayList<ExprNodeDesc> exprs) {
ArrayList<ObjectInspector> result = new ArrayList<ObjectInspector>();
for (ExprNodeDesc expr : exprs) {
result.add(expr.getWritableObjectInspector());
}
return result;
}
/**
* Convert TypeInfo array to standard writable ObjectInspector array.
*/
static ObjectInspector[] getStandardObjectInspector(ArrayList<TypeInfo> exprs) {
ObjectInspector[] result = new ObjectInspector[exprs.size()];
for (int i = 0; i < exprs.size(); i++) {
result[i] = TypeInfoUtils
.getStandardWritableObjectInspectorFromTypeInfo(exprs.get(i));
}
return result;
}
/**
* Returns the GenericUDAFEvaluator for the aggregation. This is called once
* for each GroupBy aggregation.
*/
public static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName,
ArrayList<ExprNodeDesc> aggParameters, ASTNode aggTree,
boolean isDistinct, boolean isAllColumns)
throws SemanticException {
ArrayList<ObjectInspector> originalParameterTypeInfos =
getWritableObjectInspector(aggParameters);
GenericUDAFEvaluator result = FunctionRegistry.getGenericUDAFEvaluator(
aggName, originalParameterTypeInfos, isDistinct, isAllColumns);
if (null == result) {
String reason = "Looking for UDAF Evaluator\"" + aggName
+ "\" with parameters " + originalParameterTypeInfos;
throw new SemanticException(ErrorMsg.INVALID_FUNCTION_SIGNATURE.getMsg(
(ASTNode) aggTree.getChild(0), reason));
}
return result;
}
/**
* Returns the GenericUDAFInfo struct for the aggregation.
*
* @param aggName
* The name of the UDAF.
* @param aggParameters
* The exprNodeDesc of the original parameters
* @param aggTree
* The ASTNode node of the UDAF in the query.
* @return GenericUDAFInfo
* @throws SemanticException
* when the UDAF is not found or has problems.
*/
public static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator,
GenericUDAFEvaluator.Mode emode, ArrayList<ExprNodeDesc> aggParameters)
throws SemanticException {
GenericUDAFInfo r = new GenericUDAFInfo();
// set r.genericUDAFEvaluator
r.genericUDAFEvaluator = evaluator;
// set r.returnType
ObjectInspector returnOI = null;
try {
ArrayList<ObjectInspector> aggOIs = getWritableObjectInspector(aggParameters);
ObjectInspector[] aggOIArray = new ObjectInspector[aggOIs.size()];
for (int ii = 0; ii < aggOIs.size(); ++ii) {
aggOIArray[ii] = aggOIs.get(ii);
}
returnOI = r.genericUDAFEvaluator.init(emode, aggOIArray);
r.returnType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI);
} catch (HiveException e) {
throw new SemanticException(e);
}
// set r.convertedParameters
// TODO: type conversion
r.convertedParameters = aggParameters;
return r;
}
public static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode(
GroupByDesc.Mode mode, boolean isDistinct) {
switch (mode) {
case COMPLETE:
return GenericUDAFEvaluator.Mode.COMPLETE;
case PARTIAL1:
return GenericUDAFEvaluator.Mode.PARTIAL1;
case PARTIAL2:
return GenericUDAFEvaluator.Mode.PARTIAL2;
case PARTIALS:
return isDistinct ? GenericUDAFEvaluator.Mode.PARTIAL1
: GenericUDAFEvaluator.Mode.PARTIAL2;
case FINAL:
return GenericUDAFEvaluator.Mode.FINAL;
case HASH:
return GenericUDAFEvaluator.Mode.PARTIAL1;
case MERGEPARTIAL:
return isDistinct ? GenericUDAFEvaluator.Mode.COMPLETE
: GenericUDAFEvaluator.Mode.FINAL;
default:
throw new RuntimeException("internal error in groupByDescModeToUDAFMode");
}
}
/**
* Check if the given internalName represents a constant parameter in aggregation parameters
* of an aggregation tree.
* This method is only invoked when map-side aggregation is not involved. In this case,
* every parameter in every aggregation tree should already have a corresponding ColumnInfo,
* which is generated when the corresponding ReduceSinkOperator of the GroupByOperator being
* generated is created. If we find that this parameter is a constant parameter,
* we will return the corresponding ExprNodeDesc in reduceValues, and we will not need to
* use a new ExprNodeColumnDesc, which can not be treated as a constant parameter, for this
* parameter (since the writableObjectInspector of an ExprNodeColumnDesc will not be
* an instance of ConstantObjectInspector).
*
* @param reduceValues
* value columns of the corresponding ReduceSinkOperator
* @param internalName
* the internal name of this parameter
* @return the ExprNodeDesc of the constant parameter if the given internalName represents
* a constant parameter; otherwise, return null
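* e.g. (illustrative) internalName "VALUE._col2" is looked up as position 2 in reduceValues.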
*/
public static ExprNodeDesc isConstantParameterInAggregationParameters(String internalName,
List<ExprNodeDesc> reduceValues) {
// only the pattern of "VALUE._col([0-9]+)" should be handled.
String[] terms = internalName.split("\\.");
if (terms.length != 2 || reduceValues == null) {
return null;
}
if (Utilities.ReduceField.VALUE.toString().equals(terms[0])) {
int pos = getPositionFromInternalName(terms[1]);
if (pos >= 0 && pos < reduceValues.size()) {
ExprNodeDesc reduceValue = reduceValues.get(pos);
if (reduceValue != null) {
if (reduceValue.getWritableObjectInspector() instanceof ConstantObjectInspector) {
// this internalName represents a constant parameter in aggregation parameters
return reduceValue;
}
}
}
}
return null;
}
/**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
*
* @param mode
* The mode of the aggregation (PARTIAL1 or COMPLETE)
* @param genericUDAFEvaluators
* If not null, this function will store the mapping from Aggregation
* StringTree to the genericUDAFEvaluator in this parameter, so it
* can be used in the next-stage GroupBy aggregations.
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo,
String dest, Operator input, ReduceSinkOperator rs, GroupByDesc.Mode mode,
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators)
throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx
.get(input).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), exprInfo
.getInternalName(), "", false));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), null, false);
groupByOutputRowResolver.putExpression(grpbyExpr,
oColInfo);
addAlternateGByKeyMappings(grpbyExpr, oColInfo, input, groupByOutputRowResolver);
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// For each aggregation
HashMap<String, ASTNode> aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
assert (aggregationTrees != null);
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
List<String> inputKeyCols = rs.getConf().getOutputKeyColumnNames();
if (inputKeyCols.size() > 0) {
lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
}
List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
int numDistinctUDFs = 0;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
// This is the GenericUDAF name
String aggName = unescapeIdentifier(value.getChild(0).getText());
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
// Convert children to aggParameters
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ColumnInfo paraExprInfo =
groupByInputRowResolver.getExpression(paraExpr);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
if (isDistinct && lastKeyColName != null) {
// if aggr is distinct, the parameter name is constructed as
// KEY.lastKeyColName:<tag>._colx
paraExpression = Utilities.ReduceField.KEY.name() + "." +
lastKeyColName + ":" + numDistinctUDFs + "." +
getColumnInternalName(i - 1);
}
ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(),
paraExprInfo.getIsVirtualCol());
ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(
paraExprInfo.getInternalName(), reduceValues);
if (reduceValue != null) {
// this parameter is a constant
expr = reduceValue;
}
aggParameters.add(expr);
}
if (isDistinct) {
numDistinctUDFs++;
}
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(
aggName, aggParameters, value, isDistinct, isAllColumns);
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(),
udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct,
amode));
String field = getColumnInternalName(groupByKeys.size()
+ aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(value, new ColumnInfo(
field, udaf.returnType, "", false));
// Save the evaluator so that it can be used by the next-stage
// GroupByOperators
if (genericUDAFEvaluators != null) {
genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
}
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf
.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
false, groupByMemoryUsage, memoryThreshold, null, false, -1, numDistinctUDFs > 0),
new RowSchema(groupByOutputRowResolver.getColumnInfos()),
input), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
// Add the grouping set key to the group by operator.
// This is not the first group by operator, but it is a subsequent group by operator
// which is forwarding the grouping keys introduced by the grouping sets.
// For eg: consider: select key, value, count(1) from T group by key, value with rollup.
// Assuming map-side aggregation and no skew, the plan would look like:
//
// TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink
//
// This function is called for GroupBy2 to pass the additional grouping keys introduced by
// GroupBy1 for the grouping set (corresponding to the rollup).
private void addGroupingSetKey(List<ExprNodeDesc> groupByKeys,
RowResolver groupByInputRowResolver,
RowResolver groupByOutputRowResolver,
List<String> outputColumnNames,
Map<String, ExprNodeDesc> colExprMap) throws SemanticException {
// For grouping sets, add a dummy grouping key
String groupingSetColumnName =
groupByInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName();
ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo,
groupingSetColumnName, null, false);
groupByKeys.add(inputExpr);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(),
new ColumnInfo(
field,
TypeInfoFactory.intTypeInfo,
null,
true));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// Process grouping set for the reduce sink operator
// For eg: consider: select key, value, count(1) from T group by key, value with rollup.
// Assuming map-side aggregation and no skew, the plan would look like:
//
// TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink
//
// This function is called for ReduceSink to add the additional grouping keys introduced by
// GroupBy1 into the reduce keys.
private void processGroupingSetReduceSinkOperator(RowResolver reduceSinkInputRowResolver,
RowResolver reduceSinkOutputRowResolver,
List<ExprNodeDesc> reduceKeys,
List<String> outputKeyColumnNames,
Map<String, ExprNodeDesc> colExprMap) throws SemanticException {
// add a key for reduce sink
String groupingSetColumnName =
reduceSinkInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName();
ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo,
groupingSetColumnName, null, false);
reduceKeys.add(inputExpr);
outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1));
String field = Utilities.ReduceField.KEY.toString() + "."
+ getColumnInternalName(reduceKeys.size() - 1);
ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(
reduceKeys.size() - 1).getTypeInfo(), null, true);
reduceSinkOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), colInfo);
colExprMap.put(colInfo.getInternalName(), inputExpr);
}
/**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
*
* @param mode
* The mode of the aggregation (MERGEPARTIAL, PARTIAL2)
* @param genericUDAFEvaluators
* The mapping from Aggregation StringTree to the
* genericUDAFEvaluator.
* @param groupingSets
* list of grouping sets
* @param groupingSetsPresent
* whether grouping sets are present in this query
* @param groupingSetsConsumedCurrentMR
* whether grouping sets are consumed by this group by
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo,
String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode,
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators,
List<Integer> groupingSets,
boolean groupingSetsPresent,
boolean groupingSetsNeedAdditionalMRJob) throws SemanticException {
ArrayList<String> outputColumnNames = new ArrayList<String>();
RowResolver groupByInputRowResolver = opParseCtx
.get(reduceSinkOperatorInfo).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
groupByKeys.add(new ExprNodeColumnDesc(exprInfo));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
groupByOutputRowResolver.putExpression(grpbyExpr,
oColInfo);
addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo, groupByOutputRowResolver);
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// This is only needed if a new grouping set key is being created
int groupingSetsPosition = -1;
// For grouping sets, add a dummy grouping key
if (groupingSetsPresent) {
groupingSetsPosition = groupByKeys.size();
// Consider the query: select a,b, count(1) from T group by a,b with cube;
// where it is being executed in a single map-reduce job
// The plan is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
// GroupBy1 already added the grouping id as part of the row
// This function is called for GroupBy2 to add grouping id as part of the groupby keys
if (!groupingSetsNeedAdditionalMRJob) {
addGroupingSetKey(
groupByKeys,
groupByInputRowResolver,
groupByOutputRowResolver,
outputColumnNames,
colExprMap);
}
else {
// The grouping set has not yet been processed. Create a new grouping key
// Consider the query: select a,b, count(1) from T group by a,b with cube;
// where it is being executed in 2 map-reduce jobs
// The plan for 1st MR is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
// GroupBy1/ReduceSink worked as if grouping sets were not present
// This function is called for GroupBy2 to create new rows for grouping sets
// For each input row (a,b), 4 rows are created for the example above:
// (a,b), (a,null), (null, b), (null, null)
createNewGroupingKey(groupByKeys,
outputColumnNames,
groupByOutputRowResolver,
colExprMap);
}
}
HashMap<String, ASTNode> aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
List<ExprNodeDesc> reduceValues = null;
if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
List<String> inputKeyCols = ((ReduceSinkDesc)
reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
if (inputKeyCols.size() > 0) {
lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
}
reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols();
}
int numDistinctUDFs = 0;
boolean containsDistinctAggr = false;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
String aggName = unescapeIdentifier(value.getChild(0).getText());
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI);
containsDistinctAggr = containsDistinctAggr || isDistinct;
// If the function is distinct, partial aggregation has not been done on
// the client side.
// If distPartAgg is set, the client is letting us know that partial
// aggregation has not been done.
// For example: select a, count(b+c), count(distinct d+e) group by a
// For count(b+c), if partial aggregation has been performed, then we
// directly look for count(b+c).
// Otherwise, we look for b+c.
// For distincts, partial aggregation is never performed on the client
// side, so always look for the parameters: d+e
if (isDistinct) {
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ColumnInfo paraExprInfo =
groupByInputRowResolver.getExpression(paraExpr);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN
.getMsg(paraExpr));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
if (isDistinct && lastKeyColName != null) {
// if the aggregation is distinct, the parameter name is constructed as
// KEY.lastKeyColName:<numDistinctUDFs>._colx
paraExpression = Utilities.ReduceField.KEY.name() + "." +
lastKeyColName + ":" + numDistinctUDFs + "."
+ getColumnInternalName(i - 1);
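// Illustration only: with lastKeyColName "_col2", the first distinct UDF
// (numDistinctUDFs == 0) and its first parameter, this produces a name of the
// form "KEY._col2:0._col0"; the actual internal column names depend on the plan.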
}
ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(),
paraExprInfo.getIsVirtualCol());
ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(
paraExprInfo.getInternalName(), reduceValues);
if (reduceValue != null) {
// this parameter is a constant
expr = reduceValue;
}
aggParameters.add(expr);
}
} else {
ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(), paraExprInfo
.getIsVirtualCol()));
}
if (isDistinct) {
numDistinctUDFs++;
}
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(),
udaf.genericUDAFEvaluator, udaf.convertedParameters,
(mode != GroupByDesc.Mode.FINAL && isDistinct), amode));
String field = getColumnInternalName(groupByKeys.size()
+ aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(value, new ColumnInfo(
field, udaf.returnType, "", false));
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf
.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
// Nothing special needs to be done for grouping sets if
// this is the final group by operator, and multiple rows corresponding to the
// grouping sets have been generated upstream.
// However, if an additional MR job has been created to handle grouping sets,
// additional rows corresponding to grouping sets need to be created here.
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
groupByMemoryUsage, memoryThreshold,
groupingSets,
groupingSetsPresent && groupingSetsNeedAdditionalMRJob,
groupingSetsPosition, containsDistinctAggr),
new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo),
groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
/*
* Create a new grouping key for grouping id.
* A dummy grouping id is added. At runtime, the group by operator
* creates 'n' rows per input row, where 'n' is the number of grouping sets.
*/
private void createNewGroupingKey(List<ExprNodeDesc> groupByKeys,
List<String> outputColumnNames,
RowResolver groupByOutputRowResolver,
Map<String, ExprNodeDesc> colExprMap) {
// The value for the constant does not matter. It is replaced by the grouping set
// value for the actual implementation
ExprNodeConstantDesc constant = new ExprNodeConstantDesc(0);
groupByKeys.add(constant);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(),
new ColumnInfo(
field,
TypeInfoFactory.intTypeInfo,
null,
true));
colExprMap.put(field, constant);
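// Note (illustrative): the grouping id registered above is what surfaces to users
// as the GROUPING__ID virtual column, e.g.
//   SELECT key, value, GROUPING__ID, count(1) FROM t GROUP BY key, value WITH ROLLUP
// where it identifies the grouping set each output row belongs to.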
}
/**
* Generate the map-side GroupByOperator for the Query Block
* (qb.getParseInfo().getXXX(dest)). The new GroupByOperator will be a child
* of the inputOperatorInfo.
*
* @param mode
* The mode of the aggregation (HASH)
* @param genericUDAFEvaluators
* If not null, this function will store the mapping from Aggregation
* StringTree to the genericUDAFEvaluator in this parameter, so it
* can be used in the next-stage GroupBy aggregations.
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanMapGroupByOperator(QB qb,
String dest,
List<ASTNode> grpByExprs,
Operator inputOperatorInfo,
GroupByDesc.Mode mode,
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators,
List<Integer> groupingSetKeys,
boolean groupingSetsPresent) throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo)
.getRowResolver();
QBParseInfo parseInfo = qb.getParseInfo();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr,
groupByInputRowResolver);
if ((grpByExprNode instanceof ExprNodeColumnDesc) && ExprNodeDescUtils.indexOf(grpByExprNode, groupByKeys) >= 0) {
// Skip duplicate grouping keys; this happens when a column alias is defined.
grpByExprs.remove(i--);
continue;
}
groupByKeys.add(grpByExprNode);
String field = getColumnInternalName(i);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(grpbyExpr,
new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// The grouping set key is present after the grouping keys, before the distinct keys
int groupingSetsPosition = -1;
// For grouping sets, add a dummy grouping key
// This dummy key needs to be added as a reduce key
// For example, consider: select key, value, count(1) from T group by key, value with rollup.
// Assuming map-side aggregation and no skew, the plan would look like:
//
// TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink
//
// This function is called for GroupBy1 to create an additional grouping key
// for the grouping set (corresponding to the rollup).
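// As a point of reference (standard ROLLUP semantics, shown only for illustration):
// "group by key, value with rollup" evaluates the grouping sets
//   (key, value), (key), ()
// so the final aggregation emits one row per group for each of these sets.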
if (groupingSetsPresent) {
groupingSetsPosition = groupByKeys.size();
createNewGroupingKey(groupByKeys,
outputColumnNames,
groupByOutputRowResolver,
colExprMap);
}
// If there is a distinctFuncExp, add all parameters to the reduceKeys.
if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) {
List<ASTNode> list = parseInfo.getDistinctFuncExprsForClause(dest);
for (ASTNode value : list) {
// 0 is function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode parameter = (ASTNode) value.getChild(i);
if (groupByOutputRowResolver.getExpression(parameter) == null) {
ExprNodeDesc distExprNode = genExprNodeDesc(parameter,
groupByInputRowResolver);
groupByKeys.add(distExprNode);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(parameter, new ColumnInfo(
field, distExprNode.getTypeInfo(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
}
}
}
// For each aggregation
HashMap<String, ASTNode> aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
assert (aggregationTrees != null);
boolean containsDistinctAggr = false;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
String aggName = unescapeIdentifier(value.getChild(0).getText());
ArrayList