/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.parse;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import org.antlr.runtime.ClassicToken;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.Tree;
import org.antlr.runtime.tree.TreeVisitor;
import org.antlr.runtime.tree.TreeVisitorAction;
import org.antlr.runtime.tree.TreeWizard;
import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.ArchiveUtils;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RecordReader;
import org.apache.hadoop.hive.ql.exec.RecordWriter;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.NullRowsInputFormat;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.metadata.DummyPartition;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.Optimizer;
import org.apache.hadoop.hive.ql.optimizer.optiq.HiveDefaultRelMetadataProvider;
import org.apache.hadoop.hive.ql.optimizer.optiq.HiveOptiqUtil;
import org.apache.hadoop.hive.ql.optimizer.optiq.HiveTypeSystemImpl;
import org.apache.hadoop.hive.ql.optimizer.optiq.OptiqSemanticException;
import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.optiq.TraitsUtil;
import org.apache.hadoop.hive.ql.optimizer.optiq.cost.HiveVolcanoPlanner;
import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveAggregateRel;
import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel;
import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveJoinRel;
import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveProjectRel;
import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveRel;
import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveSortRel;
import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveUnionRel;
import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePartitionPrunerRule;
import org.apache.hadoop.hive.ql.optimizer.optiq.rules.HivePushFilterPastJoinRule;
import org.apache.hadoop.hive.ql.optimizer.optiq.translator.ASTConverter;
import org.apache.hadoop.hive.ql.optimizer.optiq.translator.JoinCondTypeCheckProcFactory;
import org.apache.hadoop.hive.ql.optimizer.optiq.translator.JoinTypeCheckCtx;
import org.apache.hadoop.hive.ql.optimizer.optiq.translator.RexNodeConverter;
import org.apache.hadoop.hive.ql.optimizer.optiq.translator.SqlFunctionConverter;
import org.apache.hadoop.hive.ql.optimizer.optiq.translator.TypeConverter;
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec.SpecType;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFInputSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputType;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionedTableFunctionSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec;
import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType;
import org.apache.hadoop.hive.ql.parse.SubQueryUtils.ISubQueryJoinInfo;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.CurrentRowSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.RangeBoundarySpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.ValueBoundarySpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFrameSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc;
import org.apache.hadoop.hive.ql.plan.CreateViewDesc;
import org.apache.hadoop.hive.ql.plan.DDLWork;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
import org.apache.hadoop.hive.ql.plan.ExtractDesc;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
import org.apache.hadoop.hive.ql.plan.ForwardDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc;
import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc;
import org.apache.hadoop.hive.ql.plan.LimitDesc;
import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PTFDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.ScriptDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.UDTFDesc;
import org.apache.hadoop.hive.ql.plan.UnionDesc;
import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef;
import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
import org.apache.hadoop.hive.ql.stats.StatsFactory;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
import org.apache.hadoop.hive.serde2.NullStructSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.InputFormat;
import org.eigenbase.rel.AggregateCall;
import org.eigenbase.rel.AggregateRelBase;
import org.eigenbase.rel.Aggregation;
import org.eigenbase.rel.FilterRelBase;
import org.eigenbase.rel.InvalidRelException;
import org.eigenbase.rel.JoinRelBase;
import org.eigenbase.rel.JoinRelType;
import org.eigenbase.rel.RelCollation;
import org.eigenbase.rel.RelCollationImpl;
import org.eigenbase.rel.RelFactories;
import org.eigenbase.rel.RelFieldCollation;
import org.eigenbase.rel.RelNode;
import org.eigenbase.rel.metadata.CachingRelMetadataProvider;
import org.eigenbase.rel.metadata.ChainedRelMetadataProvider;
import org.eigenbase.rel.metadata.RelMetadataProvider;
import org.eigenbase.rel.rules.ConvertMultiJoinRule;
import org.eigenbase.rel.rules.FilterAggregateTransposeRule;
import org.eigenbase.rel.rules.LoptOptimizeJoinRule;
import org.eigenbase.rel.rules.MergeFilterRule;
import org.eigenbase.rel.rules.PushFilterPastProjectRule;
import org.eigenbase.rel.rules.PushFilterPastSetOpRule;
import org.eigenbase.rel.rules.PushSemiJoinPastFilterRule;
import org.eigenbase.rel.rules.PushSemiJoinPastJoinRule;
import org.eigenbase.rel.rules.PushSemiJoinPastProjectRule;
import org.eigenbase.rel.rules.SemiJoinRel;
import org.eigenbase.rel.rules.TransitivePredicatesOnJoinRule;
import org.eigenbase.relopt.RelOptCluster;
import org.eigenbase.relopt.RelOptPlanner;
import org.eigenbase.relopt.RelOptQuery;
import org.eigenbase.relopt.RelOptRule;
import org.eigenbase.relopt.RelOptSchema;
import org.eigenbase.relopt.RelOptUtil;
import org.eigenbase.relopt.RelTraitSet;
import org.eigenbase.relopt.hep.HepMatchOrder;
import org.eigenbase.relopt.hep.HepPlanner;
import org.eigenbase.relopt.hep.HepProgram;
import org.eigenbase.relopt.hep.HepProgramBuilder;
import org.eigenbase.reltype.RelDataType;
import org.eigenbase.reltype.RelDataTypeFactory;
import org.eigenbase.reltype.RelDataTypeField;
import org.eigenbase.rex.RexBuilder;
import org.eigenbase.rex.RexFieldCollation;
import org.eigenbase.rex.RexInputRef;
import org.eigenbase.rex.RexNode;
import org.eigenbase.rex.RexUtil;
import org.eigenbase.rex.RexWindowBound;
import org.eigenbase.sql.SqlAggFunction;
import org.eigenbase.sql.SqlCall;
import org.eigenbase.sql.SqlExplainLevel;
import org.eigenbase.sql.SqlKind;
import org.eigenbase.sql.SqlLiteral;
import org.eigenbase.sql.SqlNode;
import org.eigenbase.sql.SqlWindow;
import org.eigenbase.sql.parser.SqlParserPos;
import org.eigenbase.sql.type.SqlTypeName;
import org.eigenbase.sql2rel.RelFieldTrimmer;
import org.eigenbase.util.CompositeList;
import org.eigenbase.util.ImmutableIntList;
import org.eigenbase.util.Pair;
import java.io.IOException;
import java.io.Serializable;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.UndeclaredThrowableException;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import net.hydromatic.optiq.SchemaPlus;
import net.hydromatic.optiq.tools.Frameworks;
/**
* Implementation of the semantic analyzer. It generates the query plan.
* There are other, more specific semantic analyzers for some Hive operations,
* such as DDLSemanticAnalyzer for DDL operations.
*/
public class SemanticAnalyzer extends BaseSemanticAnalyzer {
public static final String DUMMY_DATABASE = "_dummy_database";
public static final String DUMMY_TABLE = "_dummy_table";
// Max characters when auto generating the column name with func name
private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
private static final String VALUES_TMP_TABLE_NAME_PREFIX = "Values__Tmp__Table__";
@VisibleForTesting
static final String ACID_TABLE_PROPERTY = "transactional";
private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner;
private HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
private HashMap<String, Operator<? extends OperatorDesc>> topOps;
private HashMap<String, Operator<? extends OperatorDesc>> topSelOps;
private LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx;
private List<LoadTableDesc> loadTableWork;
private List<LoadFileDesc> loadFileWork;
private Map<JoinOperator, QBJoinTree> joinContext;
private Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext;
private final HashMap<TableScanOperator, Table> topToTable;
private final Map<FileSinkOperator, Table> fsopToTable;
private final List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting;
private final HashMap<TableScanOperator, Map<String, String>> topToTableProps;
private QB qb;
private ASTNode ast;
private int destTableId;
private UnionProcContext uCtx;
List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer;
private HashMap<TableScanOperator, sampleDesc> opToSamplePruner;
private final Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner;
/**
* a map for the split sampling, from alias to an instance of SplitSample
* that describes percentage and number.
*/
private final HashMap<String, SplitSample> nameToSplitSample;
Map<GroupByOperator, Set<String>> groupOpToInputTables;
Map<String, PrunedPartitionList> prunedPartitions;
private List<FieldSchema> resultSchema;
private CreateViewDesc createVwDesc;
private ArrayList<String> viewsExpanded;
private ASTNode viewSelect;
private final UnparseTranslator unparseTranslator;
private final GlobalLimitCtx globalLimitCtx;
// prefix for column names auto generated by hive
private final String autogenColAliasPrfxLbl;
private final boolean autogenColAliasPrfxIncludeFuncName;
// Keep track of view alias to read entity corresponding to the view
// For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T
// keeps track of aliases for V3, V3:V2, V3:V2:V1.
// This is used when T is added as an input for the query; the parents of T are
// derived from the alias V3:V2:V1:T
private final Map<String, ReadEntity> viewAliasToInput;
// flag for no scan during analyze ... compute statistics
protected boolean noscan;
//flag for partial scan during analyze ... compute statistics
protected boolean partialscan;
private volatile boolean runCBO = true; // TODO: why is this volatile?
private volatile boolean disableJoinMerge = false;
/*
* Capture the CTE definitions in a Query.
*/
private final Map<String, ASTNode> aliasToCTEs;
/*
* Used to check recursive CTE invocations. Similar to viewsExpanded
*/
private ArrayList<String> ctesExpanded;
/** Not thread-safe. */
private final ASTSearcher astSearcher = new ASTSearcher();
private static class Phase1Ctx {
String dest;
int nextNum;
}
protected SemanticAnalyzer(HiveConf conf, boolean runCBO) throws SemanticException {
this(conf);
this.runCBO = runCBO;
}
public SemanticAnalyzer(HiveConf conf) throws SemanticException {
super(conf);
opToPartPruner = new HashMap<TableScanOperator, ExprNodeDesc>();
opToPartList = new HashMap<TableScanOperator, PrunedPartitionList>();
opToSamplePruner = new HashMap<TableScanOperator, sampleDesc>();
nameToSplitSample = new HashMap<String, SplitSample>();
topOps = new HashMap<String, Operator<? extends OperatorDesc>>();
topSelOps = new HashMap<String, Operator<? extends OperatorDesc>>();
loadTableWork = new ArrayList<LoadTableDesc>();
loadFileWork = new ArrayList<LoadFileDesc>();
opParseCtx = new LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext>();
joinContext = new HashMap<JoinOperator, QBJoinTree>();
smbMapJoinContext = new HashMap<SMBMapJoinOperator, QBJoinTree>();
topToTable = new HashMap<TableScanOperator, Table>();
fsopToTable = new HashMap<FileSinkOperator, Table>();
reduceSinkOperatorsAddedByEnforceBucketingSorting = new ArrayList<ReduceSinkOperator>();
topToTableProps = new HashMap<TableScanOperator, Map<String, String>>();
destTableId = 1;
uCtx = null;
listMapJoinOpsNoReducer = new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>();
groupOpToInputTables = new HashMap<GroupByOperator, Set<String>>();
prunedPartitions = new HashMap<String, PrunedPartitionList>();
unparseTranslator = new UnparseTranslator(conf);
autogenColAliasPrfxLbl = HiveConf.getVar(conf,
HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL);
autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME);
queryProperties = new QueryProperties();
opToPartToSkewedPruner = new HashMap<TableScanOperator, Map<String, ExprNodeDesc>>();
aliasToCTEs = new HashMap<String, ASTNode>();
globalLimitCtx = new GlobalLimitCtx();
viewAliasToInput = new HashMap<String, ReadEntity>();
noscan = partialscan = false;
}
@Override
protected void reset(boolean clearPartsCache) {
super.reset(true);
if(clearPartsCache) {
prunedPartitions.clear();
}
loadTableWork.clear();
loadFileWork.clear();
topOps.clear();
topSelOps.clear();
destTableId = 1;
idToTableNameMap.clear();
qb = null;
ast = null;
uCtx = null;
joinContext.clear();
smbMapJoinContext.clear();
opParseCtx.clear();
groupOpToInputTables.clear();
disableJoinMerge = false;
aliasToCTEs.clear();
topToTable.clear();
opToPartPruner.clear();
opToPartList.clear();
opToPartToSkewedPruner.clear();
opToSamplePruner.clear();
nameToSplitSample.clear();
fsopToTable.clear();
resultSchema = null;
createVwDesc = null;
viewsExpanded = null;
viewSelect = null;
ctesExpanded = null;
globalLimitCtx.disableOpt();
viewAliasToInput.clear();
reduceSinkOperatorsAddedByEnforceBucketingSorting.clear();
topToTableProps.clear();
listMapJoinOpsNoReducer.clear();
unparseTranslator.clear();
queryProperties.clear();
outputs.clear();
}
public void initParseCtx(ParseContext pctx) {
opToPartPruner = pctx.getOpToPartPruner();
opToPartList = pctx.getOpToPartList();
opToSamplePruner = pctx.getOpToSamplePruner();
topOps = pctx.getTopOps();
topSelOps = pctx.getTopSelOps();
opParseCtx = pctx.getOpParseCtx();
loadTableWork = pctx.getLoadTableWork();
loadFileWork = pctx.getLoadFileWork();
joinContext = pctx.getJoinContext();
smbMapJoinContext = pctx.getSmbMapJoinContext();
ctx = pctx.getContext();
destTableId = pctx.getDestTableId();
idToTableNameMap = pctx.getIdToTableNameMap();
uCtx = pctx.getUCtx();
listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer();
qb = pctx.getQB();
groupOpToInputTables = pctx.getGroupOpToInputTables();
prunedPartitions = pctx.getPrunedPartitions();
fetchTask = pctx.getFetchTask();
setLineageInfo(pctx.getLineageInfo());
}
public ParseContext getParseContext() {
return new ParseContext(conf, qb, ast, opToPartPruner, opToPartList, topOps,
topSelOps, opParseCtx, joinContext, smbMapJoinContext, topToTable, topToTableProps,
fsopToTable, loadTableWork,
loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
opToPartToSkewedPruner, viewAliasToInput,
reduceSinkOperatorsAddedByEnforceBucketingSorting,
queryProperties);
}
@SuppressWarnings("nls")
public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias)
throws SemanticException {
assert (ast.getToken() != null);
switch (ast.getToken().getType()) {
case HiveParser.TOK_QUERY: {
QB qb = new QB(id, alias, true);
Phase1Ctx ctx_1 = initPhase1Ctx();
doPhase1(ast, qb, ctx_1, null);
qbexpr.setOpcode(QBExpr.Opcode.NULLOP);
qbexpr.setQB(qb);
}
break;
case HiveParser.TOK_UNION: {
qbexpr.setOpcode(QBExpr.Opcode.UNION);
// query 1
assert (ast.getChild(0) != null);
QBExpr qbexpr1 = new QBExpr(alias + "-subquery1");
doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id + "-subquery1",
alias + "-subquery1");
qbexpr.setQBExpr1(qbexpr1);
// query 2
assert (ast.getChild(0) != null);
QBExpr qbexpr2 = new QBExpr(alias + "-subquery2");
doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + "-subquery2",
alias + "-subquery2");
qbexpr.setQBExpr2(qbexpr2);
}
break;
}
}
private LinkedHashMap<String, ASTNode> doPhase1GetAggregationsFromSelect(
ASTNode selExpr, QB qb, String dest) throws SemanticException {
// Iterate over the select expressions, searching for aggregation subtrees.
// Use String keys to eliminate duplicate trees.
LinkedHashMap<String, ASTNode> aggregationTrees = new LinkedHashMap<String, ASTNode>();
List<ASTNode> wdwFns = new ArrayList<ASTNode>();
for (int i = 0; i < selExpr.getChildCount(); ++i) {
ASTNode function = (ASTNode) selExpr.getChild(i);
if (function.getType() == HiveParser.TOK_SELEXPR ||
function.getType() == HiveParser.TOK_SUBQUERY_EXPR) {
function = (ASTNode)function.getChild(0);
}
doPhase1GetAllAggregations(function, aggregationTrees, wdwFns);
}
// window based aggregations are handled differently
for (ASTNode wdwFn : wdwFns) {
WindowingSpec spec = qb.getWindowingSpec(dest);
if(spec == null) {
queryProperties.setHasWindowing(true);
spec = new WindowingSpec();
qb.addDestToWindowingSpec(dest, spec);
}
HashMap<String, ASTNode> wExprsInDest = qb.getParseInfo().getWindowingExprsForClause(dest);
int wColIdx = spec.getWindowExpressions() == null ? 0 : spec.getWindowExpressions().size();
WindowFunctionSpec wFnSpec = processWindowFunction(wdwFn,
(ASTNode)wdwFn.getChild(wdwFn.getChildCount()-1));
// If this is a duplicate invocation of a function; don't add to WindowingSpec.
if ( wExprsInDest != null &&
wExprsInDest.containsKey(wFnSpec.getExpression().toStringTree())) {
continue;
}
wFnSpec.setAlias("_wcol" + wColIdx);
spec.addWindowFunction(wFnSpec);
qb.getParseInfo().addWindowingExprToClause(dest, wFnSpec.getExpression());
}
return aggregationTrees;
}
private void doPhase1GetColumnAliasesFromSelect(
ASTNode selectExpr, QBParseInfo qbp) {
for (int i = 0; i < selectExpr.getChildCount(); ++i) {
ASTNode selExpr = (ASTNode) selectExpr.getChild(i);
if ((selExpr.getToken().getType() == HiveParser.TOK_SELEXPR)
&& (selExpr.getChildCount() == 2)) {
String columnAlias = unescapeIdentifier(selExpr.getChild(1).getText());
qbp.setExprToColumnAlias((ASTNode) selExpr.getChild(0), columnAlias);
}
}
}
/**
* DFS-scan the expressionTree to find all aggregation subtrees and put them
* in aggregations.
*
* @param expressionTree
* @param aggregations
* the key to the HashTable is the toStringTree() representation of
* the aggregation subtree.
* @throws SemanticException
*/
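// For example (illustrative): in "select count(x), max(y) over (...) from t", count(x) is added
// to the aggregations map keyed by its lower-cased toStringTree(), while max(y) carries a
// TOK_WINDOWSPEC child and is collected into wdwFns instead.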
private void doPhase1GetAllAggregations(ASTNode expressionTree,
HashMap<String, ASTNode> aggregations, List<ASTNode> wdwFns) throws SemanticException {
int exprTokenType = expressionTree.getToken().getType();
if (exprTokenType == HiveParser.TOK_FUNCTION
|| exprTokenType == HiveParser.TOK_FUNCTIONDI
|| exprTokenType == HiveParser.TOK_FUNCTIONSTAR) {
assert (expressionTree.getChildCount() != 0);
if (expressionTree.getChild(expressionTree.getChildCount()-1).getType()
== HiveParser.TOK_WINDOWSPEC) {
wdwFns.add(expressionTree);
return;
}
if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {
String functionName = unescapeIdentifier(expressionTree.getChild(0)
.getText());
if(FunctionRegistry.impliesOrder(functionName)) {
throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
}
if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) {
if(containsLeadLagUDF(expressionTree)) {
throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
}
aggregations.put(expressionTree.toStringTree().toLowerCase(), expressionTree);
FunctionInfo fi = FunctionRegistry.getFunctionInfo(functionName);
if (!fi.isNative()) {
unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree
.getChild(0));
}
return;
}
}
}
for (int i = 0; i < expressionTree.getChildCount(); i++) {
doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i),
aggregations, wdwFns);
}
}
private List<ASTNode> doPhase1GetDistinctFuncExprs(
HashMap<String, ASTNode> aggregationTrees) throws SemanticException {
List<ASTNode> exprs = new ArrayList<ASTNode>();
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
assert (value != null);
if (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI) {
exprs.add(value);
}
}
return exprs;
}
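// generateErrorMessage builds messages of the form
// "<line>:<col> <message>. Error encountered near token '<token text>'",
// e.g. (illustrative) "3:21 Ambiguous table alias. Error encountered near token 't1'".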
public static String generateErrorMessage(ASTNode ast, String message) {
StringBuilder sb = new StringBuilder();
if (ast == null) {
sb.append("The abstract syntax tree is null");
return sb.toString();
}
sb.append(ast.getLine());
sb.append(":");
sb.append(ast.getCharPositionInLine());
sb.append(" ");
sb.append(message);
sb.append(". Error encountered near token '");
sb.append(ErrorMsg.getText(ast));
sb.append("'");
return sb.toString();
}
/**
* Goes through the tabref tree and finds the alias for the table. Once found,
* it records the table name-> alias association in aliasToTabs. It also makes
* an association from the alias to the table AST in parse info.
*
* @return the alias of the table
*/
private String processTable(QB qb, ASTNode tabref) throws SemanticException {
// For each table reference get the table name
// and the alias (if alias is not present, the table name
// is used as an alias)
int aliasIndex = 0;
int propsIndex = -1;
int tsampleIndex = -1;
int ssampleIndex = -1;
for (int index = 1; index < tabref.getChildCount(); index++) {
ASTNode ct = (ASTNode) tabref.getChild(index);
if (ct.getToken().getType() == HiveParser.TOK_TABLEBUCKETSAMPLE) {
tsampleIndex = index;
} else if (ct.getToken().getType() == HiveParser.TOK_TABLESPLITSAMPLE) {
ssampleIndex = index;
} else if (ct.getToken().getType() == HiveParser.TOK_TABLEPROPERTIES) {
propsIndex = index;
} else {
aliasIndex = index;
}
}
ASTNode tableTree = (ASTNode) (tabref.getChild(0));
String tabIdName = getUnescapedName(tableTree);
String alias;
if (aliasIndex != 0) {
alias = unescapeIdentifier(tabref.getChild(aliasIndex).getText());
}
else {
alias = getUnescapedUnqualifiedTableName(tableTree);
}
if (propsIndex >= 0) {
Tree propsAST = tabref.getChild(propsIndex);
Map<String, String> props = DDLSemanticAnalyzer.getProps((ASTNode) propsAST.getChild(0));
qb.setTabProps(alias, props);
}
// If the alias is already there then we have a conflict
if (qb.exists(alias)) {
throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref
.getChild(aliasIndex)));
}
if (tsampleIndex >= 0) {
ASTNode sampleClause = (ASTNode) tabref.getChild(tsampleIndex);
ArrayList<ASTNode> sampleCols = new ArrayList<ASTNode>();
if (sampleClause.getChildCount() > 2) {
for (int i = 2; i < sampleClause.getChildCount(); i++) {
sampleCols.add((ASTNode) sampleClause.getChild(i));
}
}
// TODO: For now only support sampling on up to two columns
// Need to change it to list of columns
if (sampleCols.size() > 2) {
throw new SemanticException(generateErrorMessage(
(ASTNode) tabref.getChild(0),
ErrorMsg.SAMPLE_RESTRICTION.getMsg()));
}
qb.getParseInfo().setTabSample(
alias,
new TableSample(
unescapeIdentifier(sampleClause.getChild(0).getText()),
unescapeIdentifier(sampleClause.getChild(1).getText()),
sampleCols));
if (unparseTranslator.isEnabled()) {
for (ASTNode sampleCol : sampleCols) {
unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol
.getChild(0));
}
}
} else if (ssampleIndex >= 0) {
ASTNode sampleClause = (ASTNode) tabref.getChild(ssampleIndex);
Tree type = sampleClause.getChild(0);
Tree numerator = sampleClause.getChild(1);
String value = unescapeIdentifier(numerator.getText());
SplitSample sample;
if (type.getType() == HiveParser.TOK_PERCENT) {
assertCombineInputFormat(numerator, "Percentage");
Double percent = Double.valueOf(value).doubleValue();
if (percent < 0 || percent > 100) {
throw new SemanticException(generateErrorMessage((ASTNode) numerator,
"Sampling percentage should be between 0 and 100"));
}
int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
sample = new SplitSample(percent, seedNum);
} else if (type.getType() == HiveParser.TOK_ROWCOUNT) {
sample = new SplitSample(Integer.valueOf(value));
} else {
assert type.getType() == HiveParser.TOK_LENGTH;
assertCombineInputFormat(numerator, "Total Length");
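// The literal is a byte count with an optional k/m/g suffix; the suffix is applied below
// via bit shifts, e.g. (illustrative) "100k" becomes 100 << 10 = 102400 bytes.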
long length = Integer.valueOf(value.substring(0, value.length() - 1));
char last = value.charAt(value.length() - 1);
if (last == 'k' || last == 'K') {
length <<= 10;
} else if (last == 'm' || last == 'M') {
length <<= 20;
} else if (last == 'g' || last == 'G') {
length <<= 30;
}
int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
sample = new SplitSample(length, seedNum);
}
String alias_id = getAliasId(alias, qb);
nameToSplitSample.put(alias_id, sample);
}
// Insert this map into the stats
qb.setTabAlias(alias, tabIdName);
qb.addAlias(alias);
qb.getParseInfo().setSrcForAlias(alias, tableTree);
unparseTranslator.addTableNameTranslation(tableTree, SessionState.get().getCurrentDatabase());
if (aliasIndex != 0) {
unparseTranslator.addIdentifierTranslation((ASTNode) tabref
.getChild(aliasIndex));
}
return alias;
}
// Generate a temp table out of a value clause
private ASTNode genValuesTempTable(ASTNode originalFrom) throws SemanticException {
// Pick a name for the table
SessionState ss = SessionState.get();
String tableName = VALUES_TMP_TABLE_NAME_PREFIX + ss.getNextValuesTempTableSuffix();
// Step 1, parse the values clause we were handed
List<? extends Node> fromChildren = originalFrom.getChildren();
// First child should be the virtual table ref
ASTNode virtualTableRef = (ASTNode)fromChildren.get(0);
assert virtualTableRef.getToken().getType() == HiveParser.TOK_VIRTUAL_TABREF :
"Expected first child of TOK_VIRTUAL_TABLE to be TOK_VIRTUAL_TABREF but was " +
virtualTableRef.getName();
List<? extends Node> virtualTableRefChildren = virtualTableRef.getChildren();
// First child of this should be the table name. If it's anonymous,
// then we don't have a table name.
ASTNode tabName = (ASTNode)virtualTableRefChildren.get(0);
if (tabName.getToken().getType() != HiveParser.TOK_ANONYMOUS) {
// TODO, if you want to make select ... from (values(...) as foo(...) work,
// you need to parse this list of columns names and build it into the table
throw new SemanticException(ErrorMsg.VALUES_TABLE_CONSTRUCTOR_NOT_SUPPORTED.getMsg());
}
// The second child of the TOK_VIRTUAL_TABLE should be TOK_VALUES_TABLE
ASTNode valuesTable = (ASTNode)fromChildren.get(1);
assert valuesTable.getToken().getType() == HiveParser.TOK_VALUES_TABLE :
"Expected second child of TOK_VIRTUAL_TABLE to be TOK_VALUE_TABLE but was " +
valuesTable.getName();
// Each of the children of TOK_VALUES_TABLE will be a TOK_VALUE_ROW
List<? extends Node> valuesTableChildren = valuesTable.getChildren();
// Now that we're going to start reading through the rows, open a file to write the rows to.
// If we leave this method before creating the temporary table we need to be sure to clean up
// this file.
Path tablePath = null;
FileSystem fs = null;
try {
tablePath = Warehouse.getDnsPath(new Path(ss.getTempTableSpace(), tableName), conf);
fs = tablePath.getFileSystem(conf);
fs.mkdirs(tablePath);
Path dataFile = new Path(tablePath, "data_file");
FSDataOutputStream out = fs.create(dataFile);
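// Rows are written as plain text: columns separated by ^A (\u0001) and rows by newlines,
// which lines up with the default delimiters of the TextFile temp table created in Step 2 below.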
List<FieldSchema> fields = new ArrayList<FieldSchema>();
boolean firstRow = true;
for (Node n : valuesTableChildren) {
ASTNode valuesRow = (ASTNode) n;
assert valuesRow.getToken().getType() == HiveParser.TOK_VALUE_ROW :
"Expected child of TOK_VALUE_TABLE to be TOK_VALUE_ROW but was " + valuesRow.getName();
// Each of the children of this should be a literal
List<? extends Node> valuesRowChildren = valuesRow.getChildren();
boolean isFirst = true;
int nextColNum = 1;
for (Node n1 : valuesRowChildren) {
ASTNode value = (ASTNode) n1;
if (firstRow) {
fields.add(new FieldSchema("tmp_values_col" + nextColNum++, "string", ""));
}
if (isFirst) isFirst = false;
else out.writeBytes("\u0001");
out.writeBytes(unparseExprForValuesClause(value));
}
out.writeBytes("\n");
firstRow = false;
}
out.close();
// Step 2, create a temp table, using the created file as the data
StorageFormat format = new StorageFormat(conf);
format.processStorageFormat("TextFile");
Table table = db.newTable(tableName);
table.setSerializationLib(format.getSerde());
table.setFields(fields);
table.setDataLocation(tablePath);
table.getTTable().setTemporary(true);
table.setStoredAsSubDirectories(false);
table.setInputFormatClass(format.getInputFormat());
table.setOutputFormatClass(format.getOutputFormat());
db.createTable(table, false);
} catch (Exception e) {
String errMsg = ErrorMsg.INSERT_CANNOT_CREATE_TEMP_FILE.getMsg() + e.getMessage();
LOG.error(errMsg);
// Try to delete the file
if (fs != null && tablePath != null) {
try {
fs.delete(tablePath, false);
} catch (IOException swallowIt) {}
}
throw new SemanticException(errMsg, e);
}
// Step 3, return a new subtree with a from clause built around that temp table
// The form of the tree is TOK_TABREF->TOK_TABNAME->identifier(tablename)
Token t = new ClassicToken(HiveParser.TOK_TABREF);
ASTNode tabRef = new ASTNode(t);
t = new ClassicToken(HiveParser.TOK_TABNAME);
ASTNode tabNameNode = new ASTNode(t);
tabRef.addChild(tabNameNode);
t = new ClassicToken(HiveParser.Identifier, tableName);
ASTNode identifier = new ASTNode(t);
tabNameNode.addChild(identifier);
return tabRef;
}
// Take an expression in the values clause and turn it back into a string. This is far from
// comprehensive. At the moment it only supports:
// * literals (all types)
// * unary negatives
// * true/false
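// For example (illustrative): -5 unparses to "-5", TRUE to "TRUE", FALSE to the empty string
// (which UDFToBoolean reads back as false), and NULL to "\N".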
private String unparseExprForValuesClause(ASTNode expr) throws SemanticException {
switch (expr.getToken().getType()) {
case HiveParser.Number:
return expr.getText();
case HiveParser.StringLiteral:
return PlanUtils.stripQuotes(expr.getText());
case HiveParser.KW_FALSE:
// UDFToBoolean casts any non-empty string to true, so set this to false
return "";
case HiveParser.KW_TRUE:
return "TRUE";
case HiveParser.MINUS:
return "-" + unparseExprForValuesClause((ASTNode)expr.getChildren().get(0));
case HiveParser.TOK_NULL:
// Hive's text input will translate this as a null
return "\\N";
default:
throw new SemanticException("Expression of type " + expr.getText() +
" not supported in insert/values");
}
}
private void assertCombineInputFormat(Tree numerator, String message) throws SemanticException {
String inputFormat = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ?
HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT):
HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT);
if (!inputFormat.equals(CombineHiveInputFormat.class.getName())) {
throw new SemanticException(generateErrorMessage((ASTNode) numerator,
message + " sampling is not supported in " + inputFormat));
}
}
private String processSubQuery(QB qb, ASTNode subq) throws SemanticException {
// This is a subquery and must have an alias
if (subq.getChildCount() != 2) {
throw new SemanticException(ErrorMsg.NO_SUBQUERY_ALIAS.getMsg(subq));
}
ASTNode subqref = (ASTNode) subq.getChild(0);
String alias = unescapeIdentifier(subq.getChild(1).getText());
// Recursively do the first phase of semantic analysis for the subquery
QBExpr qbexpr = new QBExpr(alias);
doPhase1QBExpr(subqref, qbexpr, qb.getId(), alias);
// If the alias is already there then we have a conflict
if (qb.exists(alias)) {
throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq
.getChild(1)));
}
// Insert this map into the stats
qb.setSubqAlias(alias, qbexpr);
qb.addAlias(alias);
unparseTranslator.addIdentifierTranslation((ASTNode) subq.getChild(1));
return alias;
}
/*
* Phase1: hold onto any CTE definitions in aliasToCTE.
* CTE definitions are global to the Query.
*/
private void processCTE(QB qb, ASTNode ctes) throws SemanticException {
int numCTEs = ctes.getChildCount();
for (int i = 0; i < numCTEs; i++) {
  ASTNode cte = (ASTNode) ctes.getChild(i);
  ASTNode cteQry = (ASTNode) cte.getChild(0);
  String alias = unescapeIdentifier(cte.getChild(1).getText());
  String qName = qb.getId() == null ? "" : qb.getId() + ":";
  qName += alias.toLowerCase();
  if (aliasToCTEs.containsKey(qName)) {
    throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(cte.getChild(1)));
  }
  aliasToCTEs.put(qName, cteQry);
}
}
/*
* CTE definitions are scoped: lookup searches from the current QB outwards, and the enclosing
* QB's id is prepended to the CTE name when it is stored in the aliasToCTEs map.
*/
private ASTNode findCTEFromName(QB qb, String cteName) {
/*
* When saving a view definition, all table references in the AST are qualified, including CTE references.
* CTE definitions, however, have no DB qualifier, so we strip the DB qualifier before searching the
* aliasToCTEs map.
*/
String currDB = SessionState.get().getCurrentDatabase();
if ( currDB != null && cteName.startsWith(currDB) &&
cteName.length() > currDB.length() &&
cteName.charAt(currDB.length()) == '.' ) {
cteName = cteName.substring(currDB.length() + 1);
}
StringBuffer qId = new StringBuffer();
if (qb.getId() != null) {
qId.append(qb.getId());
}
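// Search from the innermost query id outwards, e.g. (illustrative) for qb id "q1:q2" and
// cteName "c": try "q1:q2:c", then "q1:c", and finally the unqualified name "c".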
while (qId.length() > 0) {
String nm = qId + ":" + cteName;
if (aliasToCTEs.containsKey(nm)) {
return aliasToCTEs.get(nm);
}
int lastIndex = qId.lastIndexOf(":");
lastIndex = lastIndex < 0 ? 0 : lastIndex;
qId.setLength(lastIndex);
}
return aliasToCTEs.get(cteName);
}
/*
* If a CTE is referenced in a QueryBlock:
* - add it as a SubQuery for now.
* - SQ.alias is the alias used in QB. (if no alias is specified,
* it uses the CTE name. Works just like table references)
* - Adding SQ done by:
* - copying AST of CTE
* - setting ASTOrigin on cloned AST.
* - trigger phase 1 on new QBExpr.
* - update QB data structs: remove this as a table reference, move it to a SQ invocation.
*/
private void addCTEAsSubQuery(QB qb, String cteName, String cteAlias) throws SemanticException {
cteAlias = cteAlias == null ? cteName : cteAlias;
ASTNode cteQryNode = findCTEFromName(qb, cteName);
QBExpr cteQBExpr = new QBExpr(cteAlias);
String cteText = ctx.getTokenRewriteStream().toString(
cteQryNode.getTokenStartIndex(), cteQryNode.getTokenStopIndex());
final ASTNodeOrigin cteOrigin = new ASTNodeOrigin("CTE", cteName,
cteText, cteAlias, cteQryNode);
cteQryNode = (ASTNode) ParseDriver.adaptor.dupTree(cteQryNode);
SubQueryUtils.setOriginDeep(cteQryNode, cteOrigin);
doPhase1QBExpr(cteQryNode, cteQBExpr, qb.getId(), cteAlias);
qb.rewriteCTEToSubq(cteAlias, cteName, cteQBExpr);
}
private boolean isJoinToken(ASTNode node) {
if ((node.getToken().getType() == HiveParser.TOK_JOIN)
|| (node.getToken().getType() == HiveParser.TOK_CROSSJOIN)
|| isOuterJoinToken(node)
|| (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN)
|| (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) {
return true;
}
return false;
}
private boolean isOuterJoinToken(ASTNode node) {
return (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
|| (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
|| (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN);
}
/**
* Given the AST with TOK_JOIN as the root, get all the aliases for the tables
* or subqueries in the join.
*
* @param qb
* @param join
* @throws SemanticException
*/
@SuppressWarnings("nls")
private void processJoin(QB qb, ASTNode join) throws SemanticException {
int numChildren = join.getChildCount();
if ((numChildren != 2) && (numChildren != 3)
&& join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) {
throw new SemanticException(generateErrorMessage(join,
"Join with multiple children"));
}
queryProperties.incrementJoinCount(isOuterJoinToken(join));
for (int num = 0; num < numChildren; num++) {
ASTNode child = (ASTNode) join.getChild(num);
if (child.getToken().getType() == HiveParser.TOK_TABREF) {
processTable(qb, child);
} else if (child.getToken().getType() == HiveParser.TOK_SUBQUERY) {
processSubQuery(qb, child);
} else if (child.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
queryProperties.setHasPTF(true);
processPTF(qb, child);
PTFInvocationSpec ptfInvocationSpec = qb.getPTFInvocationSpec(child);
String inputAlias = ptfInvocationSpec == null ? null :
ptfInvocationSpec.getFunction().getAlias();
if ( inputAlias == null ) {
throw new SemanticException(generateErrorMessage(child,
"PTF invocation in a Join must have an alias"));
}
} else if (child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
// SELECT * FROM src1 LATERAL VIEW udtf() AS myTable JOIN src2 ...
// is not supported. Instead, the lateral view must be in a subquery
// SELECT * FROM (SELECT * FROM src1 LATERAL VIEW udtf() AS myTable) a
// JOIN src2 ...
throw new SemanticException(ErrorMsg.LATERAL_VIEW_WITH_JOIN
.getMsg(join));
} else if (isJoinToken(child)) {
processJoin(qb, child);
}
}
}
/**
* Given the AST with TOK_LATERAL_VIEW as the root, get the alias for the
* table or subquery in the lateral view and also make a mapping from the
* alias to all the lateral view AST's.
*
* @param qb
* @param lateralView
* @return the alias for the table/subquery
* @throws SemanticException
*/
private String processLateralView(QB qb, ASTNode lateralView)
throws SemanticException {
int numChildren = lateralView.getChildCount();
assert (numChildren == 2);
ASTNode next = (ASTNode) lateralView.getChild(1);
String alias = null;
switch (next.getToken().getType()) {
case HiveParser.TOK_TABREF:
alias = processTable(qb, next);
break;
case HiveParser.TOK_SUBQUERY:
alias = processSubQuery(qb, next);
break;
case HiveParser.TOK_LATERAL_VIEW:
case HiveParser.TOK_LATERAL_VIEW_OUTER:
alias = processLateralView(qb, next);
break;
default:
throw new SemanticException(ErrorMsg.LATERAL_VIEW_INVALID_CHILD
.getMsg(lateralView));
}
alias = alias.toLowerCase();
qb.getParseInfo().addLateralViewForAlias(alias, lateralView);
qb.addAlias(alias);
return alias;
}
/** The context that doPhase1 uses to populate information pertaining
* to CBO (currently, this is used for CTAS and insert-as-select). */
private static class PreCboCtx {
enum Type {
NONE,
INSERT,
CTAS,
UNEXPECTED
}
public ASTNode nodeOfInterest;
public Type type = Type.NONE;
public void set(Type type, ASTNode ast) {
if (this.type != Type.NONE) {
STATIC_LOG.warn("Setting " + type + " when already " + this.type
+ "; node " + ast.dump() + " vs old node " + nodeOfInterest.dump());
this.type = Type.UNEXPECTED;
return;
}
this.type = type;
this.nodeOfInterest = ast;
}
}
/**
* Phase 1 (including, but not limited to):
*
* 1. Gets all the aliases for all the tables / subqueries and makes the
* appropriate mapping in aliasToTabs, aliasToSubq.
* 2. Gets the location of the destination and names the clause "insclause-" + i.
* 3. Creates a map from a string representation of an aggregation tree to the
* actual aggregation AST.
* 4. Creates a mapping from the clause name to the select expression AST in destToSelExpr.
* 5. Creates a mapping from a table alias to the lateral view AST's in aliasToLateralViews.
*
* @param ast
* @param qb
* @param ctx_1
* @throws SemanticException
*/
@SuppressWarnings({"fallthrough", "nls"})
public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PreCboCtx cboCtx)
throws SemanticException {
boolean phase1Result = true;
QBParseInfo qbp = qb.getParseInfo();
boolean skipRecursion = false;
if (ast.getToken() != null) {
skipRecursion = true;
switch (ast.getToken().getType()) {
case HiveParser.TOK_SELECTDI:
qb.countSelDi();
// fall through
case HiveParser.TOK_SELECT:
qb.countSel();
qbp.setSelExprForClause(ctx_1.dest, ast);
int posn = 0;
if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) {
qbp.setHints((ASTNode) ast.getChild(0));
posn++;
}
if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM))
queryProperties.setUsesScript(true);
LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast,
qb, ctx_1.dest);
doPhase1GetColumnAliasesFromSelect(ast, qbp);
qbp.setAggregationExprsForClause(ctx_1.dest, aggregations);
qbp.setDistinctFuncExprsForClause(ctx_1.dest,
doPhase1GetDistinctFuncExprs(aggregations));
break;
case HiveParser.TOK_WHERE:
qbp.setWhrExprForClause(ctx_1.dest, ast);
if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty())
queryProperties.setFilterWithSubQuery(true);
break;
case HiveParser.TOK_INSERT_INTO:
String currentDatabase = SessionState.get().getCurrentDatabase();
String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase);
qbp.addInsertIntoTable(tab_name);
case HiveParser.TOK_DESTINATION:
ctx_1.dest = "insclause-" + ctx_1.nextNum;
ctx_1.nextNum++;
boolean isTmpFileDest = false;
if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) {
ASTNode ch = (ASTNode)ast.getChild(0);
if (ch.getToken().getType() == HiveParser.TOK_DIR
&& ch.getChildCount() > 0 && ch.getChild(0) instanceof ASTNode) {
ch = (ASTNode)ch.getChild(0);
isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE;
}
}
// is there an insert in the subquery
if (qbp.getIsSubQ() && !isTmpFileDest) {
throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast));
}
if (cboCtx != null && !isTmpFileDest) {
cboCtx.set(PreCboCtx.Type.INSERT, ast);
}
qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
if (qbp.getClauseNamesForDest().size() > 1) {
queryProperties.setMultiDestQuery(true);
}
break;
case HiveParser.TOK_FROM:
int child_count = ast.getChildCount();
if (child_count != 1) {
throw new SemanticException(generateErrorMessage(ast,
"Multiple Children " + child_count));
}
// Check if this is a subquery / lateral view
ASTNode frm = (ASTNode) ast.getChild(0);
if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
processTable(qb, frm);
} else if (frm.getToken().getType() == HiveParser.TOK_VIRTUAL_TABLE) {
// Create a temp table with the passed values in it then rewrite this portion of the
// tree to be from that table.
ASTNode newFrom = genValuesTempTable(frm);
ast.setChild(0, newFrom);
processTable(qb, newFrom);
} else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
processSubQuery(qb, frm);
} else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
queryProperties.setHasLateralViews(true);
processLateralView(qb, frm);
} else if (isJoinToken(frm)) {
processJoin(qb, frm);
qbp.setJoinExpr(frm);
}else if(frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION){
queryProperties.setHasPTF(true);
processPTF(qb, frm);
}
break;
case HiveParser.TOK_CLUSTERBY:
// Get the clusterby aliases - these are aliased to the entries in the
// select list
queryProperties.setHasClusterBy(true);
qbp.setClusterByExprForClause(ctx_1.dest, ast);
break;
case HiveParser.TOK_DISTRIBUTEBY:
// Get the distribute by aliases - these are aliased to the entries in
// the
// select list
queryProperties.setHasDistributeBy(true);
qbp.setDistributeByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
} else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
}
break;
case HiveParser.TOK_SORTBY:
// Get the sort by aliases - these are aliased to the entries in the
// select list
queryProperties.setHasSortBy(true);
qbp.setSortByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg()));
} else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg()));
}
break;
case HiveParser.TOK_ORDERBY:
// Get the order by aliases - these are aliased to the entries in the
// select list
queryProperties.setHasOrderBy(true);
qbp.setOrderByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg()));
}
break;
case HiveParser.TOK_GROUPBY:
case HiveParser.TOK_ROLLUP_GROUPBY:
case HiveParser.TOK_CUBE_GROUPBY:
case HiveParser.TOK_GROUPING_SETS:
// Get the groupby aliases - these are aliased to the entries in the
// select list
queryProperties.setHasGroupBy(true);
if (qbp.getJoinExpr() != null) {
queryProperties.setHasJoinFollowedByGroupBy(true);
}
if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
}
qbp.setGroupByExprForClause(ctx_1.dest, ast);
skipRecursion = true;
// Rollup and Cubes are syntactic sugar on top of grouping sets
if (ast.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) {
qbp.getDestRollups().add(ctx_1.dest);
} else if (ast.getToken().getType() == HiveParser.TOK_CUBE_GROUPBY) {
qbp.getDestCubes().add(ctx_1.dest);
} else if (ast.getToken().getType() == HiveParser.TOK_GROUPING_SETS) {
qbp.getDestGroupingSets().add(ctx_1.dest);
}
break;
case HiveParser.TOK_HAVING:
qbp.setHavingExprForClause(ctx_1.dest, ast);
qbp.addAggregationExprsForClause(ctx_1.dest,
doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
break;
case HiveParser.KW_WINDOW:
if (!qb.hasWindowingSpec(ctx_1.dest) ) {
throw new SemanticException(generateErrorMessage(ast,
"Query has no Cluster/Distribute By; but has a Window definition"));
}
handleQueryWindowClauses(qb, ctx_1, ast);
break;
case HiveParser.TOK_LIMIT:
qbp.setDestLimit(ctx_1.dest, new Integer(ast.getChild(0).getText()));
break;
case HiveParser.TOK_ANALYZE:
// Case of analyze command
String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0));
qb.setTabAlias(table_name, table_name);
qb.addAlias(table_name);
qb.getParseInfo().setIsAnalyzeCommand(true);
qb.getParseInfo().setNoScanAnalyzeCommand(this.noscan);
qb.getParseInfo().setPartialScanAnalyzeCommand(this.partialscan);
// Allow analyze the whole table and dynamic partitions
HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
break;
case HiveParser.TOK_UNION:
if (!qbp.getIsSubQ()) {
// this shouldn't happen. The parser should have converted the union to be
// contained in a subquery. Just in case, we keep the error as a fallback.
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
}
skipRecursion = false;
break;
case HiveParser.TOK_INSERT:
ASTNode destination = (ASTNode) ast.getChild(0);
Tree tab = destination.getChild(0);
// Proceed if AST contains partition & If Not Exists
if (destination.getChildCount() == 2 &&
tab.getChildCount() == 2 &&
destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
String tableName = tab.getChild(0).getChild(0).getText();
Tree partitions = tab.getChild(1);
int childCount = partitions.getChildCount();
HashMap<String, String> partition = new HashMap<String, String>();
for (int i = 0; i < childCount; i++) {
String partitionName = partitions.getChild(i).getChild(0).getText();
Tree pvalue = partitions.getChild(i).getChild(1);
if (pvalue == null) {
break;
}
String partitionVal = stripQuotes(pvalue.getText());
partition.put(partitionName, partitionVal);
}
// if it is a dynamic partition throw the exception
if (childCount != partition.size()) {
throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS
.getMsg(partition.toString()));
}
Table table = null;
try {
table = db.getTable(tableName);
} catch (HiveException ex) {
throw new SemanticException(ex);
}
try {
Partition parMetaData = db.getPartition(table, partition, false);
// Check partition exists if it exists skip the overwrite
if (parMetaData != null) {
phase1Result = false;
skipRecursion = true;
LOG.info("Partition already exists so insert into overwrite " +
"skipped for partition : " + parMetaData.toString());
break;
}
} catch (HiveException e) {
LOG.info("Error while getting metadata : ", e);
}
validatePartSpec(table, partition, (ASTNode)tab, conf, false);
}
skipRecursion = false;
break;
case HiveParser.TOK_LATERAL_VIEW:
case HiveParser.TOK_LATERAL_VIEW_OUTER:
// todo: nested LV
assert ast.getChildCount() == 1;
qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast);
break;
case HiveParser.TOK_CTE:
processCTE(qb, ast);
break;
default:
skipRecursion = false;
break;
}
}
if (!skipRecursion) {
// Iterate over the rest of the children
int child_count = ast.getChildCount();
for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) {
// Recurse
phase1Result = phase1Result && doPhase1(
(ASTNode)ast.getChild(child_pos), qb, ctx_1, cboCtx);
}
}
return phase1Result;
}
private void traceLogAst(ASTNode ast, String what) {
if (!LOG.isTraceEnabled()) return;
LOG.trace(what + ast.dump());
}
private void getMetaData(QBExpr qbexpr, ReadEntity parentInput)
throws SemanticException {
if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
getMetaData(qbexpr.getQB(), parentInput);
} else {
getMetaData(qbexpr.getQBExpr1(), parentInput);
getMetaData(qbexpr.getQBExpr2(), parentInput);
}
}
public Table getTable(TableScanOperator ts) {
return topToTable.get(ts);
}
public void getMetaData(QB qb) throws SemanticException {
getMetaData(qb, null);
}
@SuppressWarnings("nls")
public void getMetaData(QB qb, ReadEntity parentInput) throws SemanticException {
try {
LOG.info("Get metadata for source tables");
// Go over the tables and populate the related structures.
// We have to materialize the table alias list since we might
// modify it in the middle for view rewrite.
List<String> tabAliases = new ArrayList<String>(qb.getTabAliases());
// Keep track of view alias to view name and read entity
// For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T
// keeps track of full view name and read entity corresponding to alias V3, V3:V2, V3:V2:V1.
// This is needed for tracking the dependencies for inputs, along with their parents.
Map<String, ObjectPair<String, ReadEntity>> aliasToViewInfo =
new HashMap<String, ObjectPair<String, ReadEntity>>();
/*
* used to capture view to SQ conversions. This is used to check for
* recursive CTE invocations.
*/
Map<String, String> sqAliasToCTEName = new HashMap<String, String>();
for (String alias : tabAliases) {
String tab_name = qb.getTabNameForAlias(alias);
Table tab = db.getTable(tab_name, false);
if (tab == null) {
/*
* if this is a CTE reference:
* Add its AST as a SubQuery to this QB.
*/
ASTNode cteNode = findCTEFromName(qb, tab_name.toLowerCase());
if ( cteNode != null ) {
String cte_name = tab_name.toLowerCase();
if (ctesExpanded.contains(cte_name)) {
throw new SemanticException("Recursive cte " + tab_name +
" detected (cycle: " + StringUtils.join(ctesExpanded, " -> ") +
" -> " + tab_name + ").");
}
addCTEAsSubQuery(qb, cte_name, alias);
sqAliasToCTEName.put(alias, cte_name);
continue;
}
ASTNode src = qb.getParseInfo().getSrcForAlias(alias);
if (null != src) {
throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(src));
} else {
throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(alias));
}
}
// Disallow INSERT INTO on bucketized tables
boolean isAcid = isAcidTable(tab);
if (qb.getParseInfo().isInsertIntoTable(tab.getDbName(), tab.getTableName()) &&
tab.getNumBuckets() > 0 && !isAcid) {
throw new SemanticException(ErrorMsg.INSERT_INTO_BUCKETIZED_TABLE.
getMsg("Table: " + tab_name));
}
// Disallow update and delete on non-acid tables
if ((updating() || deleting()) && !isAcid) {
// isAcidTable above also checks for whether we are using an acid compliant
// transaction manager. But that has already been caught in
// UpdateDeleteSemanticAnalyzer, so if we are updating or deleting and getting nonAcid
// here, it means the table itself doesn't support it.
throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, tab_name);
}
// We check the offline status of the table here because, if a query only
// selects from a non-existing partition of an offline table, the partition won't
// be added to inputs and validate() won't have the information to
// check the table's offline status.
// TODO: Modify the code to remove the checking here and consolidate
// it in validate()
//
if (tab.isOffline()) {
throw new SemanticException(ErrorMsg.OFFLINE_TABLE_OR_PARTITION.
getMsg("Table " + getUnescapedName(qb.getParseInfo().getSrcForAlias(alias))));
}
if (tab.isView()) {
if (qb.getParseInfo().isAnalyzeCommand()) {
throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg());
}
String fullViewName = tab.getDbName() + "." + tab.getTableName();
// Prevent view cycles
if (viewsExpanded.contains(fullViewName)) {
throw new SemanticException("Recursive view " + fullViewName +
" detected (cycle: " + StringUtils.join(viewsExpanded, " -> ") +
" -> " + fullViewName + ").");
}
replaceViewReferenceWithDefinition(qb, tab, tab_name, alias);
// This is the last time we'll see the Table objects for views, so add it to the inputs
// now
ReadEntity viewInput = new ReadEntity(tab, parentInput);
viewInput = PlanUtils.addInput(inputs, viewInput);
aliasToViewInfo.put(alias, new ObjectPair<String, ReadEntity>(fullViewName, viewInput));
viewAliasToInput.put(getAliasId(alias, qb), viewInput);
continue;
}
if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
throw new SemanticException(generateErrorMessage(
qb.getParseInfo().getSrcForAlias(alias),
ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg()));
}
qb.getMetaData().setSrcForAlias(alias, tab);
if (qb.getParseInfo().isAnalyzeCommand()) {
// allow a partial partition specification for noscan, since noscan is fast.
tableSpec ts = new tableSpec(db, conf, (ASTNode) ast.getChild(0), true, this.noscan);
if (ts.specType == SpecType.DYNAMIC_PARTITION) { // dynamic partitions
try {
ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec);
} catch (HiveException e) {
throw new SemanticException(generateErrorMessage(
qb.getParseInfo().getSrcForAlias(alias),
"Cannot get partitions for " + ts.partSpec), e);
}
}
// validate partial scan command
QBParseInfo qbpi = qb.getParseInfo();
if (qbpi.isPartialScanAnalyzeCommand()) {
Class<? extends InputFormat> inputFormatClass = null;
switch (ts.specType) {
case TABLE_ONLY:
case DYNAMIC_PARTITION:
inputFormatClass = ts.tableHandle.getInputFormatClass();
break;
case STATIC_PARTITION:
inputFormatClass = ts.partHandle.getInputFormatClass();
break;
default:
assert false;
}
// throw an exception for formats other than RCFile or ORC.
if (!(inputFormatClass.equals(RCFileInputFormat.class) || inputFormatClass
.equals(OrcInputFormat.class))) {
throw new SemanticException(ErrorMsg.ANALYZE_TABLE_PARTIALSCAN_NON_RCFILE.getMsg());
}
}
qb.getParseInfo().addTableSpec(alias, ts);
}
ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
PlanUtils.addInput(inputs,
new ReadEntity(tab, parentViewInfo, parentViewInfo == null));
}
LOG.info("Get metadata for subqueries");
// Go over the subqueries and getMetaData for these
for (String alias : qb.getSubqAliases()) {
boolean wasView = aliasToViewInfo.containsKey(alias);
boolean wasCTE = sqAliasToCTEName.containsKey(alias);
ReadEntity newParentInput = null;
if (wasView) {
viewsExpanded.add(aliasToViewInfo.get(alias).getFirst());
newParentInput = aliasToViewInfo.get(alias).getSecond();
} else if (wasCTE) {
ctesExpanded.add(sqAliasToCTEName.get(alias));
}
QBExpr qbexpr = qb.getSubqForAlias(alias);
getMetaData(qbexpr, newParentInput);
if (wasView) {
viewsExpanded.remove(viewsExpanded.size() - 1);
} else if (wasCTE) {
ctesExpanded.remove(ctesExpanded.size() - 1);
}
}
RowFormatParams rowFormatParams = new RowFormatParams();
StorageFormat storageFormat = new StorageFormat(conf);
LOG.info("Get metadata for destination tables");
// Go over all the destination structures and populate the related
// metadata
QBParseInfo qbp = qb.getParseInfo();
for (String name : qbp.getClauseNamesForDest()) {
ASTNode ast = qbp.getDestForClause(name);
switch (ast.getToken().getType()) {
case HiveParser.TOK_TAB: {
tableSpec ts = new tableSpec(db, conf, ast);
if (ts.tableHandle.isView()) {
throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
}
Class<?> outputFormatClass = ts.tableHandle.getOutputFormatClass();
if (!HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) {
throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE
.getMsg(ast, "The class is " + outputFormatClass.toString()));
}
// The tableSpec ts is obtained from the query (user specified);
// the user didn't specify partitions in the query,
// but whether the table itself is partitioned is not known here.
if (ts.specType != SpecType.STATIC_PARTITION) {
// This is a table or dynamic partition
qb.getMetaData().setDestForAlias(name, ts.tableHandle);
// has dynamic as well as static partitions
if (ts.partSpec != null && ts.partSpec.size() > 0) {
qb.getMetaData().setPartSpecForAlias(name, ts.partSpec);
}
} else {
// This is a partition
qb.getMetaData().setDestForAlias(name, ts.partHandle);
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
// Set that variable to automatically collect stats during the MapReduce job
qb.getParseInfo().setIsInsertToTable(true);
// Add the table spec for the destination table.
qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
}
break;
}
case HiveParser.TOK_LOCAL_DIR:
case HiveParser.TOK_DIR: {
// This is a dfs file
String fname = stripQuotes(ast.getChild(0).getText());
if ((!qb.getParseInfo().getIsSubQ())
&& (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) {
if (qb.isCTAS()) {
qb.setIsQuery(false);
ctx.setResDir(null);
ctx.setResFile(null);
// allocate a temporary output dir on the location of the table
String tableName = getUnescapedName((ASTNode) ast.getChild(0));
String[] names = Utilities.getDbTableName(tableName);
Path location;
try {
Warehouse wh = new Warehouse(conf);
location = wh.getDatabasePath(db.getDatabase(names[0]));
} catch (MetaException e) {
throw new SemanticException(e);
}
try {
fname = ctx.getExternalTmpPath(
FileUtils.makeQualified(location, conf)).toString();
} catch (Exception e) {
throw new SemanticException(generateErrorMessage(ast,
"Error creating temporary folder on: " + location.toString()), e);
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
tableSpec ts = new tableSpec(db, conf, this.ast);
// Set that variable to automatically collect stats during the MapReduce job
qb.getParseInfo().setIsInsertToTable(true);
// Add the table spec for the destination table.
qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
}
} else {
// This is the only place where isQuery is set to true; it defaults to false.
qb.setIsQuery(true);
fname = ctx.getMRTmpPath().toString();
ctx.setResDir(new Path(fname));
}
}
qb.getMetaData().setDestForAlias(name, fname,
(ast.getToken().getType() == HiveParser.TOK_DIR));
CreateTableDesc localDirectoryDesc = new CreateTableDesc();
boolean localDirectoryDescIsSet = false;
int numCh = ast.getChildCount();
for (int num = 1; num < numCh ; num++){
ASTNode child = (ASTNode) ast.getChild(num);
if (child != null) {
if (storageFormat.fillStorageFormat(child)) {
localDirectoryDesc.setOutputFormat(storageFormat.getOutputFormat());
localDirectoryDesc.setSerName(storageFormat.getSerde());
localDirectoryDescIsSet = true;
continue;
}
switch (child.getToken().getType()) {
case HiveParser.TOK_TABLEROWFORMAT:
rowFormatParams.analyzeRowFormat(child);
localDirectoryDesc.setFieldDelim(rowFormatParams.fieldDelim);
localDirectoryDesc.setLineDelim(rowFormatParams.lineDelim);
localDirectoryDesc.setCollItemDelim(rowFormatParams.collItemDelim);
localDirectoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim);
localDirectoryDesc.setFieldEscape(rowFormatParams.fieldEscape);
localDirectoryDesc.setNullFormat(rowFormatParams.nullFormat);
localDirectoryDescIsSet=true;
break;
case HiveParser.TOK_TABLESERIALIZER:
ASTNode serdeChild = (ASTNode) child.getChild(0);
storageFormat.setSerde(unescapeSQLString(serdeChild.getChild(0).getText()));
localDirectoryDesc.setSerName(storageFormat.getSerde());
localDirectoryDescIsSet=true;
break;
}
}
}
if (localDirectoryDescIsSet){
qb.setLocalDirectoryDesc(localDirectoryDesc);
}
break;
}
default:
throw new SemanticException(generateErrorMessage(ast,
"Unknown Token Type " + ast.getToken().getType()));
}
}
} catch (HiveException e) {
// Has to use full name to make sure it does not conflict with
// org.apache.commons.lang.StringUtils
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
throw new SemanticException(e.getMessage(), e);
}
}
private void replaceViewReferenceWithDefinition(QB qb, Table tab,
String tab_name, String alias) throws SemanticException {
ParseDriver pd = new ParseDriver();
ASTNode viewTree;
final ASTNodeOrigin viewOrigin = new ASTNodeOrigin("VIEW", tab.getTableName(),
tab.getViewExpandedText(), alias, qb.getParseInfo().getSrcForAlias(
alias));
try {
String viewText = tab.getViewExpandedText();
// Reparse text, passing null for context to avoid clobbering
// the top-level token stream.
ASTNode tree = pd.parse(viewText, ctx, false);
tree = ParseUtils.findRootNonNullToken(tree);
viewTree = tree;
Dispatcher nodeOriginDispatcher = new Dispatcher() {
@Override
public Object dispatch(Node nd, java.util.Stack<Node> stack,
Object... nodeOutputs) {
((ASTNode) nd).setOrigin(viewOrigin);
return null;
}
};
GraphWalker nodeOriginTagger = new DefaultGraphWalker(
nodeOriginDispatcher);
nodeOriginTagger.startWalking(java.util.Collections
.singleton(viewTree), null);
} catch (ParseException e) {
// A user could encounter this if a stored view definition contains
// an old SQL construct which has been eliminated in a later Hive
// version, so we need to provide full debugging info to help
// with fixing the view definition.
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
StringBuilder sb = new StringBuilder();
sb.append(e.getMessage());
ErrorMsg.renderOrigin(sb, viewOrigin);
throw new SemanticException(sb.toString(), e);
}
QBExpr qbexpr = new QBExpr(alias);
doPhase1QBExpr(viewTree, qbexpr, qb.getId(), alias);
qb.rewriteViewToSubq(alias, tab_name, qbexpr);
}
private boolean isPresent(String[] list, String elem) {
for (String s : list) {
if (s.toLowerCase().equals(elem)) {
return true;
}
}
return false;
}
/*
* This method is invoked for unqualified column references in join conditions.
* It is passed the alias-to-Operator mapping built for the QueryBlock so far.
* We try to resolve the unqualified column against each of the Operator Row Resolvers.
* - if the column is present in only one RowResolver, we treat this as a reference to
* that Operator.
* - if the column resolves with more than one RowResolver, we treat it as an Ambiguous
* reference.
* - if the column doesn't resolve with any RowResolver, we treat this as an Invalid
* reference.
*/
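// For example (hypothetical query): in "t1 JOIN t2 ON (key = t2.id)", the
// unqualified "key" is looked up in the RowResolver of every alias seen so far;
// if only t1 defines a "key" column, the reference is attributed to t1, and if
// both sides define it, an AMBIGUOUS_TABLE_ALIAS error is raised.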
@SuppressWarnings("rawtypes")
private String findAlias(ASTNode columnRef,
Map<String, Operator> aliasToOpInfo) throws SemanticException {
String colName = unescapeIdentifier(columnRef.getChild(0).getText()
.toLowerCase());
String tabAlias = null;
if ( aliasToOpInfo != null ) {
for (Map.Entry<String, Operator> opEntry : aliasToOpInfo.entrySet()) {
Operator op = opEntry.getValue();
RowResolver rr = opParseCtx.get(op).getRowResolver();
ColumnInfo colInfo = rr.get(null, colName);
if (colInfo != null) {
if (tabAlias == null) {
tabAlias = opEntry.getKey();
} else {
throw new SemanticException(
ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(columnRef.getChild(0)));
}
}
}
}
if ( tabAlias == null ) {
throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(columnRef
.getChild(0)));
}
return tabAlias;
}
@SuppressWarnings("nls")
void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn,
ArrayList<String> leftAliases, ArrayList<String> rightAliases,
ArrayList<String> fields,
Map<String, Operator> aliasToOpInfo) throws SemanticException {
// String[] allAliases = joinTree.getAllAliases();
switch (condn.getToken().getType()) {
case HiveParser.TOK_TABLE_OR_COL:
String tableOrCol = unescapeIdentifier(condn.getChild(0).getText()
.toLowerCase());
unparseTranslator.addIdentifierTranslation((ASTNode) condn.getChild(0));
if (isPresent(joinTree.getLeftAliases(), tableOrCol)) {
if (!leftAliases.contains(tableOrCol)) {
leftAliases.add(tableOrCol);
}
} else if (isPresent(joinTree.getRightAliases(), tableOrCol)) {
if (!rightAliases.contains(tableOrCol)) {
rightAliases.add(tableOrCol);
}
} else {
tableOrCol = findAlias(condn, aliasToOpInfo);
if (isPresent(joinTree.getLeftAliases(), tableOrCol)) {
if (!leftAliases.contains(tableOrCol)) {
leftAliases.add(tableOrCol);
}
} else {
if (!rightAliases.contains(tableOrCol)) {
rightAliases.add(tableOrCol);
}
}
}
break;
case HiveParser.Identifier:
// it may be a field name, return the identifier and let the caller decide
// whether it is or not
if (fields != null) {
fields
.add(unescapeIdentifier(condn.getToken().getText().toLowerCase()));
}
unparseTranslator.addIdentifierTranslation(condn);
break;
case HiveParser.Number:
case HiveParser.StringLiteral:
case HiveParser.BigintLiteral:
case HiveParser.SmallintLiteral:
case HiveParser.TinyintLiteral:
case HiveParser.DecimalLiteral:
case HiveParser.TOK_STRINGLITERALSEQUENCE:
case HiveParser.TOK_CHARSETLITERAL:
case HiveParser.KW_TRUE:
case HiveParser.KW_FALSE:
break;
case HiveParser.TOK_FUNCTION:
// check all the arguments
for (int i = 1; i < condn.getChildCount(); i++) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(i),
leftAliases, rightAliases, null, aliasToOpInfo);
}
break;
default:
// This is an operator - so check whether it is unary or binary operator
if (condn.getChildCount() == 1) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null, aliasToOpInfo);
} else if (condn.getChildCount() == 2) {
ArrayList<String> fields1 = null;
// if it is a dot operator, remember the field name of the rhs of the
// left semijoin
if (joinTree.getNoSemiJoin() == false
&& condn.getToken().getType() == HiveParser.DOT) {
// get the semijoin rhs table name and field name
fields1 = new ArrayList<String>();
int rhssize = rightAliases.size();
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null, aliasToOpInfo);
String rhsAlias = null;
if (rightAliases.size() > rhssize) { // the new table is rhs table
rhsAlias = rightAliases.get(rightAliases.size() - 1);
}
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
leftAliases, rightAliases, fields1, aliasToOpInfo);
if (rhsAlias != null && fields1.size() > 0) {
joinTree.addRHSSemijoinColumns(rhsAlias, condn);
}
} else {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null, aliasToOpInfo);
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
leftAliases, rightAliases, fields1, aliasToOpInfo);
}
} else {
throw new SemanticException(condn.toStringTree() + " encountered with "
+ condn.getChildCount() + " children");
}
break;
}
}
private void populateAliases(List<String> leftAliases,
List<String> rightAliases, ASTNode condn, QBJoinTree joinTree,
List<String> leftSrc) throws SemanticException {
if ((leftAliases.size() != 0) && (rightAliases.size() != 0)) {
throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
.getMsg(condn));
}
if (rightAliases.size() != 0) {
assert rightAliases.size() == 1;
joinTree.getExpressions().get(1).add(condn);
} else if (leftAliases.size() != 0) {
joinTree.getExpressions().get(0).add(condn);
for (String s : leftAliases) {
if (!leftSrc.contains(s)) {
leftSrc.add(s);
}
}
} else {
throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_2
.getMsg(condn));
}
}
/*
* refactored out of the Equality case of parseJoinCondition
* so that this can be recursively called on its left tree in the case when
* only left sources are referenced in a Predicate
*/
void applyEqualityPredicateToQBJoinTree(QBJoinTree joinTree,
JoinType type,
List<String> leftSrc,
ASTNode joinCond,
ASTNode leftCondn,
ASTNode rightCondn,
List<String> leftCondAl1,
List<String> leftCondAl2,
List<String> rightCondAl1,
List<String> rightCondAl2) throws SemanticException {
if (leftCondAl1.size() != 0) {
if ((rightCondAl1.size() != 0)
|| ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
if (type.equals(JoinType.LEFTOUTER) ||
type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(0).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
/*
* If the rhs references table sources and this QBJoinTree has a leftTree,
* hand it to the leftTree and let it recursively handle it.
* There are 5 cases of passing a condition down:
* 1. The leftSide && rightSide don't contain references to the leftTree's rightAlias
* => pass the lists down as is.
* 2. The leftSide contains refs to the leftTree's rightAlias, the rightSide doesn't
* => switch the leftCondAl1 and leftCondAl2 lists and pass down.
* 3. The rightSide contains refs to the leftTree's rightAlias, the leftSide doesn't
* => switch the rightCondAl1 and rightCondAl2 lists and pass down.
* 4. In case both contain references to the leftTree's rightAlias
* => we cannot push the condition down.
* 5. If either contain references to both left & right
* => we cannot push forward.
*/
if (rightCondAl1.size() != 0) {
QBJoinTree leftTree = joinTree.getJoinSrc();
List<String> leftTreeLeftSrc = new ArrayList<String>();
if (leftTree != null) {
String leftTreeRightSource = leftTree.getRightAliases() != null &&
leftTree.getRightAliases().length > 0 ?
leftTree.getRightAliases()[0] : null;
boolean leftHasRightReference = false;
for (String r : leftCondAl1) {
if (r.equals(leftTreeRightSource)) {
leftHasRightReference = true;
break;
}
}
boolean rightHasRightReference = false;
for (String r : rightCondAl1) {
if (r.equals(leftTreeRightSource)) {
rightHasRightReference = true;
break;
}
}
boolean pushedDown = false;
if ( !leftHasRightReference && !rightHasRightReference ) {
applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc,
joinCond, leftCondn, rightCondn,
leftCondAl1, leftCondAl2,
rightCondAl1, rightCondAl2);
pushedDown = true;
} else if ( !leftHasRightReference && rightHasRightReference && rightCondAl1.size() == 1 ) {
applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc,
joinCond, leftCondn, rightCondn,
leftCondAl1, leftCondAl2,
rightCondAl2, rightCondAl1);
pushedDown = true;
} else if (leftHasRightReference && !rightHasRightReference && leftCondAl1.size() == 1 ) {
applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc,
joinCond, leftCondn, rightCondn,
leftCondAl2, leftCondAl1,
rightCondAl1, rightCondAl2);
pushedDown = true;
}
if (leftTreeLeftSrc.size() == 1) {
leftTree.setLeftAlias(leftTreeLeftSrc.get(0));
}
if ( pushedDown) {
return;
}
} // leftTree != null
}
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else if (rightCondAl2.size() != 0) {
populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
leftSrc);
populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
leftSrc);
boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
joinTree.getNullSafes().add(nullsafe);
}
} else if (leftCondAl2.size() != 0) {
if ((rightCondAl2.size() != 0)
|| ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
if (type.equals(JoinType.RIGHTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(1).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else if (rightCondAl1.size() != 0) {
populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
leftSrc);
populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
leftSrc);
boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
joinTree.getNullSafes().add(nullsafe);
}
} else if (rightCondAl1.size() != 0) {
if (type.equals(JoinType.LEFTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(0).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
if (type.equals(JoinType.RIGHTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(1).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
}
}
@SuppressWarnings("rawtypes")
private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, List<String> leftSrc,
Map<String, Operator> aliasToOpInfo)
throws SemanticException {
if (joinCond == null) {
return;
}
JoinCond cond = joinTree.getJoinCond()[0];
JoinType type = cond.getJoinType();
parseJoinCondition(joinTree, joinCond, leftSrc, type, aliasToOpInfo);
List<ArrayList<ASTNode>> filters = joinTree.getFilters();
if (type == JoinType.LEFTOUTER || type == JoinType.FULLOUTER) {
joinTree.addFilterMapping(cond.getLeft(), cond.getRight(), filters.get(0).size());
}
if (type == JoinType.RIGHTOUTER || type == JoinType.FULLOUTER) {
joinTree.addFilterMapping(cond.getRight(), cond.getLeft(), filters.get(1).size());
}
}
/**
* Parse the join condition. If the condition is a join condition, throw an
* error if it is not an equality. Otherwise, break it into left and right
* expressions and store in the join tree. If the condition is a join filter,
* add it to the filter list of the join tree. The join condition can contain
* conditions on both the left and right trees and filters on either.
* Currently, we only support equi-joins, so we throw an error if the
* condition involves both subtrees and is not an equality. Also, we only
* support AND; ORs are not supported currently, as their semantics are not
* very clear, may lead to data explosion, and there is no use case.
*
* @param joinTree
* jointree to be populated
* @param joinCond
* join condition
* @param leftSrc
* left sources
* @throws SemanticException
*/
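// Illustrative example (hypothetical query): in "a JOIN b ON (a.x = b.y AND a.z > 5)",
// the equality "a.x = b.y" becomes a join expression, "a.z > 5" (referencing only
// one side) is kept as a filter, and a non-equality touching both sides, such as
// "a.x < b.y", is rejected.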
@SuppressWarnings("rawtypes")
private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond,
List<String> leftSrc, JoinType type,
Map<String, Operator> aliasToOpInfo) throws SemanticException {
if (joinCond == null) {
return;
}
switch (joinCond.getToken().getType()) {
case HiveParser.KW_OR:
throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3
.getMsg(joinCond));
case HiveParser.KW_AND:
parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(0), leftSrc, type, aliasToOpInfo);
parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(1), leftSrc, type, aliasToOpInfo);
break;
case HiveParser.EQUAL_NS:
case HiveParser.EQUAL:
ASTNode leftCondn = (ASTNode) joinCond.getChild(0);
ArrayList<String> leftCondAl1 = new ArrayList<String>();
ArrayList<String> leftCondAl2 = new ArrayList<String>();
parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2,
null, aliasToOpInfo);
ASTNode rightCondn = (ASTNode) joinCond.getChild(1);
ArrayList<String> rightCondAl1 = new ArrayList<String>();
ArrayList<String> rightCondAl2 = new ArrayList<String>();
parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1,
rightCondAl2, null, aliasToOpInfo);
// is it a filter or a join condition
// if it is filter see if it can be pushed above the join
// filter cannot be pushed if
// * join is full outer or
// * join is left outer and filter is on left alias or
// * join is right outer and filter is on right alias
if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0))
|| ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) {
throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
.getMsg(joinCond));
}
applyEqualityPredicateToQBJoinTree(joinTree, type, leftSrc,
joinCond, leftCondn, rightCondn,
leftCondAl1, leftCondAl2,
rightCondAl1, rightCondAl2);
break;
default:
boolean isFunction = (joinCond.getType() == HiveParser.TOK_FUNCTION);
// Create all children
int childrenBegin = (isFunction ? 1 : 0);
ArrayList<ArrayList<String>> leftAlias = new ArrayList<ArrayList<String>>(
joinCond.getChildCount() - childrenBegin);
ArrayList<ArrayList<String>> rightAlias = new ArrayList<ArrayList<String>>(
joinCond.getChildCount() - childrenBegin);
for (int ci = 0; ci < joinCond.getChildCount() - childrenBegin; ci++) {
ArrayList<String> left = new ArrayList<String>();
ArrayList<String> right = new ArrayList<String>();
leftAlias.add(left);
rightAlias.add(right);
}
for (int ci = childrenBegin; ci < joinCond.getChildCount(); ci++) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) joinCond.getChild(ci),
leftAlias.get(ci - childrenBegin), rightAlias.get(ci
- childrenBegin), null, aliasToOpInfo);
}
boolean leftAliasNull = true;
for (ArrayList<String> left : leftAlias) {
if (left.size() != 0) {
leftAliasNull = false;
break;
}
}
boolean rightAliasNull = true;
for (ArrayList<String> right : rightAlias) {
if (right.size() != 0) {
rightAliasNull = false;
break;
}
}
if (!leftAliasNull && !rightAliasNull) {
throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
.getMsg(joinCond));
}
if (!leftAliasNull) {
if (type.equals(JoinType.LEFTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(0).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
if (type.equals(JoinType.RIGHTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOUTERJOINSUPPORTSFILTERS)) {
joinTree.getFilters().get(1).add(joinCond);
} else {
LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS);
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else {
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
}
break;
}
}
@SuppressWarnings("rawtypes")
private void extractJoinCondsFromWhereClause(QBJoinTree joinTree, QB qb, String dest, ASTNode predicate,
Map<String, Operator> aliasToOpInfo) throws SemanticException {
switch (predicate.getType()) {
case HiveParser.KW_AND:
extractJoinCondsFromWhereClause(joinTree, qb, dest,
(ASTNode) predicate.getChild(0), aliasToOpInfo);
extractJoinCondsFromWhereClause(joinTree, qb, dest,
(ASTNode) predicate.getChild(1), aliasToOpInfo);
break;
case HiveParser.EQUAL_NS:
case HiveParser.EQUAL:
ASTNode leftCondn = (ASTNode) predicate.getChild(0);
ArrayList<String> leftCondAl1 = new ArrayList<String>();
ArrayList<String> leftCondAl2 = new ArrayList<String>();
try {
parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2,
null, aliasToOpInfo);
} catch(SemanticException se) {
// suppress here; if it is a real issue, it will get caught in where clause handling.
return;
}
ASTNode rightCondn = (ASTNode) predicate.getChild(1);
ArrayList<String> rightCondAl1 = new ArrayList<String>();
ArrayList<String> rightCondAl2 = new ArrayList<String>();
try {
parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1,
rightCondAl2, null, aliasToOpInfo);
} catch(SemanticException se) {
// suppress here; if it is a real issue, it will get caught in where clause handling.
return;
}
if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0))
|| ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) {
// this is not a join condition.
return;
}
if (((leftCondAl1.size() == 0) && (leftCondAl2.size() == 0))
|| ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
// this is not a join condition. Will get handled by predicate pushdown.
return;
}
List<String> leftSrc = new ArrayList<String>();
JoinCond cond = joinTree.getJoinCond()[0];
JoinType type = cond.getJoinType();
applyEqualityPredicateToQBJoinTree(joinTree, type, leftSrc,
predicate, leftCondn, rightCondn,
leftCondAl1, leftCondAl2,
rightCondAl1, rightCondAl2);
if (leftSrc.size() == 1) {
joinTree.setLeftAlias(leftSrc.get(0));
}
// todo: hold onto this predicate, so that we don't add it to the Filter Operator.
break;
default:
return;
}
}
@SuppressWarnings("nls")
public Operator putOpInsertMap(Operator op,
RowResolver rr) {
OpParseContext ctx = new OpParseContext(rr);
opParseCtx.put(op, ctx);
op.augmentPlan();
return op;
}
@SuppressWarnings("nls")
private Operator genHavingPlan(String dest, QB qb, Operator input,
Map<String, Operator> aliasToOpInfo)
throws SemanticException {
ASTNode havingExpr = qb.getParseInfo().getHavingForClause(dest);
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
Map<ASTNode, String> exprToColumnAlias = qb.getParseInfo().getAllExprToColumnAlias();
for (ASTNode astNode : exprToColumnAlias.keySet()) {
if (inputRR.getExpression(astNode) != null) {
inputRR.put("", exprToColumnAlias.get(astNode), inputRR.getExpression(astNode));
}
}
ASTNode condn = (ASTNode) havingExpr.getChild(0);
/*
* Now a having clause can contain a SubQuery predicate;
* so we invoke genFilterPlan to handle SubQuery algebraic transformation,
* just as is done for SubQuery predicates appearing in the Where Clause.
*/
Operator output = genFilterPlan(condn, qb, input, aliasToOpInfo, true);
output = putOpInsertMap(output, inputRR);
return output;
}
private Operator genPlanForSubQueryPredicate(
QB qbSQ,
ISubQueryJoinInfo subQueryPredicate) throws SemanticException {
qbSQ.setSubQueryDef(subQueryPredicate.getSubQuery());
Phase1Ctx ctx_1 = initPhase1Ctx();
doPhase1(subQueryPredicate.getSubQueryAST(), qbSQ, ctx_1, null);
getMetaData(qbSQ);
Operator op = genPlan(qbSQ);
return op;
}
@SuppressWarnings("nls")
private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input,
Map<String, Operator> aliasToOpInfo,
boolean forHavingClause)
throws SemanticException {
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
/*
* Handling of SubQuery Expressions:
* if "Where clause contains no SubQuery expressions" then
* -->[true] ===CONTINUE_FILTER_PROCESSING===
* else
* -->[false] "extract SubQuery expressions\n from Where clause"
* if "this is a nested SubQuery or \nthere are more than 1 SubQuery expressions" then
* -->[yes] "throw Unsupported Error"
* else
* --> "Rewrite Search condition to \nremove SubQuery predicate"
* --> "build QBSubQuery"
* --> "extract correlated predicates \nfrom Where Clause"
* --> "add correlated Items to \nSelect List and Group By"
* --> "construct Join Predicate \nfrom correlation predicates"
* --> "Generate Plan for\n modified SubQuery"
* --> "Build the Join Condition\n for Parent Query to SubQuery join"
* --> "Build the QBJoinTree from the Join condition"
* --> "Update Parent Query Filter\n with any Post Join conditions"
* --> ===CONTINUE_FILTER_PROCESSING===
* endif
* endif
*
* Support for Sub Queries in Having Clause:
* - By and large this works the same way as SubQueries in the Where Clause.
* - The one addendum is the handling of aggregation expressions from the Outer Query
* appearing in correlation clauses.
* - So such correlating predicates are allowed:
* min(OuterQuery.x) = SubQuery.y
* - this requires special handling when converting to joins. See the QBSubQuery.rewrite
* method for detailed comments.
*/
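/*
* Illustrative example (hypothetical query): a predicate such as
* "key IN (SELECT key FROM src2)" is removed from the search condition, a
* QBSubQuery is built for the sub-query, its plan is generated, and the outer
* query is joined to that plan on the rewritten IN condition.
*/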
List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
if ( subQueriesInOriginalTree.size() > 0 ) {
/*
* Restriction.9.m :: disallow nested SubQuery expressions.
*/
if (qb.getSubQueryPredicateDef() != null ) {
throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported."));
}
/*
* Restriction.8.m :: We allow only 1 SubQuery expression per Query.
*/
if (subQueriesInOriginalTree.size() > 1 ) {
throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported."));
}
/*
* Clone the Search AST; apply all rewrites on the clone.
*/
ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond);
List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond);
for(int i=0; i < subQueries.size(); i++) {
ASTNode subQueryAST = subQueries.get(i);
ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i);
int sqIdx = qb.incrNumSubQueryPredicates();
clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST);
QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(),
sqIdx, subQueryAST, originalSubQueryAST, ctx);
if ( !forHavingClause ) {
qb.setWhereClauseSubQueryPredicate(subQuery);
} else {
qb.setHavingClauseSubQueryPredicate(subQuery);
}
String havingInputAlias = null;
if ( forHavingClause ) {
havingInputAlias = "gby_sq" + sqIdx;
aliasToOpInfo.put(havingInputAlias, input);
}
subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, aliasToOpInfo.keySet());
QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true);
Operator sqPlanTopOp = genPlanForSubQueryPredicate(qbSQ, subQuery);
aliasToOpInfo.put(subQuery.getAlias(), sqPlanTopOp);
RowResolver sqRR = opParseCtx.get(sqPlanTopOp).getRowResolver();
/*
* Check.5.h :: For In and Not In the SubQuery must implicitly or
* explicitly only contain one select item.
*/
if ( subQuery.getOperator().getType() != SubQueryType.EXISTS &&
subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS &&
sqRR.getColumnInfos().size() -
subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1 ) {
throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
subQueryAST, "SubQuery can contain only 1 item in Select List."));
}
/*
* If this is a Not In SubQuery Predicate then Join in the Null Check SubQuery.
* See QBSubQuery.NotInCheck for details on why and how this is constructed.
*/
if ( subQuery.getNotInCheck() != null ) {
QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck();
notInCheck.setSQRR(sqRR);
QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true);
Operator sqnicPlanTopOp = genPlanForSubQueryPredicate(qbSQ_nic, notInCheck);
aliasToOpInfo.put(notInCheck.getAlias(), sqnicPlanTopOp);
QBJoinTree joinTree_nic = genSQJoinTree(qb, notInCheck,
input,
aliasToOpInfo);
pushJoinFilters(qb, joinTree_nic, aliasToOpInfo, false);
input = genJoinOperator(qbSQ_nic, joinTree_nic, aliasToOpInfo, input);
inputRR = opParseCtx.get(input).getRowResolver();
if ( forHavingClause ) {
aliasToOpInfo.put(havingInputAlias, input);
}
}
/*
* Gen Join between outer Operator and SQ op
*/
subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias);
QBJoinTree joinTree = genSQJoinTree(qb, subQuery,
input,
aliasToOpInfo);
/*
* push filters only for this QBJoinTree. Child QBJoinTrees have already been handled.
*/
pushJoinFilters(qb, joinTree, aliasToOpInfo, false);
input = genJoinOperator(qbSQ, joinTree, aliasToOpInfo, input);
searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond);
}
}
return genFilterPlan(qb, searchCond, input);
}
/**
* create a filter plan. The condition and the inputs are specified.
*
* @param qb
* current query block
* @param condn
* The condition to be resolved
* @param input
* the input operator
*/
@SuppressWarnings("nls")
private Operator genFilterPlan(QB qb, ASTNode condn, Operator input)
throws SemanticException {
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new FilterDesc(genExprNodeDesc(condn, inputRR), false), new RowSchema(
inputRR.getColumnInfos()), input), inputRR);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: "
+ inputRR.toString());
}
return output;
}
/*
* For inner joins, push an 'is not null' predicate to the join sources for
* every non null-safe join key.
*/
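// Illustrative example (hypothetical query): for the inner join
// "a JOIN b ON (a.key = b.key)" with no null-safe <=>, filters like
// "a.key is not null" and "b.key is not null" are added below the respective
// join sources, unless an equivalent predicate already exists on that source.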
private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input,
QBJoinTree joinTree, ExprNodeDesc[] joinKeys) throws SemanticException {
if (qb == null || joinTree == null) {
return input;
}
if (!joinTree.getNoOuterJoin()) {
return input;
}
if (joinKeys == null || joinKeys.length == 0) {
return input;
}
Map<Integer, ExprNodeDesc> hashes = new HashMap<Integer, ExprNodeDesc>();
if (input instanceof FilterOperator) {
ExprNodeDescUtils.getExprNodeColumnDesc(Arrays.asList(((FilterDesc)input.getConf()).getPredicate()), hashes);
}
ExprNodeDesc filterPred = null;
List<Boolean> nullSafes = joinTree.getNullSafes();
for (int i = 0; i < joinKeys.length; i++) {
if (nullSafes.get(i) || (joinKeys[i] instanceof ExprNodeColumnDesc &&
((ExprNodeColumnDesc)joinKeys[i]).getIsPartitionColOrVirtualCol())) {
// no need to generate is not null predicate for partitioning or
// virtual column, since those columns can never be null.
continue;
}
if(null != hashes.get(joinKeys[i].hashCode())) {
// there is already a predicate on this src.
continue;
}
List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
args.add(joinKeys[i]);
ExprNodeDesc nextExpr = ExprNodeGenericFuncDesc.newInstance(
FunctionRegistry.getFunctionInfo("isnotnull").getGenericUDF(), args);
filterPred = filterPred == null ? nextExpr : ExprNodeDescUtils
.mergePredicates(filterPred, nextExpr);
}
if (filterPred == null) {
return input;
}
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
if (input instanceof FilterOperator) {
FilterOperator f = (FilterOperator) input;
List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
preds.add(f.getConf().getPredicate());
preds.add(filterPred);
f.getConf().setPredicate(ExprNodeDescUtils.mergePredicates(preds));
return input;
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new FilterDesc(filterPred, false),
new RowSchema(inputRR.getColumnInfos()), input), inputRR);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: "
+ inputRR.toString());
}
return output;
}
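// Expands a column regex such as the "*" in "SELECT *" or "SELECT t.*":
// every visible, non-hidden column of the matching alias(es) whose name matches
// the case-insensitive pattern is appended to col_list and to the output
// RowResolver, starting at column position 'pos'.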
@SuppressWarnings("nls")
// TODO: make aliases unique, otherwise needless rewriting takes place
private Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel,
ArrayList<ExprNodeDesc> col_list, HashSet<ColumnInfo> excludeCols, RowResolver input,
Integer pos, RowResolver output, List<String> aliases, boolean ensureUniqueCols)
throws SemanticException {
// The table alias should exist
if (tabAlias != null && !input.hasTableAlias(tabAlias)) {
throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(sel));
}
// TODO: Have to put in the support for AS clause
Pattern regex = null;
try {
regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE);
} catch (PatternSyntaxException e) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel, e
.getMessage()));
}
StringBuilder replacementText = new StringBuilder();
int matched = 0;
// add empty string to the list of aliases. Some operators (e.g. GroupBy) add
// ColumnInfos for table alias "".
if (!aliases.contains("")) {
aliases.add("");
}
/*
* track the input ColumnInfos that are added to the output.
* if a columnInfo has multiple mappings; then add the column only once,
* but carry the mappings forward.
*/
Map<ColumnInfo, ColumnInfo> inputColsProcessed = new HashMap<ColumnInfo, ColumnInfo>();
// For expr "*", aliases should be iterated in the order they are specified
// in the query.
for (String alias : aliases) {
HashMap<String, ColumnInfo> fMap = input.getFieldMap(alias);
if (fMap == null) {
continue;
}
// For the tab.* case, add all the columns to the fieldList
// from the input schema
for (Map.Entry<String, ColumnInfo> entry : fMap.entrySet()) {
ColumnInfo colInfo = entry.getValue();
if (excludeCols != null && excludeCols.contains(colInfo)) {
continue; // This was added during plan generation.
}
String name = colInfo.getInternalName();
String[] tmp = input.reverseLookup(name);
// Skip the colinfos which are not for this particular alias
if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
continue;
}
if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
continue;
}
// Not matching the regex?
if (!regex.matcher(tmp[1]).matches()) {
continue;
}
ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
if (oColInfo == null) {
ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(),
name, colInfo.getTabAlias(), colInfo.getIsVirtualCol(),
colInfo.isSkewedCol());
col_list.add(expr);
oColInfo = new ColumnInfo(getColumnInternalName(pos),
colInfo.getType(), colInfo.getTabAlias(),
colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
inputColsProcessed.put(colInfo, oColInfo);
}
if (ensureUniqueCols) {
if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) {
throw new OptiqSemanticException("Cannot add column to RR: " + tmp[0] + "." + tmp[1]
+ " => " + oColInfo + " due to duplication, see previous warnings");
}
} else {
output.put(tmp[0], tmp[1], oColInfo);
}
pos = Integer.valueOf(pos.intValue() + 1);
matched++;
if (unparseTranslator.isEnabled()) {
if (replacementText.length() > 0) {
replacementText.append(", ");
}
replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf));
replacementText.append(".");
replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf));
}
}
}
if (matched == 0) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel));
}
if (unparseTranslator.isEnabled()) {
unparseTranslator.addTranslation(sel, replacementText.toString());
}
return pos;
}
public static String getColumnInternalName(int pos) {
return HiveConf.getColumnInternalName(pos);
}
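// Helpers for splitting a TRANSFORM/script command into program and arguments;
// e.g. (illustrative) for "python my_script.py arg1", getScriptProgName returns
// "python" and getScriptArgs returns " my_script.py arg1" (including the leading space).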
private String getScriptProgName(String cmd) {
int end = cmd.indexOf(" ");
return (end == -1) ? cmd : cmd.substring(0, end);
}
private String getScriptArgs(String cmd) {
int end = cmd.indexOf(" ");
return (end == -1) ? "" : cmd.substring(end, cmd.length());
}
private static int getPositionFromInternalName(String internalName) {
return HiveConf.getPositionFromInternalName(internalName);
}
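// If the script program is a downloadable resource (e.g. a non-local URI such
// as hdfs://...), register it as a session FILE resource so it gets localized,
// and rewrite the command to refer to the downloaded file's name.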
private String fetchFilesNotInLocalFilesystem(String cmd) {
SessionState ss = SessionState.get();
String progName = getScriptProgName(cmd);
if (SessionState.canDownloadResource(progName)) {
String filePath = ss.add_resource(ResourceType.FILE, progName, true);
Path p = new Path(filePath);
String fileName = p.getName();
String scriptArgs = getScriptArgs(cmd);
String finalCmd = fileName + scriptArgs;
return finalCmd;
}
return cmd;
}
private TableDesc getTableDescFromSerDe(ASTNode child, String cols,
String colTypes, boolean defaultCols) throws SemanticException {
if (child.getType() == HiveParser.TOK_SERDENAME) {
String serdeName = unescapeSQLString(child.getChild(0).getText());
Class<? extends Deserializer> serdeClass = null;
try {
serdeClass = (Class<? extends Deserializer>) Class.forName(serdeName,
true, Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
TableDesc tblDesc = PlanUtils.getTableDesc(serdeClass, Integer
.toString(Utilities.tabCode), cols, colTypes, defaultCols);
// copy all the properties
if (child.getChildCount() == 2) {
ASTNode prop = (ASTNode) ((ASTNode) child.getChild(1)).getChild(0);
for (int propChild = 0; propChild < prop.getChildCount(); propChild++) {
String key = unescapeSQLString(prop.getChild(propChild).getChild(0)
.getText());
String value = unescapeSQLString(prop.getChild(propChild).getChild(1)
.getText());
tblDesc.getProperties().setProperty(key, value);
}
}
return tblDesc;
} else if (child.getType() == HiveParser.TOK_SERDEPROPS) {
TableDesc tblDesc = PlanUtils.getDefaultTableDesc(Integer
.toString(Utilities.ctrlaCode), cols, colTypes, defaultCols);
int numChildRowFormat = child.getChildCount();
for (int numC = 0; numC < numChildRowFormat; numC++) {
ASTNode rowChild = (ASTNode) child.getChild(numC);
switch (rowChild.getToken().getType()) {
case HiveParser.TOK_TABLEROWFORMATFIELD:
String fieldDelim = unescapeSQLString(rowChild.getChild(0).getText());
tblDesc.getProperties()
.setProperty(serdeConstants.FIELD_DELIM, fieldDelim);
tblDesc.getProperties().setProperty(serdeConstants.SERIALIZATION_FORMAT,
fieldDelim);
if (rowChild.getChildCount() >= 2) {
String fieldEscape = unescapeSQLString(rowChild.getChild(1)
.getText());
tblDesc.getProperties().setProperty(serdeConstants.ESCAPE_CHAR,
fieldEscape);
}
break;
case HiveParser.TOK_TABLEROWFORMATCOLLITEMS:
tblDesc.getProperties().setProperty(serdeConstants.COLLECTION_DELIM,
unescapeSQLString(rowChild.getChild(0).getText()));
break;
case HiveParser.TOK_TABLEROWFORMATMAPKEYS:
tblDesc.getProperties().setProperty(serdeConstants.MAPKEY_DELIM,
unescapeSQLString(rowChild.getChild(0).getText()));
break;
case HiveParser.TOK_TABLEROWFORMATLINES:
String lineDelim = unescapeSQLString(rowChild.getChild(0).getText());
tblDesc.getProperties().setProperty(serdeConstants.LINE_DELIM, lineDelim);
if (!lineDelim.equals("\n") && !lineDelim.equals("10")) {
throw new SemanticException(generateErrorMessage(rowChild,
ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg()));
}
break;
case HiveParser.TOK_TABLEROWFORMATNULL:
String nullFormat = unescapeSQLString(rowChild.getChild(0).getText());
tblDesc.getProperties().setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT,
nullFormat);
break;
default:
assert false;
}
}
return tblDesc;
}
// should never come here
return null;
}
private void failIfColAliasExists(Set<String> nameSet, String name)
throws SemanticException {
if (nameSet.contains(name)) {
throw new SemanticException(ErrorMsg.COLUMN_ALIAS_ALREADY_EXISTS
.getMsg(name));
}
nameSet.add(name);
}
@SuppressWarnings("nls")
private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input)
throws SemanticException {
// If there is no "AS" clause, the output schema will be "key,value"
ArrayList<ColumnInfo> outputCols = new ArrayList<ColumnInfo>();
int inputSerDeNum = 1, inputRecordWriterNum = 2;
int outputSerDeNum = 4, outputRecordReaderNum = 5;
int outputColsNum = 6;
boolean outputColNames = false, outputColSchemas = false;
int execPos = 3;
boolean defaultOutputCols = false;
// Go over all the children
if (trfm.getChildCount() > outputColsNum) {
ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum);
if (outCols.getType() == HiveParser.TOK_ALIASLIST) {
outputColNames = true;
} else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) {
outputColSchemas = true;
}
}
// If column type is not specified, use a string
if (!outputColNames && !outputColSchemas) {
String intName = getColumnInternalName(0);
ColumnInfo colInfo = new ColumnInfo(intName,
TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias("key");
outputCols.add(colInfo);
intName = getColumnInternalName(1);
colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null,
false);
colInfo.setAlias("value");
outputCols.add(colInfo);
defaultOutputCols = true;
} else {
ASTNode collist = (ASTNode) trfm.getChild(outputColsNum);
int ccount = collist.getChildCount();
Set<String> colAliasNamesDuplicateCheck = new HashSet<String>();
if (outputColNames) {
for (int i = 0; i < ccount; ++i) {
String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i))
.getText());
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName,
TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
} else {
for (int i = 0; i < ccount; ++i) {
ASTNode child = (ASTNode) collist.getChild(i);
assert child.getType() == HiveParser.TOK_TABCOL;
String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0))
.getText());
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils
.getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child
.getChild(1))), null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
}
}
RowResolver out_rwsch = new RowResolver();
StringBuilder columns = new StringBuilder();
StringBuilder columnTypes = new StringBuilder();
for (int i = 0; i < outputCols.size(); ++i) {
if (i != 0) {
columns.append(",");
columnTypes.append(",");
}
columns.append(outputCols.get(i).getInternalName());
columnTypes.append(outputCols.get(i).getType().getTypeName());
out_rwsch.put(qb.getParseInfo().getAlias(), outputCols.get(i).getAlias(),
outputCols.get(i));
}
StringBuilder inpColumns = new StringBuilder();
StringBuilder inpColumnTypes = new StringBuilder();
ArrayList<ColumnInfo> inputSchema = opParseCtx.get(input).getRowResolver()
.getColumnInfos();
for (int i = 0; i < inputSchema.size(); ++i) {
if (i != 0) {
inpColumns.append(",");
inpColumnTypes.append(",");
}
inpColumns.append(inputSchema.get(i).getInternalName());
inpColumnTypes.append(inputSchema.get(i).getType().getTypeName());
}
TableDesc outInfo;
TableDesc errInfo;
TableDesc inInfo;
String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE);
Class<? extends Deserializer> serde;
try {
serde = (Class<? extends Deserializer>) Class.forName(defaultSerdeName,
true, Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
int fieldSeparator = Utilities.tabCode;
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESCRIPTESCAPE)) {
fieldSeparator = Utilities.ctrlaCode;
}
// Input and Output Serdes
if (trfm.getChild(inputSerDeNum).getChildCount() > 0) {
inInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm
.getChild(inputSerDeNum))).getChild(0), inpColumns.toString(),
inpColumnTypes.toString(), false);
} else {
inInfo = PlanUtils.getTableDesc(serde, Integer
.toString(fieldSeparator), inpColumns.toString(), inpColumnTypes
.toString(), false, true);
}
if (trfm.getChild(outputSerDeNum).getChildCount() > 0) {
outInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm
.getChild(outputSerDeNum))).getChild(0), columns.toString(),
columnTypes.toString(), false);
// This is for backward compatibility. If the user did not specify the
// output column list, we assume that there are 2 columns: key and value.
// However, if the script outputs: col1, col2, col3 separated by TAB, the
// requirement is: key is col1 and value is (col2 TAB col3)
} else {
outInfo = PlanUtils.getTableDesc(serde, Integer
.toString(fieldSeparator), columns.toString(), columnTypes
.toString(), defaultOutputCols);
}
// Error stream always uses the default serde with a single column
errInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), "KEY");
// Output record readers
Class<? extends RecordReader> outRecordReader = getRecordReader((ASTNode) trfm
.getChild(outputRecordReaderNum));
Class<? extends RecordWriter> inRecordWriter = getRecordWriter((ASTNode) trfm
.getChild(inputRecordWriterNum));
Class<? extends RecordReader> errRecordReader = getDefaultRecordReader();
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new ScriptDesc(
fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())),
inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo),
new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
output.setColumnExprMap(new HashMap<String, ExprNodeDesc>()); // disable backtracking
return output;
}
private Class<? extends RecordReader> getRecordReader(ASTNode node)
throws SemanticException {
String name;
if (node.getChildCount() == 0) {
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER);
} else {
name = unescapeSQLString(node.getChild(0).getText());
}
try {
return (Class<? extends RecordReader>) Class.forName(name, true,
Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
private Class<? extends RecordReader> getDefaultRecordReader()
throws SemanticException {
String name;
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER);
try {
return (Class<? extends RecordReader>) Class.forName(name, true,
Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
private Class<? extends RecordWriter> getRecordWriter(ASTNode node)
throws SemanticException {
String name;
if (node.getChildCount() == 0) {
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDWRITER);
} else {
name = unescapeSQLString(node.getChild(0).getText());
}
try {
return (Class<? extends RecordWriter>) Class.forName(name, true,
Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
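// Grouping sets are encoded as integer bitmaps over the GROUP BY expressions
// (bit i set => the i-th grouping expression participates in the set, matching
// the encoding produced by getGroupingSets below). For ROLLUP on n keys this
// yields the n+1 prefixes 0, 1, 3, ..., 2^n - 1; e.g. (illustrative)
// GROUP BY a, b, c WITH ROLLUP gives bitmaps 0, 1, 3, 7 for {}, {a}, {a,b}, {a,b,c}.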
private List<Integer> getGroupingSetsForRollup(int size) {
List<Integer> groupingSetKeys = new ArrayList<Integer>();
for (int i = 0; i <= size; i++) {
groupingSetKeys.add((1 << i) - 1);
}
return groupingSetKeys;
}
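// CUBE enumerates every subset of the grouping keys, i.e. all 2^n bitmaps;
// e.g. (illustrative) GROUP BY a, b WITH CUBE gives bitmaps 0..3 for {}, {a}, {b}, {a,b}.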
private List<Integer> getGroupingSetsForCube(int size) {
int count = 1 << size;
List<Integer> results = new ArrayList<Integer>(count);
for (int i = 0; i < count; ++i) {
results.add(i);
}
return results;
}
// This function returns the grouping sets along with the grouping expressions
// Even if rollups and cubes are present in the query, they are converted to
// grouping sets at this point
private ObjectPair<List<ASTNode>, List<Integer>> getGroupByGroupingSetsForClause(
QBParseInfo parseInfo, String dest) throws SemanticException {
List<Integer> groupingSets = new ArrayList<Integer>();
List<ASTNode> groupByExprs = getGroupByForClause(parseInfo, dest);
if (parseInfo.getDestRollups().contains(dest)) {
groupingSets = getGroupingSetsForRollup(groupByExprs.size());
} else if (parseInfo.getDestCubes().contains(dest)) {
groupingSets = getGroupingSetsForCube(groupByExprs.size());
} else if (parseInfo.getDestGroupingSets().contains(dest)) {
groupingSets = getGroupingSets(groupByExprs, parseInfo, dest);
}
return new ObjectPair<List<ASTNode>, List<Integer>>(groupByExprs, groupingSets);
}
private List<Integer> getGroupingSets(List<ASTNode> groupByExpr, QBParseInfo parseInfo,
String dest) throws SemanticException {
Map<String, Integer> exprPos = new HashMap<String, Integer>();
for (int i = 0; i < groupByExpr.size(); ++i) {
ASTNode node = groupByExpr.get(i);
exprPos.put(node.toStringTree(), i);
}
ASTNode root = parseInfo.getGroupByForClause(dest);
List<Integer> result = new ArrayList<Integer>(root == null ? 0 : root.getChildCount());
if (root != null) {
for (int i = 0; i < root.getChildCount(); ++i) {
ASTNode child = (ASTNode) root.getChild(i);
if (child.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) {
continue;
}
int bitmap = 0;
for (int j = 0; j < child.getChildCount(); ++j) {
String treeAsString = child.getChild(j).toStringTree();
Integer pos = exprPos.get(treeAsString);
if (pos == null) {
throw new SemanticException(
generateErrorMessage((ASTNode) child.getChild(j),
ErrorMsg.HIVE_GROUPING_SETS_EXPR_NOT_IN_GROUPBY.getErrorCodedMsg()));
}
bitmap = setBit(bitmap, pos);
}
result.add(bitmap);
}
}
if (checkForNoAggr(result)) {
throw new SemanticException(
ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOFUNC.getMsg());
}
return result;
}
private boolean checkForNoAggr(List<Integer> bitmaps) {
boolean ret = true;
for (int mask : bitmaps) {
ret &= mask == 0;
}
return ret;
}
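// Sets bit 'bitIdx' in a grouping-set bitmap, marking the grouping expression
// at that position as present in the set.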
private int setBit(int bitmap, int bitIdx) {
return bitmap | (1 << bitIdx);
}
/**
* This function is a wrapper of parseInfo.getGroupByForClause which
* automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY
* a,b,c.
*/
static List<ASTNode> getGroupByForClause(QBParseInfo parseInfo, String dest) {
if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
ASTNode selectExprs = parseInfo.getSelForClause(dest);
List<ASTNode> result = new ArrayList<ASTNode>(selectExprs == null ? 0
: selectExprs.getChildCount());
if (selectExprs != null) {
HashMap<String, ASTNode> windowingExprs = parseInfo.getWindowingExprsForClause(dest);
for (int i = 0; i < selectExprs.getChildCount(); ++i) {
if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.TOK_HINTLIST) {
continue;
}
// table.column AS alias
ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
/*
* If this is handled by Windowing then ignore it.
*/
if (windowingExprs != null && windowingExprs.containsKey(grpbyExpr.toStringTree())) {
continue;
}
result.add(grpbyExpr);
}
}
return result;
} else {
ASTNode grpByExprs = parseInfo.getGroupByForClause(dest);
List<ASTNode> result = new ArrayList<ASTNode>(grpByExprs == null ? 0
: grpByExprs.getChildCount());
if (grpByExprs != null) {
for (int i = 0; i < grpByExprs.getChildCount(); ++i) {
ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i);
if (grpbyExpr.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) {
result.add(grpbyExpr);
}
}
}
return result;
}
}
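// Determines the (table alias, column alias) pair for a SELECT expression:
// an explicit "AS" alias wins; otherwise a simple column reference (or the
// trailing identifier of a dotted reference) supplies the alias; otherwise,
// for function calls (when includeFuncName is set), an alias is derived from
// the flattened function text; failing all of these, defaultName plus the
// column position is used.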
private static String[] getColAlias(ASTNode selExpr, String defaultName,
RowResolver inputRR, boolean includeFuncName, int colNum) {
String colAlias = null;
String tabAlias = null;
String[] colRef = new String[2];
//for queries with a windowing expressions, the selexpr may have a third child
if (selExpr.getChildCount() == 2 ||
(selExpr.getChildCount() == 3 &&
selExpr.getChild(2).getType() == HiveParser.TOK_WINDOWSPEC)) {
// return zz for "xx + yy AS zz"
colAlias = unescapeIdentifier(selExpr.getChild(1).getText());
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
}
ASTNode root = (ASTNode) selExpr.getChild(0);
if (root.getType() == HiveParser.TOK_TABLE_OR_COL) {
colAlias =
BaseSemanticAnalyzer.unescapeIdentifier(root.getChild(0).getText());
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
}
if (root.getType() == HiveParser.DOT) {
ASTNode tab = (ASTNode) root.getChild(0);
if (tab.getType() == HiveParser.TOK_TABLE_OR_COL) {
String t = unescapeIdentifier(tab.getChild(0).getText());
if (inputRR.hasTableAlias(t)) {
tabAlias = t;
}
}
// Return zz for "xx.zz" and "xx.yy.zz"
ASTNode col = (ASTNode) root.getChild(1);
if (col.getType() == HiveParser.Identifier) {
colAlias = unescapeIdentifier(col.getText());
}
}
// if specified generate alias using func name
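// For example (illustrative), an expression like concat(x, y) would typically yield an
// auto-generated alias such as "concat_x_y_<colNum>"; the exact form depends on the
// AST's string rendering.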
if (includeFuncName && (root.getType() == HiveParser.TOK_FUNCTION)) {
String expr_flattened = root.toStringTree();
// remove all TOK tokens
String expr_no_tok = expr_flattened.replaceAll("TOK_\\S+", "");
// remove all non alphanumeric letters, replace whitespace spans with underscore
String expr_formatted = expr_no_tok.replaceAll("\\W", " ").trim().replaceAll("\\s+", "_");
// limit length to 20 chars
if (expr_formatted.length() > AUTOGEN_COLALIAS_PRFX_MAXLENGTH) {
expr_formatted = expr_formatted.substring(0, AUTOGEN_COLALIAS_PRFX_MAXLENGTH);
}
// append colnum to make it unique
colAlias = expr_formatted.concat("_" + colNum);
}
if (colAlias == null) {
// Return defaultName if selExpr is not a simple xx.yy.zz
colAlias = defaultName + colNum;
}
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
}
/**
* Returns whether the pattern is a regex expression (instead of a normal
* string). A normal string consists only of letters, digits, and "_".
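* For example, "key_1" is treated as a plain column name, while "key.*" or
* "(key|value)" is treated as a regex.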
*/
private static boolean isRegex(String pattern, HiveConf conf) {
String qIdSupport = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT);
if ( "column".equals(qIdSupport)) {
return false;
}
for (int i = 0; i < pattern.length(); i++) {
if (!Character.isLetterOrDigit(pattern.charAt(i))
&& pattern.charAt(i) != '_') {
return true;
}
}
return false;
}
private Operator<?> genSelectPlan(String dest, QB qb, Operator<?> input)
throws SemanticException {
ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
Operator<?> op = genSelectPlan(selExprList, qb, input, false);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Select Plan for clause: " + dest);
}
return op;
}
@SuppressWarnings("nls")
private Operator<?> genSelectPlan(ASTNode selExprList, QB qb,
Operator<?> input, boolean outerLV) throws SemanticException {
if (LOG.isDebugEnabled()) {
LOG.debug("tree: " + selExprList.toStringTree());
}
ArrayList<ExprNodeDesc> col_list = new ArrayList<ExprNodeDesc>();
RowResolver out_rwsch = new RowResolver();
ASTNode trfm = null;
Integer pos = Integer.valueOf(0);
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
// SELECT * or SELECT TRANSFORM(*)
boolean selectStar = false;
int posn = 0;
boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST);
if (hintPresent) {
posn++;
}
boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() ==
HiveParser.TOK_TRANSFORM);
if (isInTransform) {
queryProperties.setUsesScript(true);
globalLimitCtx.setHasTransformOrUDTF(true);
trfm = (ASTNode) selExprList.getChild(posn).getChild(0);
}
// Detect queries of the form SELECT udtf(col) AS ...
// by looking for a function as the first child, and then checking to see
// if the function is a Generic UDTF. It's not as clean as TRANSFORM due to
// the lack of a special token.
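// For example, a query such as SELECT explode(col) AS c FROM t is recognized here
// because explode resolves to a GenericUDTF.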
boolean isUDTF = false;
String udtfTableAlias = null;
ArrayList<String> udtfColAliases = new ArrayList<String>();
ASTNode udtfExpr = (ASTNode) selExprList.getChild(posn).getChild(0);
GenericUDTF genericUDTF = null;
int udtfExprType = udtfExpr.getType();
if (udtfExprType == HiveParser.TOK_FUNCTION
|| udtfExprType == HiveParser.TOK_FUNCTIONSTAR) {
String funcName = TypeCheckProcFactory.DefaultExprProcessor
.getFunctionText(udtfExpr, true);
FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
if (fi != null) {
genericUDTF = fi.getGenericUDTF();
}
isUDTF = (genericUDTF != null);
if (isUDTF) {
globalLimitCtx.setHasTransformOrUDTF(true);
}
if (isUDTF && !fi.isNative()) {
unparseTranslator.addIdentifierTranslation((ASTNode) udtfExpr
.getChild(0));
}
if (isUDTF && (selectStar = udtfExprType == HiveParser.TOK_FUNCTIONSTAR)) {
genColListRegex(".*", null, (ASTNode) udtfExpr.getChild(0),
col_list, null, inputRR, pos, out_rwsch, qb.getAliases(), false);
}
}
if (isUDTF) {
// Only support a single expression when it's a UDTF
if (selExprList.getChildCount() > 1) {
throw new SemanticException(generateErrorMessage(
(ASTNode) selExprList.getChild(1),
ErrorMsg.UDTF_MULTIPLE_EXPR.getMsg()));
}
ASTNode selExpr = (ASTNode) selExprList.getChild(posn);
// Get the column / table aliases from the expression. Start from 1 as
// 0 is the TOK_FUNCTION
// column names also can be inferred from result of UDTF
for (int i = 1; i < selExpr.getChildCount(); i++) {
ASTNode selExprChild = (ASTNode) selExpr.getChild(i);
switch (selExprChild.getType()) {
case HiveParser.Identifier:
udtfColAliases.add(unescapeIdentifier(selExprChild.getText()));
unparseTranslator.addIdentifierTranslation(selExprChild);
break;
case HiveParser.TOK_TABALIAS:
assert (selExprChild.getChildCount() == 1);
udtfTableAlias = unescapeIdentifier(selExprChild.getChild(0)
.getText());
qb.addAlias(udtfTableAlias);
unparseTranslator.addIdentifierTranslation((ASTNode) selExprChild
.getChild(0));
break;
default:
assert (false);
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("UDTF table alias is " + udtfTableAlias);
LOG.debug("UDTF col aliases are " + udtfColAliases);
}
}
// The list of expressions after SELECT or SELECT TRANSFORM.
ASTNode exprList;
if (isInTransform) {
exprList = (ASTNode) trfm.getChild(0);
} else if (isUDTF) {
exprList = udtfExpr;
} else {
exprList = selExprList;
}
if (LOG.isDebugEnabled()) {
LOG.debug("genSelectPlan: input = " + inputRR.toString());
}
// For UDTF's, skip the function name to get the expressions
int startPosn = isUDTF ? posn + 1 : posn;
if (isInTransform) {
startPosn = 0;
}
Set<String> colAliases = new HashSet<String>();
ASTNode[] exprs = new ASTNode[exprList.getChildCount()];
String[][] aliases = new String[exprList.getChildCount()][];
boolean[] hasAsClauses = new boolean[exprList.getChildCount()];
// Iterate over all expression (either after SELECT, or in SELECT TRANSFORM)
for (int i = startPosn; i < exprList.getChildCount(); ++i) {
// child can be EXPR AS ALIAS, or EXPR.
ASTNode child = (ASTNode) exprList.getChild(i);
boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2);
boolean isWindowSpec = child.getChildCount() == 3 &&
child.getChild(2).getType() == HiveParser.TOK_WINDOWSPEC;
// EXPR AS (ALIAS,...) parses, but is only allowed for UDTF's
// This check is not needed and invalid when there is a transform b/c the
// AST's are slightly different.
if (!isWindowSpec && !isInTransform && !isUDTF && child.getChildCount() > 2) {
throw new SemanticException(generateErrorMessage(
(ASTNode) child.getChild(2),
ErrorMsg.INVALID_AS.getMsg()));
}
// The real expression
ASTNode expr;
String tabAlias;
String colAlias;
if (isInTransform || isUDTF) {
tabAlias = null;
colAlias = autogenColAliasPrfxLbl + i;
expr = child;
} else {
// Get rid of TOK_SELEXPR
expr = (ASTNode) child.getChild(0);
String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR,
autogenColAliasPrfxIncludeFuncName, i);
tabAlias = colRef[0];
colAlias = colRef[1];
if (hasAsClause) {
unparseTranslator.addIdentifierTranslation((ASTNode) child
.getChild(1));
}
}
exprs[i] = expr;
aliases[i] = new String[] {tabAlias, colAlias};
hasAsClauses[i] = hasAsClause;
colAliases.add(colAlias);
}
// Iterate over all expression (either after SELECT, or in SELECT TRANSFORM)
for (int i = startPosn; i < exprList.getChildCount(); ++i) {
// The real expression
ASTNode expr = exprs[i];
String tabAlias = aliases[i][0];
String colAlias = aliases[i][1];
boolean hasAsClause = hasAsClauses[i];
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
pos = genColListRegex(".*", expr.getChildCount() == 0 ? null
: getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(),
expr, col_list, null, inputRR, pos, out_rwsch, qb.getAliases(), false);
selectStar = true;
} else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
&& !inputRR.getIsExprResolver()
&& isRegex(unescapeIdentifier(expr.getChild(0).getText()), conf)) {
// In case the expression is a regex COL.
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()),
null, expr, col_list, null, inputRR, pos, out_rwsch, qb.getAliases(), false);
} else if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
&& inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0)
.getChild(0).getText().toLowerCase())) && !hasAsClause
&& !inputRR.getIsExprResolver()
&& isRegex(unescapeIdentifier(expr.getChild(1).getText()), conf)) {
// In case the expression is TABLE.COL (col can be regex).
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()),
expr, col_list, null, inputRR, pos, out_rwsch, qb.getAliases(), false);
} else {
// Case when this is an expression
TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
// We allow stateful functions in the SELECT list (but nowhere else)
tcCtx.setAllowStatefulFunctions(true);
tcCtx.setAllowDistinctFunctions(false);
ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
String recommended = recommendName(exp, colAlias);
if (recommended != null && !colAliases.contains(recommended) &&
out_rwsch.get(null, recommended) == null) {
colAlias = recommended;
}
col_list.add(exp);
ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos),
exp.getWritableObjectInspector(), tabAlias, false);
colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc) exp)
.isSkewedCol() : false);
out_rwsch.put(tabAlias, colAlias, colInfo);
if ( exp instanceof ExprNodeColumnDesc ) {
ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp;
String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn());
if ( altMapping != null ) {
out_rwsch.put(altMapping[0], altMapping[1], colInfo);
}
}
pos = Integer.valueOf(pos.intValue() + 1);
}
}
selectStar = selectStar && exprList.getChildCount() == posn + 1;
ArrayList<String> columnNames = new ArrayList<String>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < col_list.size(); i++) {
// Replace NULL with CAST(NULL AS STRING)
if (col_list.get(i) instanceof ExprNodeNullDesc) {
col_list.set(i, new ExprNodeConstantDesc(
TypeInfoFactory.stringTypeInfo, null));
}
String outputCol = getColumnInternalName(i);
colExprMap.put(outputCol, col_list.get(i));
columnNames.add(outputCol);
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new SelectDesc(col_list, columnNames, selectStar), new RowSchema(
out_rwsch.getColumnInfos()), input), out_rwsch);
output.setColumnExprMap(colExprMap);
if (isInTransform) {
output = genScriptPlan(trfm, qb, output);
}
if (isUDTF) {
output = genUDTFPlan(genericUDTF, udtfTableAlias, udtfColAliases, qb,
output, outerLV);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Created Select Plan row schema: " + out_rwsch.toString());
}
return output;
}
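// If the user did not supply an explicit alias (the alias still carries the
// auto-generated prefix), suggest the name of the underlying input column instead.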
private String recommendName(ExprNodeDesc exp, String colAlias) {
if (!colAlias.startsWith(autogenColAliasPrfxLbl)) {
return null;
}
String column = ExprNodeDescUtils.recommendInputName(exp);
if (column != null && !column.startsWith(autogenColAliasPrfxLbl)) {
return column;
}
return null;
}
/**
* Class to store GenericUDAF related information.
*/
static class GenericUDAFInfo {
ArrayList<ExprNodeDesc> convertedParameters;
GenericUDAFEvaluator genericUDAFEvaluator;
TypeInfo returnType;
}
/**
* Convert exprNodeDesc array to Typeinfo array.
*/
static ArrayList<TypeInfo> getTypeInfo(ArrayList<ExprNodeDesc> exprs) {
ArrayList<TypeInfo> result = new ArrayList<TypeInfo>();
for (ExprNodeDesc expr : exprs) {
result.add(expr.getTypeInfo());
}
return result;
}
/**
* Convert exprNodeDesc array to ObjectInspector array.
*/
static ArrayList<ObjectInspector> getWritableObjectInspector(ArrayList<ExprNodeDesc> exprs) {
ArrayList<ObjectInspector> result = new ArrayList<ObjectInspector>();
for (ExprNodeDesc expr : exprs) {
result.add(expr.getWritableObjectInspector());
}
return result;
}
/**
* Convert a TypeInfo array to an array of standard writable ObjectInspectors.
*/
static ObjectInspector[] getStandardObjectInspector(ArrayList<TypeInfo> exprs) {
ObjectInspector[] result = new ObjectInspector[exprs.size()];
for (int i = 0; i < exprs.size(); i++) {
result[i] = TypeInfoUtils
.getStandardWritableObjectInspectorFromTypeInfo(exprs.get(i));
}
return result;
}
/**
* Returns the GenericUDAFEvaluator for the aggregation. This is called once
* for each GroupBy aggregation.
*/
static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName,
ArrayList<ExprNodeDesc> aggParameters, ASTNode aggTree,
boolean isDistinct, boolean isAllColumns)
throws SemanticException {
ArrayList<ObjectInspector> originalParameterTypeInfos =
getWritableObjectInspector(aggParameters);
GenericUDAFEvaluator result = FunctionRegistry.getGenericUDAFEvaluator(
aggName, originalParameterTypeInfos, isDistinct, isAllColumns);
if (null == result) {
String reason = "Looking for UDAF Evaluator\"" + aggName
+ "\" with parameters " + originalParameterTypeInfos;
throw new SemanticException(ErrorMsg.INVALID_FUNCTION_SIGNATURE.getMsg(
(ASTNode) aggTree.getChild(0), reason));
}
return result;
}
/**
* Returns the GenericUDAFInfo struct for the aggregation.
*
* @param aggName
* The name of the UDAF.
* @param aggParameters
* The exprNodeDesc of the original parameters
* @param aggTree
* The ASTNode node of the UDAF in the query.
* @return GenericUDAFInfo
* @throws SemanticException
* when the UDAF is not found or has problems.
*/
static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator,
GenericUDAFEvaluator.Mode emode, ArrayList<ExprNodeDesc> aggParameters)
throws SemanticException {
GenericUDAFInfo r = new GenericUDAFInfo();
// set r.genericUDAFEvaluator
r.genericUDAFEvaluator = evaluator;
// set r.returnType
ObjectInspector returnOI = null;
try {
ArrayList<ObjectInspector> aggOIs = getWritableObjectInspector(aggParameters);
ObjectInspector[] aggOIArray = new ObjectInspector[aggOIs.size()];
for (int ii = 0; ii < aggOIs.size(); ++ii) {
aggOIArray[ii] = aggOIs.get(ii);
}
returnOI = r.genericUDAFEvaluator.init(emode, aggOIArray);
r.returnType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI);
} catch (HiveException e) {
throw new SemanticException(e);
}
// set r.convertedParameters
// TODO: type conversion
r.convertedParameters = aggParameters;
return r;
}
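// Maps the GroupByDesc mode of this operator to the corresponding UDAF evaluator mode.
// For PARTIALS and MERGEPARTIAL the result depends on whether the aggregation is
// distinct, since distinct aggregates are never partially aggregated on the map side.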
private static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode(
GroupByDesc.Mode mode, boolean isDistinct) {
switch (mode) {
case COMPLETE:
return GenericUDAFEvaluator.Mode.COMPLETE;
case PARTIAL1:
return GenericUDAFEvaluator.Mode.PARTIAL1;
case PARTIAL2:
return GenericUDAFEvaluator.Mode.PARTIAL2;
case PARTIALS:
return isDistinct ? GenericUDAFEvaluator.Mode.PARTIAL1
: GenericUDAFEvaluator.Mode.PARTIAL2;
case FINAL:
return GenericUDAFEvaluator.Mode.FINAL;
case HASH:
return GenericUDAFEvaluator.Mode.PARTIAL1;
case MERGEPARTIAL:
return isDistinct ? GenericUDAFEvaluator.Mode.COMPLETE
: GenericUDAFEvaluator.Mode.FINAL;
default:
throw new RuntimeException("internal error in groupByDescModeToUDAFMode");
}
}
/**
* Check if the given internalName represents a constant parameter in aggregation parameters
* of an aggregation tree.
* This method is only invoked when map-side aggregation is not involved. In this case,
* every parameter in every aggregation tree should already have a corresponding ColumnInfo,
* generated when the ReduceSinkOperator feeding the GroupByOperator being built was created.
* If the parameter turns out to be a constant, we return the corresponding ExprNodeDesc
* from reduceValues instead of creating a new ExprNodeColumnDesc, which cannot be treated
* as a constant parameter (the writableObjectInspector of an ExprNodeColumnDesc is never
* an instance of ConstantObjectInspector).
*
* @param reduceValues
* value columns of the corresponding ReduceSinkOperator
* @param internalName
* the internal name of this parameter
* @return the ExprNodeDesc of the constant parameter if the given internalName represents
* a constant parameter; otherwise, return null
*/
private ExprNodeDesc isConstantParameterInAggregationParameters(String internalName,
List<ExprNodeDesc> reduceValues) {
// only the pattern of "VALUE._col([0-9]+)" should be handled.
String[] terms = internalName.split("\\.");
if (terms.length != 2 || reduceValues == null) {
return null;
}
if (Utilities.ReduceField.VALUE.toString().equals(terms[0])) {
int pos = getPositionFromInternalName(terms[1]);
if (pos >= 0 && pos < reduceValues.size()) {
ExprNodeDesc reduceValue = reduceValues.get(pos);
if (reduceValue != null) {
if (reduceValue.getWritableObjectInspector() instanceof ConstantObjectInspector) {
// this internalName represents a constant parameter in aggregation parameters
return reduceValue;
}
}
}
}
return null;
}
/**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
*
* @param mode
* The mode of the aggregation (PARTIAL1 or COMPLETE)
* @param genericUDAFEvaluators
* If not null, this function will store the mapping from Aggregation
* StringTree to the genericUDAFEvaluator in this parameter, so it
* can be used in the next-stage GroupBy aggregations.
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo,
String dest, Operator input, ReduceSinkOperator rs, GroupByDesc.Mode mode,
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators)
throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx
.get(input).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), exprInfo
.getInternalName(), "", false));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), null, false);
groupByOutputRowResolver.putExpression(grpbyExpr,
oColInfo);
addAlternateGByKeyMappings(grpbyExpr, oColInfo, input, groupByOutputRowResolver);
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// For each aggregation
HashMap<String, ASTNode> aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
assert (aggregationTrees != null);
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
List<String> inputKeyCols = rs.getConf().getOutputKeyColumnNames();
if (inputKeyCols.size() > 0) {
lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
}
List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
int numDistinctUDFs = 0;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
// This is the GenericUDAF name
String aggName = unescapeIdentifier(value.getChild(0).getText());
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
// Convert children to aggParameters
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ColumnInfo paraExprInfo =
groupByInputRowResolver.getExpression(paraExpr);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
if (isDistinct && lastKeyColName != null) {
// if aggr is distinct, the parameter name is constructed as
// KEY.lastKeyColName:._colx
paraExpression = Utilities.ReduceField.KEY.name() + "." +
lastKeyColName + ":" + numDistinctUDFs + "." +
getColumnInternalName(i - 1);
}
ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(),
paraExprInfo.getIsVirtualCol());
ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(
paraExprInfo.getInternalName(), reduceValues);
if (reduceValue != null) {
// this parameter is a constant
expr = reduceValue;
}
aggParameters.add(expr);
}
if (isDistinct) {
numDistinctUDFs++;
}
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(
aggName, aggParameters, value, isDistinct, isAllColumns);
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(),
udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct,
amode));
String field = getColumnInternalName(groupByKeys.size()
+ aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(value, new ColumnInfo(
field, udaf.returnType, "", false));
// Save the evaluator so that it can be used by the next-stage
// GroupByOperators
if (genericUDAFEvaluators != null) {
genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
}
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf
.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
false, groupByMemoryUsage, memoryThreshold, null, false, 0, numDistinctUDFs > 0),
new RowSchema(groupByOutputRowResolver.getColumnInfos()),
input), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
// Add the grouping set key to the group by operator.
// This is not the first group by operator, but it is a subsequent group by operator
// which is forwarding the grouping keys introduced by the grouping sets.
// For eg: consider: select key, value, count(1) from T group by key, value with rollup.
// Assuming map-side aggregation and no skew, the plan would look like:
//
// TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink
//
// This function is called for GroupBy2 to pass the additional grouping keys introduced by
// GroupBy1 for the grouping set (corresponding to the rollup).
private void addGroupingSetKey(List<ExprNodeDesc> groupByKeys,
RowResolver groupByInputRowResolver,
RowResolver groupByOutputRowResolver,
List<String> outputColumnNames,
Map<String, ExprNodeDesc> colExprMap) throws SemanticException {
// For grouping sets, add a dummy grouping key
String groupingSetColumnName =
groupByInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName();
ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
groupingSetColumnName, null, false);
groupByKeys.add(inputExpr);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(),
new ColumnInfo(
field,
TypeInfoFactory.stringTypeInfo,
null,
true));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// Process grouping set for the reduce sink operator
// For eg: consider: select key, value, count(1) from T group by key, value with rollup.
// Assuming map-side aggregation and no skew, the plan would look like:
//
// TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink
//
// This function is called for ReduceSink to add the additional grouping keys introduced by
// GroupBy1 into the reduce keys.
private void processGroupingSetReduceSinkOperator(RowResolver reduceSinkInputRowResolver,
RowResolver reduceSinkOutputRowResolver,
List<ExprNodeDesc> reduceKeys,
List<String> outputKeyColumnNames,
Map<String, ExprNodeDesc> colExprMap) throws SemanticException {
// add a key for reduce sink
String groupingSetColumnName =
reduceSinkInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName();
ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
groupingSetColumnName, null, false);
reduceKeys.add(inputExpr);
outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1));
String field = Utilities.ReduceField.KEY.toString() + "."
+ getColumnInternalName(reduceKeys.size() - 1);
ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(
reduceKeys.size() - 1).getTypeInfo(), null, true);
reduceSinkOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), colInfo);
colExprMap.put(colInfo.getInternalName(), inputExpr);
}
/**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
*
* @param mode
* The mode of the aggregation (MERGEPARTIAL, PARTIAL2)
* @param genericUDAFEvaluators
* The mapping from Aggregation StringTree to the
* genericUDAFEvaluator.
* @param distPartAgg
* partial aggregation for distincts
* @param groupingSets
* list of grouping sets
* @param groupingSetsPresent
* whether grouping sets are present in this query
* @param groupingSetsConsumedCurrentMR
* whether grouping sets are consumed by this group by
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo,
String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode,
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators,
boolean distPartAgg,
List<Integer> groupingSets,
boolean groupingSetsPresent,
boolean groupingSetsNeedAdditionalMRJob) throws SemanticException {
ArrayList<String> outputColumnNames = new ArrayList<String>();
RowResolver groupByInputRowResolver = opParseCtx
.get(reduceSinkOperatorInfo).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
groupByKeys.add(new ExprNodeColumnDesc(exprInfo));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
groupByOutputRowResolver.putExpression(grpbyExpr,
oColInfo);
addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo, groupByOutputRowResolver);
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// This is only needed if a new grouping set key is being created
int groupingSetsPosition = 0;
// For grouping sets, add a dummy grouping key
if (groupingSetsPresent) {
// Consider the query: select a,b, count(1) from T group by a,b with cube;
// where it is being executed in a single map-reduce job
// The plan is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
// GroupBy1 already added the grouping id as part of the row
// This function is called for GroupBy2 to add grouping id as part of the groupby keys
if (!groupingSetsNeedAdditionalMRJob) {
addGroupingSetKey(
groupByKeys,
groupByInputRowResolver,
groupByOutputRowResolver,
outputColumnNames,
colExprMap);
}
else {
groupingSetsPosition = groupByKeys.size();
// The grouping set has not yet been processed. Create a new grouping key
// Consider the query: select a,b, count(1) from T group by a,b with cube;
// where it is being executed in 2 map-reduce jobs
// The plan for 1st MR is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
// GroupBy1/ReduceSink worked as if grouping sets were not present
// This function is called for GroupBy2 to create new rows for grouping sets
// For each input row (a,b), 4 rows are created for the example above:
// (a,b), (a,null), (null, b), (null, null)
createNewGroupingKey(groupByKeys,
outputColumnNames,
groupByOutputRowResolver,
colExprMap);
}
}
HashMap<String, ASTNode> aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
List<ExprNodeDesc> reduceValues = null;
if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
List<String> inputKeyCols = ((ReduceSinkDesc)
reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
if (inputKeyCols.size() > 0) {
lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
}
reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols();
}
int numDistinctUDFs = 0;
boolean containsDistinctAggr = false;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
String aggName = unescapeIdentifier(value.getChild(0).getText());
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI);
containsDistinctAggr = containsDistinctAggr || isDistinct;
// If the function is distinct, partial aggregation has not been done on
// the client side.
// If distPartAgg is set, the client is letting us know that partial
// aggregation has not been done.
// For eg: select a, count(b+c), count(distinct d+e) group by a
// For count(b+c), if partial aggregation has been performed, then we
// directly look for count(b+c).
// Otherwise, we look for b+c.
// For distincts, partial aggregation is never performed on the client
// side, so always look for the parameters: d+e
boolean partialAggDone = !(distPartAgg || isDistinct);
if (!partialAggDone) {
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ColumnInfo paraExprInfo =
groupByInputRowResolver.getExpression(paraExpr);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN
.getMsg(paraExpr));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
if (isDistinct && lastKeyColName != null) {
// if aggr is distinct, the parameter name is constructed as
// KEY.lastKeyColName:._colx
paraExpression = Utilities.ReduceField.KEY.name() + "." +
lastKeyColName + ":" + numDistinctUDFs + "."
+ getColumnInternalName(i - 1);
}
ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(),
paraExprInfo.getIsVirtualCol());
ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(
paraExprInfo.getInternalName(), reduceValues);
if (reduceValue != null) {
// this parameter is a constant
expr = reduceValue;
}
aggParameters.add(expr);
}
} else {
ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(), paraExprInfo
.getIsVirtualCol()));
}
if (isDistinct) {
numDistinctUDFs++;
}
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = null;
// For distincts, partial aggregations have not been done
if (distPartAgg) {
genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters,
value, isDistinct, isAllColumns);
assert (genericUDAFEvaluator != null);
genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
} else {
genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
assert (genericUDAFEvaluator != null);
}
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(),
udaf.genericUDAFEvaluator, udaf.convertedParameters,
(mode != GroupByDesc.Mode.FINAL && isDistinct), amode));
String field = getColumnInternalName(groupByKeys.size()
+ aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(value, new ColumnInfo(
field, udaf.returnType, "", false));
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf
.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
// Nothing special needs to be done for grouping sets if
// this is the final group by operator, and multiple rows corresponding to the
// grouping sets have been generated upstream.
// However, if an additional MR job has been created to handle grouping sets,
// additional rows corresponding to grouping sets need to be created here.
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
distPartAgg, groupByMemoryUsage, memoryThreshold,
groupingSets,
groupingSetsPresent && groupingSetsNeedAdditionalMRJob,
groupingSetsPosition, containsDistinctAggr),
new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo),
groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
/*
* Create a new grouping key for grouping id.
* A dummy grouping id. is added. At runtime, the group by operator
* creates 'n' rows per input row, where 'n' is the number of grouping sets.
*/
private void createNewGroupingKey(List<ExprNodeDesc> groupByKeys,
List<String> outputColumnNames,
RowResolver groupByOutputRowResolver,
Map<String, ExprNodeDesc> colExprMap) {
// The value for the constant does not matter. It is replaced by the grouping set
// value for the actual implementation
ExprNodeConstantDesc constant = new ExprNodeConstantDesc("0");
groupByKeys.add(constant);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(),
new ColumnInfo(
field,
TypeInfoFactory.stringTypeInfo,
null,
true));
colExprMap.put(field, constant);
}
/**
* Generate the map-side GroupByOperator for the Query Block
* (qb.getParseInfo().getXXX(dest)). The new GroupByOperator will be a child
* of the inputOperatorInfo.
*
* @param mode
* The mode of the aggregation (HASH)
* @param genericUDAFEvaluators
* If not null, this function will store the mapping from Aggregation
* StringTree to the genericUDAFEvaluator in this parameter, so it
* can be used in the next-stage GroupBy aggregations.
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanMapGroupByOperator(QB qb,
String dest,
List<ASTNode> grpByExprs,
Operator inputOperatorInfo,
GroupByDesc.Mode mode,
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators,
List<Integer> groupingSetKeys,
boolean groupingSetsPresent) throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo)
.getRowResolver();
QBParseInfo parseInfo = qb.getParseInfo();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr,
groupByInputRowResolver);
groupByKeys.add(grpByExprNode);
String field = getColumnInternalName(i);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(grpbyExpr,
new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// The grouping set key is present after the grouping keys, before the distinct keys
int groupingSetsPosition = groupByKeys.size();
// For grouping sets, add a dummy grouping key
// This dummy key needs to be added as a reduce key
// For eg: consider: select key, value, count(1) from T group by key, value with rollup.
// Assuming map-side aggregation and no skew, the plan would look like:
//
// TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink
//
// This function is called for GroupBy1 to create an additional grouping key
// for the grouping set (corresponding to the rollup).
if (groupingSetsPresent) {
createNewGroupingKey(groupByKeys,
outputColumnNames,
groupByOutputRowResolver,
colExprMap);
}
// If there is a distinctFuncExp, add all parameters to the reduceKeys.
if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) {
List<ASTNode> list = parseInfo.getDistinctFuncExprsForClause(dest);
for (ASTNode value : list) {
// 0 is function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode parameter = (ASTNode) value.getChild(i);
if (groupByOutputRowResolver.getExpression(parameter) == null) {
ExprNodeDesc distExprNode = genExprNodeDesc(parameter,
groupByInputRowResolver);
groupByKeys.add(distExprNode);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(parameter, new ColumnInfo(
field, distExprNode.getTypeInfo(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
}
}
}
// For each aggregation
HashMap<String, ASTNode> aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
assert (aggregationTrees != null);
boolean containsDistinctAggr = false;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
String aggName = unescapeIdentifier(value.getChild(0).getText());
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr,
groupByInputRowResolver);
aggParameters.add(paraExprNode);
}
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
containsDistinctAggr = containsDistinctAggr || isDistinct;
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(
aggName, aggParameters, value, isDistinct, isAllColumns);
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(),
udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct,
amode));
String field = getColumnInternalName(groupByKeys.size()
+ aggregations.size() - 1);
outputColumnNames.add(field);
if (groupByOutputRowResolver.getExpression(value) == null) {
groupByOutputRowResolver.putExpression(value, new ColumnInfo(
field, udaf.returnType, "", false));
}
// Save the evaluator so that it can be used by the next-stage
// GroupByOperators
if (genericUDAFEvaluators != null) {
genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
}
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf
.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
false, groupByMemoryUsage, memoryThreshold,
groupingSetKeys, groupingSetsPresent, groupingSetsPosition, containsDistinctAggr),
new RowSchema(groupByOutputRowResolver.getColumnInfos()),
inputOperatorInfo), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
/**
* Generate the ReduceSinkOperator for the Group By Query Block
* (qb.getPartInfo().getXXX(dest)). The new ReduceSinkOperator will be a child
* of inputOperatorInfo.
*
* It will put all Group By keys and the distinct field (if any) in the
* map-reduce sort key, and all other fields in the map-reduce value.
*
* @param numPartitionFields
* the number of fields for map-reduce partitioning. This is usually
* the number of fields in the Group By keys.
* @return the new ReduceSinkOperator.
* @throws SemanticException
*/
@SuppressWarnings("nls")
private ReduceSinkOperator genGroupByPlanReduceSinkOperator(QB qb,
String dest,
Operator inputOperatorInfo,
List<ASTNode> grpByExprs,
int numPartitionFields,
boolean changeNumPartitionFields,
int numReducers,
boolean mapAggrDone,
boolean groupingSetsPresent) throws SemanticException {
RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo)
.getRowResolver();
QBParseInfo parseInfo = qb.getParseInfo();
RowResolver reduceSinkOutputRowResolver = new RowResolver();
reduceSinkOutputRowResolver.setIsExprResolver(true);
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
// Pre-compute group-by keys and store in reduceKeys
List<String> outputKeyColumnNames = new ArrayList<String>();
List<String> outputValueColumnNames = new ArrayList<String>();
ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForReduceSink(grpByExprs, dest,
reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames,
colExprMap);
int keyLength = reduceKeys.size();
// add a key for reduce sink
if (groupingSetsPresent) {
// Process grouping set for the reduce sink operator
processGroupingSetReduceSinkOperator(
reduceSinkInputRowResolver,
reduceSinkOutputRowResolver,
reduceKeys,
outputKeyColumnNames,
colExprMap);
if (changeNumPartitionFields) {
numPartitionFields++;
}
}
List<List<Integer>> distinctColIndices = getDistinctColIndicesForReduceSink(parseInfo, dest,
reduceKeys, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames,
colExprMap);
ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
HashMap<String, ASTNode> aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
if (!mapAggrDone) {
getReduceValuesForReduceSinkNoMapAgg(parseInfo, dest, reduceSinkInputRowResolver,
reduceSinkOutputRowResolver, outputValueColumnNames, reduceValues, colExprMap);
} else {
// Put partial aggregation results in reduceValues
int inputField = reduceKeys.size();
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
TypeInfo type = reduceSinkInputRowResolver.getColumnInfos().get(
inputField).getType();
ExprNodeColumnDesc exprDesc = new ExprNodeColumnDesc(type,
getColumnInternalName(inputField), "", false);
reduceValues.add(exprDesc);
inputField++;
String outputColName = getColumnInternalName(reduceValues.size() - 1);
outputValueColumnNames.add(outputColName);
String internalName = Utilities.ReduceField.VALUE.toString() + "."
+ outputColName;
reduceSinkOutputRowResolver.putExpression(entry.getValue(),
new ColumnInfo(internalName, type, null, false));
colExprMap.put(internalName, exprDesc);
}
}
ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
OperatorFactory.getAndMakeChild(
PlanUtils.getReduceSinkDesc(reduceKeys,
groupingSetsPresent ? keyLength + 1 : keyLength,
reduceValues, distinctColIndices,
outputKeyColumnNames, outputValueColumnNames, true, -1, numPartitionFields,
numReducers, AcidUtils.Operation.NOT_ACID),
new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), inputOperatorInfo),
reduceSinkOutputRowResolver);
rsOp.setColumnExprMap(colExprMap);
return rsOp;
}
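// Builds the reduce-sink key expressions from the group-by expressions of the given
// destination, registering each key in the output row resolver. A duplicate deterministic
// expression is not added twice; it is simply remapped to the already-emitted key column.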
private ArrayList<ExprNodeDesc> getReduceKeysForReduceSink(List<ASTNode> grpByExprs, String dest,
RowResolver reduceSinkInputRowResolver, RowResolver reduceSinkOutputRowResolver,
List<String> outputKeyColumnNames, Map<String, ExprNodeDesc> colExprMap)
throws SemanticException {
ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ExprNodeDesc inputExpr = genExprNodeDesc(grpbyExpr,
reduceSinkInputRowResolver);
ColumnInfo prev = reduceSinkOutputRowResolver.getExpression(grpbyExpr);
if (prev != null && isDeterministic(inputExpr)) {
colExprMap.put(prev.getInternalName(), inputExpr);
continue;
}
reduceKeys.add(inputExpr);
outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1));
String field = Utilities.ReduceField.KEY.toString() + "."
+ getColumnInternalName(reduceKeys.size() - 1);
ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(
reduceKeys.size() - 1).getTypeInfo(), null, false);
reduceSinkOutputRowResolver.putExpression(grpbyExpr, colInfo);
colExprMap.put(colInfo.getInternalName(), inputExpr);
}
return reduceKeys;
}
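// Returns whether the given expression evaluates deterministically; used above to decide
// whether a repeated group-by expression can safely reuse an existing reduce key.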
private boolean isDeterministic(ExprNodeDesc expr) throws SemanticException {
try {
return ExprNodeEvaluatorFactory.get(expr).isDeterministic();
} catch (Exception e) {
throw new SemanticException(e);
}
}
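// Appends the parameters of each distinct aggregation to the reduce keys (if not already
// present) and records, for every distinct function, the indices of its parameters within
// the reduce key list.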
private List<List<Integer>> getDistinctColIndicesForReduceSink(QBParseInfo parseInfo,
String dest,
List<ExprNodeDesc> reduceKeys, RowResolver reduceSinkInputRowResolver,
RowResolver reduceSinkOutputRowResolver, List<String> outputKeyColumnNames,
Map<String, ExprNodeDesc> colExprMap)
throws SemanticException {
List<List<Integer>> distinctColIndices = new ArrayList<List<Integer>>();
// If there is a distinctFuncExp, add all parameters to the reduceKeys.
if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) {
List<ASTNode> distFuncs = parseInfo.getDistinctFuncExprsForClause(dest);
String colName = getColumnInternalName(reduceKeys.size());
outputKeyColumnNames.add(colName);
for (int i = 0; i < distFuncs.size(); i++) {
ASTNode value = distFuncs.get(i);
int numExprs = 0;
List<Integer> distinctIndices = new ArrayList<Integer>();
// 0 is function name
for (int j = 1; j < value.getChildCount(); j++) {
ASTNode parameter = (ASTNode) value.getChild(j);
ExprNodeDesc expr = genExprNodeDesc(parameter, reduceSinkInputRowResolver);
// see if expr is already present in reduceKeys.
// get index of expr in reduceKeys
int ri;
for (ri = 0; ri < reduceKeys.size(); ri++) {
if (reduceKeys.get(ri).getExprString().equals(expr.getExprString())) {
break;
}
}
// add the expr to reduceKeys if it is not present
if (ri == reduceKeys.size()) {
String name = getColumnInternalName(numExprs);
String field = Utilities.ReduceField.KEY.toString() + "." + colName
+ ":" + i
+ "." + name;
ColumnInfo colInfo = new ColumnInfo(field, expr.getTypeInfo(), null, false);
reduceSinkOutputRowResolver.putExpression(parameter, colInfo);
colExprMap.put(field, expr);
reduceKeys.add(expr);
}
// add the index of expr in reduceKeys to distinctIndices
distinctIndices.add(ri);
numExprs++;
}
distinctColIndices.add(distinctIndices);
}
}
return distinctColIndices;
}
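// When map-side aggregation has not been performed, the raw parameters of every aggregation
// must be shipped to the reducers as values; this adds each parameter (once) to reduceValues
// and to the reduce-sink output row resolver.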
private void getReduceValuesForReduceSinkNoMapAgg(QBParseInfo parseInfo, String dest,
RowResolver reduceSinkInputRowResolver, RowResolver reduceSinkOutputRowResolver,
List<String> outputValueColumnNames, ArrayList<ExprNodeDesc> reduceValues,
Map<String, ExprNodeDesc> colExprMap) throws SemanticException {
HashMap<String, ASTNode> aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
// Put parameters to aggregations in reduceValues
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
// 0 is function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode parameter = (ASTNode) value.getChild(i);
if (reduceSinkOutputRowResolver.getExpression(parameter) == null) {
ExprNodeDesc exprDesc = genExprNodeDesc(parameter, reduceSinkInputRowResolver);
reduceValues.add(exprDesc);
outputValueColumnNames
.add(getColumnInternalName(reduceValues.size() - 1));
String field = Utilities.ReduceField.VALUE.toString() + "."
+ getColumnInternalName(reduceValues.size() - 1);
reduceSinkOutputRowResolver.putExpression(parameter, new ColumnInfo(field,
reduceValues.get(reduceValues.size() - 1).getTypeInfo(), null,
false));
colExprMap.put(field, exprDesc);
}
}
}
}
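// Generates a single ReduceSinkOperator shared by several group-by destinations of a
// multi-insert query. The group-by and distinct keys are taken from the first destination
// (they are the same for all destinations), while the non-distinct aggregation parameters
// and any columns referenced in the per-destination WHERE clauses are added to the reduce
// values for every destination.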
@SuppressWarnings("nls")
private ReduceSinkOperator genCommonGroupByPlanReduceSinkOperator(QB qb, List<String> dests,
Operator inputOperatorInfo) throws SemanticException {
RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo)
.getRowResolver();
QBParseInfo parseInfo = qb.getParseInfo();
RowResolver reduceSinkOutputRowResolver = new RowResolver();
reduceSinkOutputRowResolver.setIsExprResolver(true);
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
// The group by keys and distinct keys should be the same for all dests, so using the first
// one to produce these will be the same as using any other.
String dest = dests.get(0);
// Pre-compute group-by keys and store in reduceKeys
List<String> outputKeyColumnNames = new ArrayList<String>();
List<String> outputValueColumnNames = new ArrayList<String>();
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForReduceSink(grpByExprs, dest,
reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames,
colExprMap);
int keyLength = reduceKeys.size();
List<List<Integer>> distinctColIndices = getDistinctColIndicesForReduceSink(parseInfo, dest,
reduceKeys, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames,
colExprMap);
ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
// The dests can have different non-distinct aggregations, so we have to iterate over all of
// them
for (String destination : dests) {
getReduceValuesForReduceSinkNoMapAgg(parseInfo, destination, reduceSinkInputRowResolver,
reduceSinkOutputRowResolver, outputValueColumnNames, reduceValues, colExprMap);
// Need to pass all of the columns used in the where clauses as reduce values
ASTNode whereClause = parseInfo.getWhrForClause(destination);
if (whereClause != null) {
assert whereClause.getChildCount() == 1;
ASTNode predicates = (ASTNode) whereClause.getChild(0);
Map<ASTNode, ExprNodeDesc> nodeOutputs =
genAllExprNodeDesc(predicates, reduceSinkInputRowResolver);
removeMappingForKeys(predicates, nodeOutputs, reduceKeys);
// extract columns missing in current RS key/value
for (Map.Entry<ASTNode, ExprNodeDesc> entry : nodeOutputs.entrySet()) {
ASTNode parameter = entry.getKey();
ExprNodeDesc expression = entry.getValue();
if (!(expression instanceof ExprNodeColumnDesc)) {
continue;
}
if (ExprNodeDescUtils.indexOf(expression, reduceValues) >= 0) {
continue;
}
String internalName = getColumnInternalName(reduceValues.size());
String field = Utilities.ReduceField.VALUE.toString() + "." + internalName;
reduceValues.add(expression);
outputValueColumnNames.add(internalName);
reduceSinkOutputRowResolver.putExpression(parameter,
new ColumnInfo(field, expression.getTypeInfo(), null, false));
colExprMap.put(field, expression);
}
}
}
// Optimize the scenario when there are no grouping keys - only 1 reducer is needed
int numReducers = -1;
if (grpByExprs.isEmpty()) {
numReducers = 1;
}
ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, keyLength, reduceValues,
distinctColIndices, outputKeyColumnNames, outputValueColumnNames,
true, -1, keyLength, numReducers, AcidUtils.Operation.NOT_ACID);
ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(reduceSinkOutputRowResolver
.getColumnInfos()), inputOperatorInfo), reduceSinkOutputRowResolver);
rsOp.setColumnExprMap(colExprMap);
return rsOp;
}
// Remove expression node descriptor and children of it for a given predicate
// from mapping if it's already on RS keys.
// Remaining column expressions would be a candidate for an RS value
private void removeMappingForKeys(ASTNode predicate, Map<ASTNode, ExprNodeDesc> mapping,
List