Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.parse;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import com.google.common.math.IntMath;
import com.google.common.math.LongMath;
import org.antlr.runtime.ClassicToken;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.Token;
import org.antlr.runtime.TokenRewriteStream;
import org.antlr.runtime.tree.Tree;
import org.antlr.runtime.tree.TreeVisitor;
import org.antlr.runtime.tree.TreeVisitorAction;
import org.antlr.runtime.tree.TreeWizard;
import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.conf.HiveConf.StrictChecks;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.TransactionalValidationListener;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.SQLCheckConstraint;
import org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint;
import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
import org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint;
import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.cache.results.CacheUsage;
import org.apache.hadoop.hive.ql.cache.results.QueryResultsCache;
import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.ArchiveUtils;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.LimitOperator;
import org.apache.hadoop.hive.ql.exec.MaterializedViewDesc;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RecordReader;
import org.apache.hadoop.hive.ql.exec.RecordWriter;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity.WriteType;
import org.apache.hadoop.hive.ql.io.AcidInputFormat;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.AcidUtils.Operation;
import org.apache.hadoop.hive.ql.io.arrow.ArrowColumnarBatchSerDe;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.NullRowsInputFormat;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
import org.apache.hadoop.hive.ql.lockmgr.LockException;
import org.apache.hadoop.hive.ql.metadata.CheckConstraint;
import org.apache.hadoop.hive.ql.metadata.DefaultConstraint;
import org.apache.hadoop.hive.ql.metadata.DummyPartition;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
import org.apache.hadoop.hive.ql.metadata.NotNullConstraint;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.Optimizer;
import org.apache.hadoop.hive.ql.optimizer.QueryPlanPostProcessor;
import org.apache.hadoop.hive.ql.optimizer.Transform;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc;
import org.apache.hadoop.hive.ql.optimizer.lineage.Generator;
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec.SpecType;
import org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher;
import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFInputSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputType;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionedTableFunctionSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec;
import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType;
import org.apache.hadoop.hive.ql.parse.SubQueryUtils.ISubQueryJoinInfo;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFrameSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec;
import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc;
import org.apache.hadoop.hive.ql.plan.CreateViewDesc;
import org.apache.hadoop.hive.ql.plan.DDLWork;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc;
import org.apache.hadoop.hive.ql.plan.ForwardDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.plan.InsertCommitHookDesc;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc;
import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc;
import org.apache.hadoop.hive.ql.plan.LimitDesc;
import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PTFDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.ScriptDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.UDTFDesc;
import org.apache.hadoop.hive.ql.plan.UnionDesc;
import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef;
import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCardinalityViolation;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMurmurHash;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFInline;
import org.apache.hadoop.hive.ql.util.ResourceDownloader;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
import org.apache.hadoop.hive.serde2.NoOpFetchFormatter;
import org.apache.hadoop.hive.serde2.NullStructSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe2;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.Utils;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.security.UserGroupInformation;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Serializable;
import java.security.AccessControlException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;
import java.util.function.Supplier;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Collectors;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS;
/**
* Implementation of the semantic analyzer. It generates the query plan.
* There are other specific semantic analyzers for some hive operations such as
* DDLSemanticAnalyzer for ddl operations.
*/
public class SemanticAnalyzer extends BaseSemanticAnalyzer {
public static final String DUMMY_DATABASE = "_dummy_database";
public static final String DUMMY_TABLE = "_dummy_table";
public static final String SUBQUERY_TAG_1 = "-subquery1";
public static final String SUBQUERY_TAG_2 = "-subquery2";
// Max characters when auto generating the column name with func name
private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
public static final String VALUES_TMP_TABLE_NAME_PREFIX = "Values__Tmp__Table__";
/** Marks the temporary table created for a serialized CTE. The table is scoped to the query. */
static final String MATERIALIZATION_MARKER = "$MATERIALIZATION";
private HashMap opToPartPruner;
private HashMap opToPartList;
protected HashMap topOps;
protected LinkedHashMap, OpParseContext> opParseCtx;
private List loadTableWork;
private List loadFileWork;
private final List columnStatsAutoGatherContexts;
private final Map joinContext;
private final Map smbMapJoinContext;
private final HashMap topToTable;
private final List reduceSinkOperatorsAddedByEnforceBucketingSorting;
private final HashMap> topToTableProps;
private QB qb;
private ASTNode ast;
private int destTableId;
private UnionProcContext uCtx;
List> listMapJoinOpsNoReducer;
private HashMap opToSamplePruner;
private final Map> opToPartToSkewedPruner;
private Map viewProjectToTableSchema;
/**
* a map for the split sampling, from alias to an instance of SplitSample
* that describes percentage and number.
*/
private final HashMap nameToSplitSample;
Map> groupOpToInputTables;
Map prunedPartitions;
protected List resultSchema;
protected CreateViewDesc createVwDesc;
protected MaterializedViewDesc materializedViewUpdateDesc;
protected ArrayList viewsExpanded;
protected ASTNode viewSelect;
protected final UnparseTranslator unparseTranslator;
private final GlobalLimitCtx globalLimitCtx;
// prefix for column names auto generated by hive
private final String autogenColAliasPrfxLbl;
private final boolean autogenColAliasPrfxIncludeFuncName;
// Keep track of view alias to read entity corresponding to the view
// For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T
// keeps track of aliases for V3, V3:V2, V3:V2:V1.
// This is used when T is added as an input for the query, the parents of T is
// derived from the alias V3:V2:V1:T
private final Map viewAliasToInput;
//need merge isDirect flag to input even if the newInput does not have a parent
private boolean mergeIsDirect;
// flag for no scan during analyze ... compute statistics
protected boolean noscan;
// whether this is a mv rebuild rewritten expression
protected MaterializationRebuildMode mvRebuildMode = MaterializationRebuildMode.NONE;
protected String mvRebuildDbName; // Db name for materialization to rebuild
protected String mvRebuildName; // Name for materialization to rebuild
protected volatile boolean disableJoinMerge = false;
protected final boolean defaultJoinMerge;
/*
* Capture the CTE definitions in a Query.
*/
final Map aliasToCTEs;
/*
* Used to check recursive CTE invocations. Similar to viewsExpanded
*/
ArrayList ctesExpanded;
/*
* Whether root tasks after materialized CTE linkage have been resolved
*/
boolean rootTasksResolved;
protected TableMask tableMask;
CreateTableDesc tableDesc;
/** Not thread-safe. */
final ASTSearcher astSearcher = new ASTSearcher();
protected AnalyzeRewriteContext analyzeRewrite;
// A mapping from a tableName to a table object in metastore.
Map tabNameToTabObject;
// The tokens we should ignore when we are trying to do table masking.
private final Set ignoredTokens = Sets.newHashSet(HiveParser.TOK_GROUPBY,
HiveParser.TOK_ORDERBY, HiveParser.TOK_WINDOWSPEC, HiveParser.TOK_CLUSTERBY,
HiveParser.TOK_DISTRIBUTEBY, HiveParser.TOK_SORTBY);
private String invalidQueryMaterializationReason;
static class Phase1Ctx {
String dest;
int nextNum;
}
public SemanticAnalyzer(QueryState queryState) throws SemanticException {
super(queryState);
opToPartPruner = new HashMap();
opToPartList = new HashMap();
opToSamplePruner = new HashMap();
nameToSplitSample = new HashMap();
// Must be deterministic order maps - see HIVE-8707
topOps = new LinkedHashMap();
loadTableWork = new ArrayList();
loadFileWork = new ArrayList();
columnStatsAutoGatherContexts = new ArrayList();
opParseCtx = new LinkedHashMap, OpParseContext>();
joinContext = new HashMap();
smbMapJoinContext = new HashMap();
// Must be deterministic order map for consistent q-test output across Java versions
topToTable = new LinkedHashMap();
reduceSinkOperatorsAddedByEnforceBucketingSorting = new ArrayList();
topToTableProps = new HashMap>();
destTableId = 1;
uCtx = null;
listMapJoinOpsNoReducer = new ArrayList>();
groupOpToInputTables = new HashMap>();
prunedPartitions = new HashMap();
unparseTranslator = new UnparseTranslator(conf);
autogenColAliasPrfxLbl = HiveConf.getVar(conf,
HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL);
autogenColAliasPrfxIncludeFuncName = HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME);
queryProperties = new QueryProperties();
opToPartToSkewedPruner = new HashMap>();
aliasToCTEs = new HashMap();
globalLimitCtx = new GlobalLimitCtx();
viewAliasToInput = new HashMap();
mergeIsDirect = true;
noscan = false;
tabNameToTabObject = new HashMap<>();
defaultJoinMerge = false == HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MERGE_NWAY_JOINS);
disableJoinMerge = defaultJoinMerge;
}
@Override
protected void reset(boolean clearCache) {
super.reset(true);
if(clearCache) {
prunedPartitions.clear();
if (ctx != null) {
ctx.getOpContext().getColStatsCache().clear();
}
//When init(true) combine with genResolvedParseTree, it will generate Resolved Parse tree from syntax tree
//ReadEntity created under these conditions should be all relevant to the syntax tree even the ones without parents
//set mergeIsDirect to true here.
mergeIsDirect = true;
} else {
mergeIsDirect = false;
}
tabNameToTabObject.clear();
loadTableWork.clear();
loadFileWork.clear();
columnStatsAutoGatherContexts.clear();
topOps.clear();
destTableId = 1;
idToTableNameMap.clear();
qb = null;
ast = null;
uCtx = null;
joinContext.clear();
smbMapJoinContext.clear();
opParseCtx.clear();
groupOpToInputTables.clear();
disableJoinMerge = defaultJoinMerge;
aliasToCTEs.clear();
topToTable.clear();
opToPartPruner.clear();
opToPartList.clear();
opToPartToSkewedPruner.clear();
opToSamplePruner.clear();
nameToSplitSample.clear();
resultSchema = null;
createVwDesc = null;
materializedViewUpdateDesc = null;
viewsExpanded = null;
viewSelect = null;
ctesExpanded = null;
globalLimitCtx.disableOpt();
viewAliasToInput.clear();
reduceSinkOperatorsAddedByEnforceBucketingSorting.clear();
topToTableProps.clear();
listMapJoinOpsNoReducer.clear();
unparseTranslator.clear();
queryProperties.clear();
outputs.clear();
}
public void initParseCtx(ParseContext pctx) {
opToPartPruner = pctx.getOpToPartPruner();
opToPartList = pctx.getOpToPartList();
opToSamplePruner = pctx.getOpToSamplePruner();
topOps = pctx.getTopOps();
loadTableWork = pctx.getLoadTableWork();
loadFileWork = pctx.getLoadFileWork();
ctx = pctx.getContext();
destTableId = pctx.getDestTableId();
idToTableNameMap = pctx.getIdToTableNameMap();
uCtx = pctx.getUCtx();
listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer();
prunedPartitions = pctx.getPrunedPartitions();
tabNameToTabObject = pctx.getTabNameToTabObject();
fetchTask = pctx.getFetchTask();
setLineageInfo(pctx.getLineageInfo());
}
public ParseContext getParseContext() {
// Make sure the basic query properties are initialized
copyInfoToQueryProperties(queryProperties);
return new ParseContext(queryState, opToPartPruner, opToPartList, topOps,
new HashSet(joinContext.keySet()),
new HashSet(smbMapJoinContext.keySet()),
loadTableWork, loadFileWork, columnStatsAutoGatherContexts,
ctx, idToTableNameMap, destTableId, uCtx,
listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject,
opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting,
analyzeRewrite, tableDesc, createVwDesc, materializedViewUpdateDesc,
queryProperties, viewProjectToTableSchema, acidFileSinks);
}
public CompilationOpContext getOpContext() {
return ctx.getOpContext();
}
public String genPartValueString(String partColType, String partVal) throws SemanticException {
String returnVal = partVal;
if (partColType.equals(serdeConstants.STRING_TYPE_NAME) ||
partColType.contains(serdeConstants.VARCHAR_TYPE_NAME) ||
partColType.contains(serdeConstants.CHAR_TYPE_NAME)) {
returnVal = "'" + escapeSQLString(partVal) + "'";
} else if (partColType.equals(serdeConstants.TINYINT_TYPE_NAME)) {
returnVal = partVal + "Y";
} else if (partColType.equals(serdeConstants.SMALLINT_TYPE_NAME)) {
returnVal = partVal + "S";
} else if (partColType.equals(serdeConstants.INT_TYPE_NAME)) {
returnVal = partVal;
} else if (partColType.equals(serdeConstants.BIGINT_TYPE_NAME)) {
returnVal = partVal + "L";
} else if (partColType.contains(serdeConstants.DECIMAL_TYPE_NAME)) {
returnVal = partVal + "BD";
} else if (partColType.equals(serdeConstants.DATE_TYPE_NAME) ||
partColType.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
returnVal = partColType + " '" + escapeSQLString(partVal) + "'";
} else {
//for other usually not used types, just quote the value
returnVal = "'" + escapeSQLString(partVal) + "'";
}
return returnVal;
}
public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias)
throws SemanticException {
doPhase1QBExpr(ast, qbexpr, id, alias, false);
}
@SuppressWarnings("nls")
public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias, boolean insideView)
throws SemanticException {
assert (ast.getToken() != null);
if (ast.getToken().getType() == HiveParser.TOK_QUERY) {
QB qb = new QB(id, alias, true);
qb.setInsideView(insideView);
Phase1Ctx ctx_1 = initPhase1Ctx();
doPhase1(ast, qb, ctx_1, null);
qbexpr.setOpcode(QBExpr.Opcode.NULLOP);
qbexpr.setQB(qb);
}
// setop
else {
switch (ast.getToken().getType()) {
case HiveParser.TOK_UNIONALL:
qbexpr.setOpcode(QBExpr.Opcode.UNION);
break;
case HiveParser.TOK_INTERSECTALL:
qbexpr.setOpcode(QBExpr.Opcode.INTERSECTALL);
break;
case HiveParser.TOK_INTERSECTDISTINCT:
qbexpr.setOpcode(QBExpr.Opcode.INTERSECT);
break;
case HiveParser.TOK_EXCEPTALL:
qbexpr.setOpcode(QBExpr.Opcode.EXCEPTALL);
break;
case HiveParser.TOK_EXCEPTDISTINCT:
qbexpr.setOpcode(QBExpr.Opcode.EXCEPT);
break;
default:
throw new SemanticException(ErrorMsg.UNSUPPORTED_SET_OPERATOR.getMsg("Type "
+ ast.getToken().getType()));
}
// query 1
assert (ast.getChild(0) != null);
QBExpr qbexpr1 = new QBExpr(alias + SUBQUERY_TAG_1);
doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id,
alias + SUBQUERY_TAG_1, insideView);
qbexpr.setQBExpr1(qbexpr1);
// query 2
assert (ast.getChild(1) != null);
QBExpr qbexpr2 = new QBExpr(alias + SUBQUERY_TAG_2);
doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id,
alias + SUBQUERY_TAG_2, insideView);
qbexpr.setQBExpr2(qbexpr2);
}
}
private LinkedHashMap doPhase1GetAggregationsFromSelect(
ASTNode selExpr, QB qb, String dest) throws SemanticException {
// Iterate over the selects search for aggregation Trees.
// Use String as keys to eliminate duplicate trees.
LinkedHashMap aggregationTrees = new LinkedHashMap();
List wdwFns = new ArrayList();
for (int i = 0; i < selExpr.getChildCount(); ++i) {
ASTNode function = (ASTNode) selExpr.getChild(i);
if (function.getType() == HiveParser.TOK_SELEXPR ||
function.getType() == HiveParser.TOK_SUBQUERY_EXPR) {
function = (ASTNode)function.getChild(0);
}
doPhase1GetAllAggregations(function, aggregationTrees, wdwFns, null);
}
// window based aggregations are handled differently
for (ASTNode wdwFn : wdwFns) {
WindowingSpec spec = qb.getWindowingSpec(dest);
if(spec == null) {
queryProperties.setHasWindowing(true);
spec = new WindowingSpec();
qb.addDestToWindowingSpec(dest, spec);
}
HashMap wExprsInDest = qb.getParseInfo().getWindowingExprsForClause(dest);
int wColIdx = spec.getWindowExpressions() == null ? 0 : spec.getWindowExpressions().size();
WindowFunctionSpec wFnSpec = processWindowFunction(wdwFn,
(ASTNode)wdwFn.getChild(wdwFn.getChildCount()-1));
// If this is a duplicate invocation of a function; don't add to WindowingSpec.
if ( wExprsInDest != null &&
wExprsInDest.containsKey(wFnSpec.getExpression().toStringTree())) {
continue;
}
wFnSpec.setAlias(wFnSpec.getName() + "_window_" + wColIdx);
spec.addWindowFunction(wFnSpec);
qb.getParseInfo().addWindowingExprToClause(dest, wFnSpec.getExpression());
}
return aggregationTrees;
}
/**
* This method figures out if current AST is for INSERT INTO
* @param qbp qbParseInfo
* @param dest destination clause
* @return true or false
*/
private boolean isInsertInto(QBParseInfo qbp, String dest) {
// get the destination and check if it is TABLE
if(qbp == null || dest == null ) return false;
ASTNode destNode = qbp.getDestForClause(dest);
if(destNode != null && destNode.getType() == HiveParser.TOK_TAB) {
return true;
}
return false;
}
/**
* Given an AST this method figures out if it is a value clause
* e.g. VALUES(1,3..)
*/
private boolean isValueClause(ASTNode select) {
if(select == null) return false;
if(select.getChildCount() == 1) {
ASTNode selectExpr = (ASTNode)select.getChild(0);
if(selectExpr.getChildCount() == 1 ) {
ASTNode selectChildExpr = (ASTNode)selectExpr.getChild(0);
if(selectChildExpr.getType() == HiveParser.TOK_FUNCTION) {
ASTNode inline = (ASTNode)selectChildExpr.getChild(0);
ASTNode func = (ASTNode)selectChildExpr.getChild(1);
if(inline.getText().equals(GenericUDTFInline.class.getAnnotation(Description.class).name())
&& func.getType() == HiveParser.TOK_FUNCTION) {
ASTNode arrayNode = (ASTNode)func.getChild(0);
ASTNode funcNode= (ASTNode)func.getChild(1);
if(arrayNode.getText().equals(GenericUDFArray.class.getAnnotation(Description.class).name() )
&& funcNode.getType() == HiveParser.TOK_FUNCTION) {
return true;
}
}
}
}
}
return false;
}
/**
* This method creates a list of default constraints which corresponds to
* given schema (targetSchema) or target table's column schema (if targetSchema is null)
* @param tbl
* @param targetSchema
* @return List of default constraints (including NULL if there is no default)
* @throws SemanticException
*/
private static List getDefaultConstraints(Table tbl, List targetSchema) throws SemanticException{
Map colNameToDefaultVal = null;
try {
DefaultConstraint dc = Hive.get().getEnabledDefaultConstraints(tbl.getDbName(), tbl.getTableName());
colNameToDefaultVal = dc.getColNameToDefaultValueMap();
} catch (Exception e) {
if (e instanceof SemanticException) {
throw (SemanticException) e;
} else {
throw (new RuntimeException(e));
}
}
List defaultConstraints = new ArrayList<>();
if(targetSchema != null) {
for (String colName : targetSchema) {
defaultConstraints.add(colNameToDefaultVal.get(colName));
}
}
else {
for(FieldSchema fs:tbl.getCols()) {
defaultConstraints.add(colNameToDefaultVal.get(fs.getName()));
}
}
return defaultConstraints;
}
/**
* Constructs an AST for given DEFAULT string
* @param newValue
* @throws SemanticException
*/
private ASTNode getNodeReplacementforDefault(String newValue) throws SemanticException {
ASTNode newNode = null;
if(newValue== null) {
newNode = ASTBuilder.construct(HiveParser.TOK_NULL, "TOK_NULL").node();
}
else {
try {
newNode = new ParseDriver().parseExpression(newValue);
} catch(Exception e) {
throw new SemanticException("Error while parsing default value for DEFAULT keyword: " + newValue
+ ". Error message: " + e.getMessage());
}
}
return newNode;
}
public static String replaceDefaultKeywordForMerge(String valueClause,Table targetTable)
throws SemanticException {
List defaultConstraints = null;
String[] values = valueClause.trim().split(",");
StringBuilder newValueClause = new StringBuilder();
for (int i = 0; i < values.length; i++) {
if (values[i].trim().toLowerCase().equals("`default`")) {
if (defaultConstraints == null) {
defaultConstraints = getDefaultConstraints(targetTable, null);
}
newValueClause.append(defaultConstraints.get(i));
}
else {
newValueClause.append(values[i]);
}
if(i != values.length-1) {
newValueClause.append(",");
}
}
return newValueClause.toString();
}
/**
* This method replaces ASTNode corresponding to DEFAULT keyword with either DEFAULT constraint
* expression if exists or NULL otherwise
* @param selectExprs
* @param targetTable
* @throws SemanticException
*/
private void replaceDefaultKeywordForUpdate(ASTNode selectExprs, Table targetTable) throws SemanticException {
List defaultConstraints = null;
for (int i = 0; i < selectExprs.getChildCount(); i++) {
ASTNode selectExpr = (ASTNode) selectExprs.getChild(i);
if (selectExpr.getChildCount() == 1 && selectExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) {
//first child should be rowid
if (i == 0 && !selectExpr.getChild(0).getChild(0).getText().equals("ROW__ID")) {
throw new SemanticException("Unexpected element when replacing default keyword for UPDATE."
+ " Expected ROW_ID, found: " + selectExpr.getChild(0).getChild(0).getText());
}
else if (selectExpr.getChild(0).getChild(0).getText().toLowerCase().equals("default")) {
if (defaultConstraints == null) {
defaultConstraints = getDefaultConstraints(targetTable, null);
}
ASTNode newNode = getNodeReplacementforDefault(defaultConstraints.get(i - 1));
// replace the node in place
selectExpr.replaceChildren(0, 0, newNode);
if (LOG.isDebugEnabled()) {
LOG.debug("DEFAULT keyword replacement - Inserted " + newNode.getText() + " for table: " + targetTable.getTableName());
}
}
}
}
}
/**
* This method replaces DEFAULT AST node with DEFAULT expression
* @param valueArrClause This is AST for value clause
* @param targetTable
* @param targetSchema this is target schema/column schema if specified in query
* @throws SemanticException
*/
private void replaceDefaultKeyword(ASTNode valueArrClause, Table targetTable, List targetSchema) throws SemanticException{
List defaultConstraints = null;
for(int i=1; i aggregations, List wdwFns,
ASTNode wndParent) throws SemanticException {
int exprTokenType = expressionTree.getToken().getType();
if(exprTokenType == HiveParser.TOK_SUBQUERY_EXPR) {
//since now we have scalar subqueries we can get subquery expression in having
// we don't want to include aggregate from within subquery
return;
}
boolean parentIsWindowSpec = wndParent != null;
if (exprTokenType == HiveParser.TOK_FUNCTION
|| exprTokenType == HiveParser.TOK_FUNCTIONDI
|| exprTokenType == HiveParser.TOK_FUNCTIONSTAR) {
assert (expressionTree.getChildCount() != 0);
if (expressionTree.getChild(expressionTree.getChildCount()-1).getType()
== HiveParser.TOK_WINDOWSPEC) {
// If it is a windowing spec, we include it in the list
// Further, we will examine its children AST nodes to check whether
// there are aggregation functions within
wdwFns.add(expressionTree);
for(Node child : expressionTree.getChildren()) {
doPhase1GetAllAggregations((ASTNode) child, aggregations, wdwFns, expressionTree);
}
return;
}
if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {
String functionName = unescapeIdentifier(expressionTree.getChild(0)
.getText());
// Validate the function name
if (FunctionRegistry.getFunctionInfo(functionName) == null) {
throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(functionName));
}
if(FunctionRegistry.impliesOrder(functionName) && !parentIsWindowSpec) {
throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
}
if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) {
if(containsLeadLagUDF(expressionTree) && !parentIsWindowSpec) {
throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName));
}
aggregations.put(expressionTree.toStringTree(), expressionTree);
FunctionInfo fi = FunctionRegistry.getFunctionInfo(functionName);
if (!fi.isNative()) {
unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree
.getChild(0));
}
return;
}
}
}
for (int i = 0; i < expressionTree.getChildCount(); i++) {
doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i),
aggregations, wdwFns, wndParent);
}
}
private List doPhase1GetDistinctFuncExprs(
HashMap aggregationTrees) throws SemanticException {
List exprs = new ArrayList();
for (Map.Entry entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
assert (value != null);
if (value.getToken().getType() == HiveParser.TOK_FUNCTIONDI) {
exprs.add(value);
}
}
return exprs;
}
public static String generateErrorMessage(ASTNode ast, String message) {
StringBuilder sb = new StringBuilder();
if (ast == null) {
sb.append(message).append(". Cannot tell the position of null AST.");
return sb.toString();
}
sb.append(ast.getLine());
sb.append(":");
sb.append(ast.getCharPositionInLine());
sb.append(" ");
sb.append(message);
sb.append(". Error encountered near token '");
sb.append(ErrorMsg.getText(ast));
sb.append("'");
return sb.toString();
}
ASTNode getAST() {
return this.ast;
}
protected void setAST(ASTNode newAST) {
this.ast = newAST;
}
int[] findTabRefIdxs(ASTNode tabref) {
assert tabref.getType() == HiveParser.TOK_TABREF;
int aliasIndex = 0;
int propsIndex = -1;
int tsampleIndex = -1;
int ssampleIndex = -1;
for (int index = 1; index < tabref.getChildCount(); index++) {
ASTNode ct = (ASTNode) tabref.getChild(index);
if (ct.getToken().getType() == HiveParser.TOK_TABLEBUCKETSAMPLE) {
tsampleIndex = index;
} else if (ct.getToken().getType() == HiveParser.TOK_TABLESPLITSAMPLE) {
ssampleIndex = index;
} else if (ct.getToken().getType() == HiveParser.TOK_TABLEPROPERTIES) {
propsIndex = index;
} else {
aliasIndex = index;
}
}
return new int[] {aliasIndex, propsIndex, tsampleIndex, ssampleIndex};
}
String findSimpleTableName(ASTNode tabref, int aliasIndex) {
assert tabref.getType() == HiveParser.TOK_TABREF;
ASTNode tableTree = (ASTNode) (tabref.getChild(0));
String alias;
if (aliasIndex != 0) {
alias = unescapeIdentifier(tabref.getChild(aliasIndex).getText());
}
else {
alias = getUnescapedUnqualifiedTableName(tableTree);
}
return alias;
}
/**
* Goes though the tabref tree and finds the alias for the table. Once found,
* it records the table name-> alias association in aliasToTabs. It also makes
* an association from the alias to the table AST in parse info.
*
* @return the alias of the table
*/
private String processTable(QB qb, ASTNode tabref) throws SemanticException {
// For each table reference get the table name
// and the alias (if alias is not present, the table name
// is used as an alias)
int[] indexes = findTabRefIdxs(tabref);
int aliasIndex = indexes[0];
int propsIndex = indexes[1];
int tsampleIndex = indexes[2];
int ssampleIndex = indexes[3];
ASTNode tableTree = (ASTNode) (tabref.getChild(0));
String tabIdName = getUnescapedName(tableTree).toLowerCase();
String alias = findSimpleTableName(tabref, aliasIndex);
if (propsIndex >= 0) {
Tree propsAST = tabref.getChild(propsIndex);
Map props = DDLSemanticAnalyzer.getProps((ASTNode) propsAST.getChild(0));
// We get the information from Calcite.
if ("TRUE".equals(props.get("insideView"))) {
qb.getAliasInsideView().add(alias.toLowerCase());
}
qb.setTabProps(alias, props);
}
// If the alias is already there then we have a conflict
if (qb.exists(alias)) {
throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(tabref
.getChild(aliasIndex)));
}
if (tsampleIndex >= 0) {
ASTNode sampleClause = (ASTNode) tabref.getChild(tsampleIndex);
ArrayList sampleCols = new ArrayList();
if (sampleClause.getChildCount() > 2) {
for (int i = 2; i < sampleClause.getChildCount(); i++) {
sampleCols.add((ASTNode) sampleClause.getChild(i));
}
}
// TODO: For now only support sampling on up to two columns
// Need to change it to list of columns
if (sampleCols.size() > 2) {
throw new SemanticException(generateErrorMessage(
(ASTNode) tabref.getChild(0),
ErrorMsg.SAMPLE_RESTRICTION.getMsg()));
}
TableSample tabSample = new TableSample(
unescapeIdentifier(sampleClause.getChild(0).getText()),
unescapeIdentifier(sampleClause.getChild(1).getText()),
sampleCols);
qb.getParseInfo().setTabSample(alias, tabSample);
if (unparseTranslator.isEnabled()) {
for (ASTNode sampleCol : sampleCols) {
unparseTranslator.addIdentifierTranslation((ASTNode) sampleCol
.getChild(0));
}
}
} else if (ssampleIndex >= 0) {
ASTNode sampleClause = (ASTNode) tabref.getChild(ssampleIndex);
Tree type = sampleClause.getChild(0);
Tree numerator = sampleClause.getChild(1);
String value = unescapeIdentifier(numerator.getText());
SplitSample sample;
if (type.getType() == HiveParser.TOK_PERCENT) {
assertCombineInputFormat(numerator, "Percentage");
Double percent = Double.valueOf(value).doubleValue();
if (percent < 0 || percent > 100) {
throw new SemanticException(generateErrorMessage((ASTNode) numerator,
"Sampling percentage should be between 0 and 100"));
}
int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
sample = new SplitSample(percent, seedNum);
} else if (type.getType() == HiveParser.TOK_ROWCOUNT) {
sample = new SplitSample(Integer.parseInt(value));
} else {
assert type.getType() == HiveParser.TOK_LENGTH;
assertCombineInputFormat(numerator, "Total Length");
long length = Integer.parseInt(value.substring(0, value.length() - 1));
char last = value.charAt(value.length() - 1);
if (last == 'k' || last == 'K') {
length <<= 10;
} else if (last == 'm' || last == 'M') {
length <<= 20;
} else if (last == 'g' || last == 'G') {
length <<= 30;
}
int seedNum = conf.getIntVar(ConfVars.HIVESAMPLERANDOMNUM);
sample = new SplitSample(length, seedNum);
}
String alias_id = getAliasId(alias, qb);
nameToSplitSample.put(alias_id, sample);
}
// Insert this map into the stats
qb.setTabAlias(alias, tabIdName);
if (qb.isInsideView()) {
qb.getAliasInsideView().add(alias.toLowerCase());
}
qb.addAlias(alias);
qb.getParseInfo().setSrcForAlias(alias, tableTree);
// if alias to CTE contains the table name, we do not do the translation because
// cte is actually a subquery.
if (!this.aliasToCTEs.containsKey(tabIdName)) {
unparseTranslator.addTableNameTranslation(tableTree, SessionState.get().getCurrentDatabase());
if (aliasIndex != 0) {
unparseTranslator.addIdentifierTranslation((ASTNode) tabref.getChild(aliasIndex));
}
}
return alias;
}
Map getNameToSplitSampleMap() {
return this.nameToSplitSample;
}
/**
* Convert a string to Text format and write its bytes in the same way TextOutputFormat would do.
* This is needed to properly encode non-ascii characters.
*/
private static void writeAsText(String text, FSDataOutputStream out) throws IOException {
Text to = new Text(text);
out.write(to.getBytes(), 0, to.getLength());
}
private void assertCombineInputFormat(Tree numerator, String message) throws SemanticException {
String inputFormat = conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") ?
HiveConf.getVar(conf, HiveConf.ConfVars.HIVETEZINPUTFORMAT):
HiveConf.getVar(conf, HiveConf.ConfVars.HIVEINPUTFORMAT);
if (!inputFormat.equals(CombineHiveInputFormat.class.getName())) {
throw new SemanticException(generateErrorMessage((ASTNode) numerator,
message + " sampling is not supported in " + inputFormat));
}
}
private String processSubQuery(QB qb, ASTNode subq) throws SemanticException {
// This is a subquery and must have an alias
if (subq.getChildCount() != 2) {
throw new SemanticException(ErrorMsg.NO_SUBQUERY_ALIAS.getMsg(subq));
}
ASTNode subqref = (ASTNode) subq.getChild(0);
String alias = unescapeIdentifier(subq.getChild(1).getText());
// Recursively do the first phase of semantic analysis for the subquery
QBExpr qbexpr = new QBExpr(alias);
doPhase1QBExpr(subqref, qbexpr, qb.getId(), alias, qb.isInsideView());
// If the alias is already there then we have a conflict
if (qb.exists(alias)) {
throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(subq
.getChild(1)));
}
// Insert this map into the stats
qb.setSubqAlias(alias, qbexpr);
qb.addAlias(alias);
unparseTranslator.addIdentifierTranslation((ASTNode) subq.getChild(1));
return alias;
}
/*
* Phase1: hold onto any CTE definitions in aliasToCTE.
* CTE definitions are global to the Query.
*/
private void processCTE(QB qb, ASTNode ctes) throws SemanticException {
int numCTEs = ctes.getChildCount();
for(int i=0; i aliasToCTEs map.
*
*/
private CTEClause findCTEFromName(QB qb, String cteName) {
StringBuilder qId = new StringBuilder();
if (qb.getId() != null) {
qId.append(qb.getId());
}
while (qId.length() > 0) {
String nm = qId + ":" + cteName;
CTEClause cte = aliasToCTEs.get(nm);
if (cte != null) {
return cte;
}
int lastIndex = qId.lastIndexOf(":");
lastIndex = lastIndex < 0 ? 0 : lastIndex;
qId.setLength(lastIndex);
}
return aliasToCTEs.get(cteName);
}
/*
* If a CTE is referenced in a QueryBlock:
* - add it as a SubQuery for now.
* - SQ.alias is the alias used in QB. (if no alias is specified,
* it used the CTE name. Works just like table references)
* - Adding SQ done by:
* - copying AST of CTE
* - setting ASTOrigin on cloned AST.
* - trigger phase 1 on new QBExpr.
* - update QB data structs: remove this as a table reference, move it to a SQ invocation.
*/
private void addCTEAsSubQuery(QB qb, String cteName, String cteAlias)
throws SemanticException {
cteAlias = cteAlias == null ? cteName : cteAlias;
CTEClause cte = findCTEFromName(qb, cteName);
ASTNode cteQryNode = cte.cteNode;
QBExpr cteQBExpr = new QBExpr(cteAlias);
doPhase1QBExpr(cteQryNode, cteQBExpr, qb.getId(), cteAlias);
qb.rewriteCTEToSubq(cteAlias, cteName, cteQBExpr);
}
private final CTEClause rootClause = new CTEClause(null, null);
@Override
public List> getAllRootTasks() {
if (!rootTasksResolved) {
rootTasks = toRealRootTasks(rootClause.asExecutionOrder());
rootTasksResolved = true;
}
return rootTasks;
}
@Override
public HashSet getAllInputs() {
HashSet readEntities = new HashSet(getInputs());
for (CTEClause cte : rootClause.asExecutionOrder()) {
if (cte.source != null) {
readEntities.addAll(cte.source.getInputs());
}
}
return readEntities;
}
@Override
public HashSet getAllOutputs() {
HashSet writeEntities = new HashSet(getOutputs());
for (CTEClause cte : rootClause.asExecutionOrder()) {
if (cte.source != null) {
writeEntities.addAll(cte.source.getOutputs());
}
}
return writeEntities;
}
class CTEClause {
CTEClause(String alias, ASTNode cteNode) {
this.alias = alias;
this.cteNode = cteNode;
}
String alias;
ASTNode cteNode;
boolean materialize;
int reference;
QBExpr qbExpr;
List parents = new ArrayList();
// materialized
Table table;
SemanticAnalyzer source;
List> getTasks() {
return source == null ? null : source.rootTasks;
}
List asExecutionOrder() {
List execution = new ArrayList();
asExecutionOrder(new HashSet(), execution);
return execution;
}
void asExecutionOrder(Set visited, List execution) {
for (CTEClause parent : parents) {
if (visited.add(parent)) {
parent.asExecutionOrder(visited, execution);
}
}
execution.add(this);
}
@Override
public String toString() {
return alias == null ? "" : alias;
}
}
private List> toRealRootTasks(List execution) {
List> cteRoots = new ArrayList<>();
List> cteLeafs = new ArrayList<>();
List> curTopRoots = null;
List> curBottomLeafs = null;
for (int i = 0; i < execution.size(); i++) {
CTEClause current = execution.get(i);
if (current.parents.isEmpty() && curTopRoots != null) {
cteRoots.addAll(curTopRoots);
cteLeafs.addAll(curBottomLeafs);
curTopRoots = curBottomLeafs = null;
}
List> curTasks = current.getTasks();
if (curTasks == null) {
continue;
}
if (curTopRoots == null) {
curTopRoots = curTasks;
}
if (curBottomLeafs != null) {
for (Task> topLeafTask : curBottomLeafs) {
for (Task> currentRootTask : curTasks) {
topLeafTask.addDependentTask(currentRootTask);
}
}
}
curBottomLeafs = Task.findLeafs(curTasks);
}
if (curTopRoots != null) {
cteRoots.addAll(curTopRoots);
cteLeafs.addAll(curBottomLeafs);
}
if (cteRoots.isEmpty()) {
return rootTasks;
}
for (Task> cteLeafTask : cteLeafs) {
for (Task> mainRootTask : rootTasks) {
cteLeafTask.addDependentTask(mainRootTask);
}
}
return cteRoots;
}
Table materializeCTE(String cteName, CTEClause cte) throws HiveException {
ASTNode createTable = new ASTNode(new ClassicToken(HiveParser.TOK_CREATETABLE));
ASTNode tableName = new ASTNode(new ClassicToken(HiveParser.TOK_TABNAME));
tableName.addChild(new ASTNode(new ClassicToken(HiveParser.Identifier, cteName)));
ASTNode temporary = new ASTNode(new ClassicToken(HiveParser.KW_TEMPORARY, MATERIALIZATION_MARKER));
createTable.addChild(tableName);
createTable.addChild(temporary);
createTable.addChild(cte.cteNode);
SemanticAnalyzer analyzer = new SemanticAnalyzer(queryState);
analyzer.initCtx(ctx);
analyzer.init(false);
// should share cte contexts
analyzer.aliasToCTEs.putAll(aliasToCTEs);
HiveOperation operation = queryState.getHiveOperation();
try {
analyzer.analyzeInternal(createTable);
} finally {
queryState.setCommandType(operation);
}
Table table = analyzer.tableDesc.toTable(conf);
Path location = table.getDataLocation();
try {
location.getFileSystem(conf).mkdirs(location);
} catch (IOException e) {
throw new HiveException(e);
}
table.setMaterializedTable(true);
LOG.info(cteName + " will be materialized into " + location);
cte.table = table;
cte.source = analyzer;
ctx.addMaterializedTable(cteName, table);
return table;
}
static boolean isJoinToken(ASTNode node) {
if ((node.getToken().getType() == HiveParser.TOK_JOIN)
|| (node.getToken().getType() == HiveParser.TOK_CROSSJOIN)
|| isOuterJoinToken(node)
|| (node.getToken().getType() == HiveParser.TOK_LEFTSEMIJOIN)
|| (node.getToken().getType() == HiveParser.TOK_UNIQUEJOIN)) {
return true;
}
return false;
}
static private boolean isOuterJoinToken(ASTNode node) {
return (node.getToken().getType() == HiveParser.TOK_LEFTOUTERJOIN)
|| (node.getToken().getType() == HiveParser.TOK_RIGHTOUTERJOIN)
|| (node.getToken().getType() == HiveParser.TOK_FULLOUTERJOIN);
}
/**
* Given the AST with TOK_JOIN as the root, get all the aliases for the tables
* or subqueries in the join.
*
* @param qb
* @param join
* @throws SemanticException
*/
@SuppressWarnings("nls")
private void processJoin(QB qb, ASTNode join) throws SemanticException {
int numChildren = join.getChildCount();
if ((numChildren != 2) && (numChildren != 3)
&& join.getToken().getType() != HiveParser.TOK_UNIQUEJOIN) {
throw new SemanticException(generateErrorMessage(join,
"Join with multiple children"));
}
queryProperties.incrementJoinCount(isOuterJoinToken(join));
for (int num = 0; num < numChildren; num++) {
ASTNode child = (ASTNode) join.getChild(num);
if (child.getToken().getType() == HiveParser.TOK_TABREF) {
processTable(qb, child);
} else if (child.getToken().getType() == HiveParser.TOK_SUBQUERY) {
processSubQuery(qb, child);
} else if (child.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
queryProperties.setHasPTF(true);
processPTF(qb, child);
PTFInvocationSpec ptfInvocationSpec = qb.getPTFInvocationSpec(child);
String inputAlias = ptfInvocationSpec == null ? null :
ptfInvocationSpec.getFunction().getAlias();;
if ( inputAlias == null ) {
throw new SemanticException(generateErrorMessage(child,
"PTF invocation in a Join must have an alias"));
}
} else if (child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
child.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
// SELECT * FROM src1 LATERAL VIEW udtf() AS myTable JOIN src2 ...
// is not supported. Instead, the lateral view must be in a subquery
// SELECT * FROM (SELECT * FROM src1 LATERAL VIEW udtf() AS myTable) a
// JOIN src2 ...
throw new SemanticException(ErrorMsg.LATERAL_VIEW_WITH_JOIN
.getMsg(join));
} else if (isJoinToken(child)) {
processJoin(qb, child);
}
}
}
/**
* Given the AST with TOK_LATERAL_VIEW as the root, get the alias for the
* table or subquery in the lateral view and also make a mapping from the
* alias to all the lateral view AST's.
*
* @param qb
* @param lateralView
* @return the alias for the table/subquery
* @throws SemanticException
*/
private String processLateralView(QB qb, ASTNode lateralView)
throws SemanticException {
int numChildren = lateralView.getChildCount();
assert (numChildren == 2);
if (!isCBOSupportedLateralView(lateralView)) {
queryProperties.setCBOSupportedLateralViews(false);
}
ASTNode next = (ASTNode) lateralView.getChild(1);
String alias = null;
switch (next.getToken().getType()) {
case HiveParser.TOK_TABREF:
alias = processTable(qb, next);
break;
case HiveParser.TOK_SUBQUERY:
alias = processSubQuery(qb, next);
break;
case HiveParser.TOK_LATERAL_VIEW:
case HiveParser.TOK_LATERAL_VIEW_OUTER:
alias = processLateralView(qb, next);
break;
default:
throw new SemanticException(ErrorMsg.LATERAL_VIEW_INVALID_CHILD
.getMsg(lateralView));
}
alias = alias.toLowerCase();
qb.getParseInfo().addLateralViewForAlias(alias, lateralView);
qb.addAlias(alias);
return alias;
}
private String extractLateralViewAlias(ASTNode lateralView) {
// Lateral view AST has the following shape:
// ^(TOK_LATERAL_VIEW
// ^(TOK_SELECT ^(TOK_SELEXPR ^(TOK_FUNCTION Identifier params) identifier* tableAlias)))
ASTNode selExpr = (ASTNode) lateralView.getChild(0).getChild(0);
ASTNode astTableAlias = (ASTNode) Iterables.getLast(selExpr.getChildren());
return astTableAlias.getChild(0).getText();
}
/**
* Phase 1: (including, but not limited to):
*
* 1. Gets all the aliases for all the tables / subqueries and makes the
* appropriate mapping in aliasToTabs, aliasToSubq 2. Gets the location of the
* destination and names the clause "inclause" + i 3. Creates a map from a
* string representation of an aggregation tree to the actual aggregation AST
* 4. Creates a mapping from the clause name to the select expression AST in
* destToSelExpr 5. Creates a mapping from a table alias to the lateral view
* AST's in aliasToLateralViews
*
* @param ast
* @param qb
* @param ctx_1
* @throws SemanticException
*/
@SuppressWarnings({"fallthrough", "nls"})
public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx)
throws SemanticException {
boolean phase1Result = true;
QBParseInfo qbp = qb.getParseInfo();
boolean skipRecursion = false;
if (ast.getToken() != null) {
skipRecursion = true;
switch (ast.getToken().getType()) {
case HiveParser.TOK_SELECTDI:
qb.countSelDi();
// fall through
case HiveParser.TOK_SELECT:
qb.countSel();
qbp.setSelExprForClause(ctx_1.dest, ast);
int posn = 0;
if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.QUERY_HINT) {
ParseDriver pd = new ParseDriver();
String queryHintStr = ast.getChild(0).getText();
if (LOG.isDebugEnabled()) {
LOG.debug("QUERY HINT: "+queryHintStr);
}
try {
ASTNode hintNode = pd.parseHint(queryHintStr);
qbp.setHints(hintNode);
posn++;
} catch (ParseException e) {
throw new SemanticException("failed to parse query hint: "+e.getMessage(), e);
}
}
if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM)) {
queryProperties.setUsesScript(true);
}
LinkedHashMap aggregations = doPhase1GetAggregationsFromSelect(ast,
qb, ctx_1.dest);
doPhase1GetColumnAliasesFromSelect(ast, qbp, ctx_1.dest);
qbp.setAggregationExprsForClause(ctx_1.dest, aggregations);
qbp.setDistinctFuncExprsForClause(ctx_1.dest,
doPhase1GetDistinctFuncExprs(aggregations));
break;
case HiveParser.TOK_WHERE:
qbp.setWhrExprForClause(ctx_1.dest, ast);
if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty()) {
queryProperties.setFilterWithSubQuery(true);
}
break;
case HiveParser.TOK_INSERT_INTO:
String currentDatabase = SessionState.get().getCurrentDatabase();
String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase);
qbp.addInsertIntoTable(tab_name, ast);
case HiveParser.TOK_DESTINATION:
ctx_1.dest = this.ctx.getDestNamePrefix(ast, qb).toString() + ctx_1.nextNum;
ctx_1.nextNum++;
boolean isTmpFileDest = false;
if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) {
ASTNode ch = (ASTNode) ast.getChild(0);
if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0
&& ch.getChild(0) instanceof ASTNode) {
ch = (ASTNode) ch.getChild(0);
isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE;
} else {
if (ast.getToken().getType() == HiveParser.TOK_DESTINATION
&& ast.getChild(0).getType() == HiveParser.TOK_TAB) {
String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
SessionState.get().getCurrentDatabase());
qbp.getInsertOverwriteTables().put(fullTableName.toLowerCase(), ast);
qbp.setDestToOpType(ctx_1.dest, true);
}
}
}
// is there a insert in the subquery
if (qbp.getIsSubQ() && !isTmpFileDest) {
throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast));
}
qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
handleInsertStatementSpecPhase1(ast, qbp, ctx_1);
if (qbp.getClauseNamesForDest().size() == 2) {
// From the moment that we have two destination clauses,
// we know that this is a multi-insert query.
// Thus, set property to right value.
// Using qbp.getClauseNamesForDest().size() >= 2 would be
// equivalent, but we use == to avoid setting the property
// multiple times
queryProperties.setMultiDestQuery(true);
}
if (plannerCtx != null && !queryProperties.hasMultiDestQuery()) {
plannerCtx.setInsertToken(ast, isTmpFileDest);
} else if (plannerCtx != null && qbp.getClauseNamesForDest().size() == 2) {
// For multi-insert query, currently we only optimize the FROM clause.
// Hence, introduce multi-insert token on top of it.
// However, first we need to reset existing token (insert).
// Using qbp.getClauseNamesForDest().size() >= 2 would be
// equivalent, but we use == to avoid setting the property
// multiple times
plannerCtx.resetToken();
plannerCtx.setMultiInsertToken((ASTNode) qbp.getQueryFrom().getChild(0));
}
break;
case HiveParser.TOK_FROM:
int child_count = ast.getChildCount();
if (child_count != 1) {
throw new SemanticException(generateErrorMessage(ast,
"Multiple Children " + child_count));
}
if (!qbp.getIsSubQ()) {
qbp.setQueryFromExpr(ast);
}
// Check if this is a subquery / lateral view
ASTNode frm = (ASTNode) ast.getChild(0);
if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
processTable(qb, frm);
} else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
processSubQuery(qb, frm);
} else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
queryProperties.setHasLateralViews(true);
processLateralView(qb, frm);
} else if (isJoinToken(frm)) {
processJoin(qb, frm);
qbp.setJoinExpr(frm);
}else if(frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION){
queryProperties.setHasPTF(true);
processPTF(qb, frm);
}
break;
case HiveParser.TOK_CLUSTERBY:
// Get the clusterby aliases - these are aliased to the entries in the
// select list
queryProperties.setHasClusterBy(true);
qbp.setClusterByExprForClause(ctx_1.dest, ast);
break;
case HiveParser.TOK_DISTRIBUTEBY:
// Get the distribute by aliases - these are aliased to the entries in
// the
// select list
queryProperties.setHasDistributeBy(true);
qbp.setDistributeByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
} else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
}
break;
case HiveParser.TOK_SORTBY:
// Get the sort by aliases - these are aliased to the entries in the
// select list
queryProperties.setHasSortBy(true);
qbp.setSortByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg()));
} else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg()));
}
break;
case HiveParser.TOK_ORDERBY:
// Get the order by aliases - these are aliased to the entries in the
// select list
queryProperties.setHasOrderBy(true);
qbp.setOrderByExprForClause(ctx_1.dest, ast);
if (qbp.getClusterByForClause(ctx_1.dest) != null) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg()));
}
// If there are aggregations in order by, we need to remember them in qb.
qbp.addAggregationExprsForClause(ctx_1.dest,
doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
break;
case HiveParser.TOK_GROUPBY:
case HiveParser.TOK_ROLLUP_GROUPBY:
case HiveParser.TOK_CUBE_GROUPBY:
case HiveParser.TOK_GROUPING_SETS:
// Get the groupby aliases - these are aliased to the entries in the
// select list
queryProperties.setHasGroupBy(true);
if (qbp.getJoinExpr() != null) {
queryProperties.setHasJoinFollowedByGroupBy(true);
}
if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
}
qbp.setGroupByExprForClause(ctx_1.dest, ast);
skipRecursion = true;
// Rollup and Cubes are syntactic sugar on top of grouping sets
if (ast.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) {
qbp.getDestRollups().add(ctx_1.dest);
} else if (ast.getToken().getType() == HiveParser.TOK_CUBE_GROUPBY) {
qbp.getDestCubes().add(ctx_1.dest);
} else if (ast.getToken().getType() == HiveParser.TOK_GROUPING_SETS) {
qbp.getDestGroupingSets().add(ctx_1.dest);
}
break;
case HiveParser.TOK_HAVING:
qbp.setHavingExprForClause(ctx_1.dest, ast);
qbp.addAggregationExprsForClause(ctx_1.dest,
doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
break;
case HiveParser.KW_WINDOW:
if (!qb.hasWindowingSpec(ctx_1.dest) ) {
throw new SemanticException(generateErrorMessage(ast,
"Query has no Cluster/Distribute By; but has a Window definition"));
}
handleQueryWindowClauses(qb, ctx_1, ast);
break;
case HiveParser.TOK_LIMIT:
if (ast.getChildCount() == 2) {
qbp.setDestLimit(ctx_1.dest,
new Integer(ast.getChild(0).getText()),
new Integer(ast.getChild(1).getText()));
} else {
qbp.setDestLimit(ctx_1.dest, new Integer(0),
new Integer(ast.getChild(0).getText()));
}
break;
case HiveParser.TOK_ANALYZE:
// Case of analyze command
String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0)).toLowerCase();
qb.setTabAlias(table_name, table_name);
qb.addAlias(table_name);
qb.getParseInfo().setIsAnalyzeCommand(true);
qb.getParseInfo().setNoScanAnalyzeCommand(this.noscan);
// Allow analyze the whole table and dynamic partitions
HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");
break;
case HiveParser.TOK_UNIONALL:
if (!qbp.getIsSubQ()) {
// this shouldn't happen. The parser should have converted the union to be
// contained in a subquery. Just in case, we keep the error as a fallback.
throw new SemanticException(generateErrorMessage(ast,
ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
}
skipRecursion = false;
break;
case HiveParser.TOK_INSERT:
ASTNode destination = (ASTNode) ast.getChild(0);
Tree tab = destination.getChild(0);
// Proceed if AST contains partition & If Not Exists
if (destination.getChildCount() == 2 &&
tab.getChildCount() == 2 &&
destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
String tableName = tab.getChild(0).getChild(0).getText();
Tree partitions = tab.getChild(1);
int childCount = partitions.getChildCount();
HashMap partition = new HashMap();
for (int i = 0; i < childCount; i++) {
String partitionName = partitions.getChild(i).getChild(0).getText();
// Convert to lowercase for the comparison
partitionName = partitionName.toLowerCase();
Tree pvalue = partitions.getChild(i).getChild(1);
if (pvalue == null) {
break;
}
String partitionVal = stripQuotes(pvalue.getText());
partition.put(partitionName, partitionVal);
}
// if it is a dynamic partition throw the exception
if (childCount != partition.size()) {
throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS
.getMsg(partition.toString()));
}
Table table = null;
try {
table = this.getTableObjectByName(tableName);
} catch (HiveException ex) {
throw new SemanticException(ex);
}
try {
Partition parMetaData = db.getPartition(table, partition, false);
// Check partition exists if it exists skip the overwrite
if (parMetaData != null) {
phase1Result = false;
skipRecursion = true;
LOG.info("Partition already exists so insert into overwrite " +
"skipped for partition : " + parMetaData.toString());
break;
}
} catch (HiveException e) {
LOG.info("Error while getting metadata : ", e);
}
validatePartSpec(table, partition, (ASTNode)tab, conf, false);
}
skipRecursion = false;
break;
case HiveParser.TOK_LATERAL_VIEW:
case HiveParser.TOK_LATERAL_VIEW_OUTER:
// todo: nested LV
assert ast.getChildCount() == 1;
qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast);
break;
case HiveParser.TOK_CTE:
processCTE(qb, ast);
break;
default:
skipRecursion = false;
break;
}
}
if (!skipRecursion) {
// Iterate over the rest of the children
int child_count = ast.getChildCount();
for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) {
// Recurse
phase1Result = phase1Result && doPhase1(
(ASTNode)ast.getChild(child_pos), qb, ctx_1, plannerCtx);
}
}
return phase1Result;
}
/**
* This is phase1 of supporting specifying schema in insert statement
* insert into foo(z,y) select a,b from bar;
* @see #handleInsertStatementSpec(java.util.List, String, RowResolver, RowResolver, QB, ASTNode)
* @throws SemanticException
*/
private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase1Ctx ctx_1) throws SemanticException {
ASTNode tabColName = (ASTNode)ast.getChild(1);
if(ast.getType() == HiveParser.TOK_INSERT_INTO && tabColName != null && tabColName.getType() == HiveParser.TOK_TABCOLNAME) {
//we have "insert into foo(a,b)..."; parser will enforce that 1+ columns are listed if TOK_TABCOLNAME is present
List targetColNames = new ArrayList();
for(Node col : tabColName.getChildren()) {
assert ((ASTNode)col).getType() == HiveParser.Identifier :
"expected token " + HiveParser.Identifier + " found " + ((ASTNode)col).getType();
targetColNames.add(((ASTNode)col).getText().toLowerCase());
}
String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
SessionState.get().getCurrentDatabase());
qbp.setDestSchemaForClause(ctx_1.dest, targetColNames);
Set targetColumns = new HashSet();
targetColumns.addAll(targetColNames);
if(targetColNames.size() != targetColumns.size()) {
throw new SemanticException(generateErrorMessage(tabColName,
"Duplicate column name detected in " + fullTableName + " table schema specification"));
}
Table targetTable = null;
try {
targetTable = db.getTable(fullTableName, false);
}
catch (HiveException ex) {
LOG.error("Error processing HiveParser.TOK_DESTINATION: " + ex.getMessage(), ex);
throw new SemanticException(ex);
}
if(targetTable == null) {
throw new SemanticException(generateErrorMessage(ast,
"Unable to access metadata for table " + fullTableName));
}
for(FieldSchema f : targetTable.getCols()) {
//parser only allows foo(a,b), not foo(foo.a, foo.b)
targetColumns.remove(f.getName());
}
if(!targetColumns.isEmpty()) {//here we need to see if remaining columns are dynamic partition columns
/* We just checked the user specified schema columns among regular table column and found some which are not
'regular'. Now check is they are dynamic partition columns
For dynamic partitioning,
Given "create table multipart(a int, b int) partitioned by (c int, d int);"
for "insert into multipart partition(c='1',d)(d,a) values(2,3);" we expect parse tree to look like this
(TOK_INSERT_INTO
(TOK_TAB
(TOK_TABNAME multipart)
(TOK_PARTSPEC
(TOK_PARTVAL c '1')
(TOK_PARTVAL d)
)
)
(TOK_TABCOLNAME d a)
)*/
List dynamicPartitionColumns = new ArrayList();
if(ast.getChild(0) != null && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
ASTNode tokTab = (ASTNode)ast.getChild(0);
ASTNode tokPartSpec = (ASTNode)tokTab.getFirstChildWithType(HiveParser.TOK_PARTSPEC);
if(tokPartSpec != null) {
for(Node n : tokPartSpec.getChildren()) {
ASTNode tokPartVal = null;
if(n instanceof ASTNode) {
tokPartVal = (ASTNode)n;
}
if(tokPartVal != null && tokPartVal.getType() == HiveParser.TOK_PARTVAL && tokPartVal.getChildCount() == 1) {
assert tokPartVal.getChild(0).getType() == HiveParser.Identifier :
"Expected column name; found tokType=" + tokPartVal.getType();
dynamicPartitionColumns.add(tokPartVal.getChild(0).getText());
}
}
for(String colName : dynamicPartitionColumns) {
targetColumns.remove(colName);
}
} else {
// partition spec is not specified but column schema can have partitions specified
for(FieldSchema f : targetTable.getPartCols()) {
//parser only allows foo(a,b), not foo(foo.a, foo.b)
targetColumns.remove(f.getName());
}
}
}
if(!targetColumns.isEmpty()) {
//Found some columns in user specified schema which are neither regular not dynamic partition columns
throw new SemanticException(generateErrorMessage(tabColName,
"'" + (targetColumns.size() == 1 ? targetColumns.iterator().next() : targetColumns) +
"' in insert schema specification " + (targetColumns.size() == 1 ? "is" : "are") +
" not found among regular columns of " +
fullTableName + " nor dynamic partition columns."));
}
}
}
}
public void getMaterializationMetadata(QB qb) throws SemanticException {
try {
gatherCTEReferences(qb, rootClause);
int threshold = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD);
for (CTEClause cte : Sets.newHashSet(aliasToCTEs.values())) {
if (threshold >= 0 && cte.reference >= threshold) {
cte.materialize = true;
}
}
} catch (HiveException e) {
// Has to use full name to make sure it does not conflict with
// org.apache.commons.lang.StringUtils
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
if (e instanceof SemanticException) {
throw (SemanticException)e;
}
throw new SemanticException(e.getMessage(), e);
}
}
private void gatherCTEReferences(QBExpr qbexpr, CTEClause parent) throws HiveException {
if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
gatherCTEReferences(qbexpr.getQB(), parent);
} else {
gatherCTEReferences(qbexpr.getQBExpr1(), parent);
gatherCTEReferences(qbexpr.getQBExpr2(), parent);
}
}
// TODO: check view references, too
private void gatherCTEReferences(QB qb, CTEClause current) throws HiveException {
for (String alias : qb.getTabAliases()) {
String tabName = qb.getTabNameForAlias(alias);
String cteName = tabName.toLowerCase();
CTEClause cte = findCTEFromName(qb, cteName);
if (cte != null) {
if (ctesExpanded.contains(cteName)) {
throw new SemanticException("Recursive cte " + cteName +
" detected (cycle: " + StringUtils.join(ctesExpanded, " -> ") +
" -> " + cteName + ").");
}
cte.reference++;
current.parents.add(cte);
if (cte.qbExpr != null) {
continue;
}
cte.qbExpr = new QBExpr(cteName);
doPhase1QBExpr(cte.cteNode, cte.qbExpr, qb.getId(), cteName);
ctesExpanded.add(cteName);
gatherCTEReferences(cte.qbExpr, cte);
ctesExpanded.remove(ctesExpanded.size() - 1);
}
}
for (String alias : qb.getSubqAliases()) {
gatherCTEReferences(qb.getSubqForAlias(alias), current);
}
}
public void getMetaData(QB qb) throws SemanticException {
getMetaData(qb, false);
}
public void getMetaData(QB qb, boolean enableMaterialization) throws SemanticException {
try {
if (enableMaterialization) {
getMaterializationMetadata(qb);
}
getMetaData(qb, null);
} catch (HiveException e) {
// Has to use full name to make sure it does not conflict with
// org.apache.commons.lang.StringUtils
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
if (e instanceof SemanticException) {
throw (SemanticException)e;
}
throw new SemanticException(e.getMessage(), e);
}
}
private void getMetaData(QBExpr qbexpr, ReadEntity parentInput)
throws HiveException {
if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
getMetaData(qbexpr.getQB(), parentInput);
} else {
getMetaData(qbexpr.getQBExpr1(), parentInput);
getMetaData(qbexpr.getQBExpr2(), parentInput);
}
}
@SuppressWarnings("nls")
private void getMetaData(QB qb, ReadEntity parentInput)
throws HiveException {
LOG.info("Get metadata for source tables");
// Go over the tables and populate the related structures.
// We have to materialize the table alias list since we might
// modify it in the middle for view rewrite.
List tabAliases = new ArrayList(qb.getTabAliases());
// Keep track of view alias to view name and read entity
// For eg: for a query like 'select * from V3', where V3 -> V2, V2 -> V1, V1 -> T
// keeps track of full view name and read entity corresponding to alias V3, V3:V2, V3:V2:V1.
// This is needed for tracking the dependencies for inputs, along with their parents.
Map> aliasToViewInfo =
new HashMap>();
/*
* used to capture view to SQ conversions. This is used to check for
* recursive CTE invocations.
*/
Map sqAliasToCTEName = new HashMap();
for (String alias : tabAliases) {
String tabName = qb.getTabNameForAlias(alias);
String cteName = tabName.toLowerCase();
// Get table details from tabNameToTabObject cache
Table tab = getTableObjectByName(tabName, false);
if (tab != null) {
// do a deep copy, in case downstream changes it.
tab = new Table(tab.getTTable().deepCopy());
}
if (tab == null ||
tab.getDbName().equals(SessionState.get().getCurrentDatabase())) {
Table materializedTab = ctx.getMaterializedTable(cteName);
if (materializedTab == null) {
// we first look for this alias from CTE, and then from catalog.
CTEClause cte = findCTEFromName(qb, cteName);
if (cte != null) {
if (!cte.materialize) {
addCTEAsSubQuery(qb, cteName, alias);
sqAliasToCTEName.put(alias, cteName);
continue;
}
tab = materializeCTE(cteName, cte);
}
} else {
tab = materializedTab;
}
}
if (tab == null) {
if(tabName.equals(DUMMY_DATABASE + "." + DUMMY_TABLE)) {
continue;
}
ASTNode src = qb.getParseInfo().getSrcForAlias(alias);
if (null != src) {
throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(src));
} else {
throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(alias));
}
}
if (tab.isView()) {
if (qb.getParseInfo().isAnalyzeCommand()) {
throw new SemanticException(ErrorMsg.ANALYZE_VIEW.getMsg());
}
String fullViewName = tab.getFullyQualifiedName();
// Prevent view cycles
if (viewsExpanded.contains(fullViewName)) {
throw new SemanticException("Recursive view " + fullViewName +
" detected (cycle: " + StringUtils.join(viewsExpanded, " -> ") +
" -> " + fullViewName + ").");
}
replaceViewReferenceWithDefinition(qb, tab, tabName, alias);
// This is the last time we'll see the Table objects for views, so add it to the inputs
// now. isInsideView will tell if this view is embedded in another view.
// If the view is Inside another view, it should have at least one parent
if (qb.isInsideView() && parentInput == null) {
parentInput = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
}
ReadEntity viewInput = new ReadEntity(tab, parentInput, !qb.isInsideView());
viewInput = PlanUtils.addInput(inputs, viewInput);
aliasToViewInfo.put(alias, new ObjectPair(fullViewName, viewInput));
String aliasId = getAliasId(alias, qb);
if (aliasId != null) {
aliasId = aliasId.replace(SemanticAnalyzer.SUBQUERY_TAG_1, "")
.replace(SemanticAnalyzer.SUBQUERY_TAG_2, "");
}
viewAliasToInput.put(aliasId, viewInput);
continue;
}
if (!InputFormat.class.isAssignableFrom(tab.getInputFormatClass())) {
throw new SemanticException(generateErrorMessage(
qb.getParseInfo().getSrcForAlias(alias),
ErrorMsg.INVALID_INPUT_FORMAT_TYPE.getMsg()));
}
qb.getMetaData().setSrcForAlias(alias, tab);
if (qb.getParseInfo().isAnalyzeCommand()) {
// allow partial partition specification for nonscan since noscan is fast.
TableSpec ts = new TableSpec(db, conf, (ASTNode) ast.getChild(0), true, this.noscan);
if (ts.specType == SpecType.DYNAMIC_PARTITION) { // dynamic partitions
try {
ts.partitions = db.getPartitionsByNames(ts.tableHandle, ts.partSpec);
} catch (HiveException e) {
throw new SemanticException(generateErrorMessage(
qb.getParseInfo().getSrcForAlias(alias),
"Cannot get partitions for " + ts.partSpec), e);
}
}
tab.setTableSpec(ts);
qb.getParseInfo().addTableSpec(alias, ts);
}
ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(getAliasId(alias, qb), viewAliasToInput);
// Temporary tables created during the execution are not the input sources
if (!PlanUtils.isValuesTempTable(alias)) {
PlanUtils.addInput(inputs,
new ReadEntity(tab, parentViewInfo, parentViewInfo == null),mergeIsDirect);
}
}
LOG.info("Get metadata for subqueries");
// Go over the subqueries and getMetaData for these
for (String alias : qb.getSubqAliases()) {
boolean wasView = aliasToViewInfo.containsKey(alias);
boolean wasCTE = sqAliasToCTEName.containsKey(alias);
ReadEntity newParentInput = null;
if (wasView) {
viewsExpanded.add(aliasToViewInfo.get(alias).getFirst());
newParentInput = aliasToViewInfo.get(alias).getSecond();
} else if (wasCTE) {
ctesExpanded.add(sqAliasToCTEName.get(alias));
}
QBExpr qbexpr = qb.getSubqForAlias(alias);
getMetaData(qbexpr, newParentInput);
if (wasView) {
viewsExpanded.remove(viewsExpanded.size() - 1);
} else if (wasCTE) {
ctesExpanded.remove(ctesExpanded.size() - 1);
}
}
RowFormatParams rowFormatParams = new RowFormatParams();
StorageFormat storageFormat = new StorageFormat(conf);
LOG.info("Get metadata for destination tables");
// Go over all the destination structures and populate the related
// metadata
QBParseInfo qbp = qb.getParseInfo();
for (String name : qbp.getClauseNamesForDest()) {
ASTNode ast = qbp.getDestForClause(name);
switch (ast.getToken().getType()) {
case HiveParser.TOK_TAB: {
TableSpec ts = new TableSpec(db, conf, ast);
if (ts.tableHandle.isView() ||
(mvRebuildMode == MaterializationRebuildMode.NONE && ts.tableHandle.isMaterializedView())) {
throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
}
Class> outputFormatClass = ts.tableHandle.getOutputFormatClass();
if (!ts.tableHandle.isNonNative() &&
!HiveOutputFormat.class.isAssignableFrom(outputFormatClass)) {
throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE
.getMsg(ast, "The class is " + outputFormatClass.toString()));
}
boolean isTableWrittenTo = qb.getParseInfo().isInsertIntoTable(ts.tableHandle.getDbName(),
ts.tableHandle.getTableName());
isTableWrittenTo |= (qb.getParseInfo().getInsertOverwriteTables().
get(getUnescapedName((ASTNode) ast.getChild(0), ts.tableHandle.getDbName()).toLowerCase()) != null);
assert isTableWrittenTo :
"Inconsistent data structure detected: we are writing to " + ts.tableHandle + " in " +
name + " but it's not in isInsertIntoTable() or getInsertOverwriteTables()";
// Disallow update and delete on non-acid tables
boolean isFullAcid = AcidUtils.isFullAcidTable(ts.tableHandle);
if ((updating(name) || deleting(name)) && !isFullAcid) {
if (!AcidUtils.isInsertOnlyTable(ts.tableHandle)) {
// Whether we are using an acid compliant transaction manager has already been caught in
// UpdateDeleteSemanticAnalyzer, so if we are updating or deleting and getting nonAcid
// here, it means the table itself doesn't support it.
throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TABLE, ts.tableName);
} else {
throw new SemanticException(ErrorMsg.ACID_OP_ON_INSERTONLYTRAN_TABLE, ts.tableName);
}
}
// TableSpec ts is got from the query (user specified),
// which means the user didn't specify partitions in their query,
// but whether the table itself is partitioned is not know.
if (ts.specType != SpecType.STATIC_PARTITION) {
// This is a table or dynamic partition
qb.getMetaData().setDestForAlias(name, ts.tableHandle);
// has dynamic as well as static partitions
if (ts.partSpec != null && ts.partSpec.size() > 0) {
qb.getMetaData().setPartSpecForAlias(name, ts.partSpec);
}
} else {
// This is a partition
qb.getMetaData().setDestForAlias(name, ts.partHandle);
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
// Add the table spec for the destination table.
qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
}
break;
}
case HiveParser.TOK_DIR: {
// This is a dfs file
String fname = stripQuotes(ast.getChild(0).getText());
if ((!qb.getParseInfo().getIsSubQ())
&& (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_TMP_FILE)) {
if (qb.isCTAS() || qb.isMaterializedView()) {
qb.setIsQuery(false);
ctx.setResDir(null);
ctx.setResFile(null);
Path location;
// If the CTAS query does specify a location, use the table location, else use the db location
if (qb.getTableDesc() != null && qb.getTableDesc().getLocation() != null) {
location = new Path(qb.getTableDesc().getLocation());
} else {
// allocate a temporary output dir on the location of the table
String tableName = getUnescapedName((ASTNode) ast.getChild(0));
String[] names = Utilities.getDbTableName(tableName);
try {
Warehouse wh = new Warehouse(conf);
//Use destination table's db location.
String destTableDb = qb.getTableDesc() != null ? qb.getTableDesc().getDatabaseName() : null;
if (destTableDb == null) {
destTableDb = names[0];
}
location = wh.getDatabasePath(db.getDatabase(destTableDb));
} catch (MetaException e) {
throw new SemanticException(e);
}
}
try {
CreateTableDesc tblDesc = qb.getTableDesc();
if (tblDesc != null
&& tblDesc.isTemporary()
&& AcidUtils.isInsertOnlyTable(tblDesc.getTblProps(), true)) {
fname = FileUtils.makeQualified(location, conf).toString();
} else {
fname = ctx.getExtTmpPathRelTo(
FileUtils.makeQualified(location, conf)).toString();
}
} catch (Exception e) {
throw new SemanticException(generateErrorMessage(ast,
"Error creating temporary folder on: " + location.toString()), e);
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
TableSpec ts = new TableSpec(db, conf, this.ast);
// Add the table spec for the destination table.
qb.getParseInfo().addTableSpec(ts.tableName.toLowerCase(), ts);
}
} else {
// This is the only place where isQuery is set to true; it defaults to false.
qb.setIsQuery(true);
Path stagingPath = getStagingDirectoryPathname(qb);
fname = stagingPath.toString();
ctx.setResDir(stagingPath);
}
}
boolean isDfsFile = true;
if (ast.getChildCount() >= 2 && ast.getChild(1).getText().toLowerCase().equals("local")) {
isDfsFile = false;
}
// Set the destination for the SELECT query inside the CTAS
qb.getMetaData().setDestForAlias(name, fname, isDfsFile);
CreateTableDesc directoryDesc = new CreateTableDesc();
boolean directoryDescIsSet = false;
int numCh = ast.getChildCount();
for (int num = 1; num < numCh ; num++){
ASTNode child = (ASTNode) ast.getChild(num);
if (child != null) {
if (storageFormat.fillStorageFormat(child)) {
directoryDesc.setInputFormat(storageFormat.getInputFormat());
directoryDesc.setOutputFormat(storageFormat.getOutputFormat());
directoryDesc.setSerName(storageFormat.getSerde());
directoryDescIsSet = true;
continue;
}
switch (child.getToken().getType()) {
case HiveParser.TOK_TABLEROWFORMAT:
rowFormatParams.analyzeRowFormat(child);
directoryDesc.setFieldDelim(rowFormatParams.fieldDelim);
directoryDesc.setLineDelim(rowFormatParams.lineDelim);
directoryDesc.setCollItemDelim(rowFormatParams.collItemDelim);
directoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim);
directoryDesc.setFieldEscape(rowFormatParams.fieldEscape);
directoryDesc.setNullFormat(rowFormatParams.nullFormat);
directoryDescIsSet=true;
break;
case HiveParser.TOK_TABLESERIALIZER:
ASTNode serdeChild = (ASTNode) child.getChild(0);
storageFormat.setSerde(unescapeSQLString(serdeChild.getChild(0).getText()));
directoryDesc.setSerName(storageFormat.getSerde());
if (serdeChild.getChildCount() > 1) {
directoryDesc.setSerdeProps(new HashMap());
readProps((ASTNode) serdeChild.getChild(1).getChild(0), directoryDesc.getSerdeProps());
}
directoryDescIsSet = true;
break;
}
}
}
if (directoryDescIsSet){
qb.setDirectoryDesc(directoryDesc);
}
break;
}
default:
throw new SemanticException(generateErrorMessage(ast,
"Unknown Token Type " + ast.getToken().getType()));
}
}
}
/**
* Checks if a given path is encrypted (valid only for HDFS files)
* @param path The path to check for encryption
* @return True if the path is encrypted; False if it is not encrypted
* @throws HiveException If an error occurs while checking for encryption
*/
private boolean isPathEncrypted(Path path) throws HiveException {
try {
HadoopShims.HdfsEncryptionShim hdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim(path.getFileSystem(conf));
if (hdfsEncryptionShim != null) {
if (hdfsEncryptionShim.isPathEncrypted(path)) {
return true;
}
}
} catch (Exception e) {
throw new HiveException("Unable to determine if " + path + " is encrypted: " + e, e);
}
return false;
}
/**
* Compares to path key encryption strenghts.
*
* @param p1 Path to an HDFS file system
* @param p2 Path to an HDFS file system
* @return -1 if strength is weak; 0 if is equals; 1 if it is stronger
* @throws HiveException If an error occurs while comparing key strengths.
*/
private int comparePathKeyStrength(Path p1, Path p2) throws HiveException {
HadoopShims.HdfsEncryptionShim hdfsEncryptionShim;
hdfsEncryptionShim = SessionState.get().getHdfsEncryptionShim();
if (hdfsEncryptionShim != null) {
try {
return hdfsEncryptionShim.comparePathKeyStrength(p1, p2);
} catch (Exception e) {
throw new HiveException("Unable to compare key strength for " + p1 + " and " + p2 + " : " + e, e);
}
}
return 0; // Non-encrypted path (or equals strength)
}
/**
* Checks if a given path has read-only access permissions.
*
* @param path The path to check for read-only permissions.
* @return True if the path is read-only; False otherwise.
* @throws HiveException If an error occurs while checking file permissions.
*/
private boolean isPathReadOnly(Path path) throws HiveException {
HiveConf conf = SessionState.get().getConf();
try {
FileSystem fs = path.getFileSystem(conf);
UserGroupInformation ugi = Utils.getUGI();
FileStatus status = fs.getFileStatus(path);
// We just check for writing permissions. If it fails with AccessControException, then it
// means the location may be read-only.
FileUtils.checkFileAccessWithImpersonation(fs, status, FsAction.WRITE, ugi.getUserName());
// Path has writing permissions
return false;
} catch (AccessControlException e) {
// An AccessControlException may be caused for other different errors,
// but we take it as if our path is read-only
return true;
} catch (Exception e) {
throw new HiveException("Unable to determine if " + path + " is read only: " + e, e);
}
}
/**
* Gets the strongest encrypted table path.
*
* @param qb The QB object that contains a list of all table locations.
* @return The strongest encrypted path. It may return NULL if there are not tables encrypted, or are not HDFS tables.
* @throws HiveException if an error occurred attempting to compare the encryption strength
*/
private Path getStrongestEncryptedTablePath(QB qb) throws HiveException {
List tabAliases = new ArrayList(qb.getTabAliases());
Path strongestPath = null;
/* Walk through all found table locations to get the most encrypted table */
for (String alias : tabAliases) {
Table tab = qb.getMetaData().getTableForAlias(alias);
if (tab != null) {
Path tablePath = tab.getDataLocation();
if (tablePath != null) {
if ("hdfs".equalsIgnoreCase(tablePath.toUri().getScheme())) {
if (isPathEncrypted(tablePath)) {
if (strongestPath == null) {
strongestPath = tablePath;
} else if (comparePathKeyStrength(tablePath, strongestPath) > 0) {
strongestPath = tablePath;
}
}
}
}
}
}
return strongestPath;
}
/**
* Gets the staging directory where MR files will be stored temporary.
* It walks through the QB plan to find the correct location where save temporary files. This
* temporary location (or staging directory) may be created inside encrypted tables locations for
* security reasons. If the QB has read-only tables, then the older scratch directory will be used,
* or a permission error will be thrown if the requested query table is encrypted and the old scratch
* directory is not.
*
* @param qb The QB object that contains a list of all table locations.
* @return The path to the staging directory.
* @throws HiveException If an error occurs while identifying the correct staging location.
*/
private Path getStagingDirectoryPathname(QB qb) throws HiveException {
Path stagingPath = null, tablePath;
// Looks for the most encrypted table location
// It may return null if there are not tables encrypted, or are not part of HDFS
tablePath = getStrongestEncryptedTablePath(qb);
if (tablePath != null) {
// At this point, tablePath is part of HDFS and it is encrypted
if (isPathReadOnly(tablePath)) {
Path tmpPath = ctx.getMRTmpPath();
if (comparePathKeyStrength(tablePath, tmpPath) < 0) {
throw new HiveException("Read-only encrypted tables cannot be read " +
"if the scratch directory is not encrypted (or encryption is weak)");
} else {
stagingPath = tmpPath;
}
}
if (stagingPath == null) {
stagingPath = ctx.getMRTmpPath(tablePath.toUri());
}
} else {
stagingPath = ctx.getMRTmpPath();
}
return stagingPath;
}
private void replaceViewReferenceWithDefinition(QB qb, Table tab,
String tab_name, String alias) throws SemanticException {
ASTNode viewTree;
final ASTNodeOrigin viewOrigin = new ASTNodeOrigin("VIEW", tab.getTableName(),
tab.getViewExpandedText(), alias, qb.getParseInfo().getSrcForAlias(
alias));
try {
// Reparse text, passing null for context to avoid clobbering
// the top-level token stream.
String viewFullyQualifiedName = tab.getCompleteName();
String viewText = tab.getViewExpandedText();
TableMask viewMask = new TableMask(this, conf, false);
viewTree = ParseUtils.parse(viewText, ctx, tab.getCompleteName());
if (!unparseTranslator.isEnabled() &&
(viewMask.isEnabled() && analyzeRewrite == null)) {
viewTree = rewriteASTWithMaskAndFilter(viewMask, viewTree,
ctx.getViewTokenRewriteStream(viewFullyQualifiedName),
ctx, db, tabNameToTabObject, ignoredTokens);
}
Dispatcher nodeOriginDispatcher = new Dispatcher() {
@Override
public Object dispatch(Node nd, java.util.Stack stack,
Object... nodeOutputs) {
((ASTNode) nd).setOrigin(viewOrigin);
return null;
}
};
GraphWalker nodeOriginTagger = new DefaultGraphWalker(
nodeOriginDispatcher);
nodeOriginTagger.startWalking(java.util.Collections
. singleton(viewTree), null);
} catch (ParseException e) {
// A user could encounter this if a stored view definition contains
// an old SQL construct which has been eliminated in a later Hive
// version, so we need to provide full debugging info to help
// with fixing the view definition.
LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
StringBuilder sb = new StringBuilder();
sb.append(e.getMessage());
ErrorMsg.renderOrigin(sb, viewOrigin);
throw new SemanticException(sb.toString(), e);
}
QBExpr qbexpr = new QBExpr(alias);
doPhase1QBExpr(viewTree, qbexpr, qb.getId(), alias, true);
// if skip authorization, skip checking;
// if it is inside a view, skip checking;
// if authorization flag is not enabled, skip checking.
// if HIVE_STATS_COLLECT_SCANCOLS is enabled, check.
if ((!this.skipAuthorization() && !qb.isInsideView() && HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED))
|| HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
qb.rewriteViewToSubq(alias, tab_name, qbexpr, tab);
} else {
qb.rewriteViewToSubq(alias, tab_name, qbexpr, null);
}
}
private boolean isPresent(String[] list, String elem) {
for (String s : list) {
if (s.toLowerCase().equals(elem)) {
return true;
}
}
return false;
}
/*
* This method is invoked for unqualified column references in join conditions.
* This is passed in the Alias to Operator mapping in the QueryBlock so far.
* We try to resolve the unqualified column against each of the Operator Row Resolvers.
* - if the column is present in only one RowResolver, we treat this as a reference to
* that Operator.
* - if the column resolves with more than one RowResolver, we treat it as an Ambiguous
* reference.
* - if the column doesn't resolve with any RowResolver, we treat this as an Invalid
* reference.
*/
@SuppressWarnings("rawtypes")
private String findAlias(ASTNode columnRef,
Map aliasToOpInfo) throws SemanticException {
String colName = unescapeIdentifier(columnRef.getChild(0).getText()
.toLowerCase());
String tabAlias = null;
if ( aliasToOpInfo != null ) {
for (Map.Entry opEntry : aliasToOpInfo.entrySet()) {
Operator op = opEntry.getValue();
RowResolver rr = opParseCtx.get(op).getRowResolver();
ColumnInfo colInfo = rr.get(null, colName);
if (colInfo != null) {
if (tabAlias == null) {
tabAlias = opEntry.getKey();
} else {
throw new SemanticException(
ErrorMsg.AMBIGUOUS_TABLE_ALIAS.getMsg(columnRef.getChild(0)));
}
}
}
}
if ( tabAlias == null ) {
throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(columnRef
.getChild(0)));
}
return tabAlias;
}
@SuppressWarnings("nls")
void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn,
ArrayList leftAliases, ArrayList rightAliases,
ArrayList fields,
Map aliasToOpInfo) throws SemanticException {
// String[] allAliases = joinTree.getAllAliases();
switch (condn.getToken().getType()) {
case HiveParser.TOK_TABLE_OR_COL:
String tableOrCol = unescapeIdentifier(condn.getChild(0).getText()
.toLowerCase());
unparseTranslator.addIdentifierTranslation((ASTNode) condn.getChild(0));
if (isPresent(joinTree.getLeftAliases(), tableOrCol)) {
if (!leftAliases.contains(tableOrCol)) {
leftAliases.add(tableOrCol);
}
} else if (isPresent(joinTree.getRightAliases(), tableOrCol)) {
if (!rightAliases.contains(tableOrCol)) {
rightAliases.add(tableOrCol);
}
} else {
tableOrCol = findAlias(condn, aliasToOpInfo);
if (isPresent(joinTree.getLeftAliases(), tableOrCol)) {
if (!leftAliases.contains(tableOrCol)) {
leftAliases.add(tableOrCol);
}
} else {
if (!rightAliases.contains(tableOrCol)) {
rightAliases.add(tableOrCol);
}
if (joinTree.getNoSemiJoin() == false) {
// if this is a semijoin, we need to add the condition
joinTree.addRHSSemijoinColumns(tableOrCol, condn);
}
}
}
break;
case HiveParser.Identifier:
// it may be a field name, return the identifier and let the caller decide
// whether it is or not
if (fields != null) {
fields
.add(unescapeIdentifier(condn.getToken().getText().toLowerCase()));
}
unparseTranslator.addIdentifierTranslation(condn);
break;
case HiveParser.TOK_NULL:
case HiveParser.Number:
case HiveParser.StringLiteral:
case HiveParser.IntegralLiteral:
case HiveParser.NumberLiteral:
case HiveParser.TOK_STRINGLITERALSEQUENCE:
case HiveParser.TOK_CHARSETLITERAL:
case HiveParser.KW_TRUE:
case HiveParser.KW_FALSE:
break;
case HiveParser.TOK_FUNCTION:
// check all the arguments
for (int i = 1; i < condn.getChildCount(); i++) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(i),
leftAliases, rightAliases, null, aliasToOpInfo);
}
break;
default:
// This is an operator - so check whether it is unary or binary operator
if (condn.getChildCount() == 1) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null, aliasToOpInfo);
} else if (condn.getChildCount() == 2) {
ArrayList fields1 = null;
// if it is a dot operator, remember the field name of the rhs of the
// left semijoin
if (joinTree.getNoSemiJoin() == false
&& condn.getToken().getType() == HiveParser.DOT) {
// get the semijoin rhs table name and field name
fields1 = new ArrayList();
int rhssize = rightAliases.size();
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null, aliasToOpInfo);
String rhsAlias = null;
if (rightAliases.size() > rhssize) { // the new table is rhs table
rhsAlias = rightAliases.get(rightAliases.size() - 1);
}
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
leftAliases, rightAliases, fields1, aliasToOpInfo);
if (rhsAlias != null && fields1.size() > 0) {
joinTree.addRHSSemijoinColumns(rhsAlias, condn);
}
} else {
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
leftAliases, rightAliases, null, aliasToOpInfo);
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
leftAliases, rightAliases, fields1, aliasToOpInfo);
}
} else {
throw new SemanticException(condn.toStringTree() + " encountered with "
+ condn.getChildCount() + " children");
}
break;
}
}
private void populateAliases(List leftAliases,
List rightAliases, ASTNode condn, QBJoinTree joinTree,
List leftSrc) throws SemanticException {
if ((leftAliases.size() != 0) && (rightAliases.size() != 0)) {
joinTree.addPostJoinFilter(condn);
return;
}
if (rightAliases.size() != 0) {
assert rightAliases.size() == 1;
joinTree.getExpressions().get(1).add(condn);
} else if (leftAliases.size() != 0) {
joinTree.getExpressions().get(0).add(condn);
for (String s : leftAliases) {
if (!leftSrc.contains(s)) {
leftSrc.add(s);
}
}
} else {
joinTree.addPostJoinFilter(condn);
}
}
/*
* refactored out of the Equality case of parseJoinCondition
* so that this can be recursively called on its left tree in the case when
* only left sources are referenced in a Predicate
*/
void applyEqualityPredicateToQBJoinTree(QBJoinTree joinTree,
JoinType type,
List leftSrc,
ASTNode joinCond,
ASTNode leftCondn,
ASTNode rightCondn,
List leftCondAl1,
List leftCondAl2,
List rightCondAl1,
List rightCondAl2) throws SemanticException {
if (leftCondAl1.size() != 0) {
if ((rightCondAl1.size() != 0)
|| ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
if (type.equals(JoinType.LEFTOUTER) ||
type.equals(JoinType.FULLOUTER)) {
joinTree.getFilters().get(0).add(joinCond);
} else {
/*
* If the rhs references table sources and this QBJoinTree has a leftTree;
* hand it to the leftTree and let it recursively handle it.
* There are 3 cases of passing a condition down:
* 1. The leftSide && rightSide don't contains references to the leftTree's rightAlias
* => pass the lists down as is.
* 2. The leftSide contains refs to the leftTree's rightAlias, the rightSide doesn't
* => switch the leftCondAl1 and leftConAl2 lists and pass down.
* 3. The rightSide contains refs to the leftTree's rightAlias, the leftSide doesn't
* => switch the rightCondAl1 and rightConAl2 lists and pass down.
* 4. In case both contain references to the leftTree's rightAlias
* => we cannot push the condition down.
* 5. If either contain references to both left & right
* => we cannot push forward.
*/
if (rightCondAl1.size() != 0) {
QBJoinTree leftTree = joinTree.getJoinSrc();
List leftTreeLeftSrc = new ArrayList();
if (leftTree != null && leftTree.getNoOuterJoin()) {
String leftTreeRightSource = leftTree.getRightAliases() != null &&
leftTree.getRightAliases().length > 0 ?
leftTree.getRightAliases()[0] : null;
boolean leftHasRightReference = false;
for (String r : leftCondAl1) {
if (r.equals(leftTreeRightSource)) {
leftHasRightReference = true;
break;
}
}
boolean rightHasRightReference = false;
for (String r : rightCondAl1) {
if (r.equals(leftTreeRightSource)) {
rightHasRightReference = true;
break;
}
}
boolean pushedDown = false;
if ( !leftHasRightReference && !rightHasRightReference ) {
applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc,
joinCond, leftCondn, rightCondn,
leftCondAl1, leftCondAl2,
rightCondAl1, rightCondAl2);
pushedDown = true;
} else if ( !leftHasRightReference && rightHasRightReference && rightCondAl1.size() == 1 ) {
applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc,
joinCond, leftCondn, rightCondn,
leftCondAl1, leftCondAl2,
rightCondAl2, rightCondAl1);
pushedDown = true;
} else if (leftHasRightReference && !rightHasRightReference && leftCondAl1.size() == 1 ) {
applyEqualityPredicateToQBJoinTree(leftTree, type, leftTreeLeftSrc,
joinCond, leftCondn, rightCondn,
leftCondAl2, leftCondAl1,
rightCondAl1, rightCondAl2);
pushedDown = true;
}
if (leftTreeLeftSrc.size() == 1) {
leftTree.setLeftAlias(leftTreeLeftSrc.get(0));
}
if ( pushedDown) {
return;
}
} // leftTree != null
}
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else if (rightCondAl2.size() != 0) {
populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
leftSrc);
populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
leftSrc);
boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
joinTree.getNullSafes().add(nullsafe);
}
} else if (leftCondAl2.size() != 0) {
if ((rightCondAl2.size() != 0)
|| ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
if (type.equals(JoinType.RIGHTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
joinTree.getFilters().get(1).add(joinCond);
} else {
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
} else if (rightCondAl1.size() != 0) {
populateAliases(leftCondAl1, leftCondAl2, leftCondn, joinTree,
leftSrc);
populateAliases(rightCondAl1, rightCondAl2, rightCondn, joinTree,
leftSrc);
boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
joinTree.getNullSafes().add(nullsafe);
}
} else if (rightCondAl1.size() != 0) {
if (type.equals(JoinType.LEFTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
joinTree.getFilters().get(0).add(joinCond);
} else {
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
if (type.equals(JoinType.RIGHTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
joinTree.getFilters().get(1).add(joinCond);
} else if (type.equals(JoinType.LEFTSEMI)) {
joinTree.getExpressions().get(0).add(leftCondn);
joinTree.getExpressions().get(1).add(rightCondn);
boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
joinTree.getNullSafes().add(nullsafe);
joinTree.getFiltersForPushing().get(1).add(joinCond);
} else {
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
}
}
@SuppressWarnings("rawtypes")
private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, List leftSrc,
Map aliasToOpInfo)
throws SemanticException {
if (joinCond == null) {
return;
}
JoinCond cond = joinTree.getJoinCond()[0];
JoinType type = cond.getJoinType();
parseJoinCondition(joinTree, joinCond, leftSrc, type, aliasToOpInfo);
List> filters = joinTree.getFilters();
if (type == JoinType.LEFTOUTER || type == JoinType.FULLOUTER) {
joinTree.addFilterMapping(cond.getLeft(), cond.getRight(), filters.get(0).size());
}
if (type == JoinType.RIGHTOUTER || type == JoinType.FULLOUTER) {
joinTree.addFilterMapping(cond.getRight(), cond.getLeft(), filters.get(1).size());
}
}
/**
* Parse the join condition. For equality conjuncts, break them into left and
* right expressions and store in the join tree. For other conditions, either
* add them to the post-conditions if they apply to more than one input, add
* them to the filter conditions of a given input if it applies only on
* one of them and should not be pushed, e.g., left outer join with condition
* that applies only to left input, or push them below the join if they
* apply only to one input and can be pushed, e.g., left outer join with
* condition that applies only to right input.
*
* @param joinTree
* jointree to be populated
* @param joinCond
* join condition
* @param leftSrc
* left sources
* @throws SemanticException
*/
@SuppressWarnings("rawtypes")
private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond,
List leftSrc, JoinType type,
Map aliasToOpInfo) throws SemanticException {
if (joinCond == null) {
return;
}
switch (joinCond.getToken().getType()) {
case HiveParser.KW_OR:
parseJoinCondPopulateAlias(joinTree, (ASTNode) joinCond.getChild(0),
new ArrayList(), new ArrayList(),
null, aliasToOpInfo);
parseJoinCondPopulateAlias(joinTree, (ASTNode) joinCond.getChild(1),
new ArrayList(), new ArrayList(),
null, aliasToOpInfo);
joinTree.addPostJoinFilter(joinCond);
break;
case HiveParser.KW_AND:
parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(0), leftSrc, type, aliasToOpInfo);
parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(1), leftSrc, type, aliasToOpInfo);
break;
case HiveParser.EQUAL_NS:
case HiveParser.EQUAL:
ASTNode leftCondn = (ASTNode) joinCond.getChild(0);
ArrayList leftCondAl1 = new ArrayList();
ArrayList leftCondAl2 = new ArrayList();
parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2,
null, aliasToOpInfo);
ASTNode rightCondn = (ASTNode) joinCond.getChild(1);
ArrayList rightCondAl1 = new ArrayList();
ArrayList rightCondAl2 = new ArrayList();
parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1,
rightCondAl2, null, aliasToOpInfo);
// is it a filter or a join condition
// if it is filter see if it can be pushed above the join
// filter cannot be pushed if
// * join is full outer or
// * join is left outer and filter is on left alias or
// * join is right outer and filter is on right alias
if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0))
|| ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) {
joinTree.addPostJoinFilter(joinCond);
} else {
applyEqualityPredicateToQBJoinTree(joinTree, type, leftSrc,
joinCond, leftCondn, rightCondn,
leftCondAl1, leftCondAl2,
rightCondAl1, rightCondAl2);
}
break;
default:
boolean isFunction = (joinCond.getType() == HiveParser.TOK_FUNCTION);
// Create all children
int childrenBegin = (isFunction ? 1 : 0);
ArrayList> leftAlias = new ArrayList>(
joinCond.getChildCount() - childrenBegin);
ArrayList> rightAlias = new ArrayList>(
joinCond.getChildCount() - childrenBegin);
for (int ci = 0; ci < joinCond.getChildCount() - childrenBegin; ci++) {
ArrayList left = new ArrayList();
ArrayList right = new ArrayList();
leftAlias.add(left);
rightAlias.add(right);
}
for (int ci = childrenBegin; ci < joinCond.getChildCount(); ci++) {
parseJoinCondPopulateAlias(joinTree, (ASTNode) joinCond.getChild(ci),
leftAlias.get(ci - childrenBegin), rightAlias.get(ci
- childrenBegin), null, aliasToOpInfo);
}
boolean leftAliasNull = true;
for (ArrayList left : leftAlias) {
if (left.size() != 0) {
leftAliasNull = false;
break;
}
}
boolean rightAliasNull = true;
for (ArrayList right : rightAlias) {
if (right.size() != 0) {
rightAliasNull = false;
break;
}
}
if (!leftAliasNull && !rightAliasNull) {
joinTree.addPostJoinFilter(joinCond);
} else {
if (!leftAliasNull) {
if (type.equals(JoinType.LEFTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
joinTree.getFilters().get(0).add(joinCond);
} else {
joinTree.getFiltersForPushing().get(0).add(joinCond);
}
} else {
if (type.equals(JoinType.RIGHTOUTER)
|| type.equals(JoinType.FULLOUTER)) {
joinTree.getFilters().get(1).add(joinCond);
} else {
joinTree.getFiltersForPushing().get(1).add(joinCond);
}
}
}
break;
}
}
@SuppressWarnings("rawtypes")
private void extractJoinCondsFromWhereClause(QBJoinTree joinTree, QB qb, String dest, ASTNode predicate,
Map aliasToOpInfo) throws SemanticException {
switch (predicate.getType()) {
case HiveParser.KW_AND:
extractJoinCondsFromWhereClause(joinTree, qb, dest,
(ASTNode) predicate.getChild(0), aliasToOpInfo);
extractJoinCondsFromWhereClause(joinTree, qb, dest,
(ASTNode) predicate.getChild(1), aliasToOpInfo);
break;
case HiveParser.EQUAL_NS:
case HiveParser.EQUAL:
ASTNode leftCondn = (ASTNode) predicate.getChild(0);
ArrayList leftCondAl1 = new ArrayList();
ArrayList leftCondAl2 = new ArrayList();
try {
parseJoinCondPopulateAlias(joinTree, leftCondn, leftCondAl1, leftCondAl2,
null, aliasToOpInfo);
} catch(SemanticException se) {
// suppress here; if it is a real issue will get caught in where clause handling.
return;
}
ASTNode rightCondn = (ASTNode) predicate.getChild(1);
ArrayList rightCondAl1 = new ArrayList();
ArrayList rightCondAl2 = new ArrayList();
try {
parseJoinCondPopulateAlias(joinTree, rightCondn, rightCondAl1,
rightCondAl2, null, aliasToOpInfo);
} catch(SemanticException se) {
// suppress here; if it is a real issue will get caught in where clause handling.
return;
}
if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0))
|| ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) {
// this is not a join condition.
return;
}
if (((leftCondAl1.size() == 0) && (leftCondAl2.size() == 0))
|| ((rightCondAl1.size() == 0) && (rightCondAl2.size() == 0))) {
// this is not a join condition. Will get handled by predicate pushdown.
return;
}
List leftSrc = new ArrayList();
JoinCond cond = joinTree.getJoinCond()[0];
JoinType type = cond.getJoinType();
applyEqualityPredicateToQBJoinTree(joinTree, type, leftSrc,
predicate, leftCondn, rightCondn,
leftCondAl1, leftCondAl2,
rightCondAl1, rightCondAl2);
if (leftSrc.size() == 1) {
joinTree.setLeftAlias(leftSrc.get(0));
}
// todo: hold onto this predicate, so that we don't add it to the Filter Operator.
break;
default:
return;
}
}
@SuppressWarnings("nls")
public Operator putOpInsertMap(Operator op,
RowResolver rr) {
OpParseContext ctx = new OpParseContext(rr);
opParseCtx.put(op, ctx);
op.augmentPlan();
return op;
}
@SuppressWarnings("nls")
private Operator genHavingPlan(String dest, QB qb, Operator input,
Map aliasToOpInfo)
throws SemanticException {
ASTNode havingExpr = qb.getParseInfo().getHavingForClause(dest);
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
Map exprToColumnAlias = qb.getParseInfo().getAllExprToColumnAlias();
for (ASTNode astNode : exprToColumnAlias.keySet()) {
if (inputRR.getExpression(astNode) != null) {
inputRR.put("", exprToColumnAlias.get(astNode), inputRR.getExpression(astNode));
}
}
ASTNode condn = (ASTNode) havingExpr.getChild(0);
if (!isCBOExecuted() && !qb.getParseInfo().getDestToGroupBy().isEmpty()) {
// If CBO did not optimize the query, we might need to replace grouping function
final String destClauseName = qb.getParseInfo().getClauseNames().iterator().next();
final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty()
|| !qb.getParseInfo().getDestGroupingSets().isEmpty()
|| !qb.getParseInfo().getDestCubes().isEmpty());
// Special handling of grouping function
condn = rewriteGroupingFunctionAST(getGroupByForClause(qb.getParseInfo(), destClauseName), condn,
!cubeRollupGrpSetPresent);
}
/*
* Now a having clause can contain a SubQuery predicate;
* so we invoke genFilterPlan to handle SubQuery algebraic transformation,
* just as is done for SubQuery predicates appearing in the Where Clause.
*/
Operator output = genFilterPlan(condn, qb, input, aliasToOpInfo, true, false);
output = putOpInsertMap(output, inputRR);
return output;
}
protected static ASTNode rewriteGroupingFunctionAST(final List grpByAstExprs, ASTNode targetNode,
final boolean noneSet) throws SemanticException {
TreeVisitorAction action = new TreeVisitorAction() {
@Override
public Object pre(Object t) {
return t;
}
@Override
public Object post(Object t) {
ASTNode root = (ASTNode) t;
if (root.getType() == HiveParser.TOK_FUNCTION) {
ASTNode func = (ASTNode) ParseDriver.adaptor.getChild(root, 0);
if (func.getText().equals("grouping") && func.getChildCount() == 0) {
int numberOperands = ParseDriver.adaptor.getChildCount(root);
// We implement this logic using replaceChildren instead of replacing
// the root node itself because windowing logic stores multiple
// pointers to the AST, and replacing root might lead to some pointers
// leading to non-rewritten version
ASTNode newRoot = new ASTNode();
// Rewritten grouping function
ASTNode groupingFunc = (ASTNode) ParseDriver.adaptor.create(
HiveParser.Identifier, "grouping");
ParseDriver.adaptor.addChild(groupingFunc, ParseDriver.adaptor.create(
HiveParser.Identifier, "rewritten"));
newRoot.addChild(groupingFunc);
// Grouping ID reference
ASTNode childGroupingID;
if (noneSet) {
// Query does not contain CUBE, ROLLUP, or GROUPING SETS, and thus,
// grouping should return 0
childGroupingID = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
"0L");
} else {
// We refer to grouping_id column
childGroupingID = (ASTNode) ParseDriver.adaptor.create(
HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL");
ParseDriver.adaptor.addChild(childGroupingID, ParseDriver.adaptor.create(
HiveParser.Identifier, VirtualColumn.GROUPINGID.getName()));
}
newRoot.addChild(childGroupingID);
// Indices
for (int i = 1; i < numberOperands; i++) {
ASTNode c = (ASTNode) ParseDriver.adaptor.getChild(root, i);
for (int j = 0; j < grpByAstExprs.size(); j++) {
ASTNode grpByExpr = grpByAstExprs.get(j);
if (grpByExpr.toStringTree().equals(c.toStringTree())) {
// Create and add AST node with position of grouping function input
// in group by clause
ASTNode childN = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
String.valueOf(IntMath.mod(-j-1, grpByAstExprs.size())) + "L");
newRoot.addChild(childN);
break;
}
}
}
if (numberOperands + 1 != ParseDriver.adaptor.getChildCount(newRoot)) {
throw new RuntimeException(ErrorMsg.HIVE_GROUPING_FUNCTION_EXPR_NOT_IN_GROUPBY.getMsg());
}
// Replace expression
root.replaceChildren(0, numberOperands - 1, newRoot);
}
}
return t;
}
};
return (ASTNode) new TreeVisitor(ParseDriver.adaptor).visit(targetNode, action);
}
private Operator genPlanForSubQueryPredicate(
QB qbSQ,
ISubQueryJoinInfo subQueryPredicate) throws SemanticException {
qbSQ.setSubQueryDef(subQueryPredicate.getSubQuery());
Phase1Ctx ctx_1 = initPhase1Ctx();
doPhase1(subQueryPredicate.getSubQueryAST(), qbSQ, ctx_1, null);
getMetaData(qbSQ);
Operator op = genPlan(qbSQ);
return op;
}
@SuppressWarnings("nls")
private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input,
Map aliasToOpInfo,
boolean forHavingClause, boolean forGroupByClause)
throws SemanticException {
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
/*
* Handling of SubQuery Expressions:
* if "Where clause contains no SubQuery expressions" then
* -->[true] ===CONTINUE_FILTER_PROCESSING===
* else
* -->[false] "extract SubQuery expressions\n from Where clause"
* if "this is a nested SubQuery or \nthere are more than 1 SubQuery expressions" then
* -->[yes] "throw Unsupported Error"
* else
* --> "Rewrite Search condition to \nremove SubQuery predicate"
* --> "build QBSubQuery"
* --> "extract correlated predicates \nfrom Where Clause"
* --> "add correlated Items to \nSelect List and Group By"
* --> "construct Join Predicate \nfrom correlation predicates"
* --> "Generate Plan for\n modified SubQuery"
* --> "Build the Join Condition\n for Parent Query to SubQuery join"
* --> "Build the QBJoinTree from the Join condition"
* --> "Update Parent Query Filter\n with any Post Join conditions"
* --> ===CONTINUE_FILTER_PROCESSING===
* endif
* endif
*
* Support for Sub Queries in Having Clause:
* - By and large this works the same way as SubQueries in the Where Clause.
* - The one addum is the handling of aggregation expressions from the Outer Query
* appearing in correlation clauses.
* - So such correlating predicates are allowed:
* min(OuterQuert.x) = SubQuery.y
* - this requires special handling when converting to joins. See QBSubQuery.rewrite
* method method for detailed comments.
*/
List subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
if ( subQueriesInOriginalTree.size() > 0 ) {
/*
* Restriction.9.m :: disallow nested SubQuery expressions.
*/
if (qb.getSubQueryPredicateDef() != null ) {
throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported."));
}
/*
* Restriction.8.m :: We allow only 1 SubQuery expression per Query.
*/
if (subQueriesInOriginalTree.size() > 1 ) {
throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported."));
}
/*
* Clone the Search AST; apply all rewrites on the clone.
*/
ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond);
List subQueries = SubQueryUtils.findSubQueries(clonedSearchCond);
for(int i=0; i < subQueries.size(); i++) {
ASTNode subQueryAST = subQueries.get(i);
ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i);
int sqIdx = qb.incrNumSubQueryPredicates();
clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST);
QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(),
sqIdx, subQueryAST, originalSubQueryAST, ctx);
if ( !forHavingClause ) {
qb.setWhereClauseSubQueryPredicate(subQuery);
} else {
qb.setHavingClauseSubQueryPredicate(subQuery);
}
String havingInputAlias = null;
if ( forHavingClause ) {
havingInputAlias = "gby_sq" + sqIdx;
aliasToOpInfo.put(havingInputAlias, input);
}
subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, aliasToOpInfo.keySet());
QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true);
Operator sqPlanTopOp = genPlanForSubQueryPredicate(qbSQ, subQuery);
aliasToOpInfo.put(subQuery.getAlias(), sqPlanTopOp);
RowResolver sqRR = opParseCtx.get(sqPlanTopOp).getRowResolver();
/*
* Check.5.h :: For In and Not In the SubQuery must implicitly or
* explicitly only contain one select item.
*/
if ( subQuery.getOperator().getType() != SubQueryType.EXISTS &&
subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS &&
sqRR.getColumnInfos().size() -
subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1 ) {
throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
subQueryAST, "SubQuery can contain only 1 item in Select List."));
}
/*
* If this is a Not In SubQuery Predicate then Join in the Null Check SubQuery.
* See QBSubQuery.NotInCheck for details on why and how this is constructed.
*/
if ( subQuery.getNotInCheck() != null ) {
QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck();
notInCheck.setSQRR(sqRR);
QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true);
Operator sqnicPlanTopOp = genPlanForSubQueryPredicate(qbSQ_nic, notInCheck);
aliasToOpInfo.put(notInCheck.getAlias(), sqnicPlanTopOp);
QBJoinTree joinTree_nic = genSQJoinTree(qb, notInCheck,
input,
aliasToOpInfo);
pushJoinFilters(qb, joinTree_nic, aliasToOpInfo, false);
input = genJoinOperator(qbSQ_nic, joinTree_nic, aliasToOpInfo, input);
inputRR = opParseCtx.get(input).getRowResolver();
if ( forHavingClause ) {
aliasToOpInfo.put(havingInputAlias, input);
}
}
/*
* Gen Join between outer Operator and SQ op
*/
subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias);
QBJoinTree joinTree = genSQJoinTree(qb, subQuery,
input,
aliasToOpInfo);
/*
* push filters only for this QBJoinTree. Child QBJoinTrees have already been handled.
*/
pushJoinFilters(qb, joinTree, aliasToOpInfo, false);
input = genJoinOperator(qbSQ, joinTree, aliasToOpInfo, input);
searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond);
}
}
return genFilterPlan(qb, searchCond, input, forHavingClause || forGroupByClause);
}
/**
* create a filter plan. The condition and the inputs are specified.
*
* @param qb
* current query block
* @param condn
* The condition to be resolved
* @param input
* the input operator
*/
@SuppressWarnings("nls")
private Operator genFilterPlan(QB qb, ASTNode condn, Operator input, boolean useCaching)
throws SemanticException {
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
ExprNodeDesc filterCond = genExprNodeDesc(condn, inputRR, useCaching, isCBOExecuted());
if (filterCond instanceof ExprNodeConstantDesc) {
ExprNodeConstantDesc c = (ExprNodeConstantDesc) filterCond;
if (Boolean.TRUE.equals(c.getValue())) {
// If filter condition is TRUE, we ignore it
return input;
}
if (ExprNodeDescUtils.isNullConstant(c)) {
// If filter condition is NULL, transform to FALSE
filterCond = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, false);
}
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new FilterDesc(filterCond, false), new RowSchema(
inputRR.getColumnInfos()), input), inputRR);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: "
+ inputRR.toString());
}
return output;
}
/*
* for inner joins push a 'is not null predicate' to the join sources for
* every non nullSafe predicate.
*/
private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input,
QBJoinTree joinTree, ExprNodeDesc[] joinKeys) throws SemanticException {
if (qb == null || joinTree == null) {
return input;
}
if (!joinTree.getNoOuterJoin()) {
return input;
}
if (joinKeys == null || joinKeys.length == 0) {
return input;
}
Map hashes = new HashMap();
if (input instanceof FilterOperator) {
ExprNodeDescUtils.getExprNodeColumnDesc(Arrays.asList(((FilterDesc)input.getConf()).getPredicate()), hashes);
}
ExprNodeDesc filterPred = null;
List nullSafes = joinTree.getNullSafes();
for (int i = 0; i < joinKeys.length; i++) {
if (nullSafes.get(i) || (joinKeys[i] instanceof ExprNodeColumnDesc &&
((ExprNodeColumnDesc)joinKeys[i]).getIsPartitionColOrVirtualCol())) {
// no need to generate is not null predicate for partitioning or
// virtual column, since those columns can never be null.
continue;
}
if(null != hashes.get(joinKeys[i].hashCode())) {
// there is already a predicate on this src.
continue;
}
List args = new ArrayList();
args.add(joinKeys[i]);
ExprNodeDesc nextExpr = ExprNodeGenericFuncDesc.newInstance(
FunctionRegistry.getFunctionInfo("isnotnull").getGenericUDF(), args);
filterPred = filterPred == null ? nextExpr : ExprNodeDescUtils
.mergePredicates(filterPred, nextExpr);
}
if (filterPred == null) {
return input;
}
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
if (input instanceof FilterOperator) {
FilterOperator f = (FilterOperator) input;
List preds = new ArrayList();
preds.add(f.getConf().getPredicate());
preds.add(filterPred);
f.getConf().setPredicate(ExprNodeDescUtils.mergePredicates(preds));
return input;
}
FilterDesc filterDesc = new FilterDesc(filterPred, false);
filterDesc.setGenerated(true);
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(filterDesc,
new RowSchema(inputRR.getColumnInfos()), input), inputRR);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: "
+ inputRR.toString());
}
return output;
}
@SuppressWarnings("nls")
// TODO: make aliases unique, otherwise needless rewriting takes place
Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel,
ArrayList col_list, HashSet excludeCols, RowResolver input,
RowResolver colSrcRR, Integer pos, RowResolver output, List aliases,
boolean ensureUniqueCols) throws SemanticException {
if (colSrcRR == null) {
colSrcRR = input;
}
// The table alias should exist
if (tabAlias != null && !colSrcRR.hasTableAlias(tabAlias)) {
throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(sel));
}
// TODO: Have to put in the support for AS clause
Pattern regex = null;
try {
regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE);
} catch (PatternSyntaxException e) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel, e
.getMessage()));
}
StringBuilder replacementText = new StringBuilder();
int matched = 0;
// add empty string to the list of aliases. Some operators (ex. GroupBy) add
// ColumnInfos for table alias "".
if (!aliases.contains("")) {
aliases.add("");
}
/*
* track the input ColumnInfos that are added to the output.
* if a columnInfo has multiple mappings; then add the column only once,
* but carry the mappings forward.
*/
Map inputColsProcessed = new HashMap();
// For expr "*", aliases should be iterated in the order they are specified
// in the query.
if (colSrcRR.getNamedJoinInfo() != null) {
// We got using() clause in previous join. Need to generate select list as
// per standard. For * we will have joining columns first non-repeated
// followed by other columns.
HashMap leftMap = colSrcRR.getFieldMap(colSrcRR.getNamedJoinInfo().getAliases().get(0));
HashMap rightMap = colSrcRR.getFieldMap(colSrcRR.getNamedJoinInfo().getAliases().get(1));
HashMap chosenMap = null;
if (colSrcRR.getNamedJoinInfo().getHiveJoinType() != JoinType.RIGHTOUTER) {
chosenMap = leftMap;
} else {
chosenMap = rightMap;
}
// first get the columns in named columns
for (String columnName : colSrcRR.getNamedJoinInfo().getNamedColumns()) {
for (Map.Entry entry : chosenMap.entrySet()) {
ColumnInfo colInfo = entry.getValue();
if (!columnName.equals(colInfo.getAlias())) {
continue;
}
String name = colInfo.getInternalName();
String[] tmp = colSrcRR.reverseLookup(name);
// Skip the colinfos which are not for this particular alias
if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
continue;
}
if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
continue;
}
ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
if (oColInfo == null) {
ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(), name,
colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isSkewedCol());
col_list.add(expr);
oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
inputColsProcessed.put(colInfo, oColInfo);
}
if (ensureUniqueCols) {
if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) {
throw new CalciteSemanticException("Cannot add column to RR: " + tmp[0] + "."
+ tmp[1] + " => " + oColInfo + " due to duplication, see previous warnings",
UnsupportedFeature.Duplicates_in_RR);
}
} else {
output.put(tmp[0], tmp[1], oColInfo);
}
pos = Integer.valueOf(pos.intValue() + 1);
matched++;
if (unparseTranslator.isEnabled() || (tableMask.isEnabled() && analyzeRewrite == null)) {
if (replacementText.length() > 0) {
replacementText.append(", ");
}
replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf));
replacementText.append(".");
replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf));
}
}
}
}
for (String alias : aliases) {
HashMap fMap = colSrcRR.getFieldMap(alias);
if (fMap == null) {
continue;
}
// For the tab.* case, add all the columns to the fieldList
// from the input schema
for (Map.Entry entry : fMap.entrySet()) {
ColumnInfo colInfo = entry.getValue();
if (colSrcRR.getNamedJoinInfo() != null && colSrcRR.getNamedJoinInfo().getNamedColumns().contains(colInfo.getAlias())) {
// we already added this column in select list.
continue;
}
if (excludeCols != null && excludeCols.contains(colInfo)) {
continue; // This was added during plan generation.
}
// First, look up the column from the source against which * is to be
// resolved.
// We'd later translated this into the column from proper input, if
// it's valid.
// TODO: excludeCols may be possible to remove using the same
// technique.
String name = colInfo.getInternalName();
String[] tmp = colSrcRR.reverseLookup(name);
// Skip the colinfos which are not for this particular alias
if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
continue;
}
if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
continue;
}
// Not matching the regex?
if (!regex.matcher(tmp[1]).matches()) {
continue;
}
// If input (GBY) is different than the source of columns, find the
// same column in input.
// TODO: This is fraught with peril.
if (input != colSrcRR) {
colInfo = input.get(tabAlias, tmp[1]);
if (colInfo == null) {
LOG.error("Cannot find colInfo for " + tabAlias + "." + tmp[1] + ", derived from ["
+ colSrcRR + "], in [" + input + "]");
throw new SemanticException(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY, tmp[1]);
}
String oldCol = null;
if (LOG.isDebugEnabled()) {
oldCol = name + " => " + (tmp == null ? "null" : (tmp[0] + "." + tmp[1]));
}
name = colInfo.getInternalName();
tmp = input.reverseLookup(name);
if (LOG.isDebugEnabled()) {
String newCol = name + " => " + (tmp == null ? "null" : (tmp[0] + "." + tmp[1]));
LOG.debug("Translated [" + oldCol + "] to [" + newCol + "]");
}
}
ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
if (oColInfo == null) {
ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(), name,
colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isSkewedCol());
col_list.add(expr);
oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
inputColsProcessed.put(colInfo, oColInfo);
}
if (ensureUniqueCols) {
if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) {
throw new CalciteSemanticException("Cannot add column to RR: " + tmp[0] + "." + tmp[1]
+ " => " + oColInfo + " due to duplication, see previous warnings",
UnsupportedFeature.Duplicates_in_RR);
}
} else {
output.put(tmp[0], tmp[1], oColInfo);
}
pos = Integer.valueOf(pos.intValue() + 1);
matched++;
if (unparseTranslator.isEnabled() || tableMask.isEnabled()) {
if (replacementText.length() > 0) {
replacementText.append(", ");
}
replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf));
replacementText.append(".");
replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf));
}
}
}
if (matched == 0) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel));
}
if (unparseTranslator.isEnabled()) {
unparseTranslator.addTranslation(sel, replacementText.toString());
} else if (tableMask.isEnabled()) {
tableMask.addTranslation(sel, replacementText.toString());
}
return pos;
}
public static String getColumnInternalName(int pos) {
return HiveConf.getColumnInternalName(pos);
}
private String getScriptProgName(String cmd) {
int end = cmd.indexOf(" ");
return (end == -1) ? cmd : cmd.substring(0, end);
}
private String getScriptArgs(String cmd) {
int end = cmd.indexOf(" ");
return (end == -1) ? "" : cmd.substring(end, cmd.length());
}
static int getPositionFromInternalName(String internalName) {
return HiveConf.getPositionFromInternalName(internalName);
}
private String fetchFilesNotInLocalFilesystem(String cmd) {
SessionState ss = SessionState.get();
String progName = getScriptProgName(cmd);
if (!ResourceDownloader.isFileUri(progName)) {
String filePath = ss.add_resource(ResourceType.FILE, progName, true);
Path p = new Path(filePath);
String fileName = p.getName();
String scriptArgs = getScriptArgs(cmd);
String finalCmd = fileName + scriptArgs;
return finalCmd;
}
return cmd;
}
private TableDesc getTableDescFromSerDe(ASTNode child, String cols,
String colTypes, boolean defaultCols) throws SemanticException {
if (child.getType() == HiveParser.TOK_SERDENAME) {
String serdeName = unescapeSQLString(child.getChild(0).getText());
Class extends Deserializer> serdeClass = null;
try {
serdeClass = (Class extends Deserializer>) Class.forName(serdeName,
true, Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
TableDesc tblDesc = PlanUtils.getTableDesc(serdeClass, Integer
.toString(Utilities.tabCode), cols, colTypes, defaultCols);
// copy all the properties
if (child.getChildCount() == 2) {
ASTNode prop = (ASTNode) ((ASTNode) child.getChild(1)).getChild(0);
for (int propChild = 0; propChild < prop.getChildCount(); propChild++) {
String key = unescapeSQLString(prop.getChild(propChild).getChild(0)
.getText());
String value = unescapeSQLString(prop.getChild(propChild).getChild(1)
.getText());
tblDesc.getProperties().setProperty(key, value);
}
}
return tblDesc;
} else if (child.getType() == HiveParser.TOK_SERDEPROPS) {
TableDesc tblDesc = PlanUtils.getDefaultTableDesc(Integer
.toString(Utilities.ctrlaCode), cols, colTypes, defaultCols);
int numChildRowFormat = child.getChildCount();
for (int numC = 0; numC < numChildRowFormat; numC++) {
ASTNode rowChild = (ASTNode) child.getChild(numC);
switch (rowChild.getToken().getType()) {
case HiveParser.TOK_TABLEROWFORMATFIELD:
String fieldDelim = unescapeSQLString(rowChild.getChild(0).getText());
tblDesc.getProperties()
.setProperty(serdeConstants.FIELD_DELIM, fieldDelim);
tblDesc.getProperties().setProperty(serdeConstants.SERIALIZATION_FORMAT,
fieldDelim);
if (rowChild.getChildCount() >= 2) {
String fieldEscape = unescapeSQLString(rowChild.getChild(1)
.getText());
tblDesc.getProperties().setProperty(serdeConstants.ESCAPE_CHAR,
fieldEscape);
}
break;
case HiveParser.TOK_TABLEROWFORMATCOLLITEMS:
tblDesc.getProperties().setProperty(serdeConstants.COLLECTION_DELIM,
unescapeSQLString(rowChild.getChild(0).getText()));
break;
case HiveParser.TOK_TABLEROWFORMATMAPKEYS:
tblDesc.getProperties().setProperty(serdeConstants.MAPKEY_DELIM,
unescapeSQLString(rowChild.getChild(0).getText()));
break;
case HiveParser.TOK_TABLEROWFORMATLINES:
String lineDelim = unescapeSQLString(rowChild.getChild(0).getText());
tblDesc.getProperties().setProperty(serdeConstants.LINE_DELIM, lineDelim);
if (!lineDelim.equals("\n") && !lineDelim.equals("10")) {
throw new SemanticException(generateErrorMessage(rowChild,
ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg()));
}
break;
case HiveParser.TOK_TABLEROWFORMATNULL:
String nullFormat = unescapeSQLString(rowChild.getChild(0).getText());
tblDesc.getProperties().setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT,
nullFormat);
break;
default:
assert false;
}
}
return tblDesc;
}
// should never come here
return null;
}
private void failIfColAliasExists(Set nameSet, String name)
throws SemanticException {
if (nameSet.contains(name)) {
throw new SemanticException(ErrorMsg.COLUMN_ALIAS_ALREADY_EXISTS
.getMsg(name));
}
nameSet.add(name);
}
@SuppressWarnings("nls")
private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input)
throws SemanticException {
// If there is no "AS" clause, the output schema will be "key,value"
ArrayList outputCols = new ArrayList();
int inputSerDeNum = 1, inputRecordWriterNum = 2;
int outputSerDeNum = 4, outputRecordReaderNum = 5;
int outputColsNum = 6;
boolean outputColNames = false, outputColSchemas = false;
int execPos = 3;
boolean defaultOutputCols = false;
// Go over all the children
if (trfm.getChildCount() > outputColsNum) {
ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum);
if (outCols.getType() == HiveParser.TOK_ALIASLIST) {
outputColNames = true;
} else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) {
outputColSchemas = true;
}
}
// If column type is not specified, use a string
if (!outputColNames && !outputColSchemas) {
String intName = getColumnInternalName(0);
ColumnInfo colInfo = new ColumnInfo(intName,
TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias("key");
outputCols.add(colInfo);
intName = getColumnInternalName(1);
colInfo = new ColumnInfo(intName, TypeInfoFactory.stringTypeInfo, null,
false);
colInfo.setAlias("value");
outputCols.add(colInfo);
defaultOutputCols = true;
} else {
ASTNode collist = (ASTNode) trfm.getChild(outputColsNum);
int ccount = collist.getChildCount();
Set colAliasNamesDuplicateCheck = new HashSet();
if (outputColNames) {
for (int i = 0; i < ccount; ++i) {
String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i))
.getText()).toLowerCase();
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName,
TypeInfoFactory.stringTypeInfo, null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
} else {
for (int i = 0; i < ccount; ++i) {
ASTNode child = (ASTNode) collist.getChild(i);
assert child.getType() == HiveParser.TOK_TABCOL;
String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0))
.getText()).toLowerCase();
failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias);
String intName = getColumnInternalName(i);
ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils
.getTypeInfoFromTypeString(getTypeStringFromAST((ASTNode) child
.getChild(1))), null, false);
colInfo.setAlias(colAlias);
outputCols.add(colInfo);
}
}
}
RowResolver out_rwsch = new RowResolver();
StringBuilder columns = new StringBuilder();
StringBuilder columnTypes = new StringBuilder();
for (int i = 0; i < outputCols.size(); ++i) {
if (i != 0) {
columns.append(",");
columnTypes.append(",");
}
columns.append(outputCols.get(i).getInternalName());
columnTypes.append(outputCols.get(i).getType().getTypeName());
out_rwsch.put(qb.getParseInfo().getAlias(), outputCols.get(i).getAlias(),
outputCols.get(i));
}
StringBuilder inpColumns = new StringBuilder();
StringBuilder inpColumnTypes = new StringBuilder();
ArrayList inputSchema = opParseCtx.get(input).getRowResolver()
.getColumnInfos();
for (int i = 0; i < inputSchema.size(); ++i) {
if (i != 0) {
inpColumns.append(",");
inpColumnTypes.append(",");
}
inpColumns.append(inputSchema.get(i).getInternalName());
inpColumnTypes.append(inputSchema.get(i).getType().getTypeName());
}
TableDesc outInfo;
TableDesc errInfo;
TableDesc inInfo;
String defaultSerdeName = conf.getVar(HiveConf.ConfVars.HIVESCRIPTSERDE);
Class extends Deserializer> serde;
try {
serde = (Class extends Deserializer>) Class.forName(defaultSerdeName,
true, Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
int fieldSeparator = Utilities.tabCode;
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESCRIPTESCAPE)) {
fieldSeparator = Utilities.ctrlaCode;
}
// Input and Output Serdes
if (trfm.getChild(inputSerDeNum).getChildCount() > 0) {
inInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm
.getChild(inputSerDeNum))).getChild(0), inpColumns.toString(),
inpColumnTypes.toString(), false);
} else {
inInfo = PlanUtils.getTableDesc(serde, Integer
.toString(fieldSeparator), inpColumns.toString(), inpColumnTypes
.toString(), false, true);
}
if (trfm.getChild(outputSerDeNum).getChildCount() > 0) {
outInfo = getTableDescFromSerDe((ASTNode) (((ASTNode) trfm
.getChild(outputSerDeNum))).getChild(0), columns.toString(),
columnTypes.toString(), false);
// This is for backward compatibility. If the user did not specify the
// output column list, we assume that there are 2 columns: key and value.
// However, if the script outputs: col1, col2, col3 seperated by TAB, the
// requirement is: key is col and value is (col2 TAB col3)
} else {
outInfo = PlanUtils.getTableDesc(serde, Integer
.toString(fieldSeparator), columns.toString(), columnTypes
.toString(), defaultOutputCols);
}
// Error stream always uses the default serde with a single column
errInfo = PlanUtils.getTableDesc(serde, Integer.toString(Utilities.tabCode), "KEY");
// Output record readers
Class extends RecordReader> outRecordReader = getRecordReader((ASTNode) trfm
.getChild(outputRecordReaderNum));
Class extends RecordWriter> inRecordWriter = getRecordWriter((ASTNode) trfm
.getChild(inputRecordWriterNum));
Class extends RecordReader> errRecordReader = getDefaultRecordReader();
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new ScriptDesc(
fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())),
inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo),
new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
output.setColumnExprMap(new HashMap()); // disable backtracking
// Add URI entity for transform script. script assumed t be local unless downloadable
if (conf.getBoolVar(ConfVars.HIVE_CAPTURE_TRANSFORM_ENTITY)) {
String scriptCmd = getScriptProgName(stripQuotes(trfm.getChild(execPos).getText()));
getInputs().add(new ReadEntity(new Path(scriptCmd),
ResourceDownloader.isFileUri(scriptCmd)));
}
return output;
}
private Class extends RecordReader> getRecordReader(ASTNode node)
throws SemanticException {
String name;
if (node.getChildCount() == 0) {
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER);
} else {
name = unescapeSQLString(node.getChild(0).getText());
}
try {
return (Class extends RecordReader>) Class.forName(name, true,
Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
private Class extends RecordReader> getDefaultRecordReader()
throws SemanticException {
String name;
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDREADER);
try {
return (Class extends RecordReader>) Class.forName(name, true,
Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
private Class extends RecordWriter> getRecordWriter(ASTNode node)
throws SemanticException {
String name;
if (node.getChildCount() == 0) {
name = conf.getVar(HiveConf.ConfVars.HIVESCRIPTRECORDWRITER);
} else {
name = unescapeSQLString(node.getChild(0).getText());
}
try {
return (Class extends RecordWriter>) Class.forName(name, true,
Utilities.getSessionSpecifiedClassLoader());
} catch (ClassNotFoundException e) {
throw new SemanticException(e);
}
}
protected List getGroupingSetsForRollup(int size) {
List groupingSetKeys = new ArrayList();
for (int i = 0; i <= size; i++) {
groupingSetKeys.add((1L << i) - 1);
}
return groupingSetKeys;
}
protected List getGroupingSetsForCube(int size) {
long count = 1L << size;
List results = new ArrayList();
for (long i = 0; i < count; ++i) {
results.add(i);
}
return results;
}
// This function returns the grouping sets along with the grouping expressions
// Even if rollups and cubes are present in the query, they are converted to
// grouping sets at this point
ObjectPair, List> getGroupByGroupingSetsForClause(
QBParseInfo parseInfo, String dest) throws SemanticException {
List groupingSets = new ArrayList();
List groupByExprs = getGroupByForClause(parseInfo, dest);
if (groupByExprs.size() > Long.SIZE) {
throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_SIZE_LIMIT.getMsg());
}
if (parseInfo.getDestRollups().contains(dest)) {
groupingSets = getGroupingSetsForRollup(groupByExprs.size());
} else if (parseInfo.getDestCubes().contains(dest)) {
groupingSets = getGroupingSetsForCube(groupByExprs.size());
} else if (parseInfo.getDestGroupingSets().contains(dest)) {
groupingSets = getGroupingSets(groupByExprs, parseInfo, dest);
}
return new ObjectPair, List>(groupByExprs, groupingSets);
}
protected List getGroupingSets(List groupByExpr, QBParseInfo parseInfo,
String dest) throws SemanticException {
Map exprPos = new HashMap();
for (int i = 0; i < groupByExpr.size(); ++i) {
ASTNode node = groupByExpr.get(i);
exprPos.put(node.toStringTree(), i);
}
ASTNode root = parseInfo.getGroupByForClause(dest);
List result = new ArrayList(root == null ? 0 : root.getChildCount());
if (root != null) {
for (int i = 0; i < root.getChildCount(); ++i) {
ASTNode child = (ASTNode) root.getChild(i);
if (child.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) {
continue;
}
long bitmap = LongMath.pow(2, groupByExpr.size()) - 1;
for (int j = 0; j < child.getChildCount(); ++j) {
String treeAsString = child.getChild(j).toStringTree();
Integer pos = exprPos.get(treeAsString);
if (pos == null) {
throw new SemanticException(
generateErrorMessage((ASTNode) child.getChild(j),
ErrorMsg.HIVE_GROUPING_SETS_EXPR_NOT_IN_GROUPBY.getErrorCodedMsg()));
}
bitmap = unsetBit(bitmap, groupByExpr.size() - pos - 1);
}
result.add(bitmap);
}
}
if (checkForEmptyGroupingSets(result, LongMath.pow(2, groupByExpr.size()) - 1)) {
throw new SemanticException(
ErrorMsg.HIVE_GROUPING_SETS_EMPTY.getMsg());
}
return result;
}
private boolean checkForEmptyGroupingSets(List bitmaps, long groupingIdAllSet) {
boolean ret = true;
for (long mask : bitmaps) {
ret &= mask == groupingIdAllSet;
}
return ret;
}
public static long setBit(long bitmap, int bitIdx) {
return bitmap | (1L << bitIdx);
}
public static long unsetBit(long bitmap, int bitIdx) {
return bitmap & ~(1L << bitIdx);
}
/**
* This function is a wrapper of parseInfo.getGroupByForClause which
* automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY
* a,b,c.
*/
List getGroupByForClause(QBParseInfo parseInfo, String dest) throws SemanticException {
if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
ASTNode selectExprs = parseInfo.getSelForClause(dest);
List result = new ArrayList(selectExprs == null ? 0
: selectExprs.getChildCount());
if (selectExprs != null) {
for (int i = 0; i < selectExprs.getChildCount(); ++i) {
if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.QUERY_HINT) {
continue;
}
// table.column AS alias
ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
result.add(grpbyExpr);
}
}
return result;
} else {
ASTNode grpByExprs = parseInfo.getGroupByForClause(dest);
List result = new ArrayList(grpByExprs == null ? 0
: grpByExprs.getChildCount());
if (grpByExprs != null) {
for (int i = 0; i < grpByExprs.getChildCount(); ++i) {
ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i);
if (grpbyExpr.getType() != HiveParser.TOK_GROUPING_SETS_EXPRESSION) {
result.add(grpbyExpr);
}
}
}
return result;
}
}
static String[] getColAlias(ASTNode selExpr, String defaultName,
RowResolver inputRR, boolean includeFuncName, int colNum) {
String colAlias = null;
String tabAlias = null;
String[] colRef = new String[2];
//for queries with a windowing expressions, the selexpr may have a third child
if (selExpr.getChildCount() == 2 ||
(selExpr.getChildCount() == 3 &&
selExpr.getChild(2).getType() == HiveParser.TOK_WINDOWSPEC)) {
// return zz for "xx + yy AS zz"
colAlias = unescapeIdentifier(selExpr.getChild(1).getText().toLowerCase());
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
}
ASTNode root = (ASTNode) selExpr.getChild(0);
if (root.getType() == HiveParser.TOK_TABLE_OR_COL) {
colAlias =
BaseSemanticAnalyzer.unescapeIdentifier(root.getChild(0).getText().toLowerCase());
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
}
if (root.getType() == HiveParser.DOT) {
ASTNode tab = (ASTNode) root.getChild(0);
if (tab.getType() == HiveParser.TOK_TABLE_OR_COL) {
String t = unescapeIdentifier(tab.getChild(0).getText());
if (inputRR.hasTableAlias(t)) {
tabAlias = t;
}
}
// Return zz for "xx.zz" and "xx.yy.zz"
ASTNode col = (ASTNode) root.getChild(1);
if (col.getType() == HiveParser.Identifier) {
colAlias = unescapeIdentifier(col.getText().toLowerCase());
}
}
// if specified generate alias using func name
if (includeFuncName && (root.getType() == HiveParser.TOK_FUNCTION)) {
String expr_flattened = root.toStringTree();
// remove all TOK tokens
String expr_no_tok = expr_flattened.replaceAll("tok_\\S+", "");
// remove all non alphanumeric letters, replace whitespace spans with underscore
String expr_formatted = expr_no_tok.replaceAll("\\W", " ").trim().replaceAll("\\s+", "_");
// limit length to 20 chars
if (expr_formatted.length() > AUTOGEN_COLALIAS_PRFX_MAXLENGTH) {
expr_formatted = expr_formatted.substring(0, AUTOGEN_COLALIAS_PRFX_MAXLENGTH);
}
// append colnum to make it unique
colAlias = expr_formatted.concat("_" + colNum);
}
if (colAlias == null) {
// Return defaultName if selExpr is not a simple xx.yy.zz
colAlias = defaultName + colNum;
}
colRef[0] = tabAlias;
colRef[1] = colAlias;
return colRef;
}
/**
* Returns whether the pattern is a regex expression (instead of a normal
* string). Normal string is a string with all alphabets/digits and "_".
*/
static boolean isRegex(String pattern, HiveConf conf) {
String qIdSupport = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_QUOTEDID_SUPPORT);
if ( "column".equals(qIdSupport)) {
return false;
}
for (int i = 0; i < pattern.length(); i++) {
if (!Character.isLetterOrDigit(pattern.charAt(i))
&& pattern.charAt(i) != '_') {
return true;
}
}
return false;
}
private Operator> genSelectPlan(String dest, QB qb, Operator> input,
Operator> inputForSelectStar) throws SemanticException {
ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
Operator> op = genSelectPlan(dest, selExprList, qb, input, inputForSelectStar, false);
if (LOG.isDebugEnabled()) {
LOG.debug("Created Select Plan for clause: " + dest);
}
return op;
}
@SuppressWarnings("nls")
private Operator> genSelectPlan(String dest, ASTNode selExprList, QB qb, Operator> input,
Operator> inputForSelectStar, boolean outerLV) throws SemanticException {
if (LOG.isDebugEnabled()) {
LOG.debug("tree: " + selExprList.toStringTree());
}
ArrayList col_list = new ArrayList();
RowResolver out_rwsch = new RowResolver();
ASTNode trfm = null;
Integer pos = Integer.valueOf(0);
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
RowResolver starRR = null;
if (inputForSelectStar != null && inputForSelectStar != input) {
starRR = opParseCtx.get(inputForSelectStar).getRowResolver();
}
// SELECT * or SELECT TRANSFORM(*)
boolean selectStar = false;
int posn = 0;
boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.QUERY_HINT);
if (hintPresent) {
posn++;
}
boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() ==
HiveParser.TOK_TRANSFORM);
if (isInTransform) {
queryProperties.setUsesScript(true);
globalLimitCtx.setHasTransformOrUDTF(true);
trfm = (ASTNode) selExprList.getChild(posn).getChild(0);
}
// Detect queries of the form SELECT udtf(col) AS ...
// by looking for a function as the first child, and then checking to see
// if the function is a Generic UDTF. It's not as clean as TRANSFORM due to
// the lack of a special token.
boolean isUDTF = false;
String udtfTableAlias = null;
ArrayList udtfColAliases = new ArrayList();
ASTNode udtfExpr = (ASTNode) selExprList.getChild(posn).getChild(0);
GenericUDTF genericUDTF = null;
int udtfExprType = udtfExpr.getType();
if (udtfExprType == HiveParser.TOK_FUNCTION
|| udtfExprType == HiveParser.TOK_FUNCTIONSTAR) {
String funcName = TypeCheckProcFactory.DefaultExprProcessor
.getFunctionText(udtfExpr, true);
FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
if (fi != null) {
genericUDTF = fi.getGenericUDTF();
}
isUDTF = (genericUDTF != null);
if (isUDTF) {
globalLimitCtx.setHasTransformOrUDTF(true);
}
if (isUDTF && !fi.isNative()) {
unparseTranslator.addIdentifierTranslation((ASTNode) udtfExpr
.getChild(0));
}
if (isUDTF && (selectStar = udtfExprType == HiveParser.TOK_FUNCTIONSTAR)) {
genColListRegex(".*", null, (ASTNode) udtfExpr.getChild(0),
col_list, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
}
}
if (isUDTF) {
// Only support a single expression when it's a UDTF
if (selExprList.getChildCount() > 1) {
throw new SemanticException(generateErrorMessage(
(ASTNode) selExprList.getChild(1),
ErrorMsg.UDTF_MULTIPLE_EXPR.getMsg()));
}
ASTNode selExpr = (ASTNode) selExprList.getChild(posn);
// Get the column / table aliases from the expression. Start from 1 as
// 0 is the TOK_FUNCTION
// column names also can be inferred from result of UDTF
for (int i = 1; i < selExpr.getChildCount(); i++) {
ASTNode selExprChild = (ASTNode) selExpr.getChild(i);
switch (selExprChild.getType()) {
case HiveParser.Identifier:
udtfColAliases.add(unescapeIdentifier(selExprChild.getText().toLowerCase()));
unparseTranslator.addIdentifierTranslation(selExprChild);
break;
case HiveParser.TOK_TABALIAS:
assert (selExprChild.getChildCount() == 1);
udtfTableAlias = unescapeIdentifier(selExprChild.getChild(0)
.getText());
qb.addAlias(udtfTableAlias);
unparseTranslator.addIdentifierTranslation((ASTNode) selExprChild
.getChild(0));
break;
default:
assert (false);
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("UDTF table alias is " + udtfTableAlias);
LOG.debug("UDTF col aliases are " + udtfColAliases);
}
}
// The list of expressions after SELECT or SELECT TRANSFORM.
ASTNode exprList;
if (isInTransform) {
exprList = (ASTNode) trfm.getChild(0);
} else if (isUDTF) {
exprList = udtfExpr;
} else {
exprList = selExprList;
}
if (LOG.isDebugEnabled()) {
LOG.debug("genSelectPlan: input = " + inputRR + " starRr = " + starRR);
}
// For UDTF's, skip the function name to get the expressions
int startPosn = isUDTF ? posn + 1 : posn;
if (isInTransform) {
startPosn = 0;
}
final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty()
|| !qb.getParseInfo().getDestGroupingSets().isEmpty()
|| !qb.getParseInfo().getDestCubes().isEmpty());
Set colAliases = new HashSet();
ASTNode[] exprs = new ASTNode[exprList.getChildCount()];
String[][] aliases = new String[exprList.getChildCount()][];
boolean[] hasAsClauses = new boolean[exprList.getChildCount()];
int offset = 0;
// Iterate over all expression (either after SELECT, or in SELECT TRANSFORM)
for (int i = startPosn; i < exprList.getChildCount(); ++i) {
// child can be EXPR AS ALIAS, or EXPR.
ASTNode child = (ASTNode) exprList.getChild(i);
boolean hasAsClause = (!isInTransform) && (child.getChildCount() == 2);
boolean isWindowSpec = child.getChildCount() == 3 &&
child.getChild(2).getType() == HiveParser.TOK_WINDOWSPEC;
// EXPR AS (ALIAS,...) parses, but is only allowed for UDTF's
// This check is not needed and invalid when there is a transform b/c the
// AST's are slightly different.
if (!isWindowSpec && !isInTransform && !isUDTF && child.getChildCount() > 2) {
throw new SemanticException(generateErrorMessage(
(ASTNode) child.getChild(2),
ErrorMsg.INVALID_AS.getMsg()));
}
// The real expression
ASTNode expr;
String tabAlias;
String colAlias;
if (isInTransform || isUDTF) {
tabAlias = null;
colAlias = autogenColAliasPrfxLbl + i;
expr = child;
} else {
// Get rid of TOK_SELEXPR
expr = (ASTNode) child.getChild(0);
String[] colRef = getColAlias(child, autogenColAliasPrfxLbl, inputRR,
autogenColAliasPrfxIncludeFuncName, i + offset);
tabAlias = colRef[0];
colAlias = colRef[1];
if (hasAsClause) {
unparseTranslator.addIdentifierTranslation((ASTNode) child
.getChild(1));
}
}
exprs[i] = expr;
aliases[i] = new String[] {tabAlias, colAlias};
hasAsClauses[i] = hasAsClause;
colAliases.add(colAlias);
// The real expression
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
int initPos = pos;
pos = genColListRegex(".*", expr.getChildCount() == 0 ? null
: getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(),
expr, col_list, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
if (unparseTranslator.isEnabled()) {
offset += pos - initPos - 1;
}
selectStar = true;
} else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause
&& !inputRR.getIsExprResolver()
&& isRegex(unescapeIdentifier(expr.getChild(0).getText()), conf)) {
// In case the expression is a regex COL.
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()),
null, expr, col_list, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
} else if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
&& inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0)
.getChild(0).getText().toLowerCase())) && !hasAsClause
&& !inputRR.getIsExprResolver()
&& isRegex(unescapeIdentifier(expr.getChild(1).getText()), conf)) {
// In case the expression is TABLE.COL (col can be regex).
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()),
unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()),
expr, col_list, null, inputRR, starRR, pos, out_rwsch, qb.getAliases(), false);
} else {
// Case when this is an expression
TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR, true, isCBOExecuted());
// We allow stateful functions in the SELECT list (but nowhere else)
tcCtx.setAllowStatefulFunctions(true);
tcCtx.setAllowDistinctFunctions(false);
if (!isCBOExecuted() && !qb.getParseInfo().getDestToGroupBy().isEmpty()) {
// If CBO did not optimize the query, we might need to replace grouping function
// Special handling of grouping function
expr = rewriteGroupingFunctionAST(getGroupByForClause(qb.getParseInfo(), dest), expr,
!cubeRollupGrpSetPresent);
}
ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
String recommended = recommendName(exp, colAlias);
if (recommended != null && !colAliases.contains(recommended) &&
out_rwsch.get(null, recommended) == null) {
colAlias = recommended;
}
col_list.add(exp);
ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos),
exp.getWritableObjectInspector(), tabAlias, false);
colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc) exp)
.isSkewedCol() : false);
out_rwsch.put(tabAlias, colAlias, colInfo);
if ( exp instanceof ExprNodeColumnDesc ) {
ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp;
String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn());
if ( altMapping != null ) {
out_rwsch.put(altMapping[0], altMapping[1], colInfo);
}
}
pos = Integer.valueOf(pos.intValue() + 1);
}
}
selectStar = selectStar && exprList.getChildCount() == posn + 1;
out_rwsch = handleInsertStatementSpec(col_list, dest, out_rwsch, inputRR, qb, selExprList);
ArrayList columnNames = new ArrayList();
Map colExprMap = new HashMap();
for (int i = 0; i < col_list.size(); i++) {
String outputCol = getColumnInternalName(i);
colExprMap.put(outputCol, col_list.get(i));
columnNames.add(outputCol);
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
new SelectDesc(col_list, columnNames, selectStar), new RowSchema(
out_rwsch.getColumnInfos()), input), out_rwsch);
output.setColumnExprMap(colExprMap);
if (isInTransform) {
output = genScriptPlan(trfm, qb, output);
}
if (isUDTF) {
output = genUDTFPlan(genericUDTF, udtfTableAlias, udtfColAliases, qb,
output, outerLV);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Created Select Plan row schema: " + out_rwsch.toString());
}
return output;
}
private RowResolver getColForInsertStmtSpec(Map targetCol2Projection, final Table target,
Map targetCol2ColumnInfo, int colListPos,
List targetTableColTypes, ArrayList new_col_list,
List targetTableColNames)
throws SemanticException {
RowResolver newOutputRR = new RowResolver();
Map colNameToDefaultVal = null;
// see if we need to fetch default constraints from metastore
if(targetCol2Projection.size() < targetTableColNames.size()) {
try {
DefaultConstraint dc = Hive.get().getEnabledDefaultConstraints(target.getDbName(), target.getTableName());
colNameToDefaultVal = dc.getColNameToDefaultValueMap();
} catch (Exception e) {
if (e instanceof SemanticException) {
throw (SemanticException) e;
} else {
throw (new RuntimeException(e));
}
}
}
boolean defaultConstraintsFetch = true;
for (int i = 0; i < targetTableColNames.size(); i++) {
String f = targetTableColNames.get(i);
if(targetCol2Projection.containsKey(f)) {
//put existing column in new list to make sure it is in the right position
new_col_list.add(targetCol2Projection.get(f));
ColumnInfo ci = targetCol2ColumnInfo.get(f);
ci.setInternalName(getColumnInternalName(colListPos));
newOutputRR.put(ci.getTabAlias(), ci.getInternalName(), ci);
}
else {
//add new 'synthetic' columns for projections not provided by Select
assert(colNameToDefaultVal != null);
ExprNodeDesc exp = null;
if(colNameToDefaultVal.containsKey(f)) {
// make an expression for default value
String defaultValue = colNameToDefaultVal.get(f);
ParseDriver parseDriver = new ParseDriver();
try {
ASTNode defValAst = parseDriver.parseExpression(defaultValue);
exp = TypeCheckProcFactory.genExprNode(defValAst, new TypeCheckCtx(null)).get(defValAst);
} catch(Exception e) {
throw new SemanticException("Error while parsing default value: " + defaultValue
+ ". Error message: " + e.getMessage());
}
LOG.debug("Added default value from metastore: " + exp);
}
else {
exp = new ExprNodeConstantDesc(targetTableColTypes.get(i), null);
}
new_col_list.add(exp);
final String tableAlias = null;//this column doesn't come from any table
ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(colListPos),
exp.getWritableObjectInspector(), tableAlias, false);
newOutputRR.put(colInfo.getTabAlias(), colInfo.getInternalName(), colInfo);
}
colListPos++;
}
return newOutputRR;
}
/**
* This modifies the Select projections when the Select is part of an insert statement and
* the insert statement specifies a column list for the target table, e.g.
* create table source (a int, b int);
* create table target (x int, y int, z int);
* insert into target(z,x) select * from source
*
* Once the * is resolved to 'a,b', this list needs to rewritten to 'b,null,a' so that it looks
* as if the original query was written as
* insert into target select b, null, a from source
*
* if target schema is not specified, this is no-op
*
* @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx)
* @throws SemanticException
*/
public RowResolver handleInsertStatementSpec(List col_list, String dest,
RowResolver outputRR, RowResolver inputRR, QB qb,
ASTNode selExprList) throws SemanticException {
//(z,x)
List targetTableSchema = qb.getParseInfo().getDestSchemaForClause(dest);//specified in the query
if(targetTableSchema == null) {
//no insert schema was specified
return outputRR;
}
if(targetTableSchema.size() != col_list.size()) {
Table target = qb.getMetaData().getDestTableForAlias(dest);
Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
throw new SemanticException(generateErrorMessage(selExprList,
"Expected " + targetTableSchema.size() + " columns for " + dest +
(target != null ? "/" + target.getCompleteName() : (partition != null ? "/" + partition.getCompleteName() : "")) +
"; select produces " + col_list.size() + " columns"));
}
//e.g. map z->expr for a
Map targetCol2Projection = new HashMap();
//e.g. map z->ColumnInfo for a
Map targetCol2ColumnInfo = new HashMap();
int colListPos = 0;
for(String targetCol : targetTableSchema) {
targetCol2ColumnInfo.put(targetCol, outputRR.getColumnInfos().get(colListPos));
targetCol2Projection.put(targetCol, col_list.get(colListPos++));
}
Table target = qb.getMetaData().getDestTableForAlias(dest);
Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
if(target == null && partition == null) {
throw new SemanticException(generateErrorMessage(selExprList,
"No table/partition found in QB metadata for dest='" + dest + "'"));
}
ArrayList new_col_list = new ArrayList();
colListPos = 0;
List targetTableCols = target != null ? target.getCols() : partition.getCols();
List targetTableColNames = new ArrayList();
List targetTableColTypes = new ArrayList();
for(FieldSchema fs : targetTableCols) {
targetTableColNames.add(fs.getName());
targetTableColTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()));
}
Map partSpec = qb.getMetaData().getPartSpecForAlias(dest);
if(partSpec != null) {
//find dynamic partition columns
//relies on consistent order via LinkedHashMap
for(Map.Entry partKeyVal : partSpec.entrySet()) {
if (partKeyVal.getValue() == null) {
targetTableColNames.add(partKeyVal.getKey());//these must be after non-partition cols
targetTableColTypes.add(TypeInfoFactory.stringTypeInfo);
}
}
}
//now make the select produce , with
//where missing columns are NULL-filled
Table tbl = target == null? partition.getTable() : target;
RowResolver newOutputRR = getColForInsertStmtSpec(targetCol2Projection, tbl, targetCol2ColumnInfo, colListPos,
targetTableColTypes, new_col_list, targetTableColNames);
col_list.clear();
col_list.addAll(new_col_list);
return newOutputRR;
}
String recommendName(ExprNodeDesc exp, String colAlias) {
if (!colAlias.startsWith(autogenColAliasPrfxLbl)) {
return null;
}
String column = ExprNodeDescUtils.recommendInputName(exp);
if (column != null && !column.startsWith(autogenColAliasPrfxLbl)) {
return column;
}
return null;
}
String getAutogenColAliasPrfxLbl() {
return this.autogenColAliasPrfxLbl;
}
boolean autogenColAliasPrfxIncludeFuncName() {
return this.autogenColAliasPrfxIncludeFuncName;
}
/**
* Class to store GenericUDAF related information.
*/
public static class GenericUDAFInfo {
public ArrayList convertedParameters;
public GenericUDAFEvaluator genericUDAFEvaluator;
public TypeInfo returnType;
}
/**
* Convert exprNodeDesc array to Typeinfo array.
*/
static ArrayList getTypeInfo(ArrayList exprs) {
ArrayList result = new ArrayList();
for (ExprNodeDesc expr : exprs) {
result.add(expr.getTypeInfo());
}
return result;
}
/**
* Convert exprNodeDesc array to ObjectInspector array.
*/
static ArrayList getWritableObjectInspector(ArrayList exprs) {
ArrayList result = new ArrayList();
for (ExprNodeDesc expr : exprs) {
result.add(expr.getWritableObjectInspector());
}
return result;
}
/**
* Convert exprNodeDesc array to Typeinfo array.
*/
static ObjectInspector[] getStandardObjectInspector(ArrayList exprs) {
ObjectInspector[] result = new ObjectInspector[exprs.size()];
for (int i = 0; i < exprs.size(); i++) {
result[i] = TypeInfoUtils
.getStandardWritableObjectInspectorFromTypeInfo(exprs.get(i));
}
return result;
}
/**
* Returns the GenericUDAFEvaluator for the aggregation. This is called once
* for each GroupBy aggregation.
*/
public static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName,
ArrayList aggParameters, ASTNode aggTree,
boolean isDistinct, boolean isAllColumns)
throws SemanticException {
ArrayList originalParameterTypeInfos =
getWritableObjectInspector(aggParameters);
GenericUDAFEvaluator result = FunctionRegistry.getGenericUDAFEvaluator(
aggName, originalParameterTypeInfos, isDistinct, isAllColumns);
if (null == result) {
String reason = "Looking for UDAF Evaluator\"" + aggName
+ "\" with parameters " + originalParameterTypeInfos;
throw new SemanticException(ErrorMsg.INVALID_FUNCTION_SIGNATURE.getMsg(
(ASTNode) aggTree.getChild(0), reason));
}
return result;
}
/**
* Returns the GenericUDAFInfo struct for the aggregation.
*
* @param evaluator
* @param emode
* @param aggParameters
* The exprNodeDesc of the original parameters
* @return GenericUDAFInfo
* @throws SemanticException
* when the UDAF is not found or has problems.
*/
public static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator,
GenericUDAFEvaluator.Mode emode, ArrayList aggParameters)
throws SemanticException {
GenericUDAFInfo r = new GenericUDAFInfo();
// set r.genericUDAFEvaluator
r.genericUDAFEvaluator = evaluator;
// set r.returnType
ObjectInspector returnOI = null;
try {
ArrayList aggOIs = getWritableObjectInspector(aggParameters);
ObjectInspector[] aggOIArray = new ObjectInspector[aggOIs.size()];
for (int ii = 0; ii < aggOIs.size(); ++ii) {
aggOIArray[ii] = aggOIs.get(ii);
}
returnOI = r.genericUDAFEvaluator.init(emode, aggOIArray);
r.returnType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI);
} catch (HiveException e) {
throw new SemanticException(e);
}
// set r.convertedParameters
// TODO: type conversion
r.convertedParameters = aggParameters;
return r;
}
public static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode(
GroupByDesc.Mode mode, boolean isDistinct) {
switch (mode) {
case COMPLETE:
return GenericUDAFEvaluator.Mode.COMPLETE;
case PARTIAL1:
return GenericUDAFEvaluator.Mode.PARTIAL1;
case PARTIAL2:
return GenericUDAFEvaluator.Mode.PARTIAL2;
case PARTIALS:
return isDistinct ? GenericUDAFEvaluator.Mode.PARTIAL1
: GenericUDAFEvaluator.Mode.PARTIAL2;
case FINAL:
return GenericUDAFEvaluator.Mode.FINAL;
case HASH:
return GenericUDAFEvaluator.Mode.PARTIAL1;
case MERGEPARTIAL:
return isDistinct ? GenericUDAFEvaluator.Mode.COMPLETE
: GenericUDAFEvaluator.Mode.FINAL;
default:
throw new RuntimeException("internal error in groupByDescModeToUDAFMode");
}
}
/**
* Check if the given internalName represents a constant parameter in aggregation parameters
* of an aggregation tree.
* This method is only invoked when map-side aggregation is not involved. In this case,
* every parameter in every aggregation tree should already have a corresponding ColumnInfo,
* which is generated when the corresponding ReduceSinkOperator of the GroupByOperator being
* generating is generated. If we find that this parameter is a constant parameter,
* we will return the corresponding ExprNodeDesc in reduceValues, and we will not need to
* use a new ExprNodeColumnDesc, which can not be treated as a constant parameter, for this
* parameter (since the writableObjectInspector of a ExprNodeColumnDesc will not be
* a instance of ConstantObjectInspector).
*
* @param reduceValues
* value columns of the corresponding ReduceSinkOperator
* @param internalName
* the internal name of this parameter
* @return the ExprNodeDesc of the constant parameter if the given internalName represents
* a constant parameter; otherwise, return null
*/
public static ExprNodeDesc isConstantParameterInAggregationParameters(String internalName,
List reduceValues) {
// only the pattern of "VALUE._col([0-9]+)" should be handled.
String[] terms = internalName.split("\\.");
if (terms.length != 2 || reduceValues == null) {
return null;
}
if (Utilities.ReduceField.VALUE.toString().equals(terms[0])) {
int pos = getPositionFromInternalName(terms[1]);
if (pos >= 0 && pos < reduceValues.size()) {
ExprNodeDesc reduceValue = reduceValues.get(pos);
if (reduceValue != null) {
if (reduceValue.getWritableObjectInspector() instanceof ConstantObjectInspector) {
// this internalName represents a constant parameter in aggregation parameters
return reduceValue;
}
}
}
}
return null;
}
/**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
*
* @param mode
* The mode of the aggregation (PARTIAL1 or COMPLETE)
* @param genericUDAFEvaluators
* If not null, this function will store the mapping from Aggregation
* StringTree to the genericUDAFEvaluator in this parameter, so it
* can be used in the next-stage GroupBy aggregations.
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo,
String dest, Operator input, ReduceSinkOperator rs, GroupByDesc.Mode mode,
Map genericUDAFEvaluators)
throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx
.get(input).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList groupByKeys = new ArrayList();
ArrayList aggregations = new ArrayList();
ArrayList outputColumnNames = new ArrayList();
Map colExprMap = new HashMap();
List grpByExprs = getGroupByForClause(parseInfo, dest);
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), exprInfo
.getInternalName(), "", false));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), null, false);
groupByOutputRowResolver.putExpression(grpbyExpr,
oColInfo);
addAlternateGByKeyMappings(grpbyExpr, oColInfo, input, groupByOutputRowResolver);
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// For each aggregation
HashMap aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
assert (aggregationTrees != null);
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
List inputKeyCols = rs.getConf().getOutputKeyColumnNames();
if (inputKeyCols.size() > 0) {
lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
}
List reduceValues = rs.getConf().getValueCols();
int numDistinctUDFs = 0;
for (Map.Entry entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
// This is the GenericUDAF name
String aggName = unescapeIdentifier(value.getChild(0).getText());
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
// Convert children to aggParameters
ArrayList aggParameters = new ArrayList();
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ColumnInfo paraExprInfo =
groupByInputRowResolver.getExpression(paraExpr);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
if (isDistinct && lastKeyColName != null) {
// if aggr is distinct, the parameter is name is constructed as
// KEY.lastKeyColName:._colx
paraExpression = Utilities.ReduceField.KEY.name() + "." +
lastKeyColName + ":" + numDistinctUDFs + "." +
getColumnInternalName(i - 1);
}
ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(),
paraExprInfo.getIsVirtualCol());
ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(
paraExprInfo.getInternalName(), reduceValues);
if (reduceValue != null) {
// this parameter is a constant
expr = reduceValue;
}
aggParameters.add(expr);
}
if (isDistinct) {
numDistinctUDFs++;
}
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(
aggName, aggParameters, value, isDistinct, isAllColumns);
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(),
udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct,
amode));
String field = getColumnInternalName(groupByKeys.size()
+ aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(value, new ColumnInfo(
field, udaf.returnType, "", false));
// Save the evaluator so that it can be used by the next-stage
// GroupByOperators
if (genericUDAFEvaluators != null) {
genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
}
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf
.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
false, groupByMemoryUsage, memoryThreshold, null, false, -1, numDistinctUDFs > 0),
new RowSchema(groupByOutputRowResolver.getColumnInfos()),
input), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
// Add the grouping set key to the group by operator.
// This is not the first group by operator, but it is a subsequent group by operator
// which is forwarding the grouping keys introduced by the grouping sets.
// For eg: consider: select key, value, count(1) from T group by key, value with rollup.
// Assuming map-side aggregation and no skew, the plan would look like:
//
// TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink
//
// This function is called for GroupBy2 to pass the additional grouping keys introduced by
// GroupBy1 for the grouping set (corresponding to the rollup).
private void addGroupingSetKey(List groupByKeys,
RowResolver groupByInputRowResolver,
RowResolver groupByOutputRowResolver,
List outputColumnNames,
Map colExprMap) throws SemanticException {
// For grouping sets, add a dummy grouping key
String groupingSetColumnName =
groupByInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName();
ExprNodeDesc inputExpr = new ExprNodeColumnDesc(VirtualColumn.GROUPINGID.getTypeInfo(),
groupingSetColumnName, null, false);
groupByKeys.add(inputExpr);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(),
new ColumnInfo(
field,
VirtualColumn.GROUPINGID.getTypeInfo(),
null,
true));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// Process grouping set for the reduce sink operator
// For eg: consider: select key, value, count(1) from T group by key, value with rollup.
// Assuming map-side aggregation and no skew, the plan would look like:
//
// TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink
//
// This function is called for ReduceSink to add the additional grouping keys introduced by
// GroupBy1 into the reduce keys.
private void processGroupingSetReduceSinkOperator(RowResolver reduceSinkInputRowResolver,
RowResolver reduceSinkOutputRowResolver,
List reduceKeys,
List outputKeyColumnNames,
Map colExprMap) throws SemanticException {
// add a key for reduce sink
String groupingSetColumnName =
reduceSinkInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName();
ExprNodeDesc inputExpr = new ExprNodeColumnDesc(VirtualColumn.GROUPINGID.getTypeInfo(),
groupingSetColumnName, null, false);
reduceKeys.add(inputExpr);
outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1));
String field = Utilities.ReduceField.KEY.toString() + "."
+ getColumnInternalName(reduceKeys.size() - 1);
ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(
reduceKeys.size() - 1).getTypeInfo(), null, true);
reduceSinkOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), colInfo);
colExprMap.put(colInfo.getInternalName(), inputExpr);
}
/**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
*
* @param parseInfo
* @param dest
* @param reduceSinkOperatorInfo
* @param mode
* The mode of the aggregation (MERGEPARTIAL, PARTIAL2)
* @param genericUDAFEvaluators
* The mapping from Aggregation StringTree to the
* genericUDAFEvaluator.
* @param groupingSets
* list of grouping sets
* @param groupingSetsPresent
* whether grouping sets are present in this query
* @param groupingSetsNeedAdditionalMRJob
* whether grouping sets are consumed by this group by
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo,
String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode,
Map genericUDAFEvaluators,
List groupingSets,
boolean groupingSetsPresent,
boolean groupingSetsNeedAdditionalMRJob) throws SemanticException {
ArrayList outputColumnNames = new ArrayList();
RowResolver groupByInputRowResolver = opParseCtx
.get(reduceSinkOperatorInfo).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList groupByKeys = new ArrayList();
ArrayList aggregations = new ArrayList();
List grpByExprs = getGroupByForClause(parseInfo, dest);
Map colExprMap = new HashMap();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
groupByKeys.add(new ExprNodeColumnDesc(exprInfo));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
groupByOutputRowResolver.putExpression(grpbyExpr,
oColInfo);
addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo, groupByOutputRowResolver);
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// This is only needed if a new grouping set key is being created
int groupingSetsPosition = -1;
// For grouping sets, add a dummy grouping key
if (groupingSetsPresent) {
groupingSetsPosition = groupByKeys.size();
// Consider the query: select a,b, count(1) from T group by a,b with cube;
// where it is being executed in a single map-reduce job
// The plan is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
// GroupBy1 already added the grouping id as part of the row
// This function is called for GroupBy2 to add grouping id as part of the groupby keys
if (!groupingSetsNeedAdditionalMRJob) {
addGroupingSetKey(
groupByKeys,
groupByInputRowResolver,
groupByOutputRowResolver,
outputColumnNames,
colExprMap);
}
else {
// The grouping set has not yet been processed. Create a new grouping key
// Consider the query: select a,b, count(1) from T group by a,b with cube;
// where it is being executed in 2 map-reduce jobs
// The plan for 1st MR is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
// GroupBy1/ReduceSink worked as if grouping sets were not present
// This function is called for GroupBy2 to create new rows for grouping sets
// For each input row (a,b), 4 rows are created for the example above:
// (a,b), (a,null), (null, b), (null, null)
createNewGroupingKey(groupByKeys,
outputColumnNames,
groupByOutputRowResolver,
colExprMap);
}
}
HashMap aggregationTrees = parseInfo
.getAggregationExprsForClause(dest);
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
List reduceValues = null;
if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
List inputKeyCols = ((ReduceSinkDesc)
reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
if (inputKeyCols.size() > 0) {
lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
}
reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols();
}
int numDistinctUDFs = 0;
boolean containsDistinctAggr = false;
for (Map.Entry entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
String aggName = unescapeIdentifier(value.getChild(0).getText());
ArrayList aggParameters = new ArrayList();
boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI);
containsDistinctAggr = containsDistinctAggr || isDistinct;
// If the function is distinct, partial aggregation has not been done on
// the client side.
// If distPartAgg is set, the client is letting us know that partial
// aggregation has not been done.
// For eg: select a, count(b+c), count(distinct d+e) group by a
// For count(b+c), if partial aggregation has been performed, then we
// directly look for count(b+c).
// Otherwise, we look for b+c.
// For distincts, partial aggregation is never performed on the client
// side, so always look for the parameters: d+e
if (isDistinct) {
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ColumnInfo paraExprInfo =
groupByInputRowResolver.getExpression(paraExpr);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN
.getMsg(paraExpr));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
if (isDistinct && lastKeyColName != null) {
// if aggr is distinct, the parameter is name is constructed as
// KEY.lastKeyColName:._colx
paraExpression = Utilities.ReduceField.KEY.name() + "." +
lastKeyColName + ":" + numDistinctUDFs + "."
+ getColumnInternalName(i - 1);
}
ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(),
paraExprInfo.getIsVirtualCol());
ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(
paraExprInfo.getInternalName(), reduceValues);
if (reduceValue != null) {
// this parameter is a constant
expr = reduceValue;
}
aggParameters.add(expr);
}
} else {
ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(),
paraExpression, paraExprInfo.getTabAlias(), paraExprInfo
.getIsVirtualCol()));
}
if (isDistinct) {
numDistinctUDFs++;
}
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = null;
genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(),
udaf.genericUDAFEvaluator, udaf.convertedParameters,
(mode != GroupByDesc.Mode.FINAL && isDistinct), amode));
String field = getColumnInternalName(groupByKeys.size()
+ aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(value, new ColumnInfo(
field, udaf.returnType, "", false));
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf
.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
// Nothing special needs to be done for grouping sets if
// this is the final group by operator, and multiple rows corresponding to the
// grouping sets have been generated upstream.
// However, if an addition MR job has been created to handle grouping sets,
// additional rows corresponding to grouping sets need to be created here.
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
groupByMemoryUsage, memoryThreshold,
groupingSets,
groupingSetsPresent && groupingSetsNeedAdditionalMRJob,
groupingSetsPosition, containsDistinctAggr),
new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo),
groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
/*
* Create a new grouping key for grouping id.
* A dummy grouping id. is added. At runtime, the group by operator
* creates 'n' rows per input row, where 'n' is the number of grouping sets.
*/
private void createNewGroupingKey(List groupByKeys,
List outputColumnNames,
RowResolver groupByOutputRowResolver,
Map colExprMap) {
// The value for the constant does not matter. It is replaced by the grouping set
// value for the actual implementation
ExprNodeConstantDesc constant = new ExprNodeConstantDesc(VirtualColumn.GROUPINGID.getTypeInfo(), 0L);
groupByKeys.add(constant);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(),
new ColumnInfo(
field,
VirtualColumn.GROUPINGID.getTypeInfo(),
null,
true));
colExprMap.put(field, constant);
}
/**
* Generate the map-side GroupByOperator for the Query Block
* (qb.getParseInfo().getXXX(dest)). The new GroupByOperator will be a child
* of the inputOperatorInfo.
*
* @param mode
* The mode of the aggregation (HASH)
* @param genericUDAFEvaluators
* If not null, this function will store the mapping from Aggregation
* StringTree to the genericUDAFEvaluator in this parameter, so it
* can be used in the next-stage GroupBy aggregations.
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanMapGroupByOperator(QB qb,
String dest,
List