org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hive-exec Show documentation
Show all versions of hive-exec Show documentation
Hive is a data warehouse infrastructure built on top of Hadoop see
http://wiki.apache.org/hadoop/Hive
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.parse;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.Tree;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.SkewedValueList;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.LineageInfo;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
/**
* BaseSemanticAnalyzer.
*
*/
public abstract class BaseSemanticAnalyzer {
// NOTE(review): several declarations below (e.g. "List> rootTasks") look like their generic
// type arguments were lost when this text was extracted - confirm against the upstream file.
// Hive instance obtained from Hive.get(conf) in the constructor.
protected final Hive db;
// Configuration the analyzer was constructed with.
protected final HiveConf conf;
// Task list returned by getRootTasks(); created empty in the constructor and replaced by reset().
protected List> rootTasks;
// Read and written through getFetchTask()/setFetchTask(); not initialized by the constructor.
protected FetchTask fetchTask;
// Logger named after the runtime class of the analyzer.
protected final Log LOG;
// LogHelper wrapping LOG.
protected final LogHelper console;
// Context stored by initCtx().
protected Context ctx;
// Created empty in the constructor; returned by getIdToTableNameMap().
protected HashMap idToTableNameMap;
// Returned by getQueryProperties(); no assignment is visible in this class.
protected QueryProperties queryProperties;
// Order codes passed to the Order constructor in getColumnNamesOrder().
public static int HIVE_COLUMN_ORDER_ASC = 1;
public static int HIVE_COLUMN_ORDER_DESC = 0;
/**
* ReadEntities that are passed to the hooks.
*/
protected HashSet inputs;
/**
* List of WriteEntities that are passed to the hooks.
*/
protected HashSet outputs;
/**
* Lineage information for the query.
*/
protected LineageInfo linfo;
protected TableAccessInfo tableAccessInfo;
protected ColumnAccessInfo columnAccessInfo;
// Fully qualified class names of the input formats, output formats and SerDes that
// StorageFormat assigns when it recognizes a storage clause or applies the default.
protected static final String TEXTFILE_INPUT = TextInputFormat.class
.getName();
protected static final String TEXTFILE_OUTPUT = IgnoreKeyTextOutputFormat.class
.getName();
protected static final String SEQUENCEFILE_INPUT = SequenceFileInputFormat.class
.getName();
protected static final String SEQUENCEFILE_OUTPUT = SequenceFileOutputFormat.class
.getName();
protected static final String RCFILE_INPUT = RCFileInputFormat.class
.getName();
protected static final String RCFILE_OUTPUT = RCFileOutputFormat.class
.getName();
protected static final String ORCFILE_INPUT = OrcInputFormat.class
.getName();
protected static final String ORCFILE_OUTPUT = OrcOutputFormat.class
.getName();
protected static final String ORCFILE_SERDE = OrcSerde.class
.getName();
protected static final String COLUMNAR_SERDE = ColumnarSerDe.class.getName();
/**
 * Delimiters collected from a row-format node. Every field stays {@code null} until
 * {@link #analyzeRowFormat} encounters the matching child token.
 */
class RowFormatParams {
  String fieldDelim = null;
  String fieldEscape = null;
  String collItemDelim = null;
  String mapKeyDelim = null;
  String lineDelim = null;

  /**
   * Walks the children of {@code child.getChild(0)} and stores the unescaped delimiter text
   * of each recognized row-format token in the corresponding field.
   *
   * @param shared not used by this method
   * @param child node whose first child holds the row-format entries
   * @throws SemanticException if the line delimiter is neither a newline nor the text "10"
   */
  protected void analyzeRowFormat(AnalyzeCreateCommonVars shared, ASTNode child) throws SemanticException {
    ASTNode formatNode = (ASTNode) child.getChild(0);
    final int entryCount = formatNode.getChildCount();
    for (int idx = 0; idx < entryCount; idx++) {
      ASTNode entry = (ASTNode) formatNode.getChild(idx);
      final int tokenType = entry.getToken().getType();
      if (tokenType == HiveParser.TOK_TABLEROWFORMATFIELD) {
        fieldDelim = unescapeSQLString(entry.getChild(0).getText());
        // An optional second child carries the escape character.
        if (entry.getChildCount() >= 2) {
          fieldEscape = unescapeSQLString(entry.getChild(1).getText());
        }
      } else if (tokenType == HiveParser.TOK_TABLEROWFORMATCOLLITEMS) {
        collItemDelim = unescapeSQLString(entry.getChild(0).getText());
      } else if (tokenType == HiveParser.TOK_TABLEROWFORMATMAPKEYS) {
        mapKeyDelim = unescapeSQLString(entry.getChild(0).getText());
      } else if (tokenType == HiveParser.TOK_TABLEROWFORMATLINES) {
        lineDelim = unescapeSQLString(entry.getChild(0).getText());
        boolean isNewline = lineDelim.equals("\n") || lineDelim.equals("10");
        if (!isNewline) {
          throw new SemanticException(SemanticAnalyzer.generateErrorMessage(entry,
              ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg()));
        }
      } else {
        // Any other token is unexpected here.
        assert false;
      }
    }
  }
}
// Values shared between the create-statement helpers: the SerDe class name (null until
// one is chosen) and the SerDe property map. StorageFormat writes to both.
class AnalyzeCreateCommonVars {
String serde = null;
Map serdeProps = new HashMap();
}
/**
 * Input format, output format and storage handler chosen for a table. All three stay
 * {@code null} until one of the fill methods assigns them.
 */
class StorageFormat {
  String inputFormat = null;
  String outputFormat = null;
  String storageHandler = null;

  /**
   * Applies a storage-related token to this object.
   *
   * @param child node to inspect
   * @param shared receives the SerDe name (RCFile only when none is set yet, ORC always) and
   *        the storage-handler properties
   * @return {@code true} if {@code child} was one of the recognized storage tokens
   */
  protected boolean fillStorageFormat(ASTNode child, AnalyzeCreateCommonVars shared) {
    switch (child.getToken().getType()) {
    case HiveParser.TOK_TBLSEQUENCEFILE:
      inputFormat = SEQUENCEFILE_INPUT;
      outputFormat = SEQUENCEFILE_OUTPUT;
      return true;
    case HiveParser.TOK_TBLTEXTFILE:
      inputFormat = TEXTFILE_INPUT;
      outputFormat = TEXTFILE_OUTPUT;
      return true;
    case HiveParser.TOK_TBLRCFILE:
      inputFormat = RCFILE_INPUT;
      outputFormat = RCFILE_OUTPUT;
      // Keep a SerDe that was already chosen.
      if (shared.serde == null) {
        shared.serde = COLUMNAR_SERDE;
      }
      return true;
    case HiveParser.TOK_TBLORCFILE:
      inputFormat = ORCFILE_INPUT;
      outputFormat = ORCFILE_OUTPUT;
      shared.serde = ORCFILE_SERDE;
      return true;
    case HiveParser.TOK_TABLEFILEFORMAT:
      inputFormat = unescapeSQLString(child.getChild(0).getText());
      outputFormat = unescapeSQLString(child.getChild(1).getText());
      return true;
    case HiveParser.TOK_STORAGEHANDLER:
      storageHandler = unescapeSQLString(child.getChild(0).getText());
      // A second child, when present, wraps the handler's property list.
      if (child.getChildCount() == 2) {
        readProps((ASTNode) (child.getChild(1).getChild(0)), shared.serdeProps);
      }
      return true;
    default:
      return false;
    }
  }

  /**
   * Picks the formats from {@code HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT} when neither an
   * input format nor a storage handler has been set. Unknown values fall back to text.
   *
   * @param shared receives the SerDe name for the RCFile and ORC defaults
   */
  protected void fillDefaultStorageFormat(AnalyzeCreateCommonVars shared) {
    if (inputFormat != null || storageHandler != null) {
      return;
    }
    String defaultFormat = conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT);
    if ("SequenceFile".equalsIgnoreCase(defaultFormat)) {
      inputFormat = SEQUENCEFILE_INPUT;
      outputFormat = SEQUENCEFILE_OUTPUT;
    } else if ("RCFile".equalsIgnoreCase(defaultFormat)) {
      inputFormat = RCFILE_INPUT;
      outputFormat = RCFILE_OUTPUT;
      shared.serde = COLUMNAR_SERDE;
    } else if ("ORC".equalsIgnoreCase(defaultFormat)) {
      inputFormat = ORCFILE_INPUT;
      outputFormat = ORCFILE_OUTPUT;
      shared.serde = ORCFILE_SERDE;
    } else {
      inputFormat = TEXTFILE_INPUT;
      outputFormat = TEXTFILE_OUTPUT;
    }
  }
}
/**
* Stores the configuration, obtains the Hive instance via Hive.get(conf) and creates the
* logger, console helper and empty task/table-name/input/output collections.
*
* @param conf configuration to keep and to pass to Hive.get
* @throws SemanticException wrapping any exception raised during initialization
*/
public BaseSemanticAnalyzer(HiveConf conf) throws SemanticException {
try {
this.conf = conf;
db = Hive.get(conf);
rootTasks = new ArrayList>();
// Logger is keyed on the runtime class, so each subclass logs under its own name.
LOG = LogFactory.getLog(this.getClass().getName());
console = new LogHelper(LOG);
idToTableNameMap = new HashMap();
// LinkedHashSet keeps the entities in insertion order.
inputs = new LinkedHashSet();
outputs = new LinkedHashSet();
} catch (Exception e) {
throw new SemanticException(e);
}
}
/**
* @return the idToTableNameMap field itself (not a copy); created empty in the constructor
*/
public HashMap getIdToTableNameMap() {
return idToTableNameMap;
}
/**
* Subclass hook called by analyze() after initCtx() and init() have run.
*
* @param ast node handed to analyze()
* @throws SemanticException as raised by the implementation
*/
public abstract void analyzeInternal(ASTNode ast) throws SemanticException;
/**
* Called by analyze() between initCtx() and analyzeInternal(); does nothing in this class.
*/
public void init() {
//no-op
}
/**
* Stores the given context in the ctx field.
*
* @param ctx context to keep
*/
public void initCtx(Context ctx) {
this.ctx = ctx;
}
/**
* Entry point: stores the context, runs init() and then delegates to analyzeInternal().
*
* @param ast node passed on to analyzeInternal()
* @param ctx context stored via initCtx()
* @throws SemanticException if analyzeInternal() throws it
*/
public void analyze(ASTNode ast, Context ctx) throws SemanticException {
initCtx(ctx);
init();
analyzeInternal(ast);
}
/**
* Validation hook; empty in this class.
*
* @throws SemanticException declared for overriding implementations
*/
public void validate() throws SemanticException {
// Implementations may choose to override this
}
/**
* @return the rootTasks field itself (not a copy)
*/
public List> getRootTasks() {
return rootTasks;
}
/**
* @return the fetchTask as last assigned; the constructor of this class does not set it
*/
public FetchTask getFetchTask() {
return fetchTask;
}
/**
* Replaces the stored fetch task.
*
* @param fetchTask
* the fetchTask to set
*/
public void setFetchTask(FetchTask fetchTask) {
this.fetchTask = fetchTask;
}
/**
* Replaces rootTasks with a new empty list; no other field is touched.
*/
protected void reset() {
rootTasks = new ArrayList>();
}
/**
* Delegates to PlanUtils.stripQuotes.
*
* @param val text to pass through
* @return whatever PlanUtils.stripQuotes returns for val
*/
public static String stripQuotes(String val) {
return PlanUtils.stripQuotes(val);
}
/**
 * Decodes a character-set literal into a String.
 *
 * <p>A literal starting with a single quote is unescaped with {@link #unescapeSQLString},
 * converted to bytes with the platform default charset and decoded with the named charset.
 * Anything else is treated as a hex literal of the form {@code 0x...}, whose digit pairs
 * become the bytes to decode.
 *
 * @param charSetName charset name with a leading underscore, which is stripped
 * @param charSetString the quoted or hex literal text
 * @return the decoded string
 * @throws SemanticException if the charset is unsupported, or the hex literal has an odd
 *         number of digits or contains a non-hex character
 */
public static String charSetString(String charSetName, String charSetString)
    throws SemanticException {
  try {
    // The character set name starts with a _, so strip that
    charSetName = charSetName.substring(1);
    if (charSetString.charAt(0) == '\'') {
      return new String(unescapeSQLString(charSetString).getBytes(),
          charSetName);
    }
    // hex input is also supported
    assert charSetString.charAt(0) == '0';
    assert charSetString.charAt(1) == 'x';
    String hexDigits = charSetString.substring(2);
    // Digits are consumed in pairs; an odd count used to read past the end of the string.
    if (hexDigits.length() % 2 != 0) {
      throw new SemanticException(
          "Hex literal must have an even number of digits: " + charSetString);
    }
    byte[] bArray = new byte[hexDigits.length() / 2];
    for (int i = 0; i < bArray.length; i++) {
      int high = Character.digit(hexDigits.charAt(2 * i), 16);
      int low = Character.digit(hexDigits.charAt(2 * i + 1), 16);
      // Character.digit reports -1 for a character that is not a hex digit.
      if (high < 0 || low < 0) {
        throw new SemanticException("Invalid hex digit in literal: " + charSetString);
      }
      // The narrowing cast maps 128..255 onto the negative byte range.
      bArray[i] = (byte) (high * 16 + low);
    }
    return new String(bArray, charSetName);
  } catch (UnsupportedEncodingException e) {
    throw new SemanticException(e);
  }
}
/**
* Get dequoted name from a table/column node. Same as the two-argument overload with a
* null current database, so an unqualified table name is returned without a prefix.
* @param tableOrColumnNode the table or column node
* @return for table node, db.tab or tab. for column node column.
*/
public static String getUnescapedName(ASTNode tableOrColumnNode) {
return getUnescapedName(tableOrColumnNode, null);
}
/**
 * Get the dequoted name from a table/column node, optionally qualifying a bare table name.
 *
 * @param tableOrColumnNode the table ({@code TOK_TABNAME}) or column node
 * @param currentDatabase prefix for a table node with a single child; ignored when null
 * @return {@code db.tab} for a two-child table node; {@code currentDatabase.tab} or
 *         {@code tab} for a one-child table node; the dequoted node text otherwise
 */
public static String getUnescapedName(ASTNode tableOrColumnNode, String currentDatabase) {
  boolean isTableNode = tableOrColumnNode.getToken().getType() == HiveParser.TOK_TABNAME;
  if (!isTableNode) {
    // column node
    return unescapeIdentifier(tableOrColumnNode.getText());
  }
  String firstPart = unescapeIdentifier(tableOrColumnNode.getChild(0).getText());
  if (tableOrColumnNode.getChildCount() == 2) {
    // first child is the database, second the table
    String secondPart = unescapeIdentifier(tableOrColumnNode.getChild(1).getText());
    return firstPart + "." + secondPart;
  }
  return currentDatabase == null ? firstPart : currentDatabase + "." + firstPart;
}
/**
 * Get the unqualified name from a table node.
 *
 * Handles both schema-qualified names (e.g., "db.table") and bare names; in either case
 * the result carries no schema prefix.
 *
 * @param node the table node, expected to have at most two children
 * @return the table name without schema qualification
 *         (i.e., if name is "db.table" or "table", returns "table")
 */
public static String getUnescapedUnqualifiedTableName(ASTNode node) {
  assert node.getChildCount() <= 2;
  // For a qualified name the table part is the second child.
  ASTNode nameNode = node.getChildCount() == 2 ? (ASTNode) node.getChild(1) : node;
  return getUnescapedName(nameNode);
}
/**
 * Remove the encapsulating "`" pair from the identifier. We allow users to
 * use "`" to escape identifier for table names, column names and aliases, in
 * case that coincide with Hive language keywords.
 *
 * @param val identifier text, possibly wrapped in backticks; may be null
 * @return null for null input; the text between the backticks when val both starts and
 *         ends with "`"; otherwise val unchanged
 */
public static String unescapeIdentifier(String val) {
  if (val == null) {
    return null;
  }
  int length = val.length();
  // A pair needs two characters. Without this guard "" fails on charAt(0) and a lone "`"
  // fails on substring(1, 0).
  if (length >= 2 && val.charAt(0) == '`' && val.charAt(length - 1) == '`') {
    return val.substring(1, length - 1);
  }
  return val;
}
/**
 * Copies parsed key/value property pairs into a map.
 *
 * Each child of {@code prop} is one pair: its first child is the key and its optional
 * second child the value. A pair without a value node is stored with a null value.
 *
 * @param prop ASTNode parent of the key/value pairs
 *
 * @param mapProp property map which receives the mappings
 */
public static void readProps(
    ASTNode prop, Map mapProp) {
  for (int idx = 0; idx < prop.getChildCount(); idx++) {
    Tree pair = prop.getChild(idx);
    String key = unescapeSQLString(pair.getChild(0).getText());
    Tree valueNode = pair.getChild(1);
    String value = (valueNode == null) ? null : unescapeSQLString(valueNode.getText());
    mapProp.put(key, value);
  }
}
/**
 * Extracts and unescapes the quoted content of a SQL string literal.
 *
 * Characters outside a single- or double-quoted section are dropped. Inside a section a
 * backslash followed by three octal digits (first digit 0 or 1) becomes the byte with that
 * value, and a backslash followed by one character is mapped through the escape table
 * below. A backslash too close to the end of the input for either form is copied as is.
 *
 * @param b literal text including its quotes
 * @return the unescaped content
 */
@SuppressWarnings("nls")
public static String unescapeSQLString(String b) {
  final int len = b.length();
  final StringBuilder out = new StringBuilder(len);
  // Quote character of the section being read; null while outside any quoted section.
  Character quote = null;
  int pos = 0;
  while (pos < len) {
    final char current = b.charAt(pos);
    if (quote == null) {
      // ignore all other chars outside the enclosure
      if (current == '\'' || current == '"') {
        quote = current;
      }
      pos++;
      continue;
    }
    if (quote.charValue() == current) {
      quote = null;
      pos++;
      continue;
    }
    if (current == '\\') {
      // Three-digit octal form, e.g. \002 for a delimiter.
      if (pos + 4 < len) {
        final char d1 = b.charAt(pos + 1);
        final char d2 = b.charAt(pos + 2);
        final char d3 = b.charAt(pos + 3);
        final boolean isOctal = d1 >= '0' && d1 <= '1'
            && d2 >= '0' && d2 <= '7'
            && d3 >= '0' && d3 <= '7';
        if (isOctal) {
          final byte code = (byte) ((d1 - '0') * 8 * 8 + (d2 - '0') * 8 + (d3 - '0'));
          out.append(new String(new byte[] {code}));
          pos += 4;
          continue;
        }
      }
      // Single-character escape.
      if (pos + 2 < len) {
        final char escaped = b.charAt(pos + 1);
        final String replacement;
        switch (escaped) {
        case '0':
          replacement = "\0";
          break;
        case '\'':
          replacement = "'";
          break;
        case '"':
          replacement = "\"";
          break;
        case 'b':
          replacement = "\b";
          break;
        case 'n':
          replacement = "\n";
          break;
        case 'r':
          replacement = "\r";
          break;
        case 't':
          replacement = "\t";
          break;
        case 'Z':
          replacement = "\u001A";
          break;
        case '\\':
          replacement = "\\";
          break;
        // The following 2 cases keep the backslash, exactly what MySQL does
        case '%':
          replacement = "\\%";
          break;
        case '_':
          replacement = "\\_";
          break;
        default:
          replacement = String.valueOf(escaped);
        }
        out.append(replacement);
        pos += 2;
        continue;
      }
    }
    out.append(current);
    pos++;
  }
  return out.toString();
}
/**
* @return the inputs field itself (not a copy); the constructor creates it as a LinkedHashSet
*/
public HashSet getInputs() {
return inputs;
}
/**
* @return the outputs field itself (not a copy); the constructor creates it as a LinkedHashSet
*/
public HashSet getOutputs() {
return outputs;
}
/**
* This base implementation always returns null.
*
* @return the schema for the fields which will be produced
* when the statement is executed, or null if not known
*/
public List getResultSchema() {
return null;
}
/**
* Same as getColumns(ast, true), i.e. column names are lower-cased.
*/
protected List getColumns(ASTNode ast) throws SemanticException {
return getColumns(ast, true);
}
/**
 * Rejects a file format by always throwing; the message names the text of the node's
 * first child, or is left blank when there is no child.
 *
 * @param node node whose first child holds the format name
 * @throws SemanticException always
 */
protected void handleGenericFileFormat(ASTNode node) throws SemanticException{
  ASTNode formatNode = (ASTNode) node.getChild(0);
  String formatName = "";
  if (formatNode != null) {
    formatName = formatNode.getText();
  }
  throw new SemanticException("Unrecognized file format in STORED AS clause:"
      + " " + formatName);
}
/**
 * Builds one FieldSchema per child of the given node.
 *
 * For every child: child 0 is the column name, child 1 the type and the optional child 2
 * the comment.
 *
 * @param ast parent of the column definition nodes
 * @param lowerCase whether to lower-case the column name before dequoting it
 * @return the columns in child order
 * @throws SemanticException if a type cannot be converted to its string form
 */
public static List getColumns(ASTNode ast, boolean lowerCase) throws SemanticException {
  final int columnCount = ast.getChildCount();
  List columns = new ArrayList();
  for (int idx = 0; idx < columnCount; idx++) {
    ASTNode columnNode = (ASTNode) ast.getChild(idx);
    String rawName = columnNode.getChild(0).getText();
    String columnName = lowerCase ? rawName.toLowerCase() : rawName;

    FieldSchema column = new FieldSchema();
    column.setName(unescapeIdentifier(columnName));
    column.setType(getTypeStringFromAST((ASTNode) (columnNode.getChild(1))));
    // The comment is present only when the definition has exactly three children.
    if (columnNode.getChildCount() == 3) {
      column.setComment(unescapeSQLString(columnNode.getChild(2).getText()));
    }
    columns.add(column);
  }
  return columns;
}
/**
 * Collects the dequoted, lower-cased text of every child of the given node.
 *
 * @param ast parent of the column name nodes
 * @return the names in child order
 */
protected List getColumnNames(ASTNode ast) {
  final int total = ast.getChildCount();
  List names = new ArrayList();
  for (int idx = 0; idx < total; idx++) {
    ASTNode nameNode = (ASTNode) ast.getChild(idx);
    String identifier = unescapeIdentifier(nameNode.getText());
    names.add(identifier.toLowerCase());
  }
  return names;
}
/**
 * Builds one Order per child of the given node from the dequoted, lower-cased text of the
 * child's first child. A {@code TOK_TABSORTCOLNAMEASC} child yields the ascending code,
 * any other child the descending code.
 *
 * @param ast parent of the sort column nodes
 * @return the Order entries in child order
 */
protected List getColumnNamesOrder(ASTNode ast) {
  final int total = ast.getChildCount();
  List orders = new ArrayList();
  for (int idx = 0; idx < total; idx++) {
    ASTNode sortNode = (ASTNode) ast.getChild(idx);
    boolean ascending = sortNode.getToken().getType() == HiveParser.TOK_TABSORTCOLNAMEASC;
    String column = unescapeIdentifier(sortNode.getChild(0).getText()).toLowerCase();
    orders.add(new Order(column, ascending ? HIVE_COLUMN_ORDER_ASC : HIVE_COLUMN_ORDER_DESC));
  }
  return orders;
}
/**
 * Converts a type node into its type string, recursing into list, map, struct and union
 * types and delegating every other node type to {@code DDLSemanticAnalyzer.getTypeName}.
 *
 * @param typeNode the type node
 * @return the type string
 * @throws SemanticException if a nested struct or union is empty, or the delegate throws
 */
protected static String getTypeStringFromAST(ASTNode typeNode)
    throws SemanticException {
  final int kind = typeNode.getType();
  if (kind == HiveParser.TOK_LIST) {
    String elementType = getTypeStringFromAST((ASTNode) typeNode.getChild(0));
    return serdeConstants.LIST_TYPE_NAME + "<" + elementType + ">";
  }
  if (kind == HiveParser.TOK_MAP) {
    String keyType = getTypeStringFromAST((ASTNode) typeNode.getChild(0));
    String valueType = getTypeStringFromAST((ASTNode) typeNode.getChild(1));
    return serdeConstants.MAP_TYPE_NAME + "<" + keyType + "," + valueType + ">";
  }
  if (kind == HiveParser.TOK_STRUCT) {
    return getStructTypeStringFromAST(typeNode);
  }
  if (kind == HiveParser.TOK_UNIONTYPE) {
    return getUnionTypeStringFromAST(typeNode);
  }
  return DDLSemanticAnalyzer.getTypeName(typeNode.getType());
}
/**
 * Renders a struct type node as {@code <struct-name><name:type,...>}. The fields are the
 * children of the node's first child; each field has its name as child 0 and type as child 1.
 *
 * @param typeNode the struct type node
 * @return the struct type string
 * @throws SemanticException if the struct has no fields
 */
private static String getStructTypeStringFromAST(ASTNode typeNode)
    throws SemanticException {
  ASTNode fieldList = (ASTNode) typeNode.getChild(0);
  final int fieldCount = fieldList.getChildCount();
  if (fieldCount <= 0) {
    throw new SemanticException("empty struct not allowed.");
  }
  StringBuilder out = new StringBuilder(serdeConstants.STRUCT_TYPE_NAME + "<");
  for (int idx = 0; idx < fieldCount; idx++) {
    if (idx > 0) {
      out.append(",");
    }
    ASTNode field = (ASTNode) fieldList.getChild(idx);
    out.append(unescapeIdentifier(field.getChild(0).getText()));
    out.append(":");
    out.append(getTypeStringFromAST((ASTNode) field.getChild(1)));
  }
  return out.append(">").toString();
}
/**
 * Renders a union type node as {@code <union-name><type,...>}. The member types are the
 * children of the node's first child.
 *
 * @param typeNode the union type node
 * @return the union type string
 * @throws SemanticException if the union has no member types
 */
private static String getUnionTypeStringFromAST(ASTNode typeNode)
    throws SemanticException {
  ASTNode memberList = (ASTNode) typeNode.getChild(0);
  final int memberCount = memberList.getChildCount();
  if (memberCount <= 0) {
    throw new SemanticException("empty union not allowed.");
  }
  StringBuilder out = new StringBuilder(serdeConstants.UNION_TYPE_NAME + "<");
  for (int idx = 0; idx < memberCount; idx++) {
    if (idx > 0) {
      out.append(",");
    }
    out.append(getTypeStringFromAST((ASTNode) memberList.getChild(idx)));
  }
  return out.append(">").toString();
}
/**
* tableSpec.
*
*/
public static class tableSpec {
public String tableName;
public Table tableHandle;
public Map partSpec; // has to use LinkedHashMap to enforce order
public Partition partHandle;
public int numDynParts; // number of dynamic partition columns
public List partitions; // involved partitions in TableScanOperator/FileSinkOperator
public static enum SpecType {TABLE_ONLY, STATIC_PARTITION, DYNAMIC_PARTITION};
public SpecType specType;
/**
* Same as the five-argument constructor with dynamic partition specs allowed and
* partial partition specs not allowed.
*/
public tableSpec(Hive db, HiveConf conf, ASTNode ast)
throws SemanticException {
this(db, conf, ast, true, false);
}
/**
* Resolves the table named by child 0 of ast and, when ast has exactly two children and is
* not a TOK_CREATETABLE node, the partition spec held in child 1.
*
* The table handle is not loaded for TOK_CREATETABLE nodes. In test mode the table name is
* prefixed with the HIVETESTMODEPREFIX setting. On return specType is TABLE_ONLY,
* STATIC_PARTITION or DYNAMIC_PARTITION.
*
* @param db used to look up the table and its partitions
* @param conf read for the test-mode and dynamic-partition-mode settings
* @param ast TOK_TAB, TOK_TABLE_PARTITION, TOK_TABTYPE or TOK_CREATETABLE node
* @param allowDynamicPartitionsSpec if false, a partition column given without a value is
* rejected
* @param allowPartialPartitionsSpec if true, a static spec fills partitions from
* db.getPartitions; otherwise a single partHandle is looked up
* @throws SemanticException if the table or partition lookup fails or the spec is invalid
*/
public tableSpec(Hive db, HiveConf conf, ASTNode ast,
boolean allowDynamicPartitionsSpec, boolean allowPartialPartitionsSpec)
throws SemanticException {
assert (ast.getToken().getType() == HiveParser.TOK_TAB
|| ast.getToken().getType() == HiveParser.TOK_TABLE_PARTITION
|| ast.getToken().getType() == HiveParser.TOK_TABTYPE
|| ast.getToken().getType() == HiveParser.TOK_CREATETABLE);
// Index of the child reported in error messages: 0 (table) here, 1 (partition spec) below.
int childIndex = 0;
numDynParts = 0;
try {
// get table metadata
tableName = getUnescapedName((ASTNode)ast.getChild(0));
boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE);
if (testMode) {
tableName = conf.getVar(HiveConf.ConfVars.HIVETESTMODEPREFIX)
+ tableName;
}
if (ast.getToken().getType() != HiveParser.TOK_CREATETABLE) {
tableHandle = db.getTable(tableName);
}
} catch (InvalidTableException ite) {
throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(ast
.getChild(0)), ite);
} catch (HiveException e) {
throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg(ast
.getChild(childIndex), e.getMessage()), e);
}
// get partition metadata if partition specified
if (ast.getChildCount() == 2 && ast.getToken().getType() != HiveParser.TOK_CREATETABLE) {
childIndex = 1;
ASTNode partspec = (ASTNode) ast.getChild(1);
partitions = new ArrayList();
// partSpec is a mapping from partition column name to its value.
partSpec = new LinkedHashMap(partspec.getChildCount());
for (int i = 0; i < partspec.getChildCount(); ++i) {
ASTNode partspec_val = (ASTNode) partspec.getChild(i);
// A dynamic partition column is stored with a null value.
String val = null;
String colName = unescapeIdentifier(partspec_val.getChild(0).getText().toLowerCase());
if (partspec_val.getChildCount() < 2) { // DP in the form of T partition (ds, hr)
if (allowDynamicPartitionsSpec) {
++numDynParts;
} else {
throw new SemanticException(ErrorMsg.INVALID_PARTITION
.getMsg(" - Dynamic partitions not allowed"));
}
} else { // in the form of T partition (ds="2010-03-03")
val = stripQuotes(partspec_val.getChild(1).getText());
}
partSpec.put(colName, val);
}
// check if the columns specified in the partition() clause are actually partition columns
Utilities.validatePartSpec(tableHandle, partSpec);
// check if the partition spec is valid
if (numDynParts > 0) {
List parts = tableHandle.getPartitionKeys();
int numStaPart = parts.size() - numDynParts;
if (numStaPart == 0 &&
conf.getVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE).equalsIgnoreCase("strict")) {
throw new SemanticException(ErrorMsg.DYNAMIC_PARTITION_STRICT_MODE.getMsg());
}
// check the partitions in partSpec be the same as defined in table schema
if (partSpec.keySet().size() != parts.size()) {
ErrorPartSpec(partSpec, parts);
}
// Sizes match at this point, so the key iterator cannot run out inside the loop.
Iterator itrPsKeys = partSpec.keySet().iterator();
for (FieldSchema fs: parts) {
if (!itrPsKeys.next().toLowerCase().equals(fs.getName().toLowerCase())) {
ErrorPartSpec(partSpec, parts);
}
}
// check if static partition appear after dynamic partitions
for (FieldSchema fs: parts) {
if (partSpec.get(fs.getName().toLowerCase()) == null) {
if (numStaPart > 0) { // found a DP, but there exists ST as subpartition
throw new SemanticException(
ErrorMsg.PARTITION_DYN_STA_ORDER.getMsg(ast.getChild(childIndex)));
}
break;
} else {
--numStaPart;
}
}
partHandle = null;
specType = SpecType.DYNAMIC_PARTITION;
} else {
try {
if (allowPartialPartitionsSpec) {
partitions = db.getPartitions(tableHandle, partSpec);
} else {
// this doesn't create partition.
partHandle = db.getPartition(tableHandle, partSpec, false);
if (partHandle == null) {
// if partSpec doesn't exists in DB, return a delegate one
// and the actual partition is created in MoveTask
// (the delegate is not added to the partitions list)
partHandle = new Partition(tableHandle, partSpec, null);
} else {
partitions.add(partHandle);
}
}
} catch (HiveException e) {
throw new SemanticException(
ErrorMsg.INVALID_PARTITION.getMsg(ast.getChild(childIndex)), e);
}
specType = SpecType.STATIC_PARTITION;
}
} else {
specType = SpecType.TABLE_ONLY;
}
}
/**
* @return the partSpec field itself (not a copy); stays null when the constructor found
* no partition spec
*/
public Map getPartSpec() {
return this.partSpec;
}
/**
* Replaces the stored partition spec; no other field is updated.
*
* @param partSpec map to store
*/
public void setPartSpec(Map partSpec) {
this.partSpec = partSpec;
}
/**
 * Describes this spec by its partition handle when there is one, otherwise by its table
 * handle, otherwise by the table name.
 *
 * @return a non-null description
 */
@Override
public String toString() {
  if (partHandle != null) {
    return partHandle.toString();
  }
  if (tableHandle != null) {
    return tableHandle.toString();
  }
  // The constructor does not load tableHandle for TOK_CREATETABLE nodes, so both handles
  // can be null; fall back to the name instead of throwing a NullPointerException.
  return String.valueOf(tableName);
}
}
/**
* Gets the lineage information. The constructor of this class does not initialize it.
*
* @return LineageInfo associated with the query.
*/
public LineageInfo getLineageInfo() {
return linfo;
}
/**
* Sets the lineage information, replacing any previous value.
*
* @param linfo The LineageInfo structure that is set in the optimization phase.
*/
public void setLineageInfo(LineageInfo linfo) {
this.linfo = linfo;
}
/**
* Gets the table access information. The constructor of this class does not initialize it.
*
* @return TableAccessInfo associated with the query.
*/
public TableAccessInfo getTableAccessInfo() {
return tableAccessInfo;
}
/**
* Sets the table access information.
*
* @param tableAccessInfo The TableAccessInfo structure that is set in the optimization phase.
*/
public void setTableAccessInfo(TableAccessInfo tableAccessInfo) {
this.tableAccessInfo = tableAccessInfo;
}
/**
* Gets the column access information. The constructor of this class does not initialize it.
*
* @return ColumnAccessInfo associated with the query.
*/
public ColumnAccessInfo getColumnAccessInfo() {
return columnAccessInfo;
}
/**
* Sets the column access information, replacing any previous value.
*
* @param columnAccessInfo The ColumnAccessInfo structure that is set immediately after
* the optimization phase.
*/
public void setColumnAccessInfo(ColumnAccessInfo columnAccessInfo) {
this.columnAccessInfo = columnAccessInfo;
}
/**
 * Turns the children of a partition spec node into an insertion-ordered map. For each
 * child, child 0 supplies the key (lower-cased text) and child 1 the value (text with
 * quotes stripped).
 *
 * @param partspec parent of the key/value nodes
 * @return the partition spec in child order
 * @throws SemanticException declared by the signature
 */
protected HashMap extractPartitionSpecs(Tree partspec)
    throws SemanticException {
  HashMap specMap = new LinkedHashMap();
  for (int idx = 0; idx < partspec.getChildCount(); ++idx) {
    CommonTree entry = (CommonTree) partspec.getChild(idx);
    String value = stripQuotes(entry.getChild(1).getText());
    String column = entry.getChild(0).getText().toLowerCase();
    specMap.put(column, value);
  }
  return specMap;
}
/**
* Checks if given specification is proper specification for prefix of
* partition cols, for table partitioned by ds, hr, min valid ones are
* (ds='2008-04-08'), (ds='2008-04-08', hr='12'), (ds='2008-04-08', hr='12', min='30')
* invalid one is for example (ds='2008-04-08', min='30')
* Keys are compared with the partition column names case-insensitively and in order.
* @param tTable table whose partition keys the specification is checked against
* @param spec specification key-value map
* @return true if the specification is prefix; never returns false, but throws
* @throws HiveException if the table is not partitioned but a spec is given, if a
* partitioned table gets a null spec, or if the spec is not a prefix of the partition columns
*/
final public boolean isValidPrefixSpec(Table tTable, Map spec)
throws HiveException {
// TODO - types need to be checked.
List partCols = tTable.getPartitionKeys();
if (partCols == null || (partCols.size() == 0)) {
if (spec != null) {
throw new HiveException(
"table is not partitioned but partition spec exists: "
+ spec);
} else {
return true;
}
}
if (spec == null) {
throw new HiveException("partition spec is not specified");
}
// Walk the spec keys and the partition columns in lockstep.
Iterator itrPsKeys = spec.keySet().iterator();
for (FieldSchema fs: partCols) {
// Spec exhausted first: it is a (possibly shorter) prefix.
if(!itrPsKeys.hasNext()) {
break;
}
if (!itrPsKeys.next().toLowerCase().equals(
fs.getName().toLowerCase())) {
ErrorPartSpec(spec, partCols);
}
}
// Keys left over after all partition columns were matched: spec is too long.
if(itrPsKeys.hasNext()) {
ErrorPartSpec(spec, partCols);
}
return true;
}
/**
 * Always throws a SemanticException listing the table's partition columns next to the
 * partition keys given in the query.
 *
 * @param partSpec partition spec from the query; only its keys are reported
 * @param parts partition columns of the table
 * @throws SemanticException always, with the PARTSPEC_DIFFER_FROM_SCHEMA message
 */
private static void ErrorPartSpec(Map partSpec,
    List parts) throws SemanticException {
  StringBuilder sb =
      new StringBuilder(
          "Partition columns in the table schema are: (");
  // Join with a leading separator rather than trimming a trailing one, so an empty
  // collection yields "()" instead of cutting into the surrounding text.
  String separator = "";
  for (FieldSchema fs : parts) {
    sb.append(separator).append(fs.getName());
    separator = ", ";
  }
  sb.append("), while the partitions specified in the query are: (");
  separator = "";
  for (Object key : partSpec.keySet()) {
    sb.append(separator).append(key);
    separator = ", ";
  }
  sb.append(").");
  throw new SemanticException(ErrorMsg.PARTSPEC_DIFFER_FROM_SCHEMA
      .getMsg(sb.toString()));
}
/**
* @return the Hive instance obtained in the constructor
*/
public Hive getDb() {
return db;
}
/**
* NOTE(review): no assignment to queryProperties is visible in this class - presumably
* subclasses set it; confirm before relying on a non-null result.
*
* @return the queryProperties field
*/
public QueryProperties getQueryProperties() {
return queryProperties;
}
/**
* Construct list bucketing context.
*
* Copies the arguments into a new ListBucketingCtx and sets its default key and default
* directory name from the ListBucketingPrunerUtils constants.
*
* @param skewedColNames stored as the skewed column names
* @param skewedValues stored as the skewed column values
* @param skewedColValueLocationMaps stored as the list-bucketing location map
* @param isStoredAsSubDirectories stored as the stored-as-sub-directories flag
* @param conf not read by this method
* @return the populated context
*/
protected ListBucketingCtx constructListBucketingCtx(List skewedColNames,
List> skewedValues, Map skewedColValueLocationMaps,
boolean isStoredAsSubDirectories, HiveConf conf) {
ListBucketingCtx lbCtx = new ListBucketingCtx();
lbCtx.setSkewedColNames(skewedColNames);
lbCtx.setSkewedColValues(skewedValues);
lbCtx.setLbLocationMap(skewedColValueLocationMaps);
lbCtx.setStoredAsSubDirectories(isStoredAsSubDirectories);
lbCtx.setDefaultKey(ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_KEY);
lbCtx.setDefaultDirName(ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME);
return lbCtx;
}
/**
 * Given a ASTNode, return list of values.
 *
 * use case:
 * create table xyz list bucketed (col1) with skew (1,2,5)
 * AST Node is for (1,2,5)
 *
 * @param ast parent of the value nodes
 * @return the text of every child with quotes stripped and lower-cased, in child order
 */
protected List getSkewedValueFromASTNode(ASTNode ast) {
  final int total = ast.getChildCount();
  List values = new ArrayList();
  for (int idx = 0; idx < total; idx++) {
    ASTNode valueNode = (ASTNode) ast.getChild(idx);
    String unquoted = stripQuotes(valueNode.getText());
    values.add(unquoted.toLowerCase());
  }
  return values;
}
/**
 * Retrieve skewed values from ASTNode.
 *
 * @param node node whose first child must be a {@code TOK_TABCOLVALUE} node
 * @return a new list holding the values of that child
 * @throws SemanticException if the first child is missing or is of another token type
 */
protected List getSkewedValuesFromASTNode(Node node) throws SemanticException {
  Tree firstChild = ((ASTNode) node).getChild(0);
  if (firstChild == null) {
    throw new SemanticException(
        ErrorMsg.SKEWED_TABLE_NO_COLUMN_VALUE.getMsg());
  }
  ASTNode valueList = (ASTNode) firstChild;
  if (valueList.getToken().getType() != HiveParser.TOK_TABCOLVALUE) {
    throw new SemanticException(
        ErrorMsg.SKEWED_TABLE_NO_COLUMN_VALUE.getMsg());
  }
  return new ArrayList(getSkewedValueFromASTNode(valueList));
}
/**
 * Analyze list bucket column names
 *
 * @param skewedColNames not read; the result is always built from {@code child}
 * @param child node whose first child must be a {@code TOK_TABCOLNAME} node
 * @return the column names of that first child
 * @throws SemanticException if the first child is missing or is of another token type
 */
protected List analyzeSkewedTablDDLColNames(List skewedColNames, ASTNode child)
    throws SemanticException {
  Tree firstChild = child.getChild(0);
  if (firstChild == null) {
    throw new SemanticException(ErrorMsg.SKEWED_TABLE_NO_COLUMN_NAME.getMsg());
  }
  ASTNode nameList = (ASTNode) firstChild;
  if (nameList.getToken().getType() != HiveParser.TOK_TABCOLNAME) {
    throw new SemanticException(ErrorMsg.SKEWED_TABLE_NO_COLUMN_NAME.getMsg());
  }
  return getColumnNames(nameList);
}
/**
* Handle skewed values in DDL.
*
* It can be used by both skewed by ... on () and set skewed location ().
*
* Reads child 1 of the given node. A TOK_TABCOLVALUE node adds one single-element list per
* value; a TOK_TABCOLVALUE_PAIR node adds one list per TOK_TABCOLVALUES child. Any other
* token type adds nothing.
*
* @param skewedValues list that receives the value lists
* @param child node whose second child holds the values
* @throws SemanticException if the second child is missing or a pair entry is malformed
*/
protected void analyzeDDLSkewedValues(List> skewedValues, ASTNode child)
throws SemanticException {
Tree vNode = child.getChild(1);
if (vNode == null) {
throw new SemanticException(ErrorMsg.SKEWED_TABLE_NO_COLUMN_VALUE.getMsg());
}
ASTNode vAstNode = (ASTNode) vNode;
switch (vAstNode.getToken().getType()) {
case HiveParser.TOK_TABCOLVALUE:
// Single skewed column: wrap every value in its own list.
for (String str : getSkewedValueFromASTNode(vAstNode)) {
List sList = new ArrayList(Arrays.asList(str));
skewedValues.add(sList);
}
break;
case HiveParser.TOK_TABCOLVALUE_PAIR:
// NOTE(review): the getChildren() result is iterated without a null check - confirm
// it cannot be null for a pair node.
ArrayList vLNodes = vAstNode.getChildren();
for (Node node : vLNodes) {
if ( ((ASTNode) node).getToken().getType() != HiveParser.TOK_TABCOLVALUES) {
throw new SemanticException(
ErrorMsg.SKEWED_TABLE_NO_COLUMN_VALUE.getMsg());
} else {
skewedValues.add(getSkewedValuesFromASTNode(node));
}
}
break;
default:
break;
}
}
/**
 * process stored as directories
 *
 * @param child node to inspect
 * @return true only when the node has exactly three children and the third one is a
 *         {@code TOK_STOREDASDIRS} token
 */
protected boolean analyzeStoredAdDirs(ASTNode child) {
  if (child.getChildCount() != 3) {
    return false;
  }
  ASTNode thirdChild = (ASTNode) child.getChild(2);
  return thirdChild.getToken().getType() == HiveParser.TOK_STOREDASDIRS;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy