/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.parse;
import org.apache.hadoop.hive.ql.Context;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.antlr.runtime.tree.CommonTree;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder;
import org.apache.hadoop.hive.ql.parse.CalcitePlanner.ASTSearcher;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Queue;
import java.util.Set;
import java.util.Stack;
import org.antlr.runtime.tree.Tree;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.PTFUtils;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import com.google.common.base.Preconditions;
/**
* Library of utility functions used in the parse code.
*
*/
public final class ParseUtils {
private static final Logger LOG = LoggerFactory.getLogger(ParseUtils.class);

/**
 * Parses the Hive query.
 * @param command HiveQL command text to parse
 * @return the root ASTNode of the parse tree
 * @throws ParseException if the command cannot be parsed
 */
public static ASTNode parse(String command) throws ParseException {
  return parse(command, null);
}
/**
 * Parses the Hive query, setting up the token rewrite stream.
 * @param command HiveQL command text to parse
 * @param ctx query context passed to the parser (may be null)
 * @return the root ASTNode of the parse tree
 * @throws ParseException if the command cannot be parsed
 */
public static ASTNode parse(String command, Context ctx) throws ParseException {
return parse(command, ctx, true);
}
/**
 * Parses the Hive query.
 * @param command HiveQL command text to parse
 * @param ctx query context passed to the parser (may be null)
 * @param setTokenRewriteStream whether the parser should set up a token rewrite stream
 * @return the root ASTNode of the parse tree, with SETCOLREF tokens post-processed
 * @throws ParseException if the command cannot be parsed
 */
public static ASTNode parse(
String command, Context ctx, boolean setTokenRewriteStream) throws ParseException {
  // Parse, then strip any leading null-token wrapper and rewrite SETCOLREF nodes.
  ASTNode root = new ParseDriver().parse(command, ctx, setTokenRewriteStream);
  root = findRootNonNullToken(root);
  handleSetColRefs(root);
  return root;
}
/**
 * Tests whether the parse tree node is a join token.
 *
 * @param node the parse tree node
 * @return true iff the node's token is one of the four join token types
 */
public static boolean isJoinToken(ASTNode node) {
  int tokenType = node.getToken().getType();
  return tokenType == HiveParser.TOK_JOIN
      || tokenType == HiveParser.TOK_LEFTOUTERJOIN
      || tokenType == HiveParser.TOK_RIGHTOUTERJOIN
      || tokenType == HiveParser.TOK_FULLOUTERJOIN;
}
/**
 * Performs a descent of the leftmost branch of a tree, stopping when either a
 * node with a non-null token is found or the leaf level is encountered.
 *
 * @param tree candidate node from which to start searching
 * @return the node at which the descent stopped
 */
private static ASTNode findRootNonNullToken(ASTNode tree) {
  ASTNode current = tree;
  // Walk down child 0 while the current node carries no token.
  while (current.getToken() == null && current.getChildCount() > 0) {
    current = (ASTNode) current.getChild(0);
  }
  return current;
}
// Private constructor: ParseUtils is a static utility class.
private ParseUtils() {
// prevent instantiation
}
/**
 * Checks that no two field schemas share the same column name (case-insensitive).
 * Generic types restored: the raw {@code Iterator} version could not compile
 * ({@code Object} has no {@code getName()}).
 *
 * @param fieldSchemas columns to validate
 * @return the list of column names, in input order
 * @throws SemanticException if a duplicate (case-insensitive) column name is found
 */
public static List<String> validateColumnNameUniqueness(
    List<FieldSchema> fieldSchemas) throws SemanticException {
  // no duplicate column names
  // currently, it is a simple n*n algorithm - this can be optimized later if
  // need be but it should not be a major bottleneck as the number of columns
  // are anyway not so big
  List<String> colNames = new ArrayList<String>();
  for (FieldSchema schema : fieldSchemas) {
    String colName = schema.getName();
    for (String oldColName : colNames) {
      if (colName.equalsIgnoreCase(oldColName)) {
        throw new SemanticException(ErrorMsg.DUPLICATE_COLUMN_NAMES
            .getMsg(oldColName));
      }
    }
    colNames.add(colName);
  }
  return colNames;
}
/**
 * Builds an expression that casts {@code column} to the given primitive type.
 * @param column column expression to convert
 * @param tableFieldTypeInfo TypeInfo to convert to
 * @return Expression converting column to the type specified by tableFieldTypeInfo
 * @throws SemanticException if the cast expression cannot be constructed
 */
public static ExprNodeDesc createConversionCast(ExprNodeDesc column, PrimitiveTypeInfo tableFieldTypeInfo)
throws SemanticException {
// Get base type, since type string may be parameterized
String baseType = TypeInfoUtils.getBaseName(tableFieldTypeInfo.getTypeName());
// If the type cast UDF is for a parameterized type, then it should implement
// the SettableUDF interface so that we can pass in the params.
// Not sure if this is the cleanest solution, but there does need to be a way
// to provide the type params to the type cast.
return TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDescWithUdfData(baseType,
tableFieldTypeInfo, column);
}
/**
 * Builds a VarcharTypeInfo from a type AST node that carries the length as its
 * single child token.
 * @param node type AST node; must have exactly one child (the length literal)
 * @return the varchar type info with the parsed length
 * @throws SemanticException if the node does not have exactly one parameter
 */
public static VarcharTypeInfo getVarcharTypeInfo(ASTNode node)
    throws SemanticException {
  if (node.getChildCount() != 1) {
    throw new SemanticException("Bad params for type varchar");
  }
  int length = Integer.parseInt(node.getChild(0).getText());
  return TypeInfoFactory.getVarcharTypeInfo(length);
}
/**
 * Builds a CharTypeInfo from a type AST node that carries the length as its
 * single child token.
 * @param node type AST node; must have exactly one child (the length literal)
 * @return the char type info with the parsed length
 * @throws SemanticException if the node does not have exactly one parameter
 */
public static CharTypeInfo getCharTypeInfo(ASTNode node)
    throws SemanticException {
  if (node.getChildCount() != 1) {
    throw new SemanticException("Bad params for type char");
  }
  int length = Integer.parseInt(node.getChild(0).getText());
  return TypeInfoFactory.getCharTypeInfo(length);
}
/**
 * Finds the position of {@code elem} in {@code list}, comparing each non-null
 * entry lower-cased against elem (callers pass elem already lower-cased).
 * @param list array of candidate names; entries may be null
 * @param elem name to look for
 * @return index of the first match, or -1 if none
 */
static int getIndex(String[] list, String elem) {
  for (int pos = 0; pos < list.length; pos++) {
    String candidate = list[pos];
    if (candidate == null) {
      continue;
    }
    if (candidate.toLowerCase().equals(elem)) {
      return pos;
    }
  }
  return -1;
}
/*
 * if the given filterCondn refers to only 1 table alias in the QBJoinTree,
 * we return that alias's position. Otherwise we return -1
 */
static int checkJoinFilterRefersOneAlias(String[] tabAliases, ASTNode filterCondn) {
switch(filterCondn.getType()) {
// A direct table-or-column reference: resolve its (unescaped, lower-cased)
// name against the known aliases.
case HiveParser.TOK_TABLE_OR_COL:
String tableOrCol = SemanticAnalyzer.unescapeIdentifier(filterCondn.getChild(0).getText()
.toLowerCase());
return getIndex(tabAliases, tableOrCol);
// Literals and constants reference no alias at all.
case HiveParser.Identifier:
case HiveParser.Number:
case HiveParser.StringLiteral:
case HiveParser.IntegralLiteral:
case HiveParser.NumberLiteral:
case HiveParser.TOK_STRINGLITERALSEQUENCE:
case HiveParser.TOK_CHARSETLITERAL:
case HiveParser.TOK_DATELITERAL:
case HiveParser.KW_TRUE:
case HiveParser.KW_FALSE:
case HiveParser.TOK_NULL:
return -1;
// Composite expression: recurse into the children and merge their results.
// Returns -1 as soon as two children resolve to two different aliases.
default:
int idx = -1;
// For TOK_FUNCTION the first child is the function name, not an operand; skip it.
int i = filterCondn.getType() == HiveParser.TOK_FUNCTION ? 1 : 0;
for (; i < filterCondn.getChildCount(); i++) {
int cIdx = checkJoinFilterRefersOneAlias(tabAliases, (ASTNode) filterCondn.getChild(i));
if ( cIdx != idx ) {
// Two distinct non-negative alias indexes means more than one alias is referenced.
if ( idx != -1 && cIdx != -1 ) {
return -1;
}
// Adopt the child's alias if we had none so far.
idx = idx == -1 ? cIdx : idx;
}
}
return idx;
}
}
/**
 * Builds a DecimalTypeInfo from a type AST node whose optional children are the
 * precision and scale literals; missing parameters fall back to the user defaults.
 * @param node type AST node with zero, one (precision), or two (precision, scale) children
 * @return the decimal type info with the resolved precision and scale
 * @throws SemanticException if more than two parameters are present
 */
public static DecimalTypeInfo getDecimalTypeTypeInfo(ASTNode node)
    throws SemanticException {
  int numParams = node.getChildCount();
  if (numParams > 2) {
    throw new SemanticException("Bad params for type decimal");
  }
  int precision = HiveDecimal.USER_DEFAULT_PRECISION;
  int scale = HiveDecimal.USER_DEFAULT_SCALE;
  if (numParams >= 1) {
    precision = Integer.parseInt(node.getChild(0).getText());
  }
  if (numParams == 2) {
    scale = Integer.parseInt(node.getChild(1).getText());
  }
  return TypeInfoFactory.getDecimalTypeInfo(precision, scale);
}
/**
 * Verifies that the named class is loadable via the session-specified classloader.
 * @param className fully qualified class name; null is passed through unchecked
 * @return the same className when the class resolves (or null input)
 * @throws SemanticException if the class cannot be found
 */
public static String ensureClassExists(String className)
    throws SemanticException {
  if (className != null) {
    try {
      Class.forName(className, true, Utilities.getSessionSpecifiedClassLoader());
    } catch (ClassNotFoundException e) {
      throw new SemanticException("Cannot find class '" + className + "'", e);
    }
  }
  return className;
}
/**
 * Returns true iff the tree rooted at {@code root} contains a node whose token
 * type is one of the given types. Generic types restored: a raw anonymous
 * {@code Predicate} whose apply takes ASTNode does not override the generic
 * {@code apply} method and fails to compile.
 * @param root root of the AST to search
 * @param tokens token types to look for (HiveParser constants)
 */
public static boolean containsTokenOfType(ASTNode root, Integer... tokens) {
  final Set<Integer> tokensToMatch = new HashSet<Integer>();
  for (Integer tokenTypeToMatch : tokens) {
    tokensToMatch.add(tokenTypeToMatch);
  }
  return ParseUtils.containsTokenOfType(root, new PTFUtils.Predicate<ASTNode>() {
    @Override
    public boolean apply(ASTNode node) {
      return tokensToMatch.contains(node.getType());
    }
  });
}
/**
 * Performs a breadth-first search of the tree, returning true as soon as any
 * node satisfies the predicate. Generic types restored: the raw
 * {@code Queue.remove()} returns Object and would not compile when assigned
 * to ASTNode.
 * @param root root of the AST to search
 * @param predicate matcher applied to each visited node
 * @return true iff some node in the tree matches the predicate
 */
public static boolean containsTokenOfType(ASTNode root, PTFUtils.Predicate<ASTNode> predicate) {
  Queue<ASTNode> queue = new ArrayDeque<ASTNode>();
  // BFS
  queue.add(root);
  while (!queue.isEmpty()) {
    ASTNode current = queue.remove();
    // If the predicate matches, then return true.
    // Otherwise visit the next set of nodes that haven't been seen.
    if (predicate.apply(current)) {
      return true;
    }
    // Guard because ASTNode.getChildren.iterator returns null if no children available (bug).
    if (current.getChildCount() > 0) {
      for (Node child : current.getChildren()) {
        queue.add((ASTNode) child);
      }
    }
  }
  return false;
}
/**
 * Checks whether two AST (sub)trees are structurally identical: same nil-ness,
 * same node text, and same child structure throughout, using an iterative
 * paired traversal. Generic types restored: the raw {@code Stack.pop()}
 * returns Object and would not compile when assigned to Tree; ArrayDeque is
 * used as the recommended stack implementation (same LIFO behavior).
 * @param node one tree root (may be null)
 * @param otherNode the other tree root (may be null)
 * @return true iff both are null, or both trees match node-for-node
 */
public static boolean sameTree(ASTNode node, ASTNode otherNode) {
  if (node == null && otherNode == null) {
    return true;
  }
  if (node == null || otherNode == null) {
    return false;
  }
  ArrayDeque<Tree> stack = new ArrayDeque<Tree>();
  stack.push(node);
  ArrayDeque<Tree> otherStack = new ArrayDeque<Tree>();
  otherStack.push(otherNode);
  while (!stack.isEmpty() && !otherStack.isEmpty()) {
    Tree p = stack.pop();
    Tree otherP = otherStack.pop();
    if (p.isNil() != otherP.isNil()) {
      return false;
    }
    // Only non-nil nodes carry meaningful text.
    if (!p.isNil() && !p.toString().equals(otherP.toString())) {
      return false;
    }
    if (p.getChildCount() != otherP.getChildCount()) {
      return false;
    }
    // Push children right-to-left so they are compared in left-to-right order.
    for (int i = p.getChildCount() - 1; i >= 0; i--) {
      stack.push(p.getChild(i));
      otherStack.push(otherP.getChild(i));
    }
  }
  return stack.isEmpty() && otherStack.isEmpty();
}
/**
 * Repeatedly searches the tree for TOK_SETCOLREF nodes and rewrites each one,
 * until no more remain.
 * @param tree root of the parse tree to post-process
 */
private static void handleSetColRefs(ASTNode tree) {
  ASTSearcher searcher = new CalcitePlanner.ASTSearcher();
  ASTNode setCols;
  do {
    searcher.reset();
    setCols = searcher.depthFirstSearch(tree, HiveParser.TOK_SETCOLREF);
    if (setCols != null) {
      processSetColsNode(setCols, searcher);
    }
  } while (setCols != null);
}
/**
 * Replaces a spurious TOK_SETCOLREF added by parser with column names referring to the query
 * in e.g. a union. This is to maintain the expectations that some code, like order by position
 * alias, might have about not having ALLCOLREF. If it cannot find the columns with confidence
 * it will just replace SETCOLREF with ALLCOLREF. Most of the cases where that happens are
 * easy to work around in the query (e.g. by adding column aliases in the union).
 * Generic types restored on the local collections: the raw {@code List} version
 * would not compile against the typed for-each at the end of the method.
 * @param setCols TOK_SETCOLREF ASTNode.
 * @param searcher AST searcher to reuse.
 */
private static void processSetColsNode(ASTNode setCols, ASTSearcher searcher) {
  searcher.reset();
  // Walk up to the enclosing TOK_INSERT; its parent is the TOK_QUERY we need.
  CommonTree rootNode = setCols;
  while (rootNode != null && rootNode.getType() != HiveParser.TOK_INSERT) {
    rootNode = rootNode.parent;
  }
  if (rootNode == null || rootNode.parent == null) {
    // Couldn't find the parent insert; replace with ALLCOLREF.
    LOG.debug("Replacing SETCOLREF with ALLCOLREF because we couldn't find the root INSERT");
    setCols.token.setType(HiveParser.TOK_ALLCOLREF);
    return;
  }
  rootNode = rootNode.parent; // TOK_QUERY above insert
  Tree fromNode = null;
  for (int j = 0; j < rootNode.getChildCount(); ++j) {
    Tree child = rootNode.getChild(j);
    if (child.getType() == HiveParser.TOK_FROM) {
      fromNode = child;
      break;
    }
  }
  if (!(fromNode instanceof ASTNode)) {
    // Couldn't find the from that contains subquery; replace with ALLCOLREF.
    LOG.debug("Replacing SETCOLREF with ALLCOLREF because we couldn't find the FROM");
    setCols.token.setType(HiveParser.TOK_ALLCOLREF);
    return;
  }
  // We are making what we are trying to do more explicit if there's a union alias; so
  // that if we do something we didn't expect to do, it'd be more likely to fail.
  String alias = null;
  if (fromNode.getChildCount() > 0) {
    Tree fromWhat = fromNode.getChild(0);
    if (fromWhat.getType() == HiveParser.TOK_SUBQUERY && fromWhat.getChildCount() > 1) {
      Tree child = fromWhat.getChild(fromWhat.getChildCount() - 1);
      if (child.getType() == HiveParser.Identifier) {
        alias = child.getText();
      }
    }
  }
  // Note: we assume that this isn't an already malformed query;
  // we don't check for that here - it will fail later anyway.
  // First, we find the SELECT closest to the top.
  ASTNode select = searcher.simpleBreadthFirstSearchAny((ASTNode)fromNode,
      HiveParser.TOK_SELECT, HiveParser.TOK_SELECTDI);
  if (select == null) {
    // Couldn't find the from that contains subquery; replace with ALLCOLREF.
    LOG.debug("Replacing SETCOLREF with ALLCOLREF because we couldn't find the SELECT");
    setCols.token.setType(HiveParser.TOK_ALLCOLREF);
    return;
  }
  // Then, find the leftmost logical sibling select, because that's what Hive uses for aliases.
  while (true) {
    CommonTree queryOfSelect = select.parent;
    while (queryOfSelect != null && queryOfSelect.getType() != HiveParser.TOK_QUERY) {
      queryOfSelect = queryOfSelect.parent;
    }
    // We should have some QUERY; and also its parent because by supposition we are in subq.
    if (queryOfSelect == null || queryOfSelect.parent == null) {
      LOG.debug("Replacing SETCOLREF with ALLCOLREF because we couldn't find the QUERY");
      setCols.token.setType(HiveParser.TOK_ALLCOLREF);
      return;
    }
    if (queryOfSelect.childIndex == 0) break; // We are the left-most child.
    Tree moreToTheLeft = queryOfSelect.parent.getChild(0);
    Preconditions.checkState(moreToTheLeft != queryOfSelect);
    ASTNode newSelect = searcher.simpleBreadthFirstSearchAny((ASTNode)moreToTheLeft,
        HiveParser.TOK_SELECT, HiveParser.TOK_SELECTDI);
    Preconditions.checkState(newSelect != select);
    select = newSelect;
    // Repeat the procedure for the new select.
  }
  // Found the proper columns.
  List<ASTNode> newChildren = new ArrayList<>(select.getChildCount());
  HashSet<String> aliases = new HashSet<>();
  for (int i = 0; i < select.getChildCount(); ++i) {
    Tree selExpr = select.getChild(i);
    assert selExpr.getType() == HiveParser.TOK_SELEXPR;
    assert selExpr.getChildCount() > 0;
    // Examine the last child. It could be an alias.
    Tree child = selExpr.getChild(selExpr.getChildCount() - 1);
    switch (child.getType()) {
    case HiveParser.TOK_SETCOLREF:
      // We have a nested setcolref. Process that and start from scratch TODO: use stack?
      processSetColsNode((ASTNode)child, searcher);
      processSetColsNode(setCols, searcher);
      return;
    case HiveParser.TOK_ALLCOLREF:
      // We should find an alias of this insert and do (alias).*. This however won't fix e.g.
      // positional order by alias case, cause we'd still have a star on the top level. Bail.
      LOG.debug("Replacing SETCOLREF with ALLCOLREF because of nested ALLCOLREF");
      setCols.token.setType(HiveParser.TOK_ALLCOLREF);
      return;
    case HiveParser.TOK_TABLE_OR_COL:
      Tree idChild = child.getChild(0);
      assert idChild.getType() == HiveParser.Identifier : idChild;
      if (!createChildColumnRef(idChild, alias, newChildren, aliases)) {
        setCols.token.setType(HiveParser.TOK_ALLCOLREF);
        return;
      }
      break;
    case HiveParser.Identifier:
      if (!createChildColumnRef(child, alias, newChildren, aliases)) {
        setCols.token.setType(HiveParser.TOK_ALLCOLREF);
        return;
      }
      break;
    case HiveParser.DOT: {
      Tree colChild = child.getChild(child.getChildCount() - 1);
      assert colChild.getType() == HiveParser.Identifier : colChild;
      if (!createChildColumnRef(colChild, alias, newChildren, aliases)) {
        setCols.token.setType(HiveParser.TOK_ALLCOLREF);
        return;
      }
      break;
    }
    default:
      // Not really sure how to refer to this (or if we can).
      // TODO: We could find a different from branch for the union, that might have an alias?
      //       Or we could add an alias here to refer to, but that might break other branches.
      LOG.debug("Replacing SETCOLREF with ALLCOLREF because of the nested node "
          + child.getType() + " " + child.getText());
      setCols.token.setType(HiveParser.TOK_ALLCOLREF);
      return;
    }
  }
  // Insert search in the beginning would have failed if these parents didn't exist.
  ASTNode parent = (ASTNode)setCols.parent.parent;
  int t = parent.getType();
  assert t == HiveParser.TOK_SELECT || t == HiveParser.TOK_SELECTDI : t;
  int ix = setCols.parent.childIndex;
  parent.deleteChild(ix);
  for (ASTNode node : newChildren) {
    parent.insertChild(ix++, node);
  }
}
/**
 * Builds a TOK_SELEXPR node referring to {@code child}'s text as a column
 * (optionally qualified by {@code alias}) and appends it to {@code newChildren}.
 * Generic parameter types restored to match the typed values built by callers.
 * @param child node whose text is the column alias to refer to
 * @param alias subquery/union alias to qualify the column with, or null for unqualified
 * @param newChildren output list receiving the new TOK_SELEXPR node
 * @param aliases aliases seen so far; used to detect duplicates
 * @return false if the alias is a duplicate and the caller should fall back to ALLCOLREF
 */
private static boolean createChildColumnRef(Tree child, String alias,
    List<ASTNode> newChildren, HashSet<String> aliases) {
  String colAlias = child.getText();
  if (!aliases.add(colAlias)) {
    // TODO: if a side of the union has 2 columns with the same name, noone on the higher
    //       level can refer to them. We could change the alias in the original node.
    LOG.debug("Replacing SETCOLREF with ALLCOLREF because of duplicate alias {}", colAlias);
    return false;
  }
  ASTBuilder selExpr = ASTBuilder.construct(HiveParser.TOK_SELEXPR, "TOK_SELEXPR");
  ASTBuilder toc = ASTBuilder.construct(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL");
  ASTBuilder id = ASTBuilder.construct(HiveParser.Identifier, colAlias);
  if (alias == null) {
    selExpr = selExpr.add(toc.add(id));
  } else {
    // Build alias.column as DOT(TABLE_OR_COL(alias), column).
    ASTBuilder dot = ASTBuilder.construct(HiveParser.DOT, ".");
    ASTBuilder aliasNode = ASTBuilder.construct(HiveParser.Identifier, alias);
    selExpr = selExpr.add(dot.add(toc.add(aliasNode)).add(id));
  }
  newChildren.add(selExpr.node());
  return true;
}
}