Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.parse;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.Stack;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
import org.apache.hadoop.hive.ql.parse.SubQueryDiagnostic.QBSubQueryRewrite;
import org.apache.hadoop.hive.ql.parse.SubQueryUtils.ISubQueryJoinInfo;
import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory.DefaultExprProcessor;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
public class QBSubQuery implements ISubQueryJoinInfo {
public static enum SubQueryType {
  EXISTS,
  NOT_EXISTS,
  IN,
  NOT_IN,
  SCALAR;

  /**
   * Maps the operator node of a SubQuery predicate to a {@link SubQueryType}.
   *
   * @param opNode the operator node; {@code null} yields {@link #SCALAR}
   * @return the SubQuery type for the operator
   * @throws SemanticException if the operator token is none of the supported ones
   */
  public static SubQueryType get(ASTNode opNode) throws SemanticException {
    if (opNode == null) {
      return SCALAR;
    }
    final int opToken = opNode.getType();
    // KW_EXISTS / KW_IN never carry the negation themselves; it is found
    // by inspecting the parent of the node's grandparent.
    if (opToken == HiveParser.KW_EXISTS) {
      return negatedByAncestor(opNode) ? NOT_EXISTS : EXISTS;
    }
    if (opToken == HiveParser.TOK_SUBQUERY_OP_NOTEXISTS) {
      return NOT_EXISTS;
    }
    if (opToken == HiveParser.KW_IN) {
      return negatedByAncestor(opNode) ? NOT_IN : IN;
    }
    if (opToken == HiveParser.TOK_SUBQUERY_OP_NOTIN) {
      return NOT_IN;
    }
    throw new SemanticException(SemanticAnalyzer.generateErrorMessage(opNode,
        "Operator not supported in SubQuery use."));
  }

  /*
   * True when the node three levels above opNode exists and is a KW_NOT token.
   */
  private static boolean negatedByAncestor(ASTNode opNode) {
    return opNode.getParent().getParent().getParent() != null
        && opNode.getParent().getParent().getParent().getType() == HiveParser.KW_NOT;
  }
}
/**
 * Immutable pairing of a SubQuery operator AST node with the
 * {@link SubQueryType} it denotes.
 */
public static class SubQueryTypeDef {
  private final ASTNode ast;
  private final SubQueryType type;

  /**
   * @param ast the operator AST node
   * @param type the SubQuery type associated with that node
   */
  public SubQueryTypeDef(ASTNode ast, SubQueryType type) {
    this.type = type;
    this.ast = ast;
  }

  /** @return the operator AST node supplied at construction */
  public ASTNode getAst() {
    return this.ast;
  }

  /** @return the SubQuery type supplied at construction */
  public SubQueryType getType() {
    return this.type;
  }
}
/*
* An expression is either the left/right side of an Equality predicate in the SubQuery where
* clause; or it is the entire conjunct. For e.g. if the Where Clause for a SubQuery is:
* where R1.X = R2.Y and R2.Z > 7
* Then the expressions analyzed are R1.X, R2.Y (the left and right sides of the Equality
* predicate); and R2.Z > 7.
*
* The ExprType tracks whether the expr:
* - has a reference to a SubQuery table source
* - has a reference to Outer(parent) Query table source
*/
static enum ExprType {
  /* Refers to neither side: the result is whatever the other operand is. */
  REFERS_NONE(false, false) {
    @Override
    public ExprType combine(ExprType other) {
      return other;
    }
  },
  /* Refers to the parent only: becomes BOTH once the other side touches the SubQuery. */
  REFERS_PARENT(true, false) {
    @Override
    public ExprType combine(ExprType other) {
      return other.refersSubQuery() ? REFERS_BOTH : this;
    }
  },
  /* Refers to the SubQuery only: becomes BOTH once the other side touches the parent. */
  REFERS_SUBQUERY(false, true) {
    @Override
    public ExprType combine(ExprType other) {
      return other.refersParent() ? REFERS_BOTH : this;
    }
  },
  /* Already refers to both: combining cannot change it. */
  REFERS_BOTH(true,true) {
    @Override
    public ExprType combine(ExprType other) {
      return this;
    }
  };

  final boolean refersParent;
  final boolean refersSubQuery;

  ExprType(boolean refersParent, boolean refersSubQuery) {
    this.refersSubQuery = refersSubQuery;
    this.refersParent = refersParent;
  }

  /** @return true if this type includes a reference to the parent query */
  public boolean refersParent() {
    return this.refersParent;
  }

  /** @return true if this type includes a reference to the SubQuery */
  public boolean refersSubQuery() {
    return this.refersSubQuery;
  }

  /**
   * Merges this type with the type of another expression.
   *
   * @param other the type to merge with
   * @return the type describing the references of both expressions together
   */
  public abstract ExprType combine(ExprType other);
}
/*
* This class captures the information about a
* conjunct in the where clause of the SubQuery.
* For an equality predicate it captures, for each side:
* - the AST
* - the type of Expression (basically what columns are referenced)
* - for Expressions that refer the parent it captures the
* parent's ColumnInfo. In case of outer Aggregation expressions
* we need this to introduce a new mapping in the OuterQuery
* RowResolver. A join condition must use qualified column references,
* so we generate a new name for the aggr expression and use it in the
* joining condition.
* For e.g.
* having exists ( select x from R2 where y = min(R1.z) )
* where the expression 'min(R1.z)' is from the outer Query.
* We give this expression a new name like 'R1._gby_sq_col_1'
* and use the join condition: R1._gby_sq_col_1 = R2.y
*/
static class Conjunct {
  private final ASTNode leftExpr;
  private final ASTNode rightExpr;
  private final ExprType leftExprType;
  private final ExprType rightExprType;
  private final ColumnInfo leftOuterColInfo;
  private final ColumnInfo rightOuterColInfo;

  /*
   * rightExpr / rightExprType are null for a conjunct that was not split
   * into two sides (see the null checks in the predicates below).
   */
  Conjunct(ASTNode leftExpr,
      ASTNode rightExpr,
      ExprType leftExprType,
      ExprType rightExprType,
      ColumnInfo leftOuterColInfo,
      ColumnInfo rightOuterColInfo) {
    this.leftExpr = leftExpr;
    this.leftExprType = leftExprType;
    this.leftOuterColInfo = leftOuterColInfo;
    this.rightExpr = rightExpr;
    this.rightExprType = rightExprType;
    this.rightOuterColInfo = rightOuterColInfo;
  }

  ASTNode getLeftExpr() {
    return this.leftExpr;
  }

  ASTNode getRightExpr() {
    return this.rightExpr;
  }

  ExprType getLeftExprType() {
    return this.leftExprType;
  }

  ExprType getRightExprType() {
    return this.rightExprType;
  }

  /* True if the left side, or the right side when present, is REFERS_BOTH on its own. */
  boolean eitherSideRefersBoth() {
    return leftExprType == ExprType.REFERS_BOTH
        || (rightExpr != null && rightExprType == ExprType.REFERS_BOTH);
  }

  /* True only for a two-sided conjunct whose sides together refer to both queries. */
  boolean isCorrelated() {
    return rightExpr != null
        && leftExprType.combine(rightExprType) == ExprType.REFERS_BOTH;
  }

  /* True if the conjunct as a whole refers to the parent query and nothing else. */
  boolean refersOuterOnly() {
    final ExprType overall =
        (rightExpr == null) ? leftExprType : leftExprType.combine(rightExprType);
    return overall == ExprType.REFERS_PARENT;
  }

  ColumnInfo getLeftOuterColInfo() {
    return this.leftOuterColInfo;
  }

  ColumnInfo getRightOuterColInfo() {
    return this.rightOuterColInfo;
  }
}
/*
 * Classifies the expressions of a SubQuery where-clause conjunct by the
 * table sources they refer to (parent query, SubQuery, both or neither).
 */
class ConjunctAnalyzer {
  RowResolver parentQueryRR;
  boolean forHavingClause;
  String parentQueryNewAlias;
  NodeProcessor defaultExprProcessor;
  Stack<Node> stack;

  ConjunctAnalyzer(RowResolver parentQueryRR,
      boolean forHavingClause,
      String parentQueryNewAlias) {
    this.parentQueryRR = parentQueryRR;
    defaultExprProcessor = new DefaultExprProcessor();
    this.forHavingClause = forHavingClause;
    this.parentQueryNewAlias = parentQueryNewAlias;
    stack = new Stack<Node>();
  }

  /*
   * 1. On encountering a DOT, we attempt to resolve the leftmost name
   *    to the Parent Query.
   * 2. An unqualified name is assumed to be a SubQuery reference.
   *    We don't attempt to resolve this to the Parent; because
   *    we require all Parent column references to be qualified.
   * 3. All other expressions have a Type based on their children.
   *    An Expr w/o children is assumed to refer to neither.
   *
   * For a Having clause the whole expression is first looked up in the
   * Parent Query's RowResolver; a hit is reported as REFERS_PARENT together
   * with the resolved ColumnInfo.
   *
   * @return pair of (type of the expression, parent ColumnInfo or null)
   */
  private ObjectPair<ExprType, ColumnInfo> analyzeExpr(ASTNode expr) {
    ColumnInfo cInfo = null;
    if ( forHavingClause ) {
      try {
        cInfo = parentQueryRR.getExpression(expr);
        if ( cInfo != null) {
          return ObjectPair.create(ExprType.REFERS_PARENT, cInfo);
        }
      } catch(SemanticException ignored) {
        // Best effort: a failed lookup only means the expression is not a
        // parent expression; fall through to the structural analysis.
      }
    }
    if ( expr.getType() == HiveParser.DOT) {
      ASTNode dot = firstDot(expr);
      cInfo = resolveDot(dot);
      if ( cInfo != null ) {
        return ObjectPair.create(ExprType.REFERS_PARENT, cInfo);
      }
      return ObjectPair.create(ExprType.REFERS_SUBQUERY, null);
    } else if ( expr.getType() == HiveParser.TOK_TABLE_OR_COL ) {
      return ObjectPair.create(ExprType.REFERS_SUBQUERY, null);
    } else {
      ExprType exprType = ExprType.REFERS_NONE;
      int cnt = expr.getChildCount();
      for(int i=0; i < cnt; i++) {
        ASTNode child = (ASTNode) expr.getChild(i);
        exprType = exprType.combine(analyzeExpr(child).getFirst());
      }
      return ObjectPair.create(exprType, null);
    }
  }

  /*
   * 1. The only correlation operator we check for is EQUAL; because that is
   *    the one for which we can do a Algebraic transformation.
   * 2. For expressions that are not an EQUAL predicate, we treat them as conjuncts
   *    having only 1 side. These should only contain references to the SubQuery
   *    table sources.
   * 3. For expressions that are an EQUAL predicate; we analyze each side and set the
   *    left and right exprs in the Conjunct object.
   *
   * @return Conjunct contains details on the left and right side of the conjunct expression.
   */
  Conjunct analyzeConjunct(ASTNode conjunct) throws SemanticException {
    int type = conjunct.getType();
    if ( type == HiveParser.EQUAL ) {
      ASTNode left = (ASTNode) conjunct.getChild(0);
      ASTNode right = (ASTNode) conjunct.getChild(1);
      ObjectPair<ExprType, ColumnInfo> leftInfo = analyzeExpr(left);
      ObjectPair<ExprType, ColumnInfo> rightInfo = analyzeExpr(right);
      return new Conjunct(left, right,
          leftInfo.getFirst(), rightInfo.getFirst(),
          leftInfo.getSecond(), rightInfo.getSecond());
    } else {
      ObjectPair<ExprType, ColumnInfo> sqExprInfo = analyzeExpr(conjunct);
      // One-sided conjunct: no right expression or type; the same ColumnInfo
      // is passed for both outer column slots, as before.
      return new Conjunct(conjunct, null,
          sqExprInfo.getFirst(), null,
          sqExprInfo.getSecond(), sqExprInfo.getSecond());
    }
  }

  /*
   * Try to resolve a qualified name as a column reference on the Parent Query's RowResolver.
   * Apply this logic on the leftmost(first) dot in an AST tree.
   *
   * @return the parent's ColumnInfo, or null when the name does not resolve to one
   */
  protected ColumnInfo resolveDot(ASTNode node) {
    try {
      TypeCheckCtx tcCtx = new TypeCheckCtx(parentQueryRR);
      String str = BaseSemanticAnalyzer.unescapeIdentifier(node.getChild(1).getText());
      ExprNodeDesc idDesc = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, str.toLowerCase());
      Object desc = defaultExprProcessor.process(node, stack, tcCtx, (Object) null, idDesc);
      // instanceof is already false for null, so no separate null test is needed.
      if (desc instanceof ExprNodeColumnDesc) {
        ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) desc;
        String[] qualName = parentQueryRR.reverseLookup(colDesc.getColumn());
        // Treat a failed reverse lookup as "not a parent column" instead of
        // dereferencing a null array.
        if (qualName != null) {
          return parentQueryRR.get(qualName[0], qualName[1]);
        }
      }
    } catch(SemanticException ignored) {
      // Best effort: failure to resolve means this is not a parent reference.
    }
    return null;
  }

  /*
   * We want to resolve the leftmost name to the Parent Query's RR.
   * Hence we do a left walk down the AST, until we reach the bottom most DOT.
   */
  protected ASTNode firstDot(ASTNode dot) {
    ASTNode firstChild = (ASTNode) dot.getChild(0);
    if ( firstChild != null && firstChild.getType() == HiveParser.DOT) {
      return firstDot(firstChild);
    }
    return dot;
  }
}
/*
* When transforming a Not In SubQuery we need to check for nulls in the
* Joining expressions of the SubQuery. If there are nulls then the SubQuery always
* return false. For more details see
* https://issues.apache.org/jira/secure/attachment/12614003/SubQuerySpec.pdf
*
* Basically, SQL semantics say that:
* - R1.A not in (null, 1, 2, ...)
* is always false.
* A 'not in' operator is equivalent to a '<> all'. Since a not equal check with null
* returns false, a not in predicate against a set with a 'null' value always returns false.
*
* So for not in SubQuery predicates:
* - we join in a null count predicate.
* - And the joining condition is that the 'Null Count' query has a count of 0.
*
*/
class NotInCheck implements ISubQueryJoinInfo {
  /* Alias handed to the null-check query and join-condition builders. */
  private static final String CNT_ALIAS = "c1";

  /*
   * expressions in SubQ that are joined to the Outer Query.
   */
  List<ASTNode> subQryCorrExprs;

  /*
   * row resolver of the SubQuery.
   * Set by the SemanticAnalyzer after the Plan for the SubQuery is genned.
   * This is needed in case the SubQuery select list contains a TOK_ALLCOLREF
   */
  RowResolver sqRR;

  NotInCheck() {
    subQryCorrExprs = new ArrayList<ASTNode>();
  }

  /* Records one more SubQuery expression that is joined to the Outer Query. */
  void addCorrExpr(ASTNode corrExpr) {
    subQryCorrExprs.add(corrExpr);
  }

  /*
   * Builds the null-check query AST from the enclosing SubQuery's AST and alias,
   * the recorded correlation expressions and the SubQuery row resolver.
   */
  public ASTNode getSubQueryAST() {
    ASTNode ast = SubQueryUtils.buildNotInNullCheckQuery(
        QBSubQuery.this.getSubQueryAST(),
        QBSubQuery.this.getAlias(),
        CNT_ALIAS,
        subQryCorrExprs,
        sqRR);
    return ast;
  }

  /* The enclosing SubQuery's alias with "_notin_nullcheck" appended. */
  public String getAlias() {
    return QBSubQuery.this.getAlias() + "_notin_nullcheck";
  }

  public JoinType getJoinType() {
    return JoinType.LEFTSEMI;
  }

  public ASTNode getJoinConditionAST() {
    ASTNode ast =
        SubQueryUtils.buildNotInNullJoinCond(getAlias(), CNT_ALIAS);
    return ast;
  }

  public QBSubQuery getSubQuery() {
    return QBSubQuery.this;
  }

  public String getOuterQueryId() {
    return QBSubQuery.this.getOuterQueryId();
  }

  void setSQRR(RowResolver sqRR) {
    this.sqRR = sqRR;
  }
}
// Identifier of the outer query, as passed to the constructor.
private final String outerQueryId;
// Index of this SubQuery; used to build the alias ("sq_" + sqIdx).
private final int sqIdx;
// Alias of this SubQuery: "sq_" followed by sqIdx.
private final String alias;
// AST of the SubQuery; modified in place by the rewrite methods of this class.
private final ASTNode subQueryAST;
// Expression of the parent query associated with the SubQuery predicate; may be null.
private final ASTNode parentQueryExpression;
// Operator (AST node and type) of the SubQuery predicate.
private final SubQueryTypeDef operator;
// Set by validateAndRewriteAST from the SubQuery's select items.
private boolean containsAggregationExprs;
// Set to true by rewrite() when a correlated conjunct is found.
private boolean hasCorrelation;
// Join condition accumulated by rewrite() and completed by buildJoinCondition().
private ASTNode joinConditionAST;
// Assigned by setJoinType() based on the operator type.
private JoinType joinType;
// Condition to be AND-ed into the outer query filter; see updateOuterQueryFilter().
private ASTNode postJoinConditionAST;
// Counter used to generate "sq_corr_<n>" aliases.
private int numCorrExprsinSQ;
// Column references (on this SubQuery's alias) created for correlated expressions.
private List subQueryJoinAliasExprs;
// Origin information built from the original SubQuery text in the constructor.
private transient final ASTNodeOrigin originalSQASTOrigin;
/*
* tracks number of exprs from correlated predicates added to SQ select list.
*/
private int numOfCorrelationExprsAddedToSQSelect;
// True once getSubQueryGroupByAST() had to create a Group By for the SubQuery.
private boolean groupbyAddedToSQ;
// Counter used to generate "_gby_sq_col_<n>" column aliases.
private int numOuterCorrExprsForHaving;
// Non-null only when the operator type is NOT_IN (see constructor).
private NotInCheck notInCheck;
// Records the rewrites applied to this SubQuery.
private QBSubQueryRewrite subQueryDiagnostic;
/**
 * Creates the bookkeeping object for one SubQuery predicate.
 *
 * @param outerQueryId identifier of the outer query
 * @param sqIdx index of this SubQuery; the alias is {@code "sq_" + sqIdx}
 * @param subQueryAST AST of the SubQuery
 * @param parentQueryExpression expression of the parent query tied to the predicate
 * @param operator operator of the SubQuery predicate
 * @param originalSQAST original SubQuery AST; its token range is used to capture the text
 * @param ctx context supplying the token rewrite stream
 */
public QBSubQuery(String outerQueryId,
    int sqIdx,
    ASTNode subQueryAST,
    ASTNode parentQueryExpression,
    SubQueryTypeDef operator,
    ASTNode originalSQAST,
    Context ctx) {
  // Identity of the SubQuery.
  this.outerQueryId = outerQueryId;
  this.sqIdx = sqIdx;
  this.alias = "sq_" + this.sqIdx;

  // The ASTs this object works on.
  this.subQueryAST = subQueryAST;
  this.parentQueryExpression = parentQueryExpression;
  this.operator = operator;

  // Counters and flags start out cleared.
  this.numCorrExprsinSQ = 0;
  this.numOuterCorrExprsForHaving = 0;
  this.numOfCorrelationExprsAddedToSQSelect = 0;
  this.groupbyAddedToSQ = false;

  // Capture the text between the original AST's start and stop tokens.
  final String originalText = ctx.getTokenRewriteStream().toString(
      originalSQAST.getTokenStartIndex(), originalSQAST.getTokenStopIndex());
  this.originalSQASTOrigin =
      new ASTNodeOrigin("SubQuery", alias, originalText, alias, originalSQAST);

  if (operator.getType() == SubQueryType.NOT_IN) {
    this.notInCheck = new NotInCheck();
  }

  // Last: the diagnostic receives this (now populated) object.
  this.subQueryDiagnostic =
      SubQueryDiagnostic.getRewrite(this, ctx.getTokenRewriteStream(), ctx);
}
/** @return the AST of this SubQuery */
public ASTNode getSubQueryAST() {
  return this.subQueryAST;
}
/** @return the operator definition of this SubQuery predicate */
public SubQueryTypeDef getOperator() {
  return this.operator;
}
/**
 * @return for NOT EXISTS / NOT IN the parent of the origin's usage node,
 *         otherwise the usage node itself
 */
public ASTNode getOriginalSubQueryASTForRewrite() {
  final SubQueryType sqType = operator.getType();
  if (sqType == SubQueryType.NOT_EXISTS || sqType == SubQueryType.NOT_IN) {
    return (ASTNode) originalSQASTOrigin.getUsageNode().getParent();
  }
  return originalSQASTOrigin.getUsageNode();
}
/**
 * Checks the SubQuery against the restrictions below without rewriting it.
 *
 * @param parentQueryRR row resolver of the parent query
 * @param forHavingClause whether the SubQuery predicate appears in a Having clause
 * @param outerQueryAlias alias of the outer query
 * @return true if the SubQuery is correlated, has an aggregate in its select list and
 *         no explicit Group By, and is either SCALAR, NOT IN, or IN with a COUNT
 *         aggregate; false otherwise (including when there is no where clause)
 * @throws SemanticException if a restriction is violated
 */
boolean subqueryRestrictionsCheck(RowResolver parentQueryRR,
    boolean forHavingClause,
    String outerQueryAlias)
    throws SemanticException {
  ASTNode insertClause = getChildFromSubqueryAST("Insert", HiveParser.TOK_INSERT);
  ASTNode selectClause = (ASTNode) insertClause.getChild(1);

  // A leading hint list is not a select item.
  int selectExprStart = 0;
  if ( selectClause.getChild(0).getType() == HiveParser.TOK_HINTLIST ) {
    selectExprStart = 1;
  }

  final SubQueryType sqType = operator.getType();
  final boolean isExistsKind =
      sqType == SubQueryType.EXISTS || sqType == SubQueryType.NOT_EXISTS;

  /*
   * Check.5.h :: For In and Not In the SubQuery must implicitly or
   * explicitly only contain one select item.
   */
  if ( !isExistsKind && selectClause.getChildCount() - selectExprStart > 1 ) {
    subQueryAST.setOrigin(originalSQASTOrigin);
    throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
        subQueryAST, "SubQuery can contain only 1 item in Select List."));
  }

  boolean hasAggreateExprs = false;
  boolean hasWindowing = false;
  // we need to know if aggregate is COUNT since IN corr subq with count aggregate
  // is not special cased later in subquery remove rule
  boolean hasCount = false;
  for (int i = selectExprStart; i < selectClause.getChildCount(); i++ ) {
    ASTNode selectItem = (ASTNode) selectClause.getChild(i);
    // As used here: 1 and 2 are both aggregates, 2 is COUNT, 3 is windowing.
    int r = SubQueryUtils.checkAggOrWindowing(selectItem);
    hasWindowing = hasWindowing || r == 3;
    hasAggreateExprs = hasAggreateExprs || r == 1 || r == 2;
    hasCount = hasCount || r == 2;
  }

  ASTNode whereClause = SubQueryUtils.subQueryWhere(insertClause);
  if ( whereClause == null ) {
    return false;
  }
  ASTNode searchCond = (ASTNode) whereClause.getChild(0);
  List<ASTNode> conjuncts = new ArrayList<ASTNode>();
  SubQueryUtils.extractConjuncts(searchCond, conjuncts);

  ConjunctAnalyzer conjunctAnalyzer = new ConjunctAnalyzer(parentQueryRR,
      forHavingClause, outerQueryAlias);

  // Local on purpose: this check must not touch the hasCorrelation field.
  boolean correlated = false;
  boolean hasNonEquiJoinPred = false;
  for (ASTNode conjunctAST : conjuncts) {
    Conjunct conjunct = conjunctAnalyzer.analyzeConjunct(conjunctAST);
    if (conjunct.isCorrelated()) {
      correlated = true;
    }
    if ( conjunct.eitherSideRefersBoth() && conjunctAST.getType() != HiveParser.EQUAL) {
      hasNonEquiJoinPred = true;
    }
  }

  // The Group By is a child of the insert clause. The previous code probed
  // the children of the select clause (insertClause.getChild(1)), which are
  // select items, so an explicit Group By was never detected.
  boolean hasExplicitGby = false;
  for (int i = 0; i < insertClause.getChildCount(); i++) {
    if ( insertClause.getChild(i).getType() == HiveParser.TOK_GROUPBY ) {
      hasExplicitGby = true;
      break;
    }
  }

  /*
   * Restriction.14.h :: Correlated Sub Queries cannot contain Windowing clauses.
   */
  if ( hasWindowing && correlated ) {
    throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
        subQueryAST, "Correlated Sub Queries cannot contain Windowing clauses."));
  }

  /*
   * Restriction.13.m :: In the case of an implied Group By on a
   * correlated SubQuery, the SubQuery always returns 1 row.
   * An exists on a SubQuery with an implied GBy will always return true.
   * Whereas Algebraically transforming to a Join may not return true. See
   * Specification doc for details.
   * Similarly a not exists on a SubQuery with a implied GBY will always return false.
   */
  // Special cases for correlated subqueries that have an aggregate and no explicit group by:
  // * EXISTS/NOT EXISTS - NOT allowed, throw an error for now. We plan to allow this later
  // * SCALAR - not allowed with a non-equi join predicate; otherwise returns true since
  //            later in subquery remove rule we need to know about this case.
  // * IN - always allowed, returns true only for the COUNT aggregate since later in
  //        subquery remove rule we need to know about this case.
  // * NOT IN - always allowed, returns true because later subq remove rule will generate
  //        diff plan for this case
  if ( !hasAggreateExprs || hasExplicitGby ) {
    return false;
  }

  if ( isExistsKind ) {
    if ( correlated ) {
      throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
          subQueryAST,
          "A predicate on EXISTS/NOT EXISTS SubQuery with implicit Aggregation(no Group By clause) " +
              "cannot be rewritten."));
    }
    return false;
  }

  if ( sqType == SubQueryType.SCALAR ) {
    if ( hasNonEquiJoinPred ) {
      throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
          subQueryAST,
          "Scalar subqueries with aggregate cannot have non-equi join predicate"));
    }
    return correlated;
  }

  if ( sqType == SubQueryType.IN ) {
    return hasCount && correlated;
  }

  if ( sqType == SubQueryType.NOT_IN ) {
    return correlated;
  }

  return false;
}
/**
 * Validates the SubQuery against the restrictions below and rewrites its AST
 * (see {@code rewrite}) so that correlated predicates become join conditions.
 *
 * @param outerQueryRR row resolver of the outer query
 * @param forHavingClause whether the SubQuery predicate appears in a Having clause
 * @param outerQueryAlias alias of the outer query
 * @param outerQryAliases table aliases used in the outer query; currently not consulted
 *        because Restriction 17.s is not enforced by this method
 * @throws SemanticException if the From or Insert clause is missing or a restriction
 *         is violated
 */
void validateAndRewriteAST(RowResolver outerQueryRR,
    boolean forHavingClause,
    String outerQueryAlias,
    Set<String> outerQryAliases) throws SemanticException {
  // Only the presence of the From clause is validated; the node is not used further.
  getChildFromSubqueryAST("From", HiveParser.TOK_FROM);
  ASTNode insertClause = getChildFromSubqueryAST("Insert", HiveParser.TOK_INSERT);
  ASTNode selectClause = (ASTNode) insertClause.getChild(1);

  // A leading hint list is not a select item.
  int selectExprStart = 0;
  if ( selectClause.getChild(0).getType() == HiveParser.TOK_HINTLIST ) {
    selectExprStart = 1;
  }

  /*
   * Restriction.16.s :: Correlated Expression in Outer Query must not contain
   * unqualified column references.
   * disabled : if it's obvious, we allow unqualified refs
   */

  /*
   * Restriction 17.s :: SubQuery cannot use the same table alias as one used in
   * the Outer Query.
   * Not enforced: the previous code computed the shared alias and fetched the
   * where clause but never acted on either, so that dead code was removed.
   */

  final SubQueryType sqType = operator.getType();
  final boolean isExistsKind =
      sqType == SubQueryType.EXISTS || sqType == SubQueryType.NOT_EXISTS;

  /*
   * Check.5.h :: For In and Not In the SubQuery must implicitly or
   * explicitly only contain one select item.
   */
  if ( !isExistsKind && selectClause.getChildCount() - selectExprStart > 1 ) {
    subQueryAST.setOrigin(originalSQASTOrigin);
    throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
        subQueryAST, "SubQuery can contain only 1 item in Select List."));
  }

  containsAggregationExprs = false;
  boolean containsWindowing = false;
  for (int i = selectExprStart; i < selectClause.getChildCount(); i++ ) {
    ASTNode selectItem = (ASTNode) selectClause.getChild(i);
    int r = SubQueryUtils.checkAggOrWindowing(selectItem);
    containsWindowing = containsWindowing || r == 3;
    // 2 is the COUNT aggregate (see subqueryRestrictionsCheck); it must count as
    // an aggregate here too, or rewrite() adds the correlated expression to the
    // select list without a matching Group By.
    containsAggregationExprs = containsAggregationExprs || r == 1 || r == 2;
  }

  rewrite(outerQueryRR, forHavingClause, outerQueryAlias, insertClause, selectClause);

  /*
   * Restriction.13.m :: In the case of an implied Group By on a
   * correlated SubQuery, the SubQuery always returns 1 row.
   * An exists on a SubQuery with an implied GBy will always return true.
   * Whereas Algebraically transforming to a Join may not return true. See
   * Specification doc for details.
   * Similarly a not exists on a SubQuery with a implied GBY will always return false.
   */
  if ( sqType == SubQueryType.EXISTS &&
      containsAggregationExprs &&
      groupbyAddedToSQ ) {
    throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
        subQueryAST,
        "An Exists predicate on SubQuery with implicit Aggregation(no Group By clause) " +
            "cannot be rewritten. (predicate will always return true)."));
  }
  if ( sqType == SubQueryType.NOT_EXISTS &&
      containsAggregationExprs &&
      groupbyAddedToSQ ) {
    throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
        subQueryAST,
        "A Not Exists predicate on SubQuery with implicit Aggregation(no Group By clause) " +
            "cannot be rewritten. (predicate will always return false)."));
  }

  /*
   * Restriction.14.h :: Correlated Sub Queries cannot contain Windowing clauses.
   */
  if ( containsWindowing && hasCorrelation ) {
    throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
        subQueryAST, "Correlated Sub Queries cannot contain Windowing clauses."));
  }

  /*
   * Check.4.h :: For Exists and Not Exists, the Sub Query must
   * have 1 or more correlated predicates.
   */
  if ( isExistsKind && !hasCorrelation ) {
    throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
        subQueryAST, "For Exists/Not Exists operator SubQuery must be Correlated."));
  }
}
/*
 * Returns the first child of the SubQuery AST having the given token type.
 * When no such child exists: returns null if errorMsg is null, otherwise
 * throws with errorMsg used as the name of the missing clause.
 */
private ASTNode getChildFromSubqueryAST(String errorMsg, int type) throws SemanticException {
  final ASTNode match = (ASTNode) subQueryAST.getFirstChildWithType(type);
  if (match != null || errorMsg == null) {
    return match;
  }
  subQueryAST.setOrigin(originalSQASTOrigin);
  throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
      subQueryAST, errorMsg + " clause is missing in SubQuery."));
}
/*
 * NOT IN / NOT EXISTS are joined with a left outer join;
 * every other operator type uses a left semi join.
 */
private void setJoinType() {
  final SubQueryType sqType = operator.getType();
  final boolean negated =
      sqType == SubQueryType.NOT_IN || sqType == SubQueryType.NOT_EXISTS;
  joinType = negated ? JoinType.LEFTOUTER : JoinType.LEFTSEMI;
}
/*
 * Completes the join condition between the outer query and this SubQuery,
 * sets the join type and, for left outer joins, the post-join condition.
 *
 * - When there is a parent query expression, a join condition between it and
 *   the SubQuery is built and AND-ed in front of the conditions collected so far.
 * - Outside a Having clause the parent expression is first passed through
 *   SubQueryUtils.setQualifiedColumnReferences; a null result is reported as
 *   ambiguous column references.
 * - When the parent expression resolves as an expression in the outer
 *   RowResolver, its side of the join condition is replaced by a generated
 *   column reference (see rewriteCorrConjunctForHaving).
 */
void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR,
    boolean forHavingClause,
    String outerQueryAlias) throws SemanticException {
  ASTNode parentQueryJoinCond = null;

  if ( parentQueryExpression != null ) {
    ColumnInfo outerQueryCol = null;
    try {
      outerQueryCol = outerQueryRR.getExpression(parentQueryExpression);
    } catch(SemanticException ignored) {
      // Best effort: an unresolvable expression simply leaves outerQueryCol null.
    }

    ASTNode parentExpr = parentQueryExpression;
    if (!forHavingClause) {
      // NOTE(review): keySet() is a view for standard java.util maps, so the
      // remove() below would also drop the not-in-check alias from the map
      // returned by getRslvMap(). Behavior kept as is; confirm this side
      // effect on the outer RowResolver is intended.
      Set<String> aliases = outerQueryRR.getRslvMap().keySet();
      if (notInCheck != null) {
        aliases.remove(notInCheck.getAlias());
      }
      // A table alias is only supplied when exactly one remains.
      String tableAlias = aliases.size() == 1 ? aliases.iterator().next() : null;
      parentExpr =
          SubQueryUtils.setQualifiedColumnReferences(parentExpr, tableAlias);
      if (parentExpr == null) {
        subQueryAST.setOrigin(originalSQASTOrigin);
        throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
            parentQueryExpression,
            "Correlating expression contains ambiguous column references."));
      }
    }

    parentQueryJoinCond = SubQueryUtils.buildOuterQryToSQJoinCond(
        parentExpr,
        alias,
        sqRR);

    if ( outerQueryCol != null ) {
      rewriteCorrConjunctForHaving(parentQueryJoinCond, true,
          outerQueryAlias, outerQueryRR, outerQueryCol);
    }
    subQueryDiagnostic.addJoinCondition(parentQueryJoinCond, outerQueryCol != null, true);
  }

  joinConditionAST = SubQueryUtils.andAST(parentQueryJoinCond, joinConditionAST);
  setJoinType();

  if ( joinType == JoinType.LEFTOUTER ) {
    if ( operator.getType() == SubQueryType.NOT_EXISTS && hasCorrelation ) {
      postJoinConditionAST = SubQueryUtils.buildPostJoinNullCheck(subQueryJoinAliasExprs);
    } else if ( operator.getType() == SubQueryType.NOT_IN ) {
      postJoinConditionAST = SubQueryUtils.buildOuterJoinPostCond(alias, sqRR);
    }
  }
}
/*
 * Combines the outer query filter with this SubQuery's post-join condition.
 * Without a post-join condition the filter is returned unchanged; otherwise
 * the condition is recorded in the diagnostic and either returned alone
 * (null filter) or AND-ed to the filter.
 */
ASTNode updateOuterQueryFilter(ASTNode outerQryFilter) {
  if (postJoinConditionAST != null) {
    subQueryDiagnostic.addPostJoinCondition(postJoinConditionAST);
    return (outerQryFilter == null)
        ? postJoinConditionAST
        : SubQueryUtils.andAST(outerQryFilter, postJoinConditionAST);
  }
  return outerQryFilter;
}
/* Returns "sq_corr_<n>" for the current counter value, then advances the counter. */
String getNextCorrExprAlias() {
  final int current = numCorrExprsinSQ;
  numCorrExprsinSQ = current + 1;
  return "sq_corr_" + current;
}
/*
* - If the SubQuery has no where clause, there is nothing to rewrite.
* - Decompose SubQuery where clause into list of Top level conjuncts.
* - For each conjunct
* - Break down the conjunct into (LeftExpr, LeftExprType, RightExpr,
* RightExprType)
* - If the top level operator is an Equality Operator we will break
* it down into left and right; in all other case there is only a
* lhs.
* - The ExprType is based on whether the Expr. refers to the Parent
* Query table sources, refers to the SubQuery sources or both.
* - We assume an unqualified Column refers to a SubQuery table source.
* This is because we require Parent Column references to be qualified
* within the SubQuery.
* - If the lhs or rhs expr refers to both Parent and SubQuery sources,
* we flag this as Unsupported.
* - If the conjunct as a whole, only refers to the Parent Query sources,
* we flag this as an Error.
* - A conjunct is Correlated if the lhs refers to SubQuery sources and rhs
* refers to Parent Query sources or the reverse.
* - Say the lhs refers to SubQuery and rhs refers to Parent Query sources; the
* other case is handled analogously.
* - remove this conjunct from the SubQuery where clause.
* - for the SubQuery expression(lhs) construct a new alias
* - in the correlated predicate, replace the SubQuery
* expression(lhs) with the alias AST.
* - add this altered predicate to the Join predicate tracked by the
* QBSubQuery object.
* - add the alias AST to a list of subQueryJoinAliasExprs. This
* list is used in the case of Outer Joins to add null check
* predicates to the Outer Query's where clause.
* - Add the SubQuery expression with the alias as a SelectItem to
* the SubQuery's SelectList.
* - In case this SubQuery contains aggregation expressions add this SubQuery
* expression to its GroupBy; add it to the front of the GroupBy.
* - If predicate is not correlated, let it remain in the SubQuery
* where clause.
* Additional things for Having clause:
* - A correlation predicate may refer to an aggregation expression.
* - This introduces 2 twists to the rewrite:
* a. When analyzing equality predicates we need to analyze each side
* to see if it is an aggregation expression from the Outer Query.
* So for e.g. this is a valid correlation predicate:
* R2.x = min(R1.y)
* Where R1 is an outer table reference, and R2 is a SubQuery table reference.
* b. When hoisting the correlation predicate to a join predicate, we need to
* rewrite it to be in the form the Join code allows: so the predicate needs
* to contain qualified column references.
* We handle this by generating a new name for the aggregation expression,
* like R1._gby_sq_col_1 and adding this mapping to the Outer Query's
* Row Resolver. Then we construct a joining predicate using this new
* name; so in our e.g. the condition would be: R2.x = R1._gby_sq_col_1
*/
/*
 * Splits the SubQuery where clause into its top level conjuncts. Correlated
 * conjuncts are hoisted into the join condition; all other conjuncts stay in
 * the where clause. A conjunct referring only to the outer query is rejected.
 * Does nothing when the SubQuery has no where clause.
 */
private void rewrite(RowResolver parentQueryRR,
    boolean forHavingClause,
    String outerQueryAlias, ASTNode insertClause, ASTNode selectClause) throws SemanticException {
  ASTNode whereClause = SubQueryUtils.subQueryWhere(insertClause);
  if ( whereClause == null ) {
    return;
  }

  ASTNode searchCond = (ASTNode) whereClause.getChild(0);
  List<ASTNode> conjuncts = new ArrayList<ASTNode>();
  SubQueryUtils.extractConjuncts(searchCond, conjuncts);

  ConjunctAnalyzer conjunctAnalyzer = new ConjunctAnalyzer(parentQueryRR,
      forHavingClause, outerQueryAlias);
  ASTNode sqNewSearchCond = null;

  for (ASTNode conjunctAST : conjuncts) {
    Conjunct conjunct = conjunctAnalyzer.analyzeConjunct(conjunctAST);

    /*
     * Check.12.h :: SubQuery predicates cannot only refer to Outer Query columns.
     */
    if ( conjunct.refersOuterOnly() ) {
      throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
          conjunctAST,
          "SubQuery expression refers to Outer query expressions only."));
    }

    if ( conjunct.isCorrelated() ) {
      hoistCorrelatedConjunct(conjunct, conjunctAST, parentQueryRR,
          forHavingClause, outerQueryAlias, selectClause);
    } else {
      sqNewSearchCond = SubQueryUtils.andAST(sqNewSearchCond, conjunctAST);
      subQueryDiagnostic.addWhereClauseRewrite(conjunctAST);
    }
  }

  if ( sqNewSearchCond != searchCond ) {
    if ( sqNewSearchCond == null ) {
      /*
       * for now just adding a true condition(1=1) to where clause.
       * Can remove the where clause from the AST; requires moving all subsequent children
       * left.
       */
      sqNewSearchCond = SubQueryUtils.constructTrueCond();
      subQueryDiagnostic.addWhereClauseRewrite("1 = 1");
    }
    whereClause.setChild(0, sqNewSearchCond);
  }
}

/*
 * Turns one correlated conjunct into a join predicate: the SubQuery side is
 * replaced by a reference to a new select-list alias, the SubQuery expression
 * is added to the select list (and to the Group By for aggregating
 * SubQueries), and the altered predicate is AND-ed to the join condition.
 */
private void hoistCorrelatedConjunct(Conjunct conjunct,
    ASTNode conjunctAST,
    RowResolver parentQueryRR,
    boolean forHavingClause,
    String outerQueryAlias,
    ASTNode selectClause) throws SemanticException {
  hasCorrelation = true;
  // Create the list once. It used to be re-created for every correlated
  // conjunct, which discarded the aliases of all but the last one.
  if ( subQueryJoinAliasExprs == null ) {
    subQueryJoinAliasExprs = new ArrayList<ASTNode>();
  }

  String exprAlias = getNextCorrExprAlias();
  ASTNode sqExprAlias = SubQueryUtils.createAliasAST(exprAlias);
  ASTNode sqExprForCorr = SubQueryUtils.createColRefAST(alias, exprAlias);

  // Which side holds the SubQuery expression; the other side is the outer one.
  final boolean sqExprOnLeft = conjunct.getLeftExprType().refersSubQuery();
  final ASTNode sqExpr = sqExprOnLeft ? conjunct.getLeftExpr() : conjunct.getRightExpr();
  final ColumnInfo outerColInfo =
      sqExprOnLeft ? conjunct.getRightOuterColInfo() : conjunct.getLeftOuterColInfo();

  // In a Having clause an outer expression with a ColumnInfo is replaced by
  // a generated column reference on the outer query alias.
  final boolean outerSideRewritten = forHavingClause && outerColInfo != null;
  if ( outerSideRewritten ) {
    rewriteCorrConjunctForHaving(conjunctAST, !sqExprOnLeft, outerQueryAlias,
        parentQueryRR, outerColInfo);
  }

  ASTNode joinPredicate = SubQueryUtils.alterCorrelatedPredicate(
      conjunctAST, sqExprForCorr, sqExprOnLeft);
  joinConditionAST = SubQueryUtils.andAST(joinConditionAST, joinPredicate);
  subQueryJoinAliasExprs.add(sqExprForCorr);

  ASTNode selExpr = SubQueryUtils.createSelectItem(sqExpr, sqExprAlias);
  selectClause.addChild(selExpr);
  subQueryDiagnostic.addSelectClauseRewrite(sqExpr, exprAlias);
  numOfCorrelationExprsAddedToSQSelect++;

  if ( containsAggregationExprs ) {
    ASTNode gBy = getSubQueryGroupByAST();
    SubQueryUtils.addGroupExpressionToFront(gBy, sqExpr);
    subQueryDiagnostic.addGByClauseRewrite(sqExpr);
  }

  if ( notInCheck != null ) {
    // Read after the predicate was altered, exactly as before.
    notInCheck.addCorrExpr((ASTNode) conjunctAST.getChild(sqExprOnLeft ? 0 : 1));
  }

  // The SubQuery side is always rewritten; the outer side only for Having.
  subQueryDiagnostic.addJoinCondition(conjunctAST,
      sqExprOnLeft || outerSideRewritten,
      !sqExprOnLeft || outerSideRewritten);
}
/*
* called if the SubQuery is Agg and Correlated.
* if SQ doesn't have a GroupBy, it is added to the SQ AST.
*/
/*
 * Returns the SubQuery's Group By AST, creating one if needed.
 *
 * An existing Group By is looked up as the 4th child (index 3) of the
 * SubQuery AST's 2nd child. Otherwise a new Group By is built, the
 * groupbyAddedToSQ flag is set, and the new node - followed by the SubQuery
 * AST's children from the last down to index 3 - is appended to the SubQuery AST.
 *
 * NOTE(review): the lookup inspects subQueryAST.getChild(1) while the new
 * node is appended to subQueryAST itself, so a later call would not find the
 * node added here - confirm this is intended.
 */
private ASTNode getSubQueryGroupByAST() {
  ASTNode groupBy = null;
  if ( subQueryAST.getChild(1).getChildCount() > 3 &&
      subQueryAST.getChild(1).getChild(3).getType() == HiveParser.TOK_GROUPBY ) {
    groupBy = (ASTNode) subQueryAST.getChild(1).getChild(3);
  }

  if ( groupBy != null ) {
    return groupBy;
  }

  groupBy = SubQueryUtils.buildGroupBy();
  groupbyAddedToSQ = true;

  List<ASTNode> newChildren = new ArrayList<ASTNode>();
  newChildren.add(groupBy);
  if ( subQueryAST.getChildCount() > 3) {
    for( int i = subQueryAST.getChildCount() - 1; i >= 3; i-- ) {
      ASTNode child = (ASTNode) subQueryAST.getChild(i);
      newChildren.add(child);
    }
  }

  for(ASTNode child : newChildren ) {
    subQueryAST.addChild(child);
  }

  subQueryDiagnostic.setAddGroupByClause();

  return groupBy;
}
/** @return the identifier of the outer query given at construction */
public String getOuterQueryId() {
  return this.outerQueryId;
}
/** @return the join type; assigned by {@code setJoinType()}, null before that */
public JoinType getJoinType() {
  return this.joinType;
}
/** @return the alias of this SubQuery ("sq_" followed by its index) */
public String getAlias() {
  return this.alias;
}
/** @return the join condition AST built so far; may be null */
public ASTNode getJoinConditionAST() {
  return this.joinConditionAST;
}
/** @return number of exprs from correlated predicates added to the SubQuery select list */
public int getNumOfCorrelationExprsAddedToSQSelect() {
  return this.numOfCorrelationExprsAddedToSQSelect;
}
/** @return the diagnostic object recording the rewrites of this SubQuery */
public QBSubQueryRewrite getDiagnostic() {
  return this.subQueryDiagnostic;
}
/** @return this object */
public QBSubQuery getSubQuery() {
  final QBSubQuery self = this;
  return self;
}
/* Returns the Not In null check; non-null only for NOT_IN SubQueries. */
NotInCheck getNotInCheck() {
  return this.notInCheck;
}
/*
 * Replaces one side of the given conjunct (child 0 when refersLeft, else
 * child 1) by a column reference "<outerQueryAlias>._gby_sq_col_<n>" and
 * registers that name for outerQueryCol in the outer RowResolver.
 */
private void rewriteCorrConjunctForHaving(ASTNode conjunctASTNode,
    boolean refersLeft,
    String outerQueryAlias,
    RowResolver outerQueryRR,
    ColumnInfo outerQueryCol) {
  final int seq = numOuterCorrExprsForHaving;
  numOuterCorrExprsForHaving = seq + 1;
  final String generatedName = "_gby_sq_col_" + seq;

  final ASTNode outerColRef = SubQueryUtils.createColRefAST(outerQueryAlias, generatedName);
  final int side = refersLeft ? 0 : 1;
  conjunctASTNode.setChild(side, outerColRef);

  outerQueryRR.put(outerQueryAlias, generatedName, outerQueryCol);
}
}