org.apache.hadoop.hive.ql.exec.ExplainSQRewriteTask
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.exec;

import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME;

import java.io.OutputStream;
import java.io.PrintStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import org.antlr.runtime.TokenRewriteStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.QB;
import org.apache.hadoop.hive.ql.parse.QBSubQuery;
import org.apache.hadoop.hive.ql.parse.SubQueryDiagnostic;
import org.apache.hadoop.hive.ql.plan.ExplainSQRewriteWork;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
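
/**
 * Task behind EXPLAIN REWRITE: for each WHERE/HAVING subquery predicate it
 * prints the subquery rewritten as a join together with its joining
 * condition, and finally the complete rewritten query text.
 */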
public class ExplainSQRewriteTask extends Task<ExplainSQRewriteWork> implements Serializable {
  private static final long serialVersionUID = 1L;
  private final Logger LOG = LoggerFactory.getLogger(this.getClass().getName());

  @Override
  public StageType getType() {
    return StageType.EXPLAIN;
  }
  @Override
  public int execute() {
    PrintStream out = null;
    try {
      Path resFile = new Path(work.getResFile());
      OutputStream outS = resFile.getFileSystem(conf).create(resFile);
      out = new PrintStream(outS);

      QB qb = work.getQb();
      TokenRewriteStream stream = work.getCtx().getTokenRewriteStream();
      // collect all edits under a named ANTLR rewrite program so they can be
      // cleanly discarded once the rewritten text has been printed
      String program = "sq rewrite";
      ASTNode ast = work.getAst();

      try {
        addRewrites(stream, qb, program, out);
        out.println("\nRewritten Query:\n" + stream.toString(program,
            ast.getTokenStartIndex(), ast.getTokenStopIndex()));
      } finally {
        stream.deleteProgram(program);
      }

      out.close();
      out = null;
      return 0;
    }
    catch (Exception e) {
      setException(e);
      LOG.error("Failed to execute", e);
      return 1;
    }
    finally {
      IOUtils.closeStream(out);
    }
  }
  void addRewrites(TokenRewriteStream stream, QB qb, String program,
      PrintStream out) {
    QBSubQuery sqW = qb.getWhereClauseSubQueryPredicate();
    QBSubQuery sqH = qb.getHavingClauseSubQueryPredicate();

    if (sqW != null || sqH != null) {
      ASTNode sqNode = sqW != null ? sqW.getOriginalSubQueryASTForRewrite()
          : sqH.getOriginalSubQueryASTForRewrite();
      ASTNode tokQry = getQueryASTNode(sqNode);
      ASTNode tokFrom = (ASTNode) tokQry.getChild(0);

      StringBuilder addedJoins = new StringBuilder();

      if (sqW != null) {
        addRewrites(stream, sqW, program, out, qb.getId(), true, addedJoins);
      }

      if (sqH != null) {
        addRewrites(stream, sqH, program, out, qb.getId(), false, addedJoins);
      }
      // the joins generated for the subquery predicates are spliced in
      // immediately after the outer query's FROM clause
      stream.insertAfter(program, tokFrom.getTokenStopIndex(), addedJoins);
    }

    // recurse into subqueries that appear in the FROM clause
    Set<String> sqAliases = qb.getSubqAliases();
    for (String sqAlias : sqAliases) {
      addRewrites(stream, qb.getSubqForAlias(sqAlias).getQB(), program, out);
    }
  }
  void addRewrites(TokenRewriteStream stream, QBSubQuery sq, String program,
      PrintStream out, String qbAlias, boolean isWhere, StringBuilder addedJoins) {
    ASTNode sqNode = sq.getOriginalSubQueryASTForRewrite();
    ASTNode tokQry = getQueryASTNode(sqNode);
    ASTNode tokInsert = (ASTNode) tokQry.getChild(1);

    ASTNode tokWhere = null;
    for (int i = 0; i < tokInsert.getChildCount(); i++) {
      if (tokInsert.getChild(i).getType() == HiveParser.TOK_WHERE) {
        tokWhere = (ASTNode) tokInsert.getChild(i);
        break;
      }
    }

    SubQueryDiagnostic.QBSubQueryRewrite diag = sq.getDiagnostic();
    String sqStr = diag.getRewrittenQuery();
    String joinCond = diag.getJoiningCondition();

    /*
     * The SubQuery predicate has been hoisted as a Join. The SubQuery
     * predicate is replaced by a 'true' predicate in the Outer QB's
     * where/having clause.
     */
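    /*
     * For example (a hypothetical query, not from this source): given
     *   select * from t where t.key in (select key from s)
     * the "in" predicate becomes "1 = 1" below, and something like
     *   left semi join (select key from s) sq_1 on t.key = sq_1.key
     * is accumulated in addedJoins, to be spliced in after the outer
     * FROM clause by the caller.
     */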
    stream.replace(program, sqNode.getTokenStartIndex(),
        sqNode.getTokenStopIndex(),
        "1 = 1");

    String sqJoin = " " +
        getJoinKeyWord(sq) +
        " " +
        sqStr +
        " " +
        joinCond;
    addedJoins.append(" ").append(sqJoin);

    String postJoinCond = diag.getOuterQueryPostJoinCond();
    if (postJoinCond != null) {
      stream.insertAfter(program, tokWhere.getTokenStopIndex(), " and " + postJoinCond);
    }

    String qualifier = isWhere ? "Where Clause " : "Having Clause ";
    if (qbAlias != null) {
      qualifier = qualifier + "for Query Block '" + qbAlias + "' ";
    }

    out.println(String.format("\n%s Rewritten SubQuery:\n%s",
        qualifier, diag.getRewrittenQuery()));
    out.println(String.format("\n%s SubQuery Joining Condition:\n%s",
        qualifier, diag.getJoiningCondition()));
  }
  private String getJoinKeyWord(QBSubQuery sq) {
    switch (sq.getJoinType()) {
    case LEFTOUTER:
      return "left outer join";
    case LEFTSEMI:
      return "left semi join";
    case RIGHTOUTER:
      return "right outer join";
    case FULLOUTER:
      return "full outer join";
    case INNER:
    default:
      return "inner join";
    }
  }
  /*
   * Walk up the AST from the given node to the enclosing TOK_QUERY node.
   */
  private ASTNode getQueryASTNode(ASTNode node) {
    while (node != null && node.getType() != HiveParser.TOK_QUERY) {
      node = (ASTNode) node.getParent();
    }
    return node;
  }
  @Override
  public String getName() {
    return "EXPLAIN REWRITE";
  }

  public List<FieldSchema> getResultSchema() {
    // the result is a single string column carrying the explain output
    FieldSchema tmpFieldSchema = new FieldSchema();
    List<FieldSchema> colList = new ArrayList<FieldSchema>();

    tmpFieldSchema.setName(ExplainTask.EXPL_COLUMN_NAME);
    tmpFieldSchema.setType(STRING_TYPE_NAME);

    colList.add(tmpFieldSchema);
    return colList;
  }

  @Override
  public boolean canExecuteInParallel() {
    return false;
  }
}
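
For context: judging by getName() above, this task backs Hive's EXPLAIN REWRITE statement, which prints how subquery predicates are rewritten into joins instead of executing the query. A minimal usage sketch, with hypothetical table names:

    EXPLAIN REWRITE
    SELECT *
    FROM src
    WHERE src.key IN (SELECT key FROM src2);

The output contains each rewritten subquery, its joining condition, and the fully rewritten query text, as produced by execute() and addRewrites() above.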