// org.apache.hadoop.hive.ql.optimizer.SortedMergeBucketMapJoinOptimizer
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
//try to replace a bucket map join with a sorted merge map join
public class SortedMergeBucketMapJoinOptimizer implements Transform {

  private static final Log LOG = LogFactory
      .getLog(SortedMergeBucketMapJoinOptimizer.class.getName());

  public SortedMergeBucketMapJoinOptimizer() {
  }

  /**
   * Walks every join operator in the plan and records, in the shared processor
   * context, those joins that cannot be converted to a sort-merge join. A join
   * is kept as a candidate only when it sits directly on top of a reduce sink
   * and every operator below it on the walk path supports automatic sort-merge
   * conversion (see {@link #getCheckCandidateJoin()}).
   *
   * @param pctx           current parse context holding the operator tree
   * @param smbJoinContext processor context accumulating the rejected joins
   * @throws SemanticException if the graph walk fails
   */
  private void getListOfRejectedJoins(
      ParseContext pctx, SortBucketJoinProcCtx smbJoinContext)
      throws SemanticException {

    // Match every join operator; the processor decides whether to reject it.
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", JoinOperator.getOperatorName() + "%"),
        getCheckCandidateJoin());

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along.
    Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, smbJoinContext);
    GraphWalker ogw = new DefaultGraphWalker(disp);

    // Start walking from the top operator nodes (the table scans).
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getTopOps().values());
    ogw.startWalking(topNodes, null);
  }

  /**
   * Transforms the plan by replacing eligible bucket map joins (and, when
   * {@code HIVE_AUTO_SORTMERGE_JOIN} is enabled, plain joins) with sorted
   * merge bucket map joins.
   *
   * @param pctx current parse context; mutated in place and returned
   * @return the same parse context after conversion
   * @throws SemanticException if the graph walk fails
   */
  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {
    HiveConf conf = pctx.getConf();
    SortBucketJoinProcCtx smbJoinContext = new SortBucketJoinProcCtx(conf);

    // Get a list of joins which cannot be converted to a sort-merge join.
    // Only select and filter operators are allowed between the table scan and
    // the join currently. More operators can be added - the method
    // supportAutomaticSortMergeJoin dictates which operator is allowed.
    getListOfRejectedJoins(pctx, smbJoinContext);

    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    // Go through all map joins and find those which have bucket map join enabled.
    opRules.put(new RuleRegExp("R1", MapJoinOperator.getOperatorName() + "%"),
        getSortedMergeBucketMapjoinProc(pctx));
    // There is no need for the user to specify mapjoin for it to be
    // converted to sort-merge join.
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN)) {
      // Use the operator-name constant for consistency with rule R1 above.
      opRules.put(new RuleRegExp("R2", JoinOperator.getOperatorName() + "%"),
          getSortedMergeJoinProc(pctx));
    }

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along.
    Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, smbJoinContext);
    GraphWalker ogw = new DefaultGraphWalker(disp);

    // Create a list of top operator nodes and start the walk.
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getTopOps().values());
    ogw.startWalking(topNodes, null);
    return pctx;
  }

  private NodeProcessor getSortedMergeBucketMapjoinProc(ParseContext pctx) {
    return new SortedMergeBucketMapjoinProc(pctx);
  }

  private NodeProcessor getSortedMergeJoinProc(ParseContext pctx) {
    return new SortedMergeJoinProc(pctx);
  }

  /**
   * Returns a no-op processor fired when no rule matches a node.
   */
  private NodeProcessor getDefaultProc() {
    return new NodeProcessor() {
      @Override
      public Object process(Node nd, Stack<Node> stack,
          NodeProcessorCtx procCtx, Object... nodeOutputs)
          throws SemanticException {
        return null;
      }
    };
  }

  /**
   * Returns a processor that checks whether the join operator encountered is a
   * candidate for conversion to a sort-merge join. Joins that fail the check
   * are added to the rejected set in the processor context; the processor
   * always returns {@code null}.
   */
  private NodeProcessor getCheckCandidateJoin() {
    return new NodeProcessor() {
      @Override
      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
          Object... nodeOutputs) throws SemanticException {
        SortBucketJoinProcCtx smbJoinContext = (SortBucketJoinProcCtx) procCtx;
        JoinOperator joinOperator = (JoinOperator) nd;
        int size = stack.size();
        // The join must sit at the top of the walk path, directly above a
        // reduce sink; otherwise it cannot be converted.
        if (!(stack.get(size - 1) instanceof JoinOperator) ||
            !(stack.get(size - 2) instanceof ReduceSinkOperator)) {
          smbJoinContext.getRejectedJoinOps().add(joinOperator);
          return null;
        }

        // If any operator in the stack does not support an auto-conversion,
        // this join should not be converted.
        for (int pos = size - 3; pos >= 0; pos--) {
          @SuppressWarnings("unchecked")
          Operator<? extends OperatorDesc> op =
              (Operator<? extends OperatorDesc>) stack.get(pos);
          if (!op.supportAutomaticSortMergeJoin()) {
            smbJoinContext.getRejectedJoinOps().add(joinOperator);
            return null;
          }
        }
        return null;
      }
    };
  }
}