/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.plan;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
import org.apache.hadoop.mapred.JobConf;
/**
 * MergeJoinWork bundles the work items that make up a sort-merge join in a
 * single vertex: the main (big table) work plus the merged work items for
 * the small table side(s).
 */
public class MergeJoinWork extends BaseWork {

  private CommonMergeJoinOperator mergeJoinOp = null;
  private final List<BaseWork> mergeWorkList = new ArrayList<BaseWork>();
  private BaseWork bigTableWork;

  public MergeJoinWork() {
    super();
  }
  @Override
  public void replaceRoots(Map<Operator<?>, Operator<?>> replacementMap) {
    getMainWork().replaceRoots(replacementMap);
  }
  @Override
  public Set<Operator<?>> getAllRootOperators() {
    Set<Operator<?>> set = new HashSet<>();
    set.addAll(getMainWork().getAllRootOperators());
    for (BaseWork w : mergeWorkList) {
      set.addAll(w.getAllRootOperators());
    }
    return set;
  }
  @Override
  public Operator<?> getAnyRootOperator() {
    return getMainWork().getAnyRootOperator();
  }

  @Override
  public void configureJobConf(JobConf job) {
    // Intentionally a no-op; the merged work items configure themselves.
  }
  public CommonMergeJoinOperator getMergeJoinOperator() {
    return this.mergeJoinOp;
  }

  public void setMergeJoinOperator(CommonMergeJoinOperator mergeJoinOp) {
    this.mergeJoinOp = mergeJoinOp;
  }
  public void addMergedWork(BaseWork work, BaseWork connectWork,
      Map<Operator<?>, BaseWork> leafOperatorToFollowingWork) {
    if (work != null) {
      // There can be only one big-table work item.
      if ((bigTableWork != null) && (bigTableWork != work)) {
        assert false;
      }
      this.bigTableWork = work;
      setName(work.getName());
    }
    if (connectWork != null) {
      this.mergeWorkList.add(connectWork);
      if ((connectWork instanceof ReduceWork) && (bigTableWork != null)) {
        /*
         * For Tez to route data from an upstream vertex correctly to the following vertex, the
         * output name in the reduce sink needs to be set up appropriately. In the case of reduce
         * side merge work, we need to ensure that the parent work that provides data to this merge
         * work is set up to point to the right vertex name - the main work name.
         *
         * In this case the big table work has already been created, so we can hook up the merge
         * work items for the small table right away. (See the illustrative sketch after this
         * method.)
         */
        setReduceSinkOutputName(connectWork, leafOperatorToFollowingWork, bigTableWork.getName());
      }
    }
    if (work != null) {
      /*
       * Same reason as above. This is the case where the main work item arrives after the merge
       * work has been created for the small table side; the reduce sinks recorded so far are
       * patched up to point at the main work's name.
       */
      for (BaseWork mergeWork : mergeWorkList) {
        if (mergeWork instanceof ReduceWork) {
          setReduceSinkOutputName(mergeWork, leafOperatorToFollowingWork, work.getName());
        }
      }
    }
  }
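
  /*
   * Illustrative sketch, not part of the original class: how a caller such as
   * a Tez DAG generator might invoke addMergedWork in either order. The
   * variable names (smallTableReduceWork, bigTableMapWork, leafToWork) are
   * hypothetical.
   *
   *   MergeJoinWork mergeJoinWork = new MergeJoinWork();
   *   // Small-table side first: the big table work is not known yet, so the
   *   // reduce sink's output name cannot be assigned at this point.
   *   mergeJoinWork.addMergedWork(null, smallTableReduceWork, leafToWork);
   *   // Big-table side arrives: the small-table reduce sinks recorded so far
   *   // are back-patched to emit to the main work's vertex name.
   *   mergeJoinWork.addMergedWork(bigTableMapWork, null, leafToWork);
   */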
  private void setReduceSinkOutputName(BaseWork mergeWork,
      Map<Operator<?>, BaseWork> leafOperatorToFollowingWork, String name) {
    // Reverse lookup: find the reduce sink(s) whose following work is this
    // merge work, and point their output at the given vertex name.
    for (Entry<Operator<?>, BaseWork> entry : leafOperatorToFollowingWork.entrySet()) {
      if (entry.getValue() == mergeWork) {
        ((ReduceSinkOperator) entry.getKey()).getConf().setOutputName(name);
      }
    }
  }
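
  /*
   * A minimal worked example of the lookup above, with hypothetical contents:
   * if leafOperatorToFollowingWork maps { RS_small -> mergeWork, RS_other ->
   * someOtherWork } and name is "Map 1", the scan matches only RS_small and
   * calls RS_small.getConf().setOutputName("Map 1"), leaving RS_other
   * untouched.
   */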
  @Explain(skipHeader=true, displayName = "Join", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
  public List<BaseWork> getBaseWorkList() {
    return mergeWorkList;
  }

  public String getBigTableAlias() {
    // Assumes the big table side is map work with at least one alias.
    return ((MapWork) bigTableWork).getAliasToWork().keySet().iterator().next();
  }
  @Explain(skipHeader=true, displayName = "Main", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
  public BaseWork getMainWork() {
    return bigTableWork;
  }

  @Override
  public void setDummyOps(List<HashTableDummyOperator> dummyOps) {
    getMainWork().setDummyOps(dummyOps);
  }

  @Override
  public List<HashTableDummyOperator> getDummyOps() {
    return getMainWork().getDummyOps();
  }
  @Override
  public void setVectorMode(boolean vectorMode) {
    getMainWork().setVectorMode(vectorMode);
  }

  @Override
  public boolean getVectorMode() {
    return getMainWork().getVectorMode();
  }

  @Override
  public void setUberMode(boolean uberMode) {
    getMainWork().setUberMode(uberMode);
  }

  @Override
  public boolean getUberMode() {
    return getMainWork().getUberMode();
  }

  @Override
  public void setLlapMode(boolean llapMode) {
    getMainWork().setLlapMode(llapMode);
  }

  @Override
  public boolean getLlapMode() {
    return getMainWork().getLlapMode();
  }

  public void addDummyOp(HashTableDummyOperator dummyOp) {
    getMainWork().addDummyOp(dummyOp);
  }
}