org.apache.hadoop.hive.ql.optimizer.correlation.IntraQueryCorrelation Maven / Gradle / Ivy
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.correlation;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
/**
 * IntraQueryCorrelation records a sub-tree of the query plan tree which can be
 * evaluated in a single MR job. The boundary of this sub-tree is recorded by
 * the ReduceSinkOperators at the bottom of this sub-tree.
 * Also, allReduceSinkOperators in IntraQueryCorrelation contains all
 * ReduceSinkOperators of this sub-tree.
 */
public class IntraQueryCorrelation {

  // Flag stored by setJobFlowCorrelation and reported by hasJobFlowCorrelation.
  private boolean jobFlowCorrelation;

  // The bottom layer ReduceSinkOperators. These ReduceSinkOperators are used
  // to record the boundary of this sub-tree which can be evaluated in a single MR
  // job. Stays null until setJobFlowCorrelation is called.
  private List<ReduceSinkOperator> bottomReduceSinkOperators;

  // The number of reducer(s) that should be used for those bottom layer
  // ReduceSinkOperators. Starts at -1, meaning "not decided yet".
  private int numReducers;

  // This is the min number of reducer(s) for the bottom layer ReduceSinkOperators to avoid
  // a query being executed on too small a number of reducers.
  private final int minReducers;

  // All ReduceSinkOperators in this sub-tree. This set is used when we start to remove
  // unnecessary ReduceSinkOperators.
  private final Set<ReduceSinkOperator> allReduceSinkOperators;

  // Since we merge multiple operation paths, we assign new tags to bottom layer
  // ReduceSinkOperators. This mapping is used to map new tags to original tags associated
  // to these bottom layer ReduceSinkOperators.
  private final Map<Integer, Integer> newTagToOldTag;

  // A map from new tags to indices of children of DemuxOperator (the first Operator at the
  // Reduce side of optimized plan)
  private final Map<Integer, Integer> newTagToChildIndex;

  /**
   * Creates an empty correlation: no job flow correlation, no operators or tags
   * recorded, and the number of reducers undecided (-1).
   *
   * @param minReducers lower bound used by {@link #adjustNumReducers(int)} to reject
   *          positive reducer counts that are too small
   */
  public IntraQueryCorrelation(int minReducers) {
    this.jobFlowCorrelation = false;
    this.numReducers = -1;
    this.minReducers = minReducers;
    this.allReduceSinkOperators = new HashSet<ReduceSinkOperator>();
    this.newTagToOldTag = new HashMap<Integer, Integer>();
    this.newTagToChildIndex = new HashMap<Integer, Integer>();
  }

  /**
   * @return the internal (live, mutable) map from new tags to original tags
   */
  public Map<Integer, Integer> getNewTagToOldTag() {
    return newTagToOldTag;
  }

  /**
   * @return the internal (live, mutable) map from new tags to child indices
   */
  public Map<Integer, Integer> getNewTagToChildIndex() {
    return newTagToChildIndex;
  }

  /**
   * Registers a new tag in both tag maps. An existing entry for the same
   * new tag is overwritten.
   *
   * @param newTag the newly assigned tag (key of both maps)
   * @param oldTag the original tag that {@code newTag} replaces
   * @param childIndex the child index associated with {@code newTag}
   */
  public void setNewTag(Integer newTag, Integer oldTag, Integer childIndex) {
    newTagToOldTag.put(newTag, oldTag);
    newTagToChildIndex.put(newTag, childIndex);
  }

  /**
   * Adds an operator to the set of all ReduceSinkOperators of this sub-tree.
   *
   * @param rsop the operator to record
   */
  public void addToAllReduceSinkOperators(ReduceSinkOperator rsop) {
    allReduceSinkOperators.add(rsop);
  }

  /**
   * @return the internal (live, mutable) set of all recorded ReduceSinkOperators
   */
  public Set<ReduceSinkOperator> getAllReduceSinkOperators() {
    return allReduceSinkOperators;
  }

  /**
   * Stores the job flow correlation flag together with the bottom layer
   * ReduceSinkOperators. The list is stored by reference, not copied.
   *
   * @param jobFlowCorrelation value later reported by {@link #hasJobFlowCorrelation()}
   * @param bottomReduceSinkOperators value later returned by
   *          {@link #getBottomReduceSinkOperators()}
   */
  public void setJobFlowCorrelation(boolean jobFlowCorrelation,
      List<ReduceSinkOperator> bottomReduceSinkOperators) {
    this.jobFlowCorrelation = jobFlowCorrelation;
    this.bottomReduceSinkOperators = bottomReduceSinkOperators;
  }

  /**
   * @return the flag last stored by {@link #setJobFlowCorrelation(boolean, List)},
   *         or {@code false} if it was never called
   */
  public boolean hasJobFlowCorrelation() {
    return jobFlowCorrelation;
  }

  /**
   * @return the list last stored by {@link #setJobFlowCorrelation(boolean, List)},
   *         or {@code null} if it was never called
   */
  public List<ReduceSinkOperator> getBottomReduceSinkOperators() {
    return bottomReduceSinkOperators;
  }

  /**
   * @return the agreed number of reducers, or -1 if none has been accepted yet
   */
  public int getNumReducers() {
    return numReducers;
  }

  /**
   * Tries to reconcile a candidate number of reducers with this correlation.
   *
   * <ul>
   * <li>A negative candidate is always accepted and changes nothing.</li>
   * <li>A positive candidate below {@code minReducers} is rejected.</li>
   * <li>A positive candidate is adopted if no number has been fixed yet; otherwise it
   * is accepted only if it equals the number already fixed.</li>
   * </ul>
   *
   * Zero is guarded only by an {@code assert}; when assertions are disabled it falls
   * through to the "accepted, nothing changed" result.
   *
   * @param newNumReducers the candidate number of reducers; must not be 0
   * @return {@code true} if the candidate is compatible with this correlation,
   *         {@code false} if it must not be treated as correlated
   */
  public boolean adjustNumReducers(int newNumReducers) {
    assert newNumReducers != 0;
    if (newNumReducers > 0) {
      // If the new numReducer is less than minReducer, we will not consider
      // ReduceSinkOperator with this newNumReducer as a correlated ReduceSinkOperator
      if (newNumReducers < minReducers) {
        return false;
      }
      if (numReducers > 0) {
        if (newNumReducers != numReducers) {
          // If (numReducers > 0 && newNumReducers > 0 && newNumReducers != numReducers),
          // we will not consider ReduceSinkOperator with this newNumReducer as a correlated
          // ReduceSinkOperator
          return false;
        }
      } else {
        // if numReducers < 0 and newNumReducers > 0
        numReducers = newNumReducers;
      }
    }
    return true;
  }
}