Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.correlation;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.DemuxOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationOptimizer.CorrelationNodeProcCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.DemuxDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.MuxDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
/**
* QueryPlanTreeTransformation contains static methods used to transform
* the query plan tree (operator tree) based on the correlation we have
* detected by Correlation Optimizer.
*/
public class QueryPlanTreeTransformation {
private static final Log LOG = LogFactory.getLog(QueryPlanTreeTransformation.class.getName());
private static void setNewTag(IntraQueryCorrelation correlation,
List> childrenOfDemux,
ReduceSinkOperator rsop, Map bottomRSToNewTag)
throws SemanticException {
int newTag = bottomRSToNewTag.get(rsop);
int oldTag = rsop.getConf().getTag();
if (oldTag == -1) {
// if this child of DemuxOperator does not use tag, we just set the oldTag to 0.
oldTag = 0;
}
Operator child = CorrelationUtilities.getSingleChild(rsop, true);
if (!childrenOfDemux.contains(child)) {
childrenOfDemux.add(child);
}
int childIndex = childrenOfDemux.indexOf(child);
correlation.setNewTag(newTag, oldTag, childIndex);
rsop.getConf().setTag(newTag);
}
/**
* Based on the correlation, we transform the query plan tree (operator tree).
* In here, we first create DemuxOperator and all bottom ReduceSinkOperators
* (bottom means near TableScanOperaotr) in the correlation will be be
* the parents of the DemuxOperaotr. We also reassign tags to those
* ReduceSinkOperators. Then, we use MuxOperators to replace ReduceSinkOperators
* which are not bottom ones in this correlation.
* Example: The original operator tree is ...
* JOIN2
* / \
* RS4 RS5
* / \
* GBY1 JOIN1
* | / \
* RS1 RS2 RS3
* If GBY1, JOIN1, and JOIN2 can be executed in the same reducer
* (optimized by Correlation Optimizer).
* The new operator tree will be ...
* JOIN2
* |
* MUX
* / \
* GBY1 JOIN1
* \ /
* DEMUX
* / | \
* / | \
* / | \
* RS1 RS2 RS3
* @param pCtx
* @param corrCtx
* @param correlation
* @throws SemanticException
*/
protected static void applyCorrelation(
ParseContext pCtx,
CorrelationNodeProcCtx corrCtx,
IntraQueryCorrelation correlation)
throws SemanticException {
final List bottomReduceSinkOperators =
correlation.getBottomReduceSinkOperators();
final int numReducers = correlation.getNumReducers();
List> childrenOfDemux =
new ArrayList>();
List> parentRSsOfDemux =
new ArrayList>();
Map childIndexToOriginalNumParents =
new HashMap();
List keysSerializeInfos = new ArrayList();
List valuessSerializeInfos = new ArrayList();
Map bottomRSToNewTag =
new HashMap();
int newTag = 0;
for (ReduceSinkOperator rsop: bottomReduceSinkOperators) {
rsop.getConf().setNumReducers(numReducers);
bottomRSToNewTag.put(rsop, newTag);
parentRSsOfDemux.add(rsop);
keysSerializeInfos.add(rsop.getConf().getKeySerializeInfo());
valuessSerializeInfos.add(rsop.getConf().getValueSerializeInfo());
Operator child = CorrelationUtilities.getSingleChild(rsop, true);
if (!childrenOfDemux.contains(child)) {
childrenOfDemux.add(child);
int childIndex = childrenOfDemux.size() - 1;
childIndexToOriginalNumParents.put(childIndex, child.getNumParent());
}
newTag++;
}
for (ReduceSinkOperator rsop: bottomReduceSinkOperators) {
setNewTag(correlation, childrenOfDemux, rsop, bottomRSToNewTag);
}
// Create the DemuxOperaotr
DemuxDesc demuxDesc =
new DemuxDesc(
correlation.getNewTagToOldTag(),
correlation.getNewTagToChildIndex(),
childIndexToOriginalNumParents,
keysSerializeInfos,
valuessSerializeInfos);
Operator demuxOp = OperatorFactory.get(demuxDesc);
demuxOp.setChildOperators(childrenOfDemux);
demuxOp.setParentOperators(parentRSsOfDemux);
for (Operator child: childrenOfDemux) {
List> parentsWithMultipleDemux =
new ArrayList>();
boolean hasBottomReduceSinkOperators = false;
boolean hasNonBottomReduceSinkOperators = false;
for (int i = 0; i < child.getParentOperators().size(); i++) {
Operator p = child.getParentOperators().get(i);
assert p instanceof ReduceSinkOperator;
ReduceSinkOperator rsop = (ReduceSinkOperator)p;
if (bottomReduceSinkOperators.contains(rsop)) {
hasBottomReduceSinkOperators = true;
parentsWithMultipleDemux.add(demuxOp);
} else {
hasNonBottomReduceSinkOperators = true;
parentsWithMultipleDemux.add(rsop);
}
}
if (hasBottomReduceSinkOperators && hasNonBottomReduceSinkOperators) {
child.setParentOperators(parentsWithMultipleDemux);
} else {
child.setParentOperators(Utilities.makeList(demuxOp));
}
}
for (Operator parent: parentRSsOfDemux) {
parent.setChildOperators(Utilities.makeList(demuxOp));
}
// replace all ReduceSinkOperators which are not at the bottom of
// this correlation to MuxOperators
Set handledRSs = new HashSet();
for (ReduceSinkOperator rsop : correlation.getAllReduceSinkOperators()) {
if (!bottomReduceSinkOperators.contains(rsop)) {
if (handledRSs.contains(rsop)) {
continue;
}
Operator childOP =
CorrelationUtilities.getSingleChild(rsop, true);
if (childOP instanceof GroupByOperator) {
CorrelationUtilities.removeReduceSinkForGroupBy(
rsop, (GroupByOperator)childOP, pCtx, corrCtx);
List> parentsOfMux =
new ArrayList>();
Operator parentOp =
CorrelationUtilities.getSingleParent(childOP, true);
parentsOfMux.add(parentOp);
Operator mux = OperatorFactory.get(
new MuxDesc(parentsOfMux));
mux.setChildOperators(Utilities.makeList(childOP));
mux.setParentOperators(parentsOfMux);
childOP.setParentOperators(Utilities.makeList(mux));
parentOp.setChildOperators(Utilities.makeList(mux));
} else {
List> parentsOfMux =
new ArrayList>();
List> siblingOPs =
CorrelationUtilities.findSiblingOperators(rsop);
for (Operator op: siblingOPs) {
if (op instanceof DemuxOperator) {
parentsOfMux.add(op);
} else if (op instanceof ReduceSinkOperator){
GroupByOperator pGBYm =
CorrelationUtilities.getSingleParent(op, GroupByOperator.class);
if (pGBYm != null && pGBYm.getConf().getMode() == GroupByDesc.Mode.HASH) {
// We get a semi join at here.
// This map-side GroupByOperator needs to be removed
CorrelationUtilities.removeOperator(
pGBYm, op, CorrelationUtilities.getSingleParent(pGBYm, true), pCtx);
}
handledRSs.add((ReduceSinkOperator)op);
parentsOfMux.add(CorrelationUtilities.getSingleParent(op, true));
} else {
throw new SemanticException("An slibing of ReduceSinkOperator is nethier a " +
"DemuxOperator nor a ReduceSinkOperator");
}
}
MuxDesc muxDesc = new MuxDesc(siblingOPs);
Operator mux = OperatorFactory.get(muxDesc);
mux.setChildOperators(Utilities.makeList(childOP));
mux.setParentOperators(parentsOfMux);
for (Operator op: parentsOfMux) {
if (op instanceof DemuxOperator) {
// op is a DemuxOperator and it directly connects to childOP.
// We will add this MuxOperator between DemuxOperator
// and childOP.
if (op.getChildOperators().contains(childOP)) {
op.replaceChild(childOP, mux);
}
} else {
// op is not a DemuxOperator, so it should have
// a single child.
op.setChildOperators(Utilities.makeList(mux));
}
}
childOP.setParentOperators(Utilities.makeList(mux));
}
}
}
for (ReduceSinkOperator rsop: handledRSs) {
rsop.setChildOperators(null);
rsop.setParentOperators(null);
}
}
}