org.apache.hadoop.hive.ql.optimizer.ReduceSinkDeDuplication Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hive-exec Show documentation
Show all versions of hive-exec Show documentation
Hive is a data warehouse infrastructure built on top of Hadoop; see
http://wiki.apache.org/hadoop/Hive
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer;
import java.io.Serializable;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.ExtractOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.ForwardOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.ScriptOperator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.OpParseContext;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVECONVERTJOIN;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASK;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATIONMINREDUCER;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESCRIPTOPERATORTRUST;
/**
* If two reduce sink operators share the same partition/sort columns and order,
* they can be merged. This should happen after map join optimization because map
* join optimization will remove reduce sink operators.
*/
public class ReduceSinkDeDuplication implements Transform{
// Operator names used to build the rule patterns registered in transform().
private static final String RS = ReduceSinkOperator.getOperatorName();
private static final String GBY = GroupByOperator.getOperatorName();
private static final String JOIN = JoinOperator.getOperatorName();
// Parse context of the plan being optimized; assigned at the start of transform().
protected ParseContext pGraphContext;
/**
 * Runs reduce sink de-duplication over the operator tree of the given parse context.
 * Registers one processor per supported pattern (RS-...-RS, RS-GBY-...-RS and, when
 * join conversion is disabled, JOIN-...-RS) and walks the plan from its top operators.
 *
 * @param pctx parse context holding the operator tree and the configuration
 * @return the same parse context that was passed in
 * @throws SemanticException propagated from the graph walk
 */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  pGraphContext = pctx;

  // state shared by every processor fired during this walk
  ReduceSinkDeduplicateProcCtx cppCtx = new ReduceSinkDeduplicateProcCtx(pGraphContext);

  // JOIN-RS merging is only registered when both join conversion flags are off
  boolean mergeJoins = !pctx.getConf().getBoolVar(HIVECONVERTJOIN) &&
      !pctx.getConf().getBoolVar(HIVECONVERTJOINNOCONDITIONALTASK);

  // insertion-ordered so that rules are registered in the order R1, R2, R3
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1", RS + "%.*%" + RS + "%"),
      ReduceSinkDeduplicateProcFactory.getReducerReducerProc());
  opRules.put(new RuleRegExp("R2", RS + "%" + GBY + "%.*%" + RS + "%"),
      ReduceSinkDeduplicateProcFactory.getGroupbyReducerProc());
  if (mergeJoins) {
    opRules.put(new RuleRegExp("R3", JOIN + "%.*%" + RS + "%"),
        ReduceSinkDeduplicateProcFactory.getJoinReducerProc());
  }
  // TODO RS+JOIN

  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  Dispatcher disp = new DefaultRuleDispatcher(ReduceSinkDeduplicateProcFactory
      .getDefaultProc(), opRules, cppCtx);
  GraphWalker ogw = new DefaultGraphWalker(disp);

  // Create a list of topop nodes
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pGraphContext.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pGraphContext;
}
class ReduceSinkDeduplicateProcCtx implements NodeProcessorCtx {
ParseContext pctx;
boolean trustScript;
// min reducer num for merged RS (to avoid query contains "order by" executed by one reducer)
int minReducer;
Set> removedOps;
public ReduceSinkDeduplicateProcCtx(ParseContext pctx) {
removedOps = new HashSet>();
trustScript = pctx.getConf().getBoolVar(HIVESCRIPTOPERATORTRUST);
minReducer = pctx.getConf().getIntVar(HIVEOPTREDUCEDEDUPLICATIONMINREDUCER);
this.pctx = pctx;
}
public boolean contains(Operator> rsOp) {
return removedOps.contains(rsOp);
}
public boolean addRemovedOperator(Operator> rsOp) {
return removedOps.add(rsOp);
}
public ParseContext getPctx() {
return pctx;
}
public void setPctx(ParseContext pctx) {
this.pctx = pctx;
}
}
/**
 * Creates the node processors used by the de-duplication walk. Every call returns
 * a new processor instance.
 */
static class ReduceSinkDeduplicateProcFactory {

  /** @return processor used when no rule matches; it does nothing */
  public static NodeProcessor getDefaultProc() {
    NodeProcessor fallback = new DefaultProc();
    return fallback;
  }

  /** @return processor for the RS-...-RS pattern */
  public static NodeProcessor getReducerReducerProc() {
    NodeProcessor rsOverRs = new ReducerReducerProc();
    return rsOverRs;
  }

  /** @return processor for the RS-GBY-...-RS pattern */
  public static NodeProcessor getGroupbyReducerProc() {
    NodeProcessor gbyOverRs = new GroupbyReducerProc();
    return gbyOverRs;
  }

  /** @return processor for the JOIN-...-RS pattern */
  public static NodeProcessor getJoinReducerProc() {
    NodeProcessor joinOverRs = new JoinReducerProc();
    return joinOverRs;
  }
}
/**
 * Processor registered as the dispatcher default: it does nothing and returns null.
 */
static class DefaultProc implements NodeProcessor {
  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    return null;
  }
}
public abstract static class AbsctractReducerReducerProc implements NodeProcessor {
// context of the walk; assigned on every call to process(Node, ...)
ReduceSinkDeduplicateProcCtx dedupCtx;

/** @return the trustScript setting held by the current context */
protected boolean trustScript() {
  return this.dedupCtx.trustScript;
}

/** @return the minReducer setting held by the current context */
protected int minReducer() {
  return this.dedupCtx.minReducer;
}
public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
dedupCtx = (ReduceSinkDeduplicateProcCtx) procCtx;
if (dedupCtx.contains((Operator>) nd)) {
return false;
}
ReduceSinkOperator cRS = (ReduceSinkOperator) nd;
Operator> child = getSingleChild(cRS);
if (child instanceof JoinOperator) {
return false; // not supported
}
ParseContext pctx = dedupCtx.getPctx();
if (child instanceof GroupByOperator) {
GroupByOperator cGBY = (GroupByOperator) child;
if (!hasGroupingSet(cRS) && !cGBY.getConf().isGroupingSetsPresent()) {
return process(cRS, cGBY, pctx);
}
return false;
}
if (child instanceof ExtractOperator) {
return process(cRS, pctx);
}
return false;
}
/**
 * Tells whether the single parent of {@code cRS} is a group-by whose descriptor
 * reports grouping sets.
 */
private boolean hasGroupingSet(ReduceSinkOperator cRS) {
  GroupByOperator parentGby = getSingleParent(cRS, GroupByOperator.class);
  return parentGby != null && parentGby.getConf().isGroupingSetsPresent();
}
protected Operator> getSingleParent(Operator> operator) {
List> parents = operator.getParentOperators();
if (parents != null && parents.size() == 1) {
return parents.get(0);
}
return null;
}
protected Operator> getSingleChild(Operator> operator) {
List> children = operator.getChildOperators();
if (children != null && children.size() == 1) {
return children.get(0);
}
return null;
}
protected T getSingleParent(Operator> operator, Class type) {
Operator> parent = getSingleParent(operator);
return type.isInstance(parent) ? (T)parent : null;
}
/**
 * Handles a reduce sink whose single child is an ExtractOperator; called from
 * process(Node, ...). The implementations in this file return true when the
 * reduce sink was merged away and false otherwise.
 */
protected abstract Object process(ReduceSinkOperator cRS, ParseContext context)
throws SemanticException;
/**
 * Handles a reduce sink whose single child is the group-by cGBY; called from
 * process(Node, ...) only when no grouping sets are involved. The implementations
 * in this file return true when the reduce sink was merged away and false otherwise.
 */
protected abstract Object process(ReduceSinkOperator cRS, GroupByOperator cGBY,
ParseContext context) throws SemanticException;
protected Operator> getStartForGroupBy(ReduceSinkOperator cRS) {
Operator extends Serializable> parent = getSingleParent(cRS);
return parent instanceof GroupByOperator ? parent : cRS; // skip map-aggr GBY
}
/**
 * Checks whether the child reduce sink can be folded into the reduce sinks that feed
 * {@code pJoin}. For the JOIN-RS case it is generally not possible to merge if the
 * child has more key/partition columns than the parents, so such plans are rejected.
 * On success the parents take over the child's reducer count when only the child
 * has one set.
 *
 * @param cRS child reduce sink below the join
 * @param pJoin join whose parents are compared against {@code cRS}; its parents are
 *        stored into a {@code ReduceSinkOperator[]}
 * @param minReducer minimum reducer count the child must have for its count to be
 *        pushed to the parents
 * @return true if the child is compatible with the parents
 * @throws SemanticException propagated from expression backtracking
 */
protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReducer)
    throws SemanticException {
  List<Operator<?>> parents = pJoin.getParentOperators();
  ReduceSinkOperator[] pRSs = parents.toArray(new ReduceSinkOperator[parents.size()]);
  ReduceSinkDesc cRSc = cRS.getConf();
  ReduceSinkDesc pRS0c = pRSs[0].getConf();
  if (cRSc.getKeyCols().size() > pRS0c.getKeyCols().size()) {
    return false;
  }
  if (cRSc.getPartitionCols().size() > pRS0c.getPartitionCols().size()) {
    return false;
  }
  Integer moveReducerNumTo = checkNumReducer(cRSc.getNumReducers(), pRS0c.getNumReducers());
  if (moveReducerNumTo == null ||
      moveReducerNumTo > 0 && cRSc.getNumReducers() < minReducer) {
    return false;
  }
  Integer moveRSOrderTo = checkOrder(cRSc.getOrder(), pRS0c.getOrder());
  if (moveRSOrderTo == null) {
    return false;
  }

  boolean[] sorted = getSortedTags(pJoin);

  // every child key must match the key at the same position of some sorted parent
  int cKeySize = cRSc.getKeyCols().size();
  for (int i = 0; i < cKeySize; i++) {
    ExprNodeDesc cexpr = cRSc.getKeyCols().get(i);
    ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length];
    for (int tag = 0; tag < pRSs.length; tag++) {
      pexprs[tag] = pRSs[tag].getConf().getKeyCols().get(i);
    }
    int found = indexOf(cexpr, pexprs, cRS, pRSs, sorted);
    if (found < 0) {
      return false;
    }
  }

  // same positional check for the partition columns
  int cPartSize = cRSc.getPartitionCols().size();
  for (int i = 0; i < cPartSize; i++) {
    ExprNodeDesc cexpr = cRSc.getPartitionCols().get(i);
    ExprNodeDesc[] pexprs = new ExprNodeDesc[pRSs.length];
    for (int tag = 0; tag < pRSs.length; tag++) {
      pexprs[tag] = pRSs[tag].getConf().getPartitionCols().get(i);
    }
    int found = indexOf(cexpr, pexprs, cRS, pRSs, sorted);
    if (found < 0) {
      return false;
    }
  }

  if (moveReducerNumTo > 0) {
    for (ReduceSinkOperator pRS : pRSs) {
      pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
    }
  }
  return true;
}
/**
 * Evaluates {@code isSortedTag} for every parent position of the join.
 *
 * @return one flag per parent of {@code joinOp}, indexed by tag
 */
private boolean[] getSortedTags(JoinOperator joinOp) {
  final int tagCount = joinOp.getParentOperators().size();
  boolean[] sortedByTag = new boolean[tagCount];
  int tag = 0;
  while (tag < tagCount) {
    sortedByTag[tag] = isSortedTag(joinOp, tag);
    tag++;
  }
  return sortedByTag;
}
/**
 * Returns false when {@code tag} appears as the right side of a LEFT OUTER JOIN
 * condition, the left side of a RIGHT OUTER JOIN condition, or either side of a
 * FULL OUTER JOIN condition; returns true otherwise.
 */
private boolean isSortedTag(JoinOperator joinOp, int tag) {
  for (JoinCondDesc cond : joinOp.getConf().getConds()) {
    final int joinType = cond.getType();
    if (joinType == JoinDesc.LEFT_OUTER_JOIN) {
      if (cond.getRight() == tag) {
        return false;
      }
    } else if (joinType == JoinDesc.RIGHT_OUTER_JOIN) {
      if (cond.getLeft() == tag) {
        return false;
      }
    } else if (joinType == JoinDesc.FULL_OUTER_JOIN) {
      if (cond.getLeft() == tag || cond.getRight() == tag) {
        return false;
      }
    }
    // any other join type places no restriction on the tag
  }
  return true;
}
/**
 * Finds the first sorted parent whose expression equals the child expression once the
 * latter has been backtracked from {@code child} to that parent.
 *
 * @param cexpr expression of the child
 * @param pexprs candidate expression per parent, indexed by tag
 * @param child operator owning {@code cexpr}
 * @param parents parent operators, indexed by tag
 * @param sorted per-tag flags; parents whose flag is false are skipped
 * @return the matching tag, or -1 when none matches
 * @throws SemanticException propagated from expression backtracking
 */
private int indexOf(ExprNodeDesc cexpr, ExprNodeDesc[] pexprs, Operator<?> child,
    Operator<?>[] parents, boolean[] sorted) throws SemanticException {
  for (int tag = 0; tag < parents.length; tag++) {
    if (sorted[tag] &&
        pexprs[tag].isSame(ExprNodeDescUtils.backtrack(cexpr, child, parents[tag]))) {
      return tag;
    }
  }
  return -1;
}
/**
 * Checks whether the child reduce sink can be folded into the parent reduce sink and,
 * if so, copies the more specific settings of the child onto the parent.
 *
 * @param cRS child reduce sink
 * @param pRS parent reduce sink, modified in place on success
 * @param minReducer minimum reducer count the child must have for its count to be
 *        pushed to the parent
 * @return true if the two reduce sinks are compatible
 * @throws SemanticException if the child has the more specific sort order but not
 *         the more specific sort columns, or propagated from backtracking
 */
protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
    throws SemanticException {
  // {keys, partitions, order, reducers}: negative = parent more specific,
  // positive = child more specific
  int[] result = checkStatus(cRS, pRS, minReducer);
  if (result == null) {
    return false;
  }
  if (result[0] > 0) {
    // The sorting columns of the child RS are more specific than
    // those of the parent RS. Assign sorting columns of the child RS
    // to the parent RS.
    List<ExprNodeDesc> childKCs = cRS.getConf().getKeyCols();
    pRS.getConf().setKeyCols(ExprNodeDescUtils.backtrack(childKCs, cRS, pRS));
  }
  if (result[1] < 0) {
    // The partitioning columns of the parent RS are more specific than
    // those of the child RS.
    List<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
    if (childPCs != null && !childPCs.isEmpty()) {
      // If partitioning columns of the child RS are assigned,
      // assign these to the partitioning columns of the parent RS.
      pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
    }
  } else if (result[1] > 0) {
    // The partitioning columns of the child RS are more specific than
    // those of the parent RS.
    List<ExprNodeDesc> parentPCs = pRS.getConf().getPartitionCols();
    if (parentPCs == null || parentPCs.isEmpty()) {
      // If partitioning columns of the parent RS are not assigned,
      // assign partitioning columns of the child RS to the parent RS.
      ArrayList<ExprNodeDesc> childPCs = cRS.getConf().getPartitionCols();
      pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPCs, cRS, pRS));
    }
  }
  if (result[2] > 0) {
    // The sorting order of the child RS is more specific than
    // that of the parent RS. Assign the sorting order of the child RS
    // to the parent RS.
    if (result[0] <= 0) {
      // Sorting columns of the parent RS are more specific than those of the
      // child RS but Sorting order of the child RS is more specific than
      // that of the parent RS.
      throw new SemanticException("Sorting columns and order don't match. " +
          "Try set " + HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION + "=false;");
    }
    pRS.getConf().setOrder(cRS.getConf().getOrder());
  }
  if (result[3] > 0) {
    // The number of reducers of the child RS is more specific than
    // that of the parent RS. Assign the number of reducers of the child RS
    // to the parent RS.
    pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
  }
  return true;
}
/**
 * Compares sort order, reducer count, key columns and partition columns of the child
 * and parent reduce sinks. Each comparison yields -1 for p to c, 1 for c to p, and
 * 0 when neither side needs to change.
 *
 * @return {@code {keys, partitions, order, reducers}}, or null as soon as one aspect
 *         is incompatible or the child's reducer count would be pushed to the parent
 *         while being below {@code minReducer}
 * @throws SemanticException propagated from expression backtracking
 */
private int[] checkStatus(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
    throws SemanticException {
  ReduceSinkDesc cConf = cRS.getConf();
  ReduceSinkDesc pConf = pRS.getConf();
  Integer moveRSOrderTo = checkOrder(cConf.getOrder(), pConf.getOrder());
  if (moveRSOrderTo == null) {
    return null;
  }
  Integer moveReducerNumTo = checkNumReducer(cConf.getNumReducers(), pConf.getNumReducers());
  if (moveReducerNumTo == null ||
      moveReducerNumTo > 0 && cConf.getNumReducers() < minReducer) {
    return null;
  }
  List<ExprNodeDesc> ckeys = cConf.getKeyCols();
  List<ExprNodeDesc> pkeys = pConf.getKeyCols();
  Integer moveKeyColTo = checkExprs(ckeys, pkeys, cRS, pRS);
  if (moveKeyColTo == null) {
    return null;
  }
  List<ExprNodeDesc> cpars = cConf.getPartitionCols();
  List<ExprNodeDesc> ppars = pConf.getPartitionCols();
  Integer movePartitionColTo = checkExprs(cpars, ppars, cRS, pRS);
  if (movePartitionColTo == null) {
    return null;
  }
  return new int[] {moveKeyColTo, movePartitionColTo, moveRSOrderTo, moveReducerNumTo};
}
/**
 * Compares an expression list of the child with the corresponding list of the parent.
 *
 * @param ckeys expressions of the child; may be null or empty
 * @param pkeys expressions of the parent; may be null or empty
 * @return 0 when both lists are empty, -1 when only the parent has expressions,
 *         1 when only the child has expressions and all of them backtrack to the
 *         parent, the result of {@code sameKeys} when both have expressions, or
 *         null when the lists are incompatible
 * @throws SemanticException propagated from expression backtracking
 */
private Integer checkExprs(List<ExprNodeDesc> ckeys, List<ExprNodeDesc> pkeys,
    ReduceSinkOperator cRS, ReduceSinkOperator pRS) throws SemanticException {
  Integer moveKeyColTo = 0;
  if (ckeys == null || ckeys.isEmpty()) {
    if (pkeys != null && !pkeys.isEmpty()) {
      moveKeyColTo = -1;
    }
  } else {
    if (pkeys == null || pkeys.isEmpty()) {
      // the child's expressions can only move up if each resolves at the parent
      for (ExprNodeDesc ckey : ckeys) {
        if (ExprNodeDescUtils.backtrack(ckey, cRS, pRS) == null) {
          return null;
        }
      }
      moveKeyColTo = 1;
    } else {
      moveKeyColTo = sameKeys(ckeys, pkeys, cRS, pRS);
    }
  }
  return moveKeyColTo;
}
protected Integer sameKeys(List cexprs, List pexprs,
Operator> child, Operator> parent) throws SemanticException {
int common = Math.min(cexprs.size(), pexprs.size());
int limit = Math.max(cexprs.size(), pexprs.size());
int i = 0;
for (; i < common; i++) {
ExprNodeDesc pexpr = pexprs.get(i);
ExprNodeDesc cexpr = ExprNodeDescUtils.backtrack(cexprs.get(i), child, parent);
if (!pexpr.isSame(cexpr)) {
return null;
}
}
for (;i < limit; i++) {
if (cexprs.size() > pexprs.size()) {
if (ExprNodeDescUtils.backtrack(cexprs.get(i), child, parent) == null) {
return null;
}
}
}
return Integer.valueOf(cexprs.size()).compareTo(pexprs.size());
}
/**
 * Compares the sort-order strings of the child and the parent after trimming.
 *
 * @param corder order string of the child; null is treated like an empty string
 * @param porder order string of the parent; null is treated like an empty string
 * @return 0 when both are empty; -1 when only the child is empty; 1 when only the
 *         parent is empty; null when neither is a prefix of the other; otherwise the
 *         comparison of the child's length with the parent's (-1, 0 or 1)
 */
protected Integer checkOrder(String corder, String porder) {
  String childOrder = (corder == null) ? "" : corder.trim();
  String parentOrder = (porder == null) ? "" : porder.trim();
  boolean childBlank = childOrder.equals("");
  boolean parentBlank = parentOrder.equals("");

  if (childBlank && parentBlank) {
    return 0;
  }
  if (childBlank) {
    return -1;
  }
  if (parentBlank) {
    return 1;
  }

  int shared = Math.min(childOrder.length(), parentOrder.length());
  boolean samePrefix = childOrder.regionMatches(0, parentOrder, 0, shared);
  if (!samePrefix) {
    return null;
  }
  return Integer.valueOf(childOrder.length()).compareTo(parentOrder.length());
}
/**
 * Compares the reducer counts of the child and the parent; a negative count is
 * treated as "not set".
 *
 * @param creduce reducer count of the child
 * @param preduce reducer count of the parent
 * @return 0 when both are unset or both are set and equal; -1 when only the child
 *         is unset; 1 when only the parent is unset; null when both are set but differ
 */
protected Integer checkNumReducer(int creduce, int preduce) {
  final boolean childUnset = creduce < 0;
  final boolean parentUnset = preduce < 0;
  if (childUnset && parentUnset) {
    return 0;
  }
  if (childUnset) {
    return -1;
  }
  if (parentUnset) {
    return 1;
  }
  if (creduce == preduce) {
    return 0;
  }
  return null;
}
protected > T findPossibleParent(Operator> start, Class target,
boolean trustScript) throws SemanticException {
T[] parents = findPossibleParents(start, target, trustScript);
return parents != null && parents.length == 1 ? parents[0] : null;
}
@SuppressWarnings("unchecked")
protected > T[] findPossibleParents(Operator> start, Class target,
boolean trustScript) {
Operator> cursor = getSingleParent(start);
for (; cursor != null; cursor = getSingleParent(cursor)) {
if (target.isAssignableFrom(cursor.getClass())) {
T[] array = (T[]) Array.newInstance(target, 1);
array[0] = (T) cursor;
return array;
}
if (cursor instanceof JoinOperator) {
return findParents((JoinOperator) cursor, target);
}
if (cursor instanceof ScriptOperator && !trustScript) {
return null;
}
if (!(cursor instanceof SelectOperator
|| cursor instanceof FilterOperator
|| cursor instanceof ExtractOperator
|| cursor instanceof ForwardOperator
|| cursor instanceof ScriptOperator
|| cursor instanceof ReduceSinkOperator)) {
return null;
}
}
return null;
}
@SuppressWarnings("unchecked")
private > T[] findParents(JoinOperator join, Class target) {
List> parents = join.getParentOperators();
T[] result = (T[]) Array.newInstance(target, parents.size());
for (int tag = 0; tag < result.length; tag++) {
Operator> cursor = parents.get(tag);
for (; cursor != null; cursor = getSingleParent(cursor)) {
if (target.isAssignableFrom(cursor.getClass())) {
result[tag] = (T) cursor;
break;
}
}
if (result[tag] == null) {
throw new IllegalStateException("failed to find " + target.getSimpleName()
+ " from " + join + " on tag " + tag);
}
}
return result;
}
/**
 * Replaces {@code childRS} with a select operator whose output column names and
 * column list are taken from the reduce sink's value columns.
 *
 * @param childRS reduce sink to replace
 * @param context parse context of the plan
 * @return the select operator that took the place of {@code childRS}
 * @throws SemanticException propagated from the replacement
 */
protected SelectOperator replaceReduceSinkWithSelectOperator(ReduceSinkOperator childRS,
    ParseContext context) throws SemanticException {
  SelectOperator replacement = replaceOperatorWithSelect(childRS, context);
  ReduceSinkDesc rsDesc = childRS.getConf();
  SelectDesc selDesc = replacement.getConf();
  selDesc.setOutputColumnNames(rsDesc.getOutputValueColumnNames());
  selDesc.setColList(rsDesc.getValueCols());
  return replacement;
}
private SelectOperator replaceOperatorWithSelect(Operator> operator, ParseContext context)
throws SemanticException {
RowResolver inputRR = context.getOpParseCtx().get(operator).getRowResolver();
SelectDesc select = new SelectDesc(null, null);
Operator> parent = getSingleParent(operator);
Operator> child = getSingleChild(operator);
parent.getChildOperators().clear();
SelectOperator sel = (SelectOperator) putOpInsertMap(
OperatorFactory.getAndMakeChild(select, new RowSchema(inputRR
.getColumnInfos()), parent), inputRR, context);
sel.setColumnExprMap(operator.getColumnExprMap());
sel.setChildOperators(operator.getChildOperators());
for (Operator extends Serializable> ch : operator.getChildOperators()) {
ch.replaceParent(operator, sel);
}
if (child instanceof ExtractOperator) {
removeOperator(child, getSingleChild(child), sel, context);
dedupCtx.addRemovedOperator(child);
}
operator.setChildOperators(null);
operator.setParentOperators(null);
dedupCtx.addRemovedOperator(operator);
return sel;
}
/**
 * Removes {@code cRS} from the plan and switches {@code cGBYr} to COMPLETE mode.
 * When the single parent of {@code cRS} is a group-by, {@code cGBYr} takes over that
 * group-by's keys, aggregators, column expression map, schema and row resolver, and
 * the parent group-by is removed too. Otherwise the keys, aggregation parameters and
 * column expressions of {@code cGBYr} are backtracked through {@code cRS}.
 *
 * @param cRS reduce sink to remove
 * @param cGBYr group-by that is the child of {@code cRS}
 * @param context parse context of the plan
 * @throws SemanticException propagated from expression backtracking
 */
protected void removeReduceSinkForGroupBy(ReduceSinkOperator cRS, GroupByOperator cGBYr,
    ParseContext context) throws SemanticException {

  Operator<?> parent = getSingleParent(cRS);

  if (parent instanceof GroupByOperator) {
    GroupByOperator cGBYm = (GroupByOperator) parent;

    cGBYr.getConf().setKeys(cGBYm.getConf().getKeys());
    cGBYr.getConf().setAggregators(cGBYm.getConf().getAggregators());
    for (AggregationDesc aggr : cGBYm.getConf().getAggregators()) {
      aggr.setMode(GenericUDAFEvaluator.Mode.COMPLETE);
    }
    cGBYr.setColumnExprMap(cGBYm.getColumnExprMap());
    cGBYr.setSchema(cGBYm.getSchema());
    RowResolver resolver = context.getOpParseCtx().get(cGBYm).getRowResolver();
    context.getOpParseCtx().get(cGBYr).setRowResolver(resolver);
  } else {
    cGBYr.getConf().setKeys(ExprNodeDescUtils.backtrack(cGBYr.getConf().getKeys(), cGBYr, cRS));
    for (AggregationDesc aggr : cGBYr.getConf().getAggregators()) {
      aggr.setParameters(ExprNodeDescUtils.backtrack(aggr.getParameters(), cGBYr, cRS));
    }

    Map<String, ExprNodeDesc> oldMap = cGBYr.getColumnExprMap();
    RowResolver oldRR = context.getOpParseCtx().get(cGBYr).getRowResolver();

    Map<String, ExprNodeDesc> newMap = new HashMap<String, ExprNodeDesc>();
    RowResolver newRR = new RowResolver();

    // rebuild the row resolver and expression map over the output columns
    List<String> outputCols = cGBYr.getConf().getOutputColumnNames();
    for (int i = 0; i < outputCols.size(); i++) {
      String colName = outputCols.get(i);
      String[] nm = oldRR.reverseLookup(colName);
      ColumnInfo colInfo = oldRR.get(nm[0], nm[1]);
      newRR.put(nm[0], nm[1], colInfo);
      ExprNodeDesc colExpr = ExprNodeDescUtils.backtrack(oldMap.get(colName), cGBYr, cRS);
      if (colExpr != null) {
        newMap.put(colInfo.getInternalName(), colExpr);
      }
    }
    cGBYr.setColumnExprMap(newMap);
    cGBYr.setSchema(new RowSchema(newRR.getColumnInfos()));
    context.getOpParseCtx().get(cGBYr).setRowResolver(newRR);
  }
  cGBYr.getConf().setMode(GroupByDesc.Mode.COMPLETE);

  removeOperator(cRS, cGBYr, parent, context);
  dedupCtx.addRemovedOperator(cRS);

  if (parent instanceof GroupByOperator) {
    removeOperator(parent, cGBYr, getSingleParent(parent), context);
    // record the operator that was actually removed; cGBYr stays in the plan
    dedupCtx.addRemovedOperator(parent);
  }
}
private void removeOperator(Operator> target, Operator> child, Operator> parent,
ParseContext context) {
for (Operator> aparent : target.getParentOperators()) {
aparent.replaceChild(target, child);
}
for (Operator> achild : target.getChildOperators()) {
achild.replaceParent(target, parent);
}
target.setChildOperators(null);
target.setParentOperators(null);
context.getOpParseCtx().remove(target);
}
private Operator extends Serializable> putOpInsertMap(Operator> op, RowResolver rr,
ParseContext context) {
OpParseContext ctx = new OpParseContext(rr);
context.getOpParseCtx().put(op, ctx);
return op;
}
}
static class GroupbyReducerProc extends AbsctractReducerReducerProc {
// pRS-pGBY-cRS
public Object process(ReduceSinkOperator cRS, ParseContext context)
throws SemanticException {
GroupByOperator pGBY = findPossibleParent(cRS, GroupByOperator.class, trustScript());
if (pGBY == null) {
return false;
}
ReduceSinkOperator pRS = findPossibleParent(pGBY, ReduceSinkOperator.class, trustScript());
if (pRS != null && merge(cRS, pRS, minReducer())) {
replaceReduceSinkWithSelectOperator(cRS, context);
return true;
}
return false;
}
// pRS-pGBY-cRS-cGBY
public Object process(ReduceSinkOperator cRS, GroupByOperator cGBY, ParseContext context)
throws SemanticException {
Operator> start = getStartForGroupBy(cRS);
GroupByOperator pGBY = findPossibleParent(start, GroupByOperator.class, trustScript());
if (pGBY == null) {
return false;
}
ReduceSinkOperator pRS = getSingleParent(pGBY, ReduceSinkOperator.class);
if (pRS != null && merge(cRS, pRS, minReducer())) {
removeReduceSinkForGroupBy(cRS, cGBY, context);
return true;
}
return false;
}
}
static class JoinReducerProc extends AbsctractReducerReducerProc {
// pRS-pJOIN-cRS
public Object process(ReduceSinkOperator cRS, ParseContext context)
throws SemanticException {
JoinOperator pJoin = findPossibleParent(cRS, JoinOperator.class, trustScript());
if (pJoin != null && merge(cRS, pJoin, minReducer())) {
pJoin.getConf().setFixedAsSorted(true);
replaceReduceSinkWithSelectOperator(cRS, context);
return true;
}
return false;
}
// pRS-pJOIN-cRS-cGBY
public Object process(ReduceSinkOperator cRS, GroupByOperator cGBY, ParseContext context)
throws SemanticException {
Operator> start = getStartForGroupBy(cRS);
JoinOperator pJoin = findPossibleParent(start, JoinOperator.class, trustScript());
if (pJoin != null && merge(cRS, pJoin, minReducer())) {
pJoin.getConf().setFixedAsSorted(true);
removeReduceSinkForGroupBy(cRS, cGBY, context);
return true;
}
return false;
}
}
static class ReducerReducerProc extends AbsctractReducerReducerProc {
// pRS-cRS
public Object process(ReduceSinkOperator cRS, ParseContext context)
throws SemanticException {
ReduceSinkOperator pRS = findPossibleParent(cRS, ReduceSinkOperator.class, trustScript());
if (pRS != null && merge(cRS, pRS, minReducer())) {
replaceReduceSinkWithSelectOperator(cRS, context);
return true;
}
return false;
}
// pRS-cRS-cGBY
public Object process(ReduceSinkOperator cRS, GroupByOperator cGBY, ParseContext context)
throws SemanticException {
Operator> start = getStartForGroupBy(cRS);
ReduceSinkOperator pRS = findPossibleParent(start, ReduceSinkOperator.class, trustScript());
if (pRS != null && merge(cRS, pRS, minReducer())) {
removeReduceSinkForGroupBy(cRS, cGBY, context);
return true;
}
return false;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy