// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
/**
* Merges a SEL-SEL or FIL-FIL operator pair into a single operator.
*/
public class NonBlockingOpDeDupProc extends Transform {
/**
 * Registers the SEL-SEL and FIL-FIL dedup rules and walks the operator graph
 * starting from the top operators of the given parse context.
 *
 * @param pctx parse context supplying the top operators to walk; it is also
 *             handed to the {@code SelectDedup} processor
 * @return the same {@code pctx} instance that was passed in
 * @throws SemanticException declared by the overridden method; presumably raised
 *                           by the graph walk or one of the processors
 */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  String selName = SelectOperator.getOperatorName();
  String filName = FilterOperator.getOperatorName();

  // Insertion-ordered map: rule R1 (SEL%SEL%) is registered before R2 (FIL%FIL%).
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1", selName + "%" + selName + "%"), new SelectDedup(pctx));
  opRules.put(new RuleRegExp("R2", filName + "%" + filName + "%"), new FilterDedup());

  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
  GraphWalker ogw = new DefaultGraphWalker(disp);

  // The walk starts from every top operator registered in the parse context.
  List<Node> topNodes = new ArrayList<Node>(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pctx;
}
private class SelectDedup implements NodeProcessor {
private ParseContext pctx;
/**
 * Creates the processor that handles the SEL-SEL pattern.
 *
 * @param pctx parse context, stored in the {@code pctx} field of this processor
 */
public SelectDedup (ParseContext pctx) {
this.pctx = pctx;
}
/**
 * Merges a child SelectOperator into its parent SelectOperator.
 *
 * <p>The merge is skipped (nothing is modified) when the parent has more than one
 * child, when the parent is a no-compute select-star, or when the child references
 * a function-valued output column of the parent more than once.
 *
 * <p>Otherwise, unless the child is a no-compute select-star, the child's column
 * expressions are backtracked onto the parent, the parent takes over the child's
 * output column names and schema, and the parent's column expression map is rebuilt
 * if the child has one. Finally the child's select-star flag is copied to the parent,
 * the child is unlinked from the graph and its children are adopted by the parent.
 *
 * @param nd          the child SelectOperator (the node currently being visited)
 * @param stack       walker stack; the element just below the top is used as the parent
 * @param procCtx     not used by this processor
 * @param nodeOutputs not used by this processor
 * @return always {@code null}
 * @throws SemanticException declared by the overridden method; presumably raised
 *                           while backtracking expressions
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  SelectOperator cSEL = (SelectOperator) nd;
  SelectOperator pSEL = (SelectOperator) stack.get(stack.size() - 2);
  if (pSEL.getNumChild() > 1) {
    return null; // possible if all children have same expressions, but not likely.
  }
  if (pSEL.getConf().isSelStarNoCompute()) {
    // SEL(no-compute)-SEL. never seen this condition
    // and also, removing parent is not safe in current graph walker
    return null;
  }
  // For SEL-SEL(compute) case, move column exprs/names of child to parent.
  if (!cSEL.getConf().isSelStarNoCompute()) {
    Set<String> funcOutputs = getFunctionOutputs(
        pSEL.getConf().getOutputColumnNames(), pSEL.getConf().getColList());
    List<ExprNodeDesc> cSELColList = cSEL.getConf().getColList();
    List<String> cSELOutputColumnNames = cSEL.getConf().getOutputColumnNames();
    // Bail out when a function result of the parent is referenced more than once
    // by the child (presumably to avoid duplicating the function call after merging).
    if (!funcOutputs.isEmpty() && !checkReferences(cSELColList, funcOutputs)) {
      return null;
    }
    if (cSEL.getColumnExprMap() == null) {
      // If the child SelectOperator does not have the ColumnExprMap,
      // we do not need to update the ColumnExprMap in the parent SelectOperator.
      pSEL.getConf().setColList(ExprNodeDescUtils.backtrack(cSELColList, cSEL, pSEL, true));
      pSEL.getConf().setOutputColumnNames(cSELOutputColumnNames);
    } else {
      // If the child SelectOperator has the ColumnExprMap,
      // we need to update the ColumnExprMap in the parent SelectOperator.
      List<ExprNodeDesc> newPSELColList = new ArrayList<ExprNodeDesc>();
      List<String> newPSELOutputColumnNames = new ArrayList<String>();
      Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
      for (int i = 0; i < cSELOutputColumnNames.size(); i++) {
        String outputColumnName = cSELOutputColumnNames.get(i);
        ExprNodeDesc cSELExprNodeDesc = cSELColList.get(i);
        ExprNodeDesc newPSELExprNodeDesc =
            ExprNodeDescUtils.backtrack(cSELExprNodeDesc, cSEL, pSEL, true);
        newPSELColList.add(newPSELExprNodeDesc);
        newPSELOutputColumnNames.add(outputColumnName);
        colExprMap.put(outputColumnName, newPSELExprNodeDesc);
      }
      pSEL.getConf().setColList(newPSELColList);
      pSEL.getConf().setOutputColumnNames(newPSELOutputColumnNames);
      pSEL.setColumnExprMap(colExprMap);
    }
    pSEL.setSchema(cSEL.getSchema());
  }
  pSEL.getConf().setSelectStar(cSEL.getConf().isSelectStar());
  // Unlink the child: the parent adopts the child's children, the child's own
  // parent/child links are cleared, and context references to the child are
  // redirected to the parent.
  pSEL.removeChildAndAdoptItsChildren(cSEL);
  cSEL.setParentOperators(null);
  cSEL.setChildOperators(null);
  fixContextReferences(cSEL, pSEL);
  return null;
}
/**
 * Collects the names of the output columns whose expression is a function call.
 *
 * @param colNames output column names, positionally aligned with {@code targets}
 * @param targets  column expressions; only {@link ExprNodeGenericFuncDesc} entries count
 * @return the names at the positions where {@code targets} holds a generic function
 *         expression; empty if there are none
 */
private Set<String> getFunctionOutputs(List<String> colNames, List<ExprNodeDesc> targets) {
  Set<String> functionOutputs = new HashSet<String>();
  for (int i = 0; i < targets.size(); i++) {
    if (targets.get(i) instanceof ExprNodeGenericFuncDesc) {
      functionOutputs.add(colNames.get(i));
    }
  }
  return functionOutputs;
}
/**
 * Checks that, across all child expressions taken together, no function-valued
 * parent column is referenced twice or more. If one is, dedup must be skipped.
 *
 * @param sources     expressions of the child operator
 * @param funcOutputs names of parent output columns that are results of functions
 * @return {@code true} if every column in {@code funcOutputs} is referenced at most
 *         once over all of {@code sources}; {@code false} otherwise
 */
private boolean checkReferences(List<ExprNodeDesc> sources, Set<String> funcOutputs) {
  // Shared across all sources so that repeats in different expressions are detected too.
  Set<String> ref = new HashSet<String>();
  for (ExprNodeDesc source : sources) {
    if (!checkReferences(source, funcOutputs, ref)) {
      return false;
    }
  }
  return true;
}
/**
 * Recursively walks {@code expr} and records each reference to a function-valued
 * column, failing on the first repeated reference.
 *
 * @param expr        expression to scan, including all of its descendants
 * @param funcOutputs names of columns that are results of functions
 * @param ref         names from {@code funcOutputs} already seen; updated in place
 * @return {@code false} as soon as a column in {@code funcOutputs} is seen a second
 *         time; {@code true} if the whole subtree was scanned without a repeat
 */
private boolean checkReferences(ExprNodeDesc expr, Set<String> funcOutputs, Set<String> ref) {
  if (expr instanceof ExprNodeColumnDesc) {
    String col = ((ExprNodeColumnDesc) expr).getColumn();
    // Set.add returns false when col is already present, i.e. this is a repeat.
    if (funcOutputs.contains(col) && !ref.add(col)) {
      return false;
    }
  }
  List<ExprNodeDesc> children = expr.getChildren();
  if (children != null) {
    for (ExprNodeDesc child : children) {
      if (!checkReferences(child, funcOutputs, ref)) {
        return false;
      }
    }
  }
  return true;
}
/**
* Change existing references in the context to point from child to parent operator.
* @param cSEL child operator (to be removed, and merged into parent)
* @param pSEL parent operator
*/
private void fixContextReferences(SelectOperator cSEL, SelectOperator pSEL) {
Collection