// org.apache.hadoop.hive.ql.optimizer.lineage.ExprProcFactory (Maven / Gradle / Ivy)
// The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.lineage;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.LineageInfo;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
/**
* Expression processor factory for lineage. Each processor is responsible for
* creating the leaf-level column info objects that the expression depends upon
* and for generating a string representation of the expression.
*/
public class ExprProcFactory {

  // NOTE(review): the process(...) overrides below keep the raw Stack parameter
  // exactly as it appears in this file, so that they stay signature-compatible
  // with NodeProcessor, whose declaration is not visible here. Only local
  // variables are parameterized.

  /**
   * Processor for column expressions.
   *
   * Resolves the referenced column in the schema supplied by the
   * {@link ExprProcCtx} and returns the dependency that the lineage index
   * holds for that column on the input operator.
   */
  public static class ColumnExprProcessor implements NodeProcessor {

    /**
     * @param nd
     *          The expression node; cast to {@link ExprNodeColumnDesc}.
     * @param stack
     *          Not used by this processor.
     * @param procCtx
     *          The walker context; cast to {@link ExprProcCtx}.
     * @param nodeOutputs
     *          Not used by this processor.
     * @return The dependency returned by the lineage index for the input
     *         operator and the resolved column.
     * @throws SemanticException
     */
    @Override
    public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      ExprNodeColumnDesc columnExpr = (ExprNodeColumnDesc) nd;
      ExprProcCtx exprCtx = (ExprProcCtx) procCtx;

      // assert that the input operator is not null as there are no
      // exprs associated with table scans.
      Operator<? extends OperatorDesc> inputOp = exprCtx.getInputOperator();
      assert (inputOp != null);

      RowSchema schema = exprCtx.getSchema();
      String columnName = columnExpr.getColumn();
      ColumnInfo columnInfo = schema.getColumnInfo(columnName);
      if (columnInfo == null && inputOp instanceof ReduceSinkOperator) {
        // Second attempt for reduce sinks only: look the column up under the
        // name produced by Utilities.removeValueTag (presumably the name
        // without its value-side prefix; confirm against Utilities).
        columnInfo = schema.getColumnInfo(Utilities.removeValueTag(columnName));
      }

      // Fetch the dependency recorded for this column on the input operator.
      // columnInfo may still be null here; it is passed through unchanged.
      LineageCtx lineageCtx = exprCtx.getLineageCtx();
      return lineageCtx.getIndex().getDependency(inputOp, columnInfo);
    }
  }

  /**
   * Processor for any function or field expression.
   *
   * Builds a new dependency whose base columns are the union, in first-seen
   * order and without duplicates, of the base columns of all non-null child
   * dependencies.
   */
  public static class GenericExprProcessor implements NodeProcessor {

    /**
     * @param nd
     *          The expression node; expected to be an
     *          {@link ExprNodeGenericFuncDesc} or an {@link ExprNodeFieldDesc}.
     * @param stack
     *          Not used by this processor.
     * @param procCtx
     *          Not used by this processor.
     * @param nodeOutputs
     *          The outputs computed for the children; each non-null entry is
     *          cast to {@link Dependency}.
     * @return A freshly created dependency combining the children.
     * @throws SemanticException
     */
    @Override
    public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      assert (nd instanceof ExprNodeGenericFuncDesc || nd instanceof ExprNodeFieldDesc);

      // LinkedHashSet: drops duplicate base columns while keeping the order in
      // which the children contributed them.
      LinkedHashSet<BaseColumnInfo> baseCols = new LinkedHashSet<BaseColumnInfo>();

      // Start from EXPRESSION and fold every child's type into it.
      LineageInfo.DependencyType combinedType = LineageInfo.DependencyType.EXPRESSION;

      for (Object childOutput : nodeOutputs) {
        if (childOutput == null) {
          // A child without a dependency contributes nothing.
          continue;
        }
        Dependency childDep = (Dependency) childOutput;
        combinedType = LineageCtx.getNewDependencyType(childDep.getType(), combinedType);
        baseCols.addAll(childDep.getBaseCols());
      }

      Dependency dep = new Dependency();
      dep.setBaseCols(new ArrayList<BaseColumnInfo>(baseCols));
      dep.setType(combinedType);
      return dep;
    }
  }

  /**
   * Processor for constants and null expressions. For such expressions the
   * processor returns a SIMPLE dependency with an empty list of base columns.
   */
  public static class DefaultExprProcessor implements NodeProcessor {

    /**
     * @param nd
     *          The expression node; expected to be an
     *          {@link ExprNodeConstantDesc}.
     * @param stack
     *          Not used by this processor.
     * @param procCtx
     *          Not used by this processor.
     * @param nodeOutputs
     *          Not used by this processor.
     * @return A new dependency of type SIMPLE that has no base columns.
     * @throws SemanticException
     */
    @Override
    public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      assert (nd instanceof ExprNodeConstantDesc);

      // Create a dependency that has no basecols
      Dependency dep = new Dependency();
      dep.setType(LineageInfo.DependencyType.SIMPLE);
      dep.setBaseCols(new ArrayList<BaseColumnInfo>());
      return dep;
    }
  }

  /**
   * @return A new {@link DefaultExprProcessor}.
   */
  public static NodeProcessor getDefaultExprProcessor() {
    return new DefaultExprProcessor();
  }

  /**
   * @return A new {@link GenericExprProcessor}, used for function expressions.
   */
  public static NodeProcessor getGenericFuncProcessor() {
    return new GenericExprProcessor();
  }

  /**
   * @return A new {@link GenericExprProcessor}, used for field expressions.
   */
  public static NodeProcessor getFieldProcessor() {
    return new GenericExprProcessor();
  }

  /**
   * @return A new {@link ColumnExprProcessor}.
   */
  public static NodeProcessor getColumnProcessor() {
    return new ColumnExprProcessor();
  }

  /**
   * Gets the expression dependencies for the expression.
   *
   * @param lctx
   *          The lineage context containing the input operators dependencies.
   * @param inpOp
   *          The input operator to the current operator.
   * @param expr
   *          The expression that is being processed.
   * @return The entry that the walk stored in its output map for
   *         {@code expr}, cast to {@link Dependency}; {@code null} if the map
   *         holds no entry for it.
   * @throws SemanticException
   */
  public static Dependency getExprDependency(LineageCtx lctx,
      Operator inpOp, ExprNodeDesc expr)
      throws SemanticException {
    // Context handed to every processor during the walk.
    ExprProcCtx exprCtx = new ExprProcCtx(lctx, inpOp);

    // Rules keyed on the expression node class name. A LinkedHashMap keeps
    // them in the order R1, R2, R3 in which they are registered.
    Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
    exprRules.put(
        new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"),
        getColumnProcessor());
    exprRules.put(
        new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"),
        getFieldProcessor());
    exprRules.put(
        new RuleRegExp("R3", ExprNodeGenericFuncDesc.class.getName() + "%"),
        getGenericFuncProcessor());

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along; the default processor is supplied
    // separately as the first constructor argument.
    Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(),
        exprRules, exprCtx);
    GraphWalker egw = new DefaultGraphWalker(disp);

    // Walk the expression tree starting from the expression itself.
    List<Node> startNodes = new ArrayList<Node>();
    startNodes.add(expr);

    HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
    egw.startWalking(startNodes, outputMap);

    // The result for the root expression is what the caller asked for.
    return (Dependency) outputMap.get(expr);
  }
}