// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.plan;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.util.ReflectionUtils;
public class ExprNodeDescUtils {
/**
 * Finds the position of the first element of {@code sources} for which
 * {@code origin.isSame(element)} holds.
 *
 * @param origin expression to look for
 * @param sources expressions to search
 * @return index of the first match, or -1 when nothing matches
 */
public static int indexOf(ExprNodeDesc origin, List<ExprNodeDesc> sources) {
  for (int i = 0; i < sources.size(); i++) {
    if (origin.isSame(sources.get(i))) {
      return i;
    }
  }
  return -1;
}
/**
 * Traverses {@code origin}, and substitutes every sub-expression found in
 * {@code sources} with the expression at the same index in {@code targets}.
 *
 * @param origin expression to rewrite
 * @param sources expressions to look for
 * @param targets replacements, index-aligned with {@code sources}
 * @return the rewritten expression; {@code origin} itself when it is neither
 *         a column, a field nor a function (constant or null expression);
 *         {@code null} when a column or field cannot be found in
 *         {@code sources}, or when a function is non-deterministic or stateful
 */
public static ExprNodeDesc replace(ExprNodeDesc origin,
    List<ExprNodeDesc> sources, List<ExprNodeDesc> targets) {
  int index = indexOf(origin, sources);
  if (index >= 0) {
    return targets.get(index);
  }
  // encountered column or field which cannot be found in sources
  if (origin instanceof ExprNodeColumnDesc || origin instanceof ExprNodeFieldDesc) {
    return null;
  }
  // for ExprNodeGenericFuncDesc, it should be deterministic and stateless
  if (origin instanceof ExprNodeGenericFuncDesc) {
    ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) origin;
    if (!FunctionRegistry.isDeterministic(func.getGenericUDF())
        || FunctionRegistry.isStateful(func.getGenericUDF())) {
      return null;
    }
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < origin.getChildren().size(); i++) {
      ExprNodeDesc child = replace(origin.getChildren().get(i), sources, targets);
      if (child == null) {
        // one unresolved child makes the whole function unresolved
        return null;
      }
      children.add(child);
    }
    // duplicate function with possibly replaced children
    ExprNodeGenericFuncDesc clone = (ExprNodeGenericFuncDesc) func.clone();
    clone.setChildren(children);
    return clone;
  }
  // constant or null, just return it
  return origin;
}
/**
 * Returns true if {@code predicate} is already included in {@code source}.
 *
 * <p>{@code source} matches when it is the same as {@code predicate}, or when
 * it is an AND whose operands (searched recursively) contain it. Every
 * operand is inspected, because an AND may carry more than two children.
 *
 * @param source expression to search in
 * @param predicate expression to look for
 * @return true when {@code predicate} is found
 */
public static boolean containsPredicate(ExprNodeDesc source, ExprNodeDesc predicate) {
  if (source.isSame(predicate)) {
    return true;
  }
  if (FunctionRegistry.isOpAnd(source)) {
    for (ExprNodeDesc child : source.getChildren()) {
      if (containsPredicate(child, predicate)) {
        return true;
      }
    }
  }
  return false;
}
/**
 * Binds two predicates with a single AND operator.
 *
 * <p>When either argument is itself an AND, its children are added directly
 * instead of nesting the AND, so the result is one flat AND node.
 *
 * @param prev first predicate
 * @param next second predicate
 * @return a new boolean-typed AND function over the collected operands
 */
public static ExprNodeGenericFuncDesc mergePredicates(ExprNodeDesc prev, ExprNodeDesc next) {
  final List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(2);
  if (FunctionRegistry.isOpAnd(prev)) {
    children.addAll(prev.getChildren());
  } else {
    children.add(prev);
  }
  if (FunctionRegistry.isOpAnd(next)) {
    children.addAll(next.getChildren());
  } else {
    children.add(next);
  }
  return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
      FunctionRegistry.getGenericUDFForAnd(), children);
}
/**
 * Binds n predicates with the AND operator, folding the list left to right.
 *
 * @param exprs predicates to combine
 * @return {@code null} for an empty list, the single element for a
 *         one-element list, otherwise the merged AND expression
 */
public static ExprNodeDesc mergePredicates(List<ExprNodeDesc> exprs) {
  ExprNodeDesc prev = null;
  for (ExprNodeDesc expr : exprs) {
    if (prev == null) {
      // nothing accumulated so far: start from this expression
      prev = expr;
      continue;
    }
    prev = mergePredicates(prev, expr);
  }
  return prev;
}
/**
 * Splits a predicate by the AND operator into a fresh list.
 *
 * @param current predicate to split
 * @return the operands collected by {@link #split(ExprNodeDesc, List)}
 */
public static List<ExprNodeDesc> split(ExprNodeDesc current) {
  return split(current, new ArrayList<ExprNodeDesc>());
}
/**
 * Splits a predicate by the AND operator, appending the operands to
 * {@code splitted}.
 *
 * <p>AND nodes are descended into recursively. Any other expression is
 * appended only when {@code splitted} does not already hold an expression
 * that is the same.
 *
 * @param current predicate to split
 * @param splitted accumulator; modified in place
 * @return {@code splitted}
 */
public static List<ExprNodeDesc> split(ExprNodeDesc current, List<ExprNodeDesc> splitted) {
  if (FunctionRegistry.isOpAnd(current)) {
    for (ExprNodeDesc child : current.getChildren()) {
      split(child, splitted);
    }
    return splitted;
  }
  // skip duplicates
  if (indexOf(current, splitted) < 0) {
    splitted.add(current);
  }
  return splitted;
}
/**
 * Recommends a name for the expression.
 *
 * @param desc expression to name
 * @return the column name when {@code desc} is a column; the name of the
 *         first child when {@code desc} is an input-name-preserving operator
 *         (per {@code FunctionRegistry.isOpPreserveInputName}) whose first
 *         child is a column; otherwise {@code null}
 */
public static String recommendInputName(ExprNodeDesc desc) {
  if (desc instanceof ExprNodeColumnDesc) {
    return ((ExprNodeColumnDesc) desc).getColumn();
  }
  List<ExprNodeDesc> children = desc.getChildren();
  // children may be absent; treat that the same as "no first child"
  if (FunctionRegistry.isOpPreserveInputName(desc) && children != null && !children.isEmpty()
      && children.get(0) instanceof ExprNodeColumnDesc) {
    return ((ExprNodeColumnDesc) children.get(0)).getColumn();
  }
  return null;
}
/**
 * Checks the whole expression tree for non-deterministic functions.
 *
 * @param desc root of the expression tree
 * @return false if {@code desc} or any of its descendants is a function that
 *         {@code FunctionRegistry} reports as non-deterministic, else true
 */
public static boolean isDeterministic(ExprNodeDesc desc) {
  boolean nonDeterministicFunction = desc instanceof ExprNodeGenericFuncDesc
      && !FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) desc).getGenericUDF());
  if (nonDeterministicFunction) {
    return false;
  }
  List<ExprNodeDesc> operands = desc.getChildren();
  if (operands == null) {
    // leaf expression: nothing further to inspect
    return true;
  }
  for (ExprNodeDesc operand : operands) {
    if (!isDeterministic(operand)) {
      return false;
    }
  }
  return true;
}
/**
 * Clones every expression of {@code sources} via {@code ExprNodeDesc.clone()}.
 *
 * @param sources expressions to copy
 * @return a new list holding the clones, in the same order
 */
public static ArrayList<ExprNodeDesc> clone(List<ExprNodeDesc> sources) {
  ArrayList<ExprNodeDesc> result = new ArrayList<ExprNodeDesc>(sources.size());
  for (ExprNodeDesc expr : sources) {
    result.add(expr.clone());
  }
  return result;
}
/**
* Convert expressions in current operator to those in terminal operator, which
* is an ancestor of current or null (back to top operator).
* Possibly contain null values for non-traceable exprs
*/
public static ArrayList backtrack(List sources,
Operator> current, Operator> terminal) throws SemanticException {
return backtrack(sources, current, terminal, false);
}
public static ArrayList backtrack(List sources,
Operator> current, Operator> terminal, boolean foldExpr) throws SemanticException {
ArrayList result = new ArrayList();
for (ExprNodeDesc expr : sources) {
result.add(backtrack(expr, current, terminal, foldExpr));
}
return result;
}
public static ExprNodeDesc backtrack(ExprNodeDesc source, Operator> current,
Operator> terminal) throws SemanticException {
return backtrack(source, current, terminal, false);
}
public static ExprNodeDesc backtrack(ExprNodeDesc source, Operator> current,
Operator> terminal, boolean foldExpr) throws SemanticException {
Operator> parent = getSingleParent(current, terminal);
if (parent == null) {
return source;
}
if (source instanceof ExprNodeGenericFuncDesc) {
// all children expression should be resolved
ExprNodeGenericFuncDesc function = (ExprNodeGenericFuncDesc) source.clone();
List children = backtrack(function.getChildren(), current, terminal, foldExpr);
for (ExprNodeDesc child : children) {
if (child == null) {
// Could not resolve all of the function children, fail
return null;
}
}
function.setChildren(children);
if (foldExpr) {
// fold after replacing, if possible
ExprNodeDesc foldedFunction = ConstantPropagateProcFactory.foldExpr(function);
if (foldedFunction != null) {
return foldedFunction;
}
}
return function;
}
if (source instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc column = (ExprNodeColumnDesc) source;
return backtrack(column, parent, terminal);
}
if (source instanceof ExprNodeFieldDesc) {
// field expression should be resolved
ExprNodeFieldDesc field = (ExprNodeFieldDesc) source.clone();
ExprNodeDesc fieldDesc = backtrack(field.getDesc(), current, terminal, foldExpr);
if (fieldDesc == null) {
return null;
}
field.setDesc(fieldDesc);
return field;
}
// constant or null expr, just return
return source;
}
// Resolve column expression to input expression by using expression mapping in current operator
private static ExprNodeDesc backtrack(ExprNodeColumnDesc column, Operator> current,
Operator> terminal) throws SemanticException {
Map mapping = current.getColumnExprMap();
if (mapping == null) {
return backtrack((ExprNodeDesc)column, current, terminal);
}
ExprNodeDesc mapped = mapping.get(column.getColumn());
return mapped == null ? null : backtrack(mapped, current, terminal);
}
public static Operator> getSingleParent(Operator> current, Operator> terminal)
throws SemanticException {
if (current == terminal) {
return null;
}
List> parents = current.getParentOperators();
if (parents == null || parents.isEmpty()) {
if (terminal != null) {
throw new SemanticException("Failed to meet terminal operator");
}
return null;
}
if (parents.size() == 1) {
return parents.get(0);
}
if (terminal != null && parents.contains(terminal)) {
return terminal;
}
throw new SemanticException("Met multiple parent operators");
}
public static List resolveJoinKeysAsRSColumns(List sourceList,
Operator> reduceSinkOp) {
ArrayList result = new ArrayList(sourceList.size());
for (ExprNodeDesc source : sourceList) {
ExprNodeDesc newExpr = resolveJoinKeysAsRSColumns(source, reduceSinkOp);
if (newExpr == null) {
return null;
}
result.add(newExpr);
}
return result;
}
/**
* Join keys are expressions based on the select operator. Resolve the expressions so they
* are based on the ReduceSink operator
* SEL -> RS -> JOIN
* @param source
* @param reduceSinkOp
* @return
*/
public static ExprNodeDesc resolveJoinKeysAsRSColumns(ExprNodeDesc source, Operator> reduceSinkOp) {
// Assuming this is only being done for join keys. As a result we shouldn't have to recursively
// check any nested child expressions, because the result of the expression should exist as an
// output column of the ReduceSink operator
if (source == null) {
return null;
}
// columnExprMap has the reverse of what we need - a mapping of the internal column names
// to the ExprNodeDesc from the previous operation.
// Find the key/value where the ExprNodeDesc value matches the column we are searching for.
// The key portion of the entry will be the internal column name for the join key expression.
for (Map.Entry mapEntry : reduceSinkOp.getColumnExprMap().entrySet()) {
if (mapEntry.getValue().isSame(source)) {
String columnInternalName = mapEntry.getKey();
if (source instanceof ExprNodeColumnDesc) {
// The join key is a table column. Create the ExprNodeDesc based on this column.
ColumnInfo columnInfo = reduceSinkOp.getSchema().getColumnInfo(columnInternalName);
return new ExprNodeColumnDesc(columnInfo);
} else {
// Join key expression is likely some expression involving functions/operators, so there
// is no actual table column for this. But the ReduceSink operator should still have an
// output column corresponding to this expression, using the columnInternalName.
// TODO: does tableAlias matter for this kind of expression?
return new ExprNodeColumnDesc(source.getTypeInfo(), columnInternalName, "", false);
}
}
}
return null; // Couldn't find reference to expression
}
/**
 * Tries to view two comparison operands as a column/constant pair.
 *
 * <p>Both operands are first passed through {@code extractConstant}. The
 * result keeps the operand order. When one side is a field access, the
 * column it is rooted at takes the field's place and the field expression
 * itself is appended as a third element.
 *
 * @param expr1 first operand
 * @param expr2 second operand
 * @return a 2- or 3-element array as described above, or {@code null} when
 *         the operands are not a column-or-field / constant combination
 */
public static ExprNodeDesc[] extractComparePair(ExprNodeDesc expr1, ExprNodeDesc expr2) {
  ExprNodeDesc left = extractConstant(expr1);
  ExprNodeDesc right = extractConstant(expr2);
  boolean leftIsConstant = left instanceof ExprNodeConstantDesc;
  boolean rightIsConstant = right instanceof ExprNodeConstantDesc;

  // plain column on one side, constant on the other
  if ((left instanceof ExprNodeColumnDesc && rightIsConstant)
      || (leftIsConstant && right instanceof ExprNodeColumnDesc)) {
    return new ExprNodeDesc[] {left, right};
  }
  // handles cases where the query has a predicate "column-name=constant"
  if (left instanceof ExprNodeFieldDesc && rightIsConstant) {
    ExprNodeColumnDesc root = extractColumn(left);
    if (root == null) {
      return null;
    }
    return new ExprNodeDesc[] {root, right, left};
  }
  // handles cases where the query has a predicate "constant=column-name"
  if (leftIsConstant && right instanceof ExprNodeFieldDesc) {
    ExprNodeColumnDesc root = extractColumn(right);
    if (root == null) {
      return null;
    }
    return new ExprNodeDesc[] {left, root, right};
  }
  // todo: constant op constant
  return null;
}
/**
 * Extracts the field names from the given {@link ExprNodeFieldDesc node descriptor}.
 *
 * @param expr field access expression
 * @return the field names, outermost access first
 * @throws IllegalStateException if the chain of nested descriptors reaches
 *         something that is neither a field nor a column
 */
public static String[] extractFields(ExprNodeFieldDesc expr) {
  return extractFields(expr, new ArrayList<String>()).toArray(new String[0]);
}
/**
 * Recursively extracts field names from an ExprNodeDesc. Deeply nested
 * structs can have multiple levels of fields in them.
 *
 * @param expr current expression; expected to be a field or a column
 * @param fields accumulator; each field name met is appended
 * @return {@code fields}, once a column is reached
 * @throws IllegalStateException if {@code expr} is neither a field nor a column
 */
private static List<String> extractFields(ExprNodeDesc expr, List<String> fields) {
  if (expr instanceof ExprNodeFieldDesc) {
    ExprNodeFieldDesc field = (ExprNodeFieldDesc) expr;
    fields.add(field.getFieldName());
    return extractFields(field.getDesc(), fields);
  }
  if (expr instanceof ExprNodeColumnDesc) {
    // reached the column the field chain is rooted at
    return fields;
  }
  throw new IllegalStateException(
      "Unexpected exception while extracting fields from ExprNodeDesc");
}
/**
 * Finds the column that a (possibly nested) field access is rooted at.
 *
 * @param expr a column, or a chain of field accesses
 * @return the column reached after unwrapping all field accesses, or
 *         {@code null} if the innermost expression is not a column
 */
private static ExprNodeColumnDesc extractColumn(ExprNodeDesc expr) {
  ExprNodeDesc cursor = expr;
  // peel off field accesses until something else is reached
  while (cursor instanceof ExprNodeFieldDesc) {
    cursor = ((ExprNodeFieldDesc) cursor).getDesc();
  }
  if (cursor instanceof ExprNodeColumnDesc) {
    return (ExprNodeColumnDesc) cursor;
  }
  return null;
}
/**
 * Replaces a function expression by its folded constant when folding
 * succeeds (from IndexPredicateAnalyzer).
 *
 * @param expr any expression
 * @return the constant produced by {@code foldConstant} when {@code expr} is
 *         a function that can be folded; otherwise {@code expr} unchanged
 */
private static ExprNodeDesc extractConstant(ExprNodeDesc expr) {
  if (expr instanceof ExprNodeGenericFuncDesc) {
    ExprNodeConstantDesc constant = foldConstant((ExprNodeGenericFuncDesc) expr);
    if (constant != null) {
      return constant;
    }
  }
  return expr;
}
/**
 * Attempts to evaluate a function expression to a constant.
 *
 * <p>Folding is refused (the result is {@code null}) when the function is
 * non-deterministic or stateful, when it declares required files or jars,
 * when any child is neither a constant nor a foldable function, or when
 * anything throws while checking or evaluating.
 *
 * @param func function expression to fold
 * @return the folded constant, or {@code null} when folding is not possible
 */
private static ExprNodeConstantDesc foldConstant(ExprNodeGenericFuncDesc func) {
  GenericUDF genericUdf = func.getGenericUDF();
  boolean foldable = FunctionRegistry.isDeterministic(genericUdf)
      && !FunctionRegistry.isStateful(genericUdf);
  if (!foldable) {
    return null;
  }
  try {
    // If the UDF depends on any external resources, we can't fold because the
    // resources may not be available at compile time.
    boolean needsResources;
    if (genericUdf instanceof GenericUDFBridge) {
      UDF bridged = ReflectionUtils.newInstance(((GenericUDFBridge) genericUdf).getUdfClass(), null);
      needsResources = bridged.getRequiredFiles() != null || bridged.getRequiredJars() != null;
    } else {
      needsResources = genericUdf.getRequiredFiles() != null || genericUdf.getRequiredJars() != null;
    }
    if (needsResources) {
      return null;
    }

    List<ExprNodeDesc> operands = func.getChildren();
    if (operands != null) {
      for (ExprNodeDesc operand : operands) {
        // every operand must itself be a constant or fold to one
        boolean constantOperand = operand instanceof ExprNodeConstantDesc
            || (operand instanceof ExprNodeGenericFuncDesc
                && foldConstant((ExprNodeGenericFuncDesc) operand) != null);
        if (!constantOperand) {
          return null;
        }
      }
    }

    ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(func);
    ObjectInspector resultInspector = evaluator.initialize(null);
    Object rawValue = evaluator.evaluate(null);
    Object javaValue = ObjectInspectorUtils.copyToStandardJavaObject(rawValue, resultInspector);
    return new ExprNodeConstantDesc(javaValue);
  } catch (Exception e) {
    // best effort: any failure simply means "not foldable"
    return null;
  }
}
/**
 * Collects the column expressions referenced by each expression of
 * {@code exprDescList} into {@code hashCodeTocolumnDescMap}.
 *
 * @param exprDescList expressions to scan
 * @param hashCodeTocolumnDescMap output map from a column expression's hash
 *        code to that column expression; modified in place
 */
public static void getExprNodeColumnDesc(List<ExprNodeDesc> exprDescList,
    Map<Integer, ExprNodeDesc> hashCodeTocolumnDescMap) {
  for (ExprNodeDesc exprNodeDesc : exprDescList) {
    getExprNodeColumnDesc(exprNodeDesc, hashCodeTocolumnDescMap);
  }
}
/**
 * Get Map of ExprNodeColumnDesc HashCode to ExprNodeColumnDesc.
 *
 * <p>Columns are recorded directly. Column lists and functions are searched
 * through their children, field accesses through their inner descriptor.
 * Any other kind of expression contributes nothing.
 *
 * @param exprDesc expression to scan
 * @param hashCodeToColumnDescMap output map; modified in place.
 *        Assumption: If two ExprNodeColumnDesc have same hash code then
 *        they are logically referring to same projection
 */
public static void getExprNodeColumnDesc(ExprNodeDesc exprDesc,
    Map<Integer, ExprNodeDesc> hashCodeToColumnDescMap) {
  if (exprDesc instanceof ExprNodeColumnDesc) {
    hashCodeToColumnDescMap.put(exprDesc.hashCode(), exprDesc);
  } else if (exprDesc instanceof ExprNodeColumnListDesc
      || exprDesc instanceof ExprNodeGenericFuncDesc) {
    List<ExprNodeDesc> children = exprDesc.getChildren();
    // a node without children references no columns
    if (children != null) {
      for (ExprNodeDesc child : children) {
        getExprNodeColumnDesc(child, hashCodeToColumnDescMap);
      }
    }
  } else if (exprDesc instanceof ExprNodeFieldDesc) {
    getExprNodeColumnDesc(((ExprNodeFieldDesc) exprDesc).getDesc(),
        hashCodeToColumnDescMap);
  }
}
/**
 * Tells whether an expression is a constant, or a deterministic function
 * whose arguments are all (recursively) constant.
 *
 * @param value expression to test
 * @return true for a constant, and for a deterministic function with only
 *         constant arguments (including one with no arguments); false for
 *         everything else
 */
public static boolean isConstant(ExprNodeDesc value) {
  if (value instanceof ExprNodeConstantDesc) {
    return true;
  }
  if (value instanceof ExprNodeGenericFuncDesc) {
    ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) value;
    if (!FunctionRegistry.isDeterministic(func.getGenericUDF())) {
      return false;
    }
    List<ExprNodeDesc> children = func.getChildren();
    // absent children are treated like an empty argument list
    if (children != null) {
      for (ExprNodeDesc child : children) {
        if (!isConstant(child)) {
          return false;
        }
      }
    }
    return true;
  }
  return false;
}
/**
 * Tells whether every expression in the list is an
 * {@link ExprNodeConstantDesc}. Functions are not inspected or folded.
 *
 * @param value expressions to test
 * @return true when all elements are constants (also for an empty list)
 */
public static boolean isAllConstants(List<ExprNodeDesc> value) {
  for (ExprNodeDesc expr : value) {
    if (!(expr instanceof ExprNodeConstantDesc)) {
      return false;
    }
  }
  return true;
}
/**
 * Tells whether the expression is a constant whose value is {@code null}.
 *
 * @param value expression to test
 * @return true only for an {@link ExprNodeConstantDesc} holding {@code null}
 */
public static boolean isNullConstant(ExprNodeDesc value) {
  return value instanceof ExprNodeConstantDesc
      && ((ExprNodeConstantDesc) value).getValue() == null;
}
/**
 * Derives the type an argument should be cast to for a given target type.
 *
 * <p>Only decimal targets with a primitive-typed child are adjusted: the
 * result is then the decimal type that {@code HiveDecimalUtils} associates
 * with the child's type. In every other case the target type is returned
 * unchanged.
 *
 * @param childExpr argument expression
 * @param targetType requested type; asserted to be primitive
 * @return the type to cast {@code childExpr} to
 */
public static PrimitiveTypeInfo deriveMinArgumentCast(
    ExprNodeDesc childExpr, TypeInfo targetType) {
  assert targetType instanceof PrimitiveTypeInfo : "Not a primitive type" + targetType;
  PrimitiveTypeInfo target = (PrimitiveTypeInfo) targetType;
  // We only do the minimum cast for decimals. Other types are assumed safe; fix if needed.
  if (target.getPrimitiveCategory() != PrimitiveCategory.DECIMAL) {
    return target;
  }
  // We also don't do anything for non-primitive children (maybe we should assert).
  TypeInfo childType = childExpr.getTypeInfo();
  if (!(childType instanceof PrimitiveTypeInfo)) {
    return target;
  }
  // If the child is also decimal, no cast is needed (we hope - can target type be narrower?).
  return HiveDecimalUtils.getDecimalTypeForPrimitiveCategory((PrimitiveTypeInfo) childType);
}
/**
 * Builds an ExprNodeColumnDesc for each projection of the input operator's
 * schema signature from startPos to endPos (both included).
 *
 * @param inputOp
 *          Input Hive Operator
 * @param startPos
 *          starting position in the input operator schema; must be >=0 and <=
 *          endPos
 * @param endPos
 *          end position in the input operator schema; must be >=0.
 * @param addEmptyTabAlias
 *          when true, every generated column gets an empty table alias
 *          instead of the alias recorded in the schema
 * @param setColToNonVirtual
 *          when true, every generated column is marked as non-virtual
 *          regardless of what the schema records
 * @return List of ExprNodeDesc
 */
public static ArrayList<ExprNodeDesc> genExprNodeDesc(Operator<?> inputOp, int startPos, int endPos,
    boolean addEmptyTabAlias, boolean setColToNonVirtual) {
  ArrayList<ExprNodeDesc> exprColLst = new ArrayList<ExprNodeDesc>();
  List<ColumnInfo> colInfoLst = inputOp.getSchema().getSignature();
  for (int i = startPos; i <= endPos; i++) {
    ColumnInfo ci = colInfoLst.get(i);
    String tabAlias = addEmptyTabAlias ? "" : ci.getTabAlias();
    boolean vc = setColToNonVirtual ? false : ci.getIsVirtualCol();
    exprColLst.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), tabAlias, vc));
  }
  return exprColLst;
}
/**
 * Applies {@link #flattenExpr(ExprNodeDesc)} to every expression of the list.
 *
 * @param sourceList expressions to flatten
 * @return a new list with the flattened expressions, in the same order;
 *         entries may be {@code null} where flattening failed
 */
public static List<ExprNodeDesc> flattenExprList(List<ExprNodeDesc> sourceList) {
  ArrayList<ExprNodeDesc> result = new ArrayList<ExprNodeDesc>(sourceList.size());
  for (ExprNodeDesc source : sourceList) {
    result.add(flattenExpr(source));
  }
  return result;
}
/**
 * A normal reduce operator's rowObjectInspector looks like a struct containing
 * nested key/value structs that contain the column values:
 * { key: { reducesinkkey0:int }, value: { _col0:int, _col1:int, .. } }
 *
 * While the rowObjectInspector looks the same for vectorized queries during
 * compilation time, within the tasks at query execution the rowObjectInspector
 * has changed to a flatter structure without nested key/value structs:
 * { 'key.reducesinkkey0':int, 'value._col0':int, 'value._col1':int, .. }
 *
 * Trying to fetch 'key.reducesinkkey0' by name from the list of flattened
 * ObjectInspectors does not work because the '.' gets interpreted as a field member,
 * even though it is a flattened list of column values.
 * This workaround converts the column name referenced in the ExprNodeDesc
 * from a nested field name (key.reducesinkkey0) to key_reducesinkkey0,
 * simply by replacing '.' with '_'.
 *
 * @param source expression to flatten
 * @return a copy of {@code source} in which every column name has '.'
 *         replaced by '_'; {@code source} itself when it is neither a
 *         function, a column nor a field; {@code null} when a nested
 *         expression flattens to {@code null}
 */
public static ExprNodeDesc flattenExpr(ExprNodeDesc source) {
  if (source instanceof ExprNodeGenericFuncDesc) {
    // all children expression should be resolved
    ExprNodeGenericFuncDesc function = (ExprNodeGenericFuncDesc) source.clone();
    List<ExprNodeDesc> newChildren = flattenExprList(function.getChildren());
    for (ExprNodeDesc newChild : newChildren) {
      if (newChild == null) {
        // Could not resolve all of the function children, fail
        return null;
      }
    }
    function.setChildren(newChildren);
    return function;
  }
  if (source instanceof ExprNodeColumnDesc) {
    ExprNodeColumnDesc column = (ExprNodeColumnDesc) source;
    // Create a new ColumnInfo, replacing STRUCT.COLUMN with STRUCT_COLUMN
    String newColumn = column.getColumn().replace('.', '_');
    return new ExprNodeColumnDesc(source.getTypeInfo(), newColumn, column.getTabAlias(), false);
  }
  if (source instanceof ExprNodeFieldDesc) {
    // field expression should be resolved
    ExprNodeFieldDesc field = (ExprNodeFieldDesc) source.clone();
    ExprNodeDesc fieldDesc = flattenExpr(field.getDesc());
    if (fieldDesc == null) {
      return null;
    }
    field.setDesc(fieldDesc);
    return field;
  }
  // constant or null expr, just return
  return source;
}
/**
 * Extracts the single column name referenced below {@code root}.
 *
 * @param root expression to inspect
 * @return the column name when {@code root} is a column; otherwise the one
 *         name found among its children (searched recursively), or
 *         {@code null} when there are no children, no child yields a name,
 *         or more than one child yields a name
 */
public static String extractColName(ExprNodeDesc root) {
  if (root instanceof ExprNodeColumnDesc) {
    return ((ExprNodeColumnDesc) root).getColumn();
  }
  List<ExprNodeDesc> operands = root.getChildren();
  if (operands == null) {
    return null;
  }
  String found = null;
  for (ExprNodeDesc operand : operands) {
    String name = extractColName(operand);
    if (name == null) {
      continue;
    }
    if (found != null) {
      // a second child also yields a column name: ambiguous
      return null;
    }
    found = name;
  }
  return found;
}
/**
 * Strips enclosing cast operators and returns the column underneath.
 *
 * @param expr expression, possibly wrapped in one or more casts
 * @return the column reached after following the first child of every cast,
 *         or {@code null} if what remains is not a column
 */
public static ExprNodeColumnDesc getColumnExpr(ExprNodeDesc expr) {
  ExprNodeDesc inner = expr;
  while (FunctionRegistry.isOpCast(inner)) {
    inner = inner.getChildren().get(0);
  }
  if (inner instanceof ExprNodeColumnDesc) {
    return (ExprNodeColumnDesc) inner;
  }
  return null;
}
// Find the constant origin of a certain column if it is originated from a constant
// Otherwise, it returns the expression that originated the column
public static ExprNodeDesc findConstantExprOrigin(String dpCol, Operator extends OperatorDesc> op) {
ExprNodeDesc expr = op.getColumnExprMap().get(dpCol);
ExprNodeDesc foldedExpr;
// If it is a function, we try to fold it
if (expr instanceof ExprNodeGenericFuncDesc) {
foldedExpr = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc)expr);
if (foldedExpr == null) {
foldedExpr = expr;
}
} else {
foldedExpr = expr;
}
// If it is a column reference, we will try to resolve it
if (foldedExpr instanceof ExprNodeColumnDesc) {
Operator extends OperatorDesc> originOp = null;
for(Operator extends OperatorDesc> parentOp : op.getParentOperators()) {
if (parentOp.getColumnExprMap() != null) {
originOp = parentOp;
break;
}
}
if (originOp != null) {
return findConstantExprOrigin(((ExprNodeColumnDesc)foldedExpr).getColumn(), originOp);
}
}
// Otherwise, we return the expression
return foldedExpr;
}
}