// org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils (Maven / Gradle / Ivy)
// The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.listbucketingpruner;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
/**
* Utility methods for the list bucketing pruner.
*
*/
public final class ListBucketingPrunerUtils {
/** Default list bucketing directory name. Internal use only, not for clients. */
public static final String HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME =
    "HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME";

/** Default list bucketing directory key. Internal use only, not for clients. */
public static final String HIVE_LIST_BUCKETING_DEFAULT_KEY = "HIVE_DEFAULT_LIST_BUCKETING_KEY";
/**
 * Decides whether the pruner may skip the directory represented by a skewed value.
 *
 * <p>The argument is the three-valued outcome of matching the skewed value against the
 * expression tree. Only a definite non-match ({@code false}) lets the directory be skipped;
 * a match ({@code true}) or an unknown outcome ({@code null}) keeps the directory.
 *
 * @param bool
 *          whether the skewed value matches the expression tree; {@code null} means unknown
 * @return {@code true} if the pruner should skip the directory, {@code false} otherwise
 */
public static boolean skipSkewedDirectory(Boolean bool) {
  return bool != null && !bool;
}
/**
 * Combines two match results with OR in the context of pruning, where {@code null} stands for
 * "unknown".
 *
 * <pre>
 * Operand one | Operand another | OR result
 * unknown     | T               | T
 * unknown     | F               | unknown
 * unknown     | unknown         | unknown
 * T           | T               | T
 * T           | F               | T
 * T           | unknown         | unknown
 * F           | T               | T
 * F           | F               | F
 * F           | unknown         | unknown
 * </pre>
 *
 * <p>Note that the table is not symmetric: (unknown, T) yields T while (T, unknown) yields
 * unknown.
 *
 * @param o
 *          one operand, {@code null} if unknown
 * @param a
 *          another operand, {@code null} if unknown
 * @return the OR result, {@code null} if unknown
 */
public static Boolean orBoolOperand(Boolean o, Boolean a) {
  // An unknown second operand always makes the result unknown.
  if (a == null) {
    return null;
  }
  // Unknown first operand: only a definite true on the other side settles the result.
  if (o == null) {
    if (a) {
      return a;
    }
    return null;
  }
  return o || a;
}
/**
 * Combines two match results with AND in the context of pruning, where {@code null} stands for
 * "unknown".
 *
 * <pre>
 * Operand one | Operand another | AND result
 * unknown     | T               | unknown
 * unknown     | F               | F
 * unknown     | unknown         | unknown
 * T           | T               | T
 * T           | F               | F
 * T           | unknown         | unknown
 * F           | T               | F
 * F           | F               | F
 * F           | unknown         | F
 * </pre>
 *
 * @param o
 *          one operand, {@code null} if unknown
 * @param a
 *          another operand, {@code null} if unknown
 * @return the AND result, {@code null} if unknown
 */
public static Boolean andBoolOperand(Boolean o, Boolean a) {
  if (o == null) {
    // unknown AND x: only a definite false on the other side decides the result
    return (a != null && !a) ? a : null;
  }
  if (a == null) {
    // x AND unknown: a definite false still wins, otherwise the result stays unknown
    return o ? null : Boolean.FALSE;
  }
  // both operands are known, plain boolean AND applies
  return o && a;
}
/**
 * Negates a match result in the context of pruning, where {@code null} stands for "unknown".
 *
 * <pre>
 * Operand | NOT result
 * T       | F
 * F       | T
 * unknown | unknown
 * </pre>
 *
 * @param input
 *          match result, {@code null} if unknown
 * @return the negated match result, {@code null} if the input is unknown
 */
public static Boolean notBoolOperand(Boolean input) {
  return (input == null) ? null : Boolean.valueOf(!input);
}
/**
* 1. Walk through the tree to decide value
* 1.1 true means the element matches the expression tree
* 1.2 false means the element doesn't match the expression tree
* 1.3 unknown means not sure if the element matches the expression tree
*
* Example:
* skewed column: C1, C2
* cell: (1,a) , (1,b) , (1,c) , (1,other), (2,a), (2,b) , (2,c), (2,other), (other,a), (other,b),
* (other,c), (other,other)
*
* * Expression Tree : ((c1=1) and (c2=a)) or ( (c1=3) or (c2=b))
*
* or
* / \
* and or
* / \ / \
* c1=1 c2=a c1=3 c2=b
* @throws SemanticException
*
*/
static Boolean evaluateExprOnCell(List skewedCols, List cell,
ExprNodeDesc pruner, List> uniqSkewedValues) throws SemanticException {
return recursiveExpr(pruner, skewedCols, cell, uniqSkewedValues);
}
/**
* Walk through expression tree recursively to evaluate.
*
*
* @param node
* @param skewedCols
* @param cell
* @return
* @throws SemanticException
*/
private static Boolean recursiveExpr(final ExprNodeDesc node, final List skewedCols,
final List cell, final List> uniqSkewedValues)
throws SemanticException {
if (isUnknownState(node)) {
return null;
}
if (node instanceof ExprNodeGenericFuncDesc) {
if (((ExprNodeGenericFuncDesc) node).getGenericUDF() instanceof GenericUDFOPEqual) {
return evaluateEqualNd(node, skewedCols, cell, uniqSkewedValues);
} else if (FunctionRegistry.isOpAnd(node)) {
return evaluateAndNode(node, skewedCols, cell, uniqSkewedValues);
} else if (FunctionRegistry.isOpOr(node)) {
return evaluateOrNode(node, skewedCols, cell, uniqSkewedValues);
} else if (FunctionRegistry.isOpNot(node)) {
return evaluateNotNode(node, skewedCols, cell, uniqSkewedValues);
} else {
return null;
}
} else {
return null;
}
}
/**
* Evaluate equal node.
*
*
* @param node
* @param skewedCols
* @param cell
* @param uniqSkewedValues
* @return
* @throws SemanticException
*/
private static Boolean evaluateEqualNd(final ExprNodeDesc node, final List skewedCols,
final List cell, final List> uniqSkewedValues) throws SemanticException {
Boolean result = null;
List children = ((ExprNodeGenericFuncDesc) node).getChildren();
assert ((children != null) && (children.size() == 2)) : "GenericUDFOPEqual should have 2 " +
"ExprNodeDesc. Node name : " + node.getName();
ExprNodeDesc left = children.get(0);
ExprNodeDesc right = children.get(1);
assert (left instanceof ExprNodeColumnDesc && right instanceof ExprNodeConstantDesc) :
"GenericUDFOPEqual should have 2 children: "
+ " the first is ExprNodeColumnDesc and the second is ExprNodeConstantDesc. "
+ "But this one, the first one is " + left.getName() + " and the second is "
+ right.getName();
result = startComparisonInEqualNode(skewedCols, cell, uniqSkewedValues, result, left, right);
return result;
}
/**
* Comparison in equal node
*
* @param skewedCols
* @param cell
* @param uniqSkewedValues
* @param result
* @param left
* @param right
* @return
* @throws SemanticException
*/
private static Boolean startComparisonInEqualNode(final List skewedCols,
final List cell, final List> uniqSkewedValues, Boolean result,
ExprNodeDesc left, ExprNodeDesc right) throws SemanticException {
String columnNameInFilter = ((ExprNodeColumnDesc) left).getColumn();
String constantValueInFilter = ((ExprNodeConstantDesc) right).getValue().toString();
assert (skewedCols.contains(columnNameInFilter)) : "List bucketing pruner has a column name "
+ columnNameInFilter
+ " which is not found in the partiton's skewed column list";
int index = skewedCols.indexOf(columnNameInFilter);
assert (index < cell.size()) : "GenericUDFOPEqual has a ExprNodeColumnDesc ("
+ columnNameInFilter + ") which is " + index + "th" + "skewed column. "
+ " But it can't find the matching part in cell." + " Because the cell size is "
+ cell.size();
String cellValueInPosition = cell.get(index);
assert (index < uniqSkewedValues.size()) : "GenericUDFOPEqual has a ExprNodeColumnDesc ("
+ columnNameInFilter + ") which is " + index + "th" + "skewed column. "
+ " But it can't find the matching part in uniq skewed value list."
+ " Because the cell size is "
+ uniqSkewedValues.size();
List uniqSkewedValuesInPosition = uniqSkewedValues.get(index);
result = coreComparisonInEqualNode(constantValueInFilter, cellValueInPosition,
uniqSkewedValuesInPosition);
return result;
}
/**
 * Compares the cell value with the constant value in the filter.
 *
 * <ol>
 * <li>If they match and the cell value isn't the default ("other") key, the result is
 * {@code true}.</li>
 * <li>If the cell is the default key and the value in the filter is not a skewed value, the
 * result is unknown ({@code null}). Why not {@code true}? Because "not true" is false, while
 * "not unknown" stays unknown. For example, with skewed column C, skewed values 1 and 2 and
 * the clause {@code where not (c = 3)}: if the default cell evaluated {@code (c = 3)} to true,
 * {@code not (c = 3)} would be false and the default directory would wrongly be skipped.
 * With unknown, {@code not (c = 3)} is unknown and the default directory is kept.</li>
 * <li>In all other cases the result is {@code false}.</li>
 * </ol>
 *
 * @param constantValueInFilter
 *          constant value the column is compared with in the filter
 * @param cellValueInPosition
 *          cell value at the column's position
 * @param uniqSkewedValuesInPosition
 *          skewed values of the column
 * @return the match result, {@code null} if unknown
 */
private static Boolean coreComparisonInEqualNode(String constantValueInFilter,
    String cellValueInPosition, List<String> uniqSkewedValuesInPosition) {
  if (cellValueInPosition.equals(ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_KEY)) {
    // Default ("other") cell: a skewed value never lives there, anything else might.
    return uniqSkewedValuesInPosition.contains(constantValueInFilter) ? Boolean.FALSE : null;
  }
  // Regular cell: it matches exactly when its value equals the constant.
  return Boolean.valueOf(cellValueInPosition.equals(constantValueInFilter));
}
private static Boolean evaluateNotNode(final ExprNodeDesc node, final List skewedCols,
final List cell, final List> uniqSkewedValues) throws SemanticException {
List children = ((ExprNodeGenericFuncDesc) node).getChildren();
if ((children == null) || (children.size() != 1)) {
throw new SemanticException("GenericUDFOPNot should have 1 ExprNodeDesc. Node name : "
+ node.getName());
}
ExprNodeDesc child = children.get(0);
return notBoolOperand(recursiveExpr(child, skewedCols, cell, uniqSkewedValues));
}
private static Boolean evaluateOrNode(final ExprNodeDesc node, final List skewedCols,
final List cell, final List> uniqSkewedValues) throws SemanticException {
List children = ((ExprNodeGenericFuncDesc) node).getChildren();
if ((children == null) || (children.size() != 2)) {
throw new SemanticException("GenericUDFOPOr should have 2 ExprNodeDesc. Node name : "
+ node.getName());
}
ExprNodeDesc left = children.get(0);
ExprNodeDesc right = children.get(1);
return orBoolOperand(recursiveExpr(left, skewedCols, cell, uniqSkewedValues),
recursiveExpr(right, skewedCols, cell, uniqSkewedValues));
}
private static Boolean evaluateAndNode(final ExprNodeDesc node, final List skewedCols,
final List cell, final List> uniqSkewedValues) throws SemanticException {
List children = ((ExprNodeGenericFuncDesc) node).getChildren();
if ((children == null) || (children.size() != 2)) {
throw new SemanticException("GenericUDFOPAnd should have 2 ExprNodeDesc. Node name : "
+ node.getName());
}
ExprNodeDesc left = children.get(0);
ExprNodeDesc right = children.get(1);
return andBoolOperand(recursiveExpr(left, skewedCols, cell, uniqSkewedValues),
recursiveExpr(right, skewedCols, cell, uniqSkewedValues));
}
/**
 * Checks whether a node represents the "unknown" state.
 *
 * <p>A node is unknown when it is {@code null}, or when it is an
 * {@link ExprNodeConstantDesc} whose value is {@code null}. Such a marker is created outside
 * this class, in the form
 *
 * <pre>
 * newcd = new ExprNodeConstantDesc(cd.getTypeInfo(), null)
 * </pre>
 *
 * for example for
 * <ol>
 * <li>a non-skewed column</li>
 * <li>a function other than and/or/not ...</li>
 * </ol>
 *
 * @param descNd
 *          node to check, may be {@code null}
 * @return {@code true} if the node is unknown, {@code false} otherwise
 */
static boolean isUnknownState(ExprNodeDesc descNd) {
  if (descNd == null) {
    return true;
  }
  return (descNd instanceof ExprNodeConstantDesc)
      && (((ExprNodeConstantDesc) descNd).getValue() == null);
}
/**
 * Checks whether the partition is a list bucketing partition.
 *
 * <p>That is the case when its skewed column names, its skewed column values and its skewed
 * column value location maps are all present and non-empty.
 *
 * @param part
 *          partition to check
 * @return {@code true} if the partition is list bucketing, {@code false} otherwise
 */
public static boolean isListBucketingPart(Partition part) {
  if ((part.getSkewedColNames() == null) || (part.getSkewedColNames().size() <= 0)) {
    return false;
  }
  if ((part.getSkewedColValues() == null) || (part.getSkewedColValues().size() <= 0)) {
    return false;
  }
  if (part.getSkewedColValueLocationMaps() == null) {
    return false;
  }
  return part.getSkewedColValueLocationMaps().size() > 0;
}
}