Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download or modify it as often as you want.
org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.io.sarg;
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.facebook.presto.hive.$internal.org.apache.commons.codec.binary.Base64;
import com.facebook.presto.hive.$internal.org.apache.commons.lang.StringUtils;
import com.facebook.presto.hive.$internal.org.apache.commons.logging.Log;
import com.facebook.presto.hive.$internal.org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder;
import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import parquet.filter2.predicate.FilterApi;
import parquet.filter2.predicate.FilterPredicate;
/**
* The implementation of SearchArguments.
*/
final class SearchArgumentImpl implements SearchArgument {
// Logger for this class; the nested ExpressionTree also reports leaf-translation failures through it.
public static final Log LOG = LogFactory.getLog(SearchArgumentImpl.class);
/**
 * A single predicate leaf: one operator applied to one column together with
 * either a single literal or a list of literals.
 */
static final class PredicateLeafImpl implements PredicateLeaf {
  private final Operator operator;
  private final Type type;
  private final String columnName;
  private final Object literal;
  private final List<Object> literalList;

  /**
   * Creates a leaf with every field set to null. Nothing in this file calls
   * it directly; presumably it exists for the Kryo round trip done by
   * toKryo/fromKryo (TODO confirm).
   */
  PredicateLeafImpl() {
    operator = null;
    type = null;
    columnName = null;
    literal = null;
    literalList = null;
  }

  /**
   * @param operator the comparison operator
   * @param type the type of the column/literal
   * @param columnName the name of the column being tested
   * @param literal the single literal, or null if the operator has none
   * @param literalList the list of literals, or null if the operator has none
   */
  PredicateLeafImpl(Operator operator,
                    Type type,
                    String columnName,
                    Object literal,
                    List<Object> literalList) {
    this.operator = operator;
    this.type = type;
    this.columnName = columnName;
    this.literal = literal;
    this.literalList = literalList;
  }

  @Override
  public Operator getOperator() {
    return operator;
  }

  @Override
  public Type getType() {
    return type;
  }

  @Override
  public String getColumnName() {
    return columnName;
  }

  @Override
  public Object getLiteral() {
    return literal;
  }

  @Override
  public List<Object> getLiteralList() {
    return literalList;
  }

  /**
   * Renders the leaf as "(operator column literal...)". The single literal
   * wins over the literal list when both are present.
   */
  @Override
  public String toString() {
    StringBuilder buffer = new StringBuilder();
    buffer.append('(');
    buffer.append(operator);
    buffer.append(' ');
    buffer.append(columnName);
    if (literal != null) {
      buffer.append(' ');
      buffer.append(literal);
    } else if (literalList != null) {
      for (Object lit : literalList) {
        buffer.append(' ');
        // append(Object) prints "null" for a null element instead of throwing
        buffer.append(lit);
      }
    }
    buffer.append(')');
    return buffer.toString();
  }

  /** Null-safe equality of two objects. */
  private static boolean isEqual(Object left, Object right) {
    if (left == right) {
      return true;
    } else if (left == null || right == null) {
      return false;
    } else {
      return left.equals(right);
    }
  }

  /** Null-safe hash code; null hashes to 0. */
  private static int hashOf(Object value) {
    return value == null ? 0 : value.hashCode();
  }

  @Override
  public boolean equals(Object other) {
    if (other == null || other.getClass() != getClass()) {
      return false;
    } else if (other == this) {
      return true;
    } else {
      PredicateLeafImpl o = (PredicateLeafImpl) other;
      // columnName is compared null-safely because the no-arg constructor
      // leaves it null
      return operator == o.operator &&
          type == o.type &&
          isEqual(columnName, o.columnName) &&
          isEqual(literal, o.literal) &&
          isEqual(literalList, o.literalList);
    }
  }

  /**
   * Same weighting as before for non-null fields; null fields now contribute
   * 0 instead of causing a NullPointerException.
   */
  @Override
  public int hashCode() {
    return hashOf(operator) +
        hashOf(type) * 17 +
        hashOf(columnName) * 3 * 17 +
        hashOf(literal) * 101 * 3 * 17 +
        hashOf(literalList) * 103 * 101 * 3 * 17;
  }
}
/**
 * A node of the boolean expression over predicate leaves. A node is an
 * OR/AND/NOT over child nodes, a LEAF holding an index into a leaf list, or
 * a CONSTANT truth value.
 */
static class ExpressionTree {
  static enum Operator {OR, AND, NOT, LEAF, CONSTANT}

  private final Operator operator;
  // null for LEAF and CONSTANT nodes
  private final List<ExpressionTree> children;
  // index into the leaf list for LEAF nodes, -1 for nodes built by the other constructors
  private final int leaf;
  // the value of a CONSTANT node, null otherwise
  private final TruthValue constant;

  /**
   * Creates a node with null operator, children and constant and leaf 0.
   * Nothing in this file calls it directly; presumably it exists for Kryo
   * deserialization (TODO confirm).
   */
  ExpressionTree() {
    operator = null;
    children = null;
    leaf = 0;
    constant = null;
  }

  /** Creates an operator node with the given (possibly empty) children. */
  ExpressionTree(Operator op, ExpressionTree... kids) {
    operator = op;
    children = new ArrayList<ExpressionTree>();
    leaf = -1;
    this.constant = null;
    Collections.addAll(children, kids);
  }

  /** Creates a LEAF node referring to the leaf at the given index. */
  ExpressionTree(int leaf) {
    operator = Operator.LEAF;
    children = null;
    this.leaf = leaf;
    this.constant = null;
  }

  /** Creates a CONSTANT node with the given truth value. */
  ExpressionTree(TruthValue constant) {
    operator = Operator.CONSTANT;
    children = null;
    this.leaf = -1;
    this.constant = constant;
  }

  /** Deep-copies the given tree; every descendant node is copied as well. */
  ExpressionTree(ExpressionTree other) {
    this.operator = other.operator;
    if (other.children == null) {
      this.children = null;
    } else {
      this.children = new ArrayList<ExpressionTree>();
      for (ExpressionTree child : other.children) {
        children.add(new ExpressionTree(child));
      }
    }
    this.leaf = other.leaf;
    this.constant = other.constant;
  }

  /**
   * Evaluates this tree given the truth value of every leaf.
   * @param leaves the truth values, indexed by leaf id
   * @return the combined truth value
   */
  TruthValue evaluate(TruthValue[] leaves) {
    TruthValue result = null;
    switch (operator) {
      case OR:
        for (ExpressionTree child : children) {
          result = child.evaluate(leaves).or(result);
        }
        return result;
      case AND:
        for (ExpressionTree child : children) {
          result = child.evaluate(leaves).and(result);
        }
        return result;
      case NOT:
        return children.get(0).evaluate(leaves).not();
      case LEAF:
        return leaves[leaf];
      case CONSTANT:
        return constant;
      default:
        throw new IllegalStateException("Unknown operator: " + operator);
    }
  }

  /**
   * Translates this tree into a Parquet filter predicate.
   * @param leafs the predicate leaves that LEAF nodes index into
   * @return the predicate, or null when no filter can be derived (constants
   *   and leaves that can't be translated produce null)
   */
  FilterPredicate translate(List<PredicateLeaf> leafs) {
    FilterPredicate p = null;
    switch (operator) {
      case OR:
        for (ExpressionTree child : children) {
          FilterPredicate childPredicate = child.translate(leafs);
          if (childPredicate == null) {
            // A branch without a filter may accept any row, so dropping it
            // would make the disjunction reject rows it should keep. The
            // whole OR therefore yields no filter.
            return null;
          }
          p = (p == null) ? childPredicate : FilterApi.or(p, childPredicate);
        }
        return p;
      case AND:
        for (ExpressionTree child : children) {
          FilterPredicate childPredicate = child.translate(leafs);
          // a branch without a filter can safely be left out of a conjunction
          if (childPredicate != null) {
            p = (p == null) ? childPredicate : FilterApi.and(p, childPredicate);
          }
        }
        return p;
      case NOT:
        FilterPredicate op = children.get(0).translate(leafs);
        if (op != null) {
          return FilterApi.not(op);
        } else {
          return null;
        }
      case LEAF:
        return buildFilterPredicateFromPredicateLeaf(leafs.get(leaf));
      case CONSTANT:
        return null;// no filter will be executed for constant
      default:
        throw new IllegalStateException("Unknown operator: " + operator);
    }
  }

  /**
   * Builds the Parquet predicate for one leaf.
   * @return the predicate, or null when there is no builder for the leaf's
   *   type or when building fails (the failure is logged, not rethrown)
   */
  private FilterPredicate buildFilterPredicateFromPredicateLeaf(PredicateLeaf leaf) {
    LeafFilterFactory leafFilterFactory = new LeafFilterFactory();
    FilterPredicateLeafBuilder builder;
    try {
      builder = leafFilterFactory
          .getLeafFilterBuilderByType(leaf.getType());
      if (builder == null) {
        return null;
      }
      if (isMultiLiteralsOperator(leaf.getOperator())) {
        return builder.buildPredicate(leaf.getOperator(),
            leaf.getLiteralList(),
            leaf.getColumnName());
      } else {
        return builder
            .buildPredict(leaf.getOperator(),
                leaf.getLiteral(),
                leaf.getColumnName());
      }
    } catch (Exception e) {
      LOG.error("fail to build predicate filter leaf with errors" + e, e);
      return null;
    }
  }

  /** True for the operators that carry a literal list (IN and BETWEEN). */
  private boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) {
    return (op == PredicateLeaf.Operator.IN) || (op == PredicateLeaf.Operator.BETWEEN);
  }

  /** Renders the tree in prefix form, e.g. "(and leaf-0 (not leaf-1))". */
  @Override
  public String toString() {
    StringBuilder buffer = new StringBuilder();
    switch (operator) {
      case OR:
        buffer.append("(or");
        for (ExpressionTree child : children) {
          buffer.append(' ');
          buffer.append(child.toString());
        }
        buffer.append(')');
        break;
      case AND:
        buffer.append("(and");
        for (ExpressionTree child : children) {
          buffer.append(' ');
          buffer.append(child.toString());
        }
        buffer.append(')');
        break;
      case NOT:
        buffer.append("(not ");
        buffer.append(children.get(0));
        buffer.append(')');
        break;
      case LEAF:
        buffer.append("leaf-");
        buffer.append(leaf);
        break;
      case CONSTANT:
        buffer.append(constant);
        break;
    }
    return buffer.toString();
  }

  Operator getOperator() {
    return operator;
  }

  List<ExpressionTree> getChildren() {
    return children;
  }
}
static class ExpressionBuilder {
// Upper bound on the number of OR clauses CNF conversion may produce (the
// product of the child counts of the AND expressions being distributed).
// For example, more than 8 two-child ANDs exceed it and the expression is
// converted to maybe.
private static final int CNF_COMBINATIONS_THRESHOLD = 256;
// the unique leaves of the normalized expression, filled in by buildLeafList
private final List<PredicateLeaf> leaves = new ArrayList<PredicateLeaf>();
/**
 * Map the Hive type of an expression onto a predicate leaf type.
 * @param expr the expression to get the type of
 * @return the matching PredicateLeaf.Type, or null if the expression is not
 *   a primitive or its primitive category has no mapping here
 */
private static PredicateLeaf.Type getType(ExprNodeDesc expr) {
  TypeInfo info = expr.getTypeInfo();
  if (info.getCategory() != ObjectInspector.Category.PRIMITIVE) {
    return null;
  }
  PredicateLeaf.Type mapped;
  switch (((PrimitiveTypeInfo) info).getPrimitiveCategory()) {
    case BYTE:
    case SHORT:
    case INT:
      mapped = PredicateLeaf.Type.INTEGER;
      break;
    case LONG:
      mapped = PredicateLeaf.Type.LONG;
      break;
    case CHAR:
    case VARCHAR:
    case STRING:
      mapped = PredicateLeaf.Type.STRING;
      break;
    case FLOAT:
    case DOUBLE:
      mapped = PredicateLeaf.Type.FLOAT;
      break;
    case DATE:
      mapped = PredicateLeaf.Type.DATE;
      break;
    case TIMESTAMP:
      mapped = PredicateLeaf.Type.TIMESTAMP;
      break;
    case DECIMAL:
      mapped = PredicateLeaf.Type.DECIMAL;
      break;
    case BOOLEAN:
      mapped = PredicateLeaf.Type.BOOLEAN;
      break;
    default:
      // any other primitive category is not supported
      mapped = null;
      break;
  }
  return mapped;
}
/**
 * Get the column name referenced in the expression. The column must be the
 * direct child at the given position.
 * @param expr the expression to look in
 * @param variable the slot the variable is expected in
 * @return the column name, or null if the slot is out of range or does not
 *   hold a column reference
 */
private static String getColumnName(ExprNodeGenericFuncDesc expr,
                                    int variable) {
  List<ExprNodeDesc> children = expr.getChildren();
  if (variable < 0 || variable >= children.size()) {
    return null;
  }
  ExprNodeDesc child = children.get(variable);
  if (child instanceof ExprNodeColumnDesc) {
    return ((ExprNodeColumnDesc) child).getColumn();
  }
  return null;
}
/**
 * Convert a constant expression into the literal stored in a predicate leaf.
 * INTEGER and LONG constants become Integer/Long, STRING constants are
 * stringified with trailing whitespace stripped, and FLOAT constants are
 * parsed from their string form into a Double. DATE, TIMESTAMP, DECIMAL and
 * BOOLEAN constants are returned as the ExprNodeConstantDesc itself.
 * @param lit the constant expression
 * @return the boxed literal
 * @throws IllegalArgumentException if the constant's type has no predicate
 *   leaf type
 */
private static Object boxLiteral(ExprNodeConstantDesc lit) {
  PredicateLeaf.Type type = getType(lit);
  if (type == null) {
    // switching on null would throw a NullPointerException; report the
    // unsupported type the same way the default branch does
    throw new IllegalArgumentException("Unknown literal " + type);
  }
  switch (type) {
    case INTEGER:
      return ((Number) lit.getValue()).intValue();
    case LONG:
      return ((Number) lit.getValue()).longValue();
    case STRING:
      return StringUtils.stripEnd(lit.getValue().toString(), null);
    case FLOAT:
      return Double.parseDouble(lit.getValue().toString());
    case DATE:
    case TIMESTAMP:
    case DECIMAL:
    case BOOLEAN:
      return lit;
    default:
      throw new IllegalArgumentException("Unknown literal " + type);
  }
}
/**
 * Get the single literal of a two-operand expression.
 * @param expr the expression to look in
 * @return the boxed literal, or null unless the expression has exactly two
 *   children of which exactly one is a constant
 */
private static Object getLiteral(ExprNodeGenericFuncDesc expr) {
  Object result = null;
  List<ExprNodeDesc> children = expr.getChildren();
  if (children.size() != 2) {
    return null;
  }
  for (ExprNodeDesc child : children) {
    if (child instanceof ExprNodeConstantDesc) {
      // a second constant means there is no single literal
      if (result != null) {
        return null;
      }
      result = boxLiteral((ExprNodeConstantDesc) child);
    }
  }
  return result;
}
/**
 * Box every child of the expression from position start onwards.
 * @param expr the expression to look in
 * @param start the index of the first child to treat as a literal
 * @return the boxed literals, or null if any of those children is not a
 *   constant
 */
private static List<Object> getLiteralList(ExprNodeGenericFuncDesc expr,
                                           int start) {
  List<Object> result = new ArrayList<Object>();
  List<ExprNodeDesc> children = expr.getChildren();
  // skip the children before start, since they are not literals
  for (ExprNodeDesc child : children.subList(start, children.size())) {
    if (child instanceof ExprNodeConstantDesc) {
      result.add(boxLiteral((ExprNodeConstantDesc) child));
    } else {
      // if we get some non-literals, we need to punt
      return null;
    }
  }
  return result;
}
/**
 * Create the expression for a comparison whose column is expected at a known
 * child position.
 * @param operator the operator type that was found
 * @param expression the expression to convert
 * @param leafCache the list that receives the new leaf
 * @param variable the index of the child expected to be the column
 * @return a LEAF node for the new leaf (wrapped in NOT when the operands had
 *   to be swapped), or a YES_NO_NULL constant when the column, its type or
 *   the literal(s) can't be determined
 */
private ExpressionTree createLeaf(PredicateLeaf.Operator operator,
                                  ExprNodeGenericFuncDesc expression,
                                  List<PredicateLeaf> leafCache,
                                  int variable) {
  String columnName = getColumnName(expression, variable);
  if (columnName == null) {
    return new ExpressionTree(TruthValue.YES_NO_NULL);
  }
  PredicateLeaf.Type type = getType(expression.getChildren().get(variable));
  if (type == null) {
    return new ExpressionTree(TruthValue.YES_NO_NULL);
  }

  Object literal = null;
  List<Object> literalList = null;
  switch (operator) {
    case IS_NULL:
      // no literal at all
      break;
    case IN:
    case BETWEEN:
      // every child after the column is a literal
      literalList = getLiteralList(expression, variable + 1);
      if (literalList == null) {
        return new ExpressionTree(TruthValue.YES_NO_NULL);
      }
      break;
    default:
      literal = getLiteral(expression);
      if (literal == null) {
        return new ExpressionTree(TruthValue.YES_NO_NULL);
      }
      break;
  }

  // if the variable was on the right, we need to swap things around:
  // "lit < col" is "not (col <= lit)" and "lit <= col" is "not (col < lit)"
  boolean needSwap = false;
  if (variable != 0) {
    if (operator == PredicateLeaf.Operator.LESS_THAN) {
      needSwap = true;
      operator = PredicateLeaf.Operator.LESS_THAN_EQUALS;
    } else if (operator == PredicateLeaf.Operator.LESS_THAN_EQUALS) {
      needSwap = true;
      operator = PredicateLeaf.Operator.LESS_THAN;
    }
  }

  leafCache.add(new PredicateLeafImpl(operator, type, columnName,
      literal, literalList));
  ExpressionTree result = new ExpressionTree(leafCache.size() - 1);
  if (needSwap) {
    result = negate(result);
  }
  return result;
}
/**
 * Find the variable in the expression.
 * @param expr the expression to look in
 * @return the index of the only direct child that is a column reference, or
 *   -1 if there is not exactly one such child
 */
private int findVariable(ExprNodeDesc expr) {
  int result = -1;
  List<ExprNodeDesc> children = expr.getChildren();
  for (int i = 0; i < children.size(); ++i) {
    ExprNodeDesc child = children.get(i);
    if (child instanceof ExprNodeColumnDesc) {
      // if we already found a variable, this isn't a sarg
      if (result != -1) {
        return -1;
      } else {
        result = i;
      }
    }
  }
  return result;
}
/**
 * Create a leaf expression when the position of the variable is not known
 * in advance; the position is looked up with findVariable first.
 * @param operator the operator type that was found
 * @param expression the expression to check
 * @param leafCache the list of leaves
 * @return the leaf expression, or a YES_NO_NULL constant if the expression
 *   can't be turned into a leaf
 */
private ExpressionTree createLeaf(PredicateLeaf.Operator operator,
                                  ExprNodeGenericFuncDesc expression,
                                  List leafCache) {
  int variableSlot = findVariable(expression);
  return createLeaf(operator, expression, leafCache, variableSlot);
}
/**
 * Wrap the given expression in a NOT node.
 * @param expr the expression to negate
 * @return a new NOT node whose only child is expr
 */
private ExpressionTree negate(ExpressionTree expr) {
  return new ExpressionTree(ExpressionTree.Operator.NOT, expr);
}
/**
 * Parse every child of the Hive node and append the resulting trees, in
 * order, to the children of result.
 * @param result the tree node that receives the parsed children
 * @param node the Hive expression whose children are parsed
 * @param leafCache the list of leaves
 */
private void addChildren(ExpressionTree result,
                         ExprNodeGenericFuncDesc node,
                         List leafCache) {
  List<ExprNodeDesc> operands = node.getChildren();
  for (int idx = 0; idx < operands.size(); ++idx) {
    ExpressionTree parsed = parse(operands.get(idx), leafCache);
    result.children.add(parsed);
  }
}
/**
 * Do the recursive parse of the Hive ExprNodeDesc into our ExpressionTree.
 * Anything that is not an ExprNodeGenericFuncDesc, or whose UDF class is not
 * one of the operators handled below, becomes a YES_NO_NULL constant.
 * @param expression the Hive ExprNodeDesc
 * @param leafCache the list that collects the leaves created while parsing
 * @return the non-normalized ExpressionTree
 */
private ExpressionTree parse(ExprNodeDesc expression,
                             List<PredicateLeaf> leafCache) {
  // if we don't know the expression, just assume maybe
  if (expression.getClass() != ExprNodeGenericFuncDesc.class) {
    return new ExpressionTree(TruthValue.YES_NO_NULL);
  }
  // get the kind of expression
  ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) expression;
  Class<?> op = expr.getGenericUDF().getClass();
  ExpressionTree result;

  // handle the logical operators
  if (op == GenericUDFOPOr.class) {
    result = new ExpressionTree(ExpressionTree.Operator.OR);
    addChildren(result, expr, leafCache);
  } else if (op == GenericUDFOPAnd.class) {
    result = new ExpressionTree(ExpressionTree.Operator.AND);
    addChildren(result, expr, leafCache);
  } else if (op == GenericUDFOPNot.class) {
    result = new ExpressionTree(ExpressionTree.Operator.NOT);
    addChildren(result, expr, leafCache);

  // comparisons are expressed with EQUALS, LESS_THAN and LESS_THAN_EQUALS
  // leaves plus negation
  } else if (op == GenericUDFOPEqual.class) {
    result = createLeaf(PredicateLeaf.Operator.EQUALS, expr, leafCache);
  } else if (op == GenericUDFOPNotEqual.class) {
    result = negate(createLeaf(PredicateLeaf.Operator.EQUALS, expr,
        leafCache));
  } else if (op == GenericUDFOPEqualNS.class) {
    result = createLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, expr,
        leafCache);
  } else if (op == GenericUDFOPGreaterThan.class) {
    result = negate(createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS,
        expr, leafCache));
  } else if (op == GenericUDFOPEqualOrGreaterThan.class) {
    result = negate(createLeaf(PredicateLeaf.Operator.LESS_THAN, expr,
        leafCache));
  } else if (op == GenericUDFOPLessThan.class) {
    result = createLeaf(PredicateLeaf.Operator.LESS_THAN, expr, leafCache);
  } else if (op == GenericUDFOPEqualOrLessThan.class) {
    result = createLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS, expr,
        leafCache);

  // for these operators the column position is fixed
  } else if (op == GenericUDFIn.class) {
    result = createLeaf(PredicateLeaf.Operator.IN, expr, leafCache, 0);
  } else if (op == GenericUDFBetween.class) {
    result = createLeaf(PredicateLeaf.Operator.BETWEEN, expr, leafCache,
        1);
  } else if (op == GenericUDFOPNull.class) {
    result = createLeaf(PredicateLeaf.Operator.IS_NULL, expr, leafCache,
        0);
  } else if (op == GenericUDFOPNotNull.class) {
    result = negate(createLeaf(PredicateLeaf.Operator.IS_NULL, expr,
        leafCache, 0));

  // otherwise, we didn't understand it, so mark it maybe
  } else {
    result = new ExpressionTree(TruthValue.YES_NO_NULL);
  }
  return result;
}
/**
 * Move every negation down until it sits directly above a leaf, applying
 * De Morgan's laws to AND/OR, folding negated constants and cancelling
 * double negatives.
 * @param root the expression to normalize
 * @return the normalized expression, which may share some or all of the
 * nodes of the original expression.
 */
static ExpressionTree pushDownNot(ExpressionTree root) {
  if (root.operator != ExpressionTree.Operator.NOT) {
    // not a negation: just normalize each child in place
    if (root.children != null) {
      for (int idx = 0; idx < root.children.size(); ++idx) {
        ExpressionTree rewritten = pushDownNot(root.children.get(idx));
        root.children.set(idx, rewritten);
      }
    }
    return root;
  }

  ExpressionTree negated = root.children.get(0);
  ExpressionTree.Operator dual;
  switch (negated.operator) {
    case NOT:
      // double negative
      return pushDownNot(negated.children.get(0));
    case CONSTANT:
      return new ExpressionTree(negated.constant.not());
    case AND:
      dual = ExpressionTree.Operator.OR;
      break;
    case OR:
      dual = ExpressionTree.Operator.AND;
      break;
    default:
      // a negated leaf stays as it is
      return root;
  }

  // not (a op b ...) becomes (not a) dual-op (not b) ...
  ExpressionTree replacement = new ExpressionTree(dual);
  for (ExpressionTree kid : negated.children) {
    ExpressionTree negatedKid =
        new ExpressionTree(ExpressionTree.Operator.NOT, kid);
    replacement.children.add(pushDownNot(negatedKid));
  }
  return replacement;
}
/**
 * Eliminate MAYBE (YES_NO_NULL) constants from the expression. Under an AND
 * the maybe child is removed; under an OR the maybe replaces the whole OR.
 * An expression left without children becomes a maybe itself.
 * This assumes that pushDownNot has already been called.
 * @param expr The expression to clean up
 * @return The cleaned up expression
 * @throws IllegalStateException if a maybe shows up under any operator other
 *   than AND or OR
 */
static ExpressionTree foldMaybe(ExpressionTree expr) {
  if (expr.children == null) {
    return expr;
  }
  int idx = 0;
  while (idx < expr.children.size()) {
    ExpressionTree folded = foldMaybe(expr.children.get(idx));
    if (folded.constant != TruthValue.YES_NO_NULL) {
      expr.children.set(idx, folded);
      idx++;
      continue;
    }
    switch (expr.operator) {
      case AND:
        // drop the maybe; the next child slides into this position, so
        // the index is not advanced
        expr.children.remove(idx);
        break;
      case OR:
        // a maybe will kill the or condition
        return folded;
      default:
        throw new IllegalStateException("Got a maybe as child of " +
            expr);
    }
  }
  if (expr.children.isEmpty()) {
    return new ExpressionTree(TruthValue.YES_NO_NULL);
  }
  return expr;
}
/**
 * Generate all combinations of items on the andList. For each item on the
 * andList, it generates all combinations of one child from each and
 * expression. Thus, (and a b) (and c d) will be expanded to: (or a c)
 * (or a d) (or b c) (or b d). If there are items on the nonAndList, they
 * are added to each or expression.
 * @param result a list to put the results onto
 * @param andList a list of and expressions; must not be empty
 * @param nonAndList a list of non-and expressions
 */
private static void generateAllCombinations(List<ExpressionTree> result,
                                            List<ExpressionTree> andList,
                                            List<ExpressionTree> nonAndList
                                           ) {
  List<ExpressionTree> kids = andList.get(0).children;
  if (result.isEmpty()) {
    // first AND: start one OR per kid, seeded with copies of the non-and
    // expressions
    for (ExpressionTree kid : kids) {
      ExpressionTree or = new ExpressionTree(ExpressionTree.Operator.OR);
      result.add(or);
      for (ExpressionTree node : nonAndList) {
        or.children.add(new ExpressionTree(node));
      }
      or.children.add(kid);
    }
  } else {
    // later ANDs: cross every OR built so far with every kid
    List<ExpressionTree> work = new ArrayList<ExpressionTree>(result);
    result.clear();
    for (ExpressionTree kid : kids) {
      for (ExpressionTree or : work) {
        ExpressionTree copy = new ExpressionTree(or);
        copy.children.add(kid);
        result.add(copy);
      }
    }
  }
  if (andList.size() > 1) {
    generateAllCombinations(result, andList.subList(1, andList.size()),
        nonAndList);
  }
}
/**
 * Convert an expression so that the top level operator is AND with OR
 * operators under it. This routine assumes that all of the NOT operators
 * have been pushed to the leaves via pushDownNot.
 * If distributing an OR over its AND children would produce more than
 * CNF_COMBINATIONS_THRESHOLD clauses, that OR is replaced by a YES_NO_NULL
 * constant instead.
 * @param root the expression
 * @return the normalized expression
 */
static ExpressionTree convertToCNF(ExpressionTree root) {
  if (root.children != null) {
    // convert all of the children to CNF
    int size = root.children.size();
    for (int i = 0; i < size; ++i) {
      root.children.set(i, convertToCNF(root.children.get(i)));
    }
    if (root.operator == ExpressionTree.Operator.OR) {
      // a list of leaves that weren't under AND expressions
      List<ExpressionTree> nonAndList = new ArrayList<ExpressionTree>();
      // a list of AND expressions that we need to distribute
      List<ExpressionTree> andList = new ArrayList<ExpressionTree>();
      for (ExpressionTree child : root.children) {
        if (child.operator == ExpressionTree.Operator.AND) {
          andList.add(child);
        } else if (child.operator == ExpressionTree.Operator.OR) {
          // pull apart the kids of the OR expression
          nonAndList.addAll(child.children);
        } else {
          nonAndList.add(child);
        }
      }
      if (!andList.isEmpty()) {
        if (checkCombinationsThreshold(andList)) {
          root = new ExpressionTree(ExpressionTree.Operator.AND);
          generateAllCombinations(root.children, andList, nonAndList);
        } else {
          root = new ExpressionTree(TruthValue.YES_NO_NULL);
        }
      }
    }
  }
  return root;
}
/**
 * Check whether distributing the given AND expressions stays within
 * CNF_COMBINATIONS_THRESHOLD.
 * @param andList the AND expressions that would be distributed
 * @return true if the product of their child counts does not exceed the
 *   threshold, false as soon as the running product exceeds it
 */
private static boolean checkCombinationsThreshold(List<ExpressionTree> andList) {
  int numComb = 1;
  for (ExpressionTree tree : andList) {
    numComb *= tree.children.size();
    if (numComb > CNF_COMBINATIONS_THRESHOLD) {
      return false;
    }
  }
  return true;
}
/**
 * Collapse nested ANDs inside ANDs and nested ORs inside ORs into a single
 * level, and replace an AND or OR that ends up with one child by that child.
 * @param root the expression to flatten
 * @return the flattened expression: root with potentially modified
 *   children, or root's only child when root is a singleton AND or OR
 */
static ExpressionTree flatten(ExpressionTree root) {
  if (root.children == null) {
    return root;
  }
  // walk by position, so that grandkids spliced in below are skipped
  // rather than visited again
  for (int pos = 0; pos < root.children.size(); ++pos) {
    ExpressionTree child = flatten(root.children.get(pos));
    boolean mergeable = child.operator != ExpressionTree.Operator.NOT &&
        child.operator == root.operator;
    if (!mergeable) {
      root.children.set(pos, child);
      continue;
    }
    for (int g = 0; g < child.children.size(); ++g) {
      ExpressionTree grandkid = child.children.get(g);
      if (g == 0) {
        // the first grandkid takes the slot of its former parent
        root.children.set(pos, grandkid);
      } else {
        root.children.add(++pos, grandkid);
      }
    }
  }
  // if we have a singleton AND or OR, just return the child
  boolean andOrOr = root.operator == ExpressionTree.Operator.OR ||
      root.operator == ExpressionTree.Operator.AND;
  if (andOrOr && root.children.size() == 1) {
    return root.children.get(0);
  }
  return root;
}
/**
 * Iterates through the expression, finding all of the leaves. It creates
 * the leaves list with each unique leaf that is found in the expression.
 * The expression is updated with the new leaf ids for each leaf; equal
 * leaves end up sharing the same LEAF node.
 * @param expr the expression to find the leaves in
 * @param leafCache the list of all of the leaves
 * @param lookup a map that is used to uniquify the leaves
 * @return The potentially modified expression
 */
private ExpressionTree buildLeafList(ExpressionTree expr,
                                     List<PredicateLeaf> leafCache,
                                     Map<PredicateLeaf, ExpressionTree> lookup) {
  if (expr.children != null) {
    for (int i = 0; i < expr.children.size(); ++i) {
      expr.children.set(i, buildLeafList(expr.children.get(i), leafCache,
          lookup));
    }
  } else if (expr.operator == ExpressionTree.Operator.LEAF) {
    PredicateLeaf leaf = leafCache.get(expr.leaf);
    ExpressionTree val = lookup.get(leaf);
    if (val == null) {
      // first time we see this leaf: give it the next id
      val = new ExpressionTree(leaves.size());
      lookup.put(leaf, val);
      leaves.add(leaf);
    }
    return val;
  }
  return expr;
}
/**
 * Builds the expression and leaf list from the original predicate.
 * @param expression the expression to translate
 * @return The normalized expression.
 */
ExpressionTree expression(ExprNodeGenericFuncDesc expression) {
  // collects every leaf created while parsing, duplicates included
  List<PredicateLeaf> leafCache = new ArrayList<PredicateLeaf>();
  ExpressionTree expr = parse(expression, leafCache);
  return expression(expr, leafCache);
}
/**
 * Builds the expression and optimized leaf list from a non-normalized
 * expression. Sets the leaves field with the unique leaves.
 * The steps are: push NOTs down, fold maybes, flatten, convert to CNF,
 * flatten again and finally renumber the leaves.
 * @param expr non-normalized expression
 * @param leaves non-unique leaves
 * @return the normalized expression
 */
ExpressionTree expression(ExpressionTree expr,
                          List<PredicateLeaf> leaves) {
  expr = pushDownNot(expr);
  expr = foldMaybe(expr);
  expr = flatten(expr);
  expr = convertToCNF(expr);
  expr = flatten(expr);
  expr = buildLeafList(expr, leaves,
      new HashMap<PredicateLeaf, ExpressionTree>());
  return expr;
}
/**
 * @return the unique leaves collected by buildLeafList; this is the
 *   builder's own list, not a copy
 */
List<PredicateLeaf> getLeaves() {
  return leaves;
}
}
// the unique predicate leaves referenced by the expression
private final List<PredicateLeaf> leaves;
// the normalized expression; null when there is no predicate
private final ExpressionTree expression;

/**
 * Builds the search argument for a Hive predicate.
 * @param expr the predicate, or null for a search argument with no leaves
 *   and no expression
 */
SearchArgumentImpl(ExprNodeGenericFuncDesc expr) {
  if (expr == null) {
    leaves = new ArrayList<PredicateLeaf>();
    expression = null;
  } else {
    ExpressionBuilder builder = new ExpressionBuilder();
    expression = builder.expression(expr);
    leaves = builder.getLeaves();
  }
}

/**
 * Creates an instance with null leaves and expression. Nothing in this file
 * calls it directly; presumably it exists for Kryo deserialization in
 * fromKryo (TODO confirm).
 */
SearchArgumentImpl() {
  leaves = null;
  expression = null;
}

/**
 * Wraps an already normalized expression and its leaves.
 * @param expression the normalized expression
 * @param leaves the leaves the expression refers to
 */
SearchArgumentImpl(ExpressionTree expression, List<PredicateLeaf> leaves) {
  this.expression = expression;
  this.leaves = leaves;
}
/**
 * @return the predicate leaves of this search argument; this is the
 *   internal list, not a copy
 */
@Override
public List<PredicateLeaf> getLeaves() {
  return leaves;
}
/**
 * Evaluates the expression for the given leaf truth values.
 * @param leaves the truth value of each leaf, indexed by leaf id
 * @return YES when there is no expression, otherwise the value of the
 *   expression
 */
@Override
public TruthValue evaluate(TruthValue[] leaves) {
  if (expression == null) {
    return TruthValue.YES;
  }
  return expression.evaluate(leaves);
}
/**
 * Returns the expression tree held by this search argument; it is null when
 * the instance was constructed without an expression.
 */
ExpressionTree getExpression() {
return expression;
}
/**
 * Lists every leaf as "leaf-N = ..." on its own line, followed by a final
 * "expr = ..." line with the expression.
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder();
  int leafCount = leaves.size();
  for (int id = 0; id < leafCount; ++id) {
    text.append("leaf-")
        .append(id)
        .append(" = ")
        .append(leaves.get(id).toString())
        .append('\n');
  }
  text.append("expr = ").append(expression);
  return text.toString();
}
/**
 * Serializes this search argument with Kryo and encodes the bytes as Base64.
 * @return the Base64 text of the serialized object
 */
public String toKryo() {
  // buffer sizes handed to the Kryo Output: 4 KiB and 10 MiB
  final int bufferSize = 4 * 1024;
  final int maxBufferSize = 10 * 1024 * 1024;
  Output out = new Output(bufferSize, maxBufferSize);
  Kryo kryo = new Kryo();
  kryo.writeObject(out, this);
  out.close();
  byte[] serialized = out.toBytes();
  return Base64.encodeBase64String(serialized);
}
/**
 * Rebuilds a search argument from the Base64 text produced by toKryo.
 * @param value the Base64-encoded Kryo serialization
 * @return the deserialized search argument
 */
static SearchArgument fromKryo(String value) {
  byte[] serialized = Base64.decodeBase64(value);
  Input input = new Input(serialized);
  Kryo kryo = new Kryo();
  return kryo.readObject(input, SearchArgumentImpl.class);
}
/**
 * Translates this search argument into a Parquet filter predicate.
 * @return the predicate, or null when there is no expression or the
 *   expression yields no filter
 */
@Override
public FilterPredicate toFilterPredicate() {
  // a search argument built from a null predicate has no expression;
  // evaluate() treats that as "always YES", so there is nothing to filter
  if (expression == null) {
    return null;
  }
  return expression.translate(leaves);
}
/**
 * Builder that assembles a SearchArgument through start*(), leaf and end()
 * calls. Open operator nodes are kept on a stack; leaves are attached to the
 * node on top of it.
 */
private static class BuilderImpl implements Builder {
  // stack of the operator nodes that have been started but not ended
  private final Deque<ExpressionTree> currentTree =
      new ArrayDeque<ExpressionTree>();
  // every leaf added so far, in order of creation (duplicates included)
  private final List<PredicateLeaf> leaves = new ArrayList<PredicateLeaf>();
  // the node closed by the most recent end() call
  private ExpressionTree root = null;

  /**
   * Opens a new operator node, attaches it to the currently open node (if
   * any) and makes it the current node.
   */
  private Builder startNode(ExpressionTree.Operator operator) {
    ExpressionTree node = new ExpressionTree(operator);
    if (currentTree.size() != 0) {
      ExpressionTree parent = currentTree.getFirst();
      parent.children.add(node);
    }
    currentTree.addFirst(node);
    return this;
  }

  /** Registers the leaf and appends a LEAF node for it to parent. */
  private Builder addLeaf(ExpressionTree parent, PredicateLeaf leaf) {
    leaves.add(leaf);
    parent.children.add(new ExpressionTree(leaves.size() - 1));
    return this;
  }

  /** Adds a single-literal leaf with the given operator to the current node. */
  private Builder addComparison(PredicateLeaf.Operator operator,
                                String column,
                                Object literal) {
    // look up the parent first so that a missing start*() call is reported
    // before the literal is examined
    ExpressionTree parent = currentTree.getFirst();
    Object box = boxLiteral(literal);
    PredicateLeaf leaf =
        new PredicateLeafImpl(operator, getType(box), column, box, null);
    return addLeaf(parent, leaf);
  }

  @Override
  public Builder startOr() {
    return startNode(ExpressionTree.Operator.OR);
  }

  @Override
  public Builder startAnd() {
    return startNode(ExpressionTree.Operator.AND);
  }

  @Override
  public Builder startNot() {
    return startNode(ExpressionTree.Operator.NOT);
  }

  /**
   * Closes the current operator node.
   * @throws IllegalArgumentException if the node has no children, or is a
   *   NOT with other than one child
   */
  @Override
  public Builder end() {
    root = currentTree.removeFirst();
    if (root.children.size() == 0) {
      throw new IllegalArgumentException("Can't create expression " + root +
          " with no children.");
    }
    if (root.operator == ExpressionTree.Operator.NOT &&
        root.children.size() != 1) {
      throw new IllegalArgumentException("Can't create not expression " +
          root + " with more than 1 child.");
    }
    return this;
  }

  /**
   * Normalizes a literal: char/varchar values become strings without
   * trailing whitespace, Byte/Short/Integer become Long, Float becomes
   * Double, and the other supported types are returned unchanged.
   * @throws IllegalArgumentException for any other type
   */
  private static Object boxLiteral(Object literal) {
    if (literal instanceof String ||
        literal instanceof Long ||
        literal instanceof Double ||
        literal instanceof DateWritable ||
        literal instanceof Timestamp ||
        literal instanceof HiveDecimal ||
        literal instanceof BigDecimal ||
        literal instanceof Boolean) {
      return literal;
    } else if (literal instanceof HiveChar ||
        literal instanceof HiveVarchar) {
      return StringUtils.stripEnd(literal.toString(), null);
    } else if (literal instanceof Byte ||
        literal instanceof Short ||
        literal instanceof Integer) {
      return ((Number) literal).longValue();
    } else if (literal instanceof Float) {
      // to avoid change in precision when upcasting float to double
      // we convert the literal to string and parse it as double. (HIVE-8460)
      return Double.parseDouble(literal.toString());
    } else {
      throw new IllegalArgumentException("Unknown type for literal " +
          literal);
    }
  }

  /**
   * Maps a literal onto its predicate leaf type.
   * @throws IllegalArgumentException if the literal's class is not supported
   */
  private static PredicateLeaf.Type getType(Object literal) {
    if (literal instanceof Byte ||
        literal instanceof Short ||
        literal instanceof Integer) {
      return PredicateLeaf.Type.INTEGER;
    } else if (literal instanceof Long) {
      return PredicateLeaf.Type.LONG;
    } else if (literal instanceof HiveChar ||
        literal instanceof HiveVarchar ||
        literal instanceof String) {
      return PredicateLeaf.Type.STRING;
    } else if (literal instanceof Float ||
        literal instanceof Double) {
      return PredicateLeaf.Type.FLOAT;
    } else if (literal instanceof DateWritable) {
      return PredicateLeaf.Type.DATE;
    } else if (literal instanceof Timestamp) {
      return PredicateLeaf.Type.TIMESTAMP;
    } else if (literal instanceof HiveDecimal ||
        literal instanceof BigDecimal) {
      return PredicateLeaf.Type.DECIMAL;
    } else if (literal instanceof Boolean) {
      return PredicateLeaf.Type.BOOLEAN;
    }
    throw new IllegalArgumentException("Unknown type for literal " + literal);
  }

  @Override
  public Builder lessThan(String column, Object literal) {
    return addComparison(PredicateLeaf.Operator.LESS_THAN, column, literal);
  }

  @Override
  public Builder lessThanEquals(String column, Object literal) {
    return addComparison(PredicateLeaf.Operator.LESS_THAN_EQUALS, column,
        literal);
  }

  @Override
  public Builder equals(String column, Object literal) {
    return addComparison(PredicateLeaf.Operator.EQUALS, column, literal);
  }

  @Override
  public Builder nullSafeEquals(String column, Object literal) {
    return addComparison(PredicateLeaf.Operator.NULL_SAFE_EQUALS, column,
        literal);
  }

  /**
   * Adds an IN leaf; its type is taken from the first literal.
   * @throws IllegalArgumentException if no literals are given
   */
  @Override
  public Builder in(String column, Object... literal) {
    ExpressionTree parent = currentTree.getFirst();
    if (literal.length == 0) {
      throw new IllegalArgumentException("Can't create in expression with "
          + "no arguments");
    }
    List<Object> argList = new ArrayList<Object>();
    for (Object lit : literal) {
      argList.add(boxLiteral(lit));
    }
    PredicateLeaf leaf =
        new PredicateLeafImpl(PredicateLeaf.Operator.IN,
            getType(argList.get(0)), column, null, argList);
    return addLeaf(parent, leaf);
  }

  /** Adds an IS_NULL leaf; the leaf type is always recorded as STRING. */
  @Override
  public Builder isNull(String column) {
    ExpressionTree parent = currentTree.getFirst();
    PredicateLeaf leaf =
        new PredicateLeafImpl(PredicateLeaf.Operator.IS_NULL,
            PredicateLeaf.Type.STRING, column, null, null);
    return addLeaf(parent, leaf);
  }

  /** Adds a BETWEEN leaf; its type is taken from the lower bound. */
  @Override
  public Builder between(String column, Object lower, Object upper) {
    ExpressionTree parent = currentTree.getFirst();
    List<Object> argList = new ArrayList<Object>();
    argList.add(boxLiteral(lower));
    argList.add(boxLiteral(upper));
    PredicateLeaf leaf =
        new PredicateLeafImpl(PredicateLeaf.Operator.BETWEEN,
            getType(argList.get(0)), column, null, argList);
    return addLeaf(parent, leaf);
  }

  /**
   * Normalizes the expression closed by the last end() call and wraps it
   * in a SearchArgument.
   * @throws IllegalArgumentException if some started operators were not ended
   */
  @Override
  public SearchArgument build() {
    if (currentTree.size() != 0) {
      throw new IllegalArgumentException("Failed to end " +
          currentTree.size() + " operations.");
    }
    ExpressionBuilder internal = new ExpressionBuilder();
    ExpressionTree normalized = internal.expression(root, leaves);
    return new SearchArgumentImpl(normalized, internal.getLeaves());
  }
}
/**
 * Creates a new, empty builder for assembling a SearchArgument.
 * @return a fresh BuilderImpl
 */
public static Builder newBuilder() {
return new BuilderImpl();
}
}