// Please wait. This can take a few minutes ...
// Downloading a project requires many resources. Please understand that we have to cover our server costs. Thank you in advance.
// Project price: only $1
// You can buy this project and download and modify it as often as you want.
// org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.io.sarg;
import java.sql.Timestamp;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.NoDynamicValuesException;
import com.facebook.presto.hive.$internal.org.slf4j.Logger;
import com.facebook.presto.hive.$internal.org.slf4j.LoggerFactory;
/**
* The implementation of SearchArguments. Visible for testing only.
*/
public final class SearchArgumentImpl implements SearchArgument {
private static final Logger LOG = LoggerFactory.getLogger(SearchArgumentImpl.class);
/**
 * A single predicate on one column: an operator, the literal type, the
 * column name and either one literal or a list of literals.
 */
public static final class PredicateLeafImpl implements PredicateLeaf {
  private final Operator operator;
  private final Type type;
  // Not final: setColumnName(PredicateLeaf, String) replaces it after construction.
  private String columnName;
  private final Object literal;
  private final List<Object> literalList;

  // Used by kryo
  @SuppressWarnings("unused")
  PredicateLeafImpl() {
    operator = null;
    type = null;
    columnName = null;
    literal = null;
    literalList = null;
  }

  /**
   * Creates a leaf without a configuration; delegates to the six-argument
   * constructor with a {@code null} conf.
   */
  public PredicateLeafImpl(Operator operator,
                           Type type,
                           String columnName,
                           Object literal,
                           List<Object> literalList) {
    this(operator, type, columnName, literal, literalList, null);
  }

  /**
   * Creates a leaf and checks every non-null literal against {@code type}.
   *
   * @param operator the comparison operator
   * @param type the literal type of the column
   * @param columnName the column the predicate applies to
   * @param literal the single literal, or null
   * @param literalList the list of literals, or null
   * @param conf configuration handed to any {@code LiteralDelegate} literal
   * @throws IllegalArgumentException if a literal that is not a
   *     {@code LiteralDelegate} has a class other than
   *     {@code type.getValueClass()}
   */
  public PredicateLeafImpl(Operator operator,
                           Type type,
                           String columnName,
                           Object literal,
                           List<Object> literalList, Configuration conf) {
    this.operator = operator;
    this.type = type;
    this.columnName = columnName;
    this.literal = literal;
    checkLiteralType(literal, type, conf);
    this.literalList = literalList;
    if (literalList != null) {
      for (Object lit : literalList) {
        checkLiteralType(lit, type, conf);
      }
    }
  }

  @Override
  public Operator getOperator() {
    return operator;
  }

  @Override
  public Type getType() {
    return type;
  }

  @Override
  public String getColumnName() {
    return columnName;
  }

  /**
   * Returns the literal, unwrapping a {@code LiteralDelegate} and converting
   * any {@code java.util.Date} into a {@code Timestamp}.
   */
  @Override
  public Object getLiteral() {
    if (literal instanceof LiteralDelegate) {
      return ((LiteralDelegate) literal).getLiteral();
    }
    // To get around a kryo 2.22 bug while deserialize a Timestamp into Date
    // (https://github.com/EsotericSoftware/kryo/issues/88)
    // When we see a Date, convert back into Timestamp
    // NOTE(review): java.sql.Timestamp and java.sql.Date are subclasses of
    // java.util.Date, so they take this branch as well - confirm callers
    // expect that.
    if (literal instanceof java.util.Date) {
      return new Timestamp(((java.util.Date) literal).getTime());
    }
    return literal;
  }

  /**
   * Returns the literal list. When the first element is a
   * {@code LiteralDelegate}, every element is unwrapped (null results are
   * skipped) and a new list is returned; if unwrapping throws
   * {@code NoDynamicValuesException} the result is {@code null}.
   */
  @Override
  public List<Object> getLiteralList() {
    if (literalList != null && literalList.size() > 0
        && literalList.get(0) instanceof LiteralDelegate) {
      List<Object> resolvedLiterals = new ArrayList<>();
      try {
        for (Object delegate : literalList) {
          Object value = ((LiteralDelegate) delegate).getLiteral();
          if (value != null) {
            resolvedLiterals.add(value);
          }
        }
      } catch (NoDynamicValuesException err) {
        LOG.debug("Error while retrieving literalList, returning null", err);
        return null;
      }
      return resolvedLiterals;
    }
    return literalList;
  }

  @Override
  public String toString() {
    StringBuilder buffer = new StringBuilder();
    buffer.append('(');
    buffer.append(operator);
    buffer.append(' ');
    buffer.append(columnName);
    if (literal != null) {
      buffer.append(' ');
      buffer.append(literal);
    } else if (literalList != null) {
      for (Object lit : literalList) {
        buffer.append(' ');
        // append(Object) already renders a null element as "null"
        buffer.append(lit);
      }
    }
    buffer.append(')');
    return buffer.toString();
  }

  /** Null-safe equality of two objects. */
  private static boolean isEqual(Object left, Object right) {
    return left == right ||
        (left != null && right != null && left.equals(right));
  }

  /** Null-safe hash code; null hashes to 0. */
  private static int hashOf(Object value) {
    return value == null ? 0 : value.hashCode();
  }

  @Override
  public boolean equals(Object other) {
    if (other == this) {
      return true;
    }
    if (other == null || other.getClass() != getClass()) {
      return false;
    }
    PredicateLeafImpl o = (PredicateLeafImpl) other;
    // columnName is compared null-safely: it is null after the no-arg
    // constructor and may be passed as null to the public constructors.
    return operator == o.operator &&
        type == o.type &&
        isEqual(columnName, o.columnName) &&
        isEqual(literal, o.literal) &&
        isEqual(literalList, o.literalList);
  }

  @Override
  public int hashCode() {
    // Same weights as before; null fields now contribute 0 instead of
    // throwing NullPointerException.
    return hashOf(operator) +
        hashOf(type) * 17 +
        hashOf(columnName) * 3 * 17 +
        hashOf(literal) * 101 * 3 * 17 +
        hashOf(literalList) * 103 * 101 * 3 * 17;
  }

  /**
   * Replaces the column name of a leaf in place.
   *
   * @param leaf the leaf to update; must be a {@code PredicateLeafImpl}
   * @param newName the new column name
   */
  public static void setColumnName(PredicateLeaf leaf, String newName) {
    assert leaf instanceof PredicateLeafImpl;
    ((PredicateLeafImpl) leaf).columnName = newName;
  }

  /**
   * Validates one literal. Null literals are accepted; a
   * {@code LiteralDelegate} is accepted and given {@code conf}; anything else
   * must be exactly of class {@code type.getValueClass()}.
   *
   * @throws IllegalArgumentException if the literal has the wrong class
   */
  protected void checkLiteralType(Object literal, Type type, Configuration conf) {
    if (literal == null) {
      return;
    }
    if (literal instanceof LiteralDelegate) {
      // Give it a pass. Optionally, have LiteralDelegate provide a getLiteralClass() to check.
      ((LiteralDelegate) literal).setConf(conf);
    } else {
      if (literal.getClass() != type.getValueClass()) {
        throw new IllegalArgumentException("Wrong value class " +
            literal.getClass().getName() + " for " + type + "." + operator +
            " leaf");
      }
    }
  }
}
// Predicate leaves of this search argument; build() stores each leaf at the
// index of its leaf id. Null only after the no-arg constructor.
private final List<PredicateLeaf> leaves;
// Expression tree over the leaves. Null only after the no-arg constructor.
private final ExpressionTree expression;
/**
 * Creates a search argument from an expression tree and its leaves.
 *
 * @param expression the expression tree
 * @param leaves the predicate leaves; stored as given, not copied
 */
SearchArgumentImpl(ExpressionTree expression, List<PredicateLeaf> leaves) {
  this.expression = expression;
  this.leaves = leaves;
}
// Used by kryo
@SuppressWarnings("unused")
SearchArgumentImpl() {
  this.expression = null;
  this.leaves = null;
}
/**
 * Returns the leaf list held by this search argument (not a copy).
 */
@Override
public List<PredicateLeaf> getLeaves() {
  return leaves;
}
/**
 * Evaluates the expression tree against the given leaf results.
 *
 * @param leaves the truth value of each leaf
 * @return {@code TruthValue.YES} when there is no expression, otherwise the
 *     result of evaluating the expression
 */
@Override
public TruthValue evaluate(TruthValue[] leaves) {
  if (expression == null) {
    return TruthValue.YES;
  }
  return expression.evaluate(leaves);
}
/**
 * Returns the expression tree of this search argument.
 */
@Override
public ExpressionTree getExpression() {
  return this.expression;
}
/**
 * Renders every leaf as {@code leaf-<index> = <leaf>, } followed by
 * {@code expr = <expression>}.
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder();
  int index = 0;
  for (Object leaf : leaves) {
    text.append("leaf-")
        .append(index)
        .append(" = ")
        .append(leaf.toString())
        .append(", ");
    index += 1;
  }
  text.append("expr = ").append(expression);
  return text.toString();
}
static class BuilderImpl implements Builder {
// Configuration passed to every PredicateLeafImpl this builder creates.
Configuration conf;

public BuilderImpl(Configuration conf) {
  this.conf = conf;
}

// Max threshold for CNF conversion. When distributing an OR over its AND
// children would produce more than this many combinations (the product of
// the AND children's sizes, e.g. more than eight two-child ANDs), the OR is
// replaced by a YES_NO_NULL ("maybe") constant instead.
private static final int CNF_COMBINATIONS_THRESHOLD = 256;
// Operators that have been started but not ended; the head is the innermost.
private final Deque<ExpressionTree> currentTree =
    new ArrayDeque<>();
// Distinct leaves mapped to the id assigned when they were first added.
private final Map<PredicateLeaf, Integer> leaves =
    new HashMap<>();
// Implicit top-level AND that collects everything added to the builder.
private final ExpressionTree root =
    new ExpressionTree(ExpressionTree.Operator.AND);
{
  currentTree.add(root);
}
/**
 * Opens an OR node under the current node and makes it the current node.
 */
@Override
public Builder startOr() {
  ExpressionTree orNode = new ExpressionTree(ExpressionTree.Operator.OR);
  currentTree.getFirst().getChildren().add(orNode);
  currentTree.push(orNode);
  return this;
}
/**
 * Opens an AND node under the current node and makes it the current node.
 */
@Override
public Builder startAnd() {
  ExpressionTree andNode = new ExpressionTree(ExpressionTree.Operator.AND);
  currentTree.getFirst().getChildren().add(andNode);
  currentTree.push(andNode);
  return this;
}
/**
 * Opens a NOT node under the current node and makes it the current node.
 */
@Override
public Builder startNot() {
  ExpressionTree notNode = new ExpressionTree(ExpressionTree.Operator.NOT);
  currentTree.getFirst().getChildren().add(notNode);
  currentTree.push(notNode);
  return this;
}
/**
 * Closes the innermost open operator.
 *
 * @throws IllegalArgumentException if the closed operator has no children,
 *     or is a NOT with a child count other than one
 */
@Override
public Builder end() {
  ExpressionTree current = currentTree.removeFirst();
  int childCount = current.getChildren().size();
  if (childCount == 0) {
    throw new IllegalArgumentException("Can't create expression " + root +
        " with no children.");
  }
  boolean isNot = current.getOperator() == ExpressionTree.Operator.NOT;
  if (isNot && childCount != 1) {
    throw new IllegalArgumentException("Can't create not expression " +
        current + " with more than 1 child.");
  }
  return this;
}
/**
 * Returns the id of the given leaf, registering it under the next free id
 * (the current number of leaves) if it has not been seen before.
 */
private int addLeaf(PredicateLeaf leaf) {
  Integer existingId = leaves.get(leaf);
  if (existingId != null) {
    return existingId;
  }
  int newId = leaves.size();
  leaves.put(leaf, newId);
  return newId;
}
/**
 * Adds a LESS_THAN leaf to the current node, or a YES_NO_NULL constant when
 * the column or the literal is null.
 */
@Override
public Builder lessThan(String column, PredicateLeaf.Type type,
                        Object literal) {
  ExpressionTree parent = currentTree.getFirst();
  if (column == null || literal == null) {
    parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
    return this;
  }
  PredicateLeaf leaf = new PredicateLeafImpl(
      PredicateLeaf.Operator.LESS_THAN, type, column, literal, null, conf);
  int leafId = addLeaf(leaf);
  parent.getChildren().add(new ExpressionTree(leafId));
  return this;
}
/**
 * Adds a LESS_THAN_EQUALS leaf to the current node, or a YES_NO_NULL
 * constant when the column or the literal is null.
 */
@Override
public Builder lessThanEquals(String column, PredicateLeaf.Type type,
                              Object literal) {
  ExpressionTree parent = currentTree.getFirst();
  if (column == null || literal == null) {
    parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
    return this;
  }
  PredicateLeaf leaf = new PredicateLeafImpl(
      PredicateLeaf.Operator.LESS_THAN_EQUALS, type, column, literal, null,
      conf);
  int leafId = addLeaf(leaf);
  parent.getChildren().add(new ExpressionTree(leafId));
  return this;
}
/**
 * Adds an EQUALS leaf to the current node, or a YES_NO_NULL constant when
 * the column or the literal is null.
 */
@Override
public Builder equals(String column, PredicateLeaf.Type type,
                      Object literal) {
  ExpressionTree parent = currentTree.getFirst();
  if (column == null || literal == null) {
    parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
    return this;
  }
  PredicateLeaf leaf = new PredicateLeafImpl(
      PredicateLeaf.Operator.EQUALS, type, column, literal, null, conf);
  int leafId = addLeaf(leaf);
  parent.getChildren().add(new ExpressionTree(leafId));
  return this;
}
/**
 * Adds a NULL_SAFE_EQUALS leaf to the current node, or a YES_NO_NULL
 * constant when the column or the literal is null.
 */
@Override
public Builder nullSafeEquals(String column, PredicateLeaf.Type type,
                              Object literal) {
  ExpressionTree parent = currentTree.getFirst();
  if (column == null || literal == null) {
    parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
    return this;
  }
  PredicateLeaf leaf = new PredicateLeafImpl(
      PredicateLeaf.Operator.NULL_SAFE_EQUALS, type, column, literal, null,
      conf);
  int leafId = addLeaf(leaf);
  parent.getChildren().add(new ExpressionTree(leafId));
  return this;
}
/**
 * Adds an IN leaf to the current node, or a YES_NO_NULL constant when the
 * column or the literal array is null.
 *
 * @param column the column name
 * @param type the literal type
 * @param literal the values to test against; copied into a new list
 * @throws IllegalArgumentException if {@code literal} is empty
 */
@Override
public Builder in(String column, PredicateLeaf.Type type,
                  Object... literal) {
  ExpressionTree parent = currentTree.getFirst();
  if (column == null || literal == null) {
    parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
  } else {
    if (literal.length == 0) {
      throw new IllegalArgumentException("Can't create in expression with "
          + "no arguments");
    }
    // Copy into an ArrayList so the leaf does not alias the caller's array.
    List<Object> argList = new ArrayList<>(Arrays.asList(literal));
    PredicateLeaf leaf =
        new PredicateLeafImpl(PredicateLeaf.Operator.IN,
            type, column, null, argList, conf);
    parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
  }
  return this;
}
/**
 * Adds an IS_NULL leaf to the current node, or a YES_NO_NULL constant when
 * the column is null.
 */
@Override
public Builder isNull(String column, PredicateLeaf.Type type) {
  ExpressionTree parent = currentTree.getFirst();
  if (column == null) {
    parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
    return this;
  }
  PredicateLeaf leaf = new PredicateLeafImpl(
      PredicateLeaf.Operator.IS_NULL, type, column, null, null, conf);
  int leafId = addLeaf(leaf);
  parent.getChildren().add(new ExpressionTree(leafId));
  return this;
}
/**
 * Adds a BETWEEN leaf whose literal list is {@code [lower, upper]}, or a
 * YES_NO_NULL constant when the column or either bound is null.
 *
 * @param column the column name
 * @param type the literal type
 * @param lower the first bound
 * @param upper the second bound
 */
@Override
public Builder between(String column, PredicateLeaf.Type type, Object lower,
                       Object upper) {
  ExpressionTree parent = currentTree.getFirst();
  if (column == null || lower == null || upper == null) {
    parent.getChildren().add(new ExpressionTree(TruthValue.YES_NO_NULL));
  } else {
    List<Object> argList = new ArrayList<>(2);
    argList.add(lower);
    argList.add(upper);
    PredicateLeaf leaf =
        new PredicateLeafImpl(PredicateLeaf.Operator.BETWEEN,
            type, column, null, argList, conf);
    parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
  }
  return this;
}
/**
 * Adds a constant truth value as a child of the current node.
 */
@Override
public Builder literal(TruthValue truth) {
  ExpressionTree constant = new ExpressionTree(truth);
  currentTree.getFirst().getChildren().add(constant);
  return this;
}
/**
 * Walks the tree depth-first and hands out consecutive new ids to the leaves
 * that are still reachable after optimizations.
 * @param tree the node to check next
 * @param next the next available leaf id
 * @param leafReorder indexed by old leaf id; entries equal to -1 are
 *                    unassigned and receive the new id when first reached
 * @return the next available leaf id
 */
static int compactLeaves(ExpressionTree tree, int next, int[] leafReorder) {
  if (tree.getOperator() == ExpressionTree.Operator.LEAF) {
    int oldId = tree.getLeaf();
    if (leafReorder[oldId] == -1) {
      leafReorder[oldId] = next;
      next += 1;
    }
    return next;
  }
  List<ExpressionTree> children = tree.getChildren();
  if (children == null) {
    return next;
  }
  for (ExpressionTree child : children) {
    next = compactLeaves(child, next, leafReorder);
  }
  return next;
}
/**
 * Rewrite expression tree to update the leaves.
 * @param root the root of the tree to fix
 * @param leafReorder a map from old leaf ids to new leaf ids
 * @return the fixed root (the same object that was passed in)
 */
static ExpressionTree rewriteLeaves(ExpressionTree root,
                                    int[] leafReorder) {
  // The leaves could be shared in the tree. Use Set to remove the duplicates,
  // so that a shared node is not remapped twice.
  // NOTE(review): this relies on ExpressionTree's equals/hashCode telling
  // distinct leaf nodes apart - confirm against ExpressionTree.
  Set<ExpressionTree> leaves = new HashSet<>();
  // Breadth-first walk collecting every leaf node.
  Queue<ExpressionTree> nodes = new ArrayDeque<>();
  nodes.add(root);
  while (!nodes.isEmpty()) {
    ExpressionTree node = nodes.remove();
    if (node.getOperator() == ExpressionTree.Operator.LEAF) {
      leaves.add(node);
    } else if (node.getChildren() != null) {
      nodes.addAll(node.getChildren());
    }
  }
  // Update the leaf in place
  for (ExpressionTree leaf : leaves) {
    leaf.setLeaf(leafReorder[leaf.getLeaf()]);
  }
  return root;
}
/**
 * Normalizes the collected expression and returns the search argument.
 * The tree is run through pushDownNot, foldMaybe, flatten, convertToCNF and
 * flatten again; leaves that are no longer referenced are dropped and the
 * remaining ones renumbered.
 *
 * @throws IllegalArgumentException if an operator was started but not ended
 */
@Override
public SearchArgument build() {
  if (currentTree.size() != 1) {
    throw new IllegalArgumentException("Failed to end " +
        currentTree.size() + " operations.");
  }
  ExpressionTree optimized = pushDownNot(root);
  optimized = foldMaybe(optimized);
  optimized = flatten(optimized);
  optimized = convertToCNF(optimized);
  optimized = flatten(optimized);
  // leafReorder[oldId] is the new id of a leaf, or -1 if it is unreachable
  int[] leafReorder = new int[leaves.size()];
  Arrays.fill(leafReorder, -1);
  int newLeafCount = compactLeaves(optimized, 0, leafReorder);
  optimized = rewriteLeaves(optimized, leafReorder);
  List<PredicateLeaf> leafList = new ArrayList<>(newLeafCount);
  // expand list to correct size
  for (int i = 0; i < newLeafCount; ++i) {
    leafList.add(null);
  }
  // build the new list
  for (Map.Entry<PredicateLeaf, Integer> elem : leaves.entrySet()) {
    int newLoc = leafReorder[elem.getValue()];
    if (newLoc != -1) {
      leafList.set(newLoc, elem.getKey());
    }
  }
  return new SearchArgumentImpl(optimized, leafList);
}
/**
 * Moves every negation down until it sits directly above a leaf, removing
 * double negatives and negating constants on the way.
 * @param root the expression to normalize
 * @return the normalized expression, which may share some or all of the
 * nodes of the original expression.
 */
static ExpressionTree pushDownNot(ExpressionTree root) {
  if (root.getOperator() != ExpressionTree.Operator.NOT) {
    // not a negation: normalize each child in place
    List<ExpressionTree> kids = root.getChildren();
    if (kids != null) {
      for (int i = 0; i < kids.size(); ++i) {
        kids.set(i, pushDownNot(kids.get(i)));
      }
    }
    return root;
  }
  ExpressionTree negated = root.getChildren().get(0);
  switch (negated.getOperator()) {
    case NOT:
      // double negative: drop both
      return pushDownNot(negated.getChildren().get(0));
    case CONSTANT:
      return new ExpressionTree(negated.getConstant().not());
    case AND:
      return negateChildren(ExpressionTree.Operator.OR, negated);
    case OR:
      return negateChildren(ExpressionTree.Operator.AND, negated);
    default:
      // for leaf, we don't do anything
      return root;
  }
}

/**
 * Builds a node with the given operator whose children are the pushed-down
 * negations of the children of source.
 */
private static ExpressionTree negateChildren(ExpressionTree.Operator operator,
                                             ExpressionTree source) {
  ExpressionTree result = new ExpressionTree(operator);
  for (ExpressionTree kid : source.getChildren()) {
    result.getChildren().add(
        pushDownNot(new ExpressionTree(ExpressionTree.Operator.NOT, kid)));
  }
  return result;
}
/**
 * Eliminates MAYBE (YES_NO_NULL) constants. Under an AND they are removed;
 * under an OR the whole OR collapses to the MAYBE constant. An operator left
 * with no children becomes a MAYBE constant itself.
 * This assumes that pushDownNot has already been called.
 * @param expr The expression to clean up
 * @return The cleaned up expression
 */
static ExpressionTree foldMaybe(ExpressionTree expr) {
  List<ExpressionTree> children = expr.getChildren();
  if (children == null) {
    return expr;
  }
  int i = 0;
  while (i < children.size()) {
    ExpressionTree folded = foldMaybe(children.get(i));
    if (folded.getConstant() != TruthValue.YES_NO_NULL) {
      children.set(i, folded);
      i += 1;
      continue;
    }
    switch (expr.getOperator()) {
      case AND:
        // drop the maybe; the next child slides into position i
        children.remove(i);
        break;
      case OR:
        // a maybe will kill the or condition
        return folded;
      default:
        throw new IllegalStateException("Got a maybe as child of " +
            expr);
    }
  }
  if (children.isEmpty()) {
    return new ExpressionTree(TruthValue.YES_NO_NULL);
  }
  return expr;
}
/**
* Converts multi-level ands and ors into single level ones: a child that has
* the same operator as its parent (other than NOT) is replaced in place by
* its own children.
* @param root the expression to flatten; its child list is modified in place
* @return root with flattened children, or, when root is an AND or OR that
* ends up with exactly one child, that single child
*/
static ExpressionTree flatten(ExpressionTree root) {
if (root.getChildren() != null) {
// iterate through the index, so that if we add more children,
// they don't get re-visited
for(int i=0; i < root.getChildren().size(); ++i) {
// flatten the subtree first, so any grandkids spliced in below are
// already flattened
ExpressionTree child = flatten(root.getChildren().get(i));
// do we need to flatten?
if (child.getOperator() == root.getOperator() &&
child.getOperator() != ExpressionTree.Operator.NOT) {
boolean first = true;
for(ExpressionTree grandkid: child.getChildren()) {
// for the first grandkid replace the original parent
if (first) {
first = false;
root.getChildren().set(i, grandkid);
} else {
// insert after the previous grandkid; ++i also moves the outer loop
// past the inserted node
root.getChildren().add(++i, grandkid);
}
}
} else {
// different operator: keep the (possibly replaced) flattened child
root.getChildren().set(i, child);
}
}
// if we have a singleton AND or OR, just return the child
if ((root.getOperator() == ExpressionTree.Operator.OR ||
root.getOperator() == ExpressionTree.Operator.AND) &&
root.getChildren().size() == 1) {
return root.getChildren().get(0);
}
}
return root;
}
/**
 * Generate all combinations of items on the andList. For each item on the
 * andList, it generates all combinations of one child from each and
 * expression. Thus, (and a b) (and c d) will be expanded to: (or a c)
 * (or a d) (or b c) (or b d). If there are items on the nonAndList, they
 * are added to each or expression.
 * @param result a list to put the results onto; on the first call it must
 *               be empty, on recursive calls it holds the partial ORs
 * @param andList a non-empty list of and expressions
 * @param nonAndList a list of non-and expressions
 */
private static void generateAllCombinations(List<ExpressionTree> result,
                                            List<ExpressionTree> andList,
                                            List<ExpressionTree> nonAndList
                                            ) {
  List<ExpressionTree> kids = andList.get(0).getChildren();
  if (result.isEmpty()) {
    // first AND: start one OR per kid, seeded with copies of the non-AND
    // nodes (made with the ExpressionTree copy constructor)
    for (ExpressionTree kid : kids) {
      ExpressionTree or = new ExpressionTree(ExpressionTree.Operator.OR);
      result.add(or);
      for (ExpressionTree node : nonAndList) {
        or.getChildren().add(new ExpressionTree(node));
      }
      or.getChildren().add(kid);
    }
  } else {
    // later ANDs: cross every OR built so far with every kid of this AND
    List<ExpressionTree> work = new ArrayList<>(result);
    result.clear();
    for (ExpressionTree kid : kids) {
      for (ExpressionTree or : work) {
        ExpressionTree copy = new ExpressionTree(or);
        copy.getChildren().add(kid);
        result.add(copy);
      }
    }
  }
  if (andList.size() > 1) {
    generateAllCombinations(result, andList.subList(1, andList.size()),
        nonAndList);
  }
}
/**
 * Convert an expression so that the top level operator is AND with OR
 * operators under it. This routine assumes that all of the NOT operators
 * have been pushed to the leaves via pushDownNot.
 * @param root the expression
 * @return the normalized expression; an OR whose expansion would exceed
 *         CNF_COMBINATIONS_THRESHOLD combinations is replaced by a
 *         YES_NO_NULL constant
 */
static ExpressionTree convertToCNF(ExpressionTree root) {
  if (root.getChildren() != null) {
    // convert all of the children to CNF
    int size = root.getChildren().size();
    for (int i = 0; i < size; ++i) {
      root.getChildren().set(i, convertToCNF(root.getChildren().get(i)));
    }
    if (root.getOperator() == ExpressionTree.Operator.OR) {
      // a list of leaves that weren't under AND expressions
      List<ExpressionTree> nonAndList = new ArrayList<>();
      // a list of AND expressions that we need to distribute
      List<ExpressionTree> andList = new ArrayList<>();
      for (ExpressionTree child : root.getChildren()) {
        if (child.getOperator() == ExpressionTree.Operator.AND) {
          andList.add(child);
        } else if (child.getOperator() == ExpressionTree.Operator.OR) {
          // pull apart the kids of the OR expression
          nonAndList.addAll(child.getChildren());
        } else {
          nonAndList.add(child);
        }
      }
      if (!andList.isEmpty()) {
        if (checkCombinationsThreshold(andList)) {
          root = new ExpressionTree(ExpressionTree.Operator.AND);
          generateAllCombinations(root.getChildren(), andList, nonAndList);
        } else {
          // too many combinations: give up on this OR and answer "maybe"
          root = new ExpressionTree(TruthValue.YES_NO_NULL);
        }
      }
    }
  }
  return root;
}
/**
 * Checks whether distributing the given AND expressions stays within
 * CNF_COMBINATIONS_THRESHOLD.
 * @param andList the AND expressions to be distributed
 * @return false as soon as the running product of the child counts exceeds
 *         the threshold, true otherwise
 */
private static boolean checkCombinationsThreshold(List<ExpressionTree> andList) {
  // long: the running product is at most the threshold before each
  // multiplication, so it cannot overflow even for a very wide AND
  long numComb = 1;
  for (ExpressionTree tree : andList) {
    numComb *= tree.getChildren().size();
    if (numComb > CNF_COMBINATIONS_THRESHOLD) {
      return false;
    }
  }
  return true;
}
}
}