// org.apache.hadoop.hive.accumulo.predicate.AccumuloPredicateHandler Maven / Gradle / Ivy
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.accumulo.predicate;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.data.Range;
import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.accumulo.columns.ColumnMapper;
import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping;
import org.apache.hadoop.hive.accumulo.predicate.compare.CompareOp;
import org.apache.hadoop.hive.accumulo.predicate.compare.DoubleCompare;
import org.apache.hadoop.hive.accumulo.predicate.compare.Equal;
import org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThan;
import org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThanOrEqual;
import org.apache.hadoop.hive.accumulo.predicate.compare.IntCompare;
import org.apache.hadoop.hive.accumulo.predicate.compare.LessThan;
import org.apache.hadoop.hive.accumulo.predicate.compare.LessThanOrEqual;
import org.apache.hadoop.hive.accumulo.predicate.compare.Like;
import org.apache.hadoop.hive.accumulo.predicate.compare.LongCompare;
import org.apache.hadoop.hive.accumulo.predicate.compare.NotEqual;
import org.apache.hadoop.hive.accumulo.predicate.compare.PrimitiveComparison;
import org.apache.hadoop.hive.accumulo.predicate.compare.StringCompare;
import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler.DecomposedPredicate;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.udf.UDFLike;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.log4j.Logger;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
/**
*
* Supporting operations dealing with Hive Predicate pushdown to iterators and ranges.
*
* See {@link PrimitiveComparisonFilter}
*
*/
public class AccumuloPredicateHandler {
// Single unbounded Range, returned whenever the scan cannot be narrowed by a rowid predicate.
private static final List<Range> TOTAL_RANGE = Collections.singletonList(new Range());
// Shared instance returned by getInstance(); the constructor is private.
private static final AccumuloPredicateHandler handler = new AccumuloPredicateHandler();
// Hive comparison UDF class name -> CompareOp implementation.
private static final Map<String, Class<? extends CompareOp>> compareOps = Maps.newHashMap();
// Hive type name -> PrimitiveComparison implementation.
private static final Map<String, Class<? extends PrimitiveComparison>> pComparisons = Maps
    .newHashMap();
// Want to start sufficiently "high" enough in the iterator stack
private static int iteratorCount = 50;
private static final Logger log = Logger.getLogger(AccumuloPredicateHandler.class);
static {
  // Comparison operators for which an IteratorSetting can be built.
  compareOps.put(GenericUDFOPEqual.class.getName(), Equal.class);
  compareOps.put(GenericUDFOPNotEqual.class.getName(), NotEqual.class);
  compareOps.put(GenericUDFOPGreaterThan.class.getName(), GreaterThan.class);
  compareOps.put(GenericUDFOPEqualOrGreaterThan.class.getName(), GreaterThanOrEqual.class);
  compareOps.put(GenericUDFOPEqualOrLessThan.class.getName(), LessThanOrEqual.class);
  compareOps.put(GenericUDFOPLessThan.class.getName(), LessThan.class);
  compareOps.put(UDFLike.class.getName(), Like.class);
  // Hive column types for which a PrimitiveComparison is registered.
  pComparisons.put("bigint", LongCompare.class);
  pComparisons.put("int", IntCompare.class);
  pComparisons.put("double", DoubleCompare.class);
  pComparisons.put("string", StringCompare.class);
}
/**
 * @return the shared {@link AccumuloPredicateHandler} held in the static {@code handler} field
 */
public static AccumuloPredicateHandler getInstance() {
  return AccumuloPredicateHandler.handler;
}
/**
 * Lists the comparison UDFs that have a registered CompareOp.
 *
 * @return set of all UDF class names with matching CompareOp implementations; this is the key
 *         view of the internal registry, not a copy
 */
public Set<String> cOpKeyset() {
  return compareOps.keySet();
}
/**
 * Lists the Hive data types that have a registered PrimitiveComparison.
 *
 * @return set of all hive data types with matching PrimitiveComparison implementations; this is
 *         the key view of the internal registry, not a copy
 */
public Set<String> pComparisonKeyset() {
  return pComparisons.keySet();
}
/**
*
* @param udfType
* GenericUDF classname to lookup matching CompareOpt
* @return Class extends CompareOpt/>
*/
public Class extends CompareOp> getCompareOpClass(String udfType)
throws NoSuchCompareOpException {
if (!compareOps.containsKey(udfType))
throw new NoSuchCompareOpException("Null compare op for specified key: " + udfType);
return compareOps.get(udfType);
}
public CompareOp getCompareOp(String udfType, IndexSearchCondition sc)
throws NoSuchCompareOpException, SerDeException {
Class extends CompareOp> clz = getCompareOpClass(udfType);
try {
return clz.newInstance();
} catch (ClassCastException e) {
throw new SerDeException("Column type mismatch in WHERE clause "
+ sc.getComparisonExpr().getExprString() + " found type "
+ sc.getConstantDesc().getTypeString() + " instead of "
+ sc.getColumnDesc().getTypeString());
} catch (IllegalAccessException e) {
throw new SerDeException("Could not instantiate class for WHERE clause", e);
} catch (InstantiationException e) {
throw new SerDeException("Could not instantiate class for WHERE clause", e);
}
}
/**
*
* @param type
* String hive column lookup matching PrimitiveCompare
* @return Class extends >?>
*/
public Class extends PrimitiveComparison> getPrimitiveComparisonClass(String type)
throws NoSuchPrimitiveComparisonException {
if (!pComparisons.containsKey(type))
throw new NoSuchPrimitiveComparisonException("Null primitive comparison for specified key: "
+ type);
return pComparisons.get(type);
}
public PrimitiveComparison getPrimitiveComparison(String type, IndexSearchCondition sc)
throws NoSuchPrimitiveComparisonException, SerDeException {
Class extends PrimitiveComparison> clz = getPrimitiveComparisonClass(type);
try {
return clz.newInstance();
} catch (ClassCastException e) {
throw new SerDeException("Column type mismatch in WHERE clause "
+ sc.getComparisonExpr().getExprString() + " found type "
+ sc.getConstantDesc().getTypeString() + " instead of "
+ sc.getColumnDesc().getTypeString());
} catch (IllegalAccessException e) {
throw new SerDeException("Could not instantiate class for WHERE clause", e);
} catch (InstantiationException e) {
throw new SerDeException("Could not instantiate class for WHERE clause", e);
}
}
/** Private so that instances are only created inside this class; see {@link #getInstance()}. */
private AccumuloPredicateHandler() {
}
/**
 * Loop through search conditions and build ranges for predicates involving rowID column, if any.
 *
 * @param conf
 *          Configuration holding the Hive column names and, optionally, the serialized filter
 *          expression
 * @param columnMapper
 *          Mapping of Hive to Accumulo columns for the query
 * @return the ranges to scan; the single unbounded range when there is no rowid mapping, no
 *         filter expression, or range generation produced nothing
 * @throws IllegalArgumentException
 *           if the Hive column names are missing from {@code conf}, or range generation returns
 *           something other than a Range or a List
 * @throws SerDeException
 *           declared for callers; no statement in this method throws it directly
 */
public List<Range> getRanges(Configuration conf, ColumnMapper columnMapper) throws SerDeException {
  if (!columnMapper.hasRowIdMapping()) {
    return TOTAL_RANGE;
  }
  int rowIdOffset = columnMapper.getRowIdOffset();
  String[] hiveColumnNamesArr = conf.getStrings(serdeConstants.LIST_COLUMNS);
  if (null == hiveColumnNamesArr) {
    throw new IllegalArgumentException("Could not find Hive columns in configuration");
  }
  // Already verified that we should have the rowId mapping
  String hiveRowIdColumnName = hiveColumnNamesArr[rowIdOffset];
  ExprNodeDesc root = this.getExpression(conf);
  // No expression, therefore scan the whole table
  if (null == root) {
    return TOTAL_RANGE;
  }
  // The traversal result is untyped: null, a single Range, or a List of Ranges.
  Object result = generateRanges(columnMapper, hiveRowIdColumnName, root);
  if (null == result) {
    log.info("Calculated null set of ranges, scanning full table");
    return TOTAL_RANGE;
  } else if (result instanceof Range) {
    log.info("Computed a single Range for the query: " + result);
    return Collections.singletonList((Range) result);
  } else if (result instanceof List) {
    log.info("Computed a collection of Ranges for the query: " + result);
    // Element type cannot be checked at runtime; the range generator is trusted to emit Ranges.
    @SuppressWarnings("unchecked")
    List<Range> ranges = (List<Range>) result;
    return ranges;
  } else {
    throw new IllegalArgumentException("Unhandled return from Range generation: " + result);
  }
}
/**
 * Encapsulates the traversal over some {@link ExprNodeDesc} tree for the generation of Accumulo
 * Ranges using expressions involving the Accumulo rowid-mapped Hive column
 *
 * @param columnMapper
 *          Mapping of Hive to Accumulo columns for the query
 * @param hiveRowIdColumnName
 *          Name of the hive column mapped to the Accumulo rowid
 * @param root
 *          Root of some ExprNodeDesc tree to traverse, the WHERE clause
 * @return An object representing the result from the ExprNodeDesc tree traversal using the
 *         AccumuloRangeGenerator
 * @throws RuntimeException
 *           wrapping any SemanticException raised while walking the tree
 */
protected Object generateRanges(ColumnMapper columnMapper, String hiveRowIdColumnName,
    ExprNodeDesc root) {
  AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler,
      columnMapper.getRowIdMapping(), hiveRowIdColumnName);
  // No rules are registered, so the range generator is the only processor handed to the dispatcher.
  Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator,
      Collections.<Rule, NodeProcessor> emptyMap(), null);
  GraphWalker ogw = new DefaultGraphWalker(disp);
  ArrayList<Node> roots = new ArrayList<Node>();
  roots.add(root);
  HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
  try {
    ogw.startWalking(roots, nodeOutput);
  } catch (SemanticException ex) {
    throw new RuntimeException(ex);
  }
  // The value recorded for the root node is the result for the whole expression.
  return nodeOutput.get(root);
}
/**
 * Loop through search conditions and build iterator settings for predicates involving columns
 * other than rowID, if any.
 *
 * @param conf
 *          Configuration
 * @param columnMapper
 *          Mapping of Hive to Accumulo columns for the query
 * @return one IteratorSetting per search condition on a non-rowid column; empty when iterator
 *         pushdown is disabled
 * @throws IllegalArgumentException
 *           if the Hive column names are missing from {@code conf}
 * @throws SerDeException
 *           if an IteratorSetting cannot be built for a search condition
 */
public List<IteratorSetting> getIterators(Configuration conf, ColumnMapper columnMapper)
    throws SerDeException {
  List<IteratorSetting> itrs = Lists.newArrayList();
  boolean shouldPushdown = conf.getBoolean(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY,
      AccumuloSerDeParameters.ITERATOR_PUSHDOWN_DEFAULT);
  if (!shouldPushdown) {
    log.info("Iterator pushdown is disabled for this table");
    return itrs;
  }
  int rowIdOffset = columnMapper.getRowIdOffset();
  String[] hiveColumnNamesArr = conf.getStrings(serdeConstants.LIST_COLUMNS);
  if (null == hiveColumnNamesArr) {
    throw new IllegalArgumentException("Could not find Hive columns in configuration");
  }
  // Stays null when the offset does not index into the column list; then no column is skipped.
  String hiveRowIdColumnName = null;
  if (rowIdOffset >= 0 && rowIdOffset < hiveColumnNamesArr.length) {
    hiveRowIdColumnName = hiveColumnNamesArr[rowIdOffset];
  }
  List<String> hiveColumnNames = Arrays.asList(hiveColumnNamesArr);
  for (IndexSearchCondition sc : getSearchConditions(conf)) {
    String col = sc.getColumnDesc().getColumn();
    // Conditions on the rowid column are skipped here.
    if (hiveRowIdColumnName == null || !hiveRowIdColumnName.equals(col)) {
      HiveAccumuloColumnMapping mapping = (HiveAccumuloColumnMapping) columnMapper
          .getColumnMappingForHiveColumn(hiveColumnNames, col);
      itrs.add(toSetting(mapping, sc));
    }
  }
  if (log.isInfoEnabled()) {
    log.info("num iterators = " + itrs.size());
  }
  return itrs;
}
/**
 * Builds the {@link IteratorSetting} that configures a {@link PrimitiveComparisonFilter} for one
 * search condition: the column to filter, the constant to compare against, and the CompareOp and
 * PrimitiveComparison classes to use.
 *
 * @param accumuloColumnMapping
 *          ColumnMapping to filter
 * @param sc
 *          IndexSearchCondition
 * @return IteratorSetting
 * @throws SerDeException
 *           if no PrimitiveComparison or CompareOp is configured for the condition, or one of
 *           them cannot be created
 */
public IteratorSetting toSetting(HiveAccumuloColumnMapping accumuloColumnMapping,
    IndexSearchCondition sc) throws SerDeException {
  // The shared counter supplies both the priority and the name suffix of the new setting.
  iteratorCount++;
  final int priority = iteratorCount;
  final String filterName = PrimitiveComparisonFilter.FILTER_PREFIX + priority;
  final IteratorSetting setting = new IteratorSetting(priority, filterName,
      PrimitiveComparisonFilter.class);

  final String columnType = sc.getColumnDesc().getTypeString();
  final String opName = sc.getComparisonOp();

  PushdownTuple pushdown;
  try {
    PrimitiveComparison comparison = getPrimitiveComparison(columnType, sc);
    CompareOp op = getCompareOp(opName, sc);
    pushdown = new PushdownTuple(sc, comparison, op);
  } catch (NoSuchPrimitiveComparisonException e) {
    throw new SerDeException("No configured PrimitiveComparison class for " + columnType, e);
  } catch (NoSuchCompareOpException e) {
    throw new SerDeException("No configured CompareOp class for " + opName, e);
  }

  final String comparisonClassName = pushdown.getpCompare().getClass().getName();
  final String opClassName = pushdown.getcOpt().getClass().getName();
  // The constant is passed as Base64 text because iterator options are Strings.
  final byte[] encodedConstant = Base64.encodeBase64(pushdown.getConstVal());

  setting.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, comparisonClassName);
  setting.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, opClassName);
  setting.addOption(PrimitiveComparisonFilter.CONST_VAL, new String(encodedConstant));
  setting.addOption(PrimitiveComparisonFilter.COLUMN, accumuloColumnMapping.serialize());
  return setting;
}
/**
 * Reads the serialized filter expression from the configuration and deserializes it.
 *
 * @param conf
 *          Configuration
 * @return the deserialized filter expression, or {@code null} when
 *         {@link TableScanDesc#FILTER_EXPR_CONF_STR} is not set
 */
public ExprNodeDesc getExpression(Configuration conf) {
  final String serialized = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  return (serialized == null) ? null : Utilities.deserializeExpression(serialized);
}
/**
 * Extracts the search conditions from the filter expression stored in the configuration.
 *
 * @param conf
 *          Configuration
 * @return list of IndexSearchConditions from the filter expression; empty when no filter
 *         expression is set
 * @throws RuntimeException
 *           if the analyzer leaves a residual predicate
 */
public List<IndexSearchCondition> getSearchConditions(Configuration conf) {
  final List<IndexSearchCondition> sConditions = Lists.newArrayList();
  ExprNodeDesc filterExpr = getExpression(conf);
  if (null == filterExpr) {
    return sConditions;
  }
  IndexPredicateAnalyzer analyzer = newAnalyzer(conf);
  // The analyzer fills sConditions and returns whatever part of the predicate it did not accept.
  ExprNodeDesc residual = analyzer.analyzePredicate(filterExpr, sConditions);
  if (residual != null) {
    throw new RuntimeException("Unexpected residual predicate: " + residual.getExprString());
  }
  return sConditions;
}
/**
 * Splits a predicate into the part made of supported search conditions and the residual part.
 *
 * @param conf
 *          Configuration
 * @param desc
 *          predicate expression node.
 * @return DecomposedPredicate containing translated search conditions the analyzer can support,
 *         or {@code null} when the analyzer found no search conditions
 */
public DecomposedPredicate decompose(Configuration conf, ExprNodeDesc desc) {
  IndexPredicateAnalyzer analyzer = newAnalyzer(conf);
  List<IndexSearchCondition> sConditions = new ArrayList<IndexSearchCondition>();
  ExprNodeDesc residualPredicate = analyzer.analyzePredicate(desc, sConditions);
  if (sConditions.isEmpty()) {
    if (log.isInfoEnabled()) {
      log.info("nothing to decompose. Returning");
    }
    return null;
  }
  DecomposedPredicate decomposedPredicate = new DecomposedPredicate();
  decomposedPredicate.pushedPredicate = analyzer.translateSearchConditions(sConditions);
  // NOTE(review): assumes any residual is an ExprNodeGenericFuncDesc; confirm against the analyzer.
  decomposedPredicate.residualPredicate = (ExprNodeGenericFuncDesc) residualPredicate;
  return decomposedPredicate;
}
/**
 * Build an analyzer that allows comparison ops from the compareOps map, and all columns from
 * table definition.
 *
 * @param conf
 *          Configuration providing the Hive column names
 * @return analyzer restricted to the registered comparison ops and the table's columns
 * @throws IllegalArgumentException
 *           if the Hive column names are missing from {@code conf}
 */
private IndexPredicateAnalyzer newAnalyzer(Configuration conf) {
  IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
  analyzer.clearAllowedColumnNames();
  for (String op : cOpKeyset()) {
    analyzer.addComparisonOp(op);
  }
  String[] hiveColumnNames = conf.getStrings(serdeConstants.LIST_COLUMNS);
  // Fail with the same message as getRanges/getIterators instead of an NPE in the loop below.
  if (null == hiveColumnNames) {
    throw new IllegalArgumentException("Could not find Hive columns in configuration");
  }
  for (String col : hiveColumnNames) {
    analyzer.allowColumnName(col);
  }
  return analyzer;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy