org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.*;
import org.apache.hadoop.hive.ql.io.AcidUtils.Operation;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc;
import org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.spark.OptimizeSparkProcContext;
import org.apache.hadoop.hive.ql.plan.*;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This optimization looks for expressions of the kind "x IN (RS[n])". If such
* an expression made it to a table scan operator and x is a partition column we
* can use an existing join to dynamically prune partitions. This class sets up
* the infrastructure for that.
*/
public class DynamicPartitionPruningOptimization implements NodeProcessor {
static final private Logger LOG = LoggerFactory.getLogger(DynamicPartitionPruningOptimization.class
.getName());
private static class DynamicPartitionPrunerProc implements NodeProcessor {
/**
* process simply remembers all the dynamic partition pruning expressions
* found
*/
@Override
public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
ExprNodeDynamicListDesc desc = (ExprNodeDynamicListDesc) nd;
DynamicPartitionPrunerContext context = (DynamicPartitionPrunerContext) procCtx;
// Rule is searching for dynamic pruning expr. There's at least an IN
// expression wrapping it.
ExprNodeDesc parent = (ExprNodeDesc) stack.get(stack.size() - 2);
ExprNodeDesc grandParent = stack.size() >= 3 ? (ExprNodeDesc) stack.get(stack.size() - 3) : null;
context.addDynamicList(desc, parent, grandParent, (ReduceSinkOperator) desc.getSource());
return context;
}
}
private static class DynamicListContext {
public ExprNodeDynamicListDesc desc;
public ExprNodeDesc parent;
public ExprNodeDesc grandParent;
public ReduceSinkOperator generator;
public DynamicListContext(ExprNodeDynamicListDesc desc, ExprNodeDesc parent,
ExprNodeDesc grandParent, ReduceSinkOperator generator) {
this.desc = desc;
this.parent = parent;
this.grandParent = grandParent;
this.generator = generator;
}
}
private static class DynamicPartitionPrunerContext implements NodeProcessorCtx,
Iterable {
public List dynLists = new ArrayList();
public void addDynamicList(ExprNodeDynamicListDesc desc, ExprNodeDesc parent,
ExprNodeDesc grandParent, ReduceSinkOperator generator) {
dynLists.add(new DynamicListContext(desc, parent, grandParent, generator));
}
@Override
public Iterator iterator() {
return dynLists.iterator();
}
}
@Override
public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
throws SemanticException {
ParseContext parseContext;
if (procCtx instanceof OptimizeTezProcContext) {
parseContext = ((OptimizeTezProcContext) procCtx).parseContext;
} else if (procCtx instanceof OptimizeSparkProcContext) {
parseContext = ((OptimizeSparkProcContext) procCtx).getParseContext();
} else {
throw new IllegalArgumentException("expected parseContext to be either " +
"OptimizeTezProcContext or OptimizeSparkProcContext, but found " +
procCtx.getClass().getName());
}
FilterOperator filter = (FilterOperator) nd;
FilterDesc desc = filter.getConf();
if (!parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING) &&
!parseContext.getConf().getBoolVar(ConfVars.SPARK_DYNAMIC_PARTITION_PRUNING)) {
// nothing to do when the optimization is off
return null;
}
TableScanOperator ts = null;
if (filter.getParentOperators().size() == 1
&& filter.getParentOperators().get(0) instanceof TableScanOperator) {
ts = (TableScanOperator) filter.getParentOperators().get(0);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Parent: " + filter.getParentOperators().get(0));
LOG.debug("Filter: " + desc.getPredicateString());
LOG.debug("TableScan: " + ts);
}
DynamicPartitionPrunerContext removerContext = new DynamicPartitionPrunerContext();
// collect the dynamic pruning conditions
removerContext.dynLists.clear();
collectDynamicPruningConditions(desc.getPredicate(), removerContext);
if (ts == null) {
// Replace the synthetic predicate with true and bail out
for (DynamicListContext ctx : removerContext) {
ExprNodeDesc constNode =
new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true);
replaceExprNode(ctx, desc, constNode);
}
return false;
}
final boolean semiJoin = parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION);
for (DynamicListContext ctx : removerContext) {
String column = ExprNodeDescUtils.extractColName(ctx.parent);
boolean semiJoinAttempted = false;
if (column != null) {
// Need unique IDs to refer to each min/max key value in the DynamicValueRegistry
String keyBaseAlias = "";
Table table = ts.getConf().getTableMetadata();
if (table != null && table.isPartitionKey(column)) {
String columnType = table.getPartColByName(column).getType();
String alias = ts.getConf().getAlias();
PrunedPartitionList plist = parseContext.getPrunedPartitions(alias, ts);
if (LOG.isDebugEnabled()) {
LOG.debug("alias: " + alias);
LOG.debug("pruned partition list: ");
if (plist != null) {
for (Partition p : plist.getPartitions()) {
LOG.debug(p.getCompleteName());
}
}
}
// If partKey is a constant, we can check whether the partitions
// have been already filtered
if (plist == null || plist.getPartitions().size() != 0) {
LOG.info("Dynamic partitioning: " + table.getCompleteName() + "." + column);
generateEventOperatorPlan(ctx, parseContext, ts, column, columnType);
} else {
// all partitions have been statically removed
LOG.debug("No partition pruning necessary.");
}
} else {
LOG.debug("Column " + column + " is not a partition column");
if (semiJoin && ts.getConf().getFilterExpr() != null) {
LOG.debug("Initiate semijoin reduction for " + column);
// Get the table name from which the min-max values will come.
Operator> op = ctx.generator;
while (!(op == null || op instanceof TableScanOperator)) {
op = op.getParentOperators().get(0);
}
String tableAlias = (op == null ? "" : ((TableScanOperator) op).getConf().getAlias());
keyBaseAlias = ctx.generator.getOperatorId() + "_" + tableAlias + "_" + column;
semiJoinAttempted = generateSemiJoinOperatorPlan(ctx, parseContext, ts, keyBaseAlias);
}
}
// If semijoin is attempted then replace the condition with a min-max filter
// and bloom filter else,
// we always remove the condition by replacing it with "true"
if (semiJoinAttempted) {
List betweenArgs = new ArrayList();
betweenArgs.add(new ExprNodeConstantDesc(Boolean.FALSE)); // Do not invert between result
// add column expression here
betweenArgs.add(ctx.parent.getChildren().get(0));
betweenArgs.add(new ExprNodeDynamicValueDesc(new DynamicValue(keyBaseAlias + "_min", ctx.desc.getTypeInfo())));
betweenArgs.add(new ExprNodeDynamicValueDesc(new DynamicValue(keyBaseAlias + "_max", ctx.desc.getTypeInfo())));
ExprNodeDesc betweenNode = ExprNodeGenericFuncDesc.newInstance(
FunctionRegistry.getFunctionInfo("between").getGenericUDF(), betweenArgs);
// add column expression for bloom filter
List bloomFilterArgs = new ArrayList();
bloomFilterArgs.add(ctx.parent.getChildren().get(0));
bloomFilterArgs.add(new ExprNodeDynamicValueDesc(
new DynamicValue(keyBaseAlias + "_bloom_filter",
TypeInfoFactory.binaryTypeInfo)));
ExprNodeDesc bloomFilterNode = ExprNodeGenericFuncDesc.newInstance(
FunctionRegistry.getFunctionInfo("in_bloom_filter").
getGenericUDF(), bloomFilterArgs);
List andArgs = new ArrayList();
andArgs.add(betweenNode);
andArgs.add(bloomFilterNode);
ExprNodeDesc andExpr = ExprNodeGenericFuncDesc.newInstance(
FunctionRegistry.getFunctionInfo("and").getGenericUDF(), andArgs);
replaceExprNode(ctx, desc, andExpr);
} else {
ExprNodeDesc replaceNode = new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true);
replaceExprNode(ctx, desc, replaceNode);
}
} else {
ExprNodeDesc constNode =
new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true);
replaceExprNode(ctx, desc, constNode);
}
}
// if we pushed the predicate into the table scan we need to remove the
// synthetic conditions there.
cleanTableScanFilters(ts);
return false;
}
private void replaceExprNode(DynamicListContext ctx, FilterDesc desc, ExprNodeDesc node) {
if (ctx.grandParent == null) {
desc.setPredicate(node);
} else {
int i = ctx.grandParent.getChildren().indexOf(ctx.parent);
ctx.grandParent.getChildren().remove(i);
ctx.grandParent.getChildren().add(i, node);
}
}
private void cleanTableScanFilters(TableScanOperator ts) throws SemanticException {
if (ts == null || ts.getConf() == null || ts.getConf().getFilterExpr() == null) {
// nothing to do
return;
}
DynamicPartitionPrunerContext removerContext = new DynamicPartitionPrunerContext();
// collect the dynamic pruning conditions
removerContext.dynLists.clear();
collectDynamicPruningConditions(ts.getConf().getFilterExpr(), removerContext);
for (DynamicListContext ctx : removerContext) {
// remove the condition by replacing it with "true"
ExprNodeDesc constNode = new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true);
if (ctx.grandParent == null) {
// we're the only node, just clear out the expression
ts.getConf().setFilterExpr(null);
} else {
int i = ctx.grandParent.getChildren().indexOf(ctx.parent);
ctx.grandParent.getChildren().remove(i);
ctx.grandParent.getChildren().add(i, constNode);
}
}
}
private void generateEventOperatorPlan(DynamicListContext ctx, ParseContext parseContext,
TableScanOperator ts, String column, String columnType) {
// we will put a fork in the plan at the source of the reduce sink
Operator extends OperatorDesc> parentOfRS = ctx.generator.getParentOperators().get(0);
// we need the expr that generated the key of the reduce sink
ExprNodeDesc key = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex());
// we also need the expr for the partitioned table
ExprNodeDesc partKey = ctx.parent.getChildren().get(0);
if (LOG.isDebugEnabled()) {
LOG.debug("key expr: " + key);
LOG.debug("partition key expr: " + partKey);
}
List keyExprs = new ArrayList();
keyExprs.add(key);
// group by requires "ArrayList", don't ask.
ArrayList outputNames = new ArrayList();
outputNames.add(HiveConf.getColumnInternalName(0));
// project the relevant key column
SelectDesc select = new SelectDesc(keyExprs, outputNames);
SelectOperator selectOp =
(SelectOperator) OperatorFactory.getAndMakeChild(select, parentOfRS);
// do a group by on the list to dedup
float groupByMemoryUsage =
HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold =
HiveConf.getFloatVar(parseContext.getConf(),
HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
ArrayList groupByExprs = new ArrayList();
ExprNodeDesc groupByExpr =
new ExprNodeColumnDesc(key.getTypeInfo(), outputNames.get(0), null, false);
groupByExprs.add(groupByExpr);
GroupByDesc groupBy =
new GroupByDesc(GroupByDesc.Mode.HASH, outputNames, groupByExprs,
new ArrayList(), false, groupByMemoryUsage, memoryThreshold,
null, false, 0, true);
GroupByOperator groupByOp = (GroupByOperator) OperatorFactory.getAndMakeChild(
groupBy, selectOp);
Map colMap = new HashMap();
colMap.put(outputNames.get(0), groupByExpr);
groupByOp.setColumnExprMap(colMap);
// finally add the event broadcast operator
if (HiveConf.getVar(parseContext.getConf(),
ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
DynamicPruningEventDesc eventDesc = new DynamicPruningEventDesc();
eventDesc.setTableScan(ts);
eventDesc.setTable(PlanUtils.getReduceValueTableDesc(PlanUtils
.getFieldSchemasFromColumnList(keyExprs, "key")));
eventDesc.setTargetColumnName(column);
eventDesc.setTargetColumnType(columnType);
eventDesc.setPartKey(partKey);
OperatorFactory.getAndMakeChild(eventDesc, groupByOp);
} else {
// Must be spark branch
SparkPartitionPruningSinkDesc desc = new SparkPartitionPruningSinkDesc();
desc.setTableScan(ts);
desc.setTable(PlanUtils.getReduceValueTableDesc(PlanUtils
.getFieldSchemasFromColumnList(keyExprs, "key")));
desc.setTargetColumnName(column);
desc.setPartKey(partKey);
OperatorFactory.getAndMakeChild(desc, groupByOp);
}
}
// Generates plan for min/max when dynamic partition pruning is ruled out.
private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContext parseContext,
TableScanOperator ts, String keyBaseAlias) throws SemanticException {
// we will put a fork in the plan at the source of the reduce sink
Operator extends OperatorDesc> parentOfRS = ctx.generator.getParentOperators().get(0);
// we need the expr that generated the key of the reduce sink
ExprNodeDesc key = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex());
String internalColName = null;
ExprNodeDesc exprNodeDesc = key;
// Find the ExprNodeColumnDesc
while (!(exprNodeDesc instanceof ExprNodeColumnDesc) &&
(exprNodeDesc.getChildren() != null)) {
exprNodeDesc = exprNodeDesc.getChildren().get(0);
}
if (!(exprNodeDesc instanceof ExprNodeColumnDesc)) {
// No column found!
// Bail out
return false;
}
internalColName = ((ExprNodeColumnDesc) exprNodeDesc).getColumn();
if (parentOfRS instanceof SelectOperator) {
// Make sure the semijoin branch is not on partition column.
ExprNodeDesc expr = parentOfRS.getColumnExprMap().get(internalColName);
while (!(expr instanceof ExprNodeColumnDesc) &&
(expr.getChildren() != null)) {
expr = expr.getChildren().get(0);
}
if (!(expr instanceof ExprNodeColumnDesc)) {
// No column found!
// Bail out
return false;
}
ExprNodeColumnDesc colExpr = (ExprNodeColumnDesc) expr;
String colName = ExprNodeDescUtils.extractColName(colExpr);
// Fetch the TableScan Operator.
Operator> op = parentOfRS.getParentOperators().get(0);
while (op != null && !(op instanceof TableScanOperator)) {
op = op.getParentOperators().get(0);
}
assert op != null;
Table table = ((TableScanOperator) op).getConf().getTableMetadata();
if (table.isPartitionKey(colName)) {
// The column is partition column, skip the optimization.
return false;
}
}
List keyExprs = new ArrayList();
keyExprs.add(key);
// group by requires "ArrayList", don't ask.
ArrayList outputNames = new ArrayList();
outputNames.add(HiveConf.getColumnInternalName(0));
// project the relevant key column
SelectDesc select = new SelectDesc(keyExprs, outputNames);
// Create the new RowSchema for the projected column
ColumnInfo columnInfo = parentOfRS.getSchema().getColumnInfo(internalColName);
ArrayList signature = new ArrayList();
signature.add(columnInfo);
RowSchema rowSchema = new RowSchema(signature);
// Create the column expr map
Map colExprMap = new HashMap();
ExprNodeDesc exprNode = null;
if ( parentOfRS.getColumnExprMap() != null) {
exprNode = parentOfRS.getColumnExprMap().get(internalColName).clone();
} else {
exprNode = new ExprNodeColumnDesc(columnInfo);
}
if (exprNode instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc encd = (ExprNodeColumnDesc) exprNode;
encd.setColumn(internalColName);
}
colExprMap.put(internalColName, exprNode);
// Create the Select Operator
SelectOperator selectOp =
(SelectOperator) OperatorFactory.getAndMakeChild(select,
rowSchema, colExprMap, parentOfRS);
// do a group by to aggregate min,max and bloom filter.
float groupByMemoryUsage =
HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold =
HiveConf.getFloatVar(parseContext.getConf(),
HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
ArrayList groupByExprs = new ArrayList();
// Add min/max and bloom filter aggregations
List aggFnOIs = new ArrayList();
aggFnOIs.add(key.getWritableObjectInspector());
ArrayList params = new ArrayList();
params.add(
new ExprNodeColumnDesc(key.getTypeInfo(), outputNames.get(0),
"", false));
ArrayList aggs = new ArrayList();
try {
AggregationDesc min = new AggregationDesc("min",
FunctionRegistry.getGenericUDAFEvaluator("min", aggFnOIs, false, false),
params, false, Mode.PARTIAL1);
AggregationDesc max = new AggregationDesc("max",
FunctionRegistry.getGenericUDAFEvaluator("max", aggFnOIs, false, false),
params, false, Mode.PARTIAL1);
AggregationDesc bloomFilter = new AggregationDesc("bloom_filter",
FunctionRegistry.getGenericUDAFEvaluator("bloom_filter", aggFnOIs, false, false),
params, false, Mode.PARTIAL1);
GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator();
bloomFilterEval.setSourceOperator(selectOp);
bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
bloomFilterEval.setMinEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES));
bloomFilterEval.setFactor(parseContext.getConf().getFloatVar(ConfVars.TEZ_BLOOM_FILTER_FACTOR));
bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval);
aggs.add(min);
aggs.add(max);
aggs.add(bloomFilter);
} catch (SemanticException e) {
LOG.error("Error creating min/max aggregations on key", e);
throw new IllegalStateException("Error creating min/max aggregations on key", e);
}
// Create the Group by Operator
ArrayList gbOutputNames = new ArrayList();
gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(0));
gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(1));
gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(2));
GroupByDesc groupBy = new GroupByDesc(GroupByDesc.Mode.HASH,
gbOutputNames, new ArrayList(), aggs, false,
groupByMemoryUsage, memoryThreshold, null, false, 0, false);
ArrayList groupbyColInfos = new ArrayList();
groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(0), key.getTypeInfo(), "", false));
groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(1), key.getTypeInfo(), "", false));
groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(2), key.getTypeInfo(), "", false));
GroupByOperator groupByOp = (GroupByOperator)OperatorFactory.getAndMakeChild(
groupBy, new RowSchema(groupbyColInfos), selectOp);
groupByOp.setColumnExprMap(new HashMap());
// Get the column names of the aggregations for reduce sink
int colPos = 0;
ArrayList rsValueCols = new ArrayList();
for (int i = 0; i < aggs.size() - 1; i++) {
ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(key.getTypeInfo(),
gbOutputNames.get(colPos++), "", false);
rsValueCols.add(colExpr);
}
// Bloom Filter uses binary
ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(TypeInfoFactory.binaryTypeInfo,
gbOutputNames.get(colPos++), "", false);
rsValueCols.add(colExpr);
// Create the reduce sink operator
ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(
new ArrayList(), rsValueCols, gbOutputNames, false,
-1, 0, 1, Operation.NOT_ACID);
ReduceSinkOperator rsOp = (ReduceSinkOperator)OperatorFactory.getAndMakeChild(
rsDesc, new RowSchema(groupByOp.getSchema()), groupByOp);
Map columnExprMap = new HashMap();
rsOp.setColumnExprMap(columnExprMap);
// Create the final Group By Operator
ArrayList aggsFinal = new ArrayList();
try {
List minFinalFnOIs = new ArrayList();
List maxFinalFnOIs = new ArrayList();
List bloomFilterFinalFnOIs = new ArrayList();
ArrayList minFinalParams = new ArrayList();
ArrayList maxFinalParams = new ArrayList();
ArrayList bloomFilterFinalParams = new ArrayList();
// Use the expressions from Reduce Sink.
minFinalFnOIs.add(rsValueCols.get(0).getWritableObjectInspector());
maxFinalFnOIs.add(rsValueCols.get(1).getWritableObjectInspector());
bloomFilterFinalFnOIs.add(rsValueCols.get(2).getWritableObjectInspector());
// Coming from a ReduceSink the aggregations would be in the form VALUE._col0, VALUE._col1
minFinalParams.add(
new ExprNodeColumnDesc(
rsValueCols.get(0).getTypeInfo(),
Utilities.ReduceField.VALUE + "." +
gbOutputNames.get(0), "", false));
maxFinalParams.add(
new ExprNodeColumnDesc(
rsValueCols.get(1).getTypeInfo(),
Utilities.ReduceField.VALUE + "." +
gbOutputNames.get(1), "", false));
bloomFilterFinalParams.add(
new ExprNodeColumnDesc(
rsValueCols.get(2).getTypeInfo(),
Utilities.ReduceField.VALUE + "." +
gbOutputNames.get(2), "", false));
AggregationDesc min = new AggregationDesc("min",
FunctionRegistry.getGenericUDAFEvaluator("min", minFinalFnOIs,
false, false),
minFinalParams, false, Mode.FINAL);
AggregationDesc max = new AggregationDesc("max",
FunctionRegistry.getGenericUDAFEvaluator("max", maxFinalFnOIs,
false, false),
maxFinalParams, false, Mode.FINAL);
AggregationDesc bloomFilter = new AggregationDesc("bloom_filter",
FunctionRegistry.getGenericUDAFEvaluator("bloom_filter", bloomFilterFinalFnOIs,
false, false),
bloomFilterFinalParams, false, Mode.FINAL);
GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator();
bloomFilterEval.setSourceOperator(selectOp);
bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
bloomFilterEval.setMinEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES));
bloomFilterEval.setFactor(parseContext.getConf().getFloatVar(ConfVars.TEZ_BLOOM_FILTER_FACTOR));
bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval);
aggsFinal.add(min);
aggsFinal.add(max);
aggsFinal.add(bloomFilter);
} catch (SemanticException e) {
LOG.error("Error creating min/max aggregations on key", e);
throw new IllegalStateException("Error creating min/max aggregations on key", e);
}
GroupByDesc groupByDescFinal = new GroupByDesc(GroupByDesc.Mode.FINAL,
gbOutputNames, new ArrayList(), aggsFinal, false,
groupByMemoryUsage, memoryThreshold, null, false, 0, false);
GroupByOperator groupByOpFinal = (GroupByOperator)OperatorFactory.getAndMakeChild(
groupByDescFinal, new RowSchema(rsOp.getSchema()), rsOp);
groupByOpFinal.setColumnExprMap(new HashMap());
// for explain purpose
if (parseContext.getContext().getExplainConfig() != null
&& parseContext.getContext().getExplainConfig().isFormatted()) {
List outputOperators = new ArrayList<>();
outputOperators.add(groupByOpFinal.getOperatorId());
rsOp.getConf().setOutputOperators(outputOperators);
}
// Create the final Reduce Sink Operator
ReduceSinkDesc rsDescFinal = PlanUtils.getReduceSinkDesc(
new ArrayList(), rsValueCols, gbOutputNames, false,
-1, 0, 1, Operation.NOT_ACID);
ReduceSinkOperator rsOpFinal = (ReduceSinkOperator)OperatorFactory.getAndMakeChild(
rsDescFinal, new RowSchema(groupByOpFinal.getSchema()), groupByOpFinal);
rsOpFinal.setColumnExprMap(columnExprMap);
LOG.debug("DynamicMinMaxPushdown: Saving RS to TS mapping: " + rsOpFinal + ": " + ts);
parseContext.getRsOpToTsOpMap().put(rsOpFinal, ts);
// for explain purpose
if (parseContext.getContext().getExplainConfig() != null
&& parseContext.getContext().getExplainConfig().isFormatted()) {
List outputOperators = new ArrayList<>();
outputOperators.add(ts.getOperatorId());
rsOpFinal.getConf().setOutputOperators(outputOperators);
}
// Save the info that is required at query time to resolve dynamic/runtime values.
RuntimeValuesInfo runtimeValuesInfo = new RuntimeValuesInfo();
TableDesc rsFinalTableDesc = PlanUtils.getReduceValueTableDesc(
PlanUtils.getFieldSchemasFromColumnList(rsValueCols, "_col"));
List dynamicValueIDs = new ArrayList();
dynamicValueIDs.add(keyBaseAlias + "_min");
dynamicValueIDs.add(keyBaseAlias + "_max");
dynamicValueIDs.add(keyBaseAlias + "_bloom_filter");
runtimeValuesInfo.setTableDesc(rsFinalTableDesc);
runtimeValuesInfo.setDynamicValueIDs(dynamicValueIDs);
runtimeValuesInfo.setColExprs(rsValueCols);
runtimeValuesInfo.setTsColExpr(ctx.parent.getChildren().get(0));
parseContext.getRsToRuntimeValuesInfoMap().put(rsOpFinal, runtimeValuesInfo);
return true;
}
private Map collectDynamicPruningConditions(ExprNodeDesc pred, NodeProcessorCtx ctx)
throws SemanticException {
// create a walker which walks the tree in a DFS manner while maintaining
// the operator stack. The dispatcher
// generates the plan from the operator tree
Map exprRules = new LinkedHashMap();
exprRules.put(new RuleRegExp("R1", ExprNodeDynamicListDesc.class.getName() + "%"),
new DynamicPartitionPrunerProc());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(null, exprRules, ctx);
GraphWalker egw = new DefaultGraphWalker(disp);
List startNodes = new ArrayList();
startNodes.add(pred);
HashMap outputMap = new HashMap();
egw.startWalking(startNodes, outputMap);
return outputMap;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy