org.apache.hadoop.hive.ql.optimizer.FixedBucketPruningOptimizer Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree;
import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree.Operator;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.PrunerOperatorFactory.FilterPruner;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import com.google.common.base.Preconditions;
/**
* Fixed bucket pruning optimizer goes through all the table scans and annotates them
* with a bucketing inclusion bit-set.
*/
public class FixedBucketPruningOptimizer extends Transform {
// Class-wide logger, registered under this class's fully-qualified name.
private static final Log LOG =
    LogFactory.getLog(FixedBucketPruningOptimizer.class.getName());
// Flag supplied through the constructor and fixed for the optimizer's lifetime.
// NOTE(review): its readers are outside this excerpt - confirm what "compat" selects.
private final boolean compat;
/**
 * Creates the optimizer and remembers the supplied flag.
 *
 * @param compat value stored verbatim in the {@code compat} field
 */
public FixedBucketPruningOptimizer(final boolean compat) {
  this.compat = compat;
}
/**
 * Node processor that deliberately performs no work. It ignores every
 * argument and always yields {@code null}.
 */
public class NoopWalker implements NodeProcessor {
  /**
   * Does nothing for the visited node.
   *
   * @param nd the node being visited (ignored)
   * @param stack the stack passed in by the caller (ignored)
   * @param procCtx the processing context (ignored)
   * @param nodeOutputs additional outputs passed in by the caller (ignored)
   * @return always {@code null}
   * @throws SemanticException never thrown by this implementation; declared
   *         to match the interface method
   */
  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    // Intentionally empty: this processor has no effect.
    return null;
  }
}
/**
 * Filter pruner that copies the scanned table's bucketing layout into the
 * {@link FixedBucketPruningOptimizerCtxt} and, for partitioned tables, checks
 * that every pruned partition has the same bucket count as the table.
 */
public class FixedBucketPartitionWalker extends FilterPruner {
  /**
   * Populates the optimizer context from the metadata of the table behind
   * {@code top}.
   *
   * <p>Nothing is written when the table reports no buckets. Otherwise the
   * bucket count, bucket columns and schema are stored on the context. For a
   * partitioned table the partition pruner is run and its result is stored as
   * well; if any pruned partition has a bucket count different from the
   * table's, the context's bucket count is set to {@code -1}.
   *
   * @param procCtx context object; must be a {@link FixedBucketPruningOptimizerCtxt}
   * @param fop the filter operator (not used here)
   * @param top the table scan whose table metadata is inspected
   * @throws SemanticException if partition pruning fails with a {@link HiveException}
   * @throws UDFArgumentException declared by the overridden method
   */
  @Override
  protected void generatePredicate(NodeProcessorCtx procCtx,
      FilterOperator fop, TableScanOperator top) throws SemanticException,
      UDFArgumentException {
    FixedBucketPruningOptimizerCtxt pruningCtx = (FixedBucketPruningOptimizerCtxt) procCtx;
    Table table = top.getConf().getTableMetadata();
    final int tableBuckets = table.getNumBuckets();
    if (tableBuckets <= 0) {
      // Table is not bucketed: leave the context untouched.
      return;
    }

    pruningCtx.setNumBuckets(tableBuckets);
    pruningCtx.setBucketCols(table.getBucketCols());
    pruningCtx.setSchema(table.getFields());

    if (!table.isPartitioned()) {
      return;
    }

    // Run partition pruner to get partitions
    ParseContext parseContext = pruningCtx.pctx;
    PrunedPartitionList survivors;
    try {
      // NOTE(review): the alias is the first key of the top-ops map, not one
      // looked up from `top` - confirm this is right with several table scans.
      String firstAlias = (String) parseContext.getTopOps().keySet().toArray()[0];
      survivors = PartitionPruner.prune(top, parseContext, firstAlias);
    } catch (HiveException e) {
      throw new SemanticException(e.getMessage(), e);
    }

    if (survivors == null) {
      return;
    }

    pruningCtx.setPartitions(survivors);
    for (Partition partition : survivors.getPartitions()) {
      if (partition.getBucketCount() != tableBuckets) {
        // disable feature
        pruningCtx.setNumBuckets(-1);
        break;
      }
    }
  }
}
public static class BucketBitsetGenerator extends FilterPruner {
@Override
protected void generatePredicate(NodeProcessorCtx procCtx,
FilterOperator fop, TableScanOperator top) throws SemanticException,
UDFArgumentException {
FixedBucketPruningOptimizerCtxt ctxt = ((FixedBucketPruningOptimizerCtxt) procCtx);
if (ctxt.getNumBuckets() <= 0 || ctxt.getBucketCols().size() != 1) {
// bucketing isn't consistent or there are >1 bucket columns
// optimizer does not extract multiple column predicates for this
return;
}
ExprNodeGenericFuncDesc filter = top.getConf().getFilterExpr();
if (filter == null) {
return;
}
// the sargs are closely tied to hive.optimize.index.filter
SearchArgument sarg = ConvertAstToSearchArg.create(ctxt.pctx.getConf(), filter);
if (sarg == null) {
return;
}
final String bucketCol = ctxt.getBucketCols().get(0);
StructField bucketField = null;
for (StructField fs : ctxt.getSchema()) {
if(fs.getFieldName().equals(bucketCol)) {
bucketField = fs;
}
}
Preconditions.checkArgument(bucketField != null);
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy