org.apache.hadoop.hive.ql.optimizer.FixedBucketPruningOptimizer

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer;

import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Stack;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree;
import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree.Operator;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.PrunerOperatorFactory.FilterPruner;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

import com.google.common.base.Preconditions;

/**
 * Fixed bucket pruning optimizer goes through all the table scans and annotates them
 * with a bucketing inclusion bit-set.
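 *
 * As an illustrative sketch (table and column names assumed, not taken from the source):
 * for a table bucketed into 16 buckets on column {@code id}, a filter such as
 * {@code id IN (1, 5)} lets the optimizer set only the bits of the buckets that the
 * literals 1 and 5 hash into, so readers can skip the remaining bucket files.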
 */
public class FixedBucketPruningOptimizer extends Transform {

  private static final Log LOG = LogFactory
      .getLog(FixedBucketPruningOptimizer.class.getName());

  private final boolean compat;

  public FixedBucketPruningOptimizer(boolean compat) {
    this.compat = compat;
  }

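  /**
   * No-op processor used as the default during the operator walks below; it leaves
   * non-matching nodes untouched.
   */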
  public class NoopWalker implements NodeProcessor {
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      // do nothing
      return null;
    }
  }

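  /**
   * First walk: records the table's bucket count, bucket columns and schema in the
   * walk context. For partitioned tables the partition pruner is run, and the feature
   * is disabled (bucket count set to -1) if any surviving partition was created with a
   * bucket count different from the table's.
   */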
  public class FixedBucketPartitionWalker extends FilterPruner {

    @Override
    protected void generatePredicate(NodeProcessorCtx procCtx,
        FilterOperator fop, TableScanOperator top) throws SemanticException,
        UDFArgumentException {
      FixedBucketPruningOptimizerCtxt ctxt = ((FixedBucketPruningOptimizerCtxt) procCtx);
      Table tbl = top.getConf().getTableMetadata();
      if (tbl.getNumBuckets() > 0) {
        final int nbuckets = tbl.getNumBuckets();
        ctxt.setNumBuckets(nbuckets);
        ctxt.setBucketCols(tbl.getBucketCols());
        ctxt.setSchema(tbl.getFields());
        if (tbl.isPartitioned()) {
          // Run partition pruner to get partitions
          ParseContext parseCtx = ctxt.pctx;
          PrunedPartitionList prunedPartList;
          try {
            String alias = (String) parseCtx.getTopOps().keySet().toArray()[0];
            prunedPartList = PartitionPruner.prune(top, parseCtx, alias);
          } catch (HiveException e) {
            throw new SemanticException(e.getMessage(), e);
          }
          if (prunedPartList != null) {
            ctxt.setPartitions(prunedPartList);
            for (Partition p : prunedPartList.getPartitions()) {
              if (nbuckets != p.getBucketCount()) {
                // disable feature
                ctxt.setNumBuckets(-1);
                break;
              }
            }
          }
        }
      }
    }
  }

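  /**
   * Second walk: turns the table scan's filter into a SearchArgument, collects the
   * EQUALS/IN literals on the single bucket column, hashes each literal with the
   * table's bucketing function, and records the resulting buckets in an inclusion
   * bit-set on the table scan.
   */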
  public static class BucketBitsetGenerator extends FilterPruner {

    @Override
    protected void generatePredicate(NodeProcessorCtx procCtx,
        FilterOperator fop, TableScanOperator top) throws SemanticException,
        UDFArgumentException {
      FixedBucketPruningOptimizerCtxt ctxt = ((FixedBucketPruningOptimizerCtxt) procCtx);
      if (ctxt.getNumBuckets() <= 0 || ctxt.getBucketCols().size() != 1) {
        // bucketing isn't consistent across partitions, or there is more than one bucket
        // column; the optimizer does not extract multi-column predicates for this case
        return;
      }
      ExprNodeGenericFuncDesc filter = top.getConf().getFilterExpr();
      if (filter == null) {
        return;
      }
      // the sargs are closely tied to hive.optimize.index.filter
      SearchArgument sarg = ConvertAstToSearchArg.create(filter);
      if (sarg == null) {
        return;
      }
      final String bucketCol = ctxt.getBucketCols().get(0);
      StructField bucketField = null;
      for (StructField fs : ctxt.getSchema()) {
        if (fs.getFieldName().equals(bucketCol)) {
          bucketField = fs;
        }
      }
      Preconditions.checkArgument(bucketField != null);
      List<Object> literals = new ArrayList<Object>();
      List<PredicateLeaf> leaves = sarg.getLeaves();
      Set<PredicateLeaf> bucketLeaves = new HashSet<PredicateLeaf>();
      for (PredicateLeaf l : leaves) {
        if (bucketCol.equals(l.getColumnName())) {
          switch (l.getOperator()) {
          case EQUALS:
          case IN:
            // supported
            break;
          case IS_NULL:
            // TODO: (a = 1) and NOT (a is NULL) can be potentially folded earlier into a NO-OP
            // fall through
          case BETWEEN:
            // TODO: for ordinal types you can produce a range (BETWEEN 1444442100 1444442107)
            // fall through
          default:
            // cannot optimize any others
            return;
          }
          bucketLeaves.add(l);
        }
      }
      if (bucketLeaves.size() == 0) {
        return;
      }
      // TODO: Add support for AND clauses under OR clauses
      // first-cut takes a known minimal tree and no others.
      // $expr = (a=1)
      //         (a=1 or a=2)
      //         (a in (1,2))
      //         ($expr and *)
      //         (* and $expr)
      ExpressionTree expr = sarg.getExpression();
      if (expr.getOperator() == Operator.LEAF) {
        PredicateLeaf l = leaves.get(expr.getLeaf());
        if (!addLiteral(literals, l)) {
          return;
        }
      } else if (expr.getOperator() == Operator.AND) {
        boolean found = false;
        for (ExpressionTree subExpr : expr.getChildren()) {
          if (subExpr.getOperator() != Operator.LEAF) {
            return;
          }
          // one of the branches is definitely a bucket-leaf
          PredicateLeaf l = leaves.get(subExpr.getLeaf());
          if (bucketLeaves.contains(l)) {
            if (!addLiteral(literals, l)) {
              return;
            }
            found = true;
          }
        }
        if (!found) {
          return;
        }
      } else if (expr.getOperator() == Operator.OR) {
        for (ExpressionTree subExpr : expr.getChildren()) {
          if (subExpr.getOperator() != Operator.LEAF) {
            return;
          }
          PredicateLeaf l = leaves.get(subExpr.getLeaf());
          if (bucketLeaves.contains(l)) {
            if (!addLiteral(literals, l)) {
              return;
            }
          } else {
            // all of the OR branches need to be bucket-leaves
            return;
          }
        }
      }
      // invariant: bucket-col IN literals of type bucketField
      BitSet bs = new BitSet(ctxt.getNumBuckets());
      bs.clear();
      PrimitiveObjectInspector bucketOI = (PrimitiveObjectInspector)bucketField.getFieldObjectInspector();
      PrimitiveObjectInspector constOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(bucketOI.getPrimitiveCategory());
      for (Object literal: literals) {
        PrimitiveObjectInspector origOI = PrimitiveObjectInspectorFactory.getPrimitiveObjectInspectorFromClass(literal.getClass());
        Converter conv = ObjectInspectorConverters.getConverter(origOI, constOI);
        // exact type conversion or get out
        if (conv == null) {
          return;
        }
        Object[] convCols = new Object[] {conv.convert(literal)};
        int n = ObjectInspectorUtils.getBucketNumber(convCols, new ObjectInspector[]{constOI}, ctxt.getNumBuckets());
        bs.set(n);
        if (ctxt.isCompat()) {
          int h = ObjectInspectorUtils.getBucketHashCode(convCols, new ObjectInspector[]{constOI});
          // Negative hash codes used to be converted to positive bucket numbers in
          // different ways: abs() is now obsolete and all inserts use & Integer.MAX_VALUE.
          // Compat mode assumes old data could have been loaded using the abs() conversion,
          // so the bucket derived from abs(hash) is included as well.
          n = ObjectInspectorUtils.getBucketNumber(Math.abs(h), ctxt.getNumBuckets());
          bs.set(n);
        }
      }
      if (bs.cardinality() < ctxt.getNumBuckets()) {
        // there is a valid bucket pruning filter
        top.getConf().setIncludedBuckets(bs);
        top.getConf().setNumBuckets(ctxt.getNumBuckets());
      }
    }

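    /**
     * Collects the literal(s) of an EQUALS or IN leaf into the literal list; any other
     * operator returns false, which makes the caller abandon the optimization.
     */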
    private boolean addLiteral(List<Object> literals, PredicateLeaf leaf) {
      switch (leaf.getOperator()) {
      case EQUALS:
        return literals.add(leaf.getLiteral());
      case IN:
        return literals.addAll(leaf.getLiteralList());
      default:
        return false;
      }
    }
  }

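  /**
   * Walk context shared by both passes: carries the parse context together with the
   * bucket count, bucket columns, schema and pruned partition list gathered by the
   * first walk.
   */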
  public final class FixedBucketPruningOptimizerCtxt implements
      NodeProcessorCtx {
    public final ParseContext pctx;
    private final boolean compat;
    private int numBuckets;
    private PrunedPartitionList partitions;
    private List<String> bucketCols;
    private List<StructField> schema;

    public FixedBucketPruningOptimizerCtxt(boolean compat, ParseContext pctx) {
      this.compat = compat;
      this.pctx = pctx;
    }

    public void setSchema(ArrayList<StructField> fields) {
      this.schema = fields;
    }

    public List<StructField> getSchema() {
      return this.schema;
    }

    public void setBucketCols(List<String> bucketCols) {
      this.bucketCols = bucketCols;
    }

    public List<String> getBucketCols() {
      return this.bucketCols;
    }

    public void setPartitions(PrunedPartitionList partitions) {
      this.partitions = partitions;
    }

    public PrunedPartitionList getPartitions() {
      return this.partitions;
    }

    public int getNumBuckets() {
      return numBuckets;
    }

    public void setNumBuckets(int numBuckets) {
      this.numBuckets = numBuckets;
    }

    // whether compatibility mode is enabled
    public boolean isCompat() {
      return this.compat;
    }
  }

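  /**
   * Runs the two operator walks: the first gathers bucketing metadata, the second
   * computes the included-bucket bit-set. Bails out early when the first walk marked
   * the bucket count as invalid (inconsistent bucketing across partitions).
   */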
  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {
    // create the context for walking operators
    FixedBucketPruningOptimizerCtxt opPartWalkerCtx = new FixedBucketPruningOptimizerCtxt(compat,
        pctx);

    // Retrieve all partitions generated from partition pruner and partition
    // column pruner
    PrunerUtils.walkOperatorTree(pctx, opPartWalkerCtx,
        new FixedBucketPartitionWalker(), new NoopWalker());

    if (opPartWalkerCtx.getNumBuckets() < 0) {
      // bail out
      return pctx;
    } else {
      // walk operator tree to create expression tree for filter buckets
      PrunerUtils.walkOperatorTree(pctx, opPartWalkerCtx,
          new BucketBitsetGenerator(), new NoopWalker());
    }

    return pctx;
  }
}