All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.hadoop.hive.ql.optimizer.RedundantDynamicPruningConditionsRemoval Maven / Gradle / Ivy

There is a newer version: 4.0.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.apache.calcite.util.Pair;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;


/**
 * Takes a Filter operator on top of a TableScan and removes dynamic pruning conditions
 * if static partition pruning has been triggered already.
 * 
 * This transformation is executed when CBO is on and hence we can guarantee that the filtering
 * conditions on the partition columns will be immediately on top of the TableScan operator.
 *
 */
public class RedundantDynamicPruningConditionsRemoval extends Transform {

  private static final Logger LOG = LoggerFactory.getLogger(RedundantDynamicPruningConditionsRemoval.class);


  /**
   * Transform the query tree.
   *
   * @param pctx the current parse context
   */
  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {
    // Make sure semijoin is not enabled. If it is, then do not remove the dynamic partition pruning predicates.
    if (!pctx.getConf().getBoolVar(HiveConf.ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) {
      Map opRules = new LinkedHashMap();
      opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%" +
              FilterOperator.getOperatorName() + "%"), new FilterTransformer());

      Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
      GraphWalker ogw = new DefaultGraphWalker(disp);

      List topNodes = new ArrayList();
      topNodes.addAll(pctx.getTopOps().values());
      ogw.startWalking(topNodes, null);
    }
    return pctx;
  }

  private class FilterTransformer implements NodeProcessor {

    @Override
    public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {
      FilterOperator filter = (FilterOperator) nd;
      FilterDesc desc = filter.getConf();

      TableScanOperator ts = (TableScanOperator) stack.get(stack.size() - 2);

      // collect
      CollectContext removalContext = new CollectContext();
      collect(desc.getPredicate(), removalContext);
      CollectContext tsRemovalContext = new CollectContext();
      collect(ts.getConf().getFilterExpr(), tsRemovalContext);

      for (Pair pair : removalContext.dynamicListNodes) {
        ExprNodeDesc child = pair.left;
        ExprNodeDesc columnDesc = child.getChildren().get(0);
        assert child.getChildren().get(1) instanceof ExprNodeDynamicListDesc;
        ExprNodeDesc parent = pair.right;

        String column = ExprNodeDescUtils.extractColName(columnDesc);
        if (column != null) {
          Table table = ts.getConf().getTableMetadata();

          boolean generate = false;
          if (table != null && table.isPartitionKey(column)) {
            generate = true;
            for (ExprNodeDesc filterColumnDesc : removalContext.comparatorNodes) {
              if (columnDesc.isSame(filterColumnDesc)) {
                generate = false;
                break;
              }
            }
          }
          if (!generate) {
            // We can safely remove the condition by replacing it with "true"
            ExprNodeDesc constNode = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.TRUE);
            if (parent == null) {
              desc.setPredicate(constNode);
            } else {
              int i = parent.getChildren().indexOf(child);
              parent.getChildren().remove(i);
              parent.getChildren().add(i, constNode);
            }
            // We remove it from the TS too if it was pushed
            for (Pair tsPair : tsRemovalContext.dynamicListNodes) {
              ExprNodeDesc tsChild = tsPair.left;
              ExprNodeDesc tsParent = tsPair.right;
              if (tsChild.isSame(child)) {
                if (tsParent == null) {
                  ts.getConf().setFilterExpr(null);
                } else {
                  int i = tsParent.getChildren().indexOf(tsChild);
                  if (i != -1) {
                    tsParent.getChildren().remove(i);
                    tsParent.getChildren().add(i, constNode);
                  }
                }
                break;
              }
            }
            if (LOG.isInfoEnabled()) {
              LOG.info("Dynamic pruning condition removed: " + child);
            }
          }
        }
      }
      return false;
    }
  }

  private static void collect(ExprNodeDesc pred, CollectContext listContext) {
    collect(null, pred, listContext);
  }

  private static void collect(ExprNodeDesc parent, ExprNodeDesc child, CollectContext listContext) {
    if (child instanceof ExprNodeGenericFuncDesc &&
            ((ExprNodeGenericFuncDesc)child).getGenericUDF() instanceof GenericUDFIn) {
      if (child.getChildren().get(1) instanceof ExprNodeDynamicListDesc) {
        listContext.dynamicListNodes.add(new Pair(child, parent));
      }
      return;
    }
    if (child instanceof ExprNodeGenericFuncDesc &&
            ((ExprNodeGenericFuncDesc)child).getGenericUDF() instanceof GenericUDFBaseCompare &&
            child.getChildren().size() == 2) {
      ExprNodeDesc leftCol = child.getChildren().get(0);
      ExprNodeDesc rightCol = child.getChildren().get(1);
      ExprNodeColumnDesc leftColDesc = ExprNodeDescUtils.getColumnExpr(leftCol);
      if (leftColDesc != null) {
        boolean rightConstant = false;
        if (rightCol instanceof ExprNodeConstantDesc) {
          rightConstant = true;
        } else if (rightCol instanceof ExprNodeGenericFuncDesc) {
          ExprNodeDesc foldedExpr = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc)rightCol);
          rightConstant = foldedExpr != null;
        }
        if (rightConstant) {
          listContext.comparatorNodes.add(leftColDesc);
        }
      } else {
        ExprNodeColumnDesc rightColDesc = ExprNodeDescUtils.getColumnExpr(rightCol);
        if (rightColDesc != null) {
          boolean leftConstant = false;
          if (leftCol instanceof ExprNodeConstantDesc) {
            leftConstant = true;
          } else if (leftCol instanceof ExprNodeGenericFuncDesc) {
            ExprNodeDesc foldedExpr = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc)leftCol);
            leftConstant = foldedExpr != null;
          }
          if (leftConstant) {
            listContext.comparatorNodes.add(rightColDesc);
          }
        }
      }
      return;
    }
    if (FunctionRegistry.isOpAnd(child)) {
      for (ExprNodeDesc newChild : child.getChildren()) {
        collect(child, newChild, listContext);
      }
    }
  }

  private class CollectContext implements NodeProcessorCtx {

    private final List> dynamicListNodes;
    private final List comparatorNodes;

    public CollectContext() {
      this.dynamicListNodes = Lists.>newArrayList();
      this.comparatorNodes = Lists.newArrayList();
    }

  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy