
org.apache.hadoop.hive.ql.ppd.ExprWalkerProcFactory

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.ppd;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.TypeRule;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.ppd.ExprWalkerInfo.ExprInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Expression factory for predicate pushdown processing. Each processor
 * determines whether an expression is a possible candidate for predicate
 * pushdown optimization for the given operator.
 */
public final class ExprWalkerProcFactory {

  private static final Logger LOG = LoggerFactory.getLogger(ExprWalkerProcFactory.class.getName());

  /**
   * ColumnExprProcessor.
   *
   */
  public static class ColumnExprProcessor implements NodeProcessor {

    /**
     * Converts a column reference from the child operator's row resolver to
     * the current operator's row resolver.
     */
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      ExprWalkerInfo ctx = (ExprWalkerInfo) procCtx;
      ExprNodeColumnDesc colref = (ExprNodeColumnDesc) nd;
      RowSchema toRS = ctx.getOp().getSchema();
      Operator<? extends OperatorDesc> op = ctx.getOp();
      ColumnInfo ci = toRS.getColumnInfo(colref.getColumn());
      String tabAlias = null;
      if (ci != null) {
        tabAlias = ci.getTabAlias();
      }

      ExprInfo colExprInfo = null;
      boolean isCandidate = true;
      if (op.getColumnExprMap() != null) {
        // replace the output expression with the input expression so that
        // parent op can understand this expression
        ExprNodeDesc exp = op.getColumnExprMap().get(colref.getColumn());
        // if the operator is a group by and we are referencing the grouping
        // id column, we cannot push the predicate
        if (op instanceof GroupByOperator) {
          GroupByOperator groupBy = (GroupByOperator) op;
          if (groupBy.getConf().isGroupingSetsPresent()) {
            int groupingSetPlaceholderPos = groupBy.getConf().getKeys().size() - 1;
            if (colref.getColumn().equals(groupBy.getSchema().getColumnNames().get(groupingSetPlaceholderPos))) {
              exp = null;
            }
          }
        }
        if (exp == null) {
          // a null mapping means the expression cannot be pushed, e.g. because
          // the column is a value (aggregation) column in a group by
          colExprInfo = ctx.addOrGetExprInfo(colref);
          colExprInfo.isCandidate = false;
          return false;
        } else {
          if (exp instanceof ExprNodeGenericFuncDesc) {
            isCandidate = false;
          }
          if (exp instanceof ExprNodeColumnDesc && ci == null) {
            ExprNodeColumnDesc column = (ExprNodeColumnDesc)exp;
            tabAlias = column.getTabAlias();
          }
        }
        colExprInfo = ctx.addOrGetExprInfo(colref);
        colExprInfo.convertedExpr = exp;
        ExprInfo expInfo = ctx.addExprInfo(exp);
        expInfo.isCandidate = isCandidate;
        if (tabAlias != null) {
          expInfo.alias = tabAlias;
        } else {
          expInfo.alias = colExprInfo.alias;
        }
      } else {
        if (ci == null) {
          return false;
        }
        colExprInfo = ctx.addOrGetExprInfo(colref);
        if (tabAlias != null) {
          colExprInfo.alias = tabAlias;
        }
      }
      colExprInfo.isCandidate = isCandidate;
      return isCandidate;
    }

  }
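
  /*
   * Illustrative sketch (editor's addition, not part of the original source):
   * suppose a child SELECT operator renames column "key" to "_col0" and the
   * predicate "_col0 > 10" is being pushed through it. The column expression
   * map lets the processor above rewrite the reference so that the parent
   * operator can evaluate it:
   *
   *   Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
   *   ExprNodeDesc input = colExprMap.get("_col0");
   *   // "input" is the parent-side expression behind "_col0", e.g. the
   *   // column "key"; it is recorded as convertedExpr in the walker context
   *
   * If the mapping resolves to an ExprNodeGenericFuncDesc (a computed
   * column), the reference is not a pushdown candidate, as handled above.
   */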

  /**
   * FieldExprProcessor.
   *
   */
  public static class FieldExprProcessor implements NodeProcessor {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      ExprWalkerInfo ctx = (ExprWalkerInfo) procCtx;
      String alias = null;
      ExprNodeFieldDesc expr = (ExprNodeFieldDesc) nd;

      assert (nd.getChildren().size() == 1);
      ExprNodeDesc ch = (ExprNodeDesc) nd.getChildren().get(0);
      ExprInfo chExprInfo = ctx.getExprInfo(ch);
      ExprNodeDesc newCh = chExprInfo != null ? chExprInfo.convertedExpr : null;
      if (newCh != null) {
        expr.setDesc(newCh);
        ch = newCh;
        chExprInfo = ctx.getExprInfo(ch);
      }

      boolean isCandidate;
      String chAlias;
      if (chExprInfo != null) {
        chAlias = chExprInfo.alias;
        isCandidate = chExprInfo.isCandidate;
      } else {
        chAlias = null;
        isCandidate = false;
      }
      // need to iterate through all children even if one is found not to be a
      // candidate, since the other children could still be pushed individually
      if (isCandidate && chAlias != null) {
        alias = chAlias;
      }

      ExprInfo exprInfo = ctx.addOrGetExprInfo(expr);
      if (alias != null) {
        exprInfo.alias = alias;
      }
      exprInfo.isCandidate = isCandidate;
      return isCandidate;
    }

  }
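
  /*
   * Illustrative sketch (editor's addition): for a struct field access such
   * as "s.address.zip" in a predicate, the ExprNodeFieldDesc node wraps a
   * single child expression (here, the column reference "s.address"). The
   * processor above simply propagates the child's candidacy and table alias:
   *
   *   ExprNodeDesc child = (ExprNodeDesc) fieldExpr.getChildren().get(0);
   *   // the field expression is a candidate iff "child" is a candidate,
   *   // and it inherits the child's table alias ("s")
   */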

  /**
   * If all children are candidates and refer to only one table alias, then
   * this expression is a candidate; otherwise it is not, but its children
   * may still be final candidates.
   */
  public static class GenericFuncExprProcessor implements NodeProcessor {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      ExprWalkerInfo ctx = (ExprWalkerInfo) procCtx;
      String alias = null;
      ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) nd;

      if (!FunctionRegistry.isConsistentWithinQuery(expr.getGenericUDF())) {
        // this GenericUDF can't be pushed down
        ExprInfo exprInfo = ctx.addOrGetExprInfo(expr);
        exprInfo.isCandidate = false;
        ctx.setDeterministic(false);
        return false;
      }

      boolean isCandidate = true;
      for (int i = 0; i < nd.getChildren().size(); i++) {
        ExprNodeDesc ch = (ExprNodeDesc) nd.getChildren().get(i);
        ExprInfo chExprInfo = ctx.getExprInfo(ch);
        ExprNodeDesc newCh = chExprInfo != null ? chExprInfo.convertedExpr : null;
        if (newCh != null) {
          expr.getChildren().set(i, newCh);
          ch = newCh;
          chExprInfo = ctx.getExprInfo(ch);
        }

        String chAlias;
        if (chExprInfo != null) {
          chAlias = chExprInfo.alias;
          isCandidate = isCandidate && chExprInfo.isCandidate;
        } else {
          chAlias = null;
          isCandidate = false;
        }
        // need to iterate through all children even if one is found not to be a
        // candidate, since the other children could still be pushed individually
        if (isCandidate && chAlias != null) {
          if (alias == null) {
            alias = chAlias;
          } else if (!chAlias.equalsIgnoreCase(alias)) {
            isCandidate = false;
          }
        }

        if (!isCandidate) {
          break;
        }
      }
      ExprInfo exprInfo = ctx.addOrGetExprInfo(expr);
      if (alias != null) {
        exprInfo.alias = alias;
      }
      exprInfo.isCandidate = isCandidate;
      return isCandidate;
    }

  }
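
  /*
   * Illustrative sketch (editor's addition): under the single-alias rule
   * implemented above, given tables aliased "a" and "b":
   *
   *   a.x + a.y > 10   -- candidate: all children resolve to alias "a"
   *   a.x + b.y > 10   -- not a candidate: children span two aliases
   *   rand() > 0.5     -- not a candidate: rand() is not consistent within
   *                       a query, so the walk is marked non-deterministic
   */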

  /**
   * For constants and null expressions.
   */
  public static class DefaultExprProcessor implements NodeProcessor {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      ExprWalkerInfo ctx = (ExprWalkerInfo) procCtx;
      ExprInfo exprInfo = ctx.addOrGetExprInfo((ExprNodeDesc) nd);
      exprInfo.isCandidate = true;
      return true;
    }
  }

  public static NodeProcessor getDefaultExprProcessor() {
    return new DefaultExprProcessor();
  }

  public static NodeProcessor getGenericFuncProcessor() {
    return new GenericFuncExprProcessor();
  }

  public static NodeProcessor getColumnProcessor() {
    return new ColumnExprProcessor();
  }

  private static NodeProcessor getFieldProcessor() {
    return new FieldExprProcessor();
  }

  public static ExprWalkerInfo extractPushdownPreds(OpWalkerInfo opContext,
    Operator<? extends OperatorDesc> op, ExprNodeDesc pred)
    throws SemanticException {
    List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
    preds.add(pred);
    return extractPushdownPreds(opContext, op, preds);
  }

  /**
   * Extracts pushdown predicates from the given list of predicate expressions.
   *
   * @param opContext
   *          operator context used for resolving column references
   * @param op
   *          operator of the predicates being processed
   * @param preds
   *          predicate expressions to examine
   * @return The expression walker information
   * @throws SemanticException
   */
  public static ExprWalkerInfo extractPushdownPreds(OpWalkerInfo opContext,
    Operator<? extends OperatorDesc> op, List<ExprNodeDesc> preds)
    throws SemanticException {
    // Create the walker, the rules dispatcher and the context.
    ExprWalkerInfo exprContext = new ExprWalkerInfo(op);

    // create a walker that traverses the predicate expression trees in a
    // DFS manner
    Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
    exprRules.put(new TypeRule(ExprNodeColumnDesc.class), getColumnProcessor());
    exprRules.put(new TypeRule(ExprNodeFieldDesc.class), getFieldProcessor());
    exprRules.put(new TypeRule(ExprNodeGenericFuncDesc.class), getGenericFuncProcessor());

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(),
        exprRules, exprContext);
    GraphWalker egw = new DefaultGraphWalker(disp);

    List<Node> startNodes = new ArrayList<Node>();
    List<ExprNodeDesc> clonedPreds = new ArrayList<ExprNodeDesc>();
    for (ExprNodeDesc node : preds) {
      ExprNodeDesc clone = node.clone();
      clonedPreds.add(clone);
      exprContext.getNewToOldExprMap().put(clone, node);
    }
    startNodes.addAll(clonedPreds);

    egw.startWalking(startNodes, null);

    HiveConf conf = opContext.getParseContext().getConf();
    // check each root expression for final candidates
    for (ExprNodeDesc pred : clonedPreds) {
      extractFinalCandidates(pred, exprContext, conf);
    }
    return exprContext;
  }
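
  /*
   * Illustrative usage sketch (editor's addition): a caller such as the
   * filter rule in OpProcFactory would typically hand a filter's predicate
   * to this method and then read back the per-alias candidates, roughly:
   *
   *   ExprWalkerInfo ewi = ExprWalkerProcFactory.extractPushdownPreds(
   *       opWalkerInfo, filterOp, filterOp.getConf().getPredicate());
   *   Map<String, List<ExprNodeDesc>> finalCandidates = ewi.getFinalCandidates();
   *   // each entry maps a table alias to the conjuncts that can be pushed
   *   // down toward that alias
   */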

  /**
   * Walks through the top-level AND nodes and determines which of the
   * conjuncts are final candidates.
   */
  private static void extractFinalCandidates(ExprNodeDesc expr,
      ExprWalkerInfo ctx, HiveConf conf) {
    // We decompose an AND expression into its parts before checking if the
    // entire expression is a candidate because each part may be a candidate
    // for replicating transitively over an equijoin condition.
    if (FunctionRegistry.isOpAnd(expr)) {
      // If the operator is AND, we need to determine if any of the children are
      // final candidates.

      // For the children, we populate the NewToOldExprMap to keep track of
      // the original condition before rewriting it for this operator
      assert ctx.getNewToOldExprMap().containsKey(expr);
      for (int i = 0; i < expr.getChildren().size(); i++) {
        ctx.getNewToOldExprMap().put(
            expr.getChildren().get(i),
            ctx.getNewToOldExprMap().get(expr).getChildren().get(i));
        extractFinalCandidates(expr.getChildren().get(i),
            ctx, conf);
      }
      return;
    }

    ExprInfo exprInfo = ctx.getExprInfo(expr);
    if (exprInfo != null && exprInfo.isCandidate) {
      String alias = exprInfo.alias;
      if ((alias == null) && (exprInfo.convertedExpr != null)) {
        ExprInfo convertedExprInfo = ctx.getExprInfo(exprInfo.convertedExpr);
        if (convertedExprInfo != null) {
          alias = convertedExprInfo.alias;
        }
      }
      ctx.addFinalCandidate(alias, exprInfo.convertedExpr != null ?
              exprInfo.convertedExpr : expr);
      return;
    } else if (!FunctionRegistry.isOpAnd(expr) &&
        HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
      ctx.addNonFinalCandidate(exprInfo != null ? exprInfo.alias : null, expr);
    }
  }
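
  /*
   * Illustrative sketch (editor's addition): for a predicate over a join of
   * aliases "a" and "b" such as
   *
   *   a.x > 5 AND b.y < 3 AND a.x = b.y
   *
   * the AND is decomposed above into three conjuncts: "a.x > 5" becomes a
   * final candidate under alias "a", "b.y < 3" under alias "b", while
   * "a.x = b.y" spans both aliases and is not a final candidate, though it
   * may later be replicated transitively over the equijoin condition.
   */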

  private ExprWalkerProcFactory() {
    // prevent instantiation
  }
}