All downloads are free. The search and download features use the official Maven repository.

org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer.calcite.rules;

import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.RelFactories.FilterFactory;
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlKind;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;

/**
 * Planner rule that derives additional predicates from the condition of a {@link Filter}
 * and places them in a new filter directly below the original one.
 *
 * <p>The rule fires on a {@link Filter} whose input is any {@link RelNode} other than a
 * {@link TableScan}. Two kinds of predicates are extracted:
 * <ul>
 * <li>for a disjunction, one predicate per column reference that is constrained in every
 * branch of the disjunction (see
 * {@link #extractCommonOperands(RexBuilder, RexNode, int)});</li>
 * <li>for a conjunction that contains non-deterministic operands, its deterministic
 * operands, so that only the non-deterministic part stays in the top filter.</li>
 * </ul>
 *
 * <p>When a {@link HiveRulesRegistry} is available in the planner context, it is used to
 * remember which filters were already handled so the rule does not fire on them again.
 */
public class HivePreFilteringRule extends RelOptRule {

  protected static final Logger LOG = LoggerFactory.getLogger(HivePreFilteringRule.class);

  /** Comparison kinds accepted when extracting common operands. */
  private static final Set<SqlKind> COMPARISON = EnumSet.of(SqlKind.EQUALS,
      SqlKind.GREATER_THAN_OR_EQUAL, SqlKind.LESS_THAN_OR_EQUAL, SqlKind.GREATER_THAN,
      SqlKind.LESS_THAN, SqlKind.NOT_EQUALS);

  private final FilterFactory filterFactory;

  // Max number of nodes when converting to CNF
  private final int maxCNFNodeCount;

  /**
   * Creates the rule.
   *
   * @param maxCNFNodeCount limit handed to {@code RexUtil.toCnf} when the branches of a
   *          disjunction are converted to CNF
   */
  public HivePreFilteringRule(int maxCNFNodeCount) {
    super(operand(Filter.class, operand(RelNode.class, any())));
    this.filterFactory = HiveRelFactories.HIVE_FILTER_FACTORY;
    this.maxCNFNodeCount = maxCNFNodeCount;
  }

  /**
   * Rejects filters that sit directly on a {@link TableScan} and filters that the
   * registry (if any) reports as already visited by this rule.
   */
  @Override
  public boolean matches(RelOptRuleCall call) {
    final Filter filter = call.rel(0);
    final RelNode filterChild = call.rel(1);

    // If the filter is already on top of a TableScan,
    // we can bail out
    if (filterChild instanceof TableScan) {
      return false;
    }

    HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);

    // If this operator has been visited already by the rule,
    // we do not need to apply the optimization
    if (registry != null && registry.getVisited(this).contains(filter)) {
      return false;
    }

    return true;
  }

  /**
   * Rewrites {@code Filter(cond)} into {@code Filter(topCond)} over {@code Filter(pushed)},
   * where {@code pushed} is the conjunction of the extracted predicates that are not
   * already present below the filter. Does nothing when the condition is neither an AND
   * nor an OR, or when no new predicate can be produced.
   */
  @Override
  public void onMatch(RelOptRuleCall call) {
    final Filter filter = call.rel(0);

    // 0. Register that we have visited this operator in this rule
    HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
    if (registry != null) {
      registry.registerVisited(this, filter);
    }

    final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();

    // 1. Recompose filter possibly by pulling out common elements from DNF
    // expressions
    RexNode topFilterCondition = RexUtil.pullFactors(rexBuilder, filter.getCondition());

    // 2. We extract possible candidates to be pushed down
    List<RexNode> operandsToPushDown = new ArrayList<>();

    switch (topFilterCondition.getKind()) {
    case AND: {
      List<RexNode> deterministicExprs = new ArrayList<>();
      List<RexNode> nonDeterministicExprs = new ArrayList<>();
      ImmutableList<RexNode> operands = RexUtil.flattenAnd(((RexCall) topFilterCondition)
          .getOperands());
      // String forms of the expressions already selected, used to avoid duplicates
      Set<String> operandsToPushDownDigest = new HashSet<>();

      for (RexNode operand : operands) {
        if (operand.getKind() == SqlKind.OR) {
          List<RexNode> extractedCommonOperands =
              extractCommonOperands(rexBuilder, operand, maxCNFNodeCount);
          for (RexNode extractedExpr : extractedCommonOperands) {
            if (operandsToPushDownDigest.add(extractedExpr.toString())) {
              operandsToPushDown.add(extractedExpr);
            }
          }
        }

        // TODO: Make expr traversal recursive. Extend to traverse inside
        // elements of DNF/CNF & extract more deterministic pieces out.
        if (HiveCalciteUtil.isDeterministic(operand)) {
          deterministicExprs.add(operand);
        } else {
          nonDeterministicExprs.add(operand);
        }
      }

      // Pull out Deterministic exprs from non-deterministic and push down
      // deterministic expressions as a separate filter
      // NOTE: Hive by convention doesn't pushdown non deterministic expressions
      if (!nonDeterministicExprs.isEmpty()) {
        for (RexNode expr : deterministicExprs) {
          if (operandsToPushDownDigest.add(expr.toString())) {
            operandsToPushDown.add(expr);
          }
        }

        // Only the non-deterministic operands remain in the top filter
        topFilterCondition = RexUtil.pullFactors(rexBuilder,
            RexUtil.composeConjunction(rexBuilder, nonDeterministicExprs, false));
      }

      break;
    }
    case OR:
      operandsToPushDown = extractCommonOperands(rexBuilder, topFilterCondition, maxCNFNodeCount);
      break;
    default:
      return;
    }

    // 3. If we did not generate anything for the new predicate, we bail out
    if (operandsToPushDown.isEmpty()) {
      return;
    }

    // 4. If the new conjuncts are already present in the plan, we bail out
    final List<RexNode> newConjuncts = HiveCalciteUtil.getPredsNotPushedAlready(
        filter.getInput(), operandsToPushDown);
    RexNode newPredicate = RexUtil.composeConjunction(rexBuilder, newConjuncts, false);
    if (newPredicate.isAlwaysTrue()) {
      return;
    }

    // 5. Otherwise, we create a new condition
    final RexNode newChildFilterCondition = RexUtil.pullFactors(rexBuilder, newPredicate);

    // 6. We create the new filter that might be pushed down
    RelNode newChildFilter = filterFactory.createFilter(filter.getInput(), newChildFilterCondition);
    RelNode newTopFilter = filterFactory.createFilter(newChildFilter, topFilterCondition);

    // 7. We register both so we do not fire the rule on them again
    if (registry != null) {
      registry.registerVisited(this, newChildFilter);
      registry.registerVisited(this, newTopFilter);
    }

    call.transformTo(newTopFilter);

  }

  /**
   * Given a disjunction, builds one predicate per reference that is constrained in every
   * branch of the disjunction.
   *
   * <p>Each branch is converted to CNF and split into conjuncts. Every conjunct must be a
   * deterministic call of a supported shape (see {@link #extractReference(RexCall)});
   * otherwise an empty list is returned. Conjuncts are grouped by the string form of the
   * reference they constrain. For every reference that shows up in all branches, the
   * result contains the disjunction of all conjuncts collected for that reference.
   *
   * @param rexBuilder builder used for the CNF conversion and to compose the result
   * @param condition expression of kind {@link SqlKind#OR}
   * @param maxCNFNodeCount limit handed to {@code RexUtil.toCnf}
   * @return the derived predicates; empty if none could be derived
   */
  private static List<RexNode> extractCommonOperands(RexBuilder rexBuilder, RexNode condition,
          int maxCNFNodeCount) {
    assert condition.getKind() == SqlKind.OR;
    Multimap<String, RexNode> reductionCondition = LinkedHashMultimap.create();

    // Data structure to control whether a certain reference is present in every
    // operand
    Set<String> refsInAllOperands = null;

    // 1. We extract the information necessary to create the predicate for the
    // new filter; currently we support comparison functions, in and between
    ImmutableList<RexNode> operands = RexUtil.flattenOr(((RexCall) condition).getOperands());
    for (int i = 0; i < operands.size(); i++) {
      final RexNode operand = operands.get(i);

      final RexNode operandCNF = RexUtil.toCnf(rexBuilder, maxCNFNodeCount, operand);
      final List<RexNode> conjunctions = RelOptUtil.conjunctions(operandCNF);

      Set<String> refsInCurrentOperand = Sets.newHashSet();
      for (RexNode conjunction : conjunctions) {
        // We do not know what it is, we bail out for safety
        if (!(conjunction instanceof RexCall) || !HiveCalciteUtil.isDeterministic(conjunction)) {
          return new ArrayList<>();
        }
        RexCall conjCall = (RexCall) conjunction;
        RexNode ref = extractReference(conjCall);
        if (ref == null) {
          // We do not know what it is, we bail out for safety
          return new ArrayList<>();
        }

        String stringRef = ref.toString();
        reductionCondition.put(stringRef, conjCall);
        refsInCurrentOperand.add(stringRef);
      }

      // Updates the references that are present in every operand up till now
      if (i == 0) {
        refsInAllOperands = refsInCurrentOperand;
      } else {
        refsInAllOperands = Sets.intersection(refsInAllOperands, refsInCurrentOperand);
      }
      // If we did not add any factor or there are no common factors, we can
      // bail out
      if (refsInAllOperands.isEmpty()) {
        return new ArrayList<>();
      }
    }

    // 2. We gather the common factors and return them
    List<RexNode> commonOperands = new ArrayList<>();
    for (String ref : refsInAllOperands) {
      commonOperands
          .add(RexUtil.composeDisjunction(rexBuilder, reductionCondition.get(ref), false));
    }
    return commonOperands;
  }

  /**
   * Returns the operand that a supported conjunct constrains, or {@code null} when the
   * call has a shape this rule does not handle.
   *
   * <p>Supported shapes: a comparison from {@link #COMPARISON} between a
   * {@link RexInputRef} and a {@link RexLiteral} (in either order), for which the input
   * reference is returned; an IN call, for which operand 0 is returned; and a BETWEEN
   * call, for which operand 1 is returned.
   */
  private static RexNode extractReference(RexCall conjCall) {
    final SqlKind kind = conjCall.getOperator().getKind();
    if (COMPARISON.contains(kind)) {
      final RexNode left = conjCall.operands.get(0);
      final RexNode right = conjCall.operands.get(1);
      if (left instanceof RexInputRef && right instanceof RexLiteral) {
        return left;
      }
      if (right instanceof RexInputRef && left instanceof RexLiteral) {
        return right;
      }
      return null;
    }
    if (kind == SqlKind.IN) {
      return conjCall.operands.get(0);
    }
    if (kind == SqlKind.BETWEEN) {
      return conjCall.operands.get(1);
    }
    return null;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy