All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.ql.optimizer.calcite.stats.FilterSelectivityEstimator Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer.calcite.stats;

import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.plan.RelOptUtil.InputReferencedVisitor;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexVisitorImpl;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.type.SqlTypeUtil;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;

public class FilterSelectivityEstimator extends RexVisitorImpl {
  private final RelNode childRel;
  private final double  childCardinality;

  protected FilterSelectivityEstimator(RelNode childRel) {
    super(true);
    this.childRel = childRel;
    this.childCardinality = RelMetadataQuery.getRowCount(childRel);
  }

  public Double estimateSelectivity(RexNode predicate) {
    return predicate.accept(this);
  }

  public Double visitCall(RexCall call) {
    if (!deep) {
      return 1.0;
    }

    /*
     * Ignore any predicates on partition columns because we have already
     * accounted for these in the Table row count.
     */
    if (isPartitionPredicate(call, this.childRel)) {
      return 1.0;
    }

    Double selectivity = null;
    SqlKind op = getOp(call);

    switch (op) {
    case AND: {
      selectivity = computeConjunctionSelectivity(call);
      break;
    }

    case OR: {
      selectivity = computeDisjunctionSelectivity(call);
      break;
    }

    case NOT:
    case NOT_EQUALS: {
      selectivity = computeNotEqualitySelectivity(call);
      break;
    }

    case LESS_THAN_OR_EQUAL:
    case GREATER_THAN_OR_EQUAL:
    case LESS_THAN:
    case GREATER_THAN: {
      selectivity = ((double) 1 / (double) 3);
      break;
    }

    case IN: {
      // TODO: 1) check for duplicates 2) We assume in clause values to be
      // present in NDV which may not be correct (Range check can find it) 3) We
      // assume values in NDV set is uniformly distributed over col values
      // (account for skewness - histogram).
      selectivity = computeFunctionSelectivity(call) * (call.operands.size() - 1);
      if (selectivity <= 0.0) {
        selectivity = 0.10;
      } else if (selectivity >= 1.0) {
        selectivity = 1.0;
      }
      break;
    }

    default:
      selectivity = computeFunctionSelectivity(call);
    }

    return selectivity;
  }

  /**
   * NDV of "f1(x, y, z) != f2(p, q, r)" ->
   * "(maxNDV(x,y,z,p,q,r) - 1)/maxNDV(x,y,z,p,q,r)".
   * 

* * @param call * @return */ private Double computeNotEqualitySelectivity(RexCall call) { double tmpNDV = getMaxNDV(call); if (tmpNDV > 1) return (tmpNDV - (double) 1) / tmpNDV; else return 1.0; } /** * Selectivity of f(X,y,z) -> 1/maxNDV(x,y,z). *

* Note that >, >=, <, <=, = ... are considered generic functions and uses * this method to find their selectivity. * * @param call * @return */ private Double computeFunctionSelectivity(RexCall call) { return 1 / getMaxNDV(call); } /** * Disjunction Selectivity -> (1 D(1-m1/n)(1-m2/n)) where n is the total * number of tuples from child and m1 and m2 is the expected number of tuples * from each part of the disjunction predicate. *

* Note we compute m1. m2.. by applying selectivity of the disjunctive element * on the cardinality from child. * * @param call * @return */ private Double computeDisjunctionSelectivity(RexCall call) { Double tmpCardinality; Double tmpSelectivity; double selectivity = 1; for (RexNode dje : call.getOperands()) { tmpSelectivity = dje.accept(this); if (tmpSelectivity == null) { tmpSelectivity = 0.99; } tmpCardinality = childCardinality * tmpSelectivity; if (tmpCardinality > 1 && tmpCardinality < childCardinality) { tmpSelectivity = (1 - tmpCardinality / childCardinality); } else { tmpSelectivity = 1.0; } selectivity *= tmpSelectivity; } if (selectivity < 0.0) selectivity = 0.0; return (1 - selectivity); } /** * Selectivity of conjunctive predicate -> (selectivity of conjunctive * element1) * (selectivity of conjunctive element2)... * * @param call * @return */ private Double computeConjunctionSelectivity(RexCall call) { Double tmpSelectivity; double selectivity = 1; for (RexNode cje : call.getOperands()) { tmpSelectivity = cje.accept(this); if (tmpSelectivity != null) { selectivity *= tmpSelectivity; } } return selectivity; } private Double getMaxNDV(RexCall call) { double tmpNDV; double maxNDV = 1.0; InputReferencedVisitor irv; for (RexNode op : call.getOperands()) { if (op instanceof RexInputRef) { tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, ((RexInputRef) op).getIndex()); if (tmpNDV > maxNDV) maxNDV = tmpNDV; } else { irv = new InputReferencedVisitor(); irv.apply(op); for (Integer childProjIndx : irv.inputPosReferenced) { tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, childProjIndx); if (tmpNDV > maxNDV) maxNDV = tmpNDV; } } } return maxNDV; } private boolean isPartitionPredicate(RexNode expr, RelNode r) { if (r instanceof Project) { expr = RelOptUtil.pushFilterPastProject(expr, (Project) r); return isPartitionPredicate(expr, ((Project) r).getInput()); } else if (r instanceof Filter) { return isPartitionPredicate(expr, ((Filter) r).getInput()); } else if (r instanceof HiveTableScan) { RelOptHiveTable table = (RelOptHiveTable) ((HiveTableScan) r).getTable(); ImmutableBitSet cols = RelOptUtil.InputFinder.bits(expr); return table.containsPartitionColumnsOnly(cols); } return false; } private SqlKind getOp(RexCall call) { SqlKind op = call.getKind(); if (call.getKind().equals(SqlKind.OTHER_FUNCTION) && SqlTypeUtil.inBooleanFamily(call.getType())) { SqlOperator sqlOp = call.getOperator(); String opName = (sqlOp != null) ? sqlOp.getName() : ""; if (opName.equalsIgnoreCase("in")) { op = SqlKind.IN; } } return op; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy