com.hazelcast.org.apache.calcite.plan.volcano.RelSubset Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of hazelcast-sql Show documentation
SQL Parser and Optimizer
There is a newer version: 5.5.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.hazelcast.org.apache.calcite.plan.volcano;

import com.hazelcast.org.apache.calcite.linq4j.Linq4j;
import com.hazelcast.org.apache.calcite.plan.RelOptCluster;
import com.hazelcast.org.apache.calcite.plan.RelOptCost;
import com.hazelcast.org.apache.calcite.plan.RelOptListener;
import com.hazelcast.org.apache.calcite.plan.RelOptPlanner;
import com.hazelcast.org.apache.calcite.plan.RelOptUtil;
import com.hazelcast.org.apache.calcite.plan.RelTrait;
import com.hazelcast.org.apache.calcite.plan.RelTraitSet;
import com.hazelcast.org.apache.calcite.rel.AbstractRelNode;
import com.hazelcast.org.apache.calcite.rel.RelNode;
import com.hazelcast.org.apache.calcite.rel.RelWriter;
import com.hazelcast.org.apache.calcite.rel.core.CorrelationId;
import com.hazelcast.org.apache.calcite.rel.externalize.RelWriterImpl;
import com.hazelcast.org.apache.calcite.rel.metadata.RelMetadataQuery;
import com.hazelcast.org.apache.calcite.rel.type.RelDataType;
import com.hazelcast.org.apache.calcite.sql.SqlExplainLevel;
import com.hazelcast.org.apache.calcite.util.Litmus;
import com.hazelcast.org.apache.calcite.util.Pair;
import com.hazelcast.org.apache.calcite.util.Util;
import com.hazelcast.org.apache.calcite.util.trace.CalciteTrace;

import org.apiguardian.api.API;
import com.hazelcast.org.slf4j.Logger;

import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Subset of an equivalence class where all relational expressions have the
 * same physical properties.
 *
 * Physical properties are instances of the {@link RelTraitSet}, and consist
 * of traits such as calling convention and collation (sort-order).
 *
 * <p>For some traits, a relational expression can have more than one instance.
 * For example, R can be sorted on both [X] and [Y, Z]. In which case, R would
 * belong to the sub-sets for [X] and [Y, Z]; and also the leading edges [Y] and
 * [].
 *
 * @see RelNode
 * @see RelSet
 * @see RelTrait
 */
public class RelSubset extends AbstractRelNode {
  //~ Static fields/initializers ---------------------------------------------

  // Planner trace logger; also used by the nested CheapestPlanReplacer.
  private static final Logger LOGGER = CalciteTrace.getPlannerTracer();
  // Bit flags combined in the "state" field below.
  private static final int DELIVERED = 1;
  private static final int REQUIRED = 2;

  //~ Instance fields --------------------------------------------------------

  /**
   * Optimization task state.
   */
  OptimizeTask.State taskState;

  /**
   * Cost of the best known plan (it may have improved since).
   */
  RelOptCost bestCost;

  /**
   * The set this subset belongs to.
   */
  final RelSet set;

  /**
   * Best known plan; {@code null} until a rel with a cost lower than
   * {@link #bestCost} has been recorded.
   */
  RelNode best;

  /**
   * Timestamp for metadata validity; incremented on every call to
   * {@link #propagateCostImprovements0}.
   */
  long timestamp;

  /**
   * Physical property state of current subset, a bit mask of
   * {@code DELIVERED} and {@code REQUIRED}:
   * 0: logical operators, NONE convention is neither DELIVERED nor REQUIRED
   * 1: traitSet DELIVERED from child operators or itself
   * 2: traitSet REQUIRED from parent operators
   * 3: both DELIVERED and REQUIRED
   */
  private int state = 0;

  /**
   * This subset should trigger rules when it becomes delivered.
   * Set by {@link #setDelivered()} and cleared by {@link #setRequired()}.
   */
  boolean triggerRule = false;

  //~ Constructors -----------------------------------------------------------

  /**
   * Creates a subset of {@code set} identified by the trait set {@code traits}.
   *
   * <p>The initial best plan and cost are computed from the rels already in the
   * set that satisfy {@code traits}; the digest is recomputed afterwards, once
   * {@link #set} has been assigned.
   *
   * @param cluster Cluster; its planner is used to cost the existing rels
   * @param set     Equivalence set this subset belongs to
   * @param traits  Trait set of this subset; asserted to be simple
   */
  RelSubset(
      RelOptCluster cluster,
      RelSet set,
      RelTraitSet traits) {
    super(cluster, traits);
    this.set = set;
    assert traits.allSimple();
    // computeBestCost and computeDigest both read this.set, so they must run
    // after the assignment above.
    computeBestCost(cluster.getPlanner());
    recomputeDigest();
  }

  //~ Methods ----------------------------------------------------------------

  /**
   * Computes the best {@link RelNode} in this subset.
   *
   * <p>Only necessary when a subset is created in a set that has subsets that
   * subsume it. Rationale:
   *
   * <ol>
   * <li>If there are no subsuming subsets, the subset is initially empty.</li>
   * <li>After creation, {@code best} and {@code bestCost} are maintained
   *    incrementally by {@link #propagateCostImprovements0} and
   *    {@link RelSet#mergeWith(VolcanoPlanner, RelSet)}.</li>
   * </ol>
   *
   * @param planner Planner that supplies the cost factory and costs each rel
   */
  private void computeBestCost(RelOptPlanner planner) {
    // Start from "infinite" so that any rel with a finite cost wins.
    bestCost = planner.getCostFactory().makeInfiniteCost();
    final RelMetadataQuery metadataQuery = getCluster().getMetadataQuery();
    for (RelNode candidate : getRels()) {
      final RelOptCost candidateCost = planner.getCost(candidate, metadataQuery);
      if (!candidateCost.isLt(bestCost)) {
        continue;
      }
      bestCost = candidateCost;
      best = candidate;
    }
  }

  /**
   * Marks this subset as delivered. {@link #triggerRule} is set only when the
   * subset was not already delivered, i.e. on the first transition.
   */
  void setDelivered() {
    final boolean wasDelivered = isDelivered();
    triggerRule = !wasDelivered;
    state = state | DELIVERED;
  }

  /**
   * Marks this subset as required and clears {@link #triggerRule}.
   */
  void setRequired() {
    triggerRule = false;
    state = state | REQUIRED;
  }

  /**
   * Returns whether the {@code DELIVERED} bit is set in this subset's state.
   */
  @API(since = "1.23", status = API.Status.EXPERIMENTAL)
  public boolean isDelivered() {
    // DELIVERED is a single bit, so a non-zero mask result means it is set.
    final int deliveredBit = state & DELIVERED;
    return deliveredBit != 0;
  }

  /**
   * Returns whether the {@code REQUIRED} bit is set in this subset's state.
   */
  @API(since = "1.23", status = API.Status.EXPERIMENTAL)
  public boolean isRequired() {
    // REQUIRED is a single bit, so a non-zero mask result means it is set.
    final int requiredBit = state & REQUIRED;
    return requiredBit != 0;
  }

  /**
   * Returns the best known plan for this subset.
   *
   * @return the current value of {@code best}; may be {@code null}, which is
   *     why callers in this class null-check it before use
   */
  public RelNode getBest() {
    return best;
  }

  /**
   * Returns {@code set.rel}, the rel held by the enclosing set.
   *
   * @return the set's rel; may be {@code null} ({@link #add} treats a
   *     {@code null} value as "no rel registered in the set yet")
   */
  public RelNode getOriginal() {
    return set.rel;
  }

  /**
   * Returns the subset of the same set that carries the given traits.
   *
   * <p>Only an empty {@code inputs} list is accepted. If the simplified traits
   * equal this subset's traits, {@code this} is returned; otherwise the subset
   * is looked up (or created) in the enclosing set.
   *
   * @param traitSet Desired traits; simplified before use
   * @param inputs   Must be empty
   * @return this subset, or the sibling subset with the simplified traits
   * @throws UnsupportedOperationException if {@code inputs} is not empty
   */
  @Override public RelNode copy(RelTraitSet traitSet, List<RelNode> inputs) {
    if (!inputs.isEmpty()) {
      throw new UnsupportedOperationException();
    }
    final RelTraitSet simplified = traitSet.simplify();
    if (simplified.equals(this.traitSet)) {
      return this;
    }
    return set.getOrCreateSubset(getCluster(), simplified);
  }

  /**
   * Returns the zero cost of the planner's cost factory; the subset itself
   * adds nothing on top of the cost of the rels it contains.
   *
   * @param planner Planner whose cost factory creates the zero cost
   * @param mq      Metadata query (unused)
   */
  @Override public RelOptCost computeSelfCost(RelOptPlanner planner,
      RelMetadataQuery mq) {
    return planner.getCostFactory().makeZeroCost();
  }

  /**
   * Estimates the row count from the best known plan, falling back to the
   * set's rel while no best plan exists.
   *
   * @param mq Metadata query used to obtain the row count
   */
  @Override public double estimateRowCount(RelMetadataQuery mq) {
    final RelNode source = best != null ? best : set.rel;
    return mq.getRowCount(source);
  }

  /**
   * Writes this subset to {@code pw}, followed by the terms of the best plan
   * (or of the set's rel when there is no best plan).
   *
   * <p>Not a typical implementation of "explain": terms and values are not
   * gathered to be printed later, the work is done right here.
   */
  @Override public void explain(RelWriter pw) {
    pw.item("subset", toString());
    final RelNode source = Util.first(getBest(), getOriginal());
    if (source != null) {
      final AbstractRelNode input = (AbstractRelNode) source;
      input.explainTerms(pw);
      pw.done(input);
    }
  }

  /**
   * Builds the digest {@code Subset#<set id>} followed by {@code .<trait>}
   * for every trait of this subset, in trait-set order.
   */
  @Override protected String computeDigest() {
    final StringBuilder sb = new StringBuilder("Subset#").append(set.id);
    for (RelTrait trait : traitSet) {
      sb.append('.');
      sb.append(trait);
    }
    return sb.toString();
  }

  /**
   * Derives the row type of this subset from {@code set.rel}, the same rel
   * that {@link #getOriginal()} returns.
   */
  @Override protected RelDataType deriveRowType() {
    return set.rel.getRowType();
  }

  /**
   * Returns the collection of RelNodes one of whose inputs is in this
   * subset.
   *
   * <p>Inputs are matched by identity ({@code ==}), so only parents that
   * reference exactly this subset instance are returned. The result keeps the
   * iteration order of the set's parent rels and contains no duplicates.
   */
  Set<RelNode> getParents() {
    final Set<RelNode> parents = new LinkedHashSet<>();
    for (RelNode parent : set.getParentRels()) {
      for (RelSubset input : inputSubsets(parent)) {
        // see usage of this method in propagateCostImprovements0()
        if (input == this) {
          parents.add(parent);
        }
      }
    }
    return parents;
  }

  /**
   * Returns the collection of distinct subsets that contain a RelNode one
   * of whose inputs is in this subset.
   *
   * <p>An input matches when it belongs to the same set and has a trait set
   * equal to this subset's.
   *
   * @param planner Planner used to look up the subset of each parent rel
   */
  Set<RelSubset> getParentSubsets(VolcanoPlanner planner) {
    final Set<RelSubset> parentSubsets = new LinkedHashSet<>();
    for (RelNode parent : set.getParentRels()) {
      for (RelSubset input : inputSubsets(parent)) {
        if (input.set == set && input.getTraitSet().equals(traitSet)) {
          parentSubsets.add(planner.getSubset(parent));
        }
      }
    }
    return parentSubsets;
  }

  /**
   * Returns the inputs of {@code parent} viewed as a list of subsets.
   *
   * <p>The cast is unchecked: it relies on the callers' assumption that every
   * input of a rel registered as a parent of a set is a {@link RelSubset}.
   */
  @SuppressWarnings({"unchecked", "rawtypes"})
  private static List<RelSubset> inputSubsets(RelNode parent) {
    //noinspection unchecked
    return (List<RelSubset>) (List) parent.getInputs();
  }

  /**
   * Returns a list of relational expressions one of whose children is this
   * subset. The elements of the list are distinct.
   *
   * <p>A parent qualifies when at least one of its inputs belongs to the same
   * set and has a trait set that this subset's traits satisfy.
   */
  public Collection<RelNode> getParentRels() {
    final Set<RelNode> parents = new LinkedHashSet<>();
    for (RelNode parent : set.getParentRels()) {
      for (RelSubset input : inputSubsets(parent)) {
        if (input.set == set && traitSet.satisfies(input.getTraitSet())) {
          parents.add(parent);
          // One matching input is enough; move on to the next parent.
          break;
        }
      }
    }
    return parents;
  }

  /**
   * Returns the set this subset belongs to.
   */
  RelSet getSet() {
    return set;
  }

  /**
   * Adds expression {@code rel} to this subset.
   *
   * <p>Does nothing if the set already contains {@code rel}. Otherwise the
   * planner's listener (if any) is notified of the new equivalence, the row
   * type is checked against the subset's row type when the set already has a
   * rel, and the rel is added to the set.
   *
   * @param rel Relational expression to add
   */
  void add(RelNode rel) {
    if (set.rels.contains(rel)) {
      return;
    }

    VolcanoPlanner planner = (VolcanoPlanner) rel.getCluster().getPlanner();
    if (planner.getListener() != null) {
      RelOptListener.RelEquivalenceEvent event =
          new RelOptListener.RelEquivalenceEvent(
              planner,
              rel,
              this,
              true);
      planner.getListener().relEquivalenceFound(event);
    }

    // If this isn't the first rel in the set, it must have compatible
    // row type.
    if (set.rel != null) {
      RelOptUtil.equal("rowtype of new rel", rel.getRowType(),
          "rowtype of set", getRowType(), Litmus.THROW);
    }
    set.addInternal(rel);
    // Deliberately disabled consistency check, kept as documentation of the
    // invariant between a rel's correlation variables and those of its set.
    if (false) {
      Set<CorrelationId> variablesSet = RelOptUtil.getVariablesSet(rel);
      Set<CorrelationId> variablesStopped = rel.getVariablesSet();
      Set<CorrelationId> variablesPropagated =
          Util.minus(variablesSet, variablesStopped);
      assert set.variablesPropagated.containsAll(variablesPropagated);
      Set<CorrelationId> variablesUsed = RelOptUtil.getVariablesUsed(rel);
      assert set.variablesUsed.containsAll(variablesUsed);
    }
  }

  /**
   * Recursively builds a tree consisting of the cheapest plan at each node.
   *
   * <p>Once the tree is built, the planner's listener (if any) receives a
   * final {@code relChosen} event whose rel is {@code null}.
   *
   * @param planner Planner that owns this subset
   * @return root of the cheapest plan
   */
  RelNode buildCheapestPlan(VolcanoPlanner planner) {
    final RelNode cheapest =
        new CheapestPlanReplacer(planner).visit(this, -1, null);

    if (planner.getListener() == null) {
      return cheapest;
    }
    final RelOptListener.RelChosenEvent finished =
        new RelOptListener.RelChosenEvent(planner, null);
    planner.getListener().relChosen(finished);
    return cheapest;
  }

  /**
   * Checks whether a relexp has made its subset cheaper, and if so,
   * propagates the new cost to parent rel nodes in a breadth-first manner.
   *
   * <p>The queue is seeded with every subset of this set whose traits are
   * satisfied by {@code rel}; each processed entry may enqueue further
   * (subset, parent rel) pairs.
   *
   * @param planner   Planner
   * @param mq        Metadata query
   * @param rel       Relational expression whose cost has improved
   * @param activeSet Set of active subsets, for cycle detection
   */
  void propagateCostImprovements(VolcanoPlanner planner, RelMetadataQuery mq,
      RelNode rel, Set<RelSubset> activeSet) {
    final Queue<Pair<RelSubset, RelNode>> propagationQueue = new ArrayDeque<>();
    for (RelSubset subset : set.subsets) {
      if (rel.getTraitSet().satisfies(subset.traitSet)) {
        propagationQueue.offer(Pair.of(subset, rel));
      }
    }

    while (!propagationQueue.isEmpty()) {
      final Pair<RelSubset, RelNode> p = propagationQueue.poll();
      p.left.propagateCostImprovements0(planner, mq, p.right, activeSet, propagationQueue);
    }
  }

  void propagateCostImprovements0(VolcanoPlanner planner, RelMetadataQuery mq,
      RelNode rel, Set activeSet,
      Queue> propagationQueue) {
    ++timestamp;

    if (!activeSet.add(this)) {
      // This subset is already in the chain being propagated to. This
      // means that the graph is cyclic, and therefore the cost of this
      // relational expression - not this subset - must be infinite.
      LOGGER.trace("cyclic: {}", this);
      return;
    }
    try {
      RelOptCost cost = planner.getCost(rel, mq);

      // Update subset best cost when we find a cheaper rel or the current
      // best's cost is changed
      if (cost.isLt(bestCost)) {
        LOGGER.trace("Subset cost changed: subset [{}] cost was {} now {}",
            this, bestCost, cost);

        bestCost = cost;
        best = rel;
        // since best was changed, cached metadata for this subset should be removed
        mq.clearCache(this);

        // Propagate cost change to parents
        for (RelNode parent : getParents()) {
          // removes parent cached metadata since its input was changed
          mq.clearCache(parent);
          final RelSubset parentSubset = planner.getSubset(parent);

          // parent subset will clear its cache in propagateCostImprovements0 method itself
          for (RelSubset subset : parentSubset.set.subsets) {
            if (parent.getTraitSet().satisfies(subset.traitSet)) {
              propagationQueue.offer(Pair.of(subset, parent));
            }
          }
        }
      }
    } finally {
      activeSet.remove(this);
    }
  }

  /**
   * Adds the correlation variables used by the enclosing set to
   * {@code variableSet}.
   */
  @Override public void collectVariablesUsed(Set<CorrelationId> variableSet) {
    variableSet.addAll(set.variablesUsed);
  }

  /**
   * Adds the correlation variables propagated by the enclosing set to
   * {@code variableSet}.
   */
  @Override public void collectVariablesSet(Set<CorrelationId> variableSet) {
    variableSet.addAll(set.variablesPropagated);
  }

  /**
   * Returns the rel nodes in this rel subset.  All rels must have the same
   * traits and are logically equivalent.
   *
   * <p>The result is a view: each call to {@code iterator()} filters the
   * set's rels by "trait set satisfies this subset's trait set" at that time.
   *
   * @return all the rels in the subset
   */
  public Iterable<RelNode> getRels() {
    return () -> Linq4j.asEnumerable(set.rels)
        .where(v1 -> v1.getTraitSet().satisfies(traitSet))
        .iterator();
  }

  /**
   * As {@link #getRels()} but returns a list.
   *
   * @return a new mutable list of the set's rels whose traits satisfy this
   *     subset's trait set
   */
  public List<RelNode> getRelList() {
    final List<RelNode> matching = new ArrayList<>();
    for (RelNode rel : set.rels) {
      if (rel.getTraitSet().satisfies(traitSet)) {
        matching.add(rel);
      }
    }
    return matching;
  }

  /**
   * Returns stream of subsets whose traitset satisfies
   * current subset's traitset.
   */
  @API(since = "1.23", status = API.Status.EXPERIMENTAL)
  public Stream<RelSubset> getSubsetsSatisfyingThis() {
    return set.subsets.stream()
      .filter(s -> s.getTraitSet().satisfies(traitSet));
  }

  /**
   * Returns stream of subsets whose traitset is satisfied
   * by current subset's traitset.
   */
  @API(since = "1.23", status = API.Status.EXPERIMENTAL)
  public Stream<RelSubset> getSatisfyingSubsets() {
    return set.subsets.stream()
      .filter(s -> traitSet.satisfies(s.getTraitSet()));
  }

  //~ Inner Classes ----------------------------------------------------------

  /**
   * Identifies the leaf-most non-implementable nodes.
   *
   * <p>Starting from a subset without a best plan, the finder walks down
   * through rels and their input subsets and records in {@link #deadEnds}
   * every visited subset that has no best plan and none of whose rels leads
   * further down (each is a converter, already on the current path, or
   * cyclic).
   */
  static class DeadEndFinder {
    /** Subsets found to be dead ends. */
    final Set<RelSubset> deadEnds = new HashSet<>();
    // To save time
    private final Set<RelNode> visitedNodes = new HashSet<>();
    // For cycle detection
    private final Set<RelNode> activeNodes = new HashSet<>();

    /**
     * Visits a node; returns true only when {@code p} is a plain rel that
     * turned out to be cyclic. Subsets always yield false.
     */
    private boolean visit(RelNode p) {
      if (p instanceof RelSubset) {
        visitSubset((RelSubset) p);
        return false;
      }
      return visitRel(p);
    }

    /**
     * Explores the rels of a subset that has no best plan and records the
     * subset as a dead end when none of its rels could be descended into.
     */
    private void visitSubset(RelSubset subset) {
      RelNode cheapest = subset.getBest();
      if (cheapest != null) {
        // Subset is implementable, and we are looking for bad ones, so stop here
        return;
      }

      boolean isEmpty = true;
      for (RelNode rel : subset.getRels()) {
        if (rel instanceof AbstractConverter) {
          // Converters are not implementable
          continue;
        }
        if (!activeNodes.add(rel)) {
          // Already on the current path
          continue;
        }
        boolean res = visit(rel);
        isEmpty &= res;
        activeNodes.remove(rel);
      }
      if (isEmpty) {
        deadEnds.add(subset);
      }
    }

    /**
     * Returns true when input {@code RelNode} is cyclic.
     */
    private boolean visitRel(RelNode p) {
      // If one of the inputs is in "active" set, that means the rel forms a cycle,
      // then we just ignore it. Cyclic rels are not implementable.
      for (RelNode oldInput : p.getInputs()) {
        if (activeNodes.contains(oldInput)) {
          return true;
        }
      }
      // The same subset can be used multiple times (e.g. union all with the same inputs),
      // so it is important to perform "contains" and "add" in different loops
      activeNodes.addAll(p.getInputs());
      for (RelNode oldInput : p.getInputs()) {
        if (!visitedNodes.add(oldInput)) {
          // We don't want to explore the same subset twice
          continue;
        }
        visit(oldInput);
      }
      activeNodes.removeAll(p.getInputs());
      return false;
    }
  }

  /**
   * Visitor which walks over a tree of {@link RelSet}s, replacing each node
   * with the cheapest implementation of the expression.
   */
  static class CheapestPlanReplacer {
    VolcanoPlanner planner;

    CheapestPlanReplacer(VolcanoPlanner planner) {
      super();
      this.planner = planner;
    }

    private static String traitDiff(RelTraitSet original, RelTraitSet desired) {
      return Pair.zip(original, desired)
          .stream()
          .filter(p -> !p.left.satisfies(p.right))
          .map(p -> p.left.getTraitDef().getSimpleName() + ": " + p.left + " -> " + p.right)
          .collect(Collectors.joining(", ", "[", "]"));
    }

    public RelNode visit(
        RelNode p,
        int ordinal,
        RelNode parent) {
      if (p instanceof RelSubset) {
        RelSubset subset = (RelSubset) p;
        RelNode cheapest = subset.best;
        if (cheapest == null) {
          // Dump the planner's expression pool so we can figure
          // out why we reached impasse.
          StringWriter sw = new StringWriter();
          final PrintWriter pw = new PrintWriter(sw);

          pw.print("There are not enough rules to produce a node with desired properties");
          RelTraitSet desiredTraits = subset.getTraitSet();
          String sep = ": ";
          for (RelTrait trait : desiredTraits) {
            pw.print(sep);
            pw.print(trait.getTraitDef().getSimpleName());
            pw.print("=");
            pw.print(trait);
            sep = ", ";
          }
          pw.print(".");
          DeadEndFinder finder = new DeadEndFinder();
          finder.visit(subset);
          if (finder.deadEnds.isEmpty()) {
            pw.print(" All the inputs have relevant nodes, however the cost is still infinite.");
          } else {
            Map problemCounts =
                finder.deadEnds.stream()
                    .filter(deadSubset -> deadSubset.getOriginal() != null)
                    .map(x -> x.getOriginal().getClass().getSimpleName()
                        + traitDiff(x.getOriginal().getTraitSet(), x.getTraitSet()))
                    .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
            // Sort problems from most often to less often ones
            String problems = problemCounts.entrySet().stream()
                .sorted(Comparator.comparingLong(Map.Entry::getValue).reversed())
                .map(e -> e.getKey() + (e.getValue() > 1 ? " (" + e.getValue() + " cases)" : ""))
                .collect(Collectors.joining(", "));
            pw.println();
            pw.print("Missing conversion");
            pw.print(finder.deadEnds.size() == 1 ? " is " : "s are ");
            pw.print(problems);
            pw.println();
            if (finder.deadEnds.size() == 1) {
              pw.print("There is 1 empty subset: ");
            }
            if (finder.deadEnds.size() > 1) {
              pw.println("There are " + finder.deadEnds.size() + " empty subsets:");
            }
            int i = 0;
            int rest = finder.deadEnds.size();
            for (RelSubset deadEnd : finder.deadEnds) {
              if (finder.deadEnds.size() > 1) {
                pw.print("Empty subset ");
                pw.print(i);
                pw.print(": ");
              }
              pw.print(deadEnd);
              pw.println(", the relevant part of the original plan is as follows");
              RelNode original = deadEnd.getOriginal();
              original.explain(
                  new RelWriterImpl(pw, SqlExplainLevel.EXPPLAN_ATTRIBUTES, true));
              i++;
              rest--;
              if (rest > 0) {
                pw.println();
              }
              if (i >= 10 && rest > 1) {
                pw.print("The rest ");
                pw.print(rest);
                pw.println(" leafs are omitted.");
                break;
              }
            }
          }
          pw.println();

          planner.dump(pw);
          pw.flush();
          final String dump = sw.toString();
          RuntimeException e =
              new RelOptPlanner.CannotPlanException(dump);
          LOGGER.trace("Caught exception in class={}, method=visit", getClass().getName(), e);
          throw e;
        }
        p = cheapest;
      }

      if (ordinal != -1) {
        if (planner.getListener() != null) {
          RelOptListener.RelChosenEvent event =
              new RelOptListener.RelChosenEvent(
                  planner,
                  p);
          planner.getListener().relChosen(event);
        }
      }

      List oldInputs = p.getInputs();
      List inputs = new ArrayList<>();
      for (int i = 0; i < oldInputs.size(); i++) {
        RelNode oldInput = oldInputs.get(i);
        RelNode input = visit(oldInput, i, p);
        inputs.add(input);
      }
      if (!inputs.equals(oldInputs)) {
        final RelNode pOld = p;
        p = p.copy(p.getTraitSet(), inputs);
        planner.provenanceMap.put(
            p, new VolcanoPlanner.DirectProvenance(pOld));
      }
      return p;
    }
  }
}