com.hazelcast.org.apache.calcite.rel.core.Sort Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of hazelcast-sql Show documentation
SQL Parser and Optimizer
There is a newer version: 5.4.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.hazelcast.org.apache.calcite.rel.core;

import com.hazelcast.org.apache.calcite.linq4j.Ord;
import com.hazelcast.org.apache.calcite.plan.RelOptCluster;
import com.hazelcast.org.apache.calcite.plan.RelOptCost;
import com.hazelcast.org.apache.calcite.plan.RelOptPlanner;
import com.hazelcast.org.apache.calcite.plan.RelTraitSet;
import com.hazelcast.org.apache.calcite.rel.RelCollation;
import com.hazelcast.org.apache.calcite.rel.RelCollationTraitDef;
import com.hazelcast.org.apache.calcite.rel.RelFieldCollation;
import com.hazelcast.org.apache.calcite.rel.RelInput;
import com.hazelcast.org.apache.calcite.rel.RelNode;
import com.hazelcast.org.apache.calcite.rel.RelWriter;
import com.hazelcast.org.apache.calcite.rel.SingleRel;
import com.hazelcast.org.apache.calcite.rel.hint.Hintable;
import com.hazelcast.org.apache.calcite.rel.hint.RelHint;
import com.hazelcast.org.apache.calcite.rel.metadata.RelMetadataQuery;
import com.hazelcast.org.apache.calcite.rex.RexLiteral;
import com.hazelcast.org.apache.calcite.rex.RexNode;
import com.hazelcast.org.apache.calcite.rex.RexShuttle;
import com.hazelcast.org.apache.calcite.util.Util;

import com.hazelcast.com.google.common.collect.ImmutableList;

import com.hazelcast.org.checkerframework.checker.nullness.qual.Nullable;

import java.util.Collections;
import java.util.List;
import java.util.Objects;

/**
 * Relational expression that imposes a particular sort order on its input
 * without otherwise changing its content.
 *
 * <p>In addition to the sort keys ({@link #collation}), a Sort may carry an
 * optional {@link #offset} and {@link #fetch} expression, and a list of
 * hints.
 */
public abstract class Sort extends SingleRel implements Hintable {
  //~ Instance fields --------------------------------------------------------

  /** Sort specification; may have no field collations if only
   * {@link #offset} and/or {@link #fetch} are applied. */
  public final RelCollation collation;

  /** Expression for number of rows to discard before returning the first
   * row, or null if no offset is applied. */
  public final @Nullable RexNode offset;

  /** Expression for number of rows to fetch, or null if there is no limit. */
  public final @Nullable RexNode fetch;

  /** Hints attached to this node; an immutable copy of the list passed to
   * the constructor. */
  protected final ImmutableList<RelHint> hints;

  //~ Constructors -----------------------------------------------------------

  /**
   * Creates a Sort.
   *
   * @param cluster   Cluster this relational expression belongs to
   * @param traits    Traits
   * @param hints     Hints for this node
   * @param child     input relational expression
   * @param collation array of sort specifications
   * @param offset    Expression for number of rows to discard before returning
   *                  first row
   * @param fetch     Expression for number of rows to fetch
   */
  protected Sort(
      RelOptCluster cluster,
      RelTraitSet traits,
      List<RelHint> hints,
      RelNode child,
      RelCollation collation,
      @Nullable RexNode offset,
      @Nullable RexNode fetch) {
    super(cluster, traits, child);
    this.collation = collation;
    this.offset = offset;
    this.fetch = fetch;
    this.hints = ImmutableList.copyOf(hints);

    assert traits.containsIfApplicable(collation)
            : "traits=" + traits + ", collation=" + collation;
    // A Sort with no keys, no offset and no fetch would do nothing at all.
    assert !(fetch == null
            && offset == null
            && collation.getFieldCollations().isEmpty())
            : "trivial sort";
  }

  /**
   * Creates a Sort with no hints, no offset and no fetch.
   *
   * @param cluster   Cluster this relational expression belongs to
   * @param traits    Traits
   * @param child     input relational expression
   * @param collation array of sort specifications
   */
  protected Sort(
      RelOptCluster cluster,
      RelTraitSet traits,
      RelNode child,
      RelCollation collation) {
    this(cluster, traits, Collections.emptyList(), child, collation, null, null);
  }

  /**
   * Creates a Sort with no hints.
   *
   * @param cluster   Cluster this relational expression belongs to
   * @param traits    Traits
   * @param child     input relational expression
   * @param collation array of sort specifications
   * @param offset    Expression for number of rows to discard before returning
   *                  first row
   * @param fetch     Expression for number of rows to fetch
   */
  protected Sort(
      RelOptCluster cluster,
      RelTraitSet traits,
      RelNode child,
      RelCollation collation,
      @Nullable RexNode offset,
      @Nullable RexNode fetch) {
    this(cluster, traits, Collections.emptyList(), child, collation, offset, fetch);
  }

  /**
   * Creates a Sort by parsing serialized output.
   *
   * <p>The collation read from {@code input} is added to the trait set and
   * canonized; the offset and fetch are read from the {@code "offset"} and
   * {@code "fetch"} expressions.
   */
  protected Sort(RelInput input) {
    this(input.getCluster(), input.getTraitSet().plus(input.getCollation()),
        input.getInput(),
        RelCollationTraitDef.INSTANCE.canonize(input.getCollation()),
        input.getExpression("offset"), input.getExpression("fetch"));
  }

  //~ Methods ----------------------------------------------------------------

  /** Copies this Sort with a new trait set and the sole element of
   * {@code inputs} as its input, keeping the current collation, offset and
   * fetch. */
  @Override public final Sort copy(RelTraitSet traitSet, List<RelNode> inputs) {
    return copy(traitSet, sole(inputs), collation, offset, fetch);
  }

  /** Copies this Sort with a new trait set, input and collation, keeping the
   * current offset and fetch. */
  public final Sort copy(RelTraitSet traitSet, RelNode newInput,
      RelCollation newCollation) {
    return copy(traitSet, newInput, newCollation, offset, fetch);
  }

  /** Copies this Sort, replacing its trait set, input, collation, offset and
   * fetch. The other {@code copy} overloads delegate to this method. */
  public abstract Sort copy(RelTraitSet traitSet, RelNode newInput,
      RelCollation newCollation, @Nullable RexNode offset, @Nullable RexNode fetch);

  /** {@inheritDoc}
   *
   * <p>The CPU cost of a Sort has three main cases:
   *
   * <ul>
   * <li>If {@code fetch} is zero, CPU cost is zero; otherwise,
   *
   * <li>if the sort keys are empty, we don't need to sort, only step over
   * the rows, and therefore the CPU cost is
   * {@code min(fetch + offset, inputRowCount) * bytesPerRow}; otherwise
   *
   * <li>we need to read and sort {@code inputRowCount} rows, with at most
   * {@code min(fetch + offset, inputRowCount)} of them in the sort data
   * structure at a time, giving a CPU cost of {@code inputRowCount *
   * log(min(fetch + offset, inputRowCount)) * bytesPerRow}.
   * </ul>
   *
   * <p>The cost model factors in row width via {@code bytesPerRow}, because
   * sorts need to move rows around, not just compare them; by making the cost
   * higher if rows are wider, we discourage pushing a Project through a Sort.
   * We assume that each field is 4 bytes, and we add 3 'virtual fields' to
   * represent the per-row overhead. Thus a 1-field row is (3 + 1) * 4 = 16
   * bytes; a 5-field row is (3 + 5) * 4 = 32 bytes.
   *
   * <p>The cost model does not consider a 5-field sort to be more expensive
   * than, say, a 2-field sort, because both sorts will compare just one field
   * most of the time. */
  @Override public @Nullable RelOptCost computeSelfCost(RelOptPlanner planner,
      RelMetadataQuery mq) {
    // A non-literal (or absent) offset is treated as zero.
    final double offsetValue = Util.first(doubleValue(offset), 0d);
    assert offsetValue >= 0 : "offset should not be negative:" + offsetValue;

    final double inCount = mq.getRowCount(input);
    // Null when fetch is absent or is not a literal; then all rows are read.
    @Nullable Double fetchValue = doubleValue(fetch);
    final double readCount;
    if (fetchValue == null) {
      readCount = inCount;
    } else if (fetchValue <= 0) {
      // Case 1. Read zero rows from input, therefore CPU cost is zero.
      return planner.getCostFactory().makeCost(inCount, 0, 0);
    } else {
      readCount = Math.min(inCount, offsetValue + fetchValue);
    }

    // 4 bytes per field, plus 3 'virtual fields' of per-row overhead.
    final double bytesPerRow = (3 + getRowType().getFieldCount()) * 4;

    final double cpu;
    if (collation.getFieldCollations().isEmpty()) {
      // Case 2. If sort keys are empty, CPU cost is cheaper because we are just
      // stepping over the first "readCount" rows, rather than sorting all
      // "inCount" them. (Presumably we are applying FETCH and/or OFFSET,
      // otherwise this Sort is a no-op.)
      cpu = readCount * bytesPerRow;
    } else {
      // Case 3. Read and sort all "inCount" rows, keeping "readCount" in the
      // sort data structure at a time.
      cpu = Util.nLogM(inCount, readCount) * bytesPerRow;
    }
    return planner.getCostFactory().makeCost(readCount, cpu, 0);
  }

  /**
   * Applies a shuttle to the offset and fetch expressions of this Sort.
   *
   * <p>The shuttle is also applied to the sort expressions, but only to
   * assert that it leaves them unchanged: this node does not support
   * modification of its input field expressions.
   *
   * @return this node if neither offset nor fetch changed, otherwise a copy
   *     with the new offset and fetch
   */
  @Override public RelNode accept(RexShuttle shuttle) {
    RexNode offset = shuttle.apply(this.offset);
    RexNode fetch = shuttle.apply(this.fetch);
    List<RexNode> originalSortExps = getSortExps();
    List<RexNode> sortExps = shuttle.apply(originalSortExps);
    assert sortExps == originalSortExps
        : "Sort node does not support modification of input field expressions."
          + " Old expressions: " + originalSortExps + ", new ones: " + sortExps;
    if (offset == this.offset
        && fetch == this.fetch) {
      return this;
    }
    return copy(traitSet, getInput(), collation, offset, fetch);
  }

  /** Returns true if this Sort has at least one sort key and neither an
   * offset nor a fetch. */
  @Override public boolean isEnforcer() {
    return offset == null && fetch == null
        && !collation.getFieldCollations().isEmpty();
  }

  /**
   * Returns the array of {@link RelFieldCollation}s asked for by the sort
   * specification, from most significant to least significant.
   *
   * <p>See also {@link RelMetadataQuery#collations(RelNode)},
   * which lists all known collations. For example,
   * {@code ORDER BY time_id} might also be sorted by
   * {@code the_year, the_month} because of a known monotonicity
   * constraint among the columns. {@code getCollation} would return
   * {@code [time_id]} and {@code collations} would return
   * {@code [ [time_id], [the_year, the_month] ]}.
   */
  public RelCollation getCollation() {
    return collation;
  }

  /** Returns the sort expressions: one input reference per field collation,
   * in collation order. */
  public List<RexNode> getSortExps() {
    //noinspection StaticPseudoFunctionalStyleMethod
    return Util.transform(collation.getFieldCollations(), field ->
        getCluster().getRexBuilder().makeInputRef(input,
            Objects.requireNonNull(field, "field").getFieldIndex()));
  }

  /**
   * Describes this Sort to a writer.
   *
   * <p>If the writer nests, the collation is written as a single
   * {@code "collation"} item; otherwise each sort expression is written as
   * {@code "sortN"} and each direction as {@code "dirN"}. The
   * {@code "offset"} and {@code "fetch"} items are written only when they
   * are not null.
   */
  @Override public RelWriter explainTerms(RelWriter pw) {
    super.explainTerms(pw);
    if (pw.nest()) {
      pw.item("collation", collation);
    } else {
      for (Ord<RexNode> ord : Ord.zip(getSortExps())) {
        pw.item("sort" + ord.i, ord.e);
      }
      for (Ord<RelFieldCollation> ord
          : Ord.zip(collation.getFieldCollations())) {
        pw.item("dir" + ord.i, ord.e.shortString());
      }
    }
    pw.itemIf("offset", offset, offset != null);
    pw.itemIf("fetch", fetch, fetch != null);
    return pw;
  }

  /** Returns the double value of a node if it is a literal, otherwise null. */
  private static @Nullable Double doubleValue(@Nullable RexNode r) {
    return r instanceof RexLiteral
        ? ((RexLiteral) r).getValueAs(Double.class)
        : null;
  }

  /** Returns the hints attached to this node. */
  @Override public ImmutableList<RelHint> getHints() {
    return hints;
  }
}