All downloads are free. Search and download functionality uses the official Maven repository.

com.hazelcast.org.apache.calcite.rel.metadata.BuiltInMetadata Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.hazelcast.org.apache.calcite.rel.metadata;

import com.hazelcast.org.apache.calcite.plan.RelOptCost;
import com.hazelcast.org.apache.calcite.plan.RelOptPredicateList;
import com.hazelcast.org.apache.calcite.rel.RelCollation;
import com.hazelcast.org.apache.calcite.rel.RelDistribution;
import com.hazelcast.org.apache.calcite.rel.RelNode;
import com.hazelcast.org.apache.calcite.rex.RexNode;
import com.hazelcast.org.apache.calcite.rex.RexTableInputRef;
import com.hazelcast.org.apache.calcite.rex.RexTableInputRef.RelTableRef;
import com.hazelcast.org.apache.calcite.sql.SqlExplainLevel;
import com.hazelcast.org.apache.calcite.util.BuiltInMethod;
import com.hazelcast.org.apache.calcite.util.ImmutableBitSet;

import com.hazelcast.com.google.common.collect.ImmutableList;
import com.hazelcast.com.google.common.collect.Multimap;

import java.util.List;
import java.util.Set;

/**
 * Contains the interfaces for several common forms of metadata.
 */
public abstract class BuiltInMetadata {

  /** Metadata about the selectivity of a predicate. */
  public interface Selectivity extends Metadata {
    MetadataDef DEF = MetadataDef.of(Selectivity.class,
        Selectivity.Handler.class, BuiltInMethod.SELECTIVITY.method);

    /**
     * Estimates the percentage of an expression's output rows which satisfy a
     * given predicate. Returns null to indicate that no reliable estimate can
     * be produced.
     *
     * @param predicate predicate whose selectivity is to be estimated against
     *                  rel's output
     * @return estimated selectivity (between 0.0 and 1.0), or null if no
     * reliable estimate can be determined
     */
    Double getSelectivity(RexNode predicate);

    /** Handler API. */
    interface Handler extends MetadataHandler {
      Double getSelectivity(RelNode r, RelMetadataQuery mq, RexNode predicate);
    }
  }

  /** Metadata about which combinations of columns are unique identifiers. */
  public interface UniqueKeys extends Metadata {
    MetadataDef DEF = MetadataDef.of(UniqueKeys.class,
        UniqueKeys.Handler.class, BuiltInMethod.UNIQUE_KEYS.method);

    /**
     * Determines the set of unique minimal keys for this expression. A key is
     * represented as an {@link com.hazelcast.org.apache.calcite.util.ImmutableBitSet}, where
     * each bit position represents a 0-based output column ordinal.
     *
     * 

Nulls can be ignored if the relational expression has filtered out * null values. * * @param ignoreNulls if true, ignore null values when determining * whether the keys are unique * @return set of keys, or null if this information cannot be determined * (whereas empty set indicates definitely no keys at all) */ Set getUniqueKeys(boolean ignoreNulls); /** Handler API. */ interface Handler extends MetadataHandler { Set getUniqueKeys(RelNode r, RelMetadataQuery mq, boolean ignoreNulls); } } /** Metadata about whether a set of columns uniquely identifies a row. */ public interface ColumnUniqueness extends Metadata { MetadataDef DEF = MetadataDef.of(ColumnUniqueness.class, ColumnUniqueness.Handler.class, BuiltInMethod.COLUMN_UNIQUENESS.method); /** * Determines whether a specified set of columns from a specified relational * expression are unique. * *

For example, if the relational expression is a {@code TableScan} to * T(A, B, C, D) whose key is (A, B), then: *

    *
  • {@code areColumnsUnique([0, 1])} yields true, *
  • {@code areColumnsUnique([0])} yields false, *
  • {@code areColumnsUnique([0, 2])} yields false. *
* *

Nulls can be ignored if the relational expression has filtered out * null values. * * @param columns column mask representing the subset of columns for which * uniqueness will be determined * @param ignoreNulls if true, ignore null values when determining column * uniqueness * @return whether the columns are unique, or * null if not enough information is available to make that determination */ Boolean areColumnsUnique(ImmutableBitSet columns, boolean ignoreNulls); /** Handler API. */ interface Handler extends MetadataHandler { Boolean areColumnsUnique(RelNode r, RelMetadataQuery mq, ImmutableBitSet columns, boolean ignoreNulls); } } /** Metadata about which columns are sorted. */ public interface Collation extends Metadata { MetadataDef DEF = MetadataDef.of(Collation.class, Collation.Handler.class, BuiltInMethod.COLLATIONS.method); /** Determines which columns are sorted. */ ImmutableList collations(); /** Handler API. */ interface Handler extends MetadataHandler { ImmutableList collations(RelNode r, RelMetadataQuery mq); } } /** Metadata about how a relational expression is distributed. * *

If you are an operator consuming a relational expression, which subset * of the rows are you seeing? You might be seeing all of them (BROADCAST * or SINGLETON), only those whose key column values have a particular hash * code (HASH) or only those whose column values have particular values or * ranges of values (RANGE). * *

When a relational expression is partitioned, it is often partitioned * among nodes, but it may be partitioned among threads running on the same * node. */ public interface Distribution extends Metadata { MetadataDef DEF = MetadataDef.of(Distribution.class, Distribution.Handler.class, BuiltInMethod.DISTRIBUTION.method); /** Determines how the rows are distributed. */ RelDistribution distribution(); /** Handler API. */ interface Handler extends MetadataHandler { RelDistribution distribution(RelNode r, RelMetadataQuery mq); } } /** * Metadata about the node types in a relational expression. * *

For each relational expression, it returns a multimap from the class * to the nodes instantiating that class. Each node will appear in the * multimap only once. */ public interface NodeTypes extends Metadata { MetadataDef DEF = MetadataDef.of(NodeTypes.class, NodeTypes.Handler.class, BuiltInMethod.NODE_TYPES.method); /** * Returns a multimap from the class to the nodes instantiating that * class. The default implementation for a node classifies it as a * {@link RelNode}. */ Multimap, RelNode> getNodeTypes(); /** Handler API. */ interface Handler extends MetadataHandler { Multimap, RelNode> getNodeTypes(RelNode r, RelMetadataQuery mq); } } /** Metadata about the number of rows returned by a relational expression. */ public interface RowCount extends Metadata { MetadataDef DEF = MetadataDef.of(RowCount.class, RowCount.Handler.class, BuiltInMethod.ROW_COUNT.method); /** * Estimates the number of rows which will be returned by a relational * expression. The default implementation for this query asks the rel itself * via {@link RelNode#estimateRowCount}, but metadata providers can override this * with their own cost models. * * @return estimated row count, or null if no reliable estimate can be * determined */ Double getRowCount(); /** Handler API. */ interface Handler extends MetadataHandler { Double getRowCount(RelNode r, RelMetadataQuery mq); } } /** Metadata about the maximum number of rows returned by a relational * expression. */ public interface MaxRowCount extends Metadata { MetadataDef DEF = MetadataDef.of(MaxRowCount.class, MaxRowCount.Handler.class, BuiltInMethod.MAX_ROW_COUNT.method); /** * Estimates the max number of rows which will be returned by a relational * expression. * *

The default implementation for this query returns * {@link Double#POSITIVE_INFINITY}, * but metadata providers can override this with their own cost models. * * @return upper bound on the number of rows returned */ Double getMaxRowCount(); /** Handler API. */ interface Handler extends MetadataHandler { Double getMaxRowCount(RelNode r, RelMetadataQuery mq); } } /** Metadata about the minimum number of rows returned by a relational * expression. */ public interface MinRowCount extends Metadata { MetadataDef DEF = MetadataDef.of(MinRowCount.class, MinRowCount.Handler.class, BuiltInMethod.MIN_ROW_COUNT.method); /** * Estimates the minimum number of rows which will be returned by a * relational expression. * *

The default implementation for this query returns 0, * but metadata providers can override this with their own cost models. * * @return lower bound on the number of rows returned */ Double getMinRowCount(); /** Handler API. */ interface Handler extends MetadataHandler { Double getMinRowCount(RelNode r, RelMetadataQuery mq); } } /** Metadata about the number of distinct rows returned by a set of columns * in a relational expression. */ public interface DistinctRowCount extends Metadata { MetadataDef DEF = MetadataDef.of(DistinctRowCount.class, DistinctRowCount.Handler.class, BuiltInMethod.DISTINCT_ROW_COUNT.method); /** * Estimates the number of rows which would be produced by a GROUP BY on the * set of columns indicated by groupKey, where the input to the GROUP BY has * been pre-filtered by predicate. This quantity (leaving out predicate) is * often referred to as cardinality (as in gender being a "low-cardinality * column"). * * @param groupKey column mask representing group by columns * @param predicate pre-filtered predicates * @return distinct row count for groupKey, filtered by predicate, or null * if no reliable estimate can be determined */ Double getDistinctRowCount(ImmutableBitSet groupKey, RexNode predicate); /** Handler API. */ interface Handler extends MetadataHandler { Double getDistinctRowCount(RelNode r, RelMetadataQuery mq, ImmutableBitSet groupKey, RexNode predicate); } } /** Metadata about the proportion of original rows that remain in a relational * expression. */ public interface PercentageOriginalRows extends Metadata { MetadataDef DEF = MetadataDef.of(PercentageOriginalRows.class, PercentageOriginalRows.Handler.class, BuiltInMethod.PERCENTAGE_ORIGINAL_ROWS.method); /** * Estimates the percentage of the number of rows actually produced by a * relational expression out of the number of rows it would produce if all * single-table filter conditions were removed. 
* * @return estimated percentage (between 0.0 and 1.0), or null if no * reliable estimate can be determined */ Double getPercentageOriginalRows(); /** Handler API. */ interface Handler extends MetadataHandler { Double getPercentageOriginalRows(RelNode r, RelMetadataQuery mq); } } /** Metadata about the number of distinct values in the original source of a * column or set of columns. */ public interface PopulationSize extends Metadata { MetadataDef DEF = MetadataDef.of(PopulationSize.class, PopulationSize.Handler.class, BuiltInMethod.POPULATION_SIZE.method); /** * Estimates the distinct row count in the original source for the given * {@code groupKey}, ignoring any filtering being applied by the expression. * Typically, "original source" means base table, but for derived columns, * the estimate may come from a non-leaf rel such as a LogicalProject. * * @param groupKey column mask representing the subset of columns for which * the row count will be determined * @return distinct row count for the given groupKey, or null if no reliable * estimate can be determined */ Double getPopulationSize(ImmutableBitSet groupKey); /** Handler API. */ interface Handler extends MetadataHandler { Double getPopulationSize(RelNode r, RelMetadataQuery mq, ImmutableBitSet groupKey); } } /** Metadata about the size of rows and columns. */ public interface Size extends Metadata { MetadataDef DEF = MetadataDef.of(Size.class, Size.Handler.class, BuiltInMethod.AVERAGE_ROW_SIZE.method, BuiltInMethod.AVERAGE_COLUMN_SIZES.method); /** * Determines the average size (in bytes) of a row from this relational * expression. * * @return average size of a row, in bytes, or null if not known */ Double averageRowSize(); /** * Determines the average size (in bytes) of a value of a column in this * relational expression. * *

Null values are included (presumably they occupy close to 0 bytes). * *

It is left to the caller to decide whether the size is the compressed * size, the uncompressed size, or memory allocation when the value is * wrapped in an object in the Java heap. The uncompressed size is probably * a good compromise. * * @return an immutable list containing, for each column, the average size * of a column value, in bytes. Each value or the entire list may be null if * the metadata is not available */ List averageColumnSizes(); /** Handler API. */ interface Handler extends MetadataHandler { Double averageRowSize(RelNode r, RelMetadataQuery mq); List averageColumnSizes(RelNode r, RelMetadataQuery mq); } } /** Metadata about the origins of columns. */ public interface ColumnOrigin extends Metadata { MetadataDef DEF = MetadataDef.of(ColumnOrigin.class, ColumnOrigin.Handler.class, BuiltInMethod.COLUMN_ORIGIN.method); /** * For a given output column of an expression, determines all columns of * underlying tables which contribute to result values. An output column may * have more than one origin due to expressions such as Union and * LogicalProject. The optimizer may use this information for catalog access * (e.g. index availability). * * @param outputColumn 0-based ordinal for output column of interest * @return set of origin columns, or null if this information cannot be * determined (whereas empty set indicates definitely no origin columns at * all) */ Set getColumnOrigins(int outputColumn); /** Handler API. */ interface Handler extends MetadataHandler { Set getColumnOrigins(RelNode r, RelMetadataQuery mq, int outputColumn); } } /** Metadata about the origins of expressions. */ public interface ExpressionLineage extends Metadata { MetadataDef DEF = MetadataDef.of(ExpressionLineage.class, ExpressionLineage.Handler.class, BuiltInMethod.EXPRESSION_LINEAGE.method); /** * Given the input expression applied on the given {@link RelNode}, this * provider returns the expression with its lineage resolved. * *

In particular, the result will be a set of nodes which might contain * references to columns in TableScan operators ({@link RexTableInputRef}). * An expression can have more than one lineage expression due to Union * operators. However, we do not check column equality in Filter predicates. * Each TableScan operator below the node is identified uniquely by its * qualified name and its entity number. * *

For example, if the expression is {@code $0 + 2} and {@code $0} originated * from column {@code $3} in the {@code 0} occurrence of table {@code A} in the * plan, result will be: {@code A.#0.$3 + 2}. Occurrences are generated in no * particular order, but it is guaranteed that if two expressions referred to the * same table, the qualified name + occurrence will be the same. * * @param expression expression whose lineage we want to resolve * * @return set of expressions with lineage resolved, or null if this information * cannot be determined (e.g. origin of an expression is an aggregation * in an {@link com.hazelcast.org.apache.calcite.rel.core.Aggregate} operator) */ Set getExpressionLineage(RexNode expression); /** Handler API. */ interface Handler extends MetadataHandler { Set getExpressionLineage(RelNode r, RelMetadataQuery mq, RexNode expression); } } /** Metadata to obtain references to tables used by a given expression. */ public interface TableReferences extends Metadata { MetadataDef DEF = MetadataDef.of(TableReferences.class, TableReferences.Handler.class, BuiltInMethod.TABLE_REFERENCES.method); /** * This provider returns the tables used by a given plan. * *

In particular, the result will be a set of unique table references * ({@link RelTableRef}) corresponding to each TableScan operator in the * plan. These table references are composed by the table qualified name * and an entity number. * *

Importantly, the table identifiers returned by this metadata provider * will be consistent with the unique identifiers used by the {@link ExpressionLineage} * provider, meaning that it is guaranteed that same table will use same unique * identifiers in both. * * @return set of unique table identifiers, or null if this information * cannot be determined */ Set getTableReferences(); /** Handler API. */ interface Handler extends MetadataHandler { Set getTableReferences(RelNode r, RelMetadataQuery mq); } } /** Metadata about the cost of evaluating a relational expression, including * all of its inputs. */ public interface CumulativeCost extends Metadata { MetadataDef DEF = MetadataDef.of(CumulativeCost.class, CumulativeCost.Handler.class, BuiltInMethod.CUMULATIVE_COST.method); /** * Estimates the cost of executing a relational expression, including the * cost of its inputs. The default implementation for this query adds * {@link NonCumulativeCost#getNonCumulativeCost} to the cumulative cost of * each input, but metadata providers can override this with their own cost * models, e.g. to take into account interactions between expressions. * * @return estimated cost, or null if no reliable estimate can be * determined */ RelOptCost getCumulativeCost(); /** Handler API. */ interface Handler extends MetadataHandler { RelOptCost getCumulativeCost(RelNode r, RelMetadataQuery mq); } } /** Metadata about the cost of evaluating a relational expression, not * including its inputs. */ public interface NonCumulativeCost extends Metadata { MetadataDef DEF = MetadataDef.of(NonCumulativeCost.class, NonCumulativeCost.Handler.class, BuiltInMethod.NON_CUMULATIVE_COST.method); /** * Estimates the cost of executing a relational expression, not counting the * cost of its inputs. (However, the non-cumulative cost is still usually * dependent on the row counts of the inputs.) 
The default implementation * for this query asks the rel itself via {@link RelNode#computeSelfCost}, * but metadata providers can override this with their own cost models. * * @return estimated cost, or null if no reliable estimate can be * determined */ RelOptCost getNonCumulativeCost(); /** Handler API. */ interface Handler extends MetadataHandler { RelOptCost getNonCumulativeCost(RelNode r, RelMetadataQuery mq); } } /** Metadata about whether a relational expression should appear in a plan. */ public interface ExplainVisibility extends Metadata { MetadataDef DEF = MetadataDef.of(ExplainVisibility.class, ExplainVisibility.Handler.class, BuiltInMethod.EXPLAIN_VISIBILITY.method); /** * Determines whether a relational expression should be visible in EXPLAIN * PLAN output at a particular level of detail. * * @param explainLevel level of detail * @return true for visible, false for invisible */ Boolean isVisibleInExplain(SqlExplainLevel explainLevel); /** Handler API. */ interface Handler extends MetadataHandler { Boolean isVisibleInExplain(RelNode r, RelMetadataQuery mq, SqlExplainLevel explainLevel); } } /** Metadata about the predicates that hold in the rows emitted from a * relational expression. */ public interface Predicates extends Metadata { MetadataDef DEF = MetadataDef.of(Predicates.class, Predicates.Handler.class, BuiltInMethod.PREDICATES.method); /** * Derives the predicates that hold on rows emitted from a relational * expression. * * @return Predicate list */ RelOptPredicateList getPredicates(); /** Handler API. */ interface Handler extends MetadataHandler { RelOptPredicateList getPredicates(RelNode r, RelMetadataQuery mq); } } /** Metadata about the predicates that hold in the rows emitted from a * relational expression. * *

The difference with respect to {@link Predicates} provider is that * this provider tries to extract ALL predicates even if they are not * applied on the output expressions of the relational expression; we rely * on {@link RexTableInputRef} to reference origin columns in * {@link com.hazelcast.org.apache.calcite.rel.core.TableScan} for the result predicates. */ public interface AllPredicates extends Metadata { MetadataDef DEF = MetadataDef.of(AllPredicates.class, AllPredicates.Handler.class, BuiltInMethod.ALL_PREDICATES.method); /** * Derives the predicates that hold on rows emitted from a relational * expression. * * @return predicate list, or null if the provider cannot infer the * lineage for any of the expressions contained in any of the predicates */ RelOptPredicateList getAllPredicates(); /** Handler API. */ interface Handler extends MetadataHandler { RelOptPredicateList getAllPredicates(RelNode r, RelMetadataQuery mq); } } /** Metadata about the degree of parallelism of a relational expression, and * how its operators are assigned to processes with independent resource * pools. */ public interface Parallelism extends Metadata { MetadataDef DEF = MetadataDef.of(Parallelism.class, Parallelism.Handler.class, BuiltInMethod.IS_PHASE_TRANSITION.method, BuiltInMethod.SPLIT_COUNT.method); /** Returns whether each physical operator implementing this relational * expression belongs to a different process than its inputs. * *

A collection of operators processing all of the splits of a particular * stage in the query pipeline is called a "phase". A phase starts with * a leaf node such as a {@link com.hazelcast.org.apache.calcite.rel.core.TableScan}, * or with a phase-change node such as an * {@link com.hazelcast.org.apache.calcite.rel.core.Exchange}. Hadoop's shuffle operator * (a form of sort-exchange) causes data to be sent across the network. */ Boolean isPhaseTransition(); /** Returns the number of distinct splits of the data. * *

Note that splits must be distinct. For broadcast, where each copy is * the same, returns 1. * *

Thus the split count is the proportion of the data seen by * each operator instance. */ Integer splitCount(); /** Handler API. */ interface Handler extends MetadataHandler { Boolean isPhaseTransition(RelNode r, RelMetadataQuery mq); Integer splitCount(RelNode r, RelMetadataQuery mq); } } /** Metadata about the memory use of an operator. */ public interface Memory extends Metadata { MetadataDef DEF = MetadataDef.of(Memory.class, Memory.Handler.class, BuiltInMethod.MEMORY.method, BuiltInMethod.CUMULATIVE_MEMORY_WITHIN_PHASE.method, BuiltInMethod.CUMULATIVE_MEMORY_WITHIN_PHASE_SPLIT.method); /** Returns the expected amount of memory, in bytes, required by a physical * operator implementing this relational expression, across all splits. * *

How much memory is used depends very much on the algorithm; for * example, an implementation of * {@link com.hazelcast.org.apache.calcite.rel.core.Aggregate} that loads all data into a * hash table requires approximately {@code rowCount * averageRowSize} * bytes, whereas an implementation that assumes that the input is sorted * requires only {@code averageRowSize} bytes to maintain a single * accumulator for each aggregate function. */ Double memory(); /** Returns the cumulative amount of memory, in bytes, required by the * physical operator implementing this relational expression, and all other * operators within the same phase, across all splits. * * @see Parallelism#splitCount() */ Double cumulativeMemoryWithinPhase(); /** Returns the expected cumulative amount of memory, in bytes, required by * the physical operator implementing this relational expression, and all * operators within the same phase, within each split. * *

Basic formula: * *

cumulativeMemoryWithinPhaseSplit * = cumulativeMemoryWithinPhase / Parallelism.splitCount
*/ Double cumulativeMemoryWithinPhaseSplit(); /** Handler API. */ interface Handler extends MetadataHandler { Double memory(RelNode r, RelMetadataQuery mq); Double cumulativeMemoryWithinPhase(RelNode r, RelMetadataQuery mq); Double cumulativeMemoryWithinPhaseSplit(RelNode r, RelMetadataQuery mq); } } /** The built-in forms of metadata. */ interface All extends Selectivity, UniqueKeys, RowCount, DistinctRowCount, PercentageOriginalRows, ColumnUniqueness, ColumnOrigin, Predicates, Collation, Distribution, Size, Parallelism, Memory, AllPredicates, ExpressionLineage, TableReferences, NodeTypes { } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy