com.hazelcast.jet.sql.impl.opt.physical.IndexScanMapPhysicalRel Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of hazelcast-sql Show documentation
SQL Parser and Optimizer
There is a newer version: 5.5.0
/*
 * Copyright 2024 Hazelcast Inc.
 *
 * Licensed under the Hazelcast Community License (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://hazelcast.com/hazelcast-community-license
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.sql.impl.opt.physical;

import com.hazelcast.config.IndexType;
import com.hazelcast.function.ComparatorEx;
import com.hazelcast.jet.sql.impl.ExpressionUtil;
import com.hazelcast.jet.sql.impl.HazelcastPhysicalScan;
import com.hazelcast.jet.sql.impl.opt.FieldCollation;
import com.hazelcast.jet.sql.impl.opt.cost.CostUtils;
import com.hazelcast.jet.sql.impl.schema.HazelcastTable;
import com.hazelcast.jet.sql.impl.validate.types.HazelcastTypeUtils;
import com.hazelcast.sql.impl.QueryParameterMetadata;
import com.hazelcast.sql.impl.exec.scan.index.IndexFilter;
import com.hazelcast.sql.impl.plan.node.PlanNodeSchema;
import com.hazelcast.sql.impl.row.JetSqlRow;
import com.hazelcast.sql.impl.schema.map.MapTableIndex;
import com.hazelcast.sql.impl.type.QueryDataType;
import com.hazelcast.shaded.org.apache.calcite.plan.RelOptCluster;
import com.hazelcast.shaded.org.apache.calcite.plan.RelOptCost;
import com.hazelcast.shaded.org.apache.calcite.plan.RelOptPlanner;
import com.hazelcast.shaded.org.apache.calcite.plan.RelOptTable;
import com.hazelcast.shaded.org.apache.calcite.plan.RelTraitSet;
import com.hazelcast.shaded.org.apache.calcite.rel.RelCollation;
import com.hazelcast.shaded.org.apache.calcite.rel.RelCollations;
import com.hazelcast.shaded.org.apache.calcite.rel.RelFieldCollation;
import com.hazelcast.shaded.org.apache.calcite.rel.RelNode;
import com.hazelcast.shaded.org.apache.calcite.rel.RelWriter;
import com.hazelcast.shaded.org.apache.calcite.rel.core.TableScan;
import com.hazelcast.shaded.org.apache.calcite.rel.metadata.RelMdUtil;
import com.hazelcast.shaded.org.apache.calcite.rel.metadata.RelMetadataQuery;
import com.hazelcast.shaded.org.apache.calcite.rex.RexNode;

import java.util.List;

import static com.hazelcast.jet.impl.util.Util.toList;
import static java.util.stream.Collectors.toList;

/**
 * Map index scan operator.
 */
public class IndexScanMapPhysicalRel extends TableScan implements HazelcastPhysicalScan {

    private final MapTableIndex index;
    private final IndexFilter indexFilter;
    private final RexNode indexExp;
    private final RexNode remainderExp;

    public IndexScanMapPhysicalRel(
            RelOptCluster cluster,
            RelTraitSet traitSet,
            RelOptTable table,
            MapTableIndex index,
            IndexFilter indexFilter,
            RexNode indexExp,
            RexNode remainderExp
    ) {
        super(cluster, traitSet, table);

        this.index = index;
        this.indexFilter = indexFilter;
        this.indexExp = indexExp;
        this.remainderExp = remainderExp;
    }

    public MapTableIndex getIndex() {
        return index;
    }

    public IndexFilter getIndexFilter() {
        return indexFilter;
    }

    public RexNode getRemainderExp() {
        return remainderExp;
    }

    public ComparatorEx getComparator() {
        if (index.getType() == IndexType.SORTED) {
            RelCollation relCollation = getTraitSet().getCollation();
            assert relCollation != null;
            List collations;
            if (relCollation != RelCollations.EMPTY) {
                collations = FieldCollation.convertCollation(relCollation.getFieldCollations());
            } else {
                // order is not forced, use default comparator for the index columns
                collations = index.getFieldOrdinals().stream()
                        .map(i -> new FieldCollation(i, RelFieldCollation.Direction.ASCENDING,
                                RelFieldCollation.NullDirection.UNSPECIFIED))
                        .collect(toList());
            }
            return ExpressionUtil.comparisonFn(collations);
        } else {
            return null;
        }
    }

    public boolean isDescending() {
        boolean descending = false;
        RelCollation relCollation = getTraitSet().getCollation();

        // Take first direction as main direction.
        // In case of different directions Scan + Sort relations combination should be used.
        if (!relCollation.getFieldCollations().isEmpty()) {
            descending = relCollation.getFieldCollations().get(0).getDirection().isDescending();
        }

        return descending;
    }

    public boolean requiresSort() {
        return index.getType() == IndexType.SORTED && getTraitSet().getCollation() != RelCollations.EMPTY;
    }


    @Override
    public RexNode filter() {
        return remainderExp;
    }

    @Override
    public List projection() {
        return getTable().unwrap(HazelcastTable.class).getProjects();
    }

    public HazelcastTable getTableUnwrapped() {
        return table.unwrap(HazelcastTable.class);
    }

    @Override
    public PlanNodeSchema schema(QueryParameterMetadata parameterMetadata) {
        List fieldTypes = toList(projection(), rexNode -> HazelcastTypeUtils.toHazelcastType(rexNode.getType()));
        return new PlanNodeSchema(fieldTypes);
    }

    @Override
    public  V accept(CreateDagVisitor visitor) {
        return visitor.onMapIndexScan(this);
    }

    @Override
    public double estimateRowCount(RelMetadataQuery mq) {
        double rowCount = table.getRowCount();

        if (indexExp != null) {
            rowCount = CostUtils.adjustFilteredRowCount(rowCount, RelMdUtil.guessSelectivity(indexExp));
        }

        if (remainderExp != null) {
            rowCount = CostUtils.adjustFilteredRowCount(rowCount, RelMdUtil.guessSelectivity(remainderExp));
        }

        return rowCount;
    }

    @Override
    public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) {
        // Get the number of rows being scanned. This is either the whole index (scan), or only part of the index (lookup)
        double scanRowCount = table.getRowCount();

        if (indexExp != null) {
            scanRowCount = CostUtils.adjustFilteredRowCount(scanRowCount, RelMdUtil.guessSelectivity(indexExp));
        }

        // Get the number of rows that we expect after the remainder filter is applied.
        boolean hasFilter = remainderExp != null;
        double filterRowCount = scanRowCount;

        if (hasFilter) {
            filterRowCount = CostUtils.adjustFilteredRowCount(filterRowCount, RelMdUtil.guessSelectivity(remainderExp));
        }

        return computeSelfCost(
                planner,
                scanRowCount,
                CostUtils.indexScanCpuMultiplier(index.getType()),
                hasFilter,
                filterRowCount,
                requiresSort(),
                getTableUnwrapped().getProjects().size()
        );
    }

    private static RelOptCost computeSelfCost(
            RelOptPlanner planner,
            double scanRowCount,
            double scanCostMultiplier,
            boolean hasFilter,
            double filterRowCount,
            boolean requiresSort,
            int projectCount
    ) {
        // 1. Get cost of the scan itself.
        double scanCpu = scanRowCount * scanCostMultiplier;

        // 2. Get cost of the filter, if any.
        double filterCpu = hasFilter ? CostUtils.adjustCpuForConstrainedScan(scanCpu) : 0;

        // 3. Get cost of the project taking into account the filter and number of expressions. Project never produces IO.
        double projectCpu = CostUtils.adjustCpuForConstrainedScan(CostUtils.getProjectCpu(filterRowCount, projectCount));

        // 4. Cost of merging the rows into a sorted stream, it depends on the number of resulting rows and their size.
        // Note that no full sorting is performed, sorted row streams are only merged.
        double sortCpu = requiresSort ? CostUtils.INDEX_SCAN_CPU_MULTIPLIER_SORTED_ORDER_REQUIRED * projectCpu : 0;

        // 5. Finally, return sum of both scan and project.
        return planner.getCostFactory().makeCost(
                filterRowCount,
                scanCpu + filterCpu + projectCpu + sortCpu,
                0
        );
    }

    @Override
    public RelWriter explainTerms(RelWriter pw) {
        return super.explainTerms(pw)
                .item("index", index.getName())
                .item("indexExp", indexExp)
                .item("remainderExp", remainderExp)
                .itemIf("requiresSort", requiresSort(), requiresSort());
    }

    @Override
    public RelNode copy(RelTraitSet traitSet, List inputs) {
        return new IndexScanMapPhysicalRel(getCluster(), traitSet, getTable(), index, indexFilter, indexExp, remainderExp);
    }


    /**
     * Return copy of this RelNode without collation - without guaranteed order.
     * 
     * Sorted index scan can work in two modes because it is distributed scatter-gather operation:
     * 

     *     with guaranteed order which is defined by index columns. This mode requires gathering
     *     all results before further processing to guarantee the order
     *     without guaranteed order, if the order is not needed. This mode does not require
     *     gathering results so should be much more performant.
     * 
     * Some operations do not care about the order of input data (e.g. aggregations) so they can
     * benefit from this mode of operation.
     */
    public RelNode withoutCollation() {
        return copy(traitSet.replace(RelCollations.EMPTY), getInputs());
    }
}