All downloads are free. Search and download functionality uses the official Maven repository.

com.hazelcast.jet.sql.impl.parse.QueryConverter Maven / Gradle / Ivy

There is a newer version: 5.5.0
Show newest version
/*
 * Copyright 2024 Hazelcast Inc.
 *
 * Licensed under the Hazelcast Community License (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://hazelcast.com/hazelcast-community-license
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.sql.impl.parse;

import com.hazelcast.jet.sql.impl.HazelcastSqlToRelConverter;
import com.hazelcast.jet.sql.impl.opt.ExtractUpdateExpressionsRule;
import com.hazelcast.jet.sql.impl.opt.logical.CalcMergeRule;
import com.hazelcast.shaded.org.apache.calcite.plan.Contexts;
import com.hazelcast.shaded.org.apache.calcite.plan.RelOptCluster;
import com.hazelcast.shaded.org.apache.calcite.plan.RelOptCostImpl;
import com.hazelcast.shaded.org.apache.calcite.plan.hep.HepPlanner;
import com.hazelcast.shaded.org.apache.calcite.plan.hep.HepProgram;
import com.hazelcast.shaded.org.apache.calcite.plan.hep.HepProgramBuilder;
import com.hazelcast.shaded.org.apache.calcite.prepare.Prepare;
import com.hazelcast.shaded.org.apache.calcite.prepare.Prepare.CatalogReader;
import com.hazelcast.shaded.org.apache.calcite.rel.RelNode;
import com.hazelcast.shaded.org.apache.calcite.rel.RelRoot;
import com.hazelcast.shaded.org.apache.calcite.rel.RelVisitor;
import com.hazelcast.shaded.org.apache.calcite.rel.core.Calc;
import com.hazelcast.shaded.org.apache.calcite.rel.core.Filter;
import com.hazelcast.shaded.org.apache.calcite.rel.core.Project;
import com.hazelcast.shaded.org.apache.calcite.rel.logical.LogicalFilter;
import com.hazelcast.shaded.org.apache.calcite.rel.rules.CoreRules;
import com.hazelcast.shaded.org.apache.calcite.rel.rules.PruneEmptyRules;
import com.hazelcast.shaded.org.apache.calcite.rex.RexSubQuery;
import com.hazelcast.shaded.org.apache.calcite.rex.RexVisitorImpl;
import com.hazelcast.shaded.org.apache.calcite.sql.SqlKind;
import com.hazelcast.shaded.org.apache.calcite.sql.SqlNode;
import com.hazelcast.shaded.org.apache.calcite.sql.validate.SqlValidator;
import com.hazelcast.shaded.org.apache.calcite.sql2rel.RelDecorrelator;
import com.hazelcast.shaded.org.apache.calcite.sql2rel.SqlToRelConverter;
import com.hazelcast.shaded.org.apache.calcite.sql2rel.StandardConvertletTable;
import com.hazelcast.shaded.org.apache.calcite.tools.RelBuilder;
import com.hazelcast.shaded.org.apache.calcite.util.Pair;

import javax.annotation.Nullable;

/**
 * Converts a parse tree into a relational tree.
 */
public class QueryConverter {
    public static final SqlToRelConverter.Config CONFIG;

    private static final HepProgram HEP_CALC_UNION_REWRITER_PROGRAM;

    /**
     * Whether to expand subqueries. When set to {@code false}, subqueries are left as is in the form of
     * {@link com.hazelcast.shaded.org.apache.calcite.rex.RexSubQuery}. Otherwise they are expanded into {@link com.hazelcast.shaded.org.apache.calcite.rel.core.Correlate}
     * instances.
     * Do not enable this because you may run into https://issues.apache.org/jira/browse/CALCITE-3484. Instead, subquery
     * elimination rules are executed during logical planning. In addition, resulting plans are slightly better that those
     * produced by "expand" flag.
     */
    private static final boolean EXPAND = false;

    /**
     * Whether to trim unused fields. The trimming is needed after subquery elimination.
     */
    private static final boolean TRIM_UNUSED_FIELDS = true;

    /**
     * Increase the maximum number of elements in the RHS to convert the IN operator to a sequence of OR comparisons.
     */
    private static final int HAZELCAST_IN_ELEMENTS_THRESHOLD = 10_000;

    static {
        CONFIG = SqlToRelConverter.config()
                .withExpand(EXPAND)
                .withInSubQueryThreshold(HAZELCAST_IN_ELEMENTS_THRESHOLD)
                .withTrimUnusedFields(TRIM_UNUSED_FIELDS);

        HEP_CALC_UNION_REWRITER_PROGRAM = prepareCalcAndUnionRewriterProgram();
    }

    private final SqlValidator validator;
    private final Prepare.CatalogReader catalogReader;
    private final RelOptCluster cluster;

    /**
     * HEP planner program for unconditional rewrites
     */
    private final HepProgram subqueryRewriterProgram;

    public QueryConverter(
            SqlValidator validator,
            CatalogReader catalogReader,
            RelOptCluster cluster,
            HepProgram subqueryRewriterProgram) {
        this.validator = validator;
        this.catalogReader = catalogReader;
        this.cluster = cluster;
        this.subqueryRewriterProgram = subqueryRewriterProgram;
    }

    public QueryConvertResult convert(SqlNode node) {
        SqlToRelConverter converter = createSqlToRelConverter();

        // 1. Perform initial conversion.
        RelRoot root = converter.convertQuery(node, false, true);

        // 2. Perform unconditional rewrites, such as:
        // - remove subquery expressions, converting them to Correlate nodes.
        // - transform distinct UNION to UNION ALL, merging the neighboring UNION relations.
        // - check, if the relation uses cyclic user types, but if they are allowed - skip this step.
        RelNode relNoSubqueries = performUnconditionalRewrites(root.project());

        // 3. Perform decorrelation, i.e. rewrite a nested loop where the right side depends on the value of the left side,
        // to a variation of joins, semijoins and aggregations, which could be executed much more efficiently.
        // See "Unnesting Arbitrary Queries", Thomas Neumann and Alfons Kemper.
        RelNode result = converter.decorrelate(node, relNoSubqueries);

        // 4. The side effect of subquery rewrite and decorrelation in Apache Calcite is a number of unnecessary fields,
        // primarily in projections. This steps removes unused fields from the tree.
        //
        // Due to a (possible) Calcite bug, we're not doing it if there are nested EXISTS calls.
        // The bug is likely in decorrelation which produces LogicalAggregate with 0 output columns.
        if (!hasNestedExists(root.rel)) {
            result = converter.trimUnusedFields(true, result);
        }

        // 5. Transform projects and filters to Calc.
        result = transformProjectAndFilterIntoCalc(result);

        // 6. Collect original field names.
        return new QueryConvertResult(result, Pair.right(root.fields));
    }

    public RelNode convertView(SqlNode node) {
        HazelcastSqlToRelConverter sqlToRelConverter = createSqlToRelConverter();

        final RelRoot root = sqlToRelConverter.convertQuery(node, true, true);
        final RelRoot root2 = root.withRel(sqlToRelConverter.flattenTypes(root.rel, true));

        final RelBuilder relBuilder = QueryConverter.CONFIG.getRelBuilderFactory().create(cluster, null);
        RelRoot root3 = root2.withRel(RelDecorrelator.decorrelateQuery(root.rel, relBuilder));
        return root3.project();
    }

    private HazelcastSqlToRelConverter createSqlToRelConverter() {
        return new HazelcastSqlToRelConverter(validator, catalogReader, cluster,
                StandardConvertletTable.INSTANCE, QueryConverter.CONFIG);
    }

    /**
     * Initial query optimization step. It includes
     * 
    *
  • * Correlated subqueries elimination, converting them to various forms of joins. * It is used instead of "expand" flag due to bugs in Calcite (see {@link #EXPAND}). *
  • *
  • * Check, if the relation uses cyclic user types, and if they are allowed - skip this step. *
  • *
* * @param rel Initial relation. * @return Resulting relation. * @implNote {@link QueryConverter#subqueryRewriterProgram} is a per-HZ instance program, * and should be movedto the static context after the stabilization of the cyclic UDTs. */ private RelNode performUnconditionalRewrites(RelNode rel) { HepPlanner planner = new HepPlanner( subqueryRewriterProgram, Contexts.empty(), true, null, RelOptCostImpl.FACTORY ); planner.setRoot(rel); return planner.findBestExp(); } /** * Second unconditional query optimization step. It includes *
    *
  • * Extract unsupported source expressions from an UPDATE stmt into a {@link Calc}. *
  • *
  • * Transformation of distinct UNION to UNION ALL, merging the neighboring UNION relations. *
  • *
  • * Transformation of {@link Project} and {@link Filter} relations to {@link Calc} *
  • *
* * @param rel Initial relation. * @return Resulting relation. */ private RelNode transformProjectAndFilterIntoCalc(RelNode rel) { // TODO: [sasha] Move more rules to unconditionally rewrite rel tree. HepPlanner planner = new HepPlanner( HEP_CALC_UNION_REWRITER_PROGRAM, Contexts.empty(), true, null, RelOptCostImpl.FACTORY ); planner.setRoot(rel); return planner.findBestExp(); } private static boolean hasNestedExists(RelNode root) { class NestedExistsFinder extends RelVisitor { private boolean found; private int depth; @Override public void visit(RelNode node, int ordinal, @Nullable RelNode parent) { if (node instanceof LogicalFilter) { RexSubQuery exists = getExists((LogicalFilter) node); if (exists != null) { found |= depth > 0; depth++; go(exists.rel); depth--; } } super.visit(node, ordinal, parent); } private boolean find() { go(root); return found; } private RexSubQuery getExists(LogicalFilter filter) { RexSubQuery[] existsSubQuery = {null}; filter.getCondition().accept(new RexVisitorImpl(true) { @Override public Void visitSubQuery(RexSubQuery subQuery) { if (subQuery.getKind() == SqlKind.EXISTS) { existsSubQuery[0] = subQuery; } return super.visitSubQuery(subQuery); } }); return existsSubQuery[0]; } } return new NestedExistsFinder().find(); } // Note: it must be used only once in static class initializer. 
private static HepProgram prepareCalcAndUnionRewriterProgram() { HepProgramBuilder hepProgramBuilder = new HepProgramBuilder(); // Special rules hepProgramBuilder.addRuleInstance(ExtractUpdateExpressionsRule.INSTANCE); // Filter rules hepProgramBuilder.addRuleInstance(CoreRules.FILTER_MERGE) .addRuleInstance(CoreRules.FILTER_AGGREGATE_TRANSPOSE) .addRuleInstance(CoreRules.FILTER_INTO_JOIN) .addRuleInstance(CoreRules.FILTER_REDUCE_EXPRESSIONS) .addRuleInstance(PruneEmptyRules.FILTER_INSTANCE); // Project rules hepProgramBuilder.addRuleInstance(CoreRules.PROJECT_MERGE) .addRuleInstance(CoreRules.PROJECT_REMOVE) .addRuleInstance(PruneEmptyRules.PROJECT_INSTANCE); // Join rules hepProgramBuilder.addRuleInstance(CoreRules.JOIN_REDUCE_EXPRESSIONS) .addRuleInstance(CoreRules.JOIN_PROJECT_RIGHT_TRANSPOSE_INCLUDE_OUTER); // Calc rules hepProgramBuilder.addRuleInstance(CoreRules.PROJECT_TO_CALC) .addRuleInstance(CoreRules.FILTER_TO_CALC) .addRuleInstance(CalcMergeRule.INSTANCE) .addRuleInstance(CoreRules.CALC_REMOVE); // Union optimization rules hepProgramBuilder.addRuleInstance(CoreRules.UNION_MERGE) .addRuleInstance(CoreRules.UNION_TO_DISTINCT); return hepProgramBuilder.build(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy