// com.hazelcast.jet.sql.impl.parse.QueryConverter (source listing header: Maven / Gradle / Ivy)
/*
* Copyright 2024 Hazelcast Inc.
*
* Licensed under the Hazelcast Community License (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://hazelcast.com/hazelcast-community-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.jet.sql.impl.parse;
import com.hazelcast.jet.sql.impl.HazelcastSqlToRelConverter;
import com.hazelcast.jet.sql.impl.opt.ExtractUpdateExpressionsRule;
import com.hazelcast.jet.sql.impl.opt.logical.CalcMergeRule;
import com.hazelcast.shaded.org.apache.calcite.plan.Contexts;
import com.hazelcast.shaded.org.apache.calcite.plan.RelOptCluster;
import com.hazelcast.shaded.org.apache.calcite.plan.RelOptCostImpl;
import com.hazelcast.shaded.org.apache.calcite.plan.hep.HepPlanner;
import com.hazelcast.shaded.org.apache.calcite.plan.hep.HepProgram;
import com.hazelcast.shaded.org.apache.calcite.plan.hep.HepProgramBuilder;
import com.hazelcast.shaded.org.apache.calcite.prepare.Prepare;
import com.hazelcast.shaded.org.apache.calcite.prepare.Prepare.CatalogReader;
import com.hazelcast.shaded.org.apache.calcite.rel.RelNode;
import com.hazelcast.shaded.org.apache.calcite.rel.RelRoot;
import com.hazelcast.shaded.org.apache.calcite.rel.RelVisitor;
import com.hazelcast.shaded.org.apache.calcite.rel.core.Calc;
import com.hazelcast.shaded.org.apache.calcite.rel.core.Filter;
import com.hazelcast.shaded.org.apache.calcite.rel.core.Project;
import com.hazelcast.shaded.org.apache.calcite.rel.logical.LogicalFilter;
import com.hazelcast.shaded.org.apache.calcite.rel.rules.CoreRules;
import com.hazelcast.shaded.org.apache.calcite.rel.rules.PruneEmptyRules;
import com.hazelcast.shaded.org.apache.calcite.rex.RexSubQuery;
import com.hazelcast.shaded.org.apache.calcite.rex.RexVisitorImpl;
import com.hazelcast.shaded.org.apache.calcite.sql.SqlKind;
import com.hazelcast.shaded.org.apache.calcite.sql.SqlNode;
import com.hazelcast.shaded.org.apache.calcite.sql.validate.SqlValidator;
import com.hazelcast.shaded.org.apache.calcite.sql2rel.RelDecorrelator;
import com.hazelcast.shaded.org.apache.calcite.sql2rel.SqlToRelConverter;
import com.hazelcast.shaded.org.apache.calcite.sql2rel.StandardConvertletTable;
import com.hazelcast.shaded.org.apache.calcite.tools.RelBuilder;
import com.hazelcast.shaded.org.apache.calcite.util.Pair;
import javax.annotation.Nullable;
/**
 * Converts a validated SQL parse tree ({@link SqlNode}) into a relational tree ({@link RelNode}).
 * <p>
 * Besides the plain SQL-to-rel conversion, the converter runs a fixed sequence of unconditional
 * rewrites: subquery rewriting (via the injected HEP program), decorrelation, unused field trimming
 * and the transformation of {@link Project}/{@link Filter} relations into {@link Calc}.
 */
public class QueryConverter {
    /**
     * Whether to expand subqueries. When set to {@code false}, subqueries are left as is in the form of
     * {@link RexSubQuery}. Otherwise they are expanded into
     * {@link com.hazelcast.shaded.org.apache.calcite.rel.core.Correlate} instances.
     * <p>
     * Do not enable this because you may run into https://issues.apache.org/jira/browse/CALCITE-3484.
     * Instead, subquery elimination rules are executed during logical planning. In addition, resulting
     * plans are slightly better than those produced by the "expand" flag.
     */
    private static final boolean EXPAND = false;

    /**
     * Whether to trim unused fields. The trimming is needed after subquery elimination.
     */
    private static final boolean TRIM_UNUSED_FIELDS = true;

    /**
     * Increased maximum number of elements in the RHS for which the IN operator is converted
     * to a sequence of OR comparisons.
     */
    private static final int HAZELCAST_IN_ELEMENTS_THRESHOLD = 10_000;

    /**
     * Shared converter configuration built from the constants above.
     */
    public static final SqlToRelConverter.Config CONFIG = SqlToRelConverter.config()
            .withExpand(EXPAND)
            .withInSubQueryThreshold(HAZELCAST_IN_ELEMENTS_THRESHOLD)
            .withTrimUnusedFields(TRIM_UNUSED_FIELDS);

    /**
     * Static HEP program used by {@link #transformProjectAndFilterIntoCalc(RelNode)}.
     */
    private static final HepProgram HEP_CALC_UNION_REWRITER_PROGRAM = prepareCalcAndUnionRewriterProgram();

    private final SqlValidator validator;
    private final Prepare.CatalogReader catalogReader;
    private final RelOptCluster cluster;

    /**
     * HEP planner program for unconditional rewrites.
     */
    private final HepProgram subqueryRewriterProgram;

    /**
     * Creates a converter. The arguments are stored as is; no validation is performed.
     *
     * @param validator               validator handed to the underlying SQL-to-rel converter
     * @param catalogReader           catalog reader handed to the underlying SQL-to-rel converter
     * @param cluster                 cluster in which relational nodes are created
     * @param subqueryRewriterProgram HEP program executed by the first unconditional rewrite step
     */
    public QueryConverter(
            SqlValidator validator,
            CatalogReader catalogReader,
            RelOptCluster cluster,
            HepProgram subqueryRewriterProgram) {
        this.validator = validator;
        this.catalogReader = catalogReader;
        this.cluster = cluster;
        this.subqueryRewriterProgram = subqueryRewriterProgram;
    }

    /**
     * Converts a query parse tree into a relational tree and applies the unconditional rewrites.
     *
     * @param node parse tree of the query
     * @return the rewritten relational tree together with the original field names of the query
     */
    public QueryConvertResult convert(SqlNode node) {
        SqlToRelConverter converter = createSqlToRelConverter();

        // 1. Perform initial conversion.
        RelRoot root = converter.convertQuery(node, false, true);

        // 2. Perform unconditional rewrites defined by the injected subquery rewriter program, such as:
        //    - remove subquery expressions, converting them to Correlate nodes.
        //    - check, if the relation uses cyclic user types, but if they are allowed - skip this step.
        RelNode relNoSubqueries = performUnconditionalRewrites(root.project());

        // 3. Perform decorrelation, i.e. rewrite a nested loop where the right side depends on the value
        //    of the left side, to a variation of joins, semijoins and aggregations, which could be executed
        //    much more efficiently.
        //    See "Unnesting Arbitrary Queries", Thomas Neumann and Alfons Kemper.
        RelNode result = converter.decorrelate(node, relNoSubqueries);

        // 4. The side effect of subquery rewrite and decorrelation in Apache Calcite is a number of
        //    unnecessary fields, primarily in projections. This step removes unused fields from the tree.
        //
        //    Due to a (possible) Calcite bug, we're not doing it if there are nested EXISTS calls.
        //    The bug is likely in decorrelation which produces LogicalAggregate with 0 output columns.
        //    The check runs on the tree from step 1, which still contains the subquery expressions.
        if (!hasNestedExists(root.rel)) {
            result = converter.trimUnusedFields(true, result);
        }

        // 5. Transform projects and filters to Calc; the same program also merges neighboring UNION
        //    relations and rewrites distinct UNION via UNION ALL.
        result = transformProjectAndFilterIntoCalc(result);

        // 6. Collect original field names.
        return new QueryConvertResult(result, Pair.right(root.fields));
    }

    /**
     * Converts the parse tree of a view definition into a relational tree.
     *
     * @param node parse tree of the view query
     * @return the decorrelated relational tree, projected to the fields of the view query
     */
    public RelNode convertView(SqlNode node) {
        HazelcastSqlToRelConverter sqlToRelConverter = createSqlToRelConverter();

        final RelRoot root = sqlToRelConverter.convertQuery(node, true, true);
        final RelRoot root2 = root.withRel(sqlToRelConverter.flattenTypes(root.rel, true));
        final RelBuilder relBuilder = QueryConverter.CONFIG.getRelBuilderFactory().create(cluster, null);
        // NOTE(review): decorrelation is fed with root.rel, not root2.rel, so the result of flattenTypes
        // above never reaches the returned tree. Kept as is on purpose, because switching to root2.rel
        // could change the plans produced for views - confirm whether this is intended.
        RelRoot root3 = root2.withRel(RelDecorrelator.decorrelateQuery(root.rel, relBuilder));
        return root3.project();
    }

    /**
     * Creates a fresh SQL-to-rel converter bound to this instance's validator, catalog reader and
     * cluster, using {@link #CONFIG}.
     */
    private HazelcastSqlToRelConverter createSqlToRelConverter() {
        return new HazelcastSqlToRelConverter(validator, catalogReader, cluster,
                StandardConvertletTable.INSTANCE, QueryConverter.CONFIG);
    }

    /**
     * Initial query optimization step. It includes
     * <ul>
     * <li>
     * Correlated subqueries elimination, converting them to various forms of joins.
     * It is used instead of "expand" flag due to bugs in Calcite (see {@link #EXPAND}).
     * </li>
     * <li>
     * Check, if the relation uses cyclic user types, and if they are allowed - skip this step.
     * </li>
     * </ul>
     *
     * @param rel Initial relation.
     * @return Resulting relation.
     * @implNote {@link QueryConverter#subqueryRewriterProgram} is a per-HZ instance program,
     * and should be moved to the static context after the stabilization of the cyclic UDTs.
     */
    private RelNode performUnconditionalRewrites(RelNode rel) {
        return runHepProgram(subqueryRewriterProgram, rel);
    }

    /**
     * Second unconditional query optimization step. It includes
     * <ul>
     * <li>
     * Extract unsupported source expressions from an UPDATE stmt into a {@link Calc}.
     * </li>
     * <li>
     * Transformation of distinct UNION to UNION ALL, merging the neighboring UNION relations.
     * </li>
     * <li>
     * Transformation of {@link Project} and {@link Filter} relations to {@link Calc}
     * </li>
     * </ul>
     *
     * @param rel Initial relation.
     * @return Resulting relation.
     */
    private RelNode transformProjectAndFilterIntoCalc(RelNode rel) {
        // TODO: [sasha] Move more rules to unconditionally rewrite rel tree.
        return runHepProgram(HEP_CALC_UNION_REWRITER_PROGRAM, rel);
    }

    /**
     * Runs the given HEP program over the relation with a freshly created {@link HepPlanner}.
     *
     * @param program HEP program to execute
     * @param rel     root of the relational tree to rewrite
     * @return the expression selected by the planner
     */
    private static RelNode runHepProgram(HepProgram program, RelNode rel) {
        HepPlanner planner = new HepPlanner(
                program,
                Contexts.empty(),
                true,
                null,
                RelOptCostImpl.FACTORY
        );
        planner.setRoot(rel);
        return planner.findBestExp();
    }

    /**
     * Checks whether the tree contains an EXISTS subquery located inside the relation of another
     * EXISTS subquery. Only subqueries in {@link LogicalFilter} conditions are inspected.
     *
     * @param root root of the relational tree that still contains subquery expressions
     * @return {@code true} if at least one EXISTS is nested in another EXISTS
     */
    private static boolean hasNestedExists(RelNode root) {
        class NestedExistsFinder extends RelVisitor {
            private boolean found;
            /** Number of EXISTS subqueries enclosing the relation currently being visited. */
            private int depth;

            /**
             * Descends into the relation of every EXISTS subquery of a filter condition.
             * All of them are followed, not just one per condition, so that a nested EXISTS is
             * detected also in conditions like {@code EXISTS(...) AND EXISTS(...)}.
             */
            private final RexVisitorImpl<Void> existsVisitor = new RexVisitorImpl<Void>(true) {
                @Override
                public Void visitSubQuery(RexSubQuery subQuery) {
                    if (subQuery.getKind() == SqlKind.EXISTS) {
                        // An EXISTS reached while already inside another EXISTS is a nested one.
                        found |= depth > 0;
                        depth++;
                        go(subQuery.rel);
                        depth--;
                    }
                    return super.visitSubQuery(subQuery);
                }
            };

            @Override
            public void visit(RelNode node, int ordinal, @Nullable RelNode parent) {
                if (node instanceof LogicalFilter) {
                    ((LogicalFilter) node).getCondition().accept(existsVisitor);
                }
                // Continue with the regular inputs of the node.
                super.visit(node, ordinal, parent);
            }

            private boolean find() {
                go(root);
                return found;
            }
        }

        return new NestedExistsFinder().find();
    }

    /**
     * Builds the HEP program behind {@link #HEP_CALC_UNION_REWRITER_PROGRAM}.
     * <p>
     * Note: it must be used only once in static class initializer.
     */
    private static HepProgram prepareCalcAndUnionRewriterProgram() {
        HepProgramBuilder hepProgramBuilder = new HepProgramBuilder();

        // Special rules
        hepProgramBuilder.addRuleInstance(ExtractUpdateExpressionsRule.INSTANCE);

        // Filter rules
        hepProgramBuilder.addRuleInstance(CoreRules.FILTER_MERGE)
                .addRuleInstance(CoreRules.FILTER_AGGREGATE_TRANSPOSE)
                .addRuleInstance(CoreRules.FILTER_INTO_JOIN)
                .addRuleInstance(CoreRules.FILTER_REDUCE_EXPRESSIONS)
                .addRuleInstance(PruneEmptyRules.FILTER_INSTANCE);

        // Project rules
        hepProgramBuilder.addRuleInstance(CoreRules.PROJECT_MERGE)
                .addRuleInstance(CoreRules.PROJECT_REMOVE)
                .addRuleInstance(PruneEmptyRules.PROJECT_INSTANCE);

        // Join rules
        hepProgramBuilder.addRuleInstance(CoreRules.JOIN_REDUCE_EXPRESSIONS)
                .addRuleInstance(CoreRules.JOIN_PROJECT_RIGHT_TRANSPOSE_INCLUDE_OUTER);

        // Calc rules
        hepProgramBuilder.addRuleInstance(CoreRules.PROJECT_TO_CALC)
                .addRuleInstance(CoreRules.FILTER_TO_CALC)
                .addRuleInstance(CalcMergeRule.INSTANCE)
                .addRuleInstance(CoreRules.CALC_REMOVE);

        // Union optimization rules
        hepProgramBuilder.addRuleInstance(CoreRules.UNION_MERGE)
                .addRuleInstance(CoreRules.UNION_TO_DISTINCT);

        return hepProgramBuilder.build();
    }
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy