All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hazelcast.jet.sql.impl.CalciteSqlOptimizer Maven / Gradle / Ivy

There is a newer version: 5.5.0
Show newest version
/*
 * Copyright 2021 Hazelcast Inc.
 *
 * Licensed under the Hazelcast Community License (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://hazelcast.com/hazelcast-community-license
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.sql.impl;

import com.hazelcast.cluster.memberselector.MemberSelectors;
import com.hazelcast.jet.sql.impl.JetPlan.AlterJobPlan;
import com.hazelcast.jet.sql.impl.JetPlan.CreateJobPlan;
import com.hazelcast.jet.sql.impl.JetPlan.CreateMappingPlan;
import com.hazelcast.jet.sql.impl.JetPlan.CreateSnapshotPlan;
import com.hazelcast.jet.sql.impl.JetPlan.DmlPlan;
import com.hazelcast.jet.sql.impl.JetPlan.DropJobPlan;
import com.hazelcast.jet.sql.impl.JetPlan.DropMappingPlan;
import com.hazelcast.jet.sql.impl.JetPlan.DropSnapshotPlan;
import com.hazelcast.jet.sql.impl.JetPlan.IMapDeletePlan;
import com.hazelcast.jet.sql.impl.JetPlan.IMapInsertPlan;
import com.hazelcast.jet.sql.impl.JetPlan.IMapSelectPlan;
import com.hazelcast.jet.sql.impl.JetPlan.IMapSinkPlan;
import com.hazelcast.jet.sql.impl.JetPlan.IMapUpdatePlan;
import com.hazelcast.jet.sql.impl.JetPlan.SelectPlan;
import com.hazelcast.jet.sql.impl.JetPlan.ShowStatementPlan;
import com.hazelcast.jet.sql.impl.connector.SqlConnectorCache;
import com.hazelcast.jet.sql.impl.connector.map.MetadataResolver;
import com.hazelcast.jet.sql.impl.opt.JetConventions;
import com.hazelcast.jet.sql.impl.opt.OptUtils;
import com.hazelcast.jet.sql.impl.opt.logical.LogicalRel;
import com.hazelcast.jet.sql.impl.opt.logical.LogicalRules;
import com.hazelcast.jet.sql.impl.opt.physical.CreateDagVisitor;
import com.hazelcast.jet.sql.impl.opt.physical.DeleteByKeyMapPhysicalRel;
import com.hazelcast.jet.sql.impl.opt.physical.InsertMapPhysicalRel;
import com.hazelcast.jet.sql.impl.opt.physical.JetRootRel;
import com.hazelcast.jet.sql.impl.opt.physical.PhysicalRel;
import com.hazelcast.jet.sql.impl.opt.physical.PhysicalRules;
import com.hazelcast.jet.sql.impl.opt.physical.SelectByKeyMapPhysicalRel;
import com.hazelcast.jet.sql.impl.opt.physical.SinkMapPhysicalRel;
import com.hazelcast.jet.sql.impl.opt.physical.UpdateByKeyMapPhysicalRel;
import com.hazelcast.jet.sql.impl.parse.QueryConvertResult;
import com.hazelcast.jet.sql.impl.parse.QueryParseResult;
import com.hazelcast.jet.sql.impl.parse.SqlAlterJob;
import com.hazelcast.jet.sql.impl.parse.SqlCreateJob;
import com.hazelcast.jet.sql.impl.parse.SqlCreateMapping;
import com.hazelcast.jet.sql.impl.parse.SqlCreateSnapshot;
import com.hazelcast.jet.sql.impl.parse.SqlDropJob;
import com.hazelcast.jet.sql.impl.parse.SqlDropMapping;
import com.hazelcast.jet.sql.impl.parse.SqlDropSnapshot;
import com.hazelcast.jet.sql.impl.parse.SqlShowStatement;
import com.hazelcast.jet.sql.impl.schema.HazelcastTable;
import com.hazelcast.jet.sql.impl.schema.MappingCatalog;
import com.hazelcast.jet.sql.impl.schema.MappingStorage;
import com.hazelcast.logging.ILogger;
import com.hazelcast.security.permission.ActionConstants;
import com.hazelcast.security.permission.MapPermission;
import com.hazelcast.spi.impl.NodeEngine;
import com.hazelcast.sql.SqlColumnMetadata;
import com.hazelcast.sql.SqlRowMetadata;
import com.hazelcast.sql.impl.QueryParameterMetadata;
import com.hazelcast.sql.impl.QueryUtils;
import com.hazelcast.sql.impl.optimizer.OptimizationTask;
import com.hazelcast.sql.impl.optimizer.PlanKey;
import com.hazelcast.sql.impl.optimizer.SqlOptimizer;
import com.hazelcast.sql.impl.optimizer.SqlPlan;
import com.hazelcast.sql.impl.schema.Mapping;
import com.hazelcast.sql.impl.schema.MappingField;
import com.hazelcast.sql.impl.schema.MappingResolver;
import com.hazelcast.sql.impl.schema.TableResolver;
import com.hazelcast.sql.impl.schema.map.AbstractMapTable;
import com.hazelcast.sql.impl.state.QueryResultRegistry;
import com.hazelcast.sql.impl.type.QueryDataType;
import com.hazelcast.org.apache.calcite.plan.Convention;
import com.hazelcast.org.apache.calcite.plan.RelOptTable;
import com.hazelcast.org.apache.calcite.plan.RelOptUtil;
import com.hazelcast.org.apache.calcite.plan.RelTraitSet;
import com.hazelcast.org.apache.calcite.plan.volcano.VolcanoPlanner;
import com.hazelcast.org.apache.calcite.rel.RelNode;
import com.hazelcast.org.apache.calcite.rel.RelShuttleImpl;
import com.hazelcast.org.apache.calcite.rel.core.TableModify;
import com.hazelcast.org.apache.calcite.rel.core.TableModify.Operation;
import com.hazelcast.org.apache.calcite.rel.core.TableScan;
import com.hazelcast.org.apache.calcite.rel.type.RelDataTypeField;
import com.hazelcast.org.apache.calcite.sql.SqlNode;

import javax.annotation.Nullable;
import java.security.Permission;
import java.util.ArrayList;
import java.util.List;

import static java.util.Collections.singletonList;
import static java.util.stream.Collectors.toList;

/**
 * SQL optimizer based on Apache Calcite.
 * <p>
 * After parsing and initial sql-to-rel conversion is finished, all relational nodes start with {@link Convention#NONE}
 * convention. Such nodes are typically referred as "abstract" in Apache Calcite, because they do not have any physical
 * properties.
 * <p>
 * The optimization process is split into two phases - logical and physical. During logical planning we normalize abstract
 * nodes and convert them to nodes with {@link JetConventions#LOGICAL} convention. These new nodes are Hazelcast-specific
 * and hence may have additional properties. For example, at this stage we do filter pushdowns, introduce constrained scans,
 * etc.
 * <p>
 * During physical planning we look for specific physical implementations of logical nodes. Implementation nodes have
 * {@link JetConventions#PHYSICAL} convention. The process contains the following fundamental steps:
 * <ul>
 *     <li>Choosing proper access methods for scan (normal scan, index scan, etc)</li>
 *     <li>Propagating physical properties from children nodes to their parents</li>
 *     <li>Choosing proper implementations of parent operators based on physical properties of children
 *     (local vs. distributed sorting, blocking vs. streaming aggregation, hash join vs. merge join, etc.)</li>
 *     <li>Enforcing exchange operators when data movement is necessary</li>
 * </ul>
 * <p>
 * Physical optimization stage uses {@link VolcanoPlanner}. This is a rule-based optimizer. However it doesn't share any
 * architectural traits with EXODUS/Volcano/Cascades papers, except for the rule-based nature. In classical Cascades algorithm
 * [1], the optimization process is performed in a top-down style. Parent operator may request implementations of children
 * operators with specific properties. This is not possible in {@code VolcanoPlanner}. Instead, in this planner the rules are
 * fired in effectively uncontrollable fashion, thus making propagation of physical properties difficult. To overcome this
 * problem we use several techniques that help us emulate at least some parts of Cascades-style optimization.
 * <p>
 * First, {@link JetConventions#PHYSICAL} convention overrides {@link Convention#canConvertConvention(Convention)} and
 * {@link Convention#useAbstractConvertersForConversion(RelTraitSet, RelTraitSet)} methods. Their implementations ensure that
 * whenever a new child node with {@code PHYSICAL} convention is created, the rule of the parent {@code LOGICAL} nodes
 * will be re-scheduled. Second, physical rules for {@code LOGICAL} nodes iterate over concrete physical implementations of
 * inputs and convert logical nodes to physical nodes with proper traits. Combined, these techniques ensure complete exploration
 * of a search space and proper propagation of physical properties from child nodes to parent nodes. The downside is that
 * the same rule on the same node could be fired multiple times, thus increasing the optimization time.
 * <p>
 * For example, consider the following logical tree:
 * <pre>
 * LogicalFilter
 *   LogicalScan
 * </pre>
 * By default Apache Calcite will fire a rule on the logical filter first. But at this point we do not know the physical
 * properties of {@code LogicalScan} implementations, since they are not produced yet. As a result, we do not know what
 * physical properties should be set to the to-be-created {@code PhysicalFilter}. Then Apache Calcite will optimize
 * {@code LogicalScan}, producing physical implementations. However, by default these new physical implementations will not
 * re-trigger optimization of {@code LogicalFilter}. The result of the optimization will be:
 * <pre>
 * [LogicalFilter, PhysicalFilter(???)]
 *   [LogicalScan, PhysicalScan(PARTITIONED), PhysicalIndexScan(PARTITIONED, a ASC)]
 * </pre>
 * Notice how we failed to propagate important physical properties to the {@code PhysicalFilter}.
 * <p>
 * With the above-described techniques we force Apache Calcite to re-optimize the logical parent after a new physical child
 * has been created. This way we are able to pull-up physical properties. The result of the optimization will be:
 * <pre>
 * [LogicalFilter, PhysicalFilter(PARTITIONED), PhysicalFilter(PARTITIONED, a ASC)]
 *   [LogicalScan, PhysicalScan(PARTITIONED), PhysicalIndexScan(PARTITIONED, a ASC)]
 * </pre>
 * <p>
 * [1] Efficiency In The Columbia Database Query Optimizer (1998), chapters 2 and 3
 */
public class CalciteSqlOptimizer implements SqlOptimizer {

    private final NodeEngine nodeEngine;

    private final MappingResolver mappingResolver;
    private final List<TableResolver> tableResolvers;
    private final JetPlanExecutor planExecutor;

    private final ILogger logger;

    /**
     * Creates the optimizer together with the mapping catalog and the plan executor it hands to every plan.
     *
     * @param nodeEngine     node engine used to build the resolvers, the catalog, the executor and the logger
     * @param resultRegistry registry passed through to the {@link JetPlanExecutor}
     */
    public CalciteSqlOptimizer(NodeEngine nodeEngine, QueryResultRegistry resultRegistry) {
        this.nodeEngine = nodeEngine;

        this.mappingResolver = new MetadataResolver(nodeEngine);

        // The same catalog instance serves both as the only table resolver and as the executor's catalog.
        MappingCatalog mappingCatalog = mappingCatalog(nodeEngine);
        this.tableResolvers = singletonList(mappingCatalog);
        this.planExecutor = new JetPlanExecutor(mappingCatalog, nodeEngine.getHazelcastInstance(), resultRegistry);

        this.logger = nodeEngine.getLogger(getClass());
    }

    /**
     * Builds a {@link MappingCatalog} backed by a fresh {@link MappingStorage} and {@link SqlConnectorCache}.
     */
    private static MappingCatalog mappingCatalog(NodeEngine nodeEngine) {
        MappingStorage mappingStorage = new MappingStorage(nodeEngine);
        SqlConnectorCache connectorCache = new SqlConnectorCache(nodeEngine);
        return new MappingCatalog(nodeEngine, mappingStorage, connectorCache);
    }

    /**
     * Resolves the mapping with the given name and renders it as a {@code CREATE MAPPING} statement.
     *
     * @param name name handed to the mapping resolver
     * @return the unparsed DDL, or {@code null} if the resolver returned no mapping
     */
    @Nullable
    @Override
    public String mappingDdl(String name) {
        Mapping mapping = mappingResolver.resolve(name);
        return mapping != null ? SqlCreateMapping.unparse(mapping) : null;
    }

    /**
     * @return the table resolvers of this optimizer; a single-element list holding the mapping catalog
     */
    @Override
    public List<TableResolver> tableResolvers() {
        return tableResolvers;
    }

    /**
     * Parses, validates and plans the statement described by the task.
     *
     * @param task optimization task carrying the SQL text, schema, search paths and arguments
     * @return the plan for the statement
     */
    @Override
    public SqlPlan prepare(OptimizationTask task) {
        // 1. Prepare context.
        int memberCount = nodeEngine.getClusterService().getSize(MemberSelectors.DATA_MEMBER_SELECTOR);

        OptimizerContext context = OptimizerContext.create(
                task.getSchema(),
                task.getSearchPaths(),
                task.getArguments(),
                memberCount,
                mappingResolver
        );

        // 2. Parse SQL string and validate it.
        QueryParseResult parseResult = context.parse(task.getSql());

        // 3. Create plan.
        return createPlan(task, parseResult, context);
    }

    /**
     * Dispatches on the type of the parsed top-level node: DDL-like statements get their dedicated plans,
     * everything else is converted to a relational tree and optimized.
     */
    @SuppressWarnings("checkstyle:returncount")
    private SqlPlan createPlan(
            OptimizationTask task,
            QueryParseResult parseResult,
            OptimizerContext context
    ) {
        SqlNode node = parseResult.getNode();

        PlanKey planKey = new PlanKey(task.getSearchPaths(), task.getSql());
        if (node instanceof SqlCreateMapping) {
            return toCreateMappingPlan(planKey, (SqlCreateMapping) node);
        } else if (node instanceof SqlDropMapping) {
            return toDropMappingPlan(planKey, (SqlDropMapping) node);
        } else if (node instanceof SqlCreateJob) {
            return toCreateJobPlan(planKey, parseResult, context);
        } else if (node instanceof SqlAlterJob) {
            return toAlterJobPlan(planKey, (SqlAlterJob) node);
        } else if (node instanceof SqlDropJob) {
            return toDropJobPlan(planKey, (SqlDropJob) node);
        } else if (node instanceof SqlCreateSnapshot) {
            return toCreateSnapshotPlan(planKey, (SqlCreateSnapshot) node);
        } else if (node instanceof SqlDropSnapshot) {
            return toDropSnapshotPlan(planKey, (SqlDropSnapshot) node);
        } else if (node instanceof SqlShowStatement) {
            return toShowStatementPlan(planKey, (SqlShowStatement) node);
        } else {
            QueryConvertResult convertResult = context.convert(node);
            return toPlan(
                    planKey,
                    parseResult.getParameterMetadata(),
                    convertResult.getRel(),
                    convertResult.getFieldNames(),
                    context,
                    parseResult.isInfiniteRows(),
                    false
            );
        }
    }

    /**
     * Builds a {@link CreateMappingPlan} from the columns, names, type and options of the parsed statement.
     */
    private SqlPlan toCreateMappingPlan(PlanKey planKey, SqlCreateMapping sqlCreateMapping) {
        List<MappingField> mappingFields = sqlCreateMapping.columns()
                .map(field -> new MappingField(field.name(), field.type(), field.externalName()))
                .collect(toList());
        Mapping mapping = new Mapping(
                sqlCreateMapping.nameWithoutSchema(),
                sqlCreateMapping.externalName(),
                sqlCreateMapping.type(),
                mappingFields,
                sqlCreateMapping.options()
        );

        return new CreateMappingPlan(
                planKey,
                mapping,
                sqlCreateMapping.getReplace(),
                sqlCreateMapping.ifNotExists(),
                planExecutor
        );
    }

    /**
     * Builds a {@link DropMappingPlan} for the parsed statement.
     */
    private SqlPlan toDropMappingPlan(PlanKey planKey, SqlDropMapping sqlDropMapping) {
        return new DropMappingPlan(planKey, sqlDropMapping.nameWithoutSchema(), sqlDropMapping.ifExists(), planExecutor);
    }

    /**
     * Plans the DML statement nested in a {@code CREATE JOB} and wraps the result in a {@link CreateJobPlan}.
     */
    private SqlPlan toCreateJobPlan(PlanKey planKey, QueryParseResult parseResult, OptimizerContext context) {
        SqlCreateJob sqlCreateJob = (SqlCreateJob) parseResult.getNode();
        SqlNode source = sqlCreateJob.dmlStatement();

        QueryParseResult dmlParseResult = new QueryParseResult(source, parseResult.getParameterMetadata(), false);
        QueryConvertResult dmlConvertedResult = context.convert(dmlParseResult.getNode());
        // The nested DML plan is built without a plan key of its own and with isCreateJob = true.
        JetPlan dmlPlan = toPlan(
                null,
                parseResult.getParameterMetadata(),
                dmlConvertedResult.getRel(),
                dmlConvertedResult.getFieldNames(),
                context,
                dmlParseResult.isInfiniteRows(),
                true
        );
        assert dmlPlan instanceof DmlPlan && ((DmlPlan) dmlPlan).getOperation() == Operation.INSERT;

        return new CreateJobPlan(
                planKey,
                sqlCreateJob.jobConfig(),
                sqlCreateJob.ifNotExists(),
                (DmlPlan) dmlPlan,
                planExecutor
        );
    }

    /**
     * Builds an {@link AlterJobPlan} for the parsed statement.
     */
    private SqlPlan toAlterJobPlan(PlanKey planKey, SqlAlterJob sqlAlterJob) {
        return new AlterJobPlan(planKey, sqlAlterJob.name(), sqlAlterJob.getOperation(), planExecutor);
    }

    /**
     * Builds a {@link DropJobPlan} for the parsed statement.
     */
    private SqlPlan toDropJobPlan(PlanKey planKey, SqlDropJob sqlDropJob) {
        return new DropJobPlan(
                planKey,
                sqlDropJob.name(),
                sqlDropJob.ifExists(),
                sqlDropJob.withSnapshotName(),
                planExecutor
        );
    }

    /**
     * Builds a {@link CreateSnapshotPlan} for the parsed statement.
     */
    private SqlPlan toCreateSnapshotPlan(PlanKey planKey, SqlCreateSnapshot sqlNode) {
        return new CreateSnapshotPlan(planKey, sqlNode.getSnapshotName(), sqlNode.getJobName(), planExecutor);
    }

    /**
     * Builds a {@link DropSnapshotPlan} for the parsed statement.
     */
    private SqlPlan toDropSnapshotPlan(PlanKey planKey, SqlDropSnapshot sqlNode) {
        return new DropSnapshotPlan(planKey, sqlNode.getSnapshotName(), sqlNode.isIfExists(), planExecutor);
    }

    /**
     * Builds a {@link ShowStatementPlan} for the parsed statement.
     */
    private SqlPlan toShowStatementPlan(PlanKey planKey, SqlShowStatement sqlNode) {
        return new ShowStatementPlan(planKey, sqlNode.getTarget(), planExecutor);
    }

    /**
     * Optimizes the relational tree and turns the resulting physical root into a plan.
     * <p>
     * The by-key map rels (select, insert, sink, update, delete) produce their dedicated IMap plans and are
     * asserted never to appear for {@code CREATE JOB}. Any other {@link TableModify} root produces a
     * {@link DmlPlan}; everything else produces a {@link SelectPlan} whose DAG is built from the root
     * wrapped in a {@link JetRootRel}.
     *
     * @param planKey           key of the plan; {@code null} for the DML nested in {@code CREATE JOB}
     * @param parameterMetadata metadata of the query parameters
     * @param rel               converted (not yet optimized) relational tree
     * @param fieldNames        names of the result columns
     * @param context           optimizer context
     * @param isInfiniteRows    flag passed through to the {@link SelectPlan}
     * @param isCreateJob       whether the tree is the DML of a {@code CREATE JOB} statement
     * @return the plan for the tree
     */
    private JetPlan toPlan(
            PlanKey planKey,
            QueryParameterMetadata parameterMetadata,
            RelNode rel,
            List<String> fieldNames,
            OptimizerContext context,
            boolean isInfiniteRows,
            boolean isCreateJob
    ) {
        PhysicalRel physicalRel = optimize(parameterMetadata, rel, context, isCreateJob);

        List<Permission> permissions = extractPermissions(physicalRel);

        if (physicalRel instanceof SelectByKeyMapPhysicalRel) {
            assert !isCreateJob;
            SelectByKeyMapPhysicalRel select = (SelectByKeyMapPhysicalRel) physicalRel;
            SqlRowMetadata rowMetadata = createRowMetadata(
                    fieldNames,
                    physicalRel.schema(parameterMetadata).getTypes(),
                    rel.getRowType().getFieldList()
            );
            return new IMapSelectPlan(
                    planKey,
                    select.objectKey(),
                    parameterMetadata,
                    select.mapName(),
                    select.keyCondition(parameterMetadata),
                    select.rowProjectorSupplier(parameterMetadata),
                    rowMetadata,
                    planExecutor,
                    permissions
            );
        } else if (physicalRel instanceof InsertMapPhysicalRel) {
            assert !isCreateJob;
            InsertMapPhysicalRel insert = (InsertMapPhysicalRel) physicalRel;
            return new IMapInsertPlan(
                    planKey,
                    insert.objectKey(),
                    parameterMetadata,
                    insert.mapName(),
                    insert.entriesFn(),
                    planExecutor,
                    permissions
            );
        } else if (physicalRel instanceof SinkMapPhysicalRel) {
            assert !isCreateJob;
            SinkMapPhysicalRel sink = (SinkMapPhysicalRel) physicalRel;
            return new IMapSinkPlan(
                    planKey,
                    sink.objectKey(),
                    parameterMetadata,
                    sink.mapName(),
                    sink.entriesFn(),
                    planExecutor,
                    permissions
            );
        } else if (physicalRel instanceof UpdateByKeyMapPhysicalRel) {
            assert !isCreateJob;
            UpdateByKeyMapPhysicalRel update = (UpdateByKeyMapPhysicalRel) physicalRel;
            return new IMapUpdatePlan(
                    planKey,
                    update.objectKey(),
                    parameterMetadata,
                    update.mapName(),
                    update.keyCondition(parameterMetadata),
                    update.updaterSupplier(parameterMetadata),
                    planExecutor,
                    permissions
            );
        } else if (physicalRel instanceof DeleteByKeyMapPhysicalRel) {
            assert !isCreateJob;
            DeleteByKeyMapPhysicalRel delete = (DeleteByKeyMapPhysicalRel) physicalRel;
            return new IMapDeletePlan(
                    planKey,
                    delete.objectKey(),
                    parameterMetadata,
                    delete.mapName(),
                    delete.keyCondition(parameterMetadata),
                    planExecutor,
                    permissions
            );
        } else if (physicalRel instanceof TableModify) {
            Operation operation = ((TableModify) physicalRel).getOperation();
            CreateDagVisitor visitor = traverseRel(physicalRel, parameterMetadata);
            return new DmlPlan(
                    operation,
                    planKey,
                    parameterMetadata,
                    visitor.getObjectKeys(),
                    visitor.getDag(),
                    planExecutor,
                    permissions
            );
        } else {
            CreateDagVisitor visitor = traverseRel(new JetRootRel(physicalRel), parameterMetadata);
            SqlRowMetadata rowMetadata = createRowMetadata(
                    fieldNames,
                    physicalRel.schema(parameterMetadata).getTypes(),
                    rel.getRowType().getFieldList()
            );
            return new SelectPlan(
                    planKey,
                    parameterMetadata,
                    visitor.getObjectKeys(),
                    visitor.getDag(),
                    isInfiniteRows,
                    rowMetadata,
                    planExecutor,
                    permissions
            );
        }
    }

    /**
     * Collects the map permissions required by the physical tree.
     * <p>
     * A {@link TableScan} contributes {@link ActionConstants#ACTION_READ}; a node reaching the generic
     * {@code visit(RelNode)} callback contributes {@link ActionConstants#ACTION_PUT} if it exposes a table.
     * Only tables whose target is an {@link AbstractMapTable} produce a permission.
     *
     * @param physicalRel root of the optimized physical tree
     * @return the permissions found, in visiting order; empty if none apply
     */
    private List<Permission> extractPermissions(PhysicalRel physicalRel) {
        List<Permission> permissions = new ArrayList<>();

        physicalRel.accept(new RelShuttleImpl() {
            @Override
            public RelNode visit(TableScan scan) {
                addPermissionForTable(scan.getTable(), ActionConstants.ACTION_READ);
                return super.visit(scan);
            }

            @Override
            public RelNode visit(RelNode other) {
                addPermissionForTable(other.getTable(), ActionConstants.ACTION_PUT);
                return super.visit(other);
            }

            private void addPermissionForTable(RelOptTable t, String action) {
                // Nodes that are not bound to a table return null here.
                if (t == null) {
                    return;
                }
                HazelcastTable table = t.unwrap(HazelcastTable.class);
                if (table != null && table.getTarget() instanceof AbstractMapTable) {
                    String mapName = ((AbstractMapTable) table.getTarget()).getMapName();
                    permissions.add(new MapPermission(mapName, action));
                }
            }
        });

        return permissions;
    }

    /**
     * Runs the logical and then the physical optimization phase, logging the tree before and after each
     * phase at FINE level.
     *
     * @param parameterMetadata metadata of the query parameters, stored in the context
     * @param rel               converted relational tree
     * @param context           optimizer context
     * @param isCreateJob       stored in the context as the "requires job" flag
     * @return root of the optimized physical tree
     */
    private PhysicalRel optimize(
            QueryParameterMetadata parameterMetadata,
            RelNode rel,
            OptimizerContext context,
            boolean isCreateJob
    ) {
        context.setParameterMetadata(parameterMetadata);
        context.setRequiresJob(isCreateJob);

        // Rendering a plan tree to a string is not free; skip it entirely unless FINE logging is on.
        boolean fineLogOn = logger.isFineEnabled();

        if (fineLogOn) {
            logger.fine("Before logical opt:\n" + RelOptUtil.toString(rel));
        }
        LogicalRel logicalRel = optimizeLogical(context, rel);
        if (fineLogOn) {
            logger.fine("After logical opt:\n" + RelOptUtil.toString(logicalRel));
        }
        PhysicalRel physicalRel = optimizePhysical(context, logicalRel);
        if (fineLogOn) {
            logger.fine("After physical opt:\n" + RelOptUtil.toString(physicalRel));
        }
        return physicalRel;
    }

    /**
     * Perform logical optimization.
     *
     * @param context Optimizer context.
     * @param rel     Original logical tree.
     * @return Optimized logical tree.
     */
    private LogicalRel optimizeLogical(OptimizerContext context, RelNode rel) {
        return (LogicalRel) context.optimize(
                rel,
                LogicalRules.getRuleSet(),
                OptUtils.toLogicalConvention(rel.getTraitSet())
        );
    }

    /**
     * Perform physical optimization.
     * This is where proper access methods and algorithms for joins and aggregations are chosen.
     *
     * @param context Optimizer context.
     * @param rel     Optimized logical tree.
     * @return Optimized physical tree.
     */
    private PhysicalRel optimizePhysical(OptimizerContext context, RelNode rel) {
        return (PhysicalRel) context.optimize(
                rel,
                PhysicalRules.getRuleSet(),
                OptUtils.toPhysicalConvention(rel.getTraitSet())
        );
    }

    /**
     * Builds the row metadata from three parallel lists describing the result columns.
     *
     * @param columnNames names of the columns
     * @param columnTypes types of the columns
     * @param fields      row-type fields; only their nullability is used
     * @return row metadata with one column per list position
     */
    private SqlRowMetadata createRowMetadata(
            List<String> columnNames,
            List<QueryDataType> columnTypes,
            List<RelDataTypeField> fields
    ) {
        assert columnNames.size() == columnTypes.size();
        assert columnTypes.size() == fields.size();

        List<SqlColumnMetadata> columns = new ArrayList<>(columnNames.size());
        for (int i = 0; i < columnNames.size(); i++) {
            SqlColumnMetadata column = QueryUtils.getColumnMetadata(
                    columnNames.get(i),
                    columnTypes.get(i),
                    fields.get(i).getType().isNullable()
            );
            columns.add(column);
        }
        return new SqlRowMetadata(columns);
    }

    /**
     * Runs a new {@link CreateDagVisitor} over the physical tree.
     *
     * @return the visitor after the traversal, holding the DAG and the object keys
     */
    private CreateDagVisitor traverseRel(
            PhysicalRel physicalRel,
            QueryParameterMetadata parameterMetadata
    ) {
        CreateDagVisitor visitor = new CreateDagVisitor(this.nodeEngine, parameterMetadata);
        physicalRel.accept(visitor);
        return visitor;
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy