org.modeshape.jcr.query.plan.CanonicalPlanner Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of modeshape-jcr
ModeShape implementation of the JCR API
There is a newer version: 5.4.1.Final
/*
 * ModeShape (http://www.modeshape.org)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.modeshape.jcr.query.plan;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.jcr.query.qom.JoinCondition;
import org.modeshape.common.i18n.I18n;
import org.modeshape.jcr.GraphI18n;
import org.modeshape.jcr.query.QueryContext;
import org.modeshape.jcr.query.model.AllNodes;
import org.modeshape.jcr.query.model.And;
import org.modeshape.jcr.query.model.Column;
import org.modeshape.jcr.query.model.Constraint;
import org.modeshape.jcr.query.model.FullTextSearch;
import org.modeshape.jcr.query.model.Join;
import org.modeshape.jcr.query.model.JoinType;
import org.modeshape.jcr.query.model.Limit;
import org.modeshape.jcr.query.model.NamedSelector;
import org.modeshape.jcr.query.model.Ordering;
import org.modeshape.jcr.query.model.Query;
import org.modeshape.jcr.query.model.QueryCommand;
import org.modeshape.jcr.query.model.Selector;
import org.modeshape.jcr.query.model.SelectorName;
import org.modeshape.jcr.query.model.SetQuery;
import org.modeshape.jcr.query.model.Source;
import org.modeshape.jcr.query.model.Subquery;
import org.modeshape.jcr.query.model.Visitable;
import org.modeshape.jcr.query.model.Visitors;
import org.modeshape.jcr.query.model.Visitors.WalkAllVisitor;
import org.modeshape.jcr.query.plan.PlanNode.Property;
import org.modeshape.jcr.query.plan.PlanNode.Type;
import org.modeshape.jcr.query.validate.Schemata;
import org.modeshape.jcr.query.validate.Schemata.Table;
import org.modeshape.jcr.query.validate.Schemata.View;
import org.modeshape.jcr.query.validate.Validator;
import org.modeshape.jcr.value.NameFactory;

/**
 * The planner that produces a canonical query plan given a {@link QueryCommand query command}.
 * 
 * A canonical plan always has the same structure:
 * 
 * 
 *       LIMIT       if row limit or offset are used
 *         |
 *      SORTING      if 'ORDER BY' is used
 *         |
 *     DUP_REMOVE    if 'SELECT DISTINCT' is used
 *         |
 *      PROJECT      with the list of columns being SELECTed
 *         |
 *       GROUP       if 'GROUP BY' is used
 *         |
 *      SELECT1
 *         |         One or more SELECT plan nodes that each have
 *      SELECT2      a single non-join constraint that are then all AND-ed
 *         |         together (see {@link #separateAndConstraints(Constraint, List)})
 *      SELECTn
 *         |
 *    SOURCE or JOIN     A single SOURCE or JOIN node, depending upon the query
 *              /  \
 *             /    \
 *           SOJ    SOJ    A SOURCE or JOIN node for the left and right side of the JOIN
 * 
 * 
 * The leaves of the tree are always SOURCE nodes, so conceptually data always flows through this plan from the bottom
 * SOURCE nodes, is adjusted/filtered as it trickles up through the plan, and is then ready to be used by the caller as it emerges
 * from the top node of the plan.
 * 
 * 
 * This canonical plan, however, is later optimized and rearranged so that it performs faster.
 * 
 */
public class CanonicalPlanner implements Planner {

    /**
     * Create the canonical plan for the supplied command by dispatching on its concrete kind: a {@link Query} is planned
     * directly, while any other command is cast to and planned as a {@link SetQuery}.
     *
     * @param context the context in which the query is being planned
     * @param query the query command to be planned
     * @return the root node of the plan tree representing the canonical plan
     */
    @Override
    public PlanNode createPlan( QueryContext context,
                                QueryCommand query ) {
        if (!(query instanceof Query)) {
            // Anything that is not a plain query is handled as a set query ...
            return createCanonicalPlan(context, (SetQuery)query);
        }
        return createCanonicalPlan(context, (Query)query);
    }

    /**
     * Create a canonical query plan for the given query. The plan is built bottom-up: the SOURCE/JOIN nodes first, then the
     * criteria, the projection, optional duplicate removal, sorting and limits, and finally the dependent (sub)queries. Once
     * the plan is assembled, the query and each of its subqueries are validated, with any problems recorded in the context.
     *
     * @param context the context in which the query is being planned
     * @param query the query to be planned
     * @return the root node of the plan tree representing the canonical plan
     */
    protected PlanNode createCanonicalPlan( QueryContext context,
                                            Query query ) {
        // Process the source of the query, recording the table for every selector (by alias or name) that is used ...
        Map<SelectorName, Table> usedSources = new HashMap<SelectorName, Table>();
        PlanNode plan = createPlanNode(context, query.source(), usedSources);

        // Attach criteria (on top); the subqueries found in the criteria are collected by variable name ...
        Map<String, Subquery> subqueriesByVariableName = new HashMap<String, Subquery>();
        plan = attachCriteria(context, plan, query.constraint(), query.columns(), subqueriesByVariableName);

        // Attach groupbys (on top) ...
        // plan = attachGrouping(context,plan,query.getGroupBy());

        // Attach the project ...
        plan = attachProject(context, plan, query.columns(), usedSources);

        // Attach duplicate removal ...
        if (query.isDistinct()) {
            plan = attachDuplicateRemoval(context, plan);
        }

        // Process the orderings and limits ...
        plan = attachSorting(context, plan, query.orderings());
        plan = attachLimits(context, plan, query.getLimits());

        // Capture if we're limiting the results to 1 row and no offset and no sorting ...
        if (query.getLimits().isLimitedToSingleRowWithNoOffset() && query.orderings().isEmpty()) {
            context.getHints().isExistsQuery = true;
        }

        // Now add in the subqueries as dependent joins, in reverse order ...
        plan = attachSubqueries(context, plan, subqueriesByVariableName);

        // Validate that all the parts of the query are resolvable ...
        validate(context, query, usedSources);

        // Now we need to validate all of the subqueries ...
        for (Subquery subquery : Visitors.subqueries(query, false)) {
            // Just do it by creating a plan, even though we aren't doing anything with these plans ...
            createPlan(context, subquery.getQuery());
        }

        return plan;
    }

    /**
     * Validate the supplied query command by walking all of its parts with a {@link Validator}. Subqueries are deliberately
     * not walked by this method.
     *
     * @param context the context in which the query is being planned
     * @param query the query command to be validated
     * @param usedSelectors the map of tables keyed by the {@link SelectorName}s (aliases or names) used in the query.
     */
    protected void validate( QueryContext context,
                             QueryCommand query,
                             Map<SelectorName, Table> usedSelectors ) {
        // Resolve everything (except subqueries) ...
        Validator validator = new Validator(context, usedSelectors);
        query.accept(new WalkAllVisitor(validator) {
            @Override
            protected void enqueue( Visitable objectToBeVisited ) {
                // Never queue up a subquery, so nothing inside it is visited by this walk ...
                if (objectToBeVisited instanceof Subquery) return;
                super.enqueue(objectToBeVisited);
            }
        });
    }

    /**
     * Build the canonical plan for a set query: both operands are planned (left first, then right) and placed under a single
     * SET_OPERATION node, on top of which the sorting and limits of the set query are attached.
     *
     * @param context the context in which the query is being planned
     * @param query the set query to be planned
     * @return the root node of the plan tree representing the canonical plan
     */
    protected PlanNode createCanonicalPlan( QueryContext context,
                                            SetQuery query ) {
        // Plan the two operands of the set operation ...
        PlanNode leftPlan = createPlan(context, query.getLeft());
        PlanNode rightPlan = createPlan(context, query.getRight());

        // Combine them under a set operation node that records the operation and whether 'ALL' was used ...
        PlanNode setOperation = new PlanNode(Type.SET_OPERATION);
        setOperation.addChildren(leftPlan, rightPlan);
        setOperation.setProperty(Property.SET_OPERATION, query.operation());
        setOperation.setProperty(Property.SET_USE_ALL, query.isAll());

        // Sorting goes directly above the set operation, and the limits above the sorting ...
        PlanNode sorted = attachSorting(context, setOperation, query.orderings());
        PlanNode result = attachLimits(context, sorted, query.getLimits());

        // Record the hint when the results are limited to 1 row with no offset and no sorting ...
        if (query.getLimits().isLimitedToSingleRowWithNoOffset()) {
            if (query.orderings().isEmpty()) {
                context.getHints().isExistsQuery = true;
            }
        }
        return result;
    }

    /**
     * Create a JOIN or SOURCE node that contain the source information. A {@link Selector} results in a single SOURCE node,
     * while a {@link Join} results in a JOIN node whose two children are created recursively from the left and right sides of
     * the join.
     *
     * @param context the execution context
     * @param source the source to be processed; may not be null
     * @param usedSelectors the map of tables keyed by the {@link SelectorName}s (aliases or names) used in the query; each
     *        selector whose table is found in the schemata is added to this map
     * @return the new plan; null only when the source is neither a {@link Selector} nor a {@link Join} and assertions are
     *         disabled
     */
    protected PlanNode createPlanNode( QueryContext context,
                                       Source source,
                                       Map<SelectorName, Table> usedSelectors ) {
        if (source instanceof Selector) {
            // No join required ...
            assert source instanceof AllNodes || source instanceof NamedSelector;
            Selector selector = (Selector)source;
            PlanNode node = new PlanNode(Type.SOURCE);
            if (selector.hasAlias()) {
                node.addSelector(selector.alias());
                node.setProperty(Property.SOURCE_ALIAS, selector.alias());
            } else {
                node.addSelector(selector.name());
            }
            // The source name is recorded whether or not an alias is used ...
            node.setProperty(Property.SOURCE_NAME, selector.name());

            // Validate the source name and set the available columns ...
            NameFactory nameFactory = context.getExecutionContext().getValueFactories().getNameFactory();
            // Always use the qualified form when searching for tables
            Table table = context.getSchemata().getTable(selector.name().qualifiedForm(nameFactory));
            if (table != null) {
                if (table instanceof View) context.getHints().hasView = true;
                if (usedSelectors.put(selector.aliasOrName(), table) != null) {
                    // There was already a table with this alias or name ...
                    I18n msg = GraphI18n.selectorNamesMayNotBeUsedMoreThanOnce;
                    context.getProblems().addError(msg, selector.aliasOrName().getString());
                }
                node.setProperty(Property.SOURCE_COLUMNS, table.getColumns());
            } else {
                context.getProblems().addError(GraphI18n.tableDoesNotExist, selector.name());
            }
            return node;
        }
        if (source instanceof Join) {
            Join join = (Join)source;
            JoinCondition joinCondition = join.getJoinCondition();
            // Set up new join node corresponding to this join predicate
            PlanNode node = new PlanNode(Type.JOIN);
            node.setProperty(Property.JOIN_TYPE, join.type());
            node.setProperty(Property.JOIN_ALGORITHM, JoinAlgorithm.NESTED_LOOP);
            node.setProperty(Property.JOIN_CONDITION, joinCondition);

            context.getHints().hasJoin = true;
            if (join.type() == JoinType.LEFT_OUTER) {
                context.getHints().hasOptionalJoin = true;
            }

            // Handle each child, keeping the left side as the first child and the right side as the last ...
            node.addLastChild(createPlanNode(context, join.getLeft(), usedSelectors));
            node.addLastChild(createPlanNode(context, join.getRight(), usedSelectors));

            // Add selectors to the joinNode
            for (PlanNode child : node.getChildren()) {
                node.addSelectors(child.getSelectors());
            }
            return node;
        }
        // should not get here; if we do, somebody added a new type of source
        assert false;
        return null;
    }

    /**
     * Attach all criteria above the join nodes. The optimizer will push these criteria down to the appropriate source.
     * <p>
     * The constraint is split into the constraints that are AND-ed together, and one SELECT node is created per constraint.
     * Before a constraint is placed in its SELECT node, its subqueries are replaced with bind variables (the subqueries are
     * recorded in the supplied map) and its uses of column aliases are replaced with the actual property names.
     * </p>
     *
     * @param context the context in which the query is being planned
     * @param plan the existing plan, which joins all source groups
     * @param constraint the criteria or constraint from the query; may be null if there are no criteria
     * @param columns the columns in the select (that may have aliases); may be null or empty
     * @param subqueriesByVariableName the subqueries by variable name
     * @return the updated plan, or the existing plan if there were no constraints; never null
     */
    protected PlanNode attachCriteria( final QueryContext context,
                                       PlanNode plan,
                                       Constraint constraint,
                                       List<? extends Column> columns,
                                       Map<String, Subquery> subqueriesByVariableName ) {
        if (constraint == null) return plan;
        context.getHints().hasCriteria = true;

        // Extract the list of Constraint objects that all must be satisfied ...
        List<Constraint> andableConstraints = new ArrayList<Constraint>();
        separateAndConstraints(constraint, andableConstraints);
        assert !andableConstraints.isEmpty();

        // Build up the map of aliases for the properties used in the criteria ...
        Map<String, String> propertyNameByAlias = new HashMap<String, String>();
        if (columns != null) {
            for (Column column : columns) {
                if (column.getColumnName() != null && !column.getColumnName().equals(column.getPropertyName())) {
                    propertyNameByAlias.put(column.getColumnName(), column.getPropertyName());
                }
            }
        }

        // The visitor keeps no state of its own, so a single instance is shared by all of the constraints ...
        final Visitors.AbstractVisitor fullTextSearchDetector = new Visitors.AbstractVisitor() {
            @Override
            public void visit( FullTextSearch obj ) {
                context.getHints().hasFullTextSearch = true;
            }
        };

        // For each of these constraints, create a criteria (SELECT) node above the supplied (JOIN or SOURCE) node.
        // Do this in reverse order so that the top-most SELECT node corresponds to the first constraint.
        for (int index = andableConstraints.size() - 1; index >= 0; index--) {
            Constraint criteria = andableConstraints.get(index);

            // Replace any subqueries with bind variables ...
            criteria = PlanUtil.replaceSubqueriesWithBindVariables(context, criteria, subqueriesByVariableName);

            // Replace any use of aliases with the actual properties ...
            criteria = PlanUtil.replaceAliasesWithProperties(context, criteria, propertyNameByAlias);

            // Create the select node ...
            PlanNode criteriaNode = new PlanNode(Type.SELECT);
            criteriaNode.setProperty(Property.SELECT_CRITERIA, criteria);

            // Add selectors to the criteria node ...
            criteriaNode.addSelectors(Visitors.getSelectorsReferencedBy(criteria));

            // Record whether there is at least one full-text search in the criteria ...
            Visitors.visitAll(criteria, fullTextSearchDetector);

            criteriaNode.addFirstChild(plan);
            plan = criteriaNode;
        }

        if (!subqueriesByVariableName.isEmpty()) {
            context.getHints().hasSubqueries = true;
        }
        return plan;
    }

    /**
     * Walk the supplied constraint to extract a list of the constraints that can be AND-ed together. For example, given the
     * constraint tree ((C1 AND C2) AND (C3 OR C4)), this method would result in a list of three separate criteria: [C1,C2,(C3 OR
     * C4)]. The resulting andConstraints list will contain Constraint objects that all must be true.
     * <p>
     * The left side of each {@link And} is processed before its right side, so the constraints are appended in left-to-right
     * order. A null constraint adds nothing to the list.
     * </p>
     *
     * @param constraint the input constraint; may be null
     * @param andableConstraints the collection into which all non-{@link And AND} constraints should be placed; may not be
     *        null
     */
    protected void separateAndConstraints( Constraint constraint,
                                           List<Constraint> andableConstraints ) {
        if (constraint == null) return;
        assert andableConstraints != null;
        if (constraint instanceof And) {
            // Descend into both sides, since either side may itself be another AND ...
            And and = (And)constraint;
            separateAndConstraints(and.left(), andableConstraints);
            separateAndConstraints(and.right(), andableConstraints);
        } else {
            andableConstraints.add(constraint);
        }
    }

    /**
     * Attach SORT node at top of tree. The SORT may be pushed down to a source (or sources) if possible by the optimizer.
     * <p>
     * The SORT node records the orderings and is given all of the selectors referenced by those orderings.
     * </p>
     *
     * @param context the context in which the query is being planned
     * @param plan the existing plan
     * @param orderings list of orderings from the query; may be null or empty if there are no orderings
     * @return the updated plan, or the existing plan if there were no orderings; never null
     */
    protected PlanNode attachSorting( QueryContext context,
                                      PlanNode plan,
                                      List<? extends Ordering> orderings ) {
        if (orderings == null || orderings.isEmpty()) return plan;
        PlanNode sortNode = new PlanNode(Type.SORT);

        context.getHints().hasSort = true;
        sortNode.setProperty(Property.SORT_ORDER_BY, orderings);
        for (Ordering ordering : orderings) {
            sortNode.addSelectors(Visitors.getSelectorsReferencedBy(ordering));
        }

        sortNode.addLastChild(plan);
        return sortNode;
    }

    /**
     * Attach a LIMIT node at the top of the plan tree.
     * <p>
     * Nothing is attached when the limit is null or unlimited. Otherwise the LIMIT node always records the row limit, and
     * records the offset only when the offset is non-zero.
     * </p>
     *
     * @param context the context in which the query is being planned
     * @param plan the existing plan
     * @param limit the limit definition; may be null
     * @return the updated plan, or the existing plan if there were no limits
     */
    protected PlanNode attachLimits( QueryContext context,
                                     PlanNode plan,
                                     Limit limit ) {
        // A missing limit is treated the same as an unlimited one ...
        if (limit == null || limit.isUnlimited()) return plan;
        context.getHints().hasLimit = true;

        PlanNode limitNode = new PlanNode(Type.LIMIT);
        if (limit.getOffset() != 0) {
            limitNode.setProperty(Property.LIMIT_OFFSET, limit.getOffset());
        }
        // The limit is known not to be unlimited at this point, so the row limit is always recorded ...
        limitNode.setProperty(Property.LIMIT_COUNT, limit.getRowLimit());
        limitNode.addLastChild(plan);
        return limitNode;
    }

    /**
     * Attach a PROJECT node at the top of the plan tree.
     * <p>
     * When no columns are supplied, the projected columns are all of the 'select all' columns of every selector. Otherwise
     * each supplied column is either expanded (when its property name is null or "*") or added as-is, and problems are
     * recorded in the context for columns that reference an unknown selector or (when column validation applies) a column
     * that does not exist on the table. The projected columns and their types are recorded as parallel lists.
     * </p>
     *
     * @param context the context in which the query is being planned
     * @param plan the existing plan
     * @param columns the columns being projected; may be null
     * @param selectors the selectors keyed by their alias or name
     * @return the updated plan
     */
    protected PlanNode attachProject( QueryContext context,
                                      PlanNode plan,
                                      List<? extends Column> columns,
                                      Map<SelectorName, Table> selectors ) {
        PlanNode projectNode = new PlanNode(Type.PROJECT);

        List<Column> newColumns = new LinkedList<Column>();
        List<String> newTypes = new ArrayList<String>();
        final boolean multipleSelectors = selectors.size() > 1;
        final boolean qualifyExpandedColumns = context.getHints().qualifyExpandedColumnNames;
        if (columns == null || columns.isEmpty()) {
            // SELECT *, so find all of the columns that are available from all the sources ...
            for (Map.Entry<SelectorName, Table> entry : selectors.entrySet()) {
                SelectorName tableName = entry.getKey();
                Table table = entry.getValue();
                // Add the selector that is being used ...
                projectNode.addSelector(tableName);
                // Compute the columns from this selector ...
                allColumnsFor(table, tableName, newColumns, newTypes, qualifyExpandedColumns);
            }
        } else {
            // Add the selector used by each column ...
            for (Column column : columns) {
                SelectorName tableName = column.selectorName();
                // Add the selector that is being used ...
                projectNode.addSelector(tableName);

                // Verify that each column is available in the appropriate source ...
                Table table = selectors.get(tableName);
                if (table == null) {
                    context.getProblems().addError(GraphI18n.tableDoesNotExist, tableName);
                    continue;
                }

                String columnName = column.getPropertyName();
                if (columnName == null || "*".equals(columnName)) {
                    // This is a 'SELECT *' on this source, but this source is one of multiple sources ...
                    // See https://issues.apache.org/jira/browse/JCR-3313; TCK test expects 'true' for last param
                    allColumnsFor(table, tableName, newColumns, newTypes, qualifyExpandedColumns);
                    continue;
                }

                // This is a particular column, so look it up once for both the type and the existence check ...
                Schemata.Column schemaColumn = table.getColumn(columnName);
                if (!newColumns.contains(column)) {
                    // NOTE(review): the duplicate check above runs before the column name is qualified below, so a column
                    // repeated in a multi-selector query may not be recognized as a duplicate -- confirm the intent.
                    if (multipleSelectors && columnName.equals(column.getColumnName())) {
                        // The column has no distinct alias, so qualify its name with the selector name ...
                        column = column.withColumnName(column.getSelectorName() + "." + column.getColumnName());
                    }
                    newColumns.add(column);
                    if (schemaColumn != null) {
                        newTypes.add(schemaColumn.getPropertyTypeName());
                    } else {
                        newTypes.add(context.getTypeSystem().getDefaultType());
                    }
                }

                // Make sure that the column is in the table (unless validation is off or the table has extra columns) ...
                boolean validateColumnExistance = context.getHints().validateColumnExistance && !table.hasExtraColumns();
                if (schemaColumn == null && validateColumnExistance) {
                    context.getProblems().addError(GraphI18n.columnDoesNotExistOnTable, columnName, tableName);
                }
            }
        }
        projectNode.setProperty(Property.PROJECT_COLUMNS, newColumns);
        projectNode.setProperty(Property.PROJECT_COLUMN_TYPES, newTypes);
        projectNode.addLastChild(plan);
        return projectNode;
    }

    /**
     * Append to the supplied lists a query column (and its type) for each of the 'select all' columns of the given table,
     * skipping any column that is already in the list of columns. The two lists are kept parallel: a type is added only when
     * its column is added.
     *
     * @param table the table whose 'select all' columns are to be added
     * @param tableName the selector name (alias or name) to be used for the new columns
     * @param columns the list to which the new columns are added
     * @param columnTypes the list to which the types of the new columns are added
     * @param includeSelectorNameInColumnName true if the name of each new column should be prefixed with the selector name
     *        and a '.', or false if the column name should be the same as the property name
     */
    protected void allColumnsFor( Table table,
                                  SelectorName tableName,
                                  List<Column> columns,
                                  List<String> columnTypes,
                                  boolean includeSelectorNameInColumnName ) {
        // Compute the columns from this selector ...
        for (Schemata.Column column : table.getSelectAllColumns()) {
            String columnName = column.getName();
            String propertyName = columnName;
            if (includeSelectorNameInColumnName) {
                columnName = tableName.getString() + "." + columnName;
            }
            Column newColumn = new Column(tableName, propertyName, columnName);
            // Check for the new query column; the list never contains schemata columns, so checking for the
            // schemata column (as was done previously) could never find a duplicate ...
            if (!columns.contains(newColumn)) {
                columns.add(newColumn);
                columnTypes.add(column.getPropertyTypeName());
            }
        }
    }

    /**
     * Place a new DUP_REMOVE node above the supplied plan, making that node the new top of the tree. The optimizer may later
     * push the DUP_REMOVE down to a source (or sources) if possible.
     *
     * @param context the context in which the query is being planned
     * @param plan the existing plan, which becomes the child of the new node
     * @return the updated plan, which is the new DUP_REMOVE node
     */
    protected PlanNode attachDuplicateRemoval( QueryContext context,
                                               PlanNode plan ) {
        final PlanNode duplicateRemoval = new PlanNode(Type.DUP_REMOVE);
        // Re-parent the existing plan so that it sits below the new node ...
        plan.setParent(duplicateRemoval);
        return duplicateRemoval;
    }

    /**
     * Attach plan nodes for each subquery, resulting with the first subquery at the top of the plan tree.
     * <p>
     * Each subquery is planned and placed as the first child of a new DEPENDENT_QUERY node, with the plan built so far as the
     * second child. The variable names are processed in descending order, so the subquery with the lowest variable name ends
     * up in the top-most DEPENDENT_QUERY node.
     * </p>
     *
     * @param context the context in which the query is being planned
     * @param plan the existing plan
     * @param subqueriesByVariableName the queries by the variable name used in substitution
     * @return the updated plan, or the existing plan if there were no subqueries
     */
    protected PlanNode attachSubqueries( QueryContext context,
                                         PlanNode plan,
                                         Map<String, Subquery> subqueriesByVariableName ) {
        // Order the variable names in reverse order ...
        List<String> varNames = new ArrayList<String>(subqueriesByVariableName.keySet());
        Collections.sort(varNames, Collections.<String>reverseOrder());

        for (String varName : varNames) {
            Subquery subquery = subqueriesByVariableName.get(varName);
            // Plan out the subquery ...
            PlanNode subqueryNode = createPlan(context, subquery.getQuery());
            setSubqueryVariableName(subqueryNode, varName);

            // Create a DEPENDENT_QUERY node, with the subquery on the LHS (so it is executed first) ...
            PlanNode depQuery = new PlanNode(Type.DEPENDENT_QUERY);
            depQuery.addChildren(subqueryNode, plan);
            depQuery.addSelectors(subqueryNode.getSelectors());
            depQuery.addSelectors(plan.getSelectors());
            plan = depQuery;
        }
        return plan;
    }

    /**
     * Record the variable name on the plan of a subquery. When the supplied plan is topped by one or more DEPENDENT_QUERY
     * nodes, the name is set on the first node that is not a DEPENDENT_QUERY, found by following the last child of each
     * DEPENDENT_QUERY node.
     *
     * @param subqueryPlan the top node of the subquery's plan
     * @param varName the name of the variable that was substituted for the subquery
     */
    protected void setSubqueryVariableName( PlanNode subqueryPlan,
                                            String varName ) {
        PlanNode target = subqueryPlan;
        // For a dependent query, our subquery should be on the right (last child), so keep descending ...
        while (target.getType() == Type.DEPENDENT_QUERY) {
            target = target.getLastChild();
        }
        target.setProperty(Property.VARIABLE_NAME, varName);
    }
}