All downloads are free. Search and download functionality uses the official Maven repository.

io.trino.sql.planner.iterative.rule.OptimizeDuplicateInsensitiveJoins Maven / Gradle / Ivy

There is a newer version: 465
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.trino.sql.planner.iterative.rule;

import com.google.common.collect.ImmutableList;
import io.trino.Session;
import io.trino.matching.Captures;
import io.trino.matching.Pattern;
import io.trino.sql.planner.DeterminismEvaluator;
import io.trino.sql.planner.iterative.GroupReference;
import io.trino.sql.planner.iterative.Lookup;
import io.trino.sql.planner.iterative.Rule;
import io.trino.sql.planner.plan.AggregationNode;
import io.trino.sql.planner.plan.FilterNode;
import io.trino.sql.planner.plan.JoinNode;
import io.trino.sql.planner.plan.PlanNode;
import io.trino.sql.planner.plan.PlanVisitor;
import io.trino.sql.planner.plan.ProjectNode;
import io.trino.sql.planner.plan.UnionNode;

import java.util.List;
import java.util.Optional;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.SystemSessionProperties.isOptimizeDuplicateInsensitiveJoins;
import static io.trino.sql.ir.Booleans.TRUE;
import static io.trino.sql.planner.DeterminismEvaluator.isDeterministic;
import static io.trino.sql.planner.plan.Patterns.aggregation;
import static java.util.Objects.requireNonNull;

/**
 * For empty aggregations duplicate input rows can be skipped.
 * This rule takes advantage of this fact and sets
 * {@link JoinNode#withMaySkipOutputDuplicates()} for joins below
 * such aggregation.
 */
public class OptimizeDuplicateInsensitiveJoins
        implements Rule
{
    private static final Pattern PATTERN = aggregation()
            .matching(aggregation -> aggregation.getAggregations().isEmpty());

    @Override
    public Pattern getPattern()
    {
        return PATTERN;
    }

    @Override
    public boolean isEnabled(Session session)
    {
        return isOptimizeDuplicateInsensitiveJoins(session);
    }

    @Override
    public Result apply(AggregationNode aggregation, Captures captures, Context context)
    {
        return aggregation.getSource().accept(new Rewriter(context.getLookup()), null)
                .map(rewrittenSource -> Result.ofPlanNode(aggregation.replaceChildren(ImmutableList.of(rewrittenSource))))
                .orElse(Result.empty());
    }

    private static class Rewriter
            extends PlanVisitor, Void>
    {
        private final Lookup lookup;

        private Rewriter(Lookup lookup)
        {
            this.lookup = requireNonNull(lookup, "lookup is null");
        }

        @Override
        protected Optional visitPlan(PlanNode node, Void context)
        {
            return Optional.empty();
        }

        @Override
        public Optional visitFilter(FilterNode node, Void context)
        {
            if (!isDeterministic(node.getPredicate())) {
                // non-deterministic expressions could filter duplicate rows probabilistically
                return Optional.empty();
            }

            return node.getSource().accept(this, null)
                    .map(source -> node.replaceChildren(ImmutableList.of(source)));
        }

        @Override
        public Optional visitProject(ProjectNode node, Void context)
        {
            boolean isDeterministic = node.getAssignments().getExpressions().stream()
                    .allMatch(DeterminismEvaluator::isDeterministic);
            if (!isDeterministic) {
                // non-deterministic projections could be used in downstream filters which could
                // filter duplicate rows probabilistically
                return Optional.empty();
            }

            return node.getSource().accept(this, null)
                    .map(source -> node.replaceChildren(ImmutableList.of(source)));
        }

        @Override
        public Optional visitUnion(UnionNode node, Void context)
        {
            List rewrittenSources = node.getSources().stream()
                    .map(source -> source.accept(this, null).orElse(source))
                    .collect(toImmutableList());

            if (rewrittenSources.equals(node.getSources())) {
                return Optional.empty();
            }

            return Optional.of(node.replaceChildren(rewrittenSources));
        }

        @Override
        public Optional visitJoin(JoinNode node, Void context)
        {
            // LookupJoinOperator will evaluate non-deterministic condition on output rows until one of the
            // rows matches. Therefore it's safe to set maySkipOutputDuplicates for joins with non-deterministic
            // filters.
            if (!isDeterministic(node.getFilter().orElse(TRUE))) {
                if (node.isMaySkipOutputDuplicates()) {
                    // join node is already set to skip duplicates, return empty to prevent rule from looping forever
                    return Optional.empty();
                }

                return Optional.of(node.withMaySkipOutputDuplicates());
            }

            Optional rewrittenLeft = node.getLeft().accept(this, null);
            Optional rewrittenRight = node.getRight().accept(this, null);

            if (node.isMaySkipOutputDuplicates() && rewrittenLeft.isEmpty() && rewrittenRight.isEmpty()) {
                // join node is already set to skip duplicates and children were not modified, return empty
                // to prevent rule from looping forever
                return Optional.empty();
            }

            return Optional.of(node.withMaySkipOutputDuplicates().replaceChildren(ImmutableList.of(
                    rewrittenLeft.orElse(node.getLeft()),
                    rewrittenRight.orElse(node.getRight()))));
        }

        @Override
        public Optional visitGroupReference(GroupReference node, Void context)
        {
            return lookup.resolve(node).accept(this, null);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy