All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.xpack.esql.optimizer.LocalLogicalPlanOptimizer Maven / Gradle / Ivy

There is a newer version: 8.16.1
Show newest version
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

package org.elasticsearch.xpack.esql.optimizer;

import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.BlockUtils;
import org.elasticsearch.xpack.esql.core.expression.Alias;
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.AttributeMap;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
import org.elasticsearch.xpack.esql.core.expression.predicate.Predicates;
import org.elasticsearch.xpack.esql.core.expression.predicate.nulls.IsNotNull;
import org.elasticsearch.xpack.esql.core.optimizer.OptimizerRules;
import org.elasticsearch.xpack.esql.core.plan.logical.Filter;
import org.elasticsearch.xpack.esql.core.plan.logical.Limit;
import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan;
import org.elasticsearch.xpack.esql.core.plan.logical.OrderBy;
import org.elasticsearch.xpack.esql.core.rule.ParameterizedRule;
import org.elasticsearch.xpack.esql.core.rule.ParameterizedRuleExecutor;
import org.elasticsearch.xpack.esql.core.rule.Rule;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.util.CollectionUtils;
import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
import org.elasticsearch.xpack.esql.expression.function.aggregate.Count;
import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce;
import org.elasticsearch.xpack.esql.optimizer.rules.PropagateEmptyRelation;
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
import org.elasticsearch.xpack.esql.plan.logical.Eval;
import org.elasticsearch.xpack.esql.plan.logical.Project;
import org.elasticsearch.xpack.esql.plan.logical.RegexExtract;
import org.elasticsearch.xpack.esql.plan.logical.TopN;
import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation;
import org.elasticsearch.xpack.esql.planner.AbstractPhysicalOperationProviders;
import org.elasticsearch.xpack.esql.planner.PlannerUtils;
import org.elasticsearch.xpack.esql.stats.SearchStats;

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static java.util.Arrays.asList;
import static java.util.Collections.emptySet;
import static org.elasticsearch.xpack.esql.core.optimizer.OptimizerRules.TransformDirection.UP;
import static org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer.cleanup;
import static org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer.operators;

public class LocalLogicalPlanOptimizer extends ParameterizedRuleExecutor {

    public LocalLogicalPlanOptimizer(LocalLogicalOptimizerContext localLogicalOptimizerContext) {
        super(localLogicalOptimizerContext);
    }

    @Override
    protected List> batches() {
        var local = new Batch<>(
            "Local rewrite",
            Limiter.ONCE,
            new ReplaceTopNWithLimitAndSort(),
            new ReplaceMissingFieldWithNull(),
            new InferIsNotNull(),
            new InferNonNullAggConstraint()
        );

        var rules = new ArrayList>();
        rules.add(local);
        // TODO: if the local rules haven't touched the tree, the rest of the rules can be skipped
        rules.addAll(asList(operators(), cleanup()));
        replaceRules(rules);
        return rules;
    }

    private List> replaceRules(List> listOfRules) {
        for (Batch batch : listOfRules) {
            var rules = batch.rules();
            for (int i = 0; i < rules.length; i++) {
                if (rules[i] instanceof PropagateEmptyRelation) {
                    rules[i] = new LocalPropagateEmptyRelation();
                }
            }
        }
        return listOfRules;
    }

    public LogicalPlan localOptimize(LogicalPlan plan) {
        return execute(plan);
    }

    /**
     * Break TopN back into Limit + OrderBy to allow the order rules to kick in.
     */
    public static class ReplaceTopNWithLimitAndSort extends OptimizerRules.OptimizerRule {
        public ReplaceTopNWithLimitAndSort() {
            super(UP);
        }

        @Override
        protected LogicalPlan rule(TopN plan) {
            return new Limit(plan.source(), plan.limit(), new OrderBy(plan.source(), plan.child(), plan.order()));
        }
    }

    /**
     * Look for any fields used in the plan that are missing locally and replace them with null.
     * This should minimize the plan execution, in the best scenario skipping its execution all together.
     */
    private static class ReplaceMissingFieldWithNull extends ParameterizedRule {

        @Override
        public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLogicalOptimizerContext) {
            return plan.transformUp(p -> missingToNull(p, localLogicalOptimizerContext.searchStats()));
        }

        private LogicalPlan missingToNull(LogicalPlan plan, SearchStats stats) {
            if (plan instanceof EsRelation || plan instanceof LocalRelation) {
                return plan;
            }

            if (plan instanceof Aggregate a) {
                // don't do anything (for now)
                return a;
            }
            // keep the aliased name
            else if (plan instanceof Project project) {
                var projections = project.projections();
                List newProjections = new ArrayList<>(projections.size());
                Map nullLiteral = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size());

                for (NamedExpression projection : projections) {
                    // Do not use the attribute name, this can deviate from the field name for union types.
                    if (projection instanceof FieldAttribute f && stats.exists(f.fieldName()) == false) {
                        DataType dt = f.dataType();
                        Alias nullAlias = nullLiteral.get(f.dataType());
                        // save the first field as null (per datatype)
                        if (nullAlias == null) {
                            Alias alias = new Alias(f.source(), f.name(), null, Literal.of(f, null), f.id());
                            nullLiteral.put(dt, alias);
                            projection = alias.toAttribute();
                        }
                        // otherwise point to it
                        else {
                            // since avoids creating field copies
                            projection = new Alias(f.source(), f.name(), f.qualifier(), nullAlias.toAttribute(), f.id());
                        }
                    }

                    newProjections.add(projection);
                }
                // add the first found field as null
                if (nullLiteral.size() > 0) {
                    plan = new Eval(project.source(), project.child(), new ArrayList<>(nullLiteral.values()));
                    plan = new Project(project.source(), plan, newProjections);
                }
            } else if (plan instanceof Eval
                || plan instanceof Filter
                || plan instanceof OrderBy
                || plan instanceof RegexExtract
                || plan instanceof TopN) {
                    plan = plan.transformExpressionsOnlyUp(
                        FieldAttribute.class,
                        // Do not use the attribute name, this can deviate from the field name for union types.
                        f -> stats.exists(f.fieldName()) ? f : Literal.of(f, null)
                    );
                }

            return plan;
        }
    }

    /**
     * Simplify IsNotNull targets by resolving the underlying expression to its root fields with unknown
     * nullability.
     * e.g.
     * (x + 1) / 2 IS NOT NULL --> x IS NOT NULL AND (x+1) / 2 IS NOT NULL
     * SUBSTRING(x, 3) > 4 IS NOT NULL --> x IS NOT NULL AND SUBSTRING(x, 3) > 4 IS NOT NULL
     * When dealing with multiple fields, a conjunction/disjunction based on the predicate:
     * (x + y) / 4 IS NOT NULL --> x IS NOT NULL AND y IS NOT NULL AND (x + y) / 4 IS NOT NULL
     * This handles the case of fields nested inside functions or expressions in order to avoid:
     * - having to evaluate the whole expression
     * - not pushing down the filter due to expression evaluation
     * IS NULL cannot be simplified since it leads to a disjunction which prevents the filter to be
     * pushed down:
     * (x + 1) IS NULL --> x IS NULL OR x + 1 IS NULL
     * and x IS NULL cannot be pushed down
     * 
* Implementation-wise this rule goes bottom-up, keeping an alias up to date to the current plan * and then looks for replacing the target. */ static class InferIsNotNull extends Rule { @Override public LogicalPlan apply(LogicalPlan plan) { // the alias map is shared across the whole plan AttributeMap aliases = new AttributeMap<>(); // traverse bottom-up to pick up the aliases as we go plan = plan.transformUp(p -> inspectPlan(p, aliases)); return plan; } private LogicalPlan inspectPlan(LogicalPlan plan, AttributeMap aliases) { // inspect just this plan properties plan.forEachExpression(Alias.class, a -> aliases.put(a.toAttribute(), a.child())); // now go about finding isNull/isNotNull LogicalPlan newPlan = plan.transformExpressionsOnlyUp(IsNotNull.class, inn -> inferNotNullable(inn, aliases)); return newPlan; } private Expression inferNotNullable(IsNotNull inn, AttributeMap aliases) { Expression result = inn; Set refs = resolveExpressionAsRootAttributes(inn.field(), aliases); // no refs found or could not detect - return the original function if (refs.size() > 0) { // add IsNull for the filters along with the initial inn var innList = CollectionUtils.combine(refs.stream().map(r -> (Expression) new IsNotNull(inn.source(), r)).toList(), inn); result = Predicates.combineAnd(innList); } return result; } /** * Unroll the expression to its references to get to the root fields * that really matter for filtering. */ protected Set resolveExpressionAsRootAttributes(Expression exp, AttributeMap aliases) { Set resolvedExpressions = new LinkedHashSet<>(); boolean changed = doResolve(exp, aliases, resolvedExpressions); return changed ? resolvedExpressions : emptySet(); } private boolean doResolve(Expression exp, AttributeMap aliases, Set resolvedExpressions) { boolean changed = false; // check if the expression can be skipped or is not nullabe if (skipExpression(exp)) { resolvedExpressions.add(exp); } else { for (Expression e : exp.references()) { Expression resolved = aliases.resolve(e, e); // found a root attribute, bail out if (resolved instanceof Attribute a && resolved == e) { resolvedExpressions.add(a); // don't mark things as change if the original expression hasn't been broken down changed |= resolved != exp; } else { // go further changed |= doResolve(resolved, aliases, resolvedExpressions); } } } return changed; } private static boolean skipExpression(Expression e) { return e instanceof Coalesce; } } /** * Local aggregation can only produce intermediate state that get wired into the global agg. */ private static class LocalPropagateEmptyRelation extends PropagateEmptyRelation { /** * Local variant of the aggregation that returns the intermediate value. */ @Override protected void aggOutput(NamedExpression agg, AggregateFunction aggFunc, BlockFactory blockFactory, List blocks) { List output = AbstractPhysicalOperationProviders.intermediateAttributes(List.of(agg), List.of()); for (Attribute o : output) { DataType dataType = o.dataType(); // boolean right now is used for the internal #seen so always return true var value = dataType == DataType.BOOLEAN ? true // look for count(literal) with literal != null : aggFunc instanceof Count count && (count.foldable() == false || count.fold() != null) ? 0L // otherwise nullify : null; var wrapper = BlockUtils.wrapperFor(blockFactory, PlannerUtils.toElementType(dataType), 1); wrapper.accept(value); blocks.add(wrapper.builder().build()); } } } /** * The vast majority of aggs ignore null entries - this rule adds a pushable filter, as it is cheap * to execute, to filter this entries out to begin with. * STATS x = min(a), y = sum(b) * becomes * | WHERE a IS NOT NULL OR b IS NOT NULL * | STATS x = min(a), y = sum(b) *
* Unfortunately this optimization cannot be applied when grouping is necessary since it can filter out * groups containing only null values */ static class InferNonNullAggConstraint extends ParameterizedOptimizerRule { @Override protected LogicalPlan rule(Aggregate aggregate, LocalLogicalOptimizerContext context) { // only look at aggregates with default grouping if (aggregate.groupings().size() > 0) { return aggregate; } SearchStats stats = context.searchStats(); LogicalPlan plan = aggregate; var aggs = aggregate.aggregates(); Set nonNullAggFields = Sets.newLinkedHashSetWithExpectedSize(aggs.size()); for (var agg : aggs) { if (Alias.unwrap(agg) instanceof AggregateFunction af) { Expression field = af.field(); // ignore literals (e.g. COUNT(1)) // make sure the field exists at the source and is indexed (not runtime) if (field.foldable() == false && field instanceof FieldAttribute fa && stats.isIndexed(fa.name())) { nonNullAggFields.add(field); } else { // otherwise bail out since unless disjunction needs to cover _all_ fields, things get filtered out return plan; } } } if (nonNullAggFields.size() > 0) { Expression condition = Predicates.combineOr( nonNullAggFields.stream().map(f -> (Expression) new IsNotNull(aggregate.source(), f)).toList() ); plan = aggregate.replaceChild(new Filter(aggregate.source(), aggregate.child(), condition)); } return plan; } } abstract static class ParameterizedOptimizerRule extends ParameterizedRule { public final LogicalPlan apply(LogicalPlan plan, P context) { return plan.transformUp(typeToken(), t -> rule(t, context)); } protected abstract LogicalPlan rule(SubPlan plan, P context); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy