All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.drill.exec.planner.physical.MetadataAggPrule Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.planner.physical;

import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelCollationImpl;
import org.apache.calcite.rel.RelCollations;
import org.apache.calcite.rel.RelFieldCollation;
import org.apache.calcite.rel.RelNode;
import org.apache.drill.common.expression.FieldReference;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.data.NamedExpression;
import org.apache.drill.exec.planner.logical.DrillRel;
import org.apache.drill.exec.planner.logical.MetadataAggRel;
import org.apache.drill.exec.planner.logical.RelOptHelper;
import org.apache.drill.exec.planner.physical.AggPrelBase.OperatorPhase;
import org.apache.drill.exec.planner.physical.DrillDistributionTrait.NamedDistributionField;
import org.apache.drill.exec.store.parquet.FilterEvaluatorUtils.FieldReferenceFinder;
import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Planner rule which matches a logical {@link MetadataAggRel} and transforms it
 * into physical metadata aggregation operators.
 *
 * <p>Three plans may be produced by {@link #onMatch(RelOptRuleCall)}:
 * <ul>
 *   <li>no group by expressions: a single-phase {@link MetadataStreamAggPrel}
 *   with singleton distribution;</li>
 *   <li>group by expressions, new aggregations are requested and the estimated input row count
 *   is not below the slice target: two-phase aggregation built by
 *   {@link TwoPhaseMetadataAggSubsetTransformer};</li>
 *   <li>otherwise: a single-phase {@link MetadataStreamAggPrel} with collation on
 *   the grouping keys and singleton distribution.</li>
 * </ul>
 */
public class MetadataAggPrule extends Prule {
  public static final MetadataAggPrule INSTANCE = new MetadataAggPrule();

  public MetadataAggPrule() {
    super(RelOptHelper.any(MetadataAggRel.class, DrillRel.DRILL_LOGICAL),
        "MetadataAggPrule");
  }

  /**
   * Registers physical equivalent(s) of the matched {@link MetadataAggRel}.
   *
   * @param call rule call whose first operand is the matched {@link MetadataAggRel}
   */
  @Override
  public void onMatch(RelOptRuleCall call) {
    MetadataAggRel aggregate = call.rel(0);
    RelNode input = aggregate.getInput();
    List<NamedExpression> groupByExpressions = aggregate.getContext().groupByExpressions();

    if (groupByExpressions.isEmpty()) {
      // no grouping keys: neither collation nor hash distribution is required
      RelTraitSet singleDistTraits = call.getPlanner().emptyTraitSet()
          .plus(Prel.DRILL_PHYSICAL)
          .plus(DrillDistributionTrait.SINGLETON);

      createTransformRequest(call, aggregate, input, singleDistTraits);
      return;
    }

    RelCollation collation = createCollation(groupByExpressions);

    // hash distribute on all grouping keys
    DrillDistributionTrait distOnAllKeys =
        new DrillDistributionTrait(DrillDistributionTrait.DistributionType.HASH_DISTRIBUTED,
            ImmutableList.copyOf(getDistributionFields(groupByExpressions)));

    PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
    boolean smallInput =
        input.estimateRowCount(input.getCluster().getMetadataQuery()) < settings.getSliceTarget();

    // force 2-phase aggregation for bottom aggregate call
    // to produce sort locally before aggregation is produced for large inputs
    if (aggregate.getContext().createNewAggregations() && !smallInput) {
      RelTraitSet physicalTraits = call.getPlanner().emptyTraitSet().plus(Prel.DRILL_PHYSICAL);
      RelNode convertedInput = convert(input, physicalTraits);

      new TwoPhaseMetadataAggSubsetTransformer(call, collation, distOnAllKeys)
          .go(aggregate, convertedInput);
    } else {
      // TODO: DRILL-7433 - replace DrillDistributionTrait.SINGLETON with distOnAllKeys when parallelization for MetadataHandler is implemented
      RelTraitSet sortedSingletonTraits = call.getPlanner().emptyTraitSet()
          .plus(Prel.DRILL_PHYSICAL)
          .plus(collation)
          .plus(DrillDistributionTrait.SINGLETON);

      createTransformRequest(call, aggregate, input, sortedSingletonTraits);
    }
  }

  /**
   * Builds the collation over all specified group by expressions.
   * The field collation for the expression at position {@code i} uses field index {@code i + 1}
   * and is paired with the root segment path of the expression argument.
   *
   * @param groupByExpressions group by expressions of the aggregate
   * @return {@link NamedRelCollation} for the specified expressions
   */
  private static RelCollation createCollation(List<NamedExpression> groupByExpressions) {
    List<RelFieldCollation> collations = new ArrayList<>();
    List<String> names = new ArrayList<>();

    for (int i = 0; i < groupByExpressions.size(); i++) {
      collations.add(new RelFieldCollation(i + 1));
      SchemaPath fieldPath = getArgumentReference(groupByExpressions.get(i));
      names.add(fieldPath.getRootSegmentPath());
    }

    return new NamedRelCollation(collations, names);
  }

  /**
   * Converts the input to the specified traits and registers a single-phase
   * {@link MetadataStreamAggPrel} on top of it as the transformation result.
   *
   * @param call      rule call to register the transformation in
   * @param aggregate logical metadata aggregate to be converted
   * @param input     input of the logical aggregate
   * @param traits    traits for the new aggregate; the input is converted to these traits
   *                  after they are passed through {@code PrelUtil.fixTraits}
   */
  private void createTransformRequest(RelOptRuleCall call, MetadataAggRel aggregate,
      RelNode input, RelTraitSet traits) {

    RelNode convertedInput = convert(input, PrelUtil.fixTraits(call, traits));

    MetadataStreamAggPrel newAgg = new MetadataStreamAggPrel(
        aggregate.getCluster(),
        traits,
        convertedInput,
        aggregate.getContext(),
        OperatorPhase.PHASE_1of1);

    call.transformTo(newAgg);
  }

  /**
   * Returns list with named distribution fields which correspond to specified expressions arguments.
   * The field for the expression at position {@code index} is created with field id {@code index + 1}.
   *
   * @param namedExpressions expressions list
   * @return list of {@link NamedDistributionField} instances
   */
  private static List<NamedDistributionField> getDistributionFields(List<NamedExpression> namedExpressions) {
    List<NamedDistributionField> distributionFields = new ArrayList<>();
    int groupByExprsSize = namedExpressions.size();

    for (int index = 0; index < groupByExprsSize; index++) {
      SchemaPath fieldPath = getArgumentReference(namedExpressions.get(index));
      NamedDistributionField field =
          new NamedDistributionField(index + 1, fieldPath.getRootSegmentPath());
      distributionFields.add(field);
    }

    return distributionFields;
  }

  /**
   * Returns {@link FieldReference} instance which corresponds to the argument of specified {@code namedExpression}.
   * The expression is expected to reference exactly one field; this is checked with an assertion only.
   *
   * @param namedExpression expression
   * @return {@link FieldReference} instance
   */
  private static FieldReference getArgumentReference(NamedExpression namedExpression) {
    Set<SchemaPath> arguments = namedExpression.getExpr().accept(FieldReferenceFinder.INSTANCE, null);
    assert arguments.size() == 1 : "Group by expression contains more than one argument";
    return new FieldReference(arguments.iterator().next());
  }

  /**
   * Implementation of {@link RelCollationImpl} with field name.
   * Stores {@link RelFieldCollation} list and corresponding field names to be used in sort operators.
   * Field name is required for the case of dynamic schema discovering
   * when field is not present in rel data type at planning time.
   */
  public static class NamedRelCollation extends RelCollationImpl {
    private final List<String> names;

    /**
     * @param fieldCollations field collations to be passed to {@link RelCollationImpl}
     * @param names           field names, one for every field collation, in the same order
     */
    protected NamedRelCollation(List<RelFieldCollation> fieldCollations, List<String> names) {
      // RelCollationImpl is constructed with the non-shaded Guava ImmutableList, hence the fully qualified name
      super(com.google.common.collect.ImmutableList.copyOf(fieldCollations));
      // defensive copy, so later changes of the caller's list cannot affect this collation
      this.names = Collections.unmodifiableList(new ArrayList<>(names));
    }

    /**
     * Returns name of the field for the specified index.
     *
     * @param collationIndex one-based index of the name, i.e. the name stored
     *                       at position {@code collationIndex - 1} is returned
     * @return field name
     * @throws IndexOutOfBoundsException if there is no name for the specified index
     */
    public String getName(int collationIndex) {
      return names.get(collationIndex - 1);
    }
  }

  /**
   * {@link SubsetTransformer} for creating two-phase metadata aggregation.
   * Produces the following chain of operators on top of the converted child:
   * {@link MetadataHashAggPrel} (phase 1 of 2), {@link SortPrel},
   * {@link HashToMergeExchangePrel} and {@link MetadataStreamAggPrel} (phase 2 of 2).
   */
  private static class TwoPhaseMetadataAggSubsetTransformer
      extends SubsetTransformer<MetadataAggRel, RuntimeException> {

    private final RelCollation collation;
    private final DrillDistributionTrait distributionTrait;

    /**
     * @param call              rule call the transformer is created for
     * @param collation         collation on the grouping keys used for sort, exchange and second phase aggregate
     * @param distributionTrait distribution trait added to the traits of sort and exchange
     */
    public TwoPhaseMetadataAggSubsetTransformer(RelOptRuleCall call,
        RelCollation collation, DrillDistributionTrait distributionTrait) {
      super(call);
      this.collation = collation;
      this.distributionTrait = distributionTrait;
    }

    /**
     * Builds two-phase metadata aggregation on top of the specified child.
     *
     * @param aggregate logical metadata aggregate to be converted
     * @param child     input the physical operators are built upon
     * @return second phase {@link MetadataStreamAggPrel} with singleton distribution
     */
    @Override
    public RelNode convertChild(MetadataAggRel aggregate, RelNode child) {
      List<NamedExpression> groupByExpressions = aggregate.getContext().groupByExpressions();

      // the first phase keeps the distribution of the child and requires no collation
      DrillDistributionTrait toDist = child.getTraitSet().getTrait(DrillDistributionTraitDef.INSTANCE);
      RelTraitSet phase1Traits = newTraitSet(Prel.DRILL_PHYSICAL, RelCollations.EMPTY, toDist);
      RelNode newInput = convert(child, phase1Traits);

      // maps group by expressions to themselves to be able to produce the second aggregation
      List<NamedExpression> identityExpressions = groupByExpressions.stream()
          .map(namedExpression -> new NamedExpression(namedExpression.getExpr(), getArgumentReference(namedExpression)))
          .collect(Collectors.toList());

      // use hash aggregation for the first stage to avoid sorting raw data
      MetadataHashAggPrel phase1Agg = new MetadataHashAggPrel(
          aggregate.getCluster(),
          phase1Traits,
          newInput,
          aggregate.getContext().toBuilder().groupByExpressions(identityExpressions).build(),
          OperatorPhase.PHASE_1of2);

      // sort and exchange share the same traits: collation on grouping keys plus requested distribution
      RelTraitSet sortedTraits = newTraitSet(Prel.DRILL_PHYSICAL, collation, toDist).plus(distributionTrait);
      SortPrel sort = new SortPrel(
          aggregate.getCluster(),
          sortedTraits,
          phase1Agg,
          (RelCollation) sortedTraits.getTrait(collation.getTraitDef()));

      int numEndPoints = PrelUtil.getSettings(phase1Agg.getCluster()).numEndPoints();

      HashToMergeExchangePrel exch =
          new HashToMergeExchangePrel(phase1Agg.getCluster(),
              sortedTraits,
              sort,
              ImmutableList.copyOf(getDistributionFields(groupByExpressions)),
              collation,
              numEndPoints);

      return new MetadataStreamAggPrel(
          aggregate.getCluster(),
          newTraitSet(Prel.DRILL_PHYSICAL, collation, DrillDistributionTrait.SINGLETON),
          exch,
          aggregate.getContext(),
          OperatorPhase.PHASE_2of2);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy