// org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule
// (from the hive-exec artifact; a newer version, 4.0.0, is available)
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer.calcite.rules;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.RelFactories.ProjectFactory;
import org.apache.calcite.rex.RexFieldCollation;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexOver;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;

/**
 * Rule to fix windowing issue when it is done over
 * aggregation columns (more info in HIVE-10627).
 *
 * This rule is applied as a post-processing step after
 * optimization by Calcite in order to add columns
 * that may be pruned by RelFieldTrimmer, but are
 * still needed due to the concrete implementation of
 * Windowing processing in Hive.
 */
public class HiveWindowingFixRule extends RelOptRule {

  /** Shared instance of this rule. */
  public static final HiveWindowingFixRule INSTANCE = new HiveWindowingFixRule();

  /** Prefix of the aliases given to the columns re-added for windowing. */
  private static final String WINDOW_COLUMN_PREFIX = "window_col_";

  /** Factory used to build the two replacement project operators. */
  private final ProjectFactory projectFactory;


  /**
   * Creates the rule; it matches a Project whose input is an Aggregate.
   */
  private HiveWindowingFixRule() {
    super(
        operand(Project.class,
            operand(Aggregate.class, any())));
    this.projectFactory = HiveProject.DEFAULT_PROJECT_FACTORY;
  }

  /**
   * Rewrites a Project-over-Aggregate pair when a windowing expression in
   * the project references an aggregate output column that the project does
   * not itself expose.
   *
   * If such a column is missing, the project is replaced by two projects:
   * a lower one holding the original expressions plus the missing columns,
   * and an upper one that keeps only the original columns (same names, same
   * order). If nothing is missing, the plan is left untouched.
   *
   * @param call rule call whose rel(0) is the Project and rel(1) the Aggregate
   */
  @Override
  public void onMatch(RelOptRuleCall call) {
    final Project project = call.rel(0);
    final Aggregate aggregate = call.rel(1);

    // Looked up once; both are reused by every loop below
    final List<RexNode> projectExprs = project.getChildExps();
    final List<String> projectFieldNames = project.getRowType().getFieldNames();

    // 1. We go over the expressions in the project operator
    //    and we separate the windowing nodes that are result
    //    of an aggregate expression from the rest of nodes.
    //    Input refs whose index is at or beyond the grouping
    //    fields are the ones collected.
    final int groupingFields = aggregate.getGroupCount() + aggregate.getIndicatorCount();
    final Set<String> projectExprsDigest = new HashSet<String>();
    final Map<String, RexNode> windowingExprsDigestToNodes = new HashMap<String, RexNode>();
    for (RexNode expr : projectExprs) {
      if (!(expr instanceof RexOver)) {
        projectExprsDigest.add(expr.toString());
        continue;
      }
      final RexOver rexOverNode = (RexOver) expr;
      // Operands
      for (RexNode operand : rexOverNode.getOperands()) {
        recordIfAggregateRef(operand, groupingFields, windowingExprsDigestToNodes);
      }
      // Partition keys
      for (RexNode partitionKey : rexOverNode.getWindow().partitionKeys) {
        recordIfAggregateRef(partitionKey, groupingFields, windowingExprsDigestToNodes);
      }
      // Order keys
      for (RexFieldCollation orderKey : rexOverNode.getWindow().orderKeys) {
        recordIfAggregateRef(orderKey.left, groupingFields, windowingExprsDigestToNodes);
      }
    }

    // 2. We check whether there is a column needed by the
    //    windowing operation that is missing in the
    //    project expressions. For instance, if the windowing
    //    operation is over an aggregation column, Hive expects
    //    that column to be in the Select clause of the query.
    //    The idea is that if there is a column missing, we will
    //    replace the old project operator by two new project
    //    operators:
    //    - a project operator containing the original columns
    //      of the project operator plus all the columns that were
    //      missing
    //    - a project on top of the previous one, that will take
    //      out the columns that were missing and were added by the
    //      previous project

    // These data structures are needed to create the new project
    // operator (below)
    final List<RexNode> belowProjectExprs = new ArrayList<RexNode>(projectExprs);
    final List<String> belowProjectColumnNames = new ArrayList<String>();

    // This data structure is needed to create the new project
    // operator (top)
    final List<RexNode> topProjectExprs = new ArrayList<RexNode>();

    final int projectCount = projectExprs.size();
    for (int i = 0; i < projectCount; i++) {
      belowProjectColumnNames.add(projectFieldNames.get(i));
      topProjectExprs.add(RexInputRef.of(i, project.getRowType()));
    }

    boolean windowingFix = false;
    // Names are only ever added, so the smallest free suffix never goes
    // down; the counter can therefore be carried across missing columns
    // instead of restarting the search from zero each time.
    int aliasIndex = 0;
    for (Entry<String, RexNode> windowingExpr : windowingExprsDigestToNodes.entrySet()) {
      if (projectExprsDigest.contains(windowingExpr.getKey())) {
        continue;
      }
      windowingFix = true;
      belowProjectExprs.add(windowingExpr.getValue());
      String alias = WINDOW_COLUMN_PREFIX + aliasIndex;
      while (belowProjectColumnNames.contains(alias)) {
        // Pre-increment: move on to the next candidate rather than
        // re-testing the name that was just found to be taken
        alias = WINDOW_COLUMN_PREFIX + (++aliasIndex);
      }
      belowProjectColumnNames.add(alias);
    }

    if (!windowingFix) {
      // We do not need to do anything, we bail out
      return;
    }

    // 3. We need to fix it, we create the two replacement project
    //    operators
    final RelNode newProjectRel = projectFactory.createProject(
        aggregate, belowProjectExprs, belowProjectColumnNames);
    final RelNode newTopProjectRel = projectFactory.createProject(
        newProjectRel, topProjectExprs, projectFieldNames);

    call.transformTo(newTopProjectRel);
  }

  /**
   * Records {@code node} under its digest if it is a direct input reference
   * whose index is not one of the first {@code groupingFields} columns.
   * Any other kind of expression is ignored.
   *
   * @param node expression taken from a windowing call (operand,
   *          partition key or order key)
   * @param groupingFields number of leading grouping/indicator columns in
   *          the aggregate output
   * @param digestToNodes map from expression digest to expression that
   *          receives the match
   */
  private static void recordIfAggregateRef(RexNode node, int groupingFields,
      Map<String, RexNode> digestToNodes) {
    if (node instanceof RexInputRef
        && ((RexInputRef) node).getIndex() >= groupingFields) {
      digestToNodes.put(node.toString(), node);
    }
  }

}