All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSemiJoinRule Maven / Gradle / Ivy

There is a newer version: 1.21.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer.calcite.rules;

import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptRuleOperand;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.plan.hep.HepRelVertex;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinInfo;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.tools.RelBuilderFactory;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.ImmutableIntList;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import hive.com.google.common.collect.ImmutableList;
import hive.com.google.common.collect.Lists;

import java.util.ArrayList;
import java.util.List;

/**
 * Planner rule that creates a {@code SemiJoin} from a
 * {@link org.apache.calcite.rel.core.Join} on top of a
 * {@link org.apache.calcite.rel.logical.LogicalAggregate}.
 *
 * TODO Remove this rule and use Calcite's SemiJoinRule. Not possible currently
 * since Calcite doesn't use RelBuilder for this rule and we want to generate HiveSemiJoin rel here.
 */
public abstract class HiveSemiJoinRule extends RelOptRule {

  /** SLF4J logger keyed on this class; {@code protected}, so subclasses can log through it too. */
  protected static final Logger LOG = LoggerFactory.getLogger(HiveSemiJoinRule.class);

  /**
   * Shared instance of {@code HiveProjectToSemiJoinRule}, built with
   * {@link HiveRelFactories#HIVE_BUILDER} as its builder factory.
   * NOTE(review): presumably the variant matching a Project on top of the Join; the
   * class is declared outside this view, so confirm against its definition.
   */
  public static final HiveProjectToSemiJoinRule INSTANCE_PROJECT =
          new HiveProjectToSemiJoinRule(HiveRelFactories.HIVE_BUILDER);

  /**
   * Shared instance of {@code HiveAggregateToSemiJoinRule}, built with
   * {@link HiveRelFactories#HIVE_BUILDER} as its builder factory.
   * NOTE(review): presumably the variant matching an Aggregate on top of the Join; the
   * class is declared outside this view, so confirm against its definition.
   */
  public static final HiveAggregateToSemiJoinRule INSTANCE_AGGREGATE =
          new HiveAggregateToSemiJoinRule(HiveRelFactories.HIVE_BUILDER);

  /**
   * Creates the rule by forwarding both arguments to the {@link RelOptRule} constructor.
   *
   * <p>The constructor is {@code private}, so it is only reachable from code declared
   * inside this top-level class (nested classes included).
   *
   * @param operand    root operand passed unchanged to the superclass
   * @param relBuilder builder factory passed unchanged to the superclass
   */
  private HiveSemiJoinRule(RelOptRuleOperand operand, RelBuilderFactory relBuilder) {
    // The third superclass argument is passed as null.
    // NOTE(review): presumably this is the rule description and null lets Calcite
    // derive a default one -- confirm against the RelOptRule constructor contract.
    super(operand, relBuilder, null);
  }

  protected void perform(RelOptRuleCall call, ImmutableBitSet topRefs,
          RelNode topOperator, Join join, RelNode left, Aggregate aggregate) {
    LOG.debug("Matched HiveSemiJoinRule");
    final RelOptCluster cluster = join.getCluster();
    final RexBuilder rexBuilder = cluster.getRexBuilder();
    final ImmutableBitSet rightBits =
        ImmutableBitSet.range(left.getRowType().getFieldCount(),
            join.getRowType().getFieldCount());
    if (topRefs.intersects(rightBits)) {
      return;
    }
    final JoinInfo joinInfo = join.analyzeCondition();
    if (!joinInfo.rightSet().equals(
        ImmutableBitSet.range(aggregate.getGroupCount()))) {
      // Rule requires that aggregate key to be the same as the join key.
      // By the way, neither a super-set nor a sub-set would work.
      return;
    }
    if(join.getJoinType() == JoinRelType.LEFT) {
      // since for LEFT join we are only interested in rows from LEFT we can get rid of right side
      call.transformTo(topOperator.copy(topOperator.getTraitSet(), ImmutableList.of(left)));
      return;
    }
    if (join.getJoinType() != JoinRelType.INNER) {
      return;
    }
    if (!joinInfo.isEqui()) {
      return;
    }
    LOG.debug("All conditions matched for HiveSemiJoinRule. Going to apply transformation.");
    final List newRightKeyBuilder = Lists.newArrayList();
    final List aggregateKeys = aggregate.getGroupSet().asList();
    for (int key : joinInfo.rightKeys) {
      newRightKeyBuilder.add(aggregateKeys.get(key));
    }
    final ImmutableIntList newRightKeys =
        ImmutableIntList.copyOf(newRightKeyBuilder);
    final RelNode newRight = aggregate.getInput();
    final RexNode newCondition =
        RelOptUtil.createEquiJoinCondition(left, joinInfo.leftKeys, newRight,
            newRightKeys, rexBuilder);

    RelNode semi = null;
    //HIVE-15458: we need to add a Project on top of Join since SemiJoin with Join as it's right input
    // is not expected further down the pipeline. see jira for more details
    if(aggregate.getInput() instanceof HepRelVertex
          && ((HepRelVertex)aggregate.getInput()).getCurrentRel() instanceof  Join) {
        Join rightJoin = (Join)(((HepRelVertex)aggregate.getInput()).getCurrentRel());
        List projects = new ArrayList<>();
        for(int i=0; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy