// All downloads are free. Search and download functionality uses the official Maven repository.
//
// org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1 Maven / Gradle / Ivy
//
// There is a newer version: 4.0.1
// Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.optimizer;

import java.io.Serializable;
import java.util.Map;
import java.util.Stack;

import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

/**
 * Processor for the rule - table scan followed by reduce sink.
 */
public class GenMRRedSink1 implements NodeProcessor {

  public GenMRRedSink1() {
  }

  /**
   * Reduce Sink encountered.
   * a) If we are seeing this RS for first time, we initialize plan corresponding to this RS.
   * b) If we are seeing this RS for second or later time then either query had a join in which
   *    case we will merge this plan with earlier plan involving this RS or plan for this RS
   *    needs to be split in two branches.
   *
   * @param nd
   *          the reduce sink operator encountered
   * @param opProcCtx
   *          context
   */
  public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx,
      Object... nodeOutputs) throws SemanticException {
    ReduceSinkOperator op = (ReduceSinkOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;

    Map, GenMapRedCtx> mapCurrCtx = ctx
        .getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(stack.get(stack.size() - 2));
    Task currTask = mapredCtx.getCurrTask();
    MapredWork currPlan = (MapredWork) currTask.getWork();
    String currAliasId = mapredCtx.getCurrAliasId();

    if (op.getNumChild() != 1) {
      throw new IllegalStateException("Expecting operator " + op + " to have one child. " +
          "But found multiple children : " + op.getChildOperators());
    }
    Operator reducer = op.getChildOperators().get(0);
    Task oldTask = ctx.getOpTaskMap().get(reducer);

    ctx.setCurrAliasId(currAliasId);
    ctx.setCurrTask(currTask);

    // If the plan for this reducer does not exist, initialize the plan
    if (oldTask == null) {
      if (currPlan.getReduceWork() == null) {
        GenMapRedUtils.initPlan(op, ctx);
      } else {
        GenMapRedUtils.splitPlan(op, ctx);
      }
    } else {
      // This will happen in case of joins. The current plan can be thrown away
      // after being merged with the original plan
      GenMapRedUtils.joinPlan(currTask, oldTask, ctx);
      currTask = oldTask;
      ctx.setCurrTask(currTask);
    }

    mapCurrCtx.put(op, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrAliasId()));

    if (GenMapRedUtils.hasBranchFinished(nodeOutputs)) {
      ctx.addRootIfPossible(currTask);
      return false;
    }

    return true;
  }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy