// org.apache.hadoop.hive.ql.optimizer.GenMRProcContext, from the hive-exec artifact
// (Maven / Gradle / Ivy). Note: a newer version of hive-exec (4.0.1) is available.
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.optimizer;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

/**
 * Processor Context for creating map reduce task. Walk the tree in a DFS manner
 * and process the nodes. Some state is maintained about the current nodes
 * visited so far.
 */
public class GenMRProcContext implements NodeProcessorCtx {

  /**
   * GenMapRedCtx is used to keep track of the current state.
   */
  public static class GenMapRedCtx {
    Task currTask;
    String currAliasId;

    public GenMapRedCtx() {
    }

    /**
     * @param currTask
     *          the current task
     * @param currAliasId
     */
    public GenMapRedCtx(Task currTask, String currAliasId) {
      this.currTask = currTask;
      this.currAliasId = currAliasId;
    }

    /**
     * @return current task
     */
    public Task getCurrTask() {
      return currTask;
    }

    /**
     * @return current alias
     */
    public String getCurrAliasId() {
      return currAliasId;
    }
  }

  /**
   * GenMRUnionCtx.
   *
   */
  public static class GenMRUnionCtx {
    final Task uTask;
    List taskTmpDir;
    List tt_desc;
    List listTopOperators;

    public GenMRUnionCtx(Task uTask) {
      this.uTask = uTask;
      taskTmpDir = new ArrayList();
      tt_desc = new ArrayList();
      listTopOperators = new ArrayList<>();
    }

    public Task getUTask() {
      return uTask;
    }

    public void addTaskTmpDir(String taskTmpDir) {
      this.taskTmpDir.add(taskTmpDir);
    }

    public List getTaskTmpDir() {
      return taskTmpDir;
    }

    public void addTTDesc(TableDesc tt_desc) {
      this.tt_desc.add(tt_desc);
    }

    public List getTTDesc() {
      return tt_desc;
    }

    public List getListTopOperators() {
      return listTopOperators;
    }

    public void addListTopOperators(TableScanOperator topOperator) {
      listTopOperators.add(topOperator);
    }
  }

  private HiveConf conf;
  private
    HashMap, Task> opTaskMap;
  private
    HashMap, List>> taskToSeenOps;

  private HashMap unionTaskMap;
  private List seenFileSinkOps;

  private ParseContext parseCtx;
  private List> mvTask;
  private List> rootTasks;

  private LinkedHashMap, GenMapRedCtx> mapCurrCtx;
  private Task currTask;
  private TableScanOperator currTopOp;
  private UnionOperator currUnionOp;
  private String currAliasId;
  private DependencyCollectionTask dependencyTaskForMultiInsert;

  // If many fileSinkDescs are linked to each other, it is a good idea to keep track of
  // tasks for first fileSinkDesc. others can use it
  private Map> linkedFileDescTasks;

  /**
   * Set of read entities. This list is generated by the walker and is passed to
   * the hooks.
   */
  private Set inputs;
  /**
   * Set of write entities. This list is generated by the walker and is passed
   * to the hooks.
   */
  private Set outputs;

  public GenMRProcContext() {
  }

  /**
   * @param conf
   *          hive configuration
   * @param opTaskMap
   *          reducer to task mapping
   * @param seenOps
   *          operator already visited
   * @param parseCtx
   *          current parse context
   * @param rootTasks
   *          root tasks for the plan
   * @param mvTask
   *          the final move task
   * @param mapCurrCtx
   *          operator to task mappings
   * @param inputs
   *          the set of input tables/partitions generated by the walk
   * @param outputs
   *          the set of destinations generated by the walk
   */
  public GenMRProcContext(
      HiveConf conf,
      HashMap, Task> opTaskMap,
      ParseContext parseCtx,
      List> mvTask,
      List> rootTasks,
      LinkedHashMap, GenMapRedCtx> mapCurrCtx,
      Set inputs, Set outputs) {
    this.conf = conf;
    this.opTaskMap = opTaskMap;
    this.mvTask = mvTask;
    this.parseCtx = parseCtx;
    this.rootTasks = rootTasks;
    this.mapCurrCtx = mapCurrCtx;
    this.inputs = inputs;
    this.outputs = outputs;
    currTask = null;
    currTopOp = null;
    currUnionOp = null;
    currAliasId = null;
    unionTaskMap = new HashMap();
    taskToSeenOps = new HashMap,
        List>>();
    dependencyTaskForMultiInsert = null;
    linkedFileDescTasks = null;
  }

  /**
   * @return reducer to task mapping
   */
  public HashMap,
                 Task> getOpTaskMap() {
    return opTaskMap;
  }

  /**
   * @param opTaskMap
   *          reducer to task mapping
   */
  public void setOpTaskMap(
    HashMap, Task> opTaskMap) {
    this.opTaskMap = opTaskMap;
  }

  public boolean isSeenOp(Task task, Operator operator) {
    List> seenOps = taskToSeenOps.get(task);
    return seenOps != null && seenOps.contains(operator);
  }

  public void addSeenOp(Task task, Operator operator) {
    List> seenOps = taskToSeenOps.get(task);
    if (seenOps == null) {
      taskToSeenOps.put(task, seenOps = new ArrayList>());
    }
    seenOps.add(operator);
  }

  /**
   * @return file operators already visited
   */
  public List getSeenFileSinkOps() {
    return seenFileSinkOps;
  }

  /**
   * @param seenFileSinkOps
   *          file sink operators already visited
   */
  public void setSeenFileSinkOps(List seenFileSinkOps) {
    this.seenFileSinkOps = seenFileSinkOps;
  }

  /**
   * @return current parse context
   */
  public ParseContext getParseCtx() {
    return parseCtx;
  }

  /**
   * @param parseCtx
   *          current parse context
   */
  public void setParseCtx(ParseContext parseCtx) {
    this.parseCtx = parseCtx;
  }

  /**
   * @return the final move task
   */
  public List> getMvTask() {
    return mvTask;
  }

  /**
   * @param mvTask
   *          the final move task
   */
  public void setMvTask(List> mvTask) {
    this.mvTask = mvTask;
  }

  /**
   * @return root tasks for the plan
   */
  public List> getRootTasks() {
    return rootTasks;
  }

  /**
   * @param rootTasks
   *          root tasks for the plan
   */
  public void setRootTasks(List> rootTasks) {
    this.rootTasks = rootTasks;
  }

  public boolean addRootIfPossible(Task task) {
    if (task.getParentTasks() == null || task.getParentTasks().isEmpty()) {
      if (!rootTasks.contains(task)) {
        return rootTasks.add(task);
      }
    }
    return false;
  }

  /**
   * @return operator to task mappings
   */
  public LinkedHashMap, GenMapRedCtx> getMapCurrCtx() {
    return mapCurrCtx;
  }

  /**
   * @param mapCurrCtx
   *          operator to task mappings
   */
  public void setMapCurrCtx(
      LinkedHashMap, GenMapRedCtx> mapCurrCtx) {
    this.mapCurrCtx = mapCurrCtx;
  }

  /**
   * @return current task
   */
  public Task getCurrTask() {
    return currTask;
  }

  /**
   * @param currTask
   *          current task
   */
  public void setCurrTask(Task currTask) {
    this.currTask = currTask;
  }

  /**
   * @return current top operator
   */
  public TableScanOperator getCurrTopOp() {
    return currTopOp;
  }

  /**
   * @param currTopOp
   *          current top operator
   */
  public void setCurrTopOp(TableScanOperator currTopOp) {
    this.currTopOp = currTopOp;
  }

  public UnionOperator getCurrUnionOp() {
    return currUnionOp;
  }

  /**
   * @param currUnionOp
   *          current union operator
   */
  public void setCurrUnionOp(UnionOperator currUnionOp) {
    this.currUnionOp = currUnionOp;
  }

  /**
   * @return current top alias
   */
  public String getCurrAliasId() {
    return currAliasId;
  }

  /**
   * @param currAliasId
   *          current top alias
   */
  public void setCurrAliasId(String currAliasId) {
    this.currAliasId = currAliasId;
  }

  public GenMRUnionCtx getUnionTask(UnionOperator op) {
    return unionTaskMap.get(op);
  }

  public void setUnionTask(UnionOperator op, GenMRUnionCtx uTask) {
    unionTaskMap.put(op, uTask);
  }

  /**
   * Get the input set.
   */
  public Set getInputs() {
    return inputs;
  }

  /**
   * Get the output set.
   */
  public Set getOutputs() {
    return outputs;
  }

  /**
   * @return the conf
   */
  public HiveConf getConf() {
    return conf;
  }

  /**
   * @param conf
   *          the conf to set
   */
  public void setConf(HiveConf conf) {
    this.conf = conf;
  }

  /**
   * Returns dependencyTaskForMultiInsert initializing it if necessary.
   *
   * dependencyTaskForMultiInsert serves as a mutual dependency for the final move tasks in a
   * multi-insert query.
   *
   * @return
   */
  public DependencyCollectionTask getDependencyTaskForMultiInsert() {
    if (dependencyTaskForMultiInsert == null) {
      if (conf.getBoolVar(ConfVars.HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES)) {
        dependencyTaskForMultiInsert =
            (DependencyCollectionTask) TaskFactory.get(new DependencyCollectionWork(), conf);
      }
    }
    return dependencyTaskForMultiInsert;
  }

  public Map> getLinkedFileDescTasks() {
    return linkedFileDescTasks;
  }

  public void setLinkedFileDescTasks(
      Map> linkedFileDescTasks) {
    this.linkedFileDescTasks = linkedFileDescTasks;
  }
}