All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.hadoop.hive.ql.exec.Task Maven / Gradle / Ivy

Go to download

Hive is a data warehouse infrastructure built on top of Hadoop; see http://wiki.apache.org/hadoop/Hive

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.CommandNeedRetryException;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.util.StringUtils;

/**
 * Task implementation.
 **/

public abstract class Task implements Serializable, Node {

  private static final long serialVersionUID = 1L;
  protected transient boolean started;
  protected transient boolean initialized;
  protected transient boolean isdone;
  protected transient boolean queued;
  protected transient HiveConf conf;
  protected transient Hive db;
  protected transient LogHelper console;
  protected transient QueryPlan queryPlan;
  protected transient TaskHandle taskHandle;
  protected transient HashMap taskCounters;
  protected transient DriverContext driverContext;
  protected transient boolean clonedConf = false;
  protected transient String jobID;
  protected Task backupTask;
  protected List> backupChildrenTasks = new ArrayList>();
  protected static transient Log LOG = LogFactory.getLog(Task.class);
  protected int taskTag;
  private boolean isLocalMode =false;
  private boolean retryCmdWhenFail = false;

  public static final int NO_TAG = 0;
  public static final int COMMON_JOIN = 1;
  public static final int CONVERTED_MAPJOIN = 2;
  public static final int CONVERTED_LOCAL_MAPJOIN = 3;
  public static final int BACKUP_COMMON_JOIN = 4;
  public static final int LOCAL_MAPJOIN = 5;
  // The join task is converted to a mapjoin task. This can only happen if
  // hive.auto.convert.join.noconditionaltask is set to true. No conditional task was
  // created in case the mapjoin failed.
  public static final int MAPJOIN_ONLY_NOBACKUP = 6;

  // Descendants tasks who subscribe feeds from this task
  protected transient List> feedSubscribers;

  protected String id;
  protected T work;
  public static enum FeedType {
    DYNAMIC_PARTITIONS, // list of dynamic partitions
  };

  // Bean methods

  protected List> childTasks;
  protected List> parentTasks;

  public Task() {
    isdone = false;
    started = false;
    initialized = false;
    queued = false;
    this.taskCounters = new HashMap();
    taskTag = Task.NO_TAG;
  }

  public TaskHandle getTaskHandle() {
    return taskHandle;
  }

  public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) {
    this.queryPlan = queryPlan;
    isdone = false;
    started = false;
    setInitialized();
    this.conf = conf;

    try {
      db = Hive.get(conf);
    } catch (HiveException e) {
      // Bail out ungracefully - we should never hit
      // this here - but would have hit it in SemanticAnalyzer
      LOG.error(StringUtils.stringifyException(e));
      throw new RuntimeException(e);
    }
    this.driverContext = driverContext;

    console = new LogHelper(LOG);
  }

  /**
   * This method is called in the Driver on every task. It updates counters and calls execute(),
   * which is overridden in each task
   *
   * @return return value of execute()
   */
  public int executeTask() {
    try {
      SessionState ss = SessionState.get();
      this.setStarted();
      if (ss != null) {
        ss.getHiveHistory().logPlanProgress(queryPlan);
      }
      int retval = execute(driverContext);
      this.setDone();
      if (ss != null) {
        ss.getHiveHistory().logPlanProgress(queryPlan);
      }
      return retval;
    } catch (IOException e) {
      throw new RuntimeException(e.getMessage());
    }
  }

  /**
   * This method is overridden in each Task. TODO execute should return a TaskHandle.
   *
   * @return status of executing the task
   */
  protected abstract int execute(DriverContext driverContext);

  // dummy method - FetchTask overwrites this
  public boolean fetch(ArrayList res) throws IOException, CommandNeedRetryException {
    assert false;
    return false;
  }

  public void setChildTasks(List> childTasks) {
    this.childTasks = childTasks;
  }

  public List getChildren() {
    return getChildTasks();
  }

  public List> getChildTasks() {
    return childTasks;
  }

  public void setParentTasks(List> parentTasks) {
    this.parentTasks = parentTasks;
  }

  public List> getParentTasks() {
    return parentTasks;
  }

  public Task getBackupTask() {
    return backupTask;
  }


  public void setBackupTask(Task backupTask) {
    this.backupTask = backupTask;
  }

  public List> getBackupChildrenTasks() {
    return backupChildrenTasks;
  }

  public void setBackupChildrenTasks(List> backupChildrenTasks) {
    this.backupChildrenTasks = backupChildrenTasks;
  }

  public Task getAndInitBackupTask() {
    if (backupTask != null) {
      // first set back the backup task with its children task.
      if( backupChildrenTasks!= null) {
        for (Task backupChild : backupChildrenTasks) {
          backupChild.getParentTasks().add(backupTask);
        }
      }

      // recursively remove task from its children tasks if this task doesn't have any parent task
      this.removeFromChildrenTasks();
    }
    return backupTask;
  }

  public void removeFromChildrenTasks() {

    List> childrenTasks = this.getChildTasks();
    if (childrenTasks == null) {
      return;
    }

    for (Task childTsk : childrenTasks) {
      // remove this task from its children tasks
      childTsk.getParentTasks().remove(this);

      // recursively remove non-parent task from its children
      List> siblingTasks = childTsk.getParentTasks();
      if (siblingTasks == null || siblingTasks.size() == 0) {
        childTsk.removeFromChildrenTasks();
      }
    }

    return;
  }


  /**
   * The default dependent tasks are just child tasks, but different types could implement their own
   * (e.g. ConditionalTask will use the listTasks as dependents).
   *
   * @return a list of tasks that are dependent on this task.
   */
  public List> getDependentTasks() {
    return getChildTasks();
  }

  /**
   * Add a dependent task on the current task. Return if the dependency already existed or is this a
   * new one
   *
   * @return true if the task got added false if it already existed
   */
  public boolean addDependentTask(Task dependent) {
    boolean ret = false;
    if (getChildTasks() == null) {
      setChildTasks(new ArrayList>());
    }
    if (!getChildTasks().contains(dependent)) {
      ret = true;
      getChildTasks().add(dependent);
      if (dependent.getParentTasks() == null) {
        dependent.setParentTasks(new ArrayList>());
      }
      if (!dependent.getParentTasks().contains(this)) {
        dependent.getParentTasks().add(this);
      }
    }
    return ret;
  }

  /**
   * Remove the dependent task.
   *
   * @param dependent
   *          the task to remove
   */
  public void removeDependentTask(Task dependent) {
    if ((getChildTasks() != null) && (getChildTasks().contains(dependent))) {
      getChildTasks().remove(dependent);
      if ((dependent.getParentTasks() != null) && (dependent.getParentTasks().contains(this))) {
        dependent.getParentTasks().remove(this);
      }
    }
  }

  public void setStarted() {
    this.started = true;
  }

  public boolean started() {
    return started;
  }

  public boolean done() {
    return isdone;
  }

  public void setDone() {
    isdone = true;
  }

  public void setQueued() {
    queued = true;
  }

  public boolean getQueued() {
    return queued;
  }

  public void setInitialized() {
    initialized = true;
  }

  public boolean getInitialized() {
    return initialized;
  }

  public boolean isRunnable() {
    boolean isrunnable = true;
    if (parentTasks != null) {
      for (Task parent : parentTasks) {
        if (!parent.done()) {
          isrunnable = false;
          break;
        }
      }
    }
    return isrunnable;
  }



  public void setWork(T work) {
    this.work = work;
  }

  public T getWork() {
    return work;
  }

  public void setId(String id) {
    this.id = id;
  }

  public String getId() {
    return id;
  }

  public boolean isMapRedTask() {
    return false;
  }

  public boolean isMapRedLocalTask() {
    return false;
  }

  public Collection> getTopOperators() {
    return new LinkedList>();
  }

  public boolean hasReduce() {
    return false;
  }

  public Operator getReducer() {
    return null;
  }

  public HashMap getCounters() {
    return taskCounters;
  }

  /**
   * Should be overridden to return the type of the specific task among the types in StageType.
   *
   * @return StageType.* or null if not overridden
   */
  public abstract StageType getType();

  /**
   * If this task uses any map-reduce intermediate data (either for reading or for writing),
   * localize them (using the supplied Context). Map-Reduce intermediate directories are allocated
   * using Context.getMRTmpFileURI() and can be localized using localizeMRTmpFileURI().
   *
   * This method is declared abstract to force any task code to explicitly deal with this aspect of
   * execution.
   *
   * @param ctx
   *          context object with which to localize
   */
  abstract protected void localizeMRTmpFilesImpl(Context ctx);

  /**
   * Localize a task tree
   *
   * @param ctx
   *          context object with which to localize
   */
  public final void localizeMRTmpFiles(Context ctx) {
    localizeMRTmpFilesImpl(ctx);

    if (childTasks == null) {
      return;
    }

    for (Task t : childTasks) {
      t.localizeMRTmpFiles(ctx);
    }
  }

  /**
   * Subscribe the feed of publisher. To prevent cycles, a task can only subscribe to its ancestor.
   * Feed is a generic form of execution-time feedback (type, value) pair from one task to another
   * task. Examples include dynamic partitions (which are only available at execution time). The
   * MoveTask may pass the list of dynamic partitions to the StatsTask since after the MoveTask the
   * list of dynamic partitions are lost (MoveTask moves them to the table's destination directory
   * which is mixed with old partitions).
   *
   * @param publisher
   *          this feed provider.
   */
  public void subscribeFeed(Task publisher) {
    if (publisher != this && publisher.ancestorOrSelf(this)) {
      if (publisher.getFeedSubscribers() == null) {
        publisher.setFeedSubscribers(new LinkedList>());
      }
      publisher.getFeedSubscribers().add(this);
    }
  }

  // return true if this task is an ancestor of itself of parameter desc
  private boolean ancestorOrSelf(Task desc) {
    if (this == desc) {
      return true;
    }
    List> deps = getDependentTasks();
    if (deps != null) {
      for (Task d : deps) {
        if (d.ancestorOrSelf(desc)) {
          return true;
        }
      }
    }
    return false;
  }

  public List> getFeedSubscribers() {
    return feedSubscribers;
  }

  public void setFeedSubscribers(List> s) {
    feedSubscribers = s;
  }

  // push the feed to its subscribers
  protected void pushFeed(FeedType feedType, Object feedValue) {
    if (feedSubscribers != null) {
      for (Task s : feedSubscribers) {
        s.receiveFeed(feedType, feedValue);
      }
    }
  }

  // a subscriber accept the feed and do something depending on the Task type
  protected void receiveFeed(FeedType feedType, Object feedValue) {
  }

  protected void cloneConf() {
    if (!clonedConf) {
      clonedConf = true;
      conf = new HiveConf(conf);
    }
  }


  public int getTaskTag() {
    return taskTag;
  }

  public void setTaskTag(int taskTag) {
    this.taskTag = taskTag;
  }

  public boolean isLocalMode() {
    return isLocalMode;
  }

  public void setLocalMode(boolean isLocalMode) {
    this.isLocalMode = isLocalMode;
  }

  public boolean requireLock() {
    return false;
  }

  public boolean ifRetryCmdWhenFail() {
    return retryCmdWhenFail;
  }

  public void setRetryCmdWhenFail(boolean retryCmdWhenFail) {
    this.retryCmdWhenFail = retryCmdWhenFail;
  }

  public QueryPlan getQueryPlan() {
    return queryPlan;
  }

  public void setQueryPlan(QueryPlan queryPlan) {
    this.queryPlan = queryPlan;
  }

  public String getJobID() {
    return jobID;
  }

  public void shutdown() {
  }

  public List getResultSchema() {
    return null;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy