All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.hadoop.hive.ql.optimizer.physical.AbstractJoinTaskDispatcher Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer.physical;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.TaskGraphWalker.TaskGraphWalkerContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.MapWork;

/**
 * Common iteration methods for converting joins and sort-merge joins.
 */
public abstract class AbstractJoinTaskDispatcher implements Dispatcher {

  protected final PhysicalContext physicalContext;

  public AbstractJoinTaskDispatcher(PhysicalContext context) {
    physicalContext = context;
  }

  public abstract Task processCurrentTask(MapRedTask currTask,
      ConditionalTask conditionalTask, Context context)
      throws SemanticException;

  protected void replaceTaskWithConditionalTask(
      Task currTask, ConditionalTask cndTsk) {
    // add this task into task tree
    // set all parent tasks
    List> parentTasks = currTask.getParentTasks();
    currTask.setParentTasks(null);
    if (parentTasks != null) {
      for (Task tsk : parentTasks) {
        // make new generated task depends on all the parent tasks of current task.
        tsk.addDependentTask(cndTsk);
        // remove the current task from its original parent task's dependent task
        tsk.removeDependentTask(currTask);
      }
    } else {
      // remove from current root task and add conditional task to root tasks
      physicalContext.removeFromRootTask(currTask);
      physicalContext.addToRootTask(cndTsk);
    }
    // set all child tasks
    List> oldChildTasks = currTask.getChildTasks();
    if (oldChildTasks != null) {
      for (Task tsk : cndTsk.getListTasks()) {
        if (tsk.equals(currTask)) {
          continue;
        }
        for (Task oldChild : oldChildTasks) {
          tsk.addDependentTask(oldChild);
        }
      }
    }
  }

  // Replace the task with the new task. Copy the children and parents of the old
  // task to the new task.
  protected void replaceTask(
      Task currTask, Task newTask) {
    // add this task into task tree
    // set all parent tasks
    List> parentTasks = currTask.getParentTasks();
    currTask.setParentTasks(null);
    if (parentTasks != null) {
      for (Task tsk : parentTasks) {
        // remove the current task from its original parent task's dependent task
        tsk.removeDependentTask(currTask);
        // make new generated task depends on all the parent tasks of current task.
        tsk.addDependentTask(newTask);
      }
    } else {
      // remove from current root task and add conditional task to root tasks
      physicalContext.removeFromRootTask(currTask);
      physicalContext.addToRootTask(newTask);
    }

    // set all child tasks
    List> oldChildTasks = currTask.getChildTasks();
    currTask.setChildTasks(null);
    if (oldChildTasks != null) {
      for (Task tsk : oldChildTasks) {
        // remove the current task from its original parent task's dependent task
        tsk.getParentTasks().remove(currTask);
        // make new generated task depends on all the parent tasks of current task.
        newTask.addDependentTask(tsk);
      }
    }
  }

  public long getTotalKnownInputSize(Context context, MapWork currWork,
      Map> pathToAliases,
      HashMap aliasToSize) throws SemanticException {
    try {
      // go over all the input paths, and calculate a known total size, known
      // size for each input alias.
      Utilities.getInputSummary(context, currWork, null).getLength();

      // set alias to size mapping, this can be used to determine if one table
      // is chosen as big table, what's the total size of left tables, which
      // are going to be small tables.
      long aliasTotalKnownInputSize = 0L;
      for (Map.Entry> entry : pathToAliases.entrySet()) {
        String path = entry.getKey();
        List aliasList = entry.getValue();
        ContentSummary cs = context.getCS(path);
        if (cs != null) {
          long size = cs.getLength();
          for (String alias : aliasList) {
            aliasTotalKnownInputSize += size;
            Long es = aliasToSize.get(alias);
            if (es == null) {
              es = new Long(0);
            }
            es += size;
            aliasToSize.put(alias, es);
          }
        }
      }
      return aliasTotalKnownInputSize;
    } catch (Exception e) {
      e.printStackTrace();
      throw new SemanticException("Generate Map Join Task Error: " + e.getMessage());
    }
  }

  @Override
  public Object dispatch(Node nd, Stack stack, Object... nodeOutputs)
      throws SemanticException {
    if (nodeOutputs == null || nodeOutputs.length == 0) {
      throw new SemanticException("No Dispatch Context");
    }

    TaskGraphWalkerContext walkerCtx = (TaskGraphWalkerContext) nodeOutputs[0];

    Task currTask = (Task) nd;
    // not map reduce task or not conditional task, just skip
    if (currTask.isMapRedTask()) {
      if (currTask instanceof ConditionalTask) {
        // get the list of task
        List> taskList = ((ConditionalTask) currTask).getListTasks();
        for (Task tsk : taskList) {
          if (tsk.isMapRedTask()) {
            Task newTask = this.processCurrentTask((MapRedTask) tsk,
                ((ConditionalTask) currTask), physicalContext.getContext());
            walkerCtx.addToDispatchList(newTask);
          }
        }
      } else {
        Task newTask =
            this.processCurrentTask((MapRedTask) currTask, null, physicalContext.getContext());
        walkerCtx.addToDispatchList(newTask);
      }
    }
    return null;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy