// org.apache.hadoop.hive.ql.optimizer.GenMRProcContext Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.DependencyCollectionTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
/**
* Processor Context for creating map reduce task. Walk the tree in a DFS manner
* and process the nodes. Some state is maintained about the current nodes
* visited so far.
*/
public class GenMRProcContext implements NodeProcessorCtx {
/**
* GenMapRedCtx is used to keep track of the current state.
*/
public static class GenMapRedCtx {
Task extends Serializable> currTask;
String currAliasId;
public GenMapRedCtx() {
}
/**
* @param currTask
* the current task
* @param currAliasId
*/
public GenMapRedCtx(Task extends Serializable> currTask, String currAliasId) {
this.currTask = currTask;
this.currAliasId = currAliasId;
}
/**
* @return current task
*/
public Task extends Serializable> getCurrTask() {
return currTask;
}
/**
* @return current alias
*/
public String getCurrAliasId() {
return currAliasId;
}
}
/**
* GenMRUnionCtx.
*
*/
public static class GenMRUnionCtx {
final Task extends Serializable> uTask;
List taskTmpDir;
List tt_desc;
List listTopOperators;
public GenMRUnionCtx(Task extends Serializable> uTask) {
this.uTask = uTask;
taskTmpDir = new ArrayList();
tt_desc = new ArrayList();
listTopOperators = new ArrayList<>();
}
public Task extends Serializable> getUTask() {
return uTask;
}
public void addTaskTmpDir(String taskTmpDir) {
this.taskTmpDir.add(taskTmpDir);
}
public List getTaskTmpDir() {
return taskTmpDir;
}
public void addTTDesc(TableDesc tt_desc) {
this.tt_desc.add(tt_desc);
}
public List getTTDesc() {
return tt_desc;
}
public List getListTopOperators() {
return listTopOperators;
}
public void addListTopOperators(TableScanOperator topOperator) {
listTopOperators.add(topOperator);
}
}
private HiveConf conf;
private
HashMap, Task extends Serializable>> opTaskMap;
private
HashMap, List>> taskToSeenOps;
private HashMap unionTaskMap;
private List seenFileSinkOps;
private ParseContext parseCtx;
private List> mvTask;
private List> rootTasks;
private LinkedHashMap, GenMapRedCtx> mapCurrCtx;
private Task extends Serializable> currTask;
private TableScanOperator currTopOp;
private UnionOperator currUnionOp;
private String currAliasId;
private DependencyCollectionTask dependencyTaskForMultiInsert;
// If many fileSinkDescs are linked to each other, it is a good idea to keep track of
// tasks for first fileSinkDesc. others can use it
private Map> linkedFileDescTasks;
/**
* Set of read entities. This list is generated by the walker and is passed to
* the hooks.
*/
private Set inputs;
/**
* Set of write entities. This list is generated by the walker and is passed
* to the hooks.
*/
private Set outputs;
public GenMRProcContext() {
}
/**
* @param conf
* hive configuration
* @param opTaskMap
* reducer to task mapping
* @param seenOps
* operator already visited
* @param parseCtx
* current parse context
* @param rootTasks
* root tasks for the plan
* @param mvTask
* the final move task
* @param mapCurrCtx
* operator to task mappings
* @param inputs
* the set of input tables/partitions generated by the walk
* @param outputs
* the set of destinations generated by the walk
*/
public GenMRProcContext(
HiveConf conf,
HashMap, Task extends Serializable>> opTaskMap,
ParseContext parseCtx,
List> mvTask,
List> rootTasks,
LinkedHashMap, GenMapRedCtx> mapCurrCtx,
Set inputs, Set outputs) {
this.conf = conf;
this.opTaskMap = opTaskMap;
this.mvTask = mvTask;
this.parseCtx = parseCtx;
this.rootTasks = rootTasks;
this.mapCurrCtx = mapCurrCtx;
this.inputs = inputs;
this.outputs = outputs;
currTask = null;
currTopOp = null;
currUnionOp = null;
currAliasId = null;
unionTaskMap = new HashMap();
taskToSeenOps = new HashMap,
List>>();
dependencyTaskForMultiInsert = null;
linkedFileDescTasks = null;
}
/**
* @return reducer to task mapping
*/
public HashMap,
Task extends Serializable>> getOpTaskMap() {
return opTaskMap;
}
/**
* @param opTaskMap
* reducer to task mapping
*/
public void setOpTaskMap(
HashMap, Task extends Serializable>> opTaskMap) {
this.opTaskMap = opTaskMap;
}
public boolean isSeenOp(Task task, Operator operator) {
List> seenOps = taskToSeenOps.get(task);
return seenOps != null && seenOps.contains(operator);
}
public void addSeenOp(Task task, Operator operator) {
List> seenOps = taskToSeenOps.get(task);
if (seenOps == null) {
taskToSeenOps.put(task, seenOps = new ArrayList>());
}
seenOps.add(operator);
}
/**
* @return file operators already visited
*/
public List getSeenFileSinkOps() {
return seenFileSinkOps;
}
/**
* @param seenFileSinkOps
* file sink operators already visited
*/
public void setSeenFileSinkOps(List seenFileSinkOps) {
this.seenFileSinkOps = seenFileSinkOps;
}
/**
* @return current parse context
*/
public ParseContext getParseCtx() {
return parseCtx;
}
/**
* @param parseCtx
* current parse context
*/
public void setParseCtx(ParseContext parseCtx) {
this.parseCtx = parseCtx;
}
/**
* @return the final move task
*/
public List> getMvTask() {
return mvTask;
}
/**
* @param mvTask
* the final move task
*/
public void setMvTask(List> mvTask) {
this.mvTask = mvTask;
}
/**
* @return root tasks for the plan
*/
public List> getRootTasks() {
return rootTasks;
}
/**
* @param rootTasks
* root tasks for the plan
*/
public void setRootTasks(List> rootTasks) {
this.rootTasks = rootTasks;
}
public boolean addRootIfPossible(Task extends Serializable> task) {
if (task.getParentTasks() == null || task.getParentTasks().isEmpty()) {
if (!rootTasks.contains(task)) {
return rootTasks.add(task);
}
}
return false;
}
/**
* @return operator to task mappings
*/
public LinkedHashMap, GenMapRedCtx> getMapCurrCtx() {
return mapCurrCtx;
}
/**
* @param mapCurrCtx
* operator to task mappings
*/
public void setMapCurrCtx(
LinkedHashMap, GenMapRedCtx> mapCurrCtx) {
this.mapCurrCtx = mapCurrCtx;
}
/**
* @return current task
*/
public Task extends Serializable> getCurrTask() {
return currTask;
}
/**
* @param currTask
* current task
*/
public void setCurrTask(Task extends Serializable> currTask) {
this.currTask = currTask;
}
/**
* @return current top operator
*/
public TableScanOperator getCurrTopOp() {
return currTopOp;
}
/**
* @param currTopOp
* current top operator
*/
public void setCurrTopOp(TableScanOperator currTopOp) {
this.currTopOp = currTopOp;
}
public UnionOperator getCurrUnionOp() {
return currUnionOp;
}
/**
* @param currUnionOp
* current union operator
*/
public void setCurrUnionOp(UnionOperator currUnionOp) {
this.currUnionOp = currUnionOp;
}
/**
* @return current top alias
*/
public String getCurrAliasId() {
return currAliasId;
}
/**
* @param currAliasId
* current top alias
*/
public void setCurrAliasId(String currAliasId) {
this.currAliasId = currAliasId;
}
public GenMRUnionCtx getUnionTask(UnionOperator op) {
return unionTaskMap.get(op);
}
public void setUnionTask(UnionOperator op, GenMRUnionCtx uTask) {
unionTaskMap.put(op, uTask);
}
/**
* Get the input set.
*/
public Set getInputs() {
return inputs;
}
/**
* Get the output set.
*/
public Set getOutputs() {
return outputs;
}
/**
* @return the conf
*/
public HiveConf getConf() {
return conf;
}
/**
* @param conf
* the conf to set
*/
public void setConf(HiveConf conf) {
this.conf = conf;
}
/**
* Returns dependencyTaskForMultiInsert initializing it if necessary.
*
* dependencyTaskForMultiInsert serves as a mutual dependency for the final move tasks in a
* multi-insert query.
*
* @return
*/
public DependencyCollectionTask getDependencyTaskForMultiInsert() {
if (dependencyTaskForMultiInsert == null) {
if (conf.getBoolVar(ConfVars.HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES)) {
dependencyTaskForMultiInsert =
(DependencyCollectionTask) TaskFactory.get(new DependencyCollectionWork(), conf);
}
}
return dependencyTaskForMultiInsert;
}
public Map> getLinkedFileDescTasks() {
return linkedFileDescTasks;
}
public void setLinkedFileDescTasks(
Map> linkedFileDescTasks) {
this.linkedFileDescTasks = linkedFileDescTasks;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy