/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.plan;
import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol;
import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol;
import org.apache.hadoop.hive.ql.parse.OpParseContext;
import org.apache.hadoop.hive.ql.parse.QBJoinTree;
import org.apache.hadoop.hive.ql.parse.SplitSample;
/**
* MapredWork.
*
*/
@Explain(displayName = "Map Reduce")
public class MapredWork extends AbstractOperatorDesc {
private static final long serialVersionUID = 1L;
private String command;
// map side work
// use LinkedHashMap to make sure the iteration order is
// deterministic, to ease testing
private LinkedHashMap<String, ArrayList<String>> pathToAliases;
private LinkedHashMap<String, PartitionDesc> pathToPartitionInfo;
private LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork;
private LinkedHashMap<String, PartitionDesc> aliasToPartnInfo;
private HashMap<String, SplitSample> nameToSplitSample;
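// A hypothetical illustration (paths and aliases invented) of how the maps
// above fit together for a query scanning table t1 under alias "a":
//   pathToAliases:       { "hdfs:/warehouse/t1" -> ["a"] }
//   pathToPartitionInfo: { "hdfs:/warehouse/t1" -> partitionDescForT1 }
//   aliasToWork:         { "a" -> tableScanOperatorForT1 }
// Each input path maps to the aliases that read it, and each alias maps to
// the root of its map-side operator tree.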
// map<->reduce interface
// schema of the map-reduce 'key' object - this is homogeneous
private TableDesc keyDesc;
// schema of the map-reduce 'val' object - this is heterogeneous
private List<TableDesc> tagToValueDesc;
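// For example (hypothetical), in a two-way join each reduce-side row carries a
// tag identifying its input: tagToValueDesc.get(0) would describe the value
// schema of the first input and tagToValueDesc.get(1) that of the second,
// while keyDesc describes the single shared key schema.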
private Operator<?> reducer;
private Integer numReduceTasks;
private Integer numMapTasks;
private Long maxSplitSize;
private Long minSplitSize;
private Long minSplitSizePerNode;
private Long minSplitSizePerRack;
private boolean needsTagging;
private boolean hadoopSupportsSplittable;
private MapredLocalWork mapLocalWork;
private String inputformat;
private String indexIntermediateFile;
private boolean gatheringStats;
private String tmpHDFSFileURI;
private LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtxMap;
private QBJoinTree joinTree;
private boolean mapperCannotSpanPartns;
// used to indicate the input is sorted, and so a BinarySearchRecordReader should be used
private boolean inputFormatSorted = false;
private transient boolean useBucketizedHiveInputFormat;
// if true, this is the map-reduce task that writes the final data; any
// optional merge task that follows is ignored
private boolean finalMapRed = false;
// If this map reduce task has a FileSinkOperator, and bucketing/sorting metadata can be
// inferred about the data being written by that operator, these are mappings from the directory
// that operator writes into to the bucket/sort columns for that data.
private final Map<String, List<BucketCol>> bucketedColsByDirectory =
new HashMap<String, List<BucketCol>>();
private final Map<String, List<SortCol>> sortedColsByDirectory =
new HashMap<String, List<SortCol>>();
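// A hypothetical entry: "hdfs:/tmp/out1" -> [<BucketCol for column userid>]
// would record that the FileSinkOperator writing into that directory produces
// data bucketed on userid.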
public MapredWork() {
aliasToPartnInfo = new LinkedHashMap<String, PartitionDesc>();
}
public MapredWork(
final String command,
final LinkedHashMap<String, ArrayList<String>> pathToAliases,
final LinkedHashMap<String, PartitionDesc> pathToPartitionInfo,
final LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork,
final TableDesc keyDesc, List<TableDesc> tagToValueDesc,
final Operator<?> reducer, final Integer numReduceTasks,
final MapredLocalWork mapLocalWork,
final boolean hadoopSupportsSplittable) {
this.command = command;
this.pathToAliases = pathToAliases;
this.pathToPartitionInfo = pathToPartitionInfo;
this.aliasToWork = aliasToWork;
this.keyDesc = keyDesc;
this.tagToValueDesc = tagToValueDesc;
this.reducer = reducer;
this.numReduceTasks = numReduceTasks;
this.mapLocalWork = mapLocalWork;
aliasToPartnInfo = new LinkedHashMap<String, PartitionDesc>();
this.hadoopSupportsSplittable = hadoopSupportsSplittable;
maxSplitSize = null;
minSplitSize = null;
minSplitSizePerNode = null;
minSplitSizePerRack = null;
}
public String getCommand() {
return command;
}
public void setCommand(final String command) {
this.command = command;
}
@Explain(displayName = "Path -> Alias", normalExplain = false)
public LinkedHashMap<String, ArrayList<String>> getPathToAliases() {
return pathToAliases;
}
public void setPathToAliases(
final LinkedHashMap<String, ArrayList<String>> pathToAliases) {
this.pathToAliases = pathToAliases;
}
@Explain(displayName = "Truncated Path -> Alias", normalExplain = false)
/**
* This is used to display and verify output of "Path -> Alias" in test framework.
*
* {@link QTestUtil} masks "Path -> Alias" and makes verification impossible.
* By keeping "Path -> Alias" intact and adding a new display name which is not
* masked by {@link QTestUtil} by removing prefix.
*
* Notes: we would still be masking for intermediate directories.
*
* @return
*/
public Map<String, ArrayList<String>> getTruncatedPathToAliases() {
Map<String, ArrayList<String>> trunPathToAliases = new LinkedHashMap<String, ArrayList<String>>();
Iterator<Entry<String, ArrayList<String>>> itr = this.pathToAliases.entrySet().iterator();
while (itr.hasNext()) {
final Entry<String, ArrayList<String>> entry = itr.next();
String origKey = entry.getKey();
String newKey = PlanUtils.removePrefixFromWarehouseConfig(origKey);
ArrayList<String> value = entry.getValue();
trunPathToAliases.put(newKey, value);
}
return trunPathToAliases;
}
@Explain(displayName = "Path -> Partition", normalExplain = false)
public LinkedHashMap<String, PartitionDesc> getPathToPartitionInfo() {
return pathToPartitionInfo;
}
public void setPathToPartitionInfo(
final LinkedHashMap<String, PartitionDesc> pathToPartitionInfo) {
this.pathToPartitionInfo = pathToPartitionInfo;
}
/**
* @return the aliasToPartnInfo
*/
public LinkedHashMap<String, PartitionDesc> getAliasToPartnInfo() {
return aliasToPartnInfo;
}
/**
* @param aliasToPartnInfo
* the aliasToPartnInfo to set
*/
public void setAliasToPartnInfo(
LinkedHashMap<String, PartitionDesc> aliasToPartnInfo) {
this.aliasToPartnInfo = aliasToPartnInfo;
}
@Explain(displayName = "Alias -> Map Operator Tree")
public LinkedHashMap<String, Operator<? extends OperatorDesc>> getAliasToWork() {
return aliasToWork;
}
public void setAliasToWork(
final LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork) {
this.aliasToWork = aliasToWork;
}
/**
* @return the mapredLocalWork
*/
@Explain(displayName = "Local Work")
public MapredLocalWork getMapLocalWork() {
return mapLocalWork;
}
/**
* @param mapLocalWork
* the mapredLocalWork to set
*/
public void setMapLocalWork(final MapredLocalWork mapLocalWork) {
this.mapLocalWork = mapLocalWork;
}
public TableDesc getKeyDesc() {
return keyDesc;
}
/**
* If the plan has a reducer and, correspondingly, a ReduceSink, store the
* TableDesc pointing to the keySerializeInfo of that ReduceSink.
*
* @param keyDesc
*/
public void setKeyDesc(final TableDesc keyDesc) {
this.keyDesc = keyDesc;
}
public List<TableDesc> getTagToValueDesc() {
return tagToValueDesc;
}
public void setTagToValueDesc(final List<TableDesc> tagToValueDesc) {
this.tagToValueDesc = tagToValueDesc;
}
@Explain(displayName = "Reduce Operator Tree")
public Operator<?> getReducer() {
return reducer;
}
@Explain(displayName = "Split Sample")
public HashMap<String, SplitSample> getNameToSplitSample() {
return nameToSplitSample;
}
public void setNameToSplitSample(HashMap<String, SplitSample> nameToSplitSample) {
this.nameToSplitSample = nameToSplitSample;
}
public void setReducer(final Operator<?> reducer) {
this.reducer = reducer;
}
public Integer getNumMapTasks() {
return numMapTasks;
}
public void setNumMapTasks(Integer numMapTasks) {
this.numMapTasks = numMapTasks;
}
/**
* If the number of reducers is -1, the runtime will automatically determine
* it from the input data size.
*
* The number of reducers will be a positive number only when the target
* table is bucketed into N buckets (through CREATE TABLE). This feature is
* not supported yet, so the number of reducers will always be -1 for now.
*/
public Integer getNumReduceTasks() {
return numReduceTasks;
}
public void setNumReduceTasks(final Integer numReduceTasks) {
this.numReduceTasks = numReduceTasks;
}
@Explain(displayName = "Path -> Bucketed Columns", normalExplain = false)
public Map<String, List<BucketCol>> getBucketedColsByDirectory() {
return bucketedColsByDirectory;
}
@Explain(displayName = "Path -> Sorted Columns", normalExplain = false)
public Map<String, List<SortCol>> getSortedColsByDirectory() {
return sortedColsByDirectory;
}
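/**
* Registers one unit of map-side work: records that {@code alias} reads
* {@code path} (described by {@code pd}) and is processed by the operator
* tree rooted at {@code work}. A minimal sketch of a call, with invented
* names (work, tableScanOp, partDesc):
*
* <pre>{@code
* work.addMapWork("hdfs:/warehouse/t1", "a", tableScanOp, partDesc);
* }</pre>
*
* Throws RuntimeException if the alias is already registered for the path
* or already has an operator tree.
*/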
@SuppressWarnings("nls")
public void addMapWork(String path, String alias, Operator<?> work,
PartitionDesc pd) {
ArrayList<String> curAliases = pathToAliases.get(path);
if (curAliases == null) {
assert (pathToPartitionInfo.get(path) == null);
curAliases = new ArrayList<String>();
pathToAliases.put(path, curAliases);
pathToPartitionInfo.put(path, pd);
} else {
assert (pathToPartitionInfo.get(path) != null);
}
for (String oneAlias : curAliases) {
if (oneAlias.equals(alias)) {
throw new RuntimeException("Multiple aliases named: " + alias
+ " for path: " + path);
}
}
curAliases.add(alias);
if (aliasToWork.get(alias) != null) {
throw new RuntimeException("Existing work for alias: " + alias);
}
aliasToWork.put(alias, work);
}
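/**
* Checks the reducer-side invariant: a plan with numReduceTasks >= 1 must
* have a reduce operator tree, and a plan with numReduceTasks == 0 must not.
*
* @return an error message describing the inconsistency, or null if the
* plan is consistent
*/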
@SuppressWarnings("nls")
public String isInvalid() {
if ((getNumReduceTasks() >= 1) && (getReducer() == null)) {
return "Reducers > 0 but no reduce operator";
}
if ((getNumReduceTasks() == 0) && (getReducer() != null)) {
return "Reducers == 0 but reduce operator specified";
}
return null;
}
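/**
* Serializes this plan via {@link Utilities#serializeMapRedWork} and returns
* the resulting XML, which is mainly useful for logging and debugging.
*/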
public String toXML() {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
Utilities.serializeMapRedWork(this, baos);
return (baos.toString());
}
// non bean
/**
* For each map-side operator, stores the alias the operator is working on
* behalf of in the operator's runtime state. This is used by the ReduceSink
* operator, but could be useful for debugging as well.
*/
private void setAliases() {
if (aliasToWork == null) {
return;
}
for (String oneAlias : aliasToWork.keySet()) {
aliasToWork.get(oneAlias).setAlias(oneAlias);
}
}
/**
* Derive additional attributes to be rendered by EXPLAIN.
*/
public void deriveExplainAttributes() {
if (pathToPartitionInfo != null) {
for (Map.Entry<String, PartitionDesc> entry : pathToPartitionInfo.entrySet()) {
entry.getValue().deriveBaseFileName(entry.getKey());
}
}
if (mapLocalWork != null) {
mapLocalWork.deriveExplainAttributes();
}
}
public void initialize() {
setAliases();
}
@Explain(displayName = "Needs Tagging", normalExplain = false)
public boolean getNeedsTagging() {
return needsTagging;
}
public void setNeedsTagging(boolean needsTagging) {
this.needsTagging = needsTagging;
}
public boolean getHadoopSupportsSplittable() {
return hadoopSupportsSplittable;
}
public void setHadoopSupportsSplittable(boolean hadoopSupportsSplittable) {
this.hadoopSupportsSplittable = hadoopSupportsSplittable;
}
public Long getMaxSplitSize() {
return maxSplitSize;
}
public void setMaxSplitSize(Long maxSplitSize) {
this.maxSplitSize = maxSplitSize;
}
public Long getMinSplitSize() {
return minSplitSize;
}
public void setMinSplitSize(Long minSplitSize) {
this.minSplitSize = minSplitSize;
}
public Long getMinSplitSizePerNode() {
return minSplitSizePerNode;
}
public void setMinSplitSizePerNode(Long minSplitSizePerNode) {
this.minSplitSizePerNode = minSplitSizePerNode;
}
public Long getMinSplitSizePerRack() {
return minSplitSizePerRack;
}
public void setMinSplitSizePerRack(Long minSplitSizePerRack) {
this.minSplitSizePerRack = minSplitSizePerRack;
}
public String getInputformat() {
return inputformat;
}
public void setInputformat(String inputformat) {
this.inputformat = inputformat;
}
public String getIndexIntermediateFile() {
return indexIntermediateFile;
}
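/**
* Appends {@code fileName} to the comma-separated list of index intermediate
* files, creating the list on first use.
*/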
public void addIndexIntermediateFile(String fileName) {
if (this.indexIntermediateFile == null) {
this.indexIntermediateFile = fileName;
} else {
this.indexIntermediateFile += "," + fileName;
}
}
public void setGatheringStats(boolean gatherStats) {
this.gatheringStats = gatherStats;
}
public boolean isGatheringStats() {
return this.gatheringStats;
}
public void setMapperCannotSpanPartns(boolean mapperCannotSpanPartns) {
this.mapperCannotSpanPartns = mapperCannotSpanPartns;
}
public boolean isMapperCannotSpanPartns() {
return this.mapperCannotSpanPartns;
}
public String getTmpHDFSFileURI() {
return tmpHDFSFileURI;
}
public void setTmpHDFSFileURI(String tmpHDFSFileURI) {
this.tmpHDFSFileURI = tmpHDFSFileURI;
}
public QBJoinTree getJoinTree() {
return joinTree;
}
public void setJoinTree(QBJoinTree joinTree) {
this.joinTree = joinTree;
}
public LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> getOpParseCtxMap() {
return opParseCtxMap;
}
public void setOpParseCtxMap(
LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtxMap) {
this.opParseCtxMap = opParseCtxMap;
}
public boolean isInputFormatSorted() {
return inputFormatSorted;
}
public void setInputFormatSorted(boolean inputFormatSorted) {
this.inputFormatSorted = inputFormatSorted;
}
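/**
* Registers the given merge path for a dynamic-partition insert stored as
* sub-directories, wiring its aliases and partition descriptor into
* pathToAliases and pathToPartitionInfo.
*/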
public void resolveDynamicPartitionStoredAsSubDirsMerge(HiveConf conf, Path path,
TableDesc tblDesc, ArrayList<String> aliases, PartitionDesc partDesc) {
pathToAliases.put(path.toString(), aliases);
pathToPartitionInfo.put(path.toString(), partDesc);
}
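/**
* Collects every operator in the plan: the reducer (if present), the
* map-side root operator for each alias, and all of their descendants via a
* breadth-first walk. A hypothetical use (the work variable is invented):
*
* <pre>{@code
* for (Operator<?> op : work.getAllOperators()) {
*   System.out.println(op.getOperatorId());
* }
* }</pre>
*/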
public List<Operator<?>> getAllOperators() {
ArrayList<Operator<?>> opList = new ArrayList<Operator<?>>();
ArrayList<Operator<?>> returnList = new ArrayList<Operator<?>>();
if (getReducer() != null) {
opList.add(getReducer());
}
Map<String, ArrayList<String>> pa = getPathToAliases();
if (pa != null) {
for (List<String> ls : pa.values()) {
for (String a : ls) {
Operator<?> op = getAliasToWork().get(a);
if (op != null) {
opList.add(op);
}
}
}
}
//recursively add all children
while (!opList.isEmpty()) {
Operator<?> op = opList.remove(0);
if (op.getChildOperators() != null) {
opList.addAll(op.getChildOperators());
}
returnList.add(op);
}
return returnList;
}
public boolean isUseBucketizedHiveInputFormat() {
return useBucketizedHiveInputFormat;
}
public void setUseBucketizedHiveInputFormat(boolean useBucketizedHiveInputFormat) {
this.useBucketizedHiveInputFormat = useBucketizedHiveInputFormat;
}
public boolean isFinalMapRed() {
return finalMapRed;
}
public void setFinalMapRed(boolean finalMapRed) {
this.finalMapRed = finalMapRed;
}
}