/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.plan;
import java.io.Serializable;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import com.facebook.presto.hive.$internal.org.apache.commons.lang3.tuple.ImmutablePair;
import com.facebook.presto.hive.$internal.org.apache.commons.lang3.tuple.Pair;
import com.facebook.presto.hive.$internal.org.slf4j.Logger;
import com.facebook.presto.hive.$internal.org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.tez.DagUtils;
import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
/**
* TezWork. This class encapsulates all the work objects that can be executed
* in a single tez job. Currently it's basically a tree with MapWork at the
* leaves and and ReduceWork in all other nodes.
*
*/
@SuppressWarnings("serial")
@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED },
vectorization = Vectorization.SUMMARY_PATH)
public class TezWork extends AbstractOperatorDesc {
public enum VertexType {
AUTO_INITIALIZED_EDGES, // no custom vertex or edge
INITIALIZED_EDGES, // custom vertex and custom edge but single MR Input
MULTI_INPUT_INITIALIZED_EDGES, // custom vertex, custom edge and multi MR Input
MULTI_INPUT_UNINITIALIZED_EDGES // custom vertex, no custom edge, multi MR Input
;
public static boolean isCustomInputType(VertexType vertex) {
if ((vertex == null) || (vertex == AUTO_INITIALIZED_EDGES)) {
return false;
} else {
return true;
}
}
}
private static transient final Logger LOG = LoggerFactory.getLogger(TezWork.class);
private static final AtomicInteger counter = new AtomicInteger(1);
private final String dagId;
private final String queryName;
private final Set roots = new LinkedHashSet();
private final Set leaves = new LinkedHashSet();
private final Map> workGraph = new HashMap>();
private final Map> invertedWorkGraph = new HashMap>();
private final Map, TezEdgeProperty> edgeProperties =
new HashMap, TezEdgeProperty>();
private final Map workVertexTypeMap = new HashMap();
public TezWork(String queryId) {
this(queryId, null);
}
public TezWork(String queryId, Configuration conf) {
this.dagId = queryId + ":" + counter.getAndIncrement();
String queryName = (conf != null) ? DagUtils.getUserSpecifiedDagName(conf) : null;
if (queryName == null) {
queryName = this.dagId;
}
this.queryName = queryName;
}
@Explain(displayName = "DagName")
public String getName() {
return queryName;
}
@Explain(displayName = "DagId")
public String getDagId() {
return dagId;
}
/**
* getWorkMap returns a map of "vertex name" to BaseWork
*/
@Explain(displayName = "Vertices", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED },
vectorization = Vectorization.SUMMARY_PATH)
public Map getWorkMap() {
Map result = new LinkedHashMap();
for (BaseWork w: getAllWork()) {
result.put(w.getName(), w);
}
return result;
}
/**
* getAllWork returns a topologically sorted list of BaseWork
*/
public List getAllWork() {
List result = new LinkedList();
Set seen = new HashSet();
for (BaseWork leaf: leaves) {
// make sure all leaves are visited at least once
visit(leaf, seen, result);
}
return result;
}
public Collection getAllWorkUnsorted() {
return workGraph.keySet();
}
private void visit(BaseWork child, Set seen, List result) {
if (seen.contains(child)) {
// don't visit multiple times
return;
}
seen.add(child);
for (BaseWork parent: getParents(child)) {
if (!seen.contains(parent)) {
visit(parent, seen, result);
}
}
result.add(child);
}
/**
* add all nodes in the collection without any connections
*/
public void addAll(Collection c) {
for (BaseWork w: c) {
this.add(w);
}
}
/**
* add all nodes in the collection without any connections
*/
public void addAll(BaseWork[] bws) {
for (BaseWork w: bws) {
this.add(w);
}
}
/**
* add creates a new node in the graph without any connections
*/
public void add(BaseWork w) {
if (workGraph.containsKey(w)) {
return;
}
workGraph.put(w, new LinkedList());
invertedWorkGraph.put(w, new LinkedList());
roots.add(w);
leaves.add(w);
}
/**
* disconnect removes an edge between a and b. Both a and
* b have to be in the graph. If there is no matching edge
* no change happens.
*/
public void disconnect(BaseWork a, BaseWork b) {
workGraph.get(a).remove(b);
invertedWorkGraph.get(b).remove(a);
if (getParents(b).isEmpty()) {
roots.add(b);
}
if (getChildren(a).isEmpty()) {
leaves.add(a);
}
}
/**
* getRoots returns all nodes that do not have a parent.
*/
public Set getRoots() {
return new HashSet(roots);
}
/**
* getLeaves returns all nodes that do not have a child
*/
public Set getLeaves() {
return new HashSet(leaves);
}
/**
* getParents returns all the nodes with edges leading into work
*/
public List getParents(BaseWork work) {
assert invertedWorkGraph.containsKey(work)
&& invertedWorkGraph.get(work) != null;
return new LinkedList(invertedWorkGraph.get(work));
}
/**
* getChildren returns all the nodes with edges leading out of work
*/
public List getChildren(BaseWork work) {
assert workGraph.containsKey(work)
&& workGraph.get(work) != null;
return new LinkedList(workGraph.get(work));
}
/**
* remove removes a node from the graph and removes all edges with
* work as start or end point. No change to the graph if the node
* doesn't exist.
*/
public void remove(BaseWork work) {
if (!workGraph.containsKey(work)) {
return;
}
List children = getChildren(work);
List parents = getParents(work);
for (BaseWork w: children) {
invertedWorkGraph.get(w).remove(work);
if (invertedWorkGraph.get(w).size() == 0) {
roots.add(w);
}
}
for (BaseWork w: parents) {
workGraph.get(w).remove(work);
if (workGraph.get(w).size() == 0) {
leaves.add(w);
}
}
roots.remove(work);
leaves.remove(work);
workGraph.remove(work);
invertedWorkGraph.remove(work);
}
public EdgeType getEdgeType(BaseWork a, BaseWork b) {
return edgeProperties.get(new ImmutablePair(a,b)).getEdgeType();
}
/**
* returns the edge type connecting work a and b
*/
public TezEdgeProperty getEdgeProperty(BaseWork a, BaseWork b) {
return edgeProperties.get(new ImmutablePair(a,b));
}
/*
* Dependency is a class used for explain
*/
public class Dependency implements Serializable, Comparable {
public BaseWork w;
public EdgeType type;
@Explain(displayName = "Name")
public String getName() {
return w.getName();
}
@Explain(displayName = "Type")
public String getType() {
return type.toString();
}
@Override
public int compareTo(Dependency o) {
int compare = getName().compareTo(o.getName());
if (compare == 0) {
compare = getType().compareTo(o.getType());
}
return compare;
}
}
@Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED },
vectorization = Vectorization.SUMMARY_PATH)
public Map> getDependencyMap() {
Map> result = new LinkedHashMap>();
for (Map.Entry> entry: invertedWorkGraph.entrySet()) {
List dependencies = new LinkedList();
for (BaseWork d: entry.getValue()) {
Dependency dependency = new Dependency();
dependency.w = d;
dependency.type = getEdgeType(d, entry.getKey());
dependencies.add(dependency);
}
if (!dependencies.isEmpty()) {
Collections.sort(dependencies);
result.put(entry.getKey().getName(), dependencies);
}
}
return result;
}
private static final String MR_JAR_PROPERTY = "tmpjars";
/**
* Calls configureJobConf on instances of work that are part of this TezWork.
* Uses the passed job configuration to extract "tmpjars" added by these, so that Tez
* could add them to the job proper Tez way. This is a very hacky way but currently
* there's no good way to get these JARs - both storage handler interface, and HBase
* code, would have to change to get the list directly (right now it adds to tmpjars).
* This will happen in 0.14 hopefully.
* @param jobConf Job configuration.
* @return List of files added to tmpjars by storage handlers.
*/
public String[] configureJobConfAndExtractJars(JobConf jobConf) {
String[] oldTmpJars = jobConf.getStrings(MR_JAR_PROPERTY);
jobConf.setStrings(MR_JAR_PROPERTY, new String[0]);
for (BaseWork work : workGraph.keySet()) {
work.configureJobConf(jobConf);
}
String[] newTmpJars = jobConf.getStrings(MR_JAR_PROPERTY);
if (oldTmpJars != null || newTmpJars != null) {
String[] finalTmpJars;
if (oldTmpJars == null || oldTmpJars.length == 0) {
// Avoid a copy when oldTmpJars is null or empty
finalTmpJars = newTmpJars;
} else if (newTmpJars == null || newTmpJars.length == 0) {
// Avoid a copy when newTmpJars is null or empty
finalTmpJars = oldTmpJars;
} else {
// Both are non-empty, only copy now
finalTmpJars = new String[oldTmpJars.length + newTmpJars.length];
System.arraycopy(oldTmpJars, 0, finalTmpJars, 0, oldTmpJars.length);
System.arraycopy(newTmpJars, 0, finalTmpJars, oldTmpJars.length, newTmpJars.length);
}
jobConf.setStrings(MR_JAR_PROPERTY, finalTmpJars);
return finalTmpJars;
}
return newTmpJars;
}
/**
* connect adds an edge between a and b. Both nodes have
* to be added prior to calling connect.
* @param
*/
public void connect(BaseWork a, BaseWork b,
TezEdgeProperty edgeProp) {
workGraph.get(a).add(b);
invertedWorkGraph.get(b).add(a);
roots.remove(b);
leaves.remove(a);
ImmutablePair workPair = new ImmutablePair(a, b);
edgeProperties.put(workPair, edgeProp);
}
public void setVertexType(BaseWork w, VertexType incomingVertexType) {
VertexType vertexType = workVertexTypeMap.get(w);
if (vertexType == null) {
vertexType = VertexType.AUTO_INITIALIZED_EDGES;
}
switch (vertexType) {
case INITIALIZED_EDGES:
if (incomingVertexType == VertexType.MULTI_INPUT_UNINITIALIZED_EDGES) {
vertexType = VertexType.MULTI_INPUT_INITIALIZED_EDGES;
}
break;
case MULTI_INPUT_INITIALIZED_EDGES:
// nothing to do
break;
case MULTI_INPUT_UNINITIALIZED_EDGES:
if (incomingVertexType == VertexType.INITIALIZED_EDGES) {
vertexType = VertexType.MULTI_INPUT_INITIALIZED_EDGES;
}
break;
case AUTO_INITIALIZED_EDGES:
vertexType = incomingVertexType;
break;
default:
break;
}
workVertexTypeMap.put(w, vertexType);
}
public VertexType getVertexType(BaseWork w) {
return workVertexTypeMap.get(w);
}
public boolean getLlapMode() {
for (BaseWork work : getAllWork()) {
if (work.getLlapMode()) {
return true;
}
}
return false;
}
}