All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.stratosphere.nephele.jobgraph.JobGraph Maven / Gradle / Ivy

There is a newer version: 0.5.2-hadoop2
Show newest version
/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.nephele.jobgraph;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.Vector;

import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.core.fs.FSDataInputStream;
import eu.stratosphere.core.fs.FileStatus;
import eu.stratosphere.core.fs.FileSystem;
import eu.stratosphere.core.fs.Path;
import eu.stratosphere.core.io.IOReadableWritable;
import eu.stratosphere.core.io.StringRecord;
import eu.stratosphere.nephele.execution.librarycache.LibraryCacheManager;
import eu.stratosphere.util.ClassUtils;

/**
 * A job graph represents an entire job in Nephele. A job graph must consists at least of one job vertex
 * and must be acyclic.
 * 
 */
public class JobGraph implements IOReadableWritable {

	/**
	 * List of input vertices included in this job graph.
	 */
	private Map inputVertices = new HashMap();

	/**
	 * List of output vertices included in this job graph.
	 */
	private Map outputVertices = new HashMap();

	/**
	 * List of task vertices included in this job graph.
	 */
	private Map taskVertices = new HashMap();

	/**
	 * ID of this job.
	 */
	private JobID jobID;

	/**
	 * Name of this job.
	 */
	private String jobName;

	/**
	 * The job configuration attached to this job.
	 */
	private Configuration jobConfiguration = new Configuration();

	/**
	 * The configuration which should be applied to the task managers involved in processing this job.
	 */
	private final Configuration taskManagerConfiguration = new Configuration();

	/**
	 * List of JAR files required to run this job.
	 */
	private final ArrayList userJars = new ArrayList();

	/**
	 * Size of the buffer to be allocated for transferring attached files.
	 */
	private static final int BUFFERSIZE = 8192;

	/**
	 * Constructs a new job graph with a random job ID.
	 */
	public JobGraph() {
		this.jobID = new JobID();
	}

	/**
	 * Constructs a new job graph with the given name and a random job ID.
	 * 
	 * @param jobName
	 *        the name for this job graph
	 */
	public JobGraph(final String jobName) {
		this();
		this.jobName = jobName;
	}

	/**
	 * Returns the name assigned to the job graph.
	 * 
	 * @return the name assigned to the job graph
	 */
	public String getName() {
		return this.jobName;
	}

	/**
	 * Returns the configuration object for this job if it is set.
	 * 
	 * @return the configuration object for this job, or null if it is not set
	 */
	public Configuration getJobConfiguration() {

		return this.jobConfiguration;
	}

	/**
	 * Returns the configuration object distributed among the task managers
	 * before they start processing this job.
	 * 
	 * @return the configuration object for the task managers, or null if it is not set
	 */
	public Configuration getTaskmanagerConfiguration() {

		return this.taskManagerConfiguration;
	}

	/**
	 * Adds a new input vertex to the job graph if it is not already included.
	 * 
	 * @param inputVertex
	 *        the new input vertex to be added
	 */
	public void addVertex(final AbstractJobInputVertex inputVertex) {

		if (!inputVertices.containsKey(inputVertex.getID())) {
			inputVertices.put(inputVertex.getID(), inputVertex);
		}
	}

	/**
	 * Adds a new task vertex to the job graph if it is not already included.
	 * 
	 * @param taskVertex
	 *        the new task vertex to be added
	 */
	public void addVertex(final JobTaskVertex taskVertex) {

		if (!taskVertices.containsKey(taskVertex.getID())) {
			taskVertices.put(taskVertex.getID(), taskVertex);
		}
	}

	/**
	 * Adds a new output vertex to the job graph if it is not already included.
	 * 
	 * @param outputVertex
	 *        the new output vertex to be added
	 */
	public void addVertex(final AbstractJobOutputVertex outputVertex) {

		if (!outputVertices.containsKey(outputVertex.getID())) {
			outputVertices.put(outputVertex.getID(), outputVertex);
		}
	}

	/**
	 * Returns the number of input vertices registered with the job graph.
	 * 
	 * @return the number of input vertices registered with the job graph
	 */
	public int getNumberOfInputVertices() {
		return this.inputVertices.size();
	}

	/**
	 * Returns the number of output vertices registered with the job graph.
	 * 
	 * @return the number of output vertices registered with the job graph
	 */
	public int getNumberOfOutputVertices() {
		return this.outputVertices.size();
	}

	/**
	 * Returns the number of task vertices registered with the job graph.
	 * 
	 * @return the number of task vertices registered with the job graph
	 */
	public int getNumberOfTaskVertices() {
		return this.taskVertices.size();
	}

	/**
	 * Returns an iterator to iterate all input vertices registered with the job graph.
	 * 
	 * @return an iterator to iterate all input vertices registered with the job graph
	 */
	public Iterator getInputVertices() {

		final Collection coll = this.inputVertices.values();

		return coll.iterator();
	}

	/**
	 * Returns an iterator to iterate all output vertices registered with the job graph.
	 * 
	 * @return an iterator to iterate all output vertices registered with the job graph
	 */
	public Iterator getOutputVertices() {

		final Collection coll = this.outputVertices.values();

		return coll.iterator();
	}

	/**
	 * Returns an iterator to iterate all task vertices registered with the job graph.
	 * 
	 * @return an iterator to iterate all task vertices registered with the job graph
	 */
	public Iterator getTaskVertices() {

		final Collection coll = this.taskVertices.values();

		return coll.iterator();
	}

	/**
	 * Returns the number of all job vertices registered with this job graph.
	 * 
	 * @return the number of all job vertices registered with this job graph
	 */
	public int getNumberOfVertices() {

		return this.inputVertices.size() + this.outputVertices.size() + this.taskVertices.size();
	}

	/**
	 * Returns an array of all job vertices than can be reached when traversing the job graph from the input vertices.
	 * 
	 * @return an array of all job vertices than can be reached when traversing the job graph from the input vertices
	 */
	public AbstractJobVertex[] getAllReachableJobVertices() {

		final Vector collector = new Vector();
		collectVertices(null, collector);
		return collector.toArray(new AbstractJobVertex[0]);
	}

	/**
	 * Returns an array of all job vertices that are registered with the job graph. The order in which the vertices
	 * appear in the list is not defined.
	 * 
	 * @return an array of all job vertices that are registered with the job graph
	 */
	public AbstractJobVertex[] getAllJobVertices() {

		int i = 0;
		final AbstractJobVertex[] vertices = new AbstractJobVertex[inputVertices.size() + outputVertices.size()
			+ taskVertices.size()];

		final Iterator iv = getInputVertices();
		while (iv.hasNext()) {
			vertices[i++] = iv.next();
		}

		final Iterator ov = getOutputVertices();
		while (ov.hasNext()) {
			vertices[i++] = ov.next();
		}

		final Iterator tv = getTaskVertices();
		while (tv.hasNext()) {
			vertices[i++] = tv.next();
		}

		return vertices;
	}

	/**
	 * Auxiliary method to collect all vertices which are reachable from the input vertices.
	 * 
	 * @param jv
	 *        the currently considered job vertex
	 * @param collector
	 *        a temporary list to store the vertices that have already been visisted
	 */
	private void collectVertices(final AbstractJobVertex jv, final List collector) {

		if (jv == null) {
			final Iterator iter = getInputVertices();
			while (iter.hasNext()) {
				collectVertices(iter.next(), collector);
			}
		} else {

			if (!collector.contains(jv)) {
				collector.add(jv);
			} else {
				return;
			}

			for (int i = 0; i < jv.getNumberOfForwardConnections(); i++) {
				collectVertices(jv.getForwardConnection(i).getConnectedVertex(), collector);
			}
		}
	}

	/**
	 * Returns the ID of the job.
	 * 
	 * @return the ID of the job
	 */
	public JobID getJobID() {
		return this.jobID;
	}

	/**
	 * Searches for a vertex with a matching ID and returns it.
	 * 
	 * @param id
	 *        the ID of the vertex to search for
	 * @return the vertex with the matching ID or null if no vertex with such ID could be found
	 */
	public AbstractJobVertex findVertexByID(final JobVertexID id) {

		if (this.inputVertices.containsKey(id)) {
			return this.inputVertices.get(id);
		}

		if (this.outputVertices.containsKey(id)) {
			return this.outputVertices.get(id);
		}

		if (this.taskVertices.containsKey(id)) {
			return this.taskVertices.get(id);
		}

		return null;
	}

	/**
	 * Checks if the job vertex with the given ID is registered with the job graph.
	 * 
	 * @param id
	 *        the ID of the vertex to search for
	 * @return true if a vertex with the given ID is registered with the job graph, false
	 *         otherwise.
	 */
	private boolean includedInJobGraph(final JobVertexID id) {

		if (this.inputVertices.containsKey(id)) {
			return true;
		}

		if (this.outputVertices.containsKey(id)) {
			return true;
		}

		if (this.taskVertices.containsKey(id)) {
			return true;
		}

		return false;
	}

	/**
	 * Checks if the job graph is weakly connected.
	 * 
	 * @return true if the job graph is weakly connected, otherwise false
	 */
	public boolean isWeaklyConnected() {

		final AbstractJobVertex[] reachable = getAllReachableJobVertices();
		final AbstractJobVertex[] all = getAllJobVertices();

		// Check if number if reachable vertices matches number of registered vertices
		if (reachable.length != all.length) {
			return false;
		}

		final HashMap tmp = new HashMap();
		for (int i = 0; i < reachable.length; i++) {
			tmp.put(reachable[i].getID(), reachable[i]);
		}

		// Check if all is subset of reachable
		for (int i = 0; i < all.length; i++) {
			if (!tmp.containsKey(all[i].getID())) {
				return false;
			}
		}

		// Check if reachable is a subset of all
		for (int i = 0; i < reachable.length; i++) {
			if (!includedInJobGraph(reachable[i].getID())) {
				return false;
			}
		}

		return true;
	}

	/**
	 * Checks if the job graph is acyclic.
	 * 
	 * @return true if the job graph is acyclic, false otherwise
	 */
	public boolean isAcyclic() {

		// Tarjan's algorithm to detect strongly connected componenent of a graph
		final AbstractJobVertex[] reachable = getAllReachableJobVertices();
		final HashMap indexMap = new HashMap();
		final HashMap lowLinkMap = new HashMap();
		final Stack stack = new Stack();
		final Integer index = Integer.valueOf(0);

		for (int i = 0; i < reachable.length; i++) {
			if (!indexMap.containsKey(reachable[i])) {
				if (!tarjan(reachable[i], index, indexMap, lowLinkMap, stack)) {
					return false;
				}
			}
		}

		return true;
	}

	/**
	 * Auxiliary method implementing Tarjan's algorithm for strongly-connected components to determine whether the job
	 * graph is acyclic.
	 */
	private boolean tarjan(final AbstractJobVertex jv, Integer index,
			final HashMap indexMap, final HashMap lowLinkMap,
			final Stack stack) {

		indexMap.put(jv, Integer.valueOf(index));
		lowLinkMap.put(jv, Integer.valueOf(index));
		index = Integer.valueOf(index.intValue() + 1);
		stack.push(jv);

		for (int i = 0; i < jv.getNumberOfForwardConnections(); i++) {

			final AbstractJobVertex jv2 = jv.getForwardConnection(i).getConnectedVertex();
			if (!indexMap.containsKey(jv2) || stack.contains(jv2)) {
				if (!indexMap.containsKey(jv2)) {
					if (!tarjan(jv2, index, indexMap, lowLinkMap, stack)) {
						return false;
					}
				}
				if (lowLinkMap.get(jv) > lowLinkMap.get(jv2)) {
					lowLinkMap.put(jv, Integer.valueOf(lowLinkMap.get(jv2)));
				}
			}
		}

		if (lowLinkMap.get(jv).equals(indexMap.get(jv))) {

			int count = 0;
			while (stack.size() > 0) {
				final AbstractJobVertex jv2 = stack.pop();
				if (jv == jv2) {
					break;
				}

				count++;
			}

			if (count > 0) {
				return false;
			}
		}

		return true;
	}

	/**
	 * Checks for all registered job vertices if their in-/out-degree is correct.
	 * 
	 * @return null if the in-/out-degree of all vertices is correct or the first job vertex whose
	 *         in-/out-degree is incorrect.
	 */
	public AbstractJobVertex areVertexDegreesCorrect() {

		// Check input vertices
		final Iterator iter = getInputVertices();
		while (iter.hasNext()) {

			final AbstractJobVertex jv = iter.next();

			if (jv.getNumberOfForwardConnections() < 1 || jv.getNumberOfBackwardConnections() > 0) {
				return jv;
			}
		}

		// Check task vertices
		final Iterator iter2 = getTaskVertices();
		while (iter2.hasNext()) {

			final AbstractJobVertex jv = iter2.next();

			if (jv.getNumberOfForwardConnections() < 1 || jv.getNumberOfBackwardConnections() < 1) {
				return jv;
			}
		}

		// Check output vertices
		final Iterator iter3 = getOutputVertices();
		while (iter3.hasNext()) {

			final AbstractJobVertex jv = iter3.next();

			if (jv.getNumberOfForwardConnections() > 0 || jv.getNumberOfBackwardConnections() < 1) {
				return jv;
			}
		}

		return null;
	}


	@Override
	public void read(final DataInput in) throws IOException {

		// Read job id
		this.jobID.read(in);

		// Read the job name
		this.jobName = StringRecord.readString(in);

		// Read required jar files
		readRequiredJarFiles(in);

		// First read total number of vertices;
		final int numVertices = in.readInt();

		// First, recreate each vertex and add it to reconstructionMap
		for (int i = 0; i < numVertices; i++) {
			final String className = StringRecord.readString(in);
			final JobVertexID id = new JobVertexID();
			id.read(in);
			final String vertexName = StringRecord.readString(in);

			Class c;
			try {
				c = ClassUtils.getRecordByName(className);
			} catch (ClassNotFoundException cnfe) {
				throw new IOException(cnfe.toString());
			}

			// Find constructor
			Constructor cst;
			try {
				cst = c.getConstructor(String.class, JobVertexID.class, JobGraph.class);
			} catch (SecurityException e1) {
				throw new IOException(e1.toString());
			} catch (NoSuchMethodException e1) {
				throw new IOException(e1.toString());
			}

			try {
				cst.newInstance(vertexName, id, this);
			} catch (IllegalArgumentException e) {
				throw new IOException(e.toString());
			} catch (InstantiationException e) {
				throw new IOException(e.toString());
			} catch (IllegalAccessException e) {
				throw new IOException(e.toString());
			} catch (InvocationTargetException e) {
				throw new IOException(e.toString());
			}
		}

		final JobVertexID tmpID = new JobVertexID();
		for (int i = 0; i < numVertices; i++) {

			AbstractJobVertex jv;

			tmpID.read(in);
			if (inputVertices.containsKey(tmpID)) {
				jv = inputVertices.get(tmpID);
			} else {
				if (outputVertices.containsKey(tmpID)) {
					jv = outputVertices.get(tmpID);
				} else {
					if (taskVertices.containsKey(tmpID)) {
						jv = taskVertices.get(tmpID);
					} else {
						throw new IOException("Cannot find vertex with ID " + tmpID + " in any vertex map.");
					}
				}
			}

			// Read the vertex data
			jv.read(in);
		}

		// Find the class loader for the job
		final ClassLoader cl = LibraryCacheManager.getClassLoader(this.jobID);
		if (cl == null) {
			throw new IOException("Cannot find class loader for job graph " + this.jobID);
		}

		// Re-instantiate the job configuration object and read the configuration
		this.jobConfiguration = new Configuration(cl);
		this.jobConfiguration.read(in);

		// Read the task manager configuration
		this.taskManagerConfiguration.read(in);
	}


	@Override
	public void write(final DataOutput out) throws IOException {

		// Write job ID
		this.jobID.write(out);

		// Write out job name
		StringRecord.writeString(out, this.jobName);

		final AbstractJobVertex[] allVertices = this.getAllJobVertices();

		// Write out all required jar files
		writeRequiredJarFiles(out, allVertices);

		// Write total number of vertices
		out.writeInt(allVertices.length);

		// First write out class name and id for every vertex
		for (int i = 0; i < allVertices.length; i++) {

			final String className = allVertices[i].getClass().getName();
			StringRecord.writeString(out, className);
			allVertices[i].getID().write(out);
			StringRecord.writeString(out, allVertices[i].getName());
		}

		// Now write out vertices themselves
		for (int i = 0; i < allVertices.length; i++) {
			allVertices[i].getID().write(out);
			allVertices[i].write(out);
		}

		// Write out configuration objects
		this.jobConfiguration.write(out);
		this.taskManagerConfiguration.write(out);
	}

	/**
	 * Writes the JAR files of all vertices in array jobVertices to the specified output stream.
	 * 
	 * @param out
	 *        the output stream to write the JAR files to
	 * @param jobVertices
	 *        array of job vertices whose required JAR file are to be written to the output stream
	 * @throws IOException
	 *         thrown if an error occurs while writing to the stream
	 */
	private void writeRequiredJarFiles(final DataOutput out, final AbstractJobVertex[] jobVertices) throws IOException {

		// Now check if all the collected jar files really exist
		final FileSystem fs = FileSystem.getLocalFileSystem();

		for (int i = 0; i < this.userJars.size(); i++) {
			if (!fs.exists(this.userJars.get(i))) {
				throw new IOException("Cannot find jar file " + this.userJars.get(i));
			}
		}

		// How many jar files follow?
		out.writeInt(this.userJars.size());

		for (int i = 0; i < this.userJars.size(); i++) {

			final Path jar = this.userJars.get(i);

			// Write out the actual path
			jar.write(out);

			// Write out the length of the file
			final FileStatus file = fs.getFileStatus(jar);
			out.writeLong(file.getLen());

			// Now write the jar file
			final FSDataInputStream inStream = fs.open(this.userJars.get(i));
			final byte[] buf = new byte[BUFFERSIZE];
			int read = inStream.read(buf, 0, buf.length);
			while (read > 0) {
				out.write(buf, 0, read);
				read = inStream.read(buf, 0, buf.length);
			}
		}
	}

	/**
	 * Reads required JAR files from an input stream and adds them to the
	 * library cache manager.
	 * 
	 * @param in
	 *        the data stream to read the JAR files from
	 * @throws IOException
	 *         thrown if an error occurs while reading the stream
	 */
	private void readRequiredJarFiles(final DataInput in) throws IOException {

		// Do jar files follow;
		final int numJars = in.readInt();

		if (numJars > 0) {

			for (int i = 0; i < numJars; i++) {

				final Path p = new Path();
				p.read(in);
				this.userJars.add(p);

				// Read the size of the jar file
				final long sizeOfJar = in.readLong();

				// Add the jar to the library manager
				LibraryCacheManager.addLibrary(this.jobID, p, sizeOfJar, in);
			}

		}

		// Register this job with the library cache manager
		LibraryCacheManager.register(this.jobID, this.userJars.toArray(new Path[0]));
	}

	/**
	 * Adds the path of a JAR file required to run the job on a task manager.
	 * 
	 * @param jar
	 *        path of the JAR file required to run the job on a task manager
	 */
	public void addJar(final Path jar) {

		if (jar == null) {
			return;
		}

		if (!userJars.contains(jar)) {
			userJars.add(jar);
		}
	}

	/**
	 * Returns a (possibly empty) array of paths to JAR files which are required to run the job on a task manager.
	 * 
	 * @return a (possibly empty) array of paths to JAR files which are required to run the job on a task manager
	 */
	public Path[] getJars() {

		return userJars.toArray(new Path[userJars.size()]);
	}

	/**
	 * Checks if any vertex of this job graph has an outgoing edge which is set to null. If this is the
	 * case the respective vertex is returned.
	 * 
	 * @return the vertex which has an outgoing edge set to null or null if no such vertex
	 *         exists
	 */
	public AbstractJobVertex findVertexWithNullEdges() {

		final AbstractJobVertex[] allVertices = getAllJobVertices();

		for (int i = 0; i < allVertices.length; i++) {

			for (int j = 0; j < allVertices[i].getNumberOfForwardConnections(); j++) {
				if (allVertices[i].getForwardConnection(j) == null) {
					return allVertices[i];
				}
			}

			for (int j = 0; j < allVertices[i].getNumberOfBackwardConnections(); j++) {
				if (allVertices[i].getBackwardConnection(j) == null) {
					return allVertices[i];
				}
			}
		}

		return null;
	}

	/**
	 * Checks if the instance dependency chain created with the setVertexToShareInstancesWith method is
	 * acyclic.
	 * 
	 * @return true if the dependency chain is acyclic, false otherwise
	 */
	public boolean isInstanceDependencyChainAcyclic() {

		final AbstractJobVertex[] allVertices = this.getAllJobVertices();
		final Set alreadyVisited = new HashSet();

		for (AbstractJobVertex vertex : allVertices) {

			if (alreadyVisited.contains(vertex)) {
				continue;
			}

			AbstractJobVertex vertexToShareInstancesWith = vertex.getVertexToShareInstancesWith();
			if (vertexToShareInstancesWith != null) {

				final Set cycleMap = new HashSet();

				while (vertexToShareInstancesWith != null) {

					if (cycleMap.contains(vertexToShareInstancesWith)) {
						return false;
					} else {
						alreadyVisited.add(vertexToShareInstancesWith);
						cycleMap.add(vertexToShareInstancesWith);
						vertexToShareInstancesWith = vertexToShareInstancesWith.getVertexToShareInstancesWith();
					}
				}
			}
		}

		return true;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy