// eu.stratosphere.nephele.jobmanager.splitassigner.InputSplitManager (Maven / Gradle / Ivy)
/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.nephele.jobmanager.splitassigner;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.stratosphere.configuration.GlobalConfiguration;
import eu.stratosphere.core.fs.FileInputSplit;
import eu.stratosphere.core.io.GenericInputSplit;
import eu.stratosphere.core.io.InputSplit;
import eu.stratosphere.nephele.executiongraph.ExecutionGraph;
import eu.stratosphere.nephele.executiongraph.ExecutionGroupVertex;
import eu.stratosphere.nephele.executiongraph.ExecutionGroupVertexIterator;
import eu.stratosphere.nephele.executiongraph.ExecutionVertex;
import eu.stratosphere.nephele.jobgraph.JobID;
import eu.stratosphere.nephele.jobmanager.splitassigner.file.FileInputSplitAssigner;
import eu.stratosphere.nephele.template.AbstractInputTask;
import eu.stratosphere.nephele.template.AbstractInvokable;
import eu.stratosphere.util.StringUtils;
/**
* The input split manager is responsible for serving input splits to {@link AbstractInputTask} objects at runtime.
* Before passed on to the {@link AbstractScheduler}, an {@link ExecutionGraph} is registered with the input split
* manager and all included input vertices of the graph register their generated input splits with the manager. Each
* type of input split can be assigned to a specific {@link InputSplitAssigner} which is loaded by the input split
* manager at runtime.
*
* This class is thread-safe.
*/
public final class InputSplitManager {
/**
* The logging object which is used to report information and errors.
*/
private static final Log LOG = LogFactory.getLog(InputSplitManager.class);
/**
* The prefix of the configuration key which is used to retrieve the class names of the individual
* {@link InputSplitAssigner} classes
*/
private static final String INPUT_SPLIT_CONFIG_KEY_PREFIX = "inputsplit.assigner.";
/**
* A cache which stores the mapping of group vertices to assigner objects for fast retrieval during the job
* execution.
*/
private final Map assignerCache = new ConcurrentHashMap();
/**
* A map holding an instance of each available {@link InputSplitAssigner}, accessible via the class name of the
* corresponding split type.
*/
private final Map, InputSplitAssigner> loadedAssigners = new HashMap, InputSplitAssigner>();
/**
* The input split tracker makes sure that a vertex retrieves the same sequence of input splits after being
* restarted.
*/
private final InputSplitTracker inputSplitTracker = new InputSplitTracker();
/**
* The default input split assigner which is always used if a more specific assigner cannot be found.
*/
private final InputSplitAssigner defaultAssigner = new DefaultInputSplitAssigner();
/**
* Registers a new job represented by its {@link ExecutionGraph} with the input split manager.
*
* @param executionGraph
* the job to be registered
*/
public void registerJob(final ExecutionGraph executionGraph) {
final Iterator it = new ExecutionGroupVertexIterator(executionGraph, true, -1);
while (it.hasNext()) {
final ExecutionGroupVertex groupVertex = it.next();
final InputSplit[] inputSplits = groupVertex.getInputSplits();
if (inputSplits == null) {
continue;
}
if (inputSplits.length == 0) {
continue;
}
final AbstractInvokable invokable = groupVertex.getEnvironment().getInvokable();
if (!(invokable instanceof AbstractInputTask)) {
LOG.error(groupVertex.getName() + " has " + inputSplits.length
+ " input splits, but is not of typt AbstractInputTask, ignoring...");
continue;
}
@SuppressWarnings("unchecked")
final AbstractInputTask extends InputSplit> inputTask = (AbstractInputTask extends InputSplit>) invokable;
final Class extends InputSplit> splitType = inputTask.getInputSplitType();
final InputSplitAssigner assigner = getAssignerByType(splitType, true);
// Add entry to cache for fast retrieval during the job execution
this.assignerCache.put(groupVertex, assigner);
assigner.registerGroupVertex(groupVertex);
}
// Register job with the input split tracker
this.inputSplitTracker.registerJob(executionGraph);
}
/**
* Unregisters the given job represented by its {@link ExecutionGraph} with the input split manager.
*
* @param executionGraph
* the job to be unregistered
*/
public void unregisterJob(final ExecutionGraph executionGraph) {
final Iterator it = new ExecutionGroupVertexIterator(executionGraph, true, -1);
while (it.hasNext()) {
final ExecutionGroupVertex groupVertex = it.next();
final InputSplit[] inputSplits = groupVertex.getInputSplits();
if (inputSplits == null) {
continue;
}
if (inputSplits.length == 0) {
continue;
}
final InputSplitAssigner assigner = this.assignerCache.remove(groupVertex);
if (assigner == null) {
LOG.error("Group vertex " + groupVertex.getName()
+ " is unregistered, but cannot be found in assigner cache");
continue;
}
assigner.unregisterGroupVertex(groupVertex);
}
// Unregister job from input split tracker
this.inputSplitTracker.unregisterJob(executionGraph);
}
/**
* Returns the next input split the input split manager (or the responsible {@link InputSplitAssigner} to be more
* precise) has chosen for the given vertex to consume.
*
* @param vertex
* the vertex for which the next input split is to be determined
* @param sequenceNumber
* the sequence number of the vertex's request
* @return the next input split to consume or null
if the vertex shall consume no more input splits
*/
public InputSplit getNextInputSplit(final ExecutionVertex vertex, final int sequenceNumber) {
InputSplit nextInputSplit = this.inputSplitTracker.getInputSplitFromLog(vertex, sequenceNumber);
if (nextInputSplit != null) {
LOG.info("Input split " + nextInputSplit.getSplitNumber() + " for vertex " + vertex + " replayed from log");
return nextInputSplit;
}
final ExecutionGroupVertex groupVertex = vertex.getGroupVertex();
final InputSplitAssigner inputSplitAssigner = this.assignerCache.get(groupVertex);
if (inputSplitAssigner == null) {
final JobID jobID = groupVertex.getExecutionStage().getExecutionGraph().getJobID();
LOG.error("Cannot find input assigner for group vertex " + groupVertex.getName() + " (job " + jobID + ")");
return null;
}
nextInputSplit = inputSplitAssigner.getNextInputSplit(vertex);
if (nextInputSplit != null) {
this.inputSplitTracker.addInputSplitToLog(vertex, sequenceNumber, nextInputSplit);
LOG.info(vertex + " receives input split " + nextInputSplit.getSplitNumber());
}
return nextInputSplit;
}
/**
* Returns the {@link InputSplitAssigner} which is defined for the given type of input split.
*
* @param inputSplitType
* the type of input split to find the corresponding {@link InputSplitAssigner} for
* @param allowLoading
* true
to indicate that the input split assigner is allowed to load additional classes if
* necessary, false
otherwise
* @return the {@link InputSplitAssigner} responsible for the given type of input split
*/
private InputSplitAssigner getAssignerByType(final Class extends InputSplit> inputSplitType,
final boolean allowLoading) {
synchronized (this.loadedAssigners) {
InputSplitAssigner assigner = this.loadedAssigners.get(inputSplitType);
if (assigner == null && allowLoading) {
assigner = loadInputSplitAssigner(inputSplitType);
if (assigner != null) {
this.loadedAssigners.put(inputSplitType, assigner);
}
}
if (assigner != null) {
return assigner;
}
}
LOG.warn("Unable to find specific input split provider for type " + inputSplitType.getName()
+ ", using default assigner");
return this.defaultAssigner;
}
/**
* Attempts to find the responsible type of {@link InputSplitAssigner} for the given type of input split from the
* configuration and instantiate an object for it.
*
* @param inputSplitType
* the type of input split to load the {@link InputSplitAssigner} for
* @return the newly loaded {@link InputSplitAssigner} object or null
if no such object could be
* located or loaded
*/
private InputSplitAssigner loadInputSplitAssigner(final Class extends InputSplit> inputSplitType) {
final String className = inputSplitType.getName();
final String assignerKey = INPUT_SPLIT_CONFIG_KEY_PREFIX + className;
LOG.info("Trying to load input split assigner for type " + className);
String assignerClassName = GlobalConfiguration.getString(assignerKey, null);
// Provide hard-wired default configuration for FileInputSplit objects to make configuration more robust
if (assignerClassName == null) {
if (FileInputSplit.class == inputSplitType) {
return new FileInputSplitAssigner();
}
else if (GenericInputSplit.class == inputSplitType) {
return new DefaultInputSplitAssigner();
}
else {
return null;
}
}
try {
final Class extends InputSplitAssigner> assignerClass =
Class.forName(assignerClassName).asSubclass(InputSplitAssigner.class);
return assignerClass.newInstance();
}
catch (Exception e) {
LOG.error(StringUtils.stringifyException(e));
}
return null;
}
}