// eu.stratosphere.nephele.jobmanager.splitassigner.InputSplitTracker
/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.nephele.jobmanager.splitassigner;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.stratosphere.core.io.InputSplit;
import eu.stratosphere.nephele.executiongraph.ExecutionGraph;
import eu.stratosphere.nephele.executiongraph.ExecutionGraphIterator;
import eu.stratosphere.nephele.executiongraph.ExecutionGroupVertex;
import eu.stratosphere.nephele.executiongraph.ExecutionGroupVertexIterator;
import eu.stratosphere.nephele.executiongraph.ExecutionVertex;
import eu.stratosphere.nephele.executiongraph.ExecutionVertexID;
/**
* The input split tracker maintains a log of all the input splits that are handed out to the individual input vertices.
* In case of an input vertex must be restarted the input split tracker makes sure that the vertex receives the same
* sequence of input splits as in its original run up to the point that it crashed.
*
* This class is thread-safe.
*
*/
final class InputSplitTracker {
/**
* The logging object which is used to report information and errors.
*/
private static final Log LOG = LogFactory.getLog(InputSplitTracker.class);
/**
* The central split map which stores the logs of the individual input vertices.
*/
private final ConcurrentMap> splitMap = new ConcurrentHashMap>();
/**
* Constructor with package visibility only.
*/
InputSplitTracker() {
}
/**
* Registers a new job with the input split tracker.
*
* @param eg
* the execution graph of the job to be registered
*/
void registerJob(final ExecutionGraph eg) {
final Iterator it = new ExecutionGroupVertexIterator(eg, true, -1);
while (it.hasNext()) {
final ExecutionGroupVertex groupVertex = it.next();
final InputSplit[] inputSplits = groupVertex.getInputSplits();
if (inputSplits == null) {
continue;
}
if (inputSplits.length == 0) {
continue;
}
for (int i = 0; i < groupVertex.getCurrentNumberOfGroupMembers(); ++i) {
final ExecutionVertex vertex = groupVertex.getGroupMember(i);
if (this.splitMap.put(vertex.getID(), new ArrayList()) != null) {
LOG.error("InputSplitTracker must keep track of two vertices with ID " + vertex.getID());
}
}
}
}
/**
* Unregisters a job from the input split tracker.
*
* @param eg
* the execution graph of the job to be unregistered
*/
void unregisterJob(final ExecutionGraph eg) {
final Iterator it = new ExecutionGraphIterator(eg, true);
while (it.hasNext()) {
this.splitMap.remove(it.next().getID());
}
}
/**
* Returns the input split with the given sequence number from the specified vertex's log or null
if no
* such input split exists.
*
* @param vertex
* the vertex for which the input split shall be returned from the log
* @param sequenceNumber
* the sequence number identifying the log entry
* @return the input split that was stored under the given sequence number of the vertex's log or null
* if no such input split exists
*/
InputSplit getInputSplitFromLog(final ExecutionVertex vertex, final int sequenceNumber) {
final List inputSplitLog = this.splitMap.get(vertex.getID());
if (inputSplitLog == null) {
LOG.error("Cannot find input split log for vertex " + vertex + " (" + vertex.getID() + ")");
return null;
}
synchronized (inputSplitLog) {
if (sequenceNumber < inputSplitLog.size()) {
return inputSplitLog.get(sequenceNumber);
}
}
return null;
}
/**
* Adds the given input split to the vertex's log and stores it under the specified sequence number.
*
* @param vertex
* the vertex for which the input split shall be stored
* @param sequenceNumber
* the sequence number identifying the log entry under which the input split shall be stored
* @param inputSplit
* the input split to be stored
*/
void addInputSplitToLog(final ExecutionVertex vertex, final int sequenceNumber, final InputSplit inputSplit) {
final List inputSplitLog = this.splitMap.get(vertex.getID());
if (inputSplitLog == null) {
LOG.error("Cannot find input split log for vertex " + vertex + " (" + vertex.getID() + ")");
return;
}
synchronized (inputSplitLog) {
if (inputSplitLog.size() != sequenceNumber) {
LOG.error("Expected input split with sequence number " + inputSplitLog.size() + " for vertex " + vertex
+ " (" + vertex.getID() + ") but received " + sequenceNumber + ", skipping...");
return;
}
inputSplitLog.add(inputSplit);
}
}
}