All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.stratosphere.nephele.jobmanager.splitassigner.InputSplitManager Maven / Gradle / Ivy

There is a newer version: 0.5.2-hadoop2
Show newest version
/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.nephele.jobmanager.splitassigner;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import eu.stratosphere.configuration.GlobalConfiguration;
import eu.stratosphere.core.fs.FileInputSplit;
import eu.stratosphere.core.io.GenericInputSplit;
import eu.stratosphere.core.io.InputSplit;
import eu.stratosphere.nephele.executiongraph.ExecutionGraph;
import eu.stratosphere.nephele.executiongraph.ExecutionGroupVertex;
import eu.stratosphere.nephele.executiongraph.ExecutionGroupVertexIterator;
import eu.stratosphere.nephele.executiongraph.ExecutionVertex;
import eu.stratosphere.nephele.jobgraph.JobID;
import eu.stratosphere.nephele.jobmanager.splitassigner.file.FileInputSplitAssigner;
import eu.stratosphere.nephele.template.AbstractInputTask;
import eu.stratosphere.nephele.template.AbstractInvokable;
import eu.stratosphere.util.StringUtils;

/**
 * The input split manager is responsible for serving input splits to {@link AbstractInputTask} objects at runtime.
 * Before passed on to the {@link AbstractScheduler}, an {@link ExecutionGraph} is registered with the input split
 * manager and all included input vertices of the graph register their generated input splits with the manager. Each
 * type of input split can be assigned to a specific {@link InputSplitAssigner} which is loaded by the input split
 * manager at runtime.
 * 

* This class is thread-safe. */ public final class InputSplitManager { /** * The logging object which is used to report information and errors. */ private static final Log LOG = LogFactory.getLog(InputSplitManager.class); /** * The prefix of the configuration key which is used to retrieve the class names of the individual * {@link InputSplitAssigner} classes */ private static final String INPUT_SPLIT_CONFIG_KEY_PREFIX = "inputsplit.assigner."; /** * A cache which stores the mapping of group vertices to assigner objects for fast retrieval during the job * execution. */ private final Map assignerCache = new ConcurrentHashMap(); /** * A map holding an instance of each available {@link InputSplitAssigner}, accessible via the class name of the * corresponding split type. */ private final Map, InputSplitAssigner> loadedAssigners = new HashMap, InputSplitAssigner>(); /** * The input split tracker makes sure that a vertex retrieves the same sequence of input splits after being * restarted. */ private final InputSplitTracker inputSplitTracker = new InputSplitTracker(); /** * The default input split assigner which is always used if a more specific assigner cannot be found. */ private final InputSplitAssigner defaultAssigner = new DefaultInputSplitAssigner(); /** * Registers a new job represented by its {@link ExecutionGraph} with the input split manager. * * @param executionGraph * the job to be registered */ public void registerJob(final ExecutionGraph executionGraph) { final Iterator it = new ExecutionGroupVertexIterator(executionGraph, true, -1); while (it.hasNext()) { final ExecutionGroupVertex groupVertex = it.next(); final InputSplit[] inputSplits = groupVertex.getInputSplits(); if (inputSplits == null) { continue; } if (inputSplits.length == 0) { continue; } final AbstractInvokable invokable = groupVertex.getEnvironment().getInvokable(); if (!(invokable instanceof AbstractInputTask)) { LOG.error(groupVertex.getName() + " has " + inputSplits.length + " input splits, but is not of typt AbstractInputTask, ignoring..."); continue; } @SuppressWarnings("unchecked") final AbstractInputTask inputTask = (AbstractInputTask) invokable; final Class splitType = inputTask.getInputSplitType(); final InputSplitAssigner assigner = getAssignerByType(splitType, true); // Add entry to cache for fast retrieval during the job execution this.assignerCache.put(groupVertex, assigner); assigner.registerGroupVertex(groupVertex); } // Register job with the input split tracker this.inputSplitTracker.registerJob(executionGraph); } /** * Unregisters the given job represented by its {@link ExecutionGraph} with the input split manager. * * @param executionGraph * the job to be unregistered */ public void unregisterJob(final ExecutionGraph executionGraph) { final Iterator it = new ExecutionGroupVertexIterator(executionGraph, true, -1); while (it.hasNext()) { final ExecutionGroupVertex groupVertex = it.next(); final InputSplit[] inputSplits = groupVertex.getInputSplits(); if (inputSplits == null) { continue; } if (inputSplits.length == 0) { continue; } final InputSplitAssigner assigner = this.assignerCache.remove(groupVertex); if (assigner == null) { LOG.error("Group vertex " + groupVertex.getName() + " is unregistered, but cannot be found in assigner cache"); continue; } assigner.unregisterGroupVertex(groupVertex); } // Unregister job from input split tracker this.inputSplitTracker.unregisterJob(executionGraph); } /** * Returns the next input split the input split manager (or the responsible {@link InputSplitAssigner} to be more * precise) has chosen for the given vertex to consume. * * @param vertex * the vertex for which the next input split is to be determined * @param sequenceNumber * the sequence number of the vertex's request * @return the next input split to consume or null if the vertex shall consume no more input splits */ public InputSplit getNextInputSplit(final ExecutionVertex vertex, final int sequenceNumber) { InputSplit nextInputSplit = this.inputSplitTracker.getInputSplitFromLog(vertex, sequenceNumber); if (nextInputSplit != null) { LOG.info("Input split " + nextInputSplit.getSplitNumber() + " for vertex " + vertex + " replayed from log"); return nextInputSplit; } final ExecutionGroupVertex groupVertex = vertex.getGroupVertex(); final InputSplitAssigner inputSplitAssigner = this.assignerCache.get(groupVertex); if (inputSplitAssigner == null) { final JobID jobID = groupVertex.getExecutionStage().getExecutionGraph().getJobID(); LOG.error("Cannot find input assigner for group vertex " + groupVertex.getName() + " (job " + jobID + ")"); return null; } nextInputSplit = inputSplitAssigner.getNextInputSplit(vertex); if (nextInputSplit != null) { this.inputSplitTracker.addInputSplitToLog(vertex, sequenceNumber, nextInputSplit); LOG.info(vertex + " receives input split " + nextInputSplit.getSplitNumber()); } return nextInputSplit; } /** * Returns the {@link InputSplitAssigner} which is defined for the given type of input split. * * @param inputSplitType * the type of input split to find the corresponding {@link InputSplitAssigner} for * @param allowLoading * true to indicate that the input split assigner is allowed to load additional classes if * necessary, false otherwise * @return the {@link InputSplitAssigner} responsible for the given type of input split */ private InputSplitAssigner getAssignerByType(final Class inputSplitType, final boolean allowLoading) { synchronized (this.loadedAssigners) { InputSplitAssigner assigner = this.loadedAssigners.get(inputSplitType); if (assigner == null && allowLoading) { assigner = loadInputSplitAssigner(inputSplitType); if (assigner != null) { this.loadedAssigners.put(inputSplitType, assigner); } } if (assigner != null) { return assigner; } } LOG.warn("Unable to find specific input split provider for type " + inputSplitType.getName() + ", using default assigner"); return this.defaultAssigner; } /** * Attempts to find the responsible type of {@link InputSplitAssigner} for the given type of input split from the * configuration and instantiate an object for it. * * @param inputSplitType * the type of input split to load the {@link InputSplitAssigner} for * @return the newly loaded {@link InputSplitAssigner} object or null if no such object could be * located or loaded */ private InputSplitAssigner loadInputSplitAssigner(final Class inputSplitType) { final String className = inputSplitType.getName(); final String assignerKey = INPUT_SPLIT_CONFIG_KEY_PREFIX + className; LOG.info("Trying to load input split assigner for type " + className); String assignerClassName = GlobalConfiguration.getString(assignerKey, null); // Provide hard-wired default configuration for FileInputSplit objects to make configuration more robust if (assignerClassName == null) { if (FileInputSplit.class == inputSplitType) { return new FileInputSplitAssigner(); } else if (GenericInputSplit.class == inputSplitType) { return new DefaultInputSplitAssigner(); } else { return null; } } try { final Class assignerClass = Class.forName(assignerClassName).asSubclass(InputSplitAssigner.class); return assignerClass.newInstance(); } catch (Exception e) { LOG.error(StringUtils.stringifyException(e)); } return null; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy