All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.stratosphere.nephele.jobmanager.splitassigner.file.FileInputSplitList Maven / Gradle / Ivy

/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.nephele.jobmanager.splitassigner.file;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import eu.stratosphere.core.fs.FileInputSplit;
import eu.stratosphere.nephele.instance.AbstractInstance;

/**
 * The file input split list stores the file input splits for an input vertex that are still expected to be consumed.
 * Besides simply storing the splits, the file input split list also computes the distance all {@link AbstractInstance}
 * objects which request a input split and its nearest storage location with respect to the underlying network topology.
 * That way input splits are always given to consuming vertices in a way that data locality is preserved as well as
 * possible.
 * 

* This class is not thread-safe. * */ public final class FileInputSplitList { /** * The logging object which is used to report information and errors. */ private static final Log LOG = LogFactory.getLog(FileInputSplitList.class); /** * The set containing all the file input splits that still must be consumed. */ private Set masterSet = new HashSet(); /** * The map caching the specific file input split lists for each {@link AbstractInstance}. */ private Map> instanceMap = new HashMap>(); /** * This is an auxiliary class to store the minimum distance between a file input split's storage locations and an * {@link AbstractInstance}. * */ private final class QueueElem implements Comparable { /** * The file input split the distance applies to. */ final FileInputSplit inputSplit; /** * The minimum distance between the file input split's storage locations and the instance this object has been * created for. */ final int distance; /** * Creates a new queue element. * * @param inputSplit * the file input split to be stored * @param distance * the minimum distance between the stored input split's storage locations and the instance this object * has been created for */ private QueueElem(final FileInputSplit inputSplit, final int distance) { this.inputSplit = inputSplit; this.distance = distance; } /** * Returns the file input split stored within this object. * * @return the file input split */ private FileInputSplit getInputSplit() { return this.inputSplit; } /** * {@inheritDoc} */ @Override public int compareTo(final QueueElem o) { return (this.distance - o.distance); } } /** * Adds the given file input split to the set of file input splits to be consumed. * * @param fileInputSplit * the file input split to be added */ synchronized void addSplit(final FileInputSplit fileInputSplit) { this.masterSet.add(fileInputSplit); } /** * Returns the next file input split to be consumed by the given instance. The returned input split is selected in a * way that the distance between the split's storage location and the requesting {@link AbstractInstance} is as * short as possible. * * @param instance * the instance requesting the next file input split * @return the next input split to be consumed by the given instance or null if all input splits have * already been consumed. */ synchronized FileInputSplit getNextInputSplit(final AbstractInstance instance) { final Queue instanceSplitList = getInstanceSplitList(instance); while (true) { final QueueElem candidate = instanceSplitList.poll(); if (candidate == null) { return null; } if (this.masterSet.remove(candidate.getInputSplit())) { if (LOG.isInfoEnabled()) { if (candidate.distance == 0) { LOG.info(instance + " receives local file input split"); } else { LOG.info(instance + " receives remote file input split (distance " + candidate.distance + ")"); } } return candidate.getInputSplit(); } if (this.masterSet.isEmpty()) { return null; } } } /** * Returns a list of file input splits specifically ordered for the given {@link AbstractInstance}. When the list is * initially created, it contains all the unconsumed file input splits at that point in time, ascendingly ordered by * the minimum distance between the input splits' storage locations and the given {@link AbstractInstance}. * * @param instance * the instance for which the file input split list has been computed * @return the list of file input splits ordered specifically for the given instance */ private Queue getInstanceSplitList(final AbstractInstance instance) { Queue instanceSplitList = this.instanceMap.get(instance); if (instanceSplitList == null) { // Create and populate instance specific split list instanceSplitList = new PriorityQueue(); final Iterator it = this.masterSet.iterator(); while (it.hasNext()) { final FileInputSplit split = it.next(); final String[] hostNames = split.getHostNames(); if (hostNames == null) { instanceSplitList.add(new QueueElem(split, Integer.MAX_VALUE)); } else { int minDistance = Integer.MAX_VALUE; for (int i = 0; i < hostNames.length; ++i) { final int distance = instance.getDistance(hostNames[i]); if (LOG.isDebugEnabled()) { LOG.debug("Distance between " + instance + " and " + hostNames[i] + " is " + distance); } if (distance < minDistance) { minDistance = distance; } } instanceSplitList.add(new QueueElem(split, minDistance)); } } this.instanceMap.put(instance, instanceSplitList); } return instanceSplitList; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy