All downloads are free. Search and download functionality uses the official Maven repository.

eu.stratosphere.nephele.jobmanager.splitassigner.LocatableInputSplitList Maven / Gradle / Ivy

There is a newer version: 0.5.2-hadoop2
Show newest version
/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.nephele.jobmanager.splitassigner;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import eu.stratosphere.core.io.LocatableInputSplit;
import eu.stratosphere.nephele.instance.AbstractInstance;

/**
 * The locatable input split list stores the locatable input splits for an input vertex that are still expected to be
 * consumed. Besides simply storing the splits, the locatable input split list also computes the distance all
 * {@link AbstractInstance} objects which request an input split and its nearest storage location with respect to the
 * underlying network topology. That way input splits are always given to consuming vertices in a way that data locality
 * is preserved as well as possible.
 * 

* This class is not thread-safe. * */ public final class LocatableInputSplitList { /** * The logging object which is used to report information and errors. */ private static final Log LOG = LogFactory.getLog(LocatableInputSplitList.class); /** * The set containing all the locatable input splits that still must be consumed. */ private Set masterSet = new HashSet(); /** * The map caching the specific file input split lists for each {@link AbstractInstance}. */ private Map> instanceMap = new HashMap>(); /** * This is an auxiliary class to store the minimum distance between a file input split's storage locations and an * {@link AbstractInstance}. * */ private final class QueueElem implements Comparable { /** * The locatable input split the distance applies to. */ final LocatableInputSplit inputSplit; /** * The minimum distance between the file input split's storage locations and the instance this object has been * created for. */ final int distance; /** * Creates a new queue element. * * @param inputSplit * the locatable input split to be stored * @param distance * the minimum distance between the stored input split's storage locations and the instance this object * has been created for */ private QueueElem(final LocatableInputSplit inputSplit, final int distance) { this.inputSplit = inputSplit; this.distance = distance; } /** * Returns the locatable input split stored within this object. * * @return the locatable input split */ private LocatableInputSplit getInputSplit() { return this.inputSplit; } /** * {@inheritDoc} */ @Override public int compareTo(final QueueElem o) { return (this.distance - o.distance); } } /** * Adds the given locate input split to the set of locatable input splits to be consumed. 
* * @param locatableInputSplit * the locatable input split to be added */ synchronized void addSplit(final LocatableInputSplit locatableInputSplit) { this.masterSet.add(locatableInputSplit); } /** * Returns the next locatable input split to be consumed by the given instance. The returned input split is selected * in a * way that the distance between the split's storage location and the requesting {@link AbstractInstance} is as * short as possible. * * @param instance * the instance requesting the next file input split * @return the next input split to be consumed by the given instance or null if all input splits have * already been consumed. */ synchronized LocatableInputSplit getNextInputSplit(final AbstractInstance instance) { final Queue instanceSplitList = getInstanceSplitList(instance); while (true) { final QueueElem candidate = instanceSplitList.poll(); if (candidate == null) { return null; } if (this.masterSet.remove(candidate.getInputSplit())) { if (LOG.isInfoEnabled()) { if (candidate.distance == 0) { LOG.info(instance + " receives local file input split"); } else { LOG.info(instance + " receives remote file input split (distance " + candidate.distance + ")"); } } return candidate.getInputSplit(); } if (this.masterSet.isEmpty()) { return null; } } } /** * Returns a list of locatable input splits specifically ordered for the given {@link AbstractInstance}. When the * list is initially created, it contains all the unconsumed located input splits at that point in time, ascendingly * ordered * by the minimum distance between the input splits' storage locations and the given {@link AbstractInstance}. 
* * @param instance * the instance for which the locatable input split list has been computed * @return the list of file input splits ordered specifically for the given instance */ private Queue getInstanceSplitList(final AbstractInstance instance) { Queue instanceSplitList = this.instanceMap.get(instance); if (instanceSplitList == null) { // Create and populate instance specific split list instanceSplitList = new PriorityQueue(); final Iterator it = this.masterSet.iterator(); while (it.hasNext()) { final LocatableInputSplit split = it.next(); final String[] hostnames = split.getHostnames(); if (hostnames == null) { instanceSplitList.add(new QueueElem(split, Integer.MAX_VALUE)); } else { int minDistance = Integer.MAX_VALUE; for (int i = 0; i < hostnames.length; ++i) { final int distance = instance.getDistance(hostnames[i]); if (LOG.isDebugEnabled()) { LOG.debug("Distance between " + instance + " and " + hostnames[i] + " is " + distance); } if (distance < minDistance) { minDistance = distance; } } instanceSplitList.add(new QueueElem(split, minDistance)); } } this.instanceMap.put(instance, instanceSplitList); } return instanceSplitList; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy