/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ignite.hadoop.mapreduce;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.UUID;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.IgniteException;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.igfs.IgfsBlockLocation;
import org.apache.ignite.igfs.IgfsPath;
import org.apache.ignite.internal.IgniteEx;
import org.apache.ignite.internal.processors.hadoop.HadoopFileBlock;
import org.apache.ignite.internal.processors.hadoop.HadoopInputSplit;
import org.apache.ignite.internal.processors.hadoop.HadoopJob;
import org.apache.ignite.internal.processors.hadoop.HadoopMapReducePlan;
import org.apache.ignite.internal.processors.hadoop.igfs.HadoopIgfsEndpoint;
import org.apache.ignite.internal.processors.hadoop.planner.HadoopDefaultMapReducePlan;
import org.apache.ignite.internal.processors.hadoop.planner.HadoopAbstractMapReducePlanner;
import org.apache.ignite.internal.processors.igfs.IgfsEx;
import org.apache.ignite.internal.util.typedef.F;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import static org.apache.ignite.IgniteFileSystem.IGFS_SCHEME;
/**
* Default map-reduce planner implementation.
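* <p>
* Mappers are placed as close as possible to the IGFS blocks they read, and reducers are spread
* proportionally to the number of mappers assigned to each node (see
* {@link #reducerNodeWeight(ClusterNode, int)}).
* <p>
* A minimal configuration sketch, assuming the {@code HadoopConfiguration} and
* {@code IgniteConfiguration} setters of the Ignite Hadoop Accelerator module:
* <pre>{@code
* HadoopConfiguration hadoopCfg = new HadoopConfiguration();
*
* hadoopCfg.setMapReducePlanner(new IgniteHadoopMapReducePlanner());
*
* IgniteConfiguration cfg = new IgniteConfiguration();
*
* cfg.setHadoopConfiguration(hadoopCfg);
* }</pre>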
*/
public class IgniteHadoopMapReducePlanner extends HadoopAbstractMapReducePlanner {
/** {@inheritDoc} */
@Override public HadoopMapReducePlan preparePlan(HadoopJob job, Collection<ClusterNode> top,
@Nullable HadoopMapReducePlan oldPlan) throws IgniteCheckedException {
// Convert collection of topology nodes to collection of topology node IDs.
Collection<UUID> topIds = new HashSet<>(top.size(), 1.0f);
for (ClusterNode topNode : top)
topIds.add(topNode.id());
Map<UUID, Collection<HadoopInputSplit>> mappers = mappers(top, topIds, job.input());
int rdcCnt = job.info().reducers();
if (rdcCnt < 0)
throw new IgniteCheckedException("Number of reducers must be non-negative, actual: " + rdcCnt);
Map<UUID, int[]> reducers = reducers(top, mappers, rdcCnt);
return new HadoopDefaultMapReducePlan(mappers, reducers);
}
/**
* Create plan for mappers.
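* <p>
* Splits are assigned greedily: for each split the most data-local node is chosen via
* {@code nodeForSplit(...)}, which also consults the per-node split counters in {@code nodeLoads}
* to prefer the least loaded node among equally good candidates.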
*
* @param top Topology nodes.
* @param topIds Topology node IDs.
* @param splits Splits.
* @return Mappers map.
* @throws IgniteCheckedException If failed.
*/
private Map<UUID, Collection<HadoopInputSplit>> mappers(Collection<ClusterNode> top, Collection<UUID> topIds,
Iterable<HadoopInputSplit> splits) throws IgniteCheckedException {
Map<UUID, Collection<HadoopInputSplit>> mappers = new HashMap<>();
Map<String, Collection<UUID>> nodes = groupByHost(top);
Map<UUID, Integer> nodeLoads = new HashMap<>(top.size(), 1.0f); // Track node load.
for (UUID nodeId : topIds)
nodeLoads.put(nodeId, 0);
for (HadoopInputSplit split : splits) {
UUID nodeId = nodeForSplit(split, topIds, nodes, nodeLoads);
if (log.isDebugEnabled())
log.debug("Mapped split to node [split=" + split + ", nodeId=" + nodeId + ']');
Collection<HadoopInputSplit> nodeSplits = mappers.get(nodeId);
if (nodeSplits == null) {
nodeSplits = new ArrayList<>();
mappers.put(nodeId, nodeSplits);
}
nodeSplits.add(split);
// Update node load.
nodeLoads.put(nodeId, nodeLoads.get(nodeId) + 1);
}
return mappers;
}
/**
* Determine the best node for this split.
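* <p>
* Selection order: (1) if the split is an IGFS file block served by a non-proxy IGFS of this
* Ignite instance, IGFS block affinity is used and the node holding most of the split's data wins;
* (2) otherwise the split's host list is matched against the topology hosts;
* (3) if that fails too, the least loaded node of the topology is chosen.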
*
* @param split Split.
* @param topIds Topology node IDs.
* @param nodes Nodes.
* @param nodeLoads Node load tracker.
* @return Node ID.
* @throws IgniteCheckedException If IGFS block affinity cannot be obtained.
*/
@SuppressWarnings("unchecked")
private UUID nodeForSplit(HadoopInputSplit split, Collection<UUID> topIds, Map<String, Collection<UUID>> nodes,
Map<UUID, Integer> nodeLoads) throws IgniteCheckedException {
if (split instanceof HadoopFileBlock) {
HadoopFileBlock split0 = (HadoopFileBlock)split;
if (IGFS_SCHEME.equalsIgnoreCase(split0.file().getScheme())) {
HadoopIgfsEndpoint endpoint = new HadoopIgfsEndpoint(split0.file().getAuthority());
IgfsEx igfs = null;
if (F.eq(ignite.name(), endpoint.grid()))
igfs = (IgfsEx)((IgniteEx)ignite).igfsx(endpoint.igfs());
if (igfs != null && !igfs.isProxy(split0.file())) {
IgfsPath path = new IgfsPath(split0.file());
if (igfs.exists(path)) {
Collection<IgfsBlockLocation> blocks;
try {
blocks = igfs.affinity(path, split0.start(), split0.length());
}
catch (IgniteException e) {
throw new IgniteCheckedException(e);
}
assert blocks != null;
if (blocks.size() == 1)
// Fast-path, split consists of one IGFS block (as in most cases).
return bestNode(blocks.iterator().next().nodeIds(), topIds, nodeLoads, false);
else {
// Slow-path, file consists of multiple IGFS blocks. First, find the most co-located nodes.
Map<UUID, Long> nodeMap = new HashMap<>();
List<UUID> bestNodeIds = null;
long bestLen = -1L;
for (IgfsBlockLocation block : blocks) {
for (UUID blockNodeId : block.nodeIds()) {
if (topIds.contains(blockNodeId)) {
Long oldLen = nodeMap.get(blockNodeId);
long newLen = oldLen == null ? block.length() : oldLen + block.length();
nodeMap.put(blockNodeId, newLen);
if (bestNodeIds == null || bestLen < newLen) {
bestNodeIds = new ArrayList<>(1);
bestNodeIds.add(blockNodeId);
bestLen = newLen;
}
else if (bestLen == newLen) {
assert !F.isEmpty(bestNodeIds);
bestNodeIds.add(blockNodeId);
}
}
}
}
if (bestNodeIds != null) {
return bestNodeIds.size() == 1 ? bestNodeIds.get(0) :
bestNode(bestNodeIds, topIds, nodeLoads, true);
}
}
}
}
}
}
// Cannot use local IGFS for some reason, try selecting the node by host.
Collection<UUID> blockNodes = null;
for (String host : split.hosts()) {
Collection<UUID> hostNodes = nodes.get(host);
if (!F.isEmpty(hostNodes)) {
if (blockNodes == null)
blockNodes = new ArrayList<>(hostNodes);
else
blockNodes.addAll(hostNodes);
}
}
return bestNode(blockNodes, topIds, nodeLoads, false);
}
/**
* Finds the best (the least loaded) node among the candidates.
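* <p>
* Candidates outside the current topology are ignored unless {@code skipTopCheck} is set.
* If no candidate qualifies, the least loaded node of the whole topology is returned instead.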
*
* @param candidates Candidates.
* @param topIds Topology node IDs.
* @param nodeLoads Known node loads.
* @param skipTopCheck Whether to skip topology check.
* @return The best node.
*/
private UUID bestNode(@Nullable Collection<UUID> candidates, Collection<UUID> topIds, Map<UUID, Integer> nodeLoads,
boolean skipTopCheck) {
UUID bestNode = null;
int bestLoad = Integer.MAX_VALUE;
if (candidates != null) {
for (UUID candidate : candidates) {
if (skipTopCheck || topIds.contains(candidate)) {
int load = nodeLoads.get(candidate);
if (bestNode == null || bestLoad > load) {
bestNode = candidate;
bestLoad = load;
if (bestLoad == 0)
break; // Minimum load possible, no need for further iterations.
}
}
}
}
if (bestNode == null) {
// Blocks are located on nodes which are not Hadoop-enabled, assign to the least loaded one.
bestLoad = Integer.MAX_VALUE;
for (UUID nodeId : topIds) {
int load = nodeLoads.get(nodeId);
if (bestNode == null || bestLoad > load) {
bestNode = nodeId;
bestLoad = load;
if (bestLoad == 0)
break; // Minimum load possible, no need for further iterations.
}
}
}
assert bestNode != null;
return bestNode;
}
/**
* Create plan for reducers.
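* <p>
* Reducers are distributed proportionally to the node weights returned by
* {@link #reducerNodeWeight(ClusterNode, int)} (by default the number of mappers on the node).
* Weights are scaled so that they sum up to {@code reducerCnt}; rounding surpluses are trimmed
* from the lightest nodes and shortfalls are added to the heaviest ones.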
*
* @param top Topology.
* @param mappers Mappers map.
* @param reducerCnt Reducers count.
* @return Reducers map.
*/
private Map<UUID, int[]> reducers(Collection<ClusterNode> top,
Map<UUID, Collection<HadoopInputSplit>> mappers, int reducerCnt) {
// Determine initial node weights.
int totalWeight = 0;
List<WeightedNode> nodes = new ArrayList<>(top.size());
for (ClusterNode node : top) {
Collection<HadoopInputSplit> split = mappers.get(node.id());
int weight = reducerNodeWeight(node, split != null ? split.size() : 0);
nodes.add(new WeightedNode(node.id(), weight, weight));
totalWeight += weight;
}
// Adjust weights.
int totalAdjustedWeight = 0;
for (WeightedNode node : nodes) {
node.floatWeight = ((float)node.weight * reducerCnt) / totalWeight;
node.weight = Math.round(node.floatWeight);
totalAdjustedWeight += node.weight;
}
// Apply redundant/lost reducers.
Collections.sort(nodes);
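// Illustrative example: mapper counts [5, 3, 0] with reducerCnt = 4 give float weights
// [2.5, 1.5, 0.0], which round to [3, 2, 0] (sum 5). The loops below walk the list, sorted
// heaviest-first, and trim the surplus from the lighter nodes (or add the shortfall to the
// heavier ones) until exactly reducerCnt reducers are assigned.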
if (totalAdjustedWeight > reducerCnt) {
// Too many reducers set.
ListIterator<WeightedNode> iter = nodes.listIterator(nodes.size() - 1);
while (totalAdjustedWeight != reducerCnt) {
if (!iter.hasPrevious())
iter = nodes.listIterator(nodes.size() - 1);
WeightedNode node = iter.previous();
if (node.weight > 0) {
node.weight -= 1;
totalAdjustedWeight--;
}
}
}
else if (totalAdjustedWeight < reducerCnt) {
// Not enough reducers set.
ListIterator<WeightedNode> iter = nodes.listIterator(0);
while (totalAdjustedWeight != reducerCnt) {
if (!iter.hasNext())
iter = nodes.listIterator(0);
WeightedNode node = iter.next();
if (node.floatWeight > 0.0f) {
node.weight += 1;
totalAdjustedWeight++;
}
}
}
int idx = 0;
Map<UUID, int[]> reducers = new HashMap<>(nodes.size(), 1.0f);
for (WeightedNode node : nodes) {
if (node.weight > 0) {
int[] arr = new int[node.weight];
for (int i = 0; i < arr.length; i++)
arr[i] = idx++;
reducers.put(node.nodeId, arr);
}
}
return reducers;
}
/**
* Calculate node weight based on node metrics and data co-location.
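* <p>
* The default implementation returns {@code splitCnt}, i.e. reducers follow data co-location only.
* A subclass could additionally take node metrics into account; a hypothetical sketch (the CPU
* weighting below is an assumption, not part of this planner):
* <pre>{@code
* protected int reducerNodeWeight(ClusterNode node, int splitCnt) {
*     // Favour nodes with more CPUs in addition to data co-location.
*     return splitCnt + node.metrics().getTotalCpus();
* }
* }</pre>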
*
* @param node Node.
* @param splitCnt Splits mapped to this node.
* @return Node weight.
*/
@SuppressWarnings("UnusedParameters")
protected int reducerNodeWeight(ClusterNode node, int splitCnt) {
return splitCnt;
}
/**
* Weighted node.
*/
private static class WeightedNode implements Comparable<WeightedNode> {
/** Node ID. */
private final UUID nodeId;
/** Weight. */
private int weight;
/** Floating point weight. */
private float floatWeight;
/**
* Constructor.
*
* @param nodeId Node ID.
* @param weight Weight.
* @param floatWeight Floating point weight.
*/
private WeightedNode(UUID nodeId, int weight, float floatWeight) {
this.nodeId = nodeId;
this.weight = weight;
this.floatWeight = floatWeight;
}
/** {@inheritDoc} */
@Override public boolean equals(Object obj) {
return obj != null && obj instanceof WeightedNode && F.eq(nodeId, ((WeightedNode)obj).nodeId);
}
/** {@inheritDoc} */
@Override public int hashCode() {
return nodeId.hashCode();
}
/** {@inheritDoc} */
@Override public int compareTo(@NotNull WeightedNode other) {
float res = other.floatWeight - floatWeight;
return res > 0.0f ? 1 : res < 0.0f ? -1 : nodeId.compareTo(other.nodeId);
}
}
}