All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRackFaultTolerant Maven / Gradle / Ivy

There is a newer version: 3.4.1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.blockmanagement;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.net.NodeBase;

import java.util.*;

/**
 * The class is responsible for choosing the desired number of targets
 * for placing block replicas.
 * The strategy is that it tries its best to place the replicas to most racks.
 */
@InterfaceAudience.Private
public class BlockPlacementPolicyRackFaultTolerant extends BlockPlacementPolicyDefault {

  @Override
  protected int[] getMaxNodesPerRack(int numOfChosen, int numOfReplicas) {
    int clusterSize = clusterMap.getNumOfLeaves();
    int totalNumOfReplicas = numOfChosen + numOfReplicas;
    if (totalNumOfReplicas > clusterSize) {
      numOfReplicas -= (totalNumOfReplicas-clusterSize);
      totalNumOfReplicas = clusterSize;
    }
    // No calculation needed when there is only one rack or picking one node.
    int numOfRacks = clusterMap.getNumOfRacks();
    if (numOfRacks == 1 || totalNumOfReplicas <= 1) {
      return new int[] {numOfReplicas, totalNumOfReplicas};
    }
    if(totalNumOfReplicas
   * In the end, the difference of the numbers of replicas for each two racks
   * is no more than 1.
   * Either way it always prefer local storage.
   * @return local node of writer
   */
  @Override
  protected Node chooseTargetInOrder(int numOfReplicas,
                                 Node writer,
                                 final Set excludedNodes,
                                 final long blocksize,
                                 final int maxNodesPerRack,
                                 final List results,
                                 final boolean avoidStaleNodes,
                                 final boolean newBlock,
                                 EnumMap storageTypes)
                                 throws NotEnoughReplicasException {
    int totalReplicaExpected = results.size() + numOfReplicas;
    int numOfRacks = clusterMap.getNumOfRacks();
    if (totalReplicaExpected < numOfRacks ||
        totalReplicaExpected % numOfRacks == 0) {
      writer = chooseOnce(numOfReplicas, writer, excludedNodes, blocksize,
          maxNodesPerRack, results, avoidStaleNodes, storageTypes);
      return writer;
    }

    assert totalReplicaExpected > (maxNodesPerRack -1) * numOfRacks;

    // Calculate numOfReplicas for filling each rack exactly (maxNodesPerRack-1)
    // replicas.
    HashMap rackCounts = new HashMap<>();
    for (DatanodeStorageInfo dsInfo : results) {
      String rack = dsInfo.getDatanodeDescriptor().getNetworkLocation();
      Integer count = rackCounts.get(rack);
      if (count != null) {
        rackCounts.put(rack, count + 1);
      } else {
        rackCounts.put(rack, 1);
      }
    }
    int excess = 0; // Sum of the above (maxNodesPerRack-1) part of nodes in results
    for (int count : rackCounts.values()) {
      if (count > maxNodesPerRack -1) {
        excess += count - (maxNodesPerRack -1);
      }
    }
    numOfReplicas = Math.min(totalReplicaExpected - results.size(),
        (maxNodesPerRack -1) * numOfRacks - (results.size() - excess));

    // Fill each rack exactly (maxNodesPerRack-1) replicas.
    writer = chooseOnce(numOfReplicas, writer, new HashSet<>(excludedNodes),
        blocksize, maxNodesPerRack -1, results, avoidStaleNodes, storageTypes);

    for (DatanodeStorageInfo resultStorage : results) {
      addToExcludedNodes(resultStorage.getDatanodeDescriptor(), excludedNodes);
    }

    // For some racks, place one more replica to each one of them.
    numOfReplicas = totalReplicaExpected - results.size();
    chooseOnce(numOfReplicas, writer, excludedNodes, blocksize,
        maxNodesPerRack, results, avoidStaleNodes, storageTypes);

    return writer;
  }

  /**
   * Randomly choose numOfReplicas targets from the given scope.
   * Except that 1st replica prefer local storage.
   * @return local node of writer.
   */
  private Node chooseOnce(int numOfReplicas,
                            Node writer,
                            final Set excludedNodes,
                            final long blocksize,
                            final int maxNodesPerRack,
                            final List results,
                            final boolean avoidStaleNodes,
                            EnumMap storageTypes)
                            throws NotEnoughReplicasException {
    if (numOfReplicas == 0) {
      return writer;
    }
    writer = chooseLocalStorage(writer, excludedNodes, blocksize,
        maxNodesPerRack, results, avoidStaleNodes, storageTypes, true)
        .getDatanodeDescriptor();
    if (--numOfReplicas == 0) {
      return writer;
    }
    chooseRandom(numOfReplicas, NodeBase.ROOT, excludedNodes, blocksize,
        maxNodesPerRack, results, avoidStaleNodes, storageTypes);
    return writer;
  }

  @Override
  public BlockPlacementStatus verifyBlockPlacement(DatanodeInfo[] locs,
      int numberOfReplicas) {
    if (locs == null)
      locs = DatanodeDescriptor.EMPTY_ARRAY;
    if (!clusterMap.hasClusterEverBeenMultiRack()) {
      // only one rack
      return new BlockPlacementStatusDefault(1, 1);
    }
    // 1. Check that all locations are different.
    // 2. Count locations on different racks.
    Set racks = new TreeSet();
    for (DatanodeInfo dn : locs)
      racks.add(dn.getNetworkLocation());
    return new BlockPlacementStatusDefault(racks.size(), numberOfReplicas);
  }

  @Override
  protected Collection pickupReplicaSet(
      Collection moreThanOne,
      Collection exactlyOne,
      Map> rackMap) {
    return moreThanOne.isEmpty() ? exactlyOne : moreThanOne;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy