/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.blockmanagement;
import com.google.common.collect.Iterables;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.server.namenode.INode;
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
import org.apache.hadoop.hdfs.server.namenode.INodeId;
import org.apache.hadoop.hdfs.util.LightWeightHashSet;
import org.apache.hadoop.hdfs.util.LightWeightLinkedSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.Map;
import java.util.List;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.ArrayDeque;
import java.util.Queue;
/**
* This class implements the logic to track decommissioning and entering
* maintenance nodes, ensuring all their blocks are adequately replicated
* before they are moved to the decommissioned or maintenance state.
*
* This monitor avoids flooding the replication queue with all pending blocks
* and instead feeds them to the queue as the prior set completes replication.
*
* HDFS-14854 contains details about the overall design of this class.
*
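* The main loop is driven by {@link #run()}; a simplified sketch of one
* monitor tick, using only methods defined in this class, is:
*
* <pre>{@code
* processCancelledNodes();  // drop nodes whose admin operation was cancelled
* processPendingNodes();    // begin tracking newly queued nodes
* check();                  // scan, prune, top up pendingRep, finish nodes
* }</pre>
*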
*/
public class DatanodeAdminBackoffMonitor extends DatanodeAdminMonitorBase
implements DatanodeAdminMonitorInterface {
/**
* Map containing the DECOMMISSION_INPROGRESS or ENTERING_MAINTENANCE
* datanodes that are being tracked so they can be marked as
* DECOMMISSIONED or IN_MAINTENANCE. Even after the node is marked as
* IN_MAINTENANCE, the node remains in the map until
* maintenance expires, which is checked during a monitor tick.
*
* This holds a set of references to the under-replicated blocks on the DN
* at the time the DN is added to the map, i.e. the blocks that are
* preventing the node from being marked as decommissioned. During a monitor
* tick, this list is pruned as blocks become replicated.
*
* Note also that the reference to the list of under-replicated blocks
* will be null when the node is initially added.
*
* However, this map can become out-of-date since it is not updated by block
* reports or other events. Before a node is finally marked as decommissioned,
* another check is done with the actual block map.
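*
* An illustrative (not executable) view of the map contents:
*
* <pre>{@code
* dn1 -> {blk1=null, blk2=null}  // scanned; two blocks still low-redundancy
* dn2 -> null                    // newly added; storage not yet scanned
* }</pre>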
*/
private HashMap<DatanodeDescriptor, HashMap<BlockInfo, Integer>>
outOfServiceNodeBlocks = new HashMap<>();
/**
* Any nodes where decommission or maintenance has been cancelled are added
* to this queue for later processing.
*/
private final Queue<DatanodeDescriptor> cancelledNodes = new ArrayDeque<>();
/**
* The number of blocks to process when moving blocks to pendingReplication
* before releasing and reclaiming the namenode lock.
*/
private int blocksPerLock;
/**
* The number of blocks that have been checked on this tick.
*/
private int numBlocksChecked = 0;
/**
* The maximum number of blocks to hold in pendingRep at any time.
*/
private int pendingRepLimit;
/**
* The list of blocks which have been placed onto the replication queue
* and are waiting to be sufficiently replicated.
*/
private final Map<DatanodeDescriptor, List<BlockInfo>>
pendingRep = new HashMap<>();
private static final Logger LOG =
LoggerFactory.getLogger(DatanodeAdminBackoffMonitor.class);
DatanodeAdminBackoffMonitor() {
}
@Override
protected void processConf() {
this.pendingRepLimit = conf.getInt(
DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_LIMIT,
DFSConfigKeys.
DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_LIMIT_DEFAULT);
if (this.pendingRepLimit < 1) {
LOG.error("{} is set to an invalid value, it must be greater than "+
"zero. Defaulting to {}",
DFSConfigKeys.
DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_LIMIT,
DFSConfigKeys.
DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_LIMIT_DEFAULT
);
this.pendingRepLimit = DFSConfigKeys.
DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_LIMIT_DEFAULT;
}
this.blocksPerLock = conf.getInt(
DFSConfigKeys.
DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_BLOCKS_PER_LOCK,
DFSConfigKeys.
DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_BLOCKS_PER_LOCK_DEFAULT
);
if (blocksPerLock <= 0) {
LOG.error("{} is set to an invalid value, it must be greater than "+
"zero. Defaulting to {}",
DFSConfigKeys.
DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_BLOCKS_PER_LOCK,
DFSConfigKeys.
DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_BLOCKS_PER_LOCK_DEFAULT);
blocksPerLock =
DFSConfigKeys.
DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_BLOCKS_PER_LOCK_DEFAULT;
}
LOG.info("Initialized the Backoff Decommission and Maintenance Monitor");
}
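// A minimal sketch of tuning this monitor, assuming the values are set on
// the NameNode configuration before the monitor is created. The constants
// are the same DFSConfigKeys entries read in processConf() above; the
// numbers are arbitrary examples, not the defaults.
//
//   Configuration conf = new Configuration();
//   conf.setInt(DFSConfigKeys
//       .DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_LIMIT, 5000);
//   conf.setInt(DFSConfigKeys
//       .DFS_NAMENODE_DECOMMISSION_BACKOFF_MONITOR_PENDING_BLOCKS_PER_LOCK,
//       500);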
/**
* Queue a node to be removed from tracking. This method must be called
* under the namenode write lock.
* @param dn The datanode to stop tracking for decommission.
*/
@Override
public void stopTrackingNode(DatanodeDescriptor dn) {
pendingNodes.remove(dn);
cancelledNodes.add(dn);
}
@Override
public int getTrackedNodeCount() {
return outOfServiceNodeBlocks.size();
}
@Override
public int getNumNodesChecked() {
// We always check all nodes on each tick
return outOfServiceNodeBlocks.size();
}
@Override
public void run() {
LOG.debug("DatanodeAdminMonitorV2 is running.");
if (!namesystem.isRunning()) {
LOG.info("Namesystem is not running, skipping " +
"decommissioning/maintenance checks.");
return;
}
// Reset the checked count at beginning of each iteration
numBlocksChecked = 0;
// Check decommission or maintenance progress.
try {
namesystem.writeLock();
try {
/**
* Other threads can modify the pendingNode list and the cancelled
* node list, so we must process them under the NN write lock to
* prevent any concurrent modifications.
*
* Always process the cancelled list before the pending list, as
* it is possible for a node to be cancelled, and then quickly added
* back again. If we process these the other way around, the added
* node will be removed from tracking by the pending cancel.
*/
processCancelledNodes();
processPendingNodes();
} finally {
namesystem.writeUnlock();
}
// After processing the above, various parts of the check() method will
// take and drop the read / write lock as needed. Aside from the
// cancelled and pending lists, nothing outside of the monitor thread
// modifies anything inside this class, so many things can be done
// without any lock.
check();
} catch (Exception e) {
LOG.warn("DatanodeAdminMonitor caught exception when processing node.",
e);
}
if (numBlocksChecked + outOfServiceNodeBlocks.size() > 0) {
LOG.info("Checked {} blocks this tick. {} nodes are now " +
"in maintenance or transitioning state. {} nodes pending. {} " +
"nodes waiting to be cancelled.",
numBlocksChecked, outOfServiceNodeBlocks.size(), pendingNodes.size(),
cancelledNodes.size());
}
}
/**
* Move any pending nodes into outOfServiceNodeBlocks to initiate the
* decommission or maintenance mode process.
*
* This method must be executed under the namenode write lock to prevent
* the pendingNodes list from being modified externally.
*/
private void processPendingNodes() {
while (!pendingNodes.isEmpty() &&
(maxConcurrentTrackedNodes == 0 ||
outOfServiceNodeBlocks.size() < maxConcurrentTrackedNodes)) {
outOfServiceNodeBlocks.put(pendingNodes.poll(), null);
}
}
/**
* Process any nodes which have had their decommission or maintenance mode
* cancelled by an administrator.
*
* This method must be executed under the
* write lock to prevent the cancelledNodes list being modified externally.
*/
private void processCancelledNodes() {
while(!cancelledNodes.isEmpty()) {
DatanodeDescriptor dn = cancelledNodes.poll();
outOfServiceNodeBlocks.remove(dn);
pendingRep.remove(dn);
}
}
/**
* This method performs each of the steps to track a node from
* decommissioning or entering maintenance to the end state.
*
* First, any newly added nodes are scanned.
*
* Then any expired maintenance nodes are handled.
*
* Next the pendingRep map is scanned and all blocks which are now
* sufficiently replicated are removed.
*
* Then new blocks are moved to pendingRep.
*
* Finally we check if any nodes have completed the replication process and
* if so move them to their final states.
*
* The methods which this method calls will take and release the namenode
* read and write lock several times.
*
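* A sketch mapping each step to the method that implements it, in the
* order they are invoked below:
*
* <pre>{@code
* scanDatanodeStorage(dn, true);     // scan newly added nodes
* processMaintenanceNodes();         // handle expired maintenance nodes
* processPendingReplication();       // prune now-replicated blocks
* moveBlocksToPending();             // top up pendingRep
* checkForCompletedNodes(toRemove);  // find nodes with zero pending blocks
* processCompletedNodes(toRemove);   // move them to their final state
* }</pre>
*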
*/
private void check() {
final List<DatanodeDescriptor> toRemove = new ArrayList<>();
if (outOfServiceNodeBlocks.size() == 0) {
// No nodes currently being tracked so simply return
return;
}
// Check if there are any pending nodes to process, i.e. those where the
// storage has not been scanned yet. For all which are pending, scan
// the storage and load the under-replicated block list into
// outOfServiceNodeBlocks. As this does not modify any external structures
// it can be done under the namenode *read* lock, and the lock can be
// dropped between each storage on each node.
//
// TODO - This is an expensive call, depending on how many nodes are
// to be processed, but it requires only the read lock and it will
// be dropped and re-taken frequently. We may want to throttle this
// to process only a few nodes per iteration.
outOfServiceNodeBlocks.keySet()
.stream()
.filter(n -> outOfServiceNodeBlocks.get(n) == null)
.forEach(n -> scanDatanodeStorage(n, true));
processMaintenanceNodes();
// First check the pending replication list and remove any blocks
// which are now replicated OK. This list is constrained in size so this
// call should not be overly expensive.
processPendingReplication();
// Now move a limited number of blocks to pending
moveBlocksToPending();
// Check if any nodes have reached zero blocks and also update the stats
// exposed via JMX for all nodes still being processed.
checkForCompletedNodes(toRemove);
// Finally move the nodes to their final state if they are ready.
processCompletedNodes(toRemove);
}
/**
* Checks for any nodes which are in maintenance and, if maintenance has
* expired, moves the node back to in_service (or dead) as required.
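*
* A sketch of the lock-yield pattern used in the loop below; the write
* lock is dropped and immediately re-taken after each expiry so other
* writers are not blocked for the whole scan:
*
* <pre>{@code
* dnAdmin.stopMaintenance(dn);  // potentially expensive
* namesystem.writeUnlock();     // yield to other writers
* namesystem.writeLock();       // re-take before the next node
* }</pre>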
*/
private void processMaintenanceNodes() {
// Check for any maintenance state nodes which need to be expired
namesystem.writeLock();
try {
for (DatanodeDescriptor dn : outOfServiceNodeBlocks.keySet()) {
if (dn.isMaintenance() && dn.maintenanceExpired()) {
// If maintenance expires, stop tracking it. This can be an
// expensive call, as it may need to invalidate blocks. Therefore
// we can yield and retake the write lock after each node
//
// The call to stopMaintenance makes a call to stopTrackingNode()
// which adds the node to the cancelled list. Therefore expired
// maintenance nodes do not need to be added to the toRemove list.
dnAdmin.stopMaintenance(dn);
namesystem.writeUnlock();
namesystem.writeLock();
}
}
} finally {
namesystem.writeUnlock();
}
}
/**
* Loop over all nodes in the passed toRemove list and move the node to
* the required end state. This will also remove any entries from
* outOfServiceNodeBlocks and pendingRep for the node if required.
*
* @param toRemove The list of nodes to process for completion.
*/
private void processCompletedNodes(List<DatanodeDescriptor> toRemove) {
if (toRemove.size() == 0) {
// If there are no nodes to process simply return and avoid
// taking the write lock at all.
return;
}
namesystem.writeLock();
try {
for (DatanodeDescriptor dn : toRemove) {
final boolean isHealthy =
blockManager.isNodeHealthyForDecommissionOrMaintenance(dn);
if (isHealthy) {
if (dn.isDecommissionInProgress()) {
dnAdmin.setDecommissioned(dn);
outOfServiceNodeBlocks.remove(dn);
pendingRep.remove(dn);
} else if (dn.isEnteringMaintenance()) {
// IN_MAINTENANCE node remains in the outOfServiceNodeBlocks to
// track maintenance expiration.
dnAdmin.setInMaintenance(dn);
pendingRep.remove(dn);
} else if (dn.isInService()) {
// Decom / maint was cancelled and the node is yet to be processed
// from cancelledNodes
LOG.info("Node {} completed decommission and maintenance " +
"but has been moved back to in service", dn);
pendingRep.remove(dn);
outOfServiceNodeBlocks.remove(dn);
continue;
} else {
// Should not happen
LOG.error("Node {} is in an unexpected state {} and has been "+
"removed from tracking for decommission or maintenance",
dn, dn.getAdminState());
pendingRep.remove(dn);
outOfServiceNodeBlocks.remove(dn);
continue;
}
LOG.info("Node {} is sufficiently replicated and healthy, "
+ "marked as {}.", dn, dn.getAdminState());
} else {
LOG.info("Node {} isn't healthy."
+ " It needs to replicate {} more blocks."
+ " {} is still in progress.", dn,
getPendingCountForNode(dn), dn.getAdminState());
}
}
} finally {
namesystem.writeUnlock();
}
}
/**
* Loop over all nodes and check for any which have zero unprocessed or
* pending blocks. If the node has zero blocks pending, the storage is
* rescanned to ensure no transient blocks were missed on the first pass.
*
* If, after the rescan, the number of blocks pending replication is zero, the
* node is added to the passed removeList which will later be processed to
* complete the decommission or entering maintenance process.
*
* @param removeList Nodes which have zero pending blocks are added to this
* list.
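*
* A sketch of the zero-block double check performed for each tracked node:
*
* <pre>{@code
* if (getPendingCountForNode(dn) == 0) {
*   scanDatanodeStorage(dn, false);  // rescan to catch transient misses
*   if (getPendingCountForNode(dn) == 0) {
*     removeList.add(dn);            // ready to complete
*   }
* }
* }</pre>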
*/
private void checkForCompletedNodes(List<DatanodeDescriptor> removeList) {
for (DatanodeDescriptor dn : outOfServiceNodeBlocks.keySet()) {
// If the node is already in maintenance, we don't need to perform
// any further checks on it.
if (dn.isInMaintenance()) {
LOG.debug("Node {} is currently in maintenance", dn);
continue;
} else if (!dn.isInService()) {
// A node could be inService if decom or maint has been cancelled, but
// the cancelled list is yet to be processed. We don't need to check
// inService nodes here
int outstandingBlocks = getPendingCountForNode(dn);
if (outstandingBlocks == 0) {
scanDatanodeStorage(dn, false);
outstandingBlocks = getPendingCountForNode(dn);
}
LOG.info("Node {} has {} blocks yet to process", dn, outstandingBlocks);
if (outstandingBlocks == 0) {
removeList.add(dn);
}
}
}
}
/**
* Returns the number of blocks pending for the given node by adding those
* blocks in pendingRep and outOfServiceNodeBlocks.
*
* @param dn The datanode to return the count for
* @return The total block count, or zero if none are pending
*/
private int getPendingCountForNode(DatanodeDescriptor dn) {
int count = 0;
HashMap<BlockInfo, Integer> blocks = outOfServiceNodeBlocks.get(dn);
if (blocks != null) {
count += blocks.size();
}
List<BlockInfo> pendingBlocks = pendingRep.get(dn);
if (pendingBlocks != null) {
count += pendingBlocks.size();
}
return count;
}
/**
* Iterate across all nodes in outOfServiceNodeBlocks which have blocks yet
* to be processed.
*
* The block is removed from outOfServiceNodeBlocks and if it needs
* replication it is added to the pendingRep map and also to the
* BlockManager replication queue.
*
* Any block that does not need replication is discarded.
*
* The method will return when the pendingRep map has pendingRepLimit
* blocks or there are no further blocks to process.
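*
* A simplified sketch of the round-robin fill implemented below, assuming
* one block iterator per tracked node:
*
* <pre>{@code
* Iterator<DatanodeDescriptor> nodeIter =
*     Iterables.cycle(iterators.keySet()).iterator();
* while (nodeIter.hasNext() && pendingCount < pendingRepLimit) {
*   DatanodeDescriptor dn = nodeIter.next();
*   // consume dn's blocks until one is queued, then move to the next node
* }
* }</pre>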
*/
private void moveBlocksToPending() {
int blocksProcessed = 0;
int pendingCount = getPendingCount();
int yetToBeProcessed = getYetToBeProcessedCount();
if (pendingCount == 0 && yetToBeProcessed == 0) {
// There are no blocks to process so just return
LOG.debug("There are no pending or blocks yet to be processed");
return;
}
namesystem.writeLock();
try {
long repQueueSize = blockManager.getLowRedundancyBlocksCount();
LOG.info("There are {} blocks pending replication and the limit is "+
"{}. A further {} blocks are waiting to be processed. "+
"The replication queue currently has {} blocks",
pendingCount, pendingRepLimit, yetToBeProcessed, repQueueSize);
if (pendingCount >= pendingRepLimit) {
// Only add more blocks to the replication queue if we don't already
// have too many pending
return;
}
// Create a "Block Iterator" for each node decommissioning or entering
// maintenance. These iterators will be used "round robined" to add blocks
// to the replication queue and PendingRep
HashMap>
iterators = new HashMap<>();
for (Map.Entry> e
: outOfServiceNodeBlocks.entrySet()) {
iterators.put(e.getKey(), e.getValue().keySet().iterator());
}
// Now loop until we fill the pendingRep map with pendingRepLimit blocks
// or run out of blocks to add.
Iterator<DatanodeDescriptor> nodeIter =
Iterables.cycle(iterators.keySet()).iterator();
while (nodeIter.hasNext()) {
// Cycle through each node with blocks which still need to be processed
DatanodeDescriptor dn = nodeIter.next();
Iterator<BlockInfo> blockIt = iterators.get(dn);
while (blockIt.hasNext()) {
// Process the blocks for the node until we find one that needs
// replication
if (blocksProcessed >= blocksPerLock) {
blocksProcessed = 0;
namesystem.writeUnlock();
namesystem.writeLock();
}
blocksProcessed++;
if (nextBlockAddedToPending(blockIt, dn)) {
// Exit the inner "block" loop so an iterator for the next datanode
// is used for the next block.
pendingCount++;
break;
}
}
if (!blockIt.hasNext()) {
// remove the iterator as there are no blocks left in it
nodeIter.remove();
}
if (pendingCount >= pendingRepLimit) {
// We have scheduled the limit of blocks for replication, so do
// not add any more
break;
}
}
} finally {
namesystem.writeUnlock();
}
LOG.debug("{} blocks are now pending replication", pendingCount);
}
/**
* Takes and removes the next block from the given iterator and checks if it
* needs additional replicas. If it does, it will be scheduled for
* reconstruction and added to the pendingRep map.
* @param it The iterator to take the next block from
* @param dn The datanodeDescriptor the iterator applies to
* @return True if the block needs replication, otherwise false
*/
private boolean nextBlockAddedToPending(Iterator<BlockInfo> it,
DatanodeDescriptor dn) {
BlockInfo block = it.next();
it.remove();
numBlocksChecked++;
if (!isBlockReplicatedOk(dn, block, true, null)) {
pendingRep.computeIfAbsent(dn, k -> new LinkedList<>()).add(block);
return true;
}
return false;
}
private int getPendingCount() {
if (pendingRep.size() == 0) {
return 0;
}
return pendingRep.values()
.stream()
.mapToInt(List::size)
.sum();
}
private int getYetToBeProcessedCount() {
if (outOfServiceNodeBlocks.size() == 0) {
return 0;
}
return outOfServiceNodeBlocks.values()
.stream()
.mapToInt(Map::size)
.sum();
}
/**
* Scan all the blocks held on a datanode. For a node being decommissioned
* we assume that the majority of blocks on the node will need to have new
* replicas made, and therefore we do not check if they are under-replicated
* here and instead add them to the list of blocks to track.
*
* For a node being moved into maintenance, we assume most blocks will be
* replicated OK and hence we do check their under-replicated status here,
* hopefully reducing the number of blocks to track.
*
* On a re-scan (initialScan = false) we assume the node has been processed
* already, and hence there should be few under-replicated blocks, so we
* check the under-replicated status before adding the blocks to the
* tracking list.
*
* This means that for a node being decommissioned there should be a large
* number of blocks to process later, but for maintenance, a smaller number.
*
* As this method does not schedule any blocks for reconstruction, this
* scan can be performed under the namenode read lock, and the lock is
* dropped and re-acquired for each storage on the DN.
*
* @param dn - The datanode to process
* @param initialScan - True if this is the first time scanning the node
* or false if it is a rescan.
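*
* The per-block decision, summarised:
*
* <pre>{@code
* // initial scan of a decommissioning node -> track without checking
* // entering maintenance, or any rescan    -> check first, track if needed
* }</pre>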
*/
private void scanDatanodeStorage(DatanodeDescriptor dn,
Boolean initialScan) {
HashMap<BlockInfo, Integer> blockList = outOfServiceNodeBlocks.get(dn);
if (blockList == null) {
blockList = new HashMap<>();
outOfServiceNodeBlocks.put(dn, blockList);
}
DatanodeStorageInfo[] storage;
namesystem.readLock();
try {
storage = dn.getStorageInfos();
} finally {
namesystem.readUnlock();
}
for (DatanodeStorageInfo s : storage) {
namesystem.readLock();
try {
// As the lock is dropped and re-taken between each storage, we need
// to check the storage is still present before processing it, as it
// may have been removed.
if (dn.getStorageInfo(s.getStorageID()) == null) {
continue;
}
Iterator<BlockInfo> it = s.getBlockIterator();
while (it.hasNext()) {
BlockInfo b = it.next();
if (!initialScan || dn.isEnteringMaintenance()) {
// this is a rescan, so most blocks should be replicated now,
// or this node is going into maintenance. On a healthy
// cluster using racks or upgrade domain, a node should be
// able to go into maintenance without replicating many blocks
// so we will check them immediately.
if (!isBlockReplicatedOk(dn, b, false, null)) {
blockList.put(b, null);
}
} else {
blockList.put(b, null);
}
numBlocksChecked++;
}
} finally {
namesystem.readUnlock();
}
}
}
/**
* Process the list of pendingReplication Blocks. These are the blocks
* which have been moved from outOfServiceNodeBlocks, confirmed to be
* under-replicated and were added to the blockManager replication
* queue.
*
* Any blocks which have been confirmed to be replicated sufficiently are
* removed from the list.
*
* The datanode stats are also updated in this method, updating the total
* pending block count, the number of blocks in PendingRep which are in
* open files and the number of blocks in PendingRep which are only on
* out of service nodes.
*
* As this method makes changes to the replication queue, it acquires the
* namenode write lock while it runs.
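*
* A sketch of the per-node flow implemented below:
*
* <pre>{@code
* BlockStats stats = new BlockStats();
* blocks.removeIf(b -> isBlockReplicatedOk(dn, b, true, stats));
* dn.getLeavingServiceStatus().set(stats.getOpenFileCount(),
*     stats.getOpenFiles(), getPendingCountForNode(dn),
*     stats.getOutOfServiceBlockCount());
* }</pre>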
*/
private void processPendingReplication() {
namesystem.writeLock();
try {
for (Iterator<Map.Entry<DatanodeDescriptor, List<BlockInfo>>>
entIt = pendingRep.entrySet().iterator(); entIt.hasNext();) {
Map.Entry<DatanodeDescriptor, List<BlockInfo>> entry = entIt.next();
DatanodeDescriptor dn = entry.getKey();
List blocks = entry.getValue();
if (blocks == null) {
// should not happen
entIt.remove();
continue;
}
Iterator blockIt = blocks.iterator();
BlockStats suspectBlocks = new BlockStats();
while(blockIt.hasNext()) {
BlockInfo b = blockIt.next();
if (isBlockReplicatedOk(dn, b, true, suspectBlocks)) {
blockIt.remove();
}
numBlocksChecked++;
}
if (blocks.size() == 0) {
entIt.remove();
}
// Update metrics for this datanode.
dn.getLeavingServiceStatus().set(
suspectBlocks.getOpenFileCount(),
suspectBlocks.getOpenFiles(),
getPendingCountForNode(dn),
suspectBlocks.getOutOfServiceBlockCount());
}
} finally {
namesystem.writeUnlock();
}
}
/**
* Checks if a block is sufficiently replicated and optionally schedules
* it for reconstruction if it is not.
*
* If a BlockStats object is passed, this method will also update it if the
* block is part of an open file or only on outOfService nodes.
*
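* A sketch of the two call patterns used in this class:
*
* <pre>{@code
* // rescan path: check only, no scheduling, no stats
* boolean ok = isBlockReplicatedOk(dn, block, false, null);
* // pendingRep pruning: check, schedule if needed, and gather stats
* boolean done = isBlockReplicatedOk(dn, block, true, suspectBlocks);
* }</pre>
*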
* @param datanode The datanode the block belongs to
* @param block The block to check
* @param scheduleReconStruction Whether to add the block to the replication
* queue if it is not sufficiently replicated.
* Passing true will add it to the replication
* queue, and false will not.
* @param suspectBlocks If non-null check if the block is part of an open
* file or only on out of service nodes and update the
* passed object accordingly.
* @return True if the block is sufficiently replicated, otherwise false.
*/
private boolean isBlockReplicatedOk(DatanodeDescriptor datanode,
BlockInfo block, Boolean scheduleReconStruction,
BlockStats suspectBlocks) {
if (blockManager.blocksMap.getStoredBlock(block) == null) {
LOG.trace("Removing unknown block {}", block);
return true;
}
long bcId = block.getBlockCollectionId();
if (bcId == INodeId.INVALID_INODE_ID) {
// Orphan block, will be invalidated eventually. Skip.
return false;
}
final BlockCollection bc = blockManager.getBlockCollection(block);
final NumberReplicas num = blockManager.countNodes(block);
final int liveReplicas = num.liveReplicas();
// Schedule low redundancy blocks for reconstruction
// if not already pending.
boolean isDecommission = datanode.isDecommissionInProgress();
boolean isMaintenance = datanode.isEnteringMaintenance();
boolean neededReconstruction = isDecommission ?
blockManager.isNeededReconstruction(block, num) :
blockManager.isNeededReconstructionForMaintenance(block, num);
if (neededReconstruction && scheduleReconStruction) {
if (!blockManager.neededReconstruction.contains(block) &&
blockManager.pendingReconstruction.getNumReplicas(block) == 0 &&
blockManager.isPopulatingReplQueues()) {
// Process these blocks only when active NN is out of safe mode.
blockManager.neededReconstruction.add(block,
liveReplicas, num.readOnlyReplicas(),
num.outOfServiceReplicas(),
blockManager.getExpectedRedundancyNum(block));
}
}
if (suspectBlocks != null) {
// Only if we pass a BlockStats object should we do these
// checks, as they should only be checked when processing PendingRep.
if (bc.isUnderConstruction()) {
INode ucFile = namesystem.getFSDirectory().getInode(bc.getId());
if (!(ucFile instanceof INodeFile) ||
!ucFile.asFile().isUnderConstruction()) {
LOG.warn("File {} is not under construction. Skipping add to " +
"low redundancy open files!", ucFile.getLocalName());
} else {
suspectBlocks.addOpenFile(ucFile.getId());
}
}
if ((liveReplicas == 0) && (num.outOfServiceReplicas() > 0)) {
suspectBlocks.incrementOutOfServiceBlocks();
}
}
// Even if the block is not sufficiently redundant overall, it need not
// block decommission/maintenance if it has redundancy sufficient for
// that purpose (e.g. enough live replicas elsewhere).
if (dnAdmin.isSufficient(block, bc, num, isDecommission, isMaintenance)) {
return true;
}
return false;
}
static class BlockStats {
private LightWeightHashSet<Long> openFiles =
new LightWeightLinkedSet<>();
private int openFileBlockCount = 0;
private int outOfServiceBlockCount = 0;
public void addOpenFile(long id) {
// Several blocks can be part of the same file so track how
// many adds we get, as the same file could be added several times
// for different blocks.
openFileBlockCount++;
openFiles.add(id);
}
public void incrementOutOfServiceBlocks() {
outOfServiceBlockCount++;
}
public LightWeightHashSet<Long> getOpenFiles() {
return openFiles;
}
public int getOpenFileCount() {
return openFileBlockCount;
}
public int getOutOfServiceBlockCount() {
return outOfServiceBlockCount;
}
}
}