org.apache.hadoop.hdfs.server.datanode.BlockPoolSliceScanner Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode;
import java.io.DataOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.server.common.GenerationStamp;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.RollingLogs;
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.util.GSet;
import org.apache.hadoop.util.LightWeightGSet;
import org.apache.hadoop.util.LightWeightGSet.LinkedElement;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Time;
import com.google.common.annotations.VisibleForTesting;
/**
* Scans the block files under a block pool and verifies that the
* files are not corrupt.
* This keeps track of blocks and their last verification times.
* Currently it does not modify the metadata for block.
*/
class BlockPoolSliceScanner {
public static final Log LOG = LogFactory.getLog(BlockPoolSliceScanner.class);
private static final String DATA_FORMAT = "yyyy-MM-dd HH:mm:ss,SSS";
private static final int MAX_SCAN_RATE = 8 * 1024 * 1024; // 8MB per sec
private static final int MIN_SCAN_RATE = 1 * 1024 * 1024; // 1MB per sec
private static final long DEFAULT_SCAN_PERIOD_HOURS = 21*24L; // three weeks
private static final String VERIFICATION_PREFIX = "dncp_block_verification.log";
private final String blockPoolId;
private final long scanPeriod;
private final AtomicLong lastScanTime = new AtomicLong();
private final DataNode datanode;
private final FsDatasetSpi extends FsVolumeSpi> dataset;
private final SortedSet blockInfoSet
= new TreeSet(BlockScanInfo.LAST_SCAN_TIME_COMPARATOR);
private final GSet blockMap
= new LightWeightGSet(
LightWeightGSet.computeCapacity(0.5, "BlockMap"));
// processedBlocks keeps track of which blocks are scanned
// since the last run.
private volatile HashMap processedBlocks;
private long totalScans = 0;
private long totalScanErrors = 0;
private long totalTransientErrors = 0;
private final AtomicInteger totalBlocksScannedInLastRun = new AtomicInteger(); // Used for test only
private long currentPeriodStart = Time.now();
private long bytesLeft = 0; // Bytes to scan in this period
private long totalBytesToScan = 0;
private boolean isNewPeriod = true;
private final LogFileHandler verificationLog;
private final DataTransferThrottler throttler = new DataTransferThrottler(
200, MAX_SCAN_RATE);
private static enum ScanType {
VERIFICATION_SCAN, // scanned as part of periodic verfication
NONE,
}
// Extend Block because in the DN process there's a 1-to-1 correspondence of
// BlockScanInfo to Block instances, so by extending rather than containing
// Block, we can save a bit of Object overhead (about 24 bytes per block
// replica.)
static class BlockScanInfo extends Block
implements LightWeightGSet.LinkedElement {
/** Compare the info by the last scan time. */
static final Comparator LAST_SCAN_TIME_COMPARATOR
= new Comparator() {
@Override
public int compare(BlockScanInfo left, BlockScanInfo right) {
final long l = left.lastScanTime;
final long r = right.lastScanTime;
// compare blocks itself if scantimes are same to avoid.
// because TreeMap uses comparator if available to check existence of
// the object.
return l < r? -1: l > r? 1: left.compareTo(right);
}
};
long lastScanTime = 0;
ScanType lastScanType = ScanType.NONE;
boolean lastScanOk = true;
private LinkedElement next;
BlockScanInfo(Block block) {
super(block);
}
@Override
public int hashCode() {
return super.hashCode();
}
@Override
public boolean equals(Object that) {
if (this == that) {
return true;
}
return super.equals(that);
}
long getLastScanTime() {
return (lastScanType == ScanType.NONE) ? 0 : lastScanTime;
}
@Override
public void setNext(LinkedElement next) {
this.next = next;
}
@Override
public LinkedElement getNext() {
return next;
}
}
BlockPoolSliceScanner(String bpid, DataNode datanode,
FsDatasetSpi extends FsVolumeSpi> dataset, Configuration conf) {
this.datanode = datanode;
this.dataset = dataset;
this.blockPoolId = bpid;
long hours = conf.getInt(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY,
DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_DEFAULT);
if (hours <= 0) {
hours = DEFAULT_SCAN_PERIOD_HOURS;
}
this.scanPeriod = hours * 3600 * 1000;
LOG.info("Periodic Block Verification Scanner initialized with interval "
+ hours + " hours for block pool " + bpid);
// get the list of blocks and arrange them in random order
List arr = dataset.getFinalizedBlocks(blockPoolId);
Collections.shuffle(arr);
long scanTime = -1;
for (Block block : arr) {
BlockScanInfo info = new BlockScanInfo( block );
info.lastScanTime = scanTime--;
//still keep 'info.lastScanType' to NONE.
addBlockInfo(info);
}
RollingLogs rollingLogs = null;
try {
rollingLogs = dataset.createRollingLogs(blockPoolId, VERIFICATION_PREFIX);
} catch (IOException e) {
LOG.warn("Could not open verfication log. " +
"Verification times are not stored.");
}
verificationLog = rollingLogs == null? null: new LogFileHandler(rollingLogs);
}
String getBlockPoolId() {
return blockPoolId;
}
private void updateBytesToScan(long len, long lastScanTime) {
// len could be negative when a block is deleted.
totalBytesToScan += len;
if ( lastScanTime < currentPeriodStart ) {
bytesLeft += len;
}
// Should we change throttler bandwidth every time bytesLeft changes?
// not really required.
}
private synchronized void addBlockInfo(BlockScanInfo info) {
boolean added = blockInfoSet.add(info);
blockMap.put(info);
if (added) {
updateBytesToScan(info.getNumBytes(), info.lastScanTime);
}
}
private synchronized void delBlockInfo(BlockScanInfo info) {
boolean exists = blockInfoSet.remove(info);
blockMap.remove(info);
if (exists) {
updateBytesToScan(-info.getNumBytes(), info.lastScanTime);
}
}
/** Update blockMap by the given LogEntry */
private synchronized void updateBlockInfo(LogEntry e) {
BlockScanInfo info = blockMap.get(new Block(e.blockId, 0, e.genStamp));
if(info != null && e.verificationTime > 0 &&
info.lastScanTime < e.verificationTime) {
delBlockInfo(info);
info.lastScanTime = e.verificationTime;
info.lastScanType = ScanType.VERIFICATION_SCAN;
addBlockInfo(info);
}
}
private synchronized long getNewBlockScanTime() {
/* If there are a lot of blocks, this returns a random time with in
* the scan period. Otherwise something sooner.
*/
long period = Math.min(scanPeriod,
Math.max(blockMap.size(),1) * 600 * 1000L);
int periodInt = Math.abs((int)period);
return Time.now() - scanPeriod +
DFSUtil.getRandom().nextInt(periodInt);
}
/** Adds block to list of blocks */
synchronized void addBlock(ExtendedBlock block) {
BlockScanInfo info = blockMap.get(block.getLocalBlock());
if ( info != null ) {
LOG.warn("Adding an already existing block " + block);
delBlockInfo(info);
}
info = new BlockScanInfo(block.getLocalBlock());
info.lastScanTime = getNewBlockScanTime();
addBlockInfo(info);
adjustThrottler();
}
/** Deletes the block from internal structures */
synchronized void deleteBlock(Block block) {
BlockScanInfo info = blockMap.get(block);
if ( info != null ) {
delBlockInfo(info);
}
}
@VisibleForTesting
long getTotalScans() {
return totalScans;
}
/** @return the last scan time for the block pool. */
long getLastScanTime() {
return lastScanTime.get();
}
/** @return the last scan time the given block. */
synchronized long getLastScanTime(Block block) {
BlockScanInfo info = blockMap.get(block);
return info == null? 0: info.lastScanTime;
}
/** Deletes blocks from internal structures */
void deleteBlocks(Block[] blocks) {
for ( Block b : blocks ) {
deleteBlock(b);
}
}
private synchronized void updateScanStatus(Block block,
ScanType type,
boolean scanOk) {
BlockScanInfo info = blockMap.get(block);
if ( info != null ) {
delBlockInfo(info);
} else {
// It might already be removed. Thats ok, it will be caught next time.
info = new BlockScanInfo(block);
}
long now = Time.now();
info.lastScanType = type;
info.lastScanTime = now;
info.lastScanOk = scanOk;
addBlockInfo(info);
// Don't update meta data if the verification failed.
if (!scanOk) {
return;
}
if (verificationLog != null) {
verificationLog.append(now, block.getGenerationStamp(),
block.getBlockId());
}
}
private void handleScanFailure(ExtendedBlock block) {
LOG.info("Reporting bad " + block);
try {
datanode.reportBadBlocks(block);
} catch (IOException ie) {
// it is bad, but not bad enough to shutdown the scanner
LOG.warn("Cannot report bad " + block.getBlockId());
}
}
static private class LogEntry {
long blockId = -1;
long verificationTime = -1;
long genStamp = GenerationStamp.GRANDFATHER_GENERATION_STAMP;
/**
* The format consists of single line with multiple entries. each
* entry is in the form : name="value".
* This simple text and easily extendable and easily parseable with a
* regex.
*/
private static final Pattern entryPattern =
Pattern.compile("\\G\\s*([^=\\p{Space}]+)=\"(.*?)\"\\s*");
static String toString(long verificationTime, long genStamp, long blockId,
DateFormat dateFormat) {
return "\ndate=\"" + dateFormat.format(new Date(verificationTime))
+ "\"\t time=\"" + verificationTime
+ "\"\t genstamp=\"" + genStamp
+ "\"\t id=\"" + blockId + "\"";
}
static LogEntry parseEntry(String line) {
LogEntry entry = new LogEntry();
Matcher matcher = entryPattern.matcher(line);
while (matcher.find()) {
String name = matcher.group(1);
String value = matcher.group(2);
try {
if (name.equals("id")) {
entry.blockId = Long.valueOf(value);
} else if (name.equals("time")) {
entry.verificationTime = Long.valueOf(value);
} else if (name.equals("genstamp")) {
entry.genStamp = Long.valueOf(value);
}
} catch(NumberFormatException nfe) {
LOG.warn("Cannot parse line: " + line, nfe);
return null;
}
}
return entry;
}
}
private synchronized void adjustThrottler() {
long timeLeft = currentPeriodStart+scanPeriod - Time.now();
long bw = Math.max(bytesLeft*1000/timeLeft, MIN_SCAN_RATE);
throttler.setBandwidth(Math.min(bw, MAX_SCAN_RATE));
}
@VisibleForTesting
void verifyBlock(ExtendedBlock block) {
BlockSender blockSender = null;
/* In case of failure, attempt to read second time to reduce
* transient errors. How do we flush block data from kernel
* buffers before the second read?
*/
for (int i=0; i<2; i++) {
boolean second = (i > 0);
try {
adjustThrottler();
blockSender = new BlockSender(block, 0, -1, false, true, true,
datanode, null, CachingStrategy.newDropBehind());
DataOutputStream out =
new DataOutputStream(new IOUtils.NullOutputStream());
blockSender.sendBlock(out, null, throttler);
LOG.info((second ? "Second " : "") +
"Verification succeeded for " + block);
if ( second ) {
totalTransientErrors++;
}
updateScanStatus(block.getLocalBlock(), ScanType.VERIFICATION_SCAN, true);
return;
} catch (IOException e) {
updateScanStatus(block.getLocalBlock(), ScanType.VERIFICATION_SCAN, false);
// If the block does not exists anymore, then its not an error
if (!dataset.contains(block)) {
LOG.info(block + " is no longer in the dataset");
deleteBlock(block.getLocalBlock());
return;
}
// If the block exists, the exception may due to a race with write:
// The BlockSender got an old block path in rbw. BlockReceiver removed
// the rbw block from rbw to finalized but BlockSender tried to open the
// file before BlockReceiver updated the VolumeMap. The state of the
// block can be changed again now, so ignore this error here. If there
// is a block really deleted by mistake, DirectoryScan should catch it.
if (e instanceof FileNotFoundException ) {
LOG.info("Verification failed for " + block +
" - may be due to race with write");
deleteBlock(block.getLocalBlock());
return;
}
LOG.warn((second ? "Second " : "First ") + "Verification failed for "
+ block, e);
if (second) {
totalScanErrors++;
datanode.getMetrics().incrBlockVerificationFailures();
handleScanFailure(block);
return;
}
} finally {
IOUtils.closeStream(blockSender);
datanode.getMetrics().incrBlocksVerified();
totalScans++;
}
}
}
private synchronized long getEarliestScanTime() {
if (!blockInfoSet.isEmpty()) {
return blockInfoSet.first().lastScanTime;
}
return Long.MAX_VALUE;
}
private synchronized boolean isFirstBlockProcessed() {
if (!blockInfoSet.isEmpty()) {
long blockId = blockInfoSet.first().getBlockId();
if ((processedBlocks.get(blockId) != null)
&& (processedBlocks.get(blockId) == 1)) {
return true;
}
}
return false;
}
// Picks one block and verifies it
private void verifyFirstBlock() {
Block block = null;
synchronized (this) {
if (!blockInfoSet.isEmpty()) {
block = blockInfoSet.first();
}
}
if ( block != null ) {
verifyBlock(new ExtendedBlock(blockPoolId, block));
processedBlocks.put(block.getBlockId(), 1);
}
}
// Used for tests only
int getBlocksScannedInLastRun() {
return totalBlocksScannedInLastRun.get();
}
/**
* Reads the current and previous log files (if any) and marks the blocks
* processed if they were processed within last scan period. Copies the log
* records of recently scanned blocks from previous to current file.
* Returns false if the process was interrupted because the thread is marked
* to exit.
*/
private boolean assignInitialVerificationTimes() {
//First updates the last verification times from the log file.
if (verificationLog != null) {
long now = Time.now();
RollingLogs.LineIterator logIterator = null;
try {
logIterator = verificationLog.logs.iterator(false);
// update verification times from the verificationLog.
while (logIterator.hasNext()) {
if (!datanode.shouldRun
|| datanode.blockScanner.blockScannerThread.isInterrupted()) {
return false;
}
LogEntry entry = LogEntry.parseEntry(logIterator.next());
if (entry != null) {
updateBlockInfo(entry);
if (now - entry.verificationTime < scanPeriod) {
BlockScanInfo info = blockMap.get(new Block(entry.blockId, 0,
entry.genStamp));
if (info != null) {
if (processedBlocks.get(entry.blockId) == null) {
if (isNewPeriod) {
updateBytesLeft(-info.getNumBytes());
}
processedBlocks.put(entry.blockId, 1);
}
if (logIterator.isLastReadFromPrevious()) {
// write the log entry to current file
// so that the entry is preserved for later runs.
verificationLog.append(entry.verificationTime, entry.genStamp,
entry.blockId);
}
}
}
}
}
} catch (IOException e) {
LOG.warn("Failed to read previous verification times.", e);
} finally {
IOUtils.closeStream(logIterator);
}
isNewPeriod = false;
}
/* Before this loop, entries in blockInfoSet that are not
* updated above have lastScanTime of <= 0 . Loop until first entry has
* lastModificationTime > 0.
*/
synchronized (this) {
final int numBlocks = Math.max(blockMap.size(), 1);
// Initially spread the block reads over half of scan period
// so that we don't keep scanning the blocks too quickly when restarted.
long verifyInterval = Math.min(scanPeriod/(2L * numBlocks), 10*60*1000L);
long lastScanTime = Time.now() - scanPeriod;
if (!blockInfoSet.isEmpty()) {
BlockScanInfo info;
while ((info = blockInfoSet.first()).lastScanTime < 0) {
delBlockInfo(info);
info.lastScanTime = lastScanTime;
lastScanTime += verifyInterval;
addBlockInfo(info);
}
}
}
return true;
}
private synchronized void updateBytesLeft(long len) {
bytesLeft += len;
}
private synchronized void startNewPeriod() {
LOG.info("Starting a new period : work left in prev period : "
+ String.format("%.2f%%", totalBytesToScan == 0 ? 0
: (bytesLeft * 100.0) / totalBytesToScan));
// reset the byte counts :
bytesLeft = totalBytesToScan;
currentPeriodStart = Time.now();
isNewPeriod = true;
}
private synchronized boolean workRemainingInCurrentPeriod() {
if (bytesLeft <= 0 && Time.now() < currentPeriodStart + scanPeriod) {
if (LOG.isDebugEnabled()) {
LOG.debug("Skipping scan since bytesLeft=" + bytesLeft + ", Start=" +
currentPeriodStart + ", period=" + scanPeriod + ", now=" +
Time.now() + " " + blockPoolId);
}
return false;
} else {
return true;
}
}
void scanBlockPoolSlice() {
if (!workRemainingInCurrentPeriod()) {
return;
}
// Create a new processedBlocks structure
processedBlocks = new HashMap();
if (!assignInitialVerificationTimes()) {
return;
}
// Start scanning
try {
scan();
} finally {
totalBlocksScannedInLastRun.set(processedBlocks.size());
lastScanTime.set(Time.now());
}
}
/**
* Shuts down this BlockPoolSliceScanner and releases any internal resources.
*/
void shutdown() {
if (verificationLog != null) {
verificationLog.close();
}
}
private void scan() {
if (LOG.isDebugEnabled()) {
LOG.debug("Starting to scan blockpool: " + blockPoolId);
}
try {
adjustThrottler();
while (datanode.shouldRun
&& !datanode.blockScanner.blockScannerThread.isInterrupted()
&& datanode.isBPServiceAlive(blockPoolId)) {
long now = Time.now();
synchronized (this) {
if ( now >= (currentPeriodStart + scanPeriod)) {
startNewPeriod();
}
}
if (((now - getEarliestScanTime()) >= scanPeriod)
|| ((!blockInfoSet.isEmpty()) && !(this.isFirstBlockProcessed()))) {
verifyFirstBlock();
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("All remaining blocks were processed recently, "
+ "so this run is complete");
}
break;
}
}
} catch (RuntimeException e) {
LOG.warn("RuntimeException during BlockPoolScanner.scan()", e);
throw e;
} finally {
rollVerificationLogs();
if (LOG.isDebugEnabled()) {
LOG.debug("Done scanning block pool: " + blockPoolId);
}
}
}
private synchronized void rollVerificationLogs() {
if (verificationLog != null) {
try {
verificationLog.logs.roll();
} catch (IOException ex) {
LOG.warn("Received exception: ", ex);
verificationLog.close();
}
}
}
synchronized void printBlockReport(StringBuilder buffer,
boolean summaryOnly) {
long oneHour = 3600*1000;
long oneDay = 24*oneHour;
long oneWeek = 7*oneDay;
long fourWeeks = 4*oneWeek;
int inOneHour = 0;
int inOneDay = 0;
int inOneWeek = 0;
int inFourWeeks = 0;
int inScanPeriod = 0;
int neverScanned = 0;
DateFormat dateFormat = new SimpleDateFormat(DATA_FORMAT);
int total = blockInfoSet.size();
long now = Time.now();
Date date = new Date();
for(Iterator it = blockInfoSet.iterator(); it.hasNext();) {
BlockScanInfo info = it.next();
long scanTime = info.getLastScanTime();
long diff = now - scanTime;
if (diff <= oneHour) inOneHour++;
if (diff <= oneDay) inOneDay++;
if (diff <= oneWeek) inOneWeek++;
if (diff <= fourWeeks) inFourWeeks++;
if (diff <= scanPeriod) inScanPeriod++;
if (scanTime <= 0) neverScanned++;
if (!summaryOnly) {
date.setTime(scanTime);
String scanType =
(info.lastScanType == ScanType.VERIFICATION_SCAN) ? "local" : "none";
buffer.append(String.format("%-26s : status : %-6s type : %-6s" +
" scan time : " +
"%-15d %s%n", info,
(info.lastScanOk ? "ok" : "failed"),
scanType, scanTime,
(scanTime <= 0) ? "not yet verified" :
dateFormat.format(date)));
}
}
double pctPeriodLeft = (scanPeriod + currentPeriodStart - now)
*100.0/scanPeriod;
double pctProgress = (totalBytesToScan == 0) ? 100 :
(totalBytesToScan-bytesLeft)*100.0/totalBytesToScan;
buffer.append(String.format("%nTotal Blocks : %6d" +
"%nVerified in last hour : %6d" +
"%nVerified in last day : %6d" +
"%nVerified in last week : %6d" +
"%nVerified in last four weeks : %6d" +
"%nVerified in SCAN_PERIOD : %6d" +
"%nNot yet verified : %6d" +
"%nVerified since restart : %6d" +
"%nScans since restart : %6d" +
"%nScan errors since restart : %6d" +
"%nTransient scan errors : %6d" +
"%nCurrent scan rate limit KBps : %6d" +
"%nProgress this period : %6.0f%%" +
"%nTime left in cur period : %6.2f%%" +
"%n",
total, inOneHour, inOneDay, inOneWeek,
inFourWeeks, inScanPeriod, neverScanned,
totalScans, totalScans,
totalScanErrors, totalTransientErrors,
Math.round(throttler.getBandwidth()/1024.0),
pctProgress, pctPeriodLeft));
}
/**
* This class takes care of log file used to store the last verification
* times of the blocks.
*/
private static class LogFileHandler {
private final DateFormat dateFormat = new SimpleDateFormat(DATA_FORMAT);
private final RollingLogs logs;
private LogFileHandler(RollingLogs logs) {
this.logs = logs;
}
void append(long verificationTime, long genStamp, long blockId) {
final String m = LogEntry.toString(verificationTime, genStamp, blockId,
dateFormat);
try {
logs.appender().append(m);
} catch (IOException e) {
LOG.warn("Failed to append to " + logs + ", m=" + m, e);
}
}
void close() {
try {
logs.appender().close();
} catch (IOException e) {
LOG.warn("Failed to close the appender of " + logs, e);
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy