/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.common.GenerationStamp;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.Time;
/**
* Periodically scans the data directories for block and block metadata files.
* Reconciles the differences with block information maintained in the dataset.
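* <p>
* The scan period and the number of report-compiler threads are read from
* {@link DFSConfigKeys#DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY} and
* {@link DFSConfigKeys#DFS_DATANODE_DIRECTORYSCAN_THREADS_KEY}.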
*/
@InterfaceAudience.Private
public class DirectoryScanner implements Runnable {
private static final Log LOG = LogFactory.getLog(DirectoryScanner.class);
private final DataNode datanode;
private final FsDatasetSpi<?> dataset;
private final ExecutorService reportCompileThreadPool;
private final ScheduledExecutorService masterThread;
private final long scanPeriodMsecs;
private volatile boolean shouldRun = false;
private boolean retainDiffs = false;
ScanInfoPerBlockPool diffs = new ScanInfoPerBlockPool();
Map<String, Stats> stats = new HashMap<String, Stats>();
/**
* Allow retaining diffs for unit test and analysis
* @param b - defaults to false (off)
*/
void setRetainDiffs(boolean b) {
retainDiffs = b;
}
/** Stats tracked for reporting and testing, per blockpool */
static class Stats {
String bpid;
long totalBlocks = 0;
long missingMetaFile = 0;
long missingBlockFile = 0;
long missingMemoryBlocks = 0;
long mismatchBlocks = 0;
public Stats(String bpid) {
this.bpid = bpid;
}
@Override
public String toString() {
return "BlockPool " + bpid
+ " Total blocks: " + totalBlocks + ", missing metadata files:"
+ missingMetaFile + ", missing block files:" + missingBlockFile
+ ", missing blocks in memory:" + missingMemoryBlocks
+ ", mismatched blocks:" + mismatchBlocks;
}
}
static class ScanInfoPerBlockPool extends
HashMap<String, LinkedList<ScanInfo>> {
private static final long serialVersionUID = 1L;
ScanInfoPerBlockPool() {super();}
ScanInfoPerBlockPool(int sz) {super(sz);}
/**
* Merges "that" ScanInfoPerBlockPool into this one
* @param that
*/
public void addAll(ScanInfoPerBlockPool that) {
if (that == null) return;
for (Entry<String, LinkedList<ScanInfo>> entry : that.entrySet()) {
String bpid = entry.getKey();
LinkedList<ScanInfo> list = entry.getValue();
if (this.containsKey(bpid)) {
//merge that per-bpid linked list with this one
this.get(bpid).addAll(list);
} else {
//add that new bpid and its linked list to this
this.put(bpid, list);
}
}
}
/**
* Convert all the LinkedList values in this ScanInfoPerBlockPool map
* into sorted arrays, and return a new map of these arrays per blockpool
* @return a map of ScanInfo arrays per blockpool
*/
public Map<String, ScanInfo[]> toSortedArrays() {
Map<String, ScanInfo[]> result =
new HashMap<String, ScanInfo[]>(this.size());
for (Entry<String, LinkedList<ScanInfo>> entry : this.entrySet()) {
String bpid = entry.getKey();
LinkedList<ScanInfo> list = entry.getValue();
// convert list to array
ScanInfo[] record = list.toArray(new ScanInfo[list.size()]);
// Sort array based on blockId
Arrays.sort(record);
result.put(bpid, record);
}
return result;
}
}
/**
* Tracks the files and other information related to a block on the disk
* Missing file is indicated by setting the corresponding member
* to null.
*
* Because millions of these structures may be created, we try to save
* memory here. So instead of storing full paths, we store path suffixes.
* The block file, if it exists, will have a path like this:
* <volume base path>/<block path>
* So we don't need to store the volume path, since we already know what the
* volume is.
*
* The metadata file, if it exists, will have a path like this:
* <volume base path>/<block path>_<genstamp>.meta
* So if we have a block file, there isn't any need to store the block path
* again.
*
* The accessor functions take care of these manipulations.
*/
static class ScanInfo implements Comparable<ScanInfo> {
private final long blockId;
/**
* The block file path, relative to the volume's base directory.
* If there was no block file found, this may be null. If 'vol'
* is null, then this is the full path of the block file.
*/
private final String blockSuffix;
/**
* The suffix of the meta file path relative to the block file.
* If blockSuffix is null, then this will be the entire path relative
* to the volume base directory, or an absolute path if vol is also
* null.
*/
private final String metaSuffix;
private final FsVolumeSpi volume;
/**
* Get the file's length in async block scan
*/
private final long blockFileLength;
private final static Pattern CONDENSED_PATH_REGEX =
Pattern.compile("(?<!^)(\\\\|/){2,}");
private final static String QUOTED_FILE_SEPARATOR =
Matcher.quoteReplacement(File.separator);
/**
* Get the most condensed version of the path, e.g. /foo//bar becomes
* /foo/bar. Unlike {@link File#getCanonicalPath()}, this never performs
* I/O on the filesystem.
*/
private static String getCondensedPath(String path) {
return CONDENSED_PATH_REGEX.matcher(path).
replaceAll(QUOTED_FILE_SEPARATOR);
}
/**
* Get a path suffix such that prefix + suffix = path to f
*/
private static String getSuffix(File f, String prefix) {
String fullPath = getCondensedPath(f.getAbsolutePath());
if (fullPath.startsWith(prefix)) {
return fullPath.substring(prefix.length());
}
throw new RuntimeException(prefix + " is not a prefix of " + fullPath);
}
ScanInfo(long blockId) {
this(blockId, null, null, null);
}
ScanInfo(long blockId, File blockFile, File metaFile, FsVolumeSpi vol) {
this.blockId = blockId;
String condensedVolPath = vol == null ? null :
getCondensedPath(vol.getBasePath());
this.blockSuffix = blockFile == null ? null :
getSuffix(blockFile, condensedVolPath);
this.blockFileLength = (blockFile != null) ? blockFile.length() : 0;
if (metaFile == null) {
this.metaSuffix = null;
} else if (blockFile == null) {
this.metaSuffix = getSuffix(metaFile, condensedVolPath);
} else {
// Store only the portion of the meta path that follows the block path
this.metaSuffix = getSuffix(metaFile,
condensedVolPath + blockSuffix);
}
this.volume = vol;
}
File getBlockFile() {
return (blockSuffix == null) ? null :
new File(volume.getBasePath(), blockSuffix);
}
long getBlockFileLength() {
return blockFileLength;
}
File getMetaFile() {
if (metaSuffix == null) {
return null;
} else if (blockSuffix == null) {
return new File(volume.getBasePath(), metaSuffix);
}
return new File(volume.getBasePath(), blockSuffix + metaSuffix);
}
long getBlockId() {
return blockId;
}
FsVolumeSpi getVolume() {
return volume;
}
@Override // Comparable
public int compareTo(ScanInfo b) {
if (blockId < b.blockId) {
return -1;
} else if (blockId == b.blockId) {
return 0;
} else {
return 1;
}
}
@Override // Object
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof ScanInfo)) {
return false;
}
return blockId == ((ScanInfo) o).blockId;
}
@Override // Object
public int hashCode() {
return (int)(blockId^(blockId>>>32));
}
public long getGenStamp() {
return metaSuffix != null ? Block.getGenerationStamp(
getMetaFile().getName()) :
GenerationStamp.GRANDFATHER_GENERATION_STAMP;
}
}
DirectoryScanner(DataNode dn, FsDatasetSpi<?> dataset, Configuration conf) {
this.datanode = dn;
this.dataset = dataset;
int interval = conf.getInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY,
DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_DEFAULT);
scanPeriodMsecs = interval * 1000L; //msec
int threads =
conf.getInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_KEY,
DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_DEFAULT);
reportCompileThreadPool = Executors.newFixedThreadPool(threads,
new Daemon.DaemonFactory());
masterThread = new ScheduledThreadPoolExecutor(1,
new Daemon.DaemonFactory());
}
void start() {
shouldRun = true;
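// Stagger the first scan: pick a random whole-second offset within one scan period rather than starting immediately.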
long offset = DFSUtil.getRandom().nextInt((int) (scanPeriodMsecs/1000L)) * 1000L; //msec
long firstScanTime = Time.now() + offset;
LOG.info("Periodic Directory Tree Verification scan starting at "
+ firstScanTime + " with interval " + scanPeriodMsecs);
masterThread.scheduleAtFixedRate(this, offset, scanPeriodMsecs,
TimeUnit.MILLISECONDS);
}
// for unit test
boolean getRunStatus() {
return shouldRun;
}
private void clear() {
diffs.clear();
stats.clear();
}
/**
* Main program loop for DirectoryScanner
* Runs "reconcile()" periodically under the masterThread.
*/
@Override
public void run() {
try {
if (!shouldRun) {
//shutdown has been activated
LOG.warn("this cycle terminating immediately because 'shouldRun' has been deactivated");
return;
}
String[] bpids = dataset.getBlockPoolList();
for(String bpid : bpids) {
UpgradeManagerDatanode um =
datanode.getUpgradeManagerDatanode(bpid);
if (um != null && !um.isUpgradeCompleted()) {
//If distributed upgrades underway, exit and wait for next cycle.
LOG.warn("this cycle terminating immediately because Distributed Upgrade is in process");
return;
}
}
//We're okay to run - do it
reconcile();
} catch (Exception e) {
//Log and continue - allows Executor to run again next cycle
LOG.error("Exception during DirectoryScanner execution - will continue next cycle", e);
} catch (Error er) {
//Non-recoverable error - re-throw after logging the problem
LOG.error("System Error during DirectoryScanner execution - permanently terminating periodic scanner", er);
throw er;
}
}
void shutdown() {
if (!shouldRun) {
LOG.warn("DirectoryScanner: shutdown has been called, but periodic scanner not started");
} else {
LOG.warn("DirectoryScanner: shutdown has been called");
}
shouldRun = false;
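// Stop scheduling new scans, then wait up to one minute for each executor to drain any in-flight work.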
if (masterThread != null) masterThread.shutdown();
if (reportCompileThreadPool != null) reportCompileThreadPool.shutdown();
if (masterThread != null) {
try {
masterThread.awaitTermination(1, TimeUnit.MINUTES);
} catch (InterruptedException e) {
LOG.error("interrupted while waiting for masterThread to " +
"terminate", e);
}
}
if (reportCompileThreadPool != null) {
try {
reportCompileThreadPool.awaitTermination(1, TimeUnit.MINUTES);
} catch (InterruptedException e) {
LOG.error("interrupted while waiting for reportCompileThreadPool to " +
"terminate", e);
}
}
if (!retainDiffs) clear();
}
/**
* Reconcile differences between disk and in-memory blocks
*/
void reconcile() {
scan();
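// scan() has filled in 'diffs'; hand each difference to the dataset so it can reconcile its in-memory replica map with what is actually on disk.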
for (Entry<String, LinkedList<ScanInfo>> entry : diffs.entrySet()) {
String bpid = entry.getKey();
LinkedList<ScanInfo> diff = entry.getValue();
for (ScanInfo info : diff) {
dataset.checkAndUpdate(bpid, info.getBlockId(), info.getBlockFile(),
info.getMetaFile(), info.getVolume());
}
}
if (!retainDiffs) clear();
}
/**
* Scan for the differences between disk and in-memory blocks
* Scan only the "finalized blocks" lists of both disk and memory.
*/
void scan() {
clear();
Map<String, ScanInfo[]> diskReport = getDiskReport();
// Hold FSDataset lock to prevent further changes to the block map
synchronized(dataset) {
for (Entry<String, ScanInfo[]> entry : diskReport.entrySet()) {
String bpid = entry.getKey();
ScanInfo[] blockpoolReport = entry.getValue();
Stats statsRecord = new Stats(bpid);
stats.put(bpid, statsRecord);
LinkedList<ScanInfo> diffRecord = new LinkedList<ScanInfo>();
diffs.put(bpid, diffRecord);
statsRecord.totalBlocks = blockpoolReport.length;
List<Block> bl = dataset.getFinalizedBlocks(bpid);
Block[] memReport = bl.toArray(new Block[bl.size()]);
Arrays.sort(memReport); // Sort based on blockId
int d = 0; // index for blockpoolReport
int m = 0; // index for memReport
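// Merge the two sorted lists in lock step: advance whichever index points at the smaller blockId, recording that block as missing from the other side; equal ids are checked for file, genstamp, and length mismatches.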
while (m < memReport.length && d < blockpoolReport.length) {
Block memBlock = memReport[Math.min(m, memReport.length - 1)];
ScanInfo info = blockpoolReport[Math.min(
d, blockpoolReport.length - 1)];
if (info.getBlockId() < memBlock.getBlockId()) {
// Block is missing in memory
statsRecord.missingMemoryBlocks++;
addDifference(diffRecord, statsRecord, info);
d++;
continue;
}
if (info.getBlockId() > memBlock.getBlockId()) {
// Block is missing on the disk
addDifference(diffRecord, statsRecord, memBlock.getBlockId());
m++;
continue;
}
// Block file and/or metadata file exists on the disk
// Block exists in memory
if (info.getBlockFile() == null) {
// Block metadata file exits and block file is missing
addDifference(diffRecord, statsRecord, info);
} else if (info.getGenStamp() != memBlock.getGenerationStamp()
|| info.getBlockFileLength() != memBlock.getNumBytes()) {
// Block metadata file is missing or has wrong generation stamp,
// or block file length is different than expected
statsRecord.mismatchBlocks++;
addDifference(diffRecord, statsRecord, info);
}
d++;
m++;
}
while (m < memReport.length) {
addDifference(diffRecord, statsRecord, memReport[m++].getBlockId());
}
while (d < blockpoolReport.length) {
statsRecord.missingMemoryBlocks++;
addDifference(diffRecord, statsRecord, blockpoolReport[d++]);
}
LOG.info(statsRecord.toString());
} //end for
} //end synchronized
}
/**
* Block is found on the disk. In-memory block is missing or does not match
* the block on the disk
*/
private void addDifference(LinkedList<ScanInfo> diffRecord,
Stats statsRecord, ScanInfo info) {
statsRecord.missingMetaFile += info.getMetaFile() == null ? 1 : 0;
statsRecord.missingBlockFile += info.getBlockFile() == null ? 1 : 0;
diffRecord.add(info);
}
/** Block is not found on the disk */
private void addDifference(LinkedList<ScanInfo> diffRecord,
Stats statsRecord, long blockId) {
statsRecord.missingBlockFile++;
statsRecord.missingMetaFile++;
diffRecord.add(new ScanInfo(blockId));
}
/** Is the given volume still valid in the dataset? */
private static boolean isValid(final FsDatasetSpi<?> dataset,
final FsVolumeSpi volume) {
for (FsVolumeSpi vol : dataset.getVolumes()) {
if (vol == volume) {
return true;
}
}
return false;
}
/** Get lists of blocks on the disk sorted by blockId, per blockpool */
private Map<String, ScanInfo[]> getDiskReport() {
// First get list of data directories
final List<? extends FsVolumeSpi> volumes = dataset.getVolumes();
// Use an array since the threads may return out of order and
// compilersInProgress#keySet may return out of order as well.
ScanInfoPerBlockPool[] dirReports = new ScanInfoPerBlockPool[volumes.size()];
Map<Integer, Future<ScanInfoPerBlockPool>> compilersInProgress =
new HashMap<Integer, Future<ScanInfoPerBlockPool>>();
for (int i = 0; i < volumes.size(); i++) {
if (isValid(dataset, volumes.get(i))) {
ReportCompiler reportCompiler =
new ReportCompiler(volumes.get(i));
Future<ScanInfoPerBlockPool> result =
reportCompileThreadPool.submit(reportCompiler);
compilersInProgress.put(i, result);
}
}
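// Collect the per-volume reports as each compiler finishes; Future.get() blocks until that volume's scan is complete.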
for (Entry<Integer, Future<ScanInfoPerBlockPool>> report :
compilersInProgress.entrySet()) {
try {
dirReports[report.getKey()] = report.getValue().get();
} catch (Exception ex) {
LOG.error("Error compiling report", ex);
// Propagate ex to DataBlockScanner to deal with
throw new RuntimeException(ex);
}
}
// Compile consolidated report for all the volumes
ScanInfoPerBlockPool list = new ScanInfoPerBlockPool();
for (int i = 0; i < volumes.size(); i++) {
if (isValid(dataset, volumes.get(i))) {
// volume is still valid
list.addAll(dirReports[i]);
}
}
return list.toSortedArrays();
}
private static boolean isBlockMetaFile(String blockId, String metaFile) {
return metaFile.startsWith(blockId)
&& metaFile.endsWith(Block.METADATA_EXTENSION);
}
private static class ReportCompiler
implements Callable<ScanInfoPerBlockPool> {
private FsVolumeSpi volume;
public ReportCompiler(FsVolumeSpi volume) {
this.volume = volume;
}
@Override
public ScanInfoPerBlockPool call() throws Exception {
String[] bpList = volume.getBlockPoolList();
ScanInfoPerBlockPool result = new ScanInfoPerBlockPool(bpList.length);
for (String bpid : bpList) {
LinkedList<ScanInfo> report = new LinkedList<ScanInfo>();
File bpFinalizedDir = volume.getFinalizedDir(bpid);
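// Only the finalized replicas of this block pool are scanned, matching the finalized block list that scan() fetches from the dataset.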
result.put(bpid, compileReport(volume, bpFinalizedDir, report));
}
return result;
}
/** Compile a list of {@link ScanInfo} for the blocks in the directory <dir> */
private LinkedList<ScanInfo> compileReport(FsVolumeSpi vol, File dir,
LinkedList<ScanInfo> report) {
File[] files;
try {
files = FileUtil.listFiles(dir);
} catch (IOException ioe) {
LOG.warn("Exception occured while compiling report: ", ioe);
// Ignore this directory and proceed.
return report;
}
Arrays.sort(files);
/*
* Assumption: In the sorted list of files block file appears immediately
* before block metadata file. This is true for the current naming
* convention for block file blk_<blockid> and meta file
* blk_<blockid>_<genstamp>.meta
*/
for (int i = 0; i < files.length; i++) {
if (files[i].isDirectory()) {
compileReport(vol, files[i], report);
continue;
}
if (!Block.isBlockFilename(files[i])) {
if (isBlockMetaFile("blk_", files[i].getName())) {
long blockId = Block.getBlockId(files[i].getName());
report.add(new ScanInfo(blockId, null, files[i], vol));
}
continue;
}
File blockFile = files[i];
long blockId = Block.filename2id(blockFile.getName());
File metaFile = null;
// Skip all the files that start with block name until
// getting to the metafile for the block
while (i + 1 < files.length && files[i + 1].isFile()
&& files[i + 1].getName().startsWith(blockFile.getName())) {
i++;
if (isBlockMetaFile(blockFile.getName(), files[i].getName())) {
metaFile = files[i];
break;
}
}
report.add(new ScanInfo(blockId, blockFile, metaFile, vol));
}
return report;
}
}
}