// org.apache.hadoop.fs.DU Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.Shell;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.HashSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicLong;
/**
 * Filesystem disk space usage statistics. Uses the unix 'du' program.
 *
 * <p>A {@code DU} holds a set of {@link NamespaceSliceDU} instances keyed by
 * namespace id. Each slice shells out to {@code du -sk} on its own directory.
 * After {@link #start()} a daemon thread re-runs every registered slice once
 * per refresh interval; until then each {@link NamespaceSliceDU#getUsed()}
 * call runs {@code du} on demand.
 */
public class DU {
  public static final Log LOG = LogFactory.getLog(DU.class);

  /** Canonical path given at construction; also used to name the thread. */
  private final String dirPath;
  /** Cleared by {@link #shutdown()} to make the refresh loop exit. */
  private volatile boolean shouldRun = true;
  /** The refresh thread, or null while it is not running. */
  private Thread refreshUsed;
  /** Sleep time of the refresh thread between passes, in milliseconds. */
  private final long refreshInterval;
  /** Registered slices, keyed by namespace id. */
  private final ConcurrentMap<Integer, NamespaceSliceDU> namespaceSliceDUMap;

  /**
   * Keeps track of disk usage.
   * @param path the path to check disk usage in
   * @param interval refresh the disk usage at this interval (milliseconds,
   *        passed to {@link Thread#sleep(long)}); a value that is not
   *        positive means {@link #start()} will not launch a thread
   * @throws IOException if the canonical path cannot be resolved
   */
  public DU(File path, long interval) throws IOException {
    this.refreshInterval = interval;
    this.dirPath = path.getCanonicalPath();
    this.namespaceSliceDUMap =
        new ConcurrentHashMap<Integer, NamespaceSliceDU>();
  }

  /**
   * Keeps track of disk usage, refreshing every 10 minutes.
   * @param path the path to check disk usage in
   * @param conf configuration object; currently not consulted, the interval
   *        is fixed
   * @throws IOException if the canonical path cannot be resolved
   */
  public DU(File path, Configuration conf) throws IOException {
    //10 minutes default refresh interval
    this(path, 600000L);
  }

  /**
   * Registers a disk-usage slice for a namespace unless one already exists.
   *
   * <p>A new {@link NamespaceSliceDU} is always constructed first (which runs
   * {@code du} once), so this may throw even when the namespace is already
   * registered.
   *
   * @param namespaceId key under which the slice is registered
   * @param path directory whose usage the slice measures
   * @param conf configuration handed to the slice
   * @return the slice registered for {@code namespaceId}: the previously
   *         registered one if present, otherwise the newly created one
   * @throws IOException if the initial {@code du} run of the new slice fails
   */
  public NamespaceSliceDU addNamespace(int namespaceId, File path,
      Configuration conf) throws IOException {
    NamespaceSliceDU created = new NamespaceSliceDU(path, conf);
    NamespaceSliceDU existing =
        namespaceSliceDUMap.putIfAbsent(namespaceId, created);
    return existing != null ? existing : created;
  }

  /**
   * Unregisters the slice of a namespace; a no-op if none is registered.
   * @param namespaceId key of the slice to drop
   */
  public void removeNamespace(int namespaceId) {
    this.namespaceSliceDUMap.remove(namespaceId);
  }

  /**
   * This thread refreshes the "used" variable of every registered slice.
   *
   * Future improvements could be to not permanently
   * run this thread, instead run when getUsed is called.
   **/
  class DURefreshThread implements Runnable {
    @Override
    public void run() {
      while (shouldRun) {
        try {
          Thread.sleep(refreshInterval);
          for (NamespaceSliceDU nsdu : namespaceSliceDUMap.values()) {
            try {
              //update the used variable
              nsdu.run();
              synchronized (nsdu.exceptionLock) {
                //If succeed, we should set the exception to null
                nsdu.duException = null;
              }
            } catch (IOException e) {
              synchronized (nsdu.exceptionLock) {
                //save the latest exception so we can return it in getUsed()
                nsdu.duException = e;
              }
              LOG.warn("Could not get disk usage information", e);
            }
          }
        } catch (InterruptedException e) {
          // Deliberately ignored: shutdown() clears shouldRun before it
          // interrupts this thread, so the loop condition ends the thread.
        }
      }
    }
  }

  /**
   * Disk usage of one directory, measured with {@code du -sk}.
   */
  public class NamespaceSliceDU extends Shell {
    private final String dirPath;
    /** Last measured usage in bytes, adjusted by inc/decDfsUsed. */
    private final AtomicLong used = new AtomicLong();
    /** Failure of the latest background refresh; guarded by exceptionLock. */
    private volatile IOException duException = null;
    final Object exceptionLock = new Object();
    /**
     * "cannot access" error lines seen in the most recent tolerated failure;
     * a line that shows up again in the next failure is no longer tolerated.
     */
    HashSet<String> suspiciousFiles = null;

    /**
     * Creates the slice and measures the directory once.
     * @param path directory to measure
     * @param conf configuration object; not used by this constructor
     * @throws IOException if the path cannot be canonicalized or the initial
     *         {@code du} run fails
     */
    public NamespaceSliceDU(File path, Configuration conf) throws IOException {
      //we set the Shell interval to 0 so it will always run our command
      super(0);
      dirPath = path.getCanonicalPath();
      //populate the used variable
      run();
    }

    /**
     * Decides whether a failed {@code du} run can be tolerated.
     *
     * <p>Errors outputs like
     * 'du: cannot access `': No such file or directory'
     * are expected and don't impact the size estimation. The error code
     * is 1 for this case. We just log the error message without throwing
     * any exception, unless the same line was already reported by the
     * previous tolerated failure.
     *
     * @param ece the failure raised by the shell command
     * @throws IOException {@code ece} itself if the exit code is not 1, the
     *         message is missing, or any line is not of the expected form; a
     *         wrapping IOException if a line repeats from the previous failure
     */
    public void processErrorOutput(ExitCodeException ece)
        throws IOException {
      if (super.getExitCode() != 1) {
        throw ece;
      }
      String errMsg = ece.getMessage();
      if (errMsg == null) {
        // Nothing to inspect, so the failure cannot be classified as benign.
        throw ece;
      }
      boolean containExpectMsg = false;
      HashSet<String> newSuspiciousFiles = new HashSet<String>();
      for (String rawLine : errMsg.trim().split(
          System.getProperty("line.separator"))) {
        String line = rawLine.trim();
        boolean expected = line.startsWith("du: cannot access `")
            && line.endsWith("': No such file or directory");
        if (!expected) {
          throw ece;
        }
        containExpectMsg = true;
        if (suspiciousFiles != null && suspiciousFiles.contains(line)) {
          throw new IOException("Cannot access a file at least twice", ece);
        }
        newSuspiciousFiles.add(line);
      }
      suspiciousFiles = newSuspiciousFiles;
      if (!containExpectMsg) {
        throw ece;
      }
      LOG.info("DU error message: " + errMsg);
    }

    /**
     * Runs the shell command, tolerating the benign failures recognised by
     * {@link #processErrorOutput(ExitCodeException)}.
     * @throws IOException if the command fails in a way that is not tolerated
     */
    public void run() throws IOException {
      try {
        super.run();
      } catch (ExitCodeException ece) {
        processErrorOutput(ece);
      }
    }

    /**
     * Decrease how much disk space we use.
     * @param value decrease by this value
     */
    public void decDfsUsed(long value) {
      used.addAndGet(-value);
    }

    /**
     * Increase how much disk space we use.
     * @param value increase by this value
     */
    public void incDfsUsed(long value) {
      used.addAndGet(value);
    }

    /**
     * @return disk space used
     * @throws IOException if the shell command fails, or if the most recent
     *         background refresh failed (that failure is reported once and
     *         then cleared)
     */
    public long getUsed() throws IOException {
      //if the updating thread isn't started, update on demand
      if (refreshUsed == null) {
        run();
      } else {
        synchronized (this.exceptionLock) {
          //if an exception was thrown in the last run, rethrow
          if (duException != null) {
            IOException tmp = duException;
            duException = null;
            throw tmp;
          }
        }
      }
      return used.longValue();
    }

    /**
     * @return the path of which we're keeping track of disk usage
     */
    public String getDirPath() {
      return dirPath;
    }

    /**
     * Parses the first output line of {@code du -sk}: a size in kilobytes
     * followed by a tab. The size is stored in bytes.
     * @param lines the command output
     * @throws IOException if there is no output line or it does not start
     *         with a number
     */
    protected void parseExecResult(BufferedReader lines) throws IOException {
      String line = lines.readLine();
      if (line == null) {
        throw new IOException("Expecting a line not the end of stream");
      }
      String[] tokens = line.split("\t");
      if (tokens.length == 0) {
        throw new IOException("Illegal du output");
      }
      long kilobytes;
      try {
        kilobytes = Long.parseLong(tokens[0]);
      } catch (NumberFormatException nfe) {
        // Surface malformed output as the checked exception callers handle;
        // an unchecked one would terminate the refresh thread.
        throw new IOException("Illegal du output: " + line, nfe);
      }
      this.used.set(kilobytes * 1024);
    }

    /**
     * @return the command line followed by a du-style "used, tab, path" line
     */
    public String toString() {
      return
        "du -sk " + dirPath + "\n" +
        used + "\t" + dirPath;
    }

    /**
     * @return the command to execute: {@code du -sk <dirPath>}
     */
    protected String[] getExecString() {
      return new String[] {"du", "-sk", dirPath};
    }
  }

  /**
   * @return the path of which we're keeping track of disk usage
   */
  public String getDirPath() {
    return dirPath;
  }

  /**
   * Start the disk usage checking thread.
   */
  public void start() {
    //only start the thread if the interval is sane
    if (refreshInterval > 0) {
      refreshUsed = new Thread(new DURefreshThread(),
          "refreshUsed-" + dirPath);
      refreshUsed.setDaemon(true);
      refreshUsed.start();
    }
  }

  /**
   * Shut down the refreshing thread and drop all registered slices.
   *
   * <p>If the calling thread is interrupted while waiting for the refresh
   * thread to finish, the wait is abandoned and the interrupt status is
   * restored for the caller.
   */
  public void shutdown() {
    this.shouldRun = false;
    this.namespaceSliceDUMap.clear();
    if (this.refreshUsed != null) {
      this.refreshUsed.interrupt();
      try {
        this.refreshUsed.join();
        this.refreshUsed = null;
      } catch (InterruptedException ie) {
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Prints the disk usage of a directory.
   * @param args optional single argument: the directory (defaults to ".")
   * @throws Exception if the usage cannot be determined
   */
  public static void main(String[] args) throws Exception {
    String path = ".";
    if (args.length > 0) {
      path = args[0];
    }
    File dir = new File(path);
    Configuration conf = new Configuration();
    DU du = new DU(dir, conf);
    // DU does not override toString(); the readable report lives on the
    // slice, so measure the directory through a slice and print that.
    System.out.println(du.addNamespace(0, dir, conf).toString());
  }
}