Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Comparator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ProcessTree;
import org.apache.hadoop.util.Shell;
import org.apache.log4j.Appender;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* A simple logger to handle the task-specific user logs.
* This class uses the system property hadoop.log.dir.
*
*/
public class TaskLog {
private static final Log LOG =
LogFactory.getLog(TaskLog.class);
private static final File LOG_DIR =
new File(System.getProperty("hadoop.log.dir"),
"userlogs").getAbsoluteFile();
// localFS is set in (and used by) writeToIndexFile()
static LocalFileSystem localFS = null;
static {
if (!LOG_DIR.exists()) {
LOG_DIR.mkdirs();
}
}
public static File getTaskLogFile(TaskAttemptID taskid, LogName filter) {
return new File(getBaseDir(taskid.toString()), filter.toString());
}
/**
* @deprecated Instead use
* {@link #getAllLogsFileDetails(TaskAttemptID, boolean)} to get
* the details of all log-files and then use the particular
* log-type's detail to call getRealTaskLogFileLocation(String,
* LogName) real log-location
*/
@Deprecated
public static File getRealTaskLogFileLocation(TaskAttemptID taskid,
LogName filter) {
LogFileDetail l;
try {
Map allFilesDetails =
getAllLogsFileDetails(taskid, false);
l = allFilesDetails.get(filter);
} catch (IOException ie) {
LOG.error("getTaskLogFileDetailgetAllLogsFileDetails threw an exception "
+ ie);
return null;
}
return new File(getBaseDir(l.location), filter.toString());
}
/**
* Get the real task-log file-path
*
* @param location Location of the log-file. This should point to an
* attempt-directory.
* @param filter
* @return
* @throws IOException
*/
static String getRealTaskLogFilePath(String location, LogName filter)
throws IOException {
return FileUtil.makeShellPath(new File(getBaseDir(location),
filter.toString()));
}
static class LogFileDetail {
final static String LOCATION = "LOG_DIR:";
String location;
long start;
long length;
}
static Map getAllLogsFileDetails(
TaskAttemptID taskid, boolean isCleanup) throws IOException {
Map allLogsFileDetails =
new HashMap();
File indexFile = getIndexFile(taskid.toString(), isCleanup);
BufferedReader fis = new BufferedReader(new java.io.FileReader(indexFile));
//the format of the index file is
//LOG_DIR:
//stdout:
//stderr:
//syslog:
String str = fis.readLine();
if (str == null) { //the file doesn't have anything
throw new IOException ("Index file for the log of " + taskid+" doesn't exist.");
}
String loc = str.substring(str.indexOf(LogFileDetail.LOCATION)+
LogFileDetail.LOCATION.length());
//special cases are the debugout and profile.out files. They are guaranteed
//to be associated with each task attempt since jvm reuse is disabled
//when profiling/debugging is enabled
for (LogName filter : new LogName[] { LogName.DEBUGOUT, LogName.PROFILE }) {
LogFileDetail l = new LogFileDetail();
l.location = loc;
l.length = new File(getBaseDir(l.location), filter.toString()).length();
l.start = 0;
allLogsFileDetails.put(filter, l);
}
str = fis.readLine();
while (str != null) {
LogFileDetail l = new LogFileDetail();
l.location = loc;
int idx = str.indexOf(':');
LogName filter = LogName.valueOf(str.substring(0, idx).toUpperCase());
str = str.substring(idx + 1);
String[] startAndLen = str.split(" ");
l.start = Long.parseLong(startAndLen[0]);
l.length = Long.parseLong(startAndLen[1]);
allLogsFileDetails.put(filter, l);
str = fis.readLine();
}
fis.close();
return allLogsFileDetails;
}
private static File getTmpIndexFile(String taskid) {
return new File(getBaseDir(taskid), "log.tmp");
}
public static File getIndexFile(String taskid) {
return getIndexFile(taskid, false);
}
public static File getIndexFile(String taskid, boolean isCleanup) {
if (isCleanup) {
return new File(getBaseDir(taskid), "log.index.cleanup");
} else {
return new File(getBaseDir(taskid), "log.index");
}
}
static File getBaseDir(String taskid) {
return new File(LOG_DIR, taskid);
}
static final List LOGS_TRACKED_BY_INDEX_FILES =
Arrays.asList(LogName.STDOUT, LogName.STDERR, LogName.SYSLOG);
private static TaskAttemptID currentTaskid;
/**
* Map to store previous and current lengths.
*/
private static Map logLengths =
new HashMap();
static {
for (LogName logName : LOGS_TRACKED_BY_INDEX_FILES) {
logLengths.put(logName, new Long[] { Long.valueOf(0L),
Long.valueOf(0L) });
}
}
static void writeToIndexFile(TaskAttemptID firstTaskid,
TaskAttemptID currentTaskid, boolean isCleanup,
Map lengths) throws IOException {
// To ensure atomicity of updates to index file, write to temporary index
// file first and then rename.
File tmpIndexFile = getTmpIndexFile(currentTaskid.toString());
BufferedOutputStream bos =
new BufferedOutputStream(new FileOutputStream(tmpIndexFile,false));
DataOutputStream dos = new DataOutputStream(bos);
//the format of the index file is
//LOG_DIR:
//STDOUT:
//STDERR:
//SYSLOG:
dos.writeBytes(LogFileDetail.LOCATION
+ firstTaskid.toString()
+ "\n");
for (LogName logName : LOGS_TRACKED_BY_INDEX_FILES) {
Long[] lens = lengths.get(logName);
dos.writeBytes(logName.toString() + ":"
+ lens[0].toString() + " "
+ Long.toString(lens[1].longValue() - lens[0].longValue())
+ "\n");}
dos.close();
File indexFile = getIndexFile(currentTaskid.toString(), isCleanup);
Path indexFilePath = new Path(indexFile.getAbsolutePath());
Path tmpIndexFilePath = new Path(tmpIndexFile.getAbsolutePath());
if (localFS == null) {// set localFS once
localFS = FileSystem.getLocal(new Configuration());
}
localFS.rename (tmpIndexFilePath, indexFilePath);
}
public synchronized static void syncLogs(TaskAttemptID firstTaskid,
TaskAttemptID taskid)
throws IOException {
syncLogs(firstTaskid, taskid, false);
}
@SuppressWarnings("unchecked")
public synchronized static void syncLogs(TaskAttemptID firstTaskid,
TaskAttemptID taskid,
boolean isCleanup)
throws IOException {
System.out.flush();
System.err.flush();
Enumeration allLoggers = LogManager.getCurrentLoggers();
while (allLoggers.hasMoreElements()) {
Logger l = allLoggers.nextElement();
Enumeration allAppenders = l.getAllAppenders();
while (allAppenders.hasMoreElements()) {
Appender a = allAppenders.nextElement();
if (a instanceof TaskLogAppender) {
((TaskLogAppender)a).flush();
}
}
}
// set start and end
for (LogName logName : LOGS_TRACKED_BY_INDEX_FILES) {
if (currentTaskid != taskid) {
// Set start = current-end
logLengths.get(logName)[0] =
Long.valueOf(getTaskLogFile(firstTaskid, logName).length());
}
// Set current end
logLengths.get(logName)[1] =
Long.valueOf(getTaskLogFile(firstTaskid, logName).length());
}
if (currentTaskid != taskid) {
if (currentTaskid != null) {
LOG.info("Starting logging for a new task " + taskid
+ " in the same JVM as that of the first task " + firstTaskid);
}
currentTaskid = taskid;
}
writeToIndexFile(firstTaskid, taskid, isCleanup, logLengths);
}
/**
* The filter for userlogs.
*/
public static enum LogName {
/** Log on the stdout of the task. */
STDOUT ("stdout"),
/** Log on the stderr of the task. */
STDERR ("stderr"),
/** Log on the map-reduce system logs of the task. */
SYSLOG ("syslog"),
/** The java profiler information. */
PROFILE ("profile.out"),
/** Log the debug script's stdout */
DEBUGOUT ("debugout");
private String prefix;
private LogName(String prefix) {
this.prefix = prefix;
}
@Override
public String toString() {
return prefix;
}
}
/**
* Purge old user logs, and clean up half of the logs if the number of logs
* exceed the limit.
*
* @throws IOException
*/
static void cleanup(int logsRetainHours, int logsNumberLimit
) throws IOException {
cleanup(TaskLog.getUserLogDir(), logsRetainHours, logsNumberLimit);
}
static void cleanup(File logDir, int logsRetainHours, int logsNumberLimit
) throws IOException {
//Delete logs of tasks if the number of files exceed fileNumberLimit
LOG.info("TaskLog.cleanup: logsRetainHours=" + logsRetainHours +
" logsNumberLimit=" + logsNumberLimit);
File[] totalLogs = logDir.listFiles();
long purgeTimeStamp = System.currentTimeMillis() -
(logsRetainHours*60L*60*1000);
if (totalLogs != null) {
int numLogs = totalLogs.length;
if (totalLogs.length > logsNumberLimit) {
// sorting files first according to last modified time.
Arrays.sort(totalLogs, new Comparator() {
public int compare(File f1, File f2) {
return Long.valueOf(f1.lastModified()).compareTo(f2.lastModified());
}
});
// deleting half of the oldest logs
for (int i=0; i < numLogs / 2; i++) {
LOG.info("TaskLog.cleanup deleting " + totalLogs[i]);
FileUtil.fullyDelete(totalLogs[i]);
}
// now, delete the remaining ones that are older than purgeTimeStamp
// since the array is sorted, break the loop once we pass the boundary
for (int i = numLogs / 2; i < numLogs; i++) {
if (totalLogs[i].lastModified() < purgeTimeStamp) {
LOG.info("TaskLog.cleanup deleting " + totalLogs[i]);
FileUtil.fullyDelete(totalLogs[i]);
} else {
break;
}
}
} else {
// if the number of logs is less than limit, only delete the files that are
// older than PurgeTimeStamp
for (int i=0; i < numLogs; i++) {
if (totalLogs[i].lastModified() < purgeTimeStamp) {
LOG.info("TaskLog.cleanup deleting " + totalLogs[i]);
FileUtil.fullyDelete(totalLogs[i]);
}
}
}
}
}
static class Reader extends InputStream {
private long bytesRemaining;
private FileInputStream file;
public Reader(TaskAttemptID taskid, LogName kind,
long start, long end) throws IOException {
this(taskid, kind, start, end, false);
}
/**
* Read a log file from start to end positions. The offsets may be negative,
* in which case they are relative to the end of the file. For example,
* Reader(taskid, kind, 0, -1) is the entire file and
* Reader(taskid, kind, -4197, -1) is the last 4196 bytes.
* @param taskid the id of the task to read the log file for
* @param kind the kind of log to read
* @param start the offset to read from (negative is relative to tail)
* @param end the offset to read upto (negative is relative to tail)
* @param isCleanup whether the attempt is cleanup attempt or not
* @throws IOException
*/
public Reader(TaskAttemptID taskid, LogName kind,
long start, long end, boolean isCleanup) throws IOException {
// find the right log file
Map allFilesDetails =
getAllLogsFileDetails(taskid, isCleanup);
LogFileDetail fileDetail = allFilesDetails.get(kind);
// calculate the start and stop
long size = fileDetail.length;
if (start < 0) {
start += size + 1;
}
if (end < 0) {
end += size + 1;
}
start = Math.max(0, Math.min(start, size));
end = Math.max(0, Math.min(end, size));
start += fileDetail.start;
end += fileDetail.start;
bytesRemaining = end - start;
file = new FileInputStream(new File(getBaseDir(fileDetail.location),
kind.toString()));
// skip upto start
long pos = 0;
while (pos < start) {
long result = file.skip(start - pos);
if (result < 0) {
bytesRemaining = 0;
break;
}
pos += result;
}
}
@Override
public int read() throws IOException {
int result = -1;
if (bytesRemaining > 0) {
bytesRemaining -= 1;
result = file.read();
}
return result;
}
@Override
public int read(byte[] buffer, int offset, int length) throws IOException {
length = (int) Math.min(length, bytesRemaining);
int bytes = file.read(buffer, offset, length);
if (bytes > 0) {
bytesRemaining -= bytes;
}
return bytes;
}
@Override
public int available() throws IOException {
return (int) Math.min(bytesRemaining, file.available());
}
@Override
public void close() throws IOException {
file.close();
}
}
private static final String bashCommand = "bash";
private static final String tailCommand = "tail";
/**
* Get the desired maximum length of task's logs.
* @param conf the job to look in
* @return the number of bytes to cap the log files at
*/
public static long getTaskLogLength(JobConf conf) {
return conf.getLong("mapred.userlog.limit.kb", 100) * 1024;
}
/**
* Wrap a command in a shell to capture stdout and stderr to files.
* If the tailLength is 0, the entire output will be saved.
* @param cmd The command and the arguments that should be run
* @param stdoutFilename The filename that stdout should be saved to
* @param stderrFilename The filename that stderr should be saved to
* @param tailLength The length of the tail to be saved.
* @return the modified command that should be run
*/
public static List captureOutAndError(List cmd,
File stdoutFilename,
File stderrFilename,
long tailLength
) throws IOException {
return captureOutAndError(null, cmd, stdoutFilename,
stderrFilename, tailLength, false);
}
/**
* Wrap a command in a shell to capture stdout and stderr to files.
* Setup commands such as setting memory limit can be passed which
* will be executed before exec.
* If the tailLength is 0, the entire output will be saved.
* @param setup The setup commands for the execed process.
* @param cmd The command and the arguments that should be run
* @param stdoutFilename The filename that stdout should be saved to
* @param stderrFilename The filename that stderr should be saved to
* @param tailLength The length of the tail to be saved.
* @return the modified command that should be run
*/
public static List captureOutAndError(List setup,
List cmd,
File stdoutFilename,
File stderrFilename,
long tailLength
) throws IOException {
return captureOutAndError(setup, cmd, stdoutFilename, stderrFilename,
tailLength, false);
}
/**
* Wrap a command in a shell to capture stdout and stderr to files.
* Setup commands such as setting memory limit can be passed which
* will be executed before exec.
* If the tailLength is 0, the entire output will be saved.
* @param setup The setup commands for the execed process.
* @param cmd The command and the arguments that should be run
* @param stdoutFilename The filename that stdout should be saved to
* @param stderrFilename The filename that stderr should be saved to
* @param tailLength The length of the tail to be saved.
* @deprecated pidFiles are no more used. Instead pid is exported to
* env variable JVM_PID.
* @return the modified command that should be run
*/
@Deprecated
public static List captureOutAndError(List setup,
List cmd,
File stdoutFilename,
File stderrFilename,
long tailLength,
String pidFileName
) throws IOException {
return captureOutAndError(setup, cmd, stdoutFilename, stderrFilename,
tailLength, false, pidFileName);
}
/**
* Wrap a command in a shell to capture stdout and stderr to files.
* Setup commands such as setting memory limit can be passed which
* will be executed before exec.
* If the tailLength is 0, the entire output will be saved.
* @param setup The setup commands for the execed process.
* @param cmd The command and the arguments that should be run
* @param stdoutFilename The filename that stdout should be saved to
* @param stderrFilename The filename that stderr should be saved to
* @param tailLength The length of the tail to be saved.
* @param useSetsid Should setsid be used in the command or not.
* @deprecated pidFiles are no more used. Instead pid is exported to
* env variable JVM_PID.
* @return the modified command that should be run
*
*/
@Deprecated
public static List captureOutAndError(List setup,
List cmd,
File stdoutFilename,
File stderrFilename,
long tailLength,
boolean useSetsid,
String pidFileName
) throws IOException {
return captureOutAndError(setup,cmd, stdoutFilename, stderrFilename, tailLength,
useSetsid);
}
/**
* Wrap a command in a shell to capture stdout and stderr to files.
* Setup commands such as setting memory limit can be passed which
* will be executed before exec.
* If the tailLength is 0, the entire output will be saved.
* @param setup The setup commands for the execed process.
* @param cmd The command and the arguments that should be run
* @param stdoutFilename The filename that stdout should be saved to
* @param stderrFilename The filename that stderr should be saved to
* @param tailLength The length of the tail to be saved.
* @param useSetsid Should setsid be used in the command or not.
* @return the modified command that should be run
*/
public static List captureOutAndError(List setup,
List cmd,
File stdoutFilename,
File stderrFilename,
long tailLength,
boolean useSetsid
) throws IOException {
List result = new ArrayList(3);
result.add(bashCommand);
result.add("-c");
String mergedCmd = buildCommandLine(setup, cmd,
stdoutFilename,
stderrFilename, tailLength,
useSetsid);
result.add(mergedCmd.toString());
return result;
}
static String buildCommandLine(List setup,
List cmd,
File stdoutFilename,
File stderrFilename,
long tailLength,
boolean useSetSid) throws IOException {
String stdout = FileUtil.makeShellPath(stdoutFilename);
String stderr = FileUtil.makeShellPath(stderrFilename);
StringBuffer mergedCmd = new StringBuffer();
if (!Shell.WINDOWS) {
mergedCmd.append(" export JVM_PID=`echo $$` ; ");
}
if (setup != null && setup.size() > 0) {
mergedCmd.append(addCommand(setup, false));
mergedCmd.append(";");
}
if (tailLength > 0) {
mergedCmd.append("(");
} else if (ProcessTree.isSetsidAvailable && useSetSid
&& !Shell.WINDOWS) {
mergedCmd.append("exec setsid ");
} else {
mergedCmd.append("exec ");
}
mergedCmd.append(addCommand(cmd, true));
mergedCmd.append(" < /dev/null ");
if (tailLength > 0) {
mergedCmd.append(" | ");
mergedCmd.append(tailCommand);
mergedCmd.append(" -c ");
mergedCmd.append(tailLength);
mergedCmd.append(" >> ");
mergedCmd.append(stdout);
mergedCmd.append(" ; exit $PIPESTATUS ) 2>&1 | ");
mergedCmd.append(tailCommand);
mergedCmd.append(" -c ");
mergedCmd.append(tailLength);
mergedCmd.append(" >> ");
mergedCmd.append(stderr);
mergedCmd.append(" ; exit $PIPESTATUS");
} else {
mergedCmd.append(" 1>> ");
mergedCmd.append(stdout);
mergedCmd.append(" 2>> ");
mergedCmd.append(stderr);
}
return mergedCmd.toString();
}
/**
* Add quotes to each of the command strings and
* return as a single string
* @param cmd The command to be quoted
* @param isExecutable makes shell path if the first
* argument is executable
* @return returns The quoted string.
* @throws IOException
*/
public static String addCommand(List cmd, boolean isExecutable)
throws IOException {
StringBuffer command = new StringBuffer();
for(String s: cmd) {
command.append('\'');
if (isExecutable) {
// the executable name needs to be expressed as a shell path for the
// shell to find it.
command.append(FileUtil.makeShellPath(new File(s)));
isExecutable = false;
} else {
command.append(s);
}
command.append('\'');
command.append(" ");
}
return command.toString();
}
/**
* Wrap a command in a shell to capture debug script's
* stdout and stderr to debugout.
* @param cmd The command and the arguments that should be run
* @param debugoutFilename The filename that stdout and stderr
* should be saved to.
* @return the modified command that should be run
* @throws IOException
*/
public static List captureDebugOut(List cmd,
File debugoutFilename
) throws IOException {
String debugout = FileUtil.makeShellPath(debugoutFilename);
List result = new ArrayList(3);
result.add(bashCommand);
result.add("-c");
StringBuffer mergedCmd = new StringBuffer();
mergedCmd.append("exec ");
boolean isExecutable = true;
for(String s: cmd) {
if (isExecutable) {
// the executable name needs to be expressed as a shell path for the
// shell to find it.
mergedCmd.append(FileUtil.makeShellPath(new File(s)));
isExecutable = false;
} else {
mergedCmd.append(s);
}
mergedCmd.append(" ");
}
mergedCmd.append(" < /dev/null ");
mergedCmd.append(" >");
mergedCmd.append(debugout);
mergedCmd.append(" 2>&1 ");
result.add(mergedCmd.toString());
return result;
}
public static File getUserLogDir() {
return LOG_DIR;
}
} // TaskLog