All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.hadoop.mapred.TaskLog Maven / Gradle / Ivy

Go to download

Hadoop is the distributed computing framework of Apache; hadoop-core contains the filesystem, job tracker and map/reduce modules

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Appender;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

/**
 * A simple logger to handle the task-specific user logs.
 * This class uses the system property hadoop.log.dir.
 * 
 */
public class TaskLog {
  /** Logger for this class. */
  private static final Log LOG =
    LogFactory.getLog(TaskLog.class.getName());

  /**
   * Root directory of all user logs: the "userlogs" directory under the
   * location named by the hadoop.log.dir system property.
   */
  private static final File LOG_DIR = 
    new File(System.getProperty("hadoop.log.dir"), 
             "userlogs").getAbsoluteFile();
  
  /**
   * Local file system used to rename the temporary index file into place.
   * Stays null when it could not be obtained by the static initializer.
   */
  static LocalFileSystem localFS = null;
  static {
    try {
      localFS = FileSystem.getLocal(new Configuration());
    } catch (IOException ioe) {
      // Keep the cause, otherwise the failure cannot be diagnosed later.
      LOG.warn("Getting local file system failed.", ioe);
    }
    // The second exists() check tolerates another process creating the
    // directory between our first check and mkdirs().
    if (!LOG_DIR.exists() && !LOG_DIR.mkdirs() && !LOG_DIR.exists()) {
      LOG.warn("Unable to create the user log directory " + LOG_DIR);
    }
  }

  /**
   * Builds the path of a log file inside the directory named after the
   * given task attempt.
   * @param taskid the attempt whose directory is used
   * @param filter the kind of log; its string form is the file name
   * @return the log file path (the file is not checked for existence)
   */
  public static File getTaskLogFile(TaskAttemptID taskid, LogName filter) {
    File attemptDir = getBaseDir(taskid.toString());
    String logFileName = filter.toString();
    return new File(attemptDir, logFileName);
  }
  /**
   * Resolves the log file of an attempt through its index file, which names
   * the directory the log is really stored in.
   * @param taskid the attempt whose log is wanted
   * @param filter the kind of log
   * @return the log file, or null if the index file could not be read
   */
  public static File getRealTaskLogFileLocation(TaskAttemptID taskid, 
      LogName filter) {
    LogFileDetail l;
    try {
      l = getTaskLogFileDetail(taskid, filter);
    } catch (IOException ie) {
      // Pass the exception itself so that the stack trace is preserved.
      LOG.error("getTaskLogFileDetail threw an exception for " + taskid, ie);
      return null;
    }
    return new File(getBaseDir(l.location), filter.toString());
  }
  /**
   * What the index file records about one log of a task attempt.
   */
  private static class LogFileDetail {
    /** Prefix of the index-file line that names the log directory. */
    static final String LOCATION = "LOG_DIR:";

    /** Name of the directory, under the user-log root, holding the log. */
    String location;

    /** Offset within the log file at which the attempt's data begins. */
    long start;

    /** Number of bytes of the log file that belong to the attempt. */
    long length;
  }
  
  /**
   * Looks up a log's index entry using the regular (non-cleanup) index file.
   * @param taskid the attempt whose index file is read
   * @param filter the kind of log to look up
   * @return the location, start offset and length recorded for the log
   * @throws IOException if the index file cannot be read
   */
  private static LogFileDetail getTaskLogFileDetail(TaskAttemptID taskid,
      LogName filter) throws IOException {
    final boolean isCleanup = false;
    return getLogFileDetail(taskid, filter, isCleanup);
  }
  
  /**
   * Reads an attempt's index file and returns the entry for one log.
   * <p>
   * The index file has the following format, one entry per line:
   * <pre>
   * LOG_DIR:&lt;directory name under the user-log root&gt;
   * stdout:&lt;start offset&gt; &lt;length&gt;
   * stderr:&lt;start offset&gt; &lt;length&gt;
   * syslog:&lt;start offset&gt; &lt;length&gt;
   * </pre>
   * If no line matches the requested log, start and length are left at 0.
   *
   * @param taskid the attempt whose index file is read
   * @param filter the kind of log to look up
   * @param isCleanup whether to read the cleanup attempt's index file
   * @return the location, start offset and length recorded for the log
   * @throws IOException if the index file cannot be read, is empty, or
   *         holds a malformed entry for the requested log
   */
  private static LogFileDetail getLogFileDetail(TaskAttemptID taskid, 
                                                LogName filter,
                                                boolean isCleanup) 
  throws IOException {
    File indexFile = getIndexFile(taskid.toString(), isCleanup);
    BufferedReader fis = new BufferedReader(new java.io.FileReader(indexFile));
    // try/finally so the reader is released on every path, including the
    // exceptional ones.
    try {
      LogFileDetail l = new LogFileDetail();
      String str = fis.readLine();
      if (str == null) { //the file doesn't have anything
        throw new IOException ("Index file for the log of " + taskid+" doesn't exist.");
      }
      l.location = str.substring(str.indexOf(LogFileDetail.LOCATION)+
          LogFileDetail.LOCATION.length());
      //special cases are the debugout and profile.out files. They are guaranteed
      //to be associated with each task attempt since jvm reuse is disabled
      //when profiling/debugging is enabled
      if (filter.equals(LogName.DEBUGOUT) || filter.equals(LogName.PROFILE)) {
        l.length = new File(getBaseDir(l.location), filter.toString()).length();
        l.start = 0;
        return l;
      }
      // Match on "<name>:" at the start of the line: the offsets are parsed
      // from a fixed position, so a looser match would read garbage.
      String prefix = filter.toString() + ":";
      str = fis.readLine();
      while (str != null) {
        if (str.startsWith(prefix)) {
          String[] startAndLen = str.substring(prefix.length()).split(" ");
          if (startAndLen.length < 2) {
            throw new IOException("Malformed entry '" + str
                + "' in index file " + indexFile);
          }
          try {
            l.start = Long.parseLong(startAndLen[0]);
            l.length = Long.parseLong(startAndLen[1]);
          } catch (NumberFormatException nfe) {
            IOException ioe = new IOException("Malformed entry '" + str
                + "' in index file " + indexFile);
            ioe.initCause(nfe);
            throw ioe;
          }
          break;
        }
        str = fis.readLine();
      }
      return l;
    } finally {
      fis.close();
    }
  }
  
  /**
   * Returns the temporary file that index updates are written to before
   * being renamed over the real index file.
   * @param taskid name of the attempt directory
   * @return the "log.tmp" file inside that directory
   */
  private static File getTmpIndexFile(String taskid) {
    File attemptDir = getBaseDir(taskid);
    return new File(attemptDir, "log.tmp");
  }
  /**
   * Returns the regular (non-cleanup) index file of an attempt.
   * @param taskid name of the attempt directory
   * @return the "log.index" file inside that directory
   */
  public static File getIndexFile(String taskid) {
    final boolean isCleanup = false;
    return getIndexFile(taskid, isCleanup);
  }
  
  /**
   * Returns the index file of an attempt.
   * @param taskid name of the attempt directory
   * @param isCleanup true to get the cleanup attempt's index file
   * @return "log.index.cleanup" or "log.index" inside the attempt directory
   */
  public static File getIndexFile(String taskid, boolean isCleanup) {
    String indexName = isCleanup ? "log.index.cleanup" : "log.index";
    return new File(getBaseDir(taskid), indexName);
  }
  
  /**
   * Maps an attempt name to its directory under the user-log root.
   * @param taskid name of the attempt directory
   * @return the directory (not checked for existence)
   */
  private static File getBaseDir(String taskid) {
    File attemptDir = new File(LOG_DIR, taskid);
    return attemptDir;
  }
  // Sizes of the stdout, stderr and syslog files captured by
  // resetPrevLengths(). writeToIndexFile() records them as the start offsets
  // and subtracts them from the current file sizes to obtain the lengths.
  private static long prevOutLength;
  private static long prevErrLength;
  private static long prevLogLength;
  
  /**
   * Rewrites the index file of the current attempt.
   * <p>
   * The index file has the following format, one entry per line:
   * <pre>
   * LOG_DIR:&lt;firstTaskid&gt;
   * stdout:&lt;start offset&gt; &lt;length&gt;
   * stderr:&lt;start offset&gt; &lt;length&gt;
   * syslog:&lt;start offset&gt; &lt;length&gt;
   * </pre>
   * The start offsets are the lengths captured by resetPrevLengths(); the
   * lengths are the current file sizes minus those offsets.
   *
   * @param firstTaskid the attempt whose directory holds the log files
   * @param isCleanup whether to write the cleanup attempt's index file
   * @throws IOException if the local file system is unavailable or the
   *         temporary index file cannot be written
   */
  private static void writeToIndexFile(TaskAttemptID firstTaskid,
                                       boolean isCleanup) 
  throws IOException {
    if (localFS == null) {
      // The static initializer failed to obtain it; report that instead of
      // failing with a NullPointerException at the rename below.
      throw new IOException("Local file system is unavailable; cannot update"
          + " the index file of " + currentTaskid);
    }
    // To ensure atomicity of updates to index file, write to temporary index
    // file first and then rename.
    File tmpIndexFile = getTmpIndexFile(currentTaskid.toString());
    
    DataOutputStream dos = new DataOutputStream(
        new BufferedOutputStream(new FileOutputStream(tmpIndexFile,false)));
    // try/finally so the stream is released even when a write fails.
    try {
      dos.writeBytes(LogFileDetail.LOCATION + firstTaskid.toString()+"\n"+
          LogName.STDOUT.toString()+":");
      dos.writeBytes(Long.toString(prevOutLength)+" ");
      dos.writeBytes(Long.toString(getTaskLogFile(firstTaskid, LogName.STDOUT)
          .length() - prevOutLength)+"\n"+LogName.STDERR+":");
      dos.writeBytes(Long.toString(prevErrLength)+" ");
      dos.writeBytes(Long.toString(getTaskLogFile(firstTaskid, LogName.STDERR)
          .length() - prevErrLength)+"\n"+LogName.SYSLOG.toString()+":");
      dos.writeBytes(Long.toString(prevLogLength)+" ");
      dos.writeBytes(Long.toString(getTaskLogFile(firstTaskid, LogName.SYSLOG)
          .length() - prevLogLength)+"\n");
    } finally {
      dos.close();
    }

    File indexFile = getIndexFile(currentTaskid.toString(), isCleanup);
    Path indexFilePath = new Path(indexFile.getAbsolutePath());
    Path tmpIndexFilePath = new Path(tmpIndexFile.getAbsolutePath());
    if (!localFS.rename (tmpIndexFilePath, indexFilePath)) {
      // Only warn: a stale index is recoverable on the next sync.
      LOG.warn("Failed to rename " + tmpIndexFilePath + " to "
          + indexFilePath);
    }
  }
  /**
   * Captures the current sizes of the stdout, stderr and syslog files in
   * the given attempt's directory as the new start offsets.
   * @param firstTaskid the attempt whose directory holds the log files
   */
  private static void resetPrevLengths(TaskAttemptID firstTaskid) {
    File stdoutFile = getTaskLogFile(firstTaskid, LogName.STDOUT);
    prevOutLength = stdoutFile.length();

    File stderrFile = getTaskLogFile(firstTaskid, LogName.STDERR);
    prevErrLength = stderrFile.length();

    File syslogFile = getTaskLogFile(firstTaskid, LogName.SYSLOG);
    prevLogLength = syslogFile.length();
  }
  // The attempt most recently passed to syncLogs(). writeToIndexFile() uses
  // it to locate the temporary and final index files.
  private volatile static TaskAttemptID currentTaskid = null;

  /**
   * Flushes the task's log output and updates the regular (non-cleanup)
   * index file.
   * @param firstTaskid the attempt whose directory holds the log files
   * @param taskid the attempt currently running
   * @throws IOException if the index file cannot be updated
   */
  public synchronized static void syncLogs(TaskAttemptID firstTaskid, 
                                           TaskAttemptID taskid) 
  throws IOException {
    final boolean isCleanup = false;
    syncLogs(firstTaskid, taskid, isCleanup);
  }
  
  /**
   * Flushes System.out, System.err and every TaskLogAppender attached to a
   * current log4j logger, then rewrites the index file. When the running
   * attempt changes, the start offsets are first reset to the current log
   * file sizes.
   * @param firstTaskid the attempt whose directory holds the log files
   * @param taskid the attempt currently running
   * @param isCleanup whether to write the cleanup attempt's index file
   * @throws IOException if the index file cannot be updated
   */
  @SuppressWarnings("unchecked")
  public synchronized static void syncLogs(TaskAttemptID firstTaskid, 
                                           TaskAttemptID taskid,
                                           boolean isCleanup) 
  throws IOException {
    System.out.flush();
    System.err.flush();
    // log4j hands back raw Enumerations; the element types are stated here,
    // which is what the unchecked suppression on this method covers.
    Enumeration<Logger> allLoggers = LogManager.getCurrentLoggers();
    while (allLoggers.hasMoreElements()) {
      Logger l = allLoggers.nextElement();
      Enumeration<Appender> allAppenders = l.getAllAppenders();
      while (allAppenders.hasMoreElements()) {
        Appender a = allAppenders.nextElement();
        if (a instanceof TaskLogAppender) {
          ((TaskLogAppender)a).flush();
        }
      }
    }
    // Compare by value: an equal id held in a different object must not be
    // treated as a new attempt, or the start offsets would be reset mid-run.
    if (currentTaskid == null || !currentTaskid.equals(taskid)) {
      currentTaskid = taskid;
      resetPrevLengths(firstTaskid);
    }
    writeToIndexFile(firstTaskid, isCleanup);
  }
  
  /**
   * The filter for userlogs. Each constant's string form is the name of the
   * corresponding log file.
   */
  public enum LogName {
    /** Log on the stdout of the task. */
    STDOUT ("stdout"),

    /** Log on the stderr of the task. */
    STDERR ("stderr"),
    
    /** Log on the map-reduce system logs of the task. */
    SYSLOG ("syslog"),
    
    /** The java profiler information. */
    PROFILE ("profile.out"),
    
    /** Log the debug script's stdout  */
    DEBUGOUT ("debugout");

    /** File name of this kind of log; never changes after construction. */
    private final String prefix;
    
    private LogName(String prefix) {
      this.prefix = prefix;
    }

    /**
     * @return the file name of this kind of log
     */
    @Override
    public String toString() {
      return prefix;
    }
  }

  /**
   * Accepts the files whose modification time is earlier than a cut-off.
   */
  private static class TaskLogsPurgeFilter implements FileFilter {
    /** Cut-off, in milliseconds since the epoch. */
    long purgeTimeStamp;
  
    TaskLogsPurgeFilter(long cutoffMillis) {
      purgeTimeStamp = cutoffMillis;
    }

    /**
     * @param file the candidate file
     * @return true if the file was last modified before the cut-off
     */
    public boolean accept(File file) {
      StringBuilder message = new StringBuilder("PurgeFilter - file: ");
      message.append(file);
      message.append(", mtime: ").append(file.lastModified());
      message.append(", purge: ").append(purgeTimeStamp);
      LOG.debug(message.toString());

      boolean olderThanCutoff = file.lastModified() < purgeTimeStamp;
      return olderThanCutoff;
    }
  }
  /**
   * Deletes every entry of the user-log root whose modification time is
   * more than the given number of hours in the past.
   * 
   * @param logsRetainHours how many hours of logs to keep
   * @throws IOException
   */
  public static synchronized void cleanup(int logsRetainHours
                                          ) throws IOException {
    // Purge logs of tasks on this tasktracker if their  
    // mtime has exceeded "mapred.task.log.retain" hours
    long retainMillis = logsRetainHours*60L*60*1000;
    long purgeTimeStamp = System.currentTimeMillis() - retainMillis;
    FileFilter expired = new TaskLogsPurgeFilter(purgeTimeStamp);
    File[] oldTaskLogs = LOG_DIR.listFiles(expired);
    if (oldTaskLogs == null) {
      // the listing could not be produced; nothing to purge
      return;
    }
    for (File oldTaskLog : oldTaskLogs) {
      FileUtil.fullyDelete(oldTaskLog);
    }
  }

  /**
   * An input stream over the part of a log file that belongs to one task
   * attempt, as recorded in the attempt's index file.
   */
  static class Reader extends InputStream {
    /** Bytes of the requested range not yet handed out; never negative. */
    private long bytesRemaining;
    private FileInputStream file;

    /**
     * Read a log file from start to end positions using the regular
     * (non-cleanup) index file.
     * @param taskid the id of the task to read the log file for
     * @param kind the kind of log to read
     * @param start the offset to read from (negative is relative to tail)
     * @param end the offset to read upto (negative is relative to tail)
     * @throws IOException
     */
    public Reader(TaskAttemptID taskid, LogName kind, 
                  long start, long end) throws IOException {
      this(taskid, kind, start, end, false);
    }
    
    /**
     * Read a log file from start to end positions. The offsets may be negative,
     * in which case they are relative to the end of the file. For example,
     * Reader(taskid, kind, 0, -1) is the entire file and 
     * Reader(taskid, kind, -4197, -1) is the last 4196 bytes. 
     * A range whose end precedes its start is treated as empty.
     * @param taskid the id of the task to read the log file for
     * @param kind the kind of log to read
     * @param start the offset to read from (negative is relative to tail)
     * @param end the offset to read upto (negative is relative to tail)
     * @param isCleanup whether the attempt is cleanup attempt or not
     * @throws IOException
     */
    public Reader(TaskAttemptID taskid, LogName kind, 
                  long start, long end, boolean isCleanup) throws IOException {
      // find the right log file
      LogFileDetail fileDetail = getLogFileDetail(taskid, kind, isCleanup);
      // calculate the start and stop
      long size = fileDetail.length;
      if (start < 0) {
        start += size + 1;
      }
      if (end < 0) {
        end += size + 1;
      }
      start = Math.max(0, Math.min(start, size));
      end = Math.max(0, Math.min(end, size));
      start += fileDetail.start;
      end += fileDetail.start;
      // Clamp at zero: a negative count would later be passed to
      // FileInputStream.read as a negative length.
      bytesRemaining = Math.max(0, end - start);
      file = new FileInputStream(new File(getBaseDir(fileDetail.location), 
          kind.toString()));
      boolean positioned = false;
      try {
        // skip upto start
        long pos = 0;
        while (pos < start) {
          long result = file.skip(start - pos);
          if (result < 0) {
            bytesRemaining = 0;
            break;
          }
          pos += result;
        }
        positioned = true;
      } finally {
        // The caller never receives this object if skipping failed, so the
        // file has to be released here.
        if (!positioned) {
          file.close();
        }
      }
    }
    
    /**
     * @return the next byte of the range, or -1 once the range is exhausted
     */
    @Override
    public int read() throws IOException {
      int result = -1;
      if (bytesRemaining > 0) {
        bytesRemaining -= 1;
        result = file.read();
      }
      return result;
    }
    
    /**
     * Reads up to length bytes, never going past the end of the range.
     * @return the number of bytes read, or -1 once the range is exhausted
     */
    @Override
    public int read(byte[] buffer, int offset, int length) throws IOException {
      if (length > 0 && bytesRemaining <= 0) {
        // Signal end of stream: returning 0 here would make callers that
        // loop until -1 spin forever.
        return -1;
      }
      length = (int) Math.min(length, bytesRemaining);
      int bytes = file.read(buffer, offset, length);
      if (bytes > 0) {
        bytesRemaining -= bytes;
      }
      return bytes;
    }
    
    /**
     * @return the smaller of the bytes left in the range and the bytes the
     *         underlying file reports as available
     */
    @Override
    public int available() throws IOException {
      return (int) Math.min(bytesRemaining, file.available());
    }

    @Override
    public void close() throws IOException {
      file.close();
    }
  }

  // Shell used to run the wrapped commands built by captureOutAndError()
  // and captureDebugOut().
  private static final String bashCommand = "bash";
  // Program used by captureOutAndError() to keep only the tail of the output.
  private static final String tailCommand = "tail";
  
  /**
   * Get the desired maximum length of task's logs, read from the
   * "mapred.userlog.limit.kb" setting (100 when it is not set).
   * @param conf the job to look in
   * @return the number of bytes to cap the log files at
   */
  public static long getTaskLogLength(JobConf conf) {
    long limitKb = conf.getLong("mapred.userlog.limit.kb", 100);
    return limitKb * 1024;
  }

  /**
   * Wrap a command in a shell to capture stdout and stderr to files.
   * No setup commands are run and no pid-file is written.
   * If the tailLength is 0, the entire output will be saved.
   * @param cmd The command and the arguments that should be run
   * @param stdoutFilename The filename that stdout should be saved to
   * @param stderrFilename The filename that stderr should be saved to
   * @param tailLength The length of the tail to be saved.
   * @return the modified command that should be run
   * @throws IOException
   */
  public static List<String> captureOutAndError(List<String> cmd, 
                                                File stdoutFilename,
                                                File stderrFilename,
                                                long tailLength
                                               ) throws IOException {
    return captureOutAndError(null, cmd, stdoutFilename,
                              stderrFilename, tailLength, null );
  }

  /**
   * Wrap a command in a shell to capture stdout and stderr to files.
   * Setup commands such as setting memory limit can be passed which 
   * will be executed before exec. No pid-file is written.
   * If the tailLength is 0, the entire output will be saved.
   * @param setup The setup commands for the execed process.
   * @param cmd The command and the arguments that should be run
   * @param stdoutFilename The filename that stdout should be saved to
   * @param stderrFilename The filename that stderr should be saved to
   * @param tailLength The length of the tail to be saved.
   * @return the modified command that should be run
   * @throws IOException
   */
  public static List<String> captureOutAndError(List<String> setup,
                                                List<String> cmd, 
                                                File stdoutFilename,
                                                File stderrFilename,
                                                long tailLength
                                               ) throws IOException {
    return captureOutAndError(setup, cmd, stdoutFilename, stderrFilename,
        tailLength, null);
  }

  /**
   * Wrap a command in a shell to capture stdout and stderr to files.
   * Setup commands such as setting memory limit can be passed which 
   * will be executed before exec.
   * If the tailLength is 0, the entire output will be saved.
   * <p>
   * The result is the three-element command "bash", "-c" and the merged
   * shell command line.
   * @param setup The setup commands for the execed process.
   * @param cmd The command and the arguments that should be run
   * @param stdoutFilename The filename that stdout should be saved to
   * @param stderrFilename The filename that stderr should be saved to
   * @param tailLength The length of the tail to be saved.
   * @param pidFileName The name of the pid-file
   * @return the modified command that should be run
   * @throws IOException
   */
  public static List<String> captureOutAndError(List<String> setup,
                                                List<String> cmd, 
                                                File stdoutFilename,
                                                File stderrFilename,
                                                long tailLength,
                                                String pidFileName
                                               ) throws IOException {
    String stdout = FileUtil.makeShellPath(stdoutFilename);
    String stderr = FileUtil.makeShellPath(stderrFilename);
    List<String> result = new ArrayList<String>(3);
    result.add(bashCommand);
    result.add("-c");
    // The builder never leaves this method, so the unsynchronized
    // StringBuilder is sufficient.
    StringBuilder mergedCmd = new StringBuilder();
    
    // Spit out the pid to pidFileName
    // NOTE(review): pidFileName is inserted unquoted; presumably it is
    // always generated internally - confirm against the callers.
    if (pidFileName != null) {
      mergedCmd.append("echo $$ > ");
      mergedCmd.append(pidFileName);
      mergedCmd.append(" ;");
    }

    if (setup != null && setup.size() > 0) {
      mergedCmd.append(addCommand(setup, false));
      mergedCmd.append(";");
    }
    if (tailLength > 0) {
      // run in a subshell so that both streams can be piped through tail
      mergedCmd.append("(");
    } else {
      mergedCmd.append("exec ");
    }
    mergedCmd.append(addCommand(cmd, true));
    mergedCmd.append(" < /dev/null ");
    if (tailLength > 0) {
      mergedCmd.append(" | ");
      mergedCmd.append(tailCommand);
      mergedCmd.append(" -c ");
      mergedCmd.append(tailLength);
      mergedCmd.append(" >> ");
      mergedCmd.append(stdout);
      mergedCmd.append(" ; exit $PIPESTATUS ) 2>&1 | ");
      mergedCmd.append(tailCommand);
      mergedCmd.append(" -c ");
      mergedCmd.append(tailLength);
      mergedCmd.append(" >> ");
      mergedCmd.append(stderr);
      mergedCmd.append(" ; exit $PIPESTATUS");
    } else {
      mergedCmd.append(" 1>> ");
      mergedCmd.append(stdout);
      mergedCmd.append(" 2>> ");
      mergedCmd.append(stderr);
    }
    result.add(mergedCmd.toString());
    return result;
  }

  /**
   * Add quotes to each of the command strings and
   * return as a single string. Every element is wrapped in single quotes
   * and followed by one space.
   * @param cmd The command to be quoted
   * @param isExecutable makes shell path if the first 
   * argument is executable
   * @return returns The quoted string. 
   * @throws IOException
   */
  public static String addCommand(List<String> cmd, boolean isExecutable) 
  throws IOException {
    StringBuilder command = new StringBuilder();
    // NOTE(review): a single quote embedded in an element is not escaped
    // and would end the quoting early - confirm callers never pass one.
    for (String s : cmd) {
      command.append('\'');
      if (isExecutable) {
        // the executable name needs to be expressed as a shell path for the  
        // shell to find it.
        command.append(FileUtil.makeShellPath(new File(s)));
        // only the first element is the executable
        isExecutable = false; 
      } else {
        command.append(s);
      }
      command.append('\'');
      command.append(" ");
    }
    return command.toString();
  }
  
  /**
   * Wrap a command in a shell to capture debug script's 
   * stdout and stderr to debugout.
   * <p>
   * The result is the three-element command "bash", "-c" and the merged
   * shell command line. Unlike addCommand, the arguments are appended
   * without quoting.
   * @param cmd The command and the arguments that should be run
   * @param debugoutFilename The filename that stdout and stderr
   *  should be saved to.
   * @return the modified command that should be run
   * @throws IOException
   */
  public static List<String> captureDebugOut(List<String> cmd, 
                                             File debugoutFilename
                                            ) throws IOException {
    String debugout = FileUtil.makeShellPath(debugoutFilename);
    List<String> result = new ArrayList<String>(3);
    result.add(bashCommand);
    result.add("-c");
    StringBuilder mergedCmd = new StringBuilder();
    mergedCmd.append("exec ");
    boolean isExecutable = true;
    for (String s : cmd) {
      if (isExecutable) {
        // the executable name needs to be expressed as a shell path for the  
        // shell to find it.
        mergedCmd.append(FileUtil.makeShellPath(new File(s)));
        // only the first element is the executable
        isExecutable = false; 
      } else {
        mergedCmd.append(s);
      }
      mergedCmd.append(" ");
    }
    mergedCmd.append(" < /dev/null ");
    mergedCmd.append(" >");
    mergedCmd.append(debugout);
    mergedCmd.append(" 2>&1 ");
    result.add(mergedCmd.toString());
    return result;
  }
  
} // TaskLog




© 2015 - 2024 Weber Informatics LLC | Privacy Policy