All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.mapred.TaskLog Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Comparator;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ProcessTree;
import org.apache.hadoop.util.Shell;
import org.apache.log4j.Appender;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

/**
 * A simple logger to handle the task-specific user logs.
 * This class uses the system property hadoop.log.dir.
 * 
 */
public class TaskLog {
  private static final Log LOG =
    LogFactory.getLog(TaskLog.class);

  private static final File LOG_DIR = 
    new File(System.getProperty("hadoop.log.dir"), 
             "userlogs").getAbsoluteFile();
  
  // localFS is set in (and used by) writeToIndexFile()
  static LocalFileSystem localFS = null;
  static {
    if (!LOG_DIR.exists()) {
      LOG_DIR.mkdirs();
    }
  }

  public static File getTaskLogFile(TaskAttemptID taskid, LogName filter) {
    return new File(getBaseDir(taskid.toString()), filter.toString());
  }

  /**
   * @deprecated Instead use
   *             {@link #getAllLogsFileDetails(TaskAttemptID, boolean)} to get
   *             the details of all log-files and then use the particular
   *             log-type's detail to call getRealTaskLogFileLocation(String,
   *             LogName) real log-location
   */
  @Deprecated
  public static File getRealTaskLogFileLocation(TaskAttemptID taskid, 
      LogName filter) {
    LogFileDetail l;
    try {
      Map allFilesDetails =
          getAllLogsFileDetails(taskid, false);
      l = allFilesDetails.get(filter);
    } catch (IOException ie) {
      LOG.error("getTaskLogFileDetailgetAllLogsFileDetails threw an exception "
          + ie);
      return null;
    }
    return new File(getBaseDir(l.location), filter.toString());
  }

  /**
   * Get the real task-log file-path
   * 
   * @param location Location of the log-file. This should point to an
   *          attempt-directory.
   * @param filter
   * @return
   * @throws IOException
   */
  static String getRealTaskLogFilePath(String location, LogName filter)
      throws IOException {
    return FileUtil.makeShellPath(new File(getBaseDir(location),
        filter.toString()));
  }

  static class LogFileDetail {
    final static String LOCATION = "LOG_DIR:";
    String location;
    long start;
    long length;
  }

  static Map getAllLogsFileDetails(
      TaskAttemptID taskid, boolean isCleanup) throws IOException {

    Map allLogsFileDetails =
        new HashMap();

    File indexFile = getIndexFile(taskid.toString(), isCleanup);
    BufferedReader fis = new BufferedReader(new java.io.FileReader(indexFile));
    //the format of the index file is
    //LOG_DIR: 
    //stdout: 
    //stderr: 
    //syslog: 
    String str = fis.readLine();
    if (str == null) { //the file doesn't have anything
      throw new IOException ("Index file for the log of " + taskid+" doesn't exist.");
    }
    String loc = str.substring(str.indexOf(LogFileDetail.LOCATION)+
        LogFileDetail.LOCATION.length());
    //special cases are the debugout and profile.out files. They are guaranteed
    //to be associated with each task attempt since jvm reuse is disabled
    //when profiling/debugging is enabled
    for (LogName filter : new LogName[] { LogName.DEBUGOUT, LogName.PROFILE }) {
      LogFileDetail l = new LogFileDetail();
      l.location = loc;
      l.length = new File(getBaseDir(l.location), filter.toString()).length();
      l.start = 0;
      allLogsFileDetails.put(filter, l);
    }
    str = fis.readLine();
    while (str != null) {
      LogFileDetail l = new LogFileDetail();
      l.location = loc;
      int idx = str.indexOf(':');
      LogName filter = LogName.valueOf(str.substring(0, idx).toUpperCase());
      str = str.substring(idx + 1);
      String[] startAndLen = str.split(" ");
      l.start = Long.parseLong(startAndLen[0]);
      l.length = Long.parseLong(startAndLen[1]);
      allLogsFileDetails.put(filter, l);
      str = fis.readLine();
    }
    fis.close();
    return allLogsFileDetails;
  }
  
  private static File getTmpIndexFile(String taskid) {
    return new File(getBaseDir(taskid), "log.tmp");
  }
  public static File getIndexFile(String taskid) {
    return getIndexFile(taskid, false);
  }
  
  public static File getIndexFile(String taskid, boolean isCleanup) {
    if (isCleanup) {
      return new File(getBaseDir(taskid), "log.index.cleanup");
    } else {
      return new File(getBaseDir(taskid), "log.index");
    }
  }
  
  static File getBaseDir(String taskid) {
    return new File(LOG_DIR, taskid);
  }

  static final List LOGS_TRACKED_BY_INDEX_FILES =
      Arrays.asList(LogName.STDOUT, LogName.STDERR, LogName.SYSLOG);

  private static TaskAttemptID currentTaskid;

  /**
   * Map to store previous and current lengths.
   */
  private static Map logLengths =
      new HashMap();
  static {
    for (LogName logName : LOGS_TRACKED_BY_INDEX_FILES) {
      logLengths.put(logName, new Long[] { Long.valueOf(0L),
          Long.valueOf(0L) });
    }
  }
  
  static void writeToIndexFile(TaskAttemptID firstTaskid,
      TaskAttemptID currentTaskid, boolean isCleanup,
      Map lengths) throws IOException {
    // To ensure atomicity of updates to index file, write to temporary index
    // file first and then rename.
    File tmpIndexFile = getTmpIndexFile(currentTaskid.toString());
    
    BufferedOutputStream bos = 
      new BufferedOutputStream(new FileOutputStream(tmpIndexFile,false));
    DataOutputStream dos = new DataOutputStream(bos);
    //the format of the index file is
    //LOG_DIR: 
    //STDOUT:  
    //STDERR:  
    //SYSLOG:      
    dos.writeBytes(LogFileDetail.LOCATION
        + firstTaskid.toString()
        + "\n");
    for (LogName logName : LOGS_TRACKED_BY_INDEX_FILES) {
      Long[] lens = lengths.get(logName);
      dos.writeBytes(logName.toString() + ":"
          + lens[0].toString() + " "
          + Long.toString(lens[1].longValue() - lens[0].longValue())
          + "\n");}
    dos.close();

    File indexFile = getIndexFile(currentTaskid.toString(), isCleanup);
    Path indexFilePath = new Path(indexFile.getAbsolutePath());
    Path tmpIndexFilePath = new Path(tmpIndexFile.getAbsolutePath());

    if (localFS == null) {// set localFS once
      localFS = FileSystem.getLocal(new Configuration());
    }
    localFS.rename (tmpIndexFilePath, indexFilePath);
  }

  public synchronized static void syncLogs(TaskAttemptID firstTaskid, 
                                           TaskAttemptID taskid) 
  throws IOException {
    syncLogs(firstTaskid, taskid, false);
  }

  @SuppressWarnings("unchecked")
  public synchronized static void syncLogs(TaskAttemptID firstTaskid, 
                                           TaskAttemptID taskid,
                                           boolean isCleanup) 
  throws IOException {
    System.out.flush();
    System.err.flush();
    Enumeration allLoggers = LogManager.getCurrentLoggers();
    while (allLoggers.hasMoreElements()) {
      Logger l = allLoggers.nextElement();
      Enumeration allAppenders = l.getAllAppenders();
      while (allAppenders.hasMoreElements()) {
        Appender a = allAppenders.nextElement();
        if (a instanceof TaskLogAppender) {
          ((TaskLogAppender)a).flush();
        }
      }
    }
    // set start and end
    for (LogName logName : LOGS_TRACKED_BY_INDEX_FILES) {
      if (currentTaskid != taskid) {
        // Set start = current-end
        logLengths.get(logName)[0] =
            Long.valueOf(getTaskLogFile(firstTaskid, logName).length());
      }
      // Set current end
      logLengths.get(logName)[1] =
          Long.valueOf(getTaskLogFile(firstTaskid, logName).length());
    }
    if (currentTaskid != taskid) {
      if (currentTaskid != null) {
        LOG.info("Starting logging for a new task " + taskid
            + " in the same JVM as that of the first task " + firstTaskid);
      }
      currentTaskid = taskid;
    }
    writeToIndexFile(firstTaskid, taskid, isCleanup, logLengths);
  }
  
  /**
   * The filter for userlogs.
   */
  public static enum LogName {
    /** Log on the stdout of the task. */
    STDOUT ("stdout"),

    /** Log on the stderr of the task. */
    STDERR ("stderr"),
    
    /** Log on the map-reduce system logs of the task. */
    SYSLOG ("syslog"),
    
    /** The java profiler information. */
    PROFILE ("profile.out"),
    
    /** Log the debug script's stdout  */
    DEBUGOUT ("debugout");
        
    private String prefix;
    
    private LogName(String prefix) {
      this.prefix = prefix;
    }
    
    @Override
    public String toString() {
      return prefix;
    }
  }

  /**
   * Purge old user logs, and clean up half of the logs if the number of logs
   * exceed the limit.
   * 
   * @throws IOException
   */
  static void cleanup(int logsRetainHours, int logsNumberLimit
                                             ) throws IOException {
    cleanup(TaskLog.getUserLogDir(), logsRetainHours, logsNumberLimit);
  }

  static void cleanup(File logDir, int logsRetainHours, int logsNumberLimit
                                             ) throws IOException {
    //Delete logs of tasks if the number of files exceed fileNumberLimit
    LOG.info("TaskLog.cleanup: logsRetainHours=" + logsRetainHours +
      " logsNumberLimit=" + logsNumberLimit);
    File[] totalLogs = logDir.listFiles();
    long purgeTimeStamp = System.currentTimeMillis() -
                            (logsRetainHours*60L*60*1000);
    if (totalLogs != null) {
      int numLogs = totalLogs.length;
      if (totalLogs.length > logsNumberLimit) {
        // sorting files first according to last modified time.
        Arrays.sort(totalLogs, new Comparator() {
          public int compare(File f1, File f2) {
            return Long.valueOf(f1.lastModified()).compareTo(f2.lastModified());
          }
        });
        // deleting half of the oldest logs
        for (int i=0; i < numLogs / 2; i++) {
          LOG.info("TaskLog.cleanup deleting " + totalLogs[i]);
          FileUtil.fullyDelete(totalLogs[i]);
        }
        // now, delete the remaining ones that are older than purgeTimeStamp
        // since the array is sorted, break the loop once we pass the boundary
        for (int i = numLogs / 2; i < numLogs; i++) {
          if (totalLogs[i].lastModified() < purgeTimeStamp) {
            LOG.info("TaskLog.cleanup deleting " + totalLogs[i]);
            FileUtil.fullyDelete(totalLogs[i]);
          } else {
            break;
          }
        }
      } else {
      // if the number of logs is less than limit, only delete the files that are
      // older than PurgeTimeStamp
        for (int i=0; i < numLogs; i++) {
          if (totalLogs[i].lastModified() < purgeTimeStamp) {
            LOG.info("TaskLog.cleanup deleting " + totalLogs[i]);
            FileUtil.fullyDelete(totalLogs[i]);
          }
        }
      }
    }
  }

  static class Reader extends InputStream {
    private long bytesRemaining;
    private FileInputStream file;

    public Reader(TaskAttemptID taskid, LogName kind, 
                  long start, long end) throws IOException {
      this(taskid, kind, start, end, false);
    }
    
    /**
     * Read a log file from start to end positions. The offsets may be negative,
     * in which case they are relative to the end of the file. For example,
     * Reader(taskid, kind, 0, -1) is the entire file and 
     * Reader(taskid, kind, -4197, -1) is the last 4196 bytes. 
     * @param taskid the id of the task to read the log file for
     * @param kind the kind of log to read
     * @param start the offset to read from (negative is relative to tail)
     * @param end the offset to read upto (negative is relative to tail)
     * @param isCleanup whether the attempt is cleanup attempt or not
     * @throws IOException
     */
    public Reader(TaskAttemptID taskid, LogName kind, 
                  long start, long end, boolean isCleanup) throws IOException {
      // find the right log file
      Map allFilesDetails =
          getAllLogsFileDetails(taskid, isCleanup);
      LogFileDetail fileDetail = allFilesDetails.get(kind);
      // calculate the start and stop
      long size = fileDetail.length;
      if (start < 0) {
        start += size + 1;
      }
      if (end < 0) {
        end += size + 1;
      }
      start = Math.max(0, Math.min(start, size));
      end = Math.max(0, Math.min(end, size));
      start += fileDetail.start;
      end += fileDetail.start;
      bytesRemaining = end - start;
      file = new FileInputStream(new File(getBaseDir(fileDetail.location), 
          kind.toString()));
      // skip upto start
      long pos = 0;
      while (pos < start) {
        long result = file.skip(start - pos);
        if (result < 0) {
          bytesRemaining = 0;
          break;
        }
        pos += result;
      }
    }
    
    @Override
    public int read() throws IOException {
      int result = -1;
      if (bytesRemaining > 0) {
        bytesRemaining -= 1;
        result = file.read();
      }
      return result;
    }
    
    @Override
    public int read(byte[] buffer, int offset, int length) throws IOException {
      length = (int) Math.min(length, bytesRemaining);
      int bytes = file.read(buffer, offset, length);
      if (bytes > 0) {
        bytesRemaining -= bytes;
      }
      return bytes;
    }
    
    @Override
    public int available() throws IOException {
      return (int) Math.min(bytesRemaining, file.available());
    }

    @Override
    public void close() throws IOException {
      file.close();
    }
  }

  private static final String bashCommand = "bash";
  private static final String tailCommand = "tail";
  
  /**
   * Get the desired maximum length of task's logs.
   * @param conf the job to look in
   * @return the number of bytes to cap the log files at
   */
  public static long getTaskLogLength(JobConf conf) {
    return conf.getLong("mapred.userlog.limit.kb", 100) * 1024;
  }

  /**
   * Wrap a command in a shell to capture stdout and stderr to files.
   * If the tailLength is 0, the entire output will be saved.
   * @param cmd The command and the arguments that should be run
   * @param stdoutFilename The filename that stdout should be saved to
   * @param stderrFilename The filename that stderr should be saved to
   * @param tailLength The length of the tail to be saved.
   * @return the modified command that should be run
   */
  public static List captureOutAndError(List cmd, 
                                                File stdoutFilename,
                                                File stderrFilename,
                                                long tailLength
                                               ) throws IOException {
    return captureOutAndError(null, cmd, stdoutFilename,
                              stderrFilename, tailLength, false);
  }

  /**
   * Wrap a command in a shell to capture stdout and stderr to files.
   * Setup commands such as setting memory limit can be passed which 
   * will be executed before exec.
   * If the tailLength is 0, the entire output will be saved.
   * @param setup The setup commands for the execed process.
   * @param cmd The command and the arguments that should be run
   * @param stdoutFilename The filename that stdout should be saved to
   * @param stderrFilename The filename that stderr should be saved to
   * @param tailLength The length of the tail to be saved.
   * @return the modified command that should be run
   */
  public static List captureOutAndError(List setup,
                                                List cmd, 
                                                File stdoutFilename,
                                                File stderrFilename,
                                                long tailLength
                                               ) throws IOException {
    return captureOutAndError(setup, cmd, stdoutFilename, stderrFilename,
        tailLength, false);
  }

  /**
   * Wrap a command in a shell to capture stdout and stderr to files.
   * Setup commands such as setting memory limit can be passed which 
   * will be executed before exec.
   * If the tailLength is 0, the entire output will be saved.
   * @param setup The setup commands for the execed process.
   * @param cmd The command and the arguments that should be run
   * @param stdoutFilename The filename that stdout should be saved to
   * @param stderrFilename The filename that stderr should be saved to
   * @param tailLength The length of the tail to be saved.
   * @deprecated pidFiles are no more used. Instead pid is exported to
   *             env variable JVM_PID.
   * @return the modified command that should be run
   */
  @Deprecated
  public static List captureOutAndError(List setup,
                                                List cmd, 
                                                File stdoutFilename,
                                                File stderrFilename,
                                                long tailLength,
                                                String pidFileName
                                               ) throws IOException {
    return captureOutAndError(setup, cmd, stdoutFilename, stderrFilename,
        tailLength, false, pidFileName);
  }
  
  /**
   * Wrap a command in a shell to capture stdout and stderr to files.
   * Setup commands such as setting memory limit can be passed which 
   * will be executed before exec.
   * If the tailLength is 0, the entire output will be saved.
   * @param setup The setup commands for the execed process.
   * @param cmd The command and the arguments that should be run
   * @param stdoutFilename The filename that stdout should be saved to
   * @param stderrFilename The filename that stderr should be saved to
   * @param tailLength The length of the tail to be saved.
   * @param useSetsid Should setsid be used in the command or not.
   * @deprecated pidFiles are no more used. Instead pid is exported to
   *             env variable JVM_PID.
   * @return the modified command that should be run
   * 
   */
  @Deprecated
  public static List captureOutAndError(List setup,
      List cmd, 
      File stdoutFilename,
      File stderrFilename,
      long tailLength,
      boolean useSetsid,
      String pidFileName
     ) throws IOException {
    return captureOutAndError(setup,cmd, stdoutFilename, stderrFilename, tailLength,
        useSetsid);
  }

  /**
   * Wrap a command in a shell to capture stdout and stderr to files.
   * Setup commands such as setting memory limit can be passed which 
   * will be executed before exec.
   * If the tailLength is 0, the entire output will be saved.
   * @param setup The setup commands for the execed process.
   * @param cmd The command and the arguments that should be run
   * @param stdoutFilename The filename that stdout should be saved to
   * @param stderrFilename The filename that stderr should be saved to
   * @param tailLength The length of the tail to be saved.
   * @param useSetsid Should setsid be used in the command or not.
   * @return the modified command that should be run
   */
  public static List captureOutAndError(List setup,
      List cmd, 
      File stdoutFilename,
      File stderrFilename,
      long tailLength,
      boolean useSetsid
     ) throws IOException {
    List result = new ArrayList(3);
    result.add(bashCommand);
    result.add("-c");
    String mergedCmd = buildCommandLine(setup, cmd,
        stdoutFilename,
        stderrFilename, tailLength,
        useSetsid);
    result.add(mergedCmd.toString());
    return result; 
  }
  
  
  static String buildCommandLine(List setup,
      List cmd, 
      File stdoutFilename,
      File stderrFilename,
      long tailLength,
      boolean useSetSid) throws IOException {
    
    String stdout = FileUtil.makeShellPath(stdoutFilename);
    String stderr = FileUtil.makeShellPath(stderrFilename);
    StringBuffer mergedCmd = new StringBuffer();
    
    if (!Shell.WINDOWS) {
      mergedCmd.append(" export JVM_PID=`echo $$` ; ");
    }

    if (setup != null && setup.size() > 0) {
      mergedCmd.append(addCommand(setup, false));
      mergedCmd.append(";");
    }
    if (tailLength > 0) {
      mergedCmd.append("(");
    } else if (ProcessTree.isSetsidAvailable && useSetSid 
        && !Shell.WINDOWS) {
      mergedCmd.append("exec setsid ");
    } else {
      mergedCmd.append("exec ");
    }
    mergedCmd.append(addCommand(cmd, true));
    mergedCmd.append(" < /dev/null ");
    if (tailLength > 0) {
      mergedCmd.append(" | ");
      mergedCmd.append(tailCommand);
      mergedCmd.append(" -c ");
      mergedCmd.append(tailLength);
      mergedCmd.append(" >> ");
      mergedCmd.append(stdout);
      mergedCmd.append(" ; exit $PIPESTATUS ) 2>&1 | ");
      mergedCmd.append(tailCommand);
      mergedCmd.append(" -c ");
      mergedCmd.append(tailLength);
      mergedCmd.append(" >> ");
      mergedCmd.append(stderr);
      mergedCmd.append(" ; exit $PIPESTATUS");
    } else {
      mergedCmd.append(" 1>> ");
      mergedCmd.append(stdout);
      mergedCmd.append(" 2>> ");
      mergedCmd.append(stderr);
    }
    return mergedCmd.toString();
  }

  /**
   * Add quotes to each of the command strings and
   * return as a single string 
   * @param cmd The command to be quoted
   * @param isExecutable makes shell path if the first 
   * argument is executable
   * @return returns The quoted string. 
   * @throws IOException
   */
  public static String addCommand(List cmd, boolean isExecutable) 
  throws IOException {
    StringBuffer command = new StringBuffer();
    for(String s: cmd) {
    	command.append('\'');
      if (isExecutable) {
        // the executable name needs to be expressed as a shell path for the  
        // shell to find it.
    	  command.append(FileUtil.makeShellPath(new File(s)));
        isExecutable = false; 
      } else {
    	  command.append(s);
      }
      command.append('\'');
      command.append(" ");
    }
    return command.toString();
  }
  
  /**
   * Wrap a command in a shell to capture debug script's 
   * stdout and stderr to debugout.
   * @param cmd The command and the arguments that should be run
   * @param debugoutFilename The filename that stdout and stderr
   *  should be saved to.
   * @return the modified command that should be run
   * @throws IOException
   */
  public static List captureDebugOut(List cmd, 
                                             File debugoutFilename
                                            ) throws IOException {
    String debugout = FileUtil.makeShellPath(debugoutFilename);
    List result = new ArrayList(3);
    result.add(bashCommand);
    result.add("-c");
    StringBuffer mergedCmd = new StringBuffer();
    mergedCmd.append("exec ");
    boolean isExecutable = true;
    for(String s: cmd) {
      if (isExecutable) {
        // the executable name needs to be expressed as a shell path for the  
        // shell to find it.
        mergedCmd.append(FileUtil.makeShellPath(new File(s)));
        isExecutable = false; 
      } else {
        mergedCmd.append(s);
      }
      mergedCmd.append(" ");
    }
    mergedCmd.append(" < /dev/null ");
    mergedCmd.append(" >");
    mergedCmd.append(debugout);
    mergedCmd.append(" 2>&1 ");
    result.add(mergedCmd.toString());
    return result;
  }
  
  public static File getUserLogDir() {  
    return LOG_DIR;
  }
  
} // TaskLog




© 2015 - 2024 Weber Informatics LLC | Privacy Policy