All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.yarn.logaggregation.LogAggregationUtils Maven / Gradle / Ivy

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.yarn.logaggregation;

import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

import org.apache.hadoop.classification.VisibleForTesting;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.NoSuchElementException;

@Private
public class LogAggregationUtils {

  public static final String TMP_FILE_SUFFIX = ".tmp";
  private static final String BUCKET_SUFFIX = "bucket-";

  /**
   * Constructs the full filename for an application's log file per node.
   * @param remoteRootLogDir the aggregated remote root log dir
   * @param appId the application Id
   * @param user the application owner
   * @param nodeId the node id
   * @param suffix the log dir suffix
   * @return the remote log file.
   */
  public static Path getRemoteNodeLogFileForApp(Path remoteRootLogDir,
      ApplicationId appId, String user, NodeId nodeId, String suffix) {
    return new Path(getRemoteAppLogDir(remoteRootLogDir, appId, user, suffix),
        getNodeString(nodeId));
  }

  /**
   * Gets the remote app log dir.
   * @param remoteRootLogDir the aggregated log remote root log dir
   * @param appId the application id
   * @param user the application owner
   * @param suffix the log directory suffix
   * @return the remote application specific log dir.
   */
  public static Path getRemoteAppLogDir(Path remoteRootLogDir,
      ApplicationId appId, String user, String suffix) {
    return new Path(getRemoteBucketDir(remoteRootLogDir, user, suffix,
        appId), appId.toString());
  }

  /**
   * Gets the older remote app log dir.
   * @param appId the application id
   * @param user the application owner
   * @param remoteRootLogDir the aggregated log remote root log dir
   * @param suffix the log directory suffix
   * @return the remote application specific log dir.
   */
  public static Path getOlderRemoteAppLogDir(ApplicationId appId,
      String user, Path remoteRootLogDir, String suffix) {
    return new Path(getOlderRemoteLogSuffixedDir(remoteRootLogDir, user,
         suffix), appId.toString());
  }

  public static Path getOlderRemoteAppLogDir(Configuration conf,
      ApplicationId appId, String user, Path remoteRootLogDir, String suffix)
      throws IOException {
    org.apache.hadoop.fs.Path remoteAppDir = null;
    if (user == null) {
      org.apache.hadoop.fs.Path qualifiedRemoteRootLogDir =
          FileContext.getFileContext(conf).makeQualified(remoteRootLogDir);
      FileContext fc = FileContext.getFileContext(
          qualifiedRemoteRootLogDir.toUri(), conf);
      org.apache.hadoop.fs.Path toMatch = LogAggregationUtils
          .getOlderRemoteAppLogDir(appId, "*", remoteRootLogDir, suffix);
      FileStatus[] matching  = fc.util().globStatus(toMatch);
      if (matching == null || matching.length != 1) {
        throw new IOException("Can not find remote application directory for "
            + "the application:" + appId);
      }
      remoteAppDir = matching[0].getPath();
    } else {
      remoteAppDir = LogAggregationUtils.getOlderRemoteAppLogDir(
          appId, user, remoteRootLogDir, suffix);
    }
    return remoteAppDir;
  }

  /**
   * Gets the remote suffixed log dir for the user.
   * @param remoteRootLogDir the aggregated log remote root log dir
   * @param user the application owner
   * @param suffix the log dir suffix
   * @return the remote suffixed log dir.
   */
  public static Path getRemoteLogSuffixedDir(Path remoteRootLogDir,
      String user, String suffix) {
    suffix = getBucketSuffix() + suffix;
    return new Path(getRemoteLogUserDir(remoteRootLogDir, user), suffix);
  }

  /**
   * Gets the older remote suffixed log dir for the user.
   * @param remoteRootLogDir the aggregated log remote root log dir
   * @param user the application owner
   * @param suffix the log dir suffix
   * @return the older remote suffixed log dir.
   */
  public static Path getOlderRemoteLogSuffixedDir(Path remoteRootLogDir,
      String user, String suffix) {
    if (suffix == null || suffix.isEmpty()) {
      return getRemoteLogUserDir(remoteRootLogDir, user);
    }
    // TODO Maybe support suffix to be more than a single file.
    return new Path(getRemoteLogUserDir(remoteRootLogDir, user), suffix);
  }

  /**
   * Gets the remote log user dir.
   * @param remoteRootLogDir the aggregated log remote root log dir
   * @param user the application owner
   * @return the remote per user log dir.
   */
  public static Path getRemoteLogUserDir(Path remoteRootLogDir, String user) {
    return new Path(remoteRootLogDir, user);
  }

  /**
   * Gets the remote log user's bucket dir.
   * @param remoteRootLogDir the aggregated log remote root log dir
   * @param user the application owner
   * @param suffix the log dir suffix
   * @param appId the application id
   * @return the remote log per user per cluster timestamp per bucket dir.
   */
  public static Path getRemoteBucketDir(Path remoteRootLogDir, String user,
      String suffix, ApplicationId appId) {
    int bucket = appId.getId() % 10000;
    String bucketDir = String.format("%04d", bucket);
    return new Path(getRemoteLogSuffixedDir(remoteRootLogDir,
       user, suffix), bucketDir);
  }

  /**
   * Check if older Application Log Directory has to be included.
   * @param conf the configuration
   * @return Is Older App Log Dir enabled?
   */
  public static boolean isOlderPathEnabled(Configuration conf) {
    return conf.getBoolean(YarnConfiguration.
         NM_REMOTE_APP_LOG_DIR_INCLUDE_OLDER,
             YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR_INCLUDE_OLDER);
  }

  /**
   * Returns the bucket suffix component of the log dir.
   * @return the bucket suffix which appended to user log dir
   */
  public static String getBucketSuffix() {
    return BUCKET_SUFFIX;
  }

  
  /**
   * Converts a nodeId to a form used in the app log file name.
   * @param nodeId the nodeId
   * @return the node string to be used to construct the file name.
   */
  @VisibleForTesting
  public static String getNodeString(NodeId nodeId) {
    return nodeId.toString().replace(":", "_");
  }

  @VisibleForTesting
  public static String getNodeString(String nodeId) {
    return nodeId.toString().replace(":", "_");
  }

  /**
   * Return the remote application log directory.
   * @param conf the configuration
   * @param appId the application
   * @param appOwner the application owner
   * @param remoteRootLogDir the remote root log directory
   * @param suffix the log directory suffix
   * @return the remote application log directory path
   * @throws IOException if we can not find remote application log directory
   */
  public static org.apache.hadoop.fs.Path getRemoteAppLogDir(
      Configuration conf, ApplicationId appId, String appOwner,
      org.apache.hadoop.fs.Path remoteRootLogDir, String suffix)
      throws IOException {
    org.apache.hadoop.fs.Path remoteAppDir = null;
    if (appOwner == null) {
      org.apache.hadoop.fs.Path qualifiedRemoteRootLogDir =
          FileContext.getFileContext(conf).makeQualified(remoteRootLogDir);
      FileContext fc = FileContext.getFileContext(
          qualifiedRemoteRootLogDir.toUri(), conf);
      org.apache.hadoop.fs.Path toMatch = LogAggregationUtils
          .getRemoteAppLogDir(remoteRootLogDir, appId, "*", suffix);
      FileStatus[] matching  = fc.util().globStatus(toMatch);
      if (matching == null || matching.length != 1) {
        throw new IOException("Can not find remote application directory for "
            + "the application:" + appId);
      }
      remoteAppDir = matching[0].getPath();
    } else {
      remoteAppDir = LogAggregationUtils.getRemoteAppLogDir(
          remoteRootLogDir, appId, appOwner, suffix);
    }
    return remoteAppDir;
  }

  /**
   * Get all available log files under remote app log directory.
   * @param conf the configuration
   * @param remoteAppLogDir the application log directory
   * @param appId the applicationId
   * @param appOwner the application owner
   * @return the iterator of available log files
   * @throws IOException if there is no log file directory
   */
  public static RemoteIterator getNodeFiles(Configuration conf,
      Path remoteAppLogDir, ApplicationId appId, String appOwner)
      throws IOException {
    Path qualifiedLogDir =
        FileContext.getFileContext(conf).makeQualified(remoteAppLogDir);
    return FileContext.getFileContext(
        qualifiedLogDir.toUri(), conf).listStatus(remoteAppLogDir);
  }

  /**
   * Get all available log files under remote app log directory.
   * @param conf the configuration
   * @param appId the applicationId
   * @param appOwner the application owner
   * @param remoteRootLogDir the remote root log directory
   * @param suffix the log directory suffix
   * @return the iterator of available log files
   * @throws IOException if there is no log file available
   */
  public static RemoteIterator getRemoteNodeFileDir(
      Configuration conf, ApplicationId appId, String appOwner,
      org.apache.hadoop.fs.Path remoteRootLogDir, String suffix)
      throws IOException {
    RemoteIterator nodeFilesCur= null;
    RemoteIterator nodeFilesPrev = null;
    StringBuilder diagnosticsMsg = new StringBuilder();

    // Get Node Files from new app log dir
    try {
      Path remoteAppLogDir = getRemoteAppLogDir(conf, appId, appOwner,
          remoteRootLogDir, suffix);
      nodeFilesCur = getNodeFiles(conf, remoteAppLogDir, appId, appOwner);
    } catch (IOException ex) {
      diagnosticsMsg.append(ex.getMessage() + "\n");
    }

    // Get Node Files from old app log dir
    if (isOlderPathEnabled(conf)) {
      try {
        Path remoteAppLogDir = getOlderRemoteAppLogDir(conf, appId, appOwner,
            remoteRootLogDir, suffix);
        nodeFilesPrev = getNodeFiles(conf,
                remoteAppLogDir, appId, appOwner);
      } catch (IOException ex) {
        diagnosticsMsg.append(ex.getMessage() + "\n");
      }

      // Return older files if new app log dir does not exist
      if (nodeFilesCur == null) {
        return nodeFilesPrev;
      } else if (nodeFilesPrev != null) {
        // Return both new and old node files combined
        RemoteIterator curDir = nodeFilesCur;
        RemoteIterator prevDir = nodeFilesPrev;

        return combineIterators(prevDir, curDir);
      }
    }

    // Error reading from or new app log dir does not exist
    if (nodeFilesCur == null) {
      throw new IOException(diagnosticsMsg.toString());
    }
    return nodeFilesCur;
  }

  /**
   * Get all available log files under remote app log directory.
   * @param conf the configuration
   * @param appId the applicationId
   * @param appOwner the application owner
   * @param remoteRootLogDir the remote root log directory
   * @param suffix the log directory suffix
   * @return the list of available log files
   * @throws IOException if there is no log file available
   */
  public static List getRemoteNodeFileList(
      Configuration conf, ApplicationId appId, String appOwner,
      org.apache.hadoop.fs.Path remoteRootLogDir, String suffix)
      throws IOException {
    StringBuilder diagnosticsMsg = new StringBuilder();
    List nodeFiles = new ArrayList<>();

    // Get Node Files from new app log dir
    try {
      Path remoteAppLogDir = getRemoteAppLogDir(conf, appId, appOwner,
          remoteRootLogDir, suffix);
      Path qualifiedLogDir =
          FileContext.getFileContext(conf).makeQualified(remoteAppLogDir);
      nodeFiles.addAll(Arrays.asList(FileContext.getFileContext(
          qualifiedLogDir.toUri(), conf).util().listStatus(remoteAppLogDir)));
    } catch (IOException ex) {
      diagnosticsMsg.append(ex.getMessage() + "\n");
    }

    // Get Node Files from old app log dir
    if (isOlderPathEnabled(conf)) {
      try {
        Path remoteAppLogDir = getOlderRemoteAppLogDir(conf, appId, appOwner,
            remoteRootLogDir, suffix);
        Path qualifiedLogDir = FileContext.getFileContext(conf).
            makeQualified(remoteAppLogDir);
        nodeFiles.addAll(Arrays.asList(FileContext.getFileContext(
            qualifiedLogDir.toUri(), conf).util().listStatus(remoteAppLogDir)));
      } catch (IOException ex) {
        diagnosticsMsg.append(ex.getMessage() + "\n");
      }
    }

    // Error reading from or new app log dir does not exist
    if (nodeFiles.isEmpty()) {
      throw new IOException(diagnosticsMsg.toString());
    }
    return nodeFiles;
  }

  public static RemoteIterator getRemoteFiles(
      Configuration conf, Path appPath) throws IOException {

    Path qualifiedLogDir =
        FileContext.getFileContext(conf).makeQualified(appPath);
    return FileContext.getFileContext(
        qualifiedLogDir.toUri(), conf).listStatus(appPath);
  }

  public static RemoteIterator getUserRemoteLogDir(
      Configuration conf, String user, Path remoteRootLogDir,
      String remoteRootLogDirSuffix) throws IOException {
    Path userPath = LogAggregationUtils.getRemoteLogSuffixedDir(
        remoteRootLogDir, user, remoteRootLogDirSuffix);
    final RemoteIterator userRootDirFiles =
        getRemoteFiles(conf, userPath);

    RemoteIterator newDirs = new RemoteIterator() {
      private RemoteIterator currentBucketDir =
          LogAggregationUtils.getSubDir(conf, userRootDirFiles);
      @Override
      public boolean hasNext() throws IOException {
        return currentBucketDir != null && currentBucketDir.hasNext() ||
            userRootDirFiles.hasNext();
      }

      @Override
      public FileStatus next() throws IOException {
        FileStatus next = null;
        while (next == null) {
          if (currentBucketDir != null && currentBucketDir.hasNext()) {
            next = currentBucketDir.next();
          } else if (userRootDirFiles.hasNext()) {
            currentBucketDir = LogAggregationUtils.getSubDir(
                conf, userRootDirFiles);
          } else {
            throw new NoSuchElementException();
          }
        }
        return next;
      }
    };

    RemoteIterator allDir = newDirs;
    if (LogAggregationUtils.isOlderPathEnabled(conf)) {
      try {
        Path oldPath = LogAggregationUtils.getOlderRemoteLogSuffixedDir(
            remoteRootLogDir, user, remoteRootLogDirSuffix);
        final RemoteIterator oldUserRootDirFiles =
            getRemoteFiles(conf, oldPath);
        allDir = combineIterators(oldUserRootDirFiles, newDirs);
      } catch (FileNotFoundException e) {
        return newDirs;
      }
    }

    return allDir;
  }

  private static RemoteIterator getSubDir(
      Configuration conf, RemoteIterator rootDir)
      throws IOException {
    if (rootDir.hasNext()) {
      Path userPath = rootDir.next().getPath();
      Path qualifiedLogDir =
          FileContext.getFileContext(conf).makeQualified(userPath);
      return FileContext.getFileContext(
          qualifiedLogDir.toUri(), conf).listStatus(userPath);
    } else {
      return null;
    }
  }

  private static RemoteIterator combineIterators(
      RemoteIterator first, RemoteIterator second) {
    return new RemoteIterator() {
      @Override
      public boolean hasNext() throws IOException {
        return first.hasNext() || second.hasNext();
      }

      @Override
      public FileStatus next() throws IOException {
        return first.hasNext() ? first.next() : second.next();
      }
    };

  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy