All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.yarn.util.FSDownload Maven / Gradle / Ivy

 /**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.yarn.util;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URISyntaxException;
import java.security.PrivilegedExceptionAction;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.regex.Pattern;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.util.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Options.Rename;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.RunJar;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;

import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.thirdparty.com.google.common.cache.CacheLoader;
import org.apache.hadoop.thirdparty.com.google.common.cache.LoadingCache;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures;
import org.apache.hadoop.yarn.exceptions.YarnException;

import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE;
import static org.apache.hadoop.util.functional.FutureIO.awaitFuture;

 /**
 * Download a single URL to the local disk.
 *
 */
@LimitedPrivate({"YARN", "MapReduce"})
public class FSDownload implements Callable {

  private static final Logger LOG =
      LoggerFactory.getLogger(FSDownload.class);

  private FileContext files;
  private final UserGroupInformation userUgi;
  private Configuration conf;
  private LocalResource resource;
  private final LoadingCache> statCache;
  
  /** The local FS dir path under which this resource is to be localized to */
  private Path destDirPath;

  private static final FsPermission cachePerms = new FsPermission(
      (short) 0755);
  static final FsPermission PUBLIC_FILE_PERMS = new FsPermission((short) 0555);
  static final FsPermission PRIVATE_FILE_PERMS = new FsPermission(
      (short) 0500);
  static final FsPermission PUBLIC_DIR_PERMS = new FsPermission((short) 0755);
  static final FsPermission PRIVATE_DIR_PERMS = new FsPermission((short) 0700);


  public FSDownload(FileContext files, UserGroupInformation ugi, Configuration conf,
      Path destDirPath, LocalResource resource) {
    this(files, ugi, conf, destDirPath, resource, null);
  }

  public FSDownload(FileContext files, UserGroupInformation ugi, Configuration conf,
      Path destDirPath, LocalResource resource,
      LoadingCache> statCache) {
    this.conf = conf;
    this.destDirPath = destDirPath;
    this.files = files;
    this.userUgi = ugi;
    this.resource = resource;
    this.statCache = statCache;
  }

  LocalResource getResource() {
    return resource;
  }

  private void createDir(Path path, FsPermission perm) throws IOException {
    files.mkdir(path, perm, false);
    if (!perm.equals(files.getUMask().applyUMask(perm))) {
      files.setPermission(path, perm);
    }
  }

  /**
   * Creates the cache loader for the status loading cache. This should be used
   * to create an instance of the status cache that is passed into the
   * FSDownload constructor.
   *
   * @param conf configuration.
   * @return cache loader for the status loading cache.
   */
  public static CacheLoader>
      createStatusCacheLoader(final Configuration conf) {
    return new CacheLoader>() {
      public Future load(Path path) {
        try {
          FileSystem fs = path.getFileSystem(conf);
          return Futures.immediateFuture(fs.getFileStatus(path));
        } catch (Throwable th) {
          // report failures so it can be memoized
          return Futures.immediateFailedFuture(th);
        }
      }
    };
  }

  /**
   * Returns a boolean to denote whether a cache file is visible to all (public)
   * or not.
   *
   * @param fs fileSystem.
   * @param current current path.
   * @param sStat file status.
   * @param statCache stat cache.
   * @return true if the path in the current path is visible to all, false
   * otherwise
   * @throws IOException io error occur.
   */
  @Private
  public static boolean isPublic(FileSystem fs, Path current, FileStatus sStat,
      LoadingCache> statCache) throws IOException {
    current = fs.makeQualified(current);
    //the leaf level file should be readable by others
    if (!checkPublicPermsForAll(fs, sStat, FsAction.READ_EXECUTE, FsAction.READ)) {
      return false;
    }

    if (Shell.WINDOWS && fs instanceof LocalFileSystem) {
      // Relax the requirement for public cache on LFS on Windows since default
      // permissions are "700" all the way up to the drive letter. In this
      // model, the only requirement for a user is to give EVERYONE group
      // permission on the file and the file will be considered public.
      // This code path is only hit when fs.default.name is file:/// (mainly
      // in tests).
      return true;
    }
    return ancestorsHaveExecutePermissions(fs, current.getParent(), statCache);
  }

  private static boolean checkPublicPermsForAll(FileSystem fs, 
        FileStatus status, FsAction dir, FsAction file) 
    throws IOException {
    FsPermission perms = status.getPermission();
    FsAction otherAction = perms.getOtherAction();
    if (status.isDirectory()) {
      if (!otherAction.implies(dir)) {
        return false;
      }
      
      for (FileStatus child : fs.listStatus(status.getPath())) {
        if(!checkPublicPermsForAll(fs, child, dir, file)) {
          return false;
        }
      }
      return true;
    }
    return (otherAction.implies(file));
  }

  /**
   * Returns true if all ancestors of the specified path have the 'execute'
   * permission set for all users (i.e. that other users can traverse
   * the directory hierarchy to the given path)
   */
  @VisibleForTesting
  static boolean ancestorsHaveExecutePermissions(FileSystem fs,
      Path path, LoadingCache> statCache)
      throws IOException {
    Path current = path;
    while (current != null) {
      //the subdirs in the path should have execute permissions for others
      if (!checkPermissionOfOther(fs, current, FsAction.EXECUTE, statCache)) {
        return false;
      }
      current = current.getParent();
    }
    return true;
  }

  /**
   * Checks for a given path whether the Other permissions on it 
   * imply the permission in the passed FsAction
   * @param fs
   * @param path
   * @param action
   * @return true if the path in the uri is visible to all, false otherwise
   * @throws IOException
   */
  private static boolean checkPermissionOfOther(FileSystem fs, Path path,
      FsAction action, LoadingCache> statCache)
      throws IOException {
    FileStatus status = getFileStatus(fs, path, statCache);
    FsPermission perms = status.getPermission();
    FsAction otherAction = perms.getOtherAction();
    return otherAction.implies(action);
  }

  /**
   * Obtains the file status, first by checking the stat cache if it is
   * available, and then by getting it explicitly from the filesystem. If we got
   * the file status from the filesystem, it is added to the stat cache.
   *
   * The stat cache is expected to be managed by callers who provided it to
   * FSDownload.
   */
  private static FileStatus getFileStatus(final FileSystem fs, final Path path,
      LoadingCache> statCache) throws IOException {
    // if the stat cache does not exist, simply query the filesystem
    if (statCache == null) {
      return fs.getFileStatus(path);
    }

    try {
      // get or load it from the cache
      return statCache.get(path).get();
    } catch (ExecutionException e) {
      Throwable cause = e.getCause();
      // the underlying exception should normally be IOException
      if (cause instanceof IOException) {
        throw (IOException)cause;
      } else {
        throw new IOException(cause);
      }
    } catch (InterruptedException e) { // should not happen
      Thread.currentThread().interrupt();
      throw new IOException(e);
    }
  }

  /**
   * Localize files.
   * @param destination destination directory
   * @throws IOException cannot read or write file
   * @throws YarnException subcommand returned an error
   */
  private void verifyAndCopy(Path destination)
      throws IOException, YarnException {
    final Path sCopy;
    try {
      sCopy = resource.getResource().toPath();
    } catch (URISyntaxException e) {
      throw new IOException("Invalid resource", e);
    }
    FileSystem sourceFs = sCopy.getFileSystem(conf);
    FileStatus sStat = sourceFs.getFileStatus(sCopy);
    if (sStat.getModificationTime() != resource.getTimestamp()) {
      throw new IOException("Resource " + sCopy + " changed on src filesystem" +
          " - expected: " +
          "\"" + Times.formatISO8601(resource.getTimestamp()) + "\"" +
          ", was: " +
          "\"" + Times.formatISO8601(sStat.getModificationTime()) + "\"" +
          ", current time: " + "\"" + Times.formatISO8601(Time.now()) + "\"");
    }
    if (resource.getVisibility() == LocalResourceVisibility.PUBLIC) {
      if (!isPublic(sourceFs, sCopy, sStat, statCache)) {
        throw new IOException("Resource " + sCopy +
            " is not publicly accessible and as such cannot be part of the" +
            " public cache.");
      }
    }

    downloadAndUnpack(sCopy, sStat, destination);
  }

  /**
   * Copy source path to destination with localization rules.
   * @param source source path to copy. Typically HDFS or an object store.
   * @param sourceStatus status of source
   * @param destination destination path. Typically local filesystem
   * @exception YarnException Any error has occurred
   */
  private void downloadAndUnpack(Path source,
      FileStatus sourceStatus,  Path destination)
      throws YarnException {
    try {
      FileSystem sourceFileSystem = source.getFileSystem(conf);
      FileSystem destinationFileSystem = destination.getFileSystem(conf);
      if (sourceStatus.isDirectory()) {
        FileUtil.copy(
            sourceFileSystem, sourceStatus,
            destinationFileSystem, destination, false,
            true, conf);
      } else {
        unpack(source, destination, sourceFileSystem, destinationFileSystem);
      }
    } catch (Exception e) {
      throw new YarnException("Download and unpack failed", e);
    }
  }

  /**
   * Do the localization action on the input stream.
   * We use the deprecated method RunJar.unJarAndSave for compatibility reasons.
   * We should use the more efficient RunJar.unJar in the future.
   * @param source Source path
   * @param destination Destination pth
   * @param sourceFileSystem Source filesystem
   * @param destinationFileSystem Destination filesystem
   * @throws IOException Could not read or write stream
   * @throws InterruptedException Operation interrupted by caller
   * @throws ExecutionException Could not create thread pool execution
   */
  @SuppressWarnings("deprecation")
  private void unpack(Path source, Path destination,
                      FileSystem sourceFileSystem,
                      FileSystem destinationFileSystem)
      throws IOException, InterruptedException, ExecutionException {
    try (InputStream inputStream = awaitFuture(
        sourceFileSystem.openFile(source)
            .opt(FS_OPTION_OPENFILE_READ_POLICY,
                FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE)
            .build())) {
      File dst = new File(destination.toUri());
      String lowerDst = StringUtils.toLowerCase(dst.getName());
      switch (resource.getType()) {
      case ARCHIVE:
        if (lowerDst.endsWith(".jar")) {
          RunJar.unJar(inputStream, dst, RunJar.MATCH_ANY);
        } else if (lowerDst.endsWith(".zip")) {
          FileUtil.unZip(inputStream, dst);
        } else if (lowerDst.endsWith(".tar.gz") ||
            lowerDst.endsWith(".tgz") ||
            lowerDst.endsWith(".tar")) {
          FileUtil.unTar(inputStream, dst, lowerDst.endsWith("gz"));
        } else {
          LOG.warn("Cannot unpack " + source);
          try (OutputStream outputStream =
                   destinationFileSystem.create(destination, true)) {
            IOUtils.copy(inputStream, outputStream);
          }
        }
        break;
      case PATTERN:
        if (lowerDst.endsWith(".jar")) {
          String p = resource.getPattern();
          if (!dst.exists() && !dst.mkdir()) {
            throw new IOException("Unable to create directory: [" + dst + "]");
          }
          RunJar.unJarAndSave(inputStream, dst, source.getName(),
              p == null ? RunJar.MATCH_ANY : Pattern.compile(p));
        } else if (lowerDst.endsWith(".zip")) {
          LOG.warn("Treating [" + source + "] as an archive even though it " +
              "was specified as PATTERN");
          FileUtil.unZip(inputStream, dst);
        } else if (lowerDst.endsWith(".tar.gz") ||
            lowerDst.endsWith(".tgz") ||
            lowerDst.endsWith(".tar")) {
          LOG.warn("Treating [" + source + "] as an archive even though it " +
              "was specified as PATTERN");
          FileUtil.unTar(inputStream, dst, lowerDst.endsWith("gz"));
        } else {
          LOG.warn("Cannot unpack " + source);
          try (OutputStream outputStream =
                   destinationFileSystem.create(destination, true)) {
            IOUtils.copy(inputStream, outputStream);
          }
        }
        break;
      case FILE:
      default:
        try (OutputStream outputStream =
                 destinationFileSystem.create(destination, true)) {
          IOUtils.copy(inputStream, outputStream);
        }
        break;
      }
      // TODO Should calculate here before returning
      //return FileUtil.getDU(destDir);
    }
  }

  @Override
  public Path call() throws Exception {
    final Path sCopy;
    try {
      sCopy = resource.getResource().toPath();
    } catch (URISyntaxException e) {
      throw new IOException("Invalid resource", e);
    }

    LOG.debug("Starting to download {} {} {}", sCopy,
        resource.getType(), resource.getPattern());

    final Path destinationTmp = new Path(destDirPath + "_tmp");
    createDir(destinationTmp, cachePerms);
    Path dFinal =
        files.makeQualified(new Path(destinationTmp, sCopy.getName()));
    try {
      if (userUgi == null) {
        verifyAndCopy(dFinal);
      } else {
        userUgi.doAs(new PrivilegedExceptionAction() {
          @Override
          public Void run() throws Exception {
            verifyAndCopy(dFinal);
            return null;
          }
        });
      }
      changePermissions(dFinal.getFileSystem(conf), dFinal);
      files.rename(destinationTmp, destDirPath, Rename.OVERWRITE);

      LOG.debug("File has been downloaded to {} from {}",
          new Path(destDirPath, sCopy.getName()), sCopy);
    } catch (Exception e) {
      try {
        files.delete(destDirPath, true);
      } catch (IOException ignore) {
      }
      throw e;
    } finally {
      try {
        files.delete(destinationTmp, true);
      } catch (FileNotFoundException ignore) {
      }
      conf = null;
      resource = null;
    }
    return files.makeQualified(new Path(destDirPath, sCopy.getName()));
  }

  /**
   * Recursively change permissions of all files/dirs on path based 
   * on resource visibility.
   * Change to 755 or 700 for dirs, 555 or 500 for files.
   * @param fs FileSystem
   * @param path Path to modify perms for
   * @throws IOException io error occur.
   * @throws InterruptedException 
   */
  private void changePermissions(FileSystem fs, final Path path)
      throws IOException, InterruptedException {
    File f = new File(path.toUri());
    if (FileUtils.isSymlink(f)) {
      // avoid following symlinks when changing permissions
      return;
    }
    boolean isDir = f.isDirectory();
    FsPermission perm = cachePerms;
    // set public perms as 755 or 555 based on dir or file
    if (resource.getVisibility() == LocalResourceVisibility.PUBLIC) {
      perm = isDir ? PUBLIC_DIR_PERMS : PUBLIC_FILE_PERMS;
    }
    // set private perms as 700 or 500
    else {
      // PRIVATE:
      // APPLICATION:
      perm = isDir ? PRIVATE_DIR_PERMS : PRIVATE_FILE_PERMS;
    }

    LOG.debug("Changing permissions for path {} to perm {}", path, perm);

    final FsPermission fPerm = perm;
    if (null == userUgi) {
      files.setPermission(path, perm);
    }
    else {
      userUgi.doAs(new PrivilegedExceptionAction() {
        public Void run() throws Exception {
          files.setPermission(path, fPerm);
          return null;
        }
      });
    }
    if (isDir) {
      FileStatus[] statuses = fs.listStatus(path);
      for (FileStatus status : statuses) {
        changePermissions(fs, status.getPath());
      }
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy