All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.tools.util.DistCpUtils Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.tools.util;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
import org.apache.hadoop.tools.mapred.UniformSizeInputFormat;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.mapreduce.InputFormat;

import java.io.IOException;
import java.util.EnumSet;
import java.util.Locale;
import java.text.DecimalFormat;
import java.net.URI;
import java.net.InetAddress;
import java.net.UnknownHostException;

/**
 * Utility functions used in DistCp.
 */
public class DistCpUtils {

  private static final Log LOG = LogFactory.getLog(DistCpUtils.class);

  /**
   * Unit suffixes used by {@link #getStringDescriptionFor(long)}, one per
   * power of 1024. 'E' is included so that every non-negative {@code long}
   * (up to roughly 8 * 1024^6) maps to a valid suffix.
   */
  private static final char[] SIZE_UNITS = {'B', 'K', 'M', 'G', 'T', 'P', 'E'};

  /**
   * Retrieves size of the file at the specified path.
   * @param path The path of the file whose size is sought.
   * @param configuration Configuration, to retrieve the appropriate FileSystem.
   * @return The file-size, in number of bytes.
   * @throws IOException on failure.
   */
  public static long getFileSize(Path path, Configuration configuration)
                                            throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Retrieving file size for: " + path);
    }
    return path.getFileSystem(configuration).getFileStatus(path).getLen();
  }

  /**
   * Utility to publish a value to a configuration. The value is stored in
   * its {@link String#valueOf(Object)} form.
   * @param configuration The Configuration to which the value must be written.
   * @param label The label for the value being published.
   * @param value The value being published.
   * @param <T> The type of the value.
   */
  public static <T> void publish(Configuration configuration,
                                 String label, T value) {
    configuration.set(label, String.valueOf(value));
  }

  /**
   * Utility to retrieve a specified key from a Configuration.
   * The lookup falls back to -1 when the key is absent. A negative result
   * is rejected by an {@code assert}, so the failure is only raised (as an
   * {@link AssertionError}) when the JVM runs with assertions enabled;
   * otherwise the negative value is returned to the caller.
   * @param configuration The Configuration in which the key is sought.
   * @param label The key being sought.
   * @return Integer value of the key.
   */
  public static int getInt(Configuration configuration, String label) {
    int value = configuration.getInt(label, -1);
    assert value >= 0 : "Couldn't find " + label;
    return value;
  }

  /**
   * Utility to retrieve a specified key from a Configuration.
   * The lookup falls back to -1 when the key is absent. A negative result
   * is rejected by an {@code assert}, so the failure is only raised (as an
   * {@link AssertionError}) when the JVM runs with assertions enabled;
   * otherwise the negative value is returned to the caller.
   * @param configuration The Configuration in which the key is sought.
   * @param label The key being sought.
   * @return Long value of the key.
   */
  public static long getLong(Configuration configuration, String label) {
    long value = configuration.getLong(label, -1);
    assert value >= 0 : "Couldn't find " + label;
    return value;
  }

  /**
   * Returns the class that implements a copy strategy. Looks up the
   * configuration key {@code distcp.<strategy>.strategy.impl}, where
   * {@code <strategy>} is the lower-cased strategy name from the options,
   * and falls back to {@link UniformSizeInputFormat} when the key is unset.
   *
   * @param conf - Configuration object
   * @param options - Handle to input options
   * @return Class implementing the strategy specified in options.
   */
  public static Class<? extends InputFormat> getStrategy(Configuration conf,
                                                         DistCpOptions options) {
    // Locale.ROOT keeps the key stable regardless of the JVM's default locale
    // (e.g. a Turkish default would otherwise lower-case 'I' to a dotless 'i').
    String strategyName = options.getCopyStrategy().toLowerCase(Locale.ROOT);
    String confLabel = "distcp." + strategyName + ".strategy.impl";
    return conf.getClass(confLabel, UniformSizeInputFormat.class, InputFormat.class);
  }

  /**
   * Gets relative path of child path with respect to a root path
   * For ex. If childPath = /tmp/abc/xyz/file and
   *            sourceRootPath = /tmp/abc
   * Relative path would be /xyz/file
   *         If childPath = /file and
   *            sourceRootPath = /
   * Relative path would be /file
   *
   * The root's path string is stripped from the front of the child's path
   * string by length only; no check is made that the child actually lies
   * under the root.
   *
   * @param sourceRootPath - Source root path
   * @param childPath - Path for which relative path is required
   * @return - Relative portion of the child path (always prefixed with /
   *           unless it is empty)
   */
  public static String getRelativePath(Path sourceRootPath, Path childPath) {
    String childPathString = childPath.toUri().getPath();
    String sourceRootPathString = sourceRootPath.toUri().getPath();
    if (sourceRootPathString.equals("/")) {
      // Stripping "/" would drop the leading separator, so return as-is.
      return childPathString;
    }
    return childPathString.substring(sourceRootPathString.length());
  }

  /**
   * Pack file preservation attributes into a string, containing
   * just the first character of each preservation attribute
   * @param attributes - Attribute set to preserve
   * @return - String containing first letters of each attribute to preserve
   */
  public static String packAttributes(EnumSet<FileAttribute> attributes) {
    StringBuilder packed = new StringBuilder(attributes.size());
    for (FileAttribute attribute : attributes) {
      packed.append(attribute.name().charAt(0));
    }
    return packed.toString();
  }

  /**
   * Unpacks a preservation attribute string containing the first character of
   * each preservation attribute back to a set of attributes to preserve
   * @param attributes - Attribute string; {@code null} yields an empty set
   * @return - Attribute set
   */
  public static EnumSet<FileAttribute> unpackAttributes(String attributes) {
    EnumSet<FileAttribute> retValue = EnumSet.noneOf(FileAttribute.class);

    if (attributes != null) {
      for (int index = 0; index < attributes.length(); index++) {
        retValue.add(FileAttribute.getAttribute(attributes.charAt(index)));
      }
    }

    return retValue;
  }

  /**
   * Preserve attribute on file matching that of the file status being sent
   * as argument. Permission, replication, group and owner are handled here;
   * block size is not touched by this function. Each attribute is only
   * written when it was requested and differs from the target's current
   * value, and owner/group changes are applied in a single setOwner call.
   *
   * @param targetFS - File system
   * @param path - Path that needs to preserve original file status
   * @param srcFileStatus - Original file status
   * @param attributes - Attribute set that need to be preserved
   * @throws IOException - Exception if any (particularly relating to group/owner
   *                       change or any transient error)
   */
  public static void preserve(FileSystem targetFS, Path path,
                              FileStatus srcFileStatus,
                              EnumSet<FileAttribute> attributes) throws IOException {

    FileStatus targetFileStatus = targetFS.getFileStatus(path);
    String group = targetFileStatus.getGroup();
    String user = targetFileStatus.getOwner();
    boolean chown = false;

    if (attributes.contains(FileAttribute.PERMISSION) &&
        !srcFileStatus.getPermission().equals(targetFileStatus.getPermission())) {
      targetFS.setPermission(path, srcFileStatus.getPermission());
    }

    // Replication is only applied to files, never to directories.
    if (attributes.contains(FileAttribute.REPLICATION) && !targetFileStatus.isDirectory() &&
        srcFileStatus.getReplication() != targetFileStatus.getReplication()) {
      targetFS.setReplication(path, srcFileStatus.getReplication());
    }

    if (attributes.contains(FileAttribute.GROUP) &&
        !group.equals(srcFileStatus.getGroup())) {
      group = srcFileStatus.getGroup();
      chown = true;
    }

    if (attributes.contains(FileAttribute.USER) &&
        !user.equals(srcFileStatus.getOwner())) {
      user = srcFileStatus.getOwner();
      chown = true;
    }

    if (chown) {
      targetFS.setOwner(path, user, group);
    }
  }

  /**
   * Sort sequence file containing Text and FileStatus as key and value
   * respectively. Any existing file at the output location is deleted
   * before sorting.
   *
   * @param fs - File System
   * @param conf - Configuration
   * @param sourceListing - Source listing file
   * @return Path of the sorted file. Is source file with _sorted appended to the name
   * @throws IOException - Any exception during sort.
   */
  public static Path sortListing(FileSystem fs, Configuration conf, Path sourceListing)
      throws IOException {
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, Text.class, FileStatus.class, conf);
    Path output = new Path(sourceListing.toString() + "_sorted");

    if (fs.exists(output)) {
      fs.delete(output, false);
    }

    sorter.sort(sourceListing, output);
    return output;
  }

  /**
   * Per-thread formatter (pattern "0.0") used to render byte counts.
   * DecimalFormat instances are kept thread-local rather than shared.
   */
  private static final ThreadLocal<DecimalFormat> FORMATTER
                        = new ThreadLocal<DecimalFormat>() {
    @Override
    protected DecimalFormat initialValue() {
      return new DecimalFormat("0.0");
    }
  };

  /**
   * Returns the calling thread's formatter.
   * @return The thread-local DecimalFormat with pattern "0.0".
   */
  public static DecimalFormat getFormatter() {
    return FORMATTER.get();
  }

  /**
   * String utility to convert a number-of-bytes to human readable format:
   * the value is divided by 1024 until it drops below 1024, formatted with
   * one decimal place, and suffixed with the matching unit letter
   * (B, K, M, G, T, P, E).
   * @param nBytes The number of bytes.
   * @return The formatted value followed by its unit letter.
   */
  public static String getStringDescriptionFor(long nBytes) {
    double scaled = nBytes;
    int unitIndex = 0;

    // The index bound guarantees a valid suffix even if the table is ever
    // shortened; with 'E' present it is never the limiting condition for a long.
    while (unitIndex < SIZE_UNITS.length - 1 && scaled / 1024 >= 1) {
      scaled /= 1024;
      ++unitIndex;
    }

    return getFormatter().format(scaled) + SIZE_UNITS[unitIndex];
  }

  /**
   * Utility to compare checksums for the paths specified.
   *
   * If checksums can't be retrieved, it doesn't fail the test.
   * Only time the comparison would fail is when checksums are
   * available and they don't match.
   *
   * @param sourceFS FileSystem for the source path.
   * @param source The source path.
   * @param targetFS FileSystem for the target path.
   * @param target The target path.
   * @return true if either checksum is unavailable (null, or retrieval
   * raised an IOException, which is logged), or if both checksums are
   * available and equal; false only when both are available and differ.
   * @throws IOException declared for interface compatibility; retrieval
   * failures are caught and logged inside this method.
   */
  public static boolean checksumsAreEqual(FileSystem sourceFS, Path source,
                                   FileSystem targetFS, Path target)
                                   throws IOException {
    FileChecksum sourceChecksum = null;
    FileChecksum targetChecksum = null;
    try {
      sourceChecksum = sourceFS.getFileChecksum(source);
      targetChecksum = targetFS.getFileChecksum(target);
    } catch (IOException e) {
      // Deliberately best-effort: an unavailable checksum is treated as a match.
      LOG.error("Unable to retrieve checksum for " + source + " or " + target, e);
    }
    return (sourceChecksum == null || targetChecksum == null ||
            sourceChecksum.equals(targetChecksum));
  }

  /**
   * Checks whether two file systems are the same, judged by their URIs:
   * the schemes must be equal (and the source scheme non-null), the hosts
   * must either both be absent or resolve to the same canonical host name,
   * and the ports must be equal.
   *
   * @param srcFs The source file system.
   * @param destFs The destination file system.
   * @return true if the two file systems are considered the same; false
   * otherwise, including when a host name cannot be resolved.
   */
  public static boolean compareFs(FileSystem srcFs, FileSystem destFs) {
    URI srcUri = srcFs.getUri();
    URI dstUri = destFs.getUri();
    if (srcUri.getScheme() == null) {
      return false;
    }
    if (!srcUri.getScheme().equals(dstUri.getScheme())) {
      return false;
    }
    String srcHost = srcUri.getHost();
    String dstHost = dstUri.getHost();
    if (srcHost != null && dstHost != null) {
      try {
        // Compare canonical names so that aliases of one host are treated alike.
        srcHost = InetAddress.getByName(srcHost).getCanonicalHostName();
        dstHost = InetAddress.getByName(dstHost).getCanonicalHostName();
      } catch (UnknownHostException ue) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Could not compare file-systems. Unknown host: ", ue);
        }
        return false;
      }
      if (!srcHost.equals(dstHost)) {
        return false;
      }
    } else if (srcHost != null || dstHost != null) {
      // Exactly one of the two URIs names a host.
      return false;
    }

    // Hosts agree (or both are absent); the ports decide.
    return srcUri.getPort() == dstUri.getPort();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy