All Downloads are FREE. Search and download functionalities are using the official Maven repository.

tachyon.client.UfsUtils Maven / Gradle / Ivy

/*
 * Licensed to the University of California, Berkeley under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package tachyon.client;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.UnknownHostException;
import java.util.LinkedList;
import java.util.Queue;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import tachyon.Constants;
import tachyon.TachyonURI;
import tachyon.collections.Pair;
import tachyon.collections.PrefixList;
import tachyon.conf.TachyonConf;
import tachyon.underfs.UnderFileSystem;
import tachyon.util.io.PathUtils;
import tachyon.util.network.NetworkAddressUtils;

/**
 * Utilities related to under file system.
 */
public final class UfsUtils {
  private static final Logger LOG = LoggerFactory.getLogger(Constants.LOGGER_TYPE);

  /**
   * Builds a new path relative to a given mTachyonFS root by retrieving the given path relative to
   * the ufsRootPath.
   *
   * @param tfsRootPath the destination point in mTachyonFS to load the under FS path onto
   * @param ufsRootPath the source path in the under FS to be loaded
   * @param path the path in the under FS be loaded, path.startsWith(ufsRootPath) must be true
   * @return the new path relative to tfsRootPath
   */
  private static TachyonURI buildTFSPath(
      TachyonURI tfsRootPath, TachyonURI ufsRootPath, TachyonURI path) {
    String filePath = path.getPath().substring(ufsRootPath.getPath().length());
    if (filePath.isEmpty()) {
      // retrieve the basename in ufsRootPath
      filePath = path.getPath().substring(
          ufsRootPath.getPath().lastIndexOf(TachyonURI.SEPARATOR) + 1);
    }
    return new TachyonURI(PathUtils.concatPath(tfsRootPath, filePath));
  }

  /**
   * Loads files under path "ufsAddrRootPath" (excluding excludePathPrefix relative to the path) to
   * the given tfs under a given destination path.
   *
   * @param tfsAddrRootPath the mTachyonFS address and path to load the src files, like
   *        "tachyon://host:port/dest".
   * @param ufsAddrRootPath the address and root path of the under FS, like "hdfs://host:port/src".
   * @param excludePaths paths to exclude from ufsRootPath, which will not be loaded in mTachyonFS.
   * @param tachyonConf the instance of {@link tachyon.conf.TachyonConf} to be used.
   * @throws IOException when an event that prevents the operation from completing is encountered
   */
  private static void loadUfs(TachyonURI tfsAddrRootPath, TachyonURI ufsAddrRootPath,
      String excludePaths, TachyonConf tachyonConf) throws IOException {
    TachyonFS tfs = TachyonFS.get(tfsAddrRootPath, tachyonConf);

    PrefixList excludePathPrefix = new PrefixList(excludePaths, ";");

    loadUfs(tfs, tfsAddrRootPath, ufsAddrRootPath, excludePathPrefix, tachyonConf);
  }

  /**
   * Loads files under path "ufsAddress/ufsRootPath" (excluding excludePathPrefix) to the given tfs
   * under the given tfsRootPath directory.
   *
   * @param tfs the mTachyonFS handler created out of address like "tachyon://host:port"
   * @param tachyonPath the destination point in mTachyonFS to load the under FS path onto
   * @param ufsAddrRootPath the address and root path of the under FS, like "hdfs://host:port/dir".
   * @param excludePathPrefix paths to exclude from ufsRootPath, which will not be registered in
   *        mTachyonFS.
   * @param tachyonConf instance of TachyonConf
   * @throws IOException when an event that prevents the operation from completing is encountered
   */
  @Deprecated
  public static void loadUfs(TachyonFS tfs, TachyonURI tachyonPath, TachyonURI ufsAddrRootPath,
      PrefixList excludePathPrefix, TachyonConf tachyonConf) throws IOException {
    LOG.info("Loading to " + tachyonPath + " " + ufsAddrRootPath + " " + excludePathPrefix);
    try {
      // resolve and replace hostname embedded in the given ufsAddress/tachyonAddress
      ufsAddrRootPath = NetworkAddressUtils.replaceHostName(ufsAddrRootPath);
      tachyonPath = NetworkAddressUtils.replaceHostName(tachyonPath);
    } catch (UnknownHostException e) {
      LOG.error("Failed to resolve hostname", e);
      throw new IOException(e);
    }

    Pair ufsPair = UnderFileSystem.parse(ufsAddrRootPath, tachyonConf);
    String ufsAddress = ufsPair.getFirst();
    String ufsRootPath = ufsPair.getSecond();

    LOG.debug("Loading ufs, address:" + ufsAddress + "; root path: " + ufsRootPath);

    // create the under FS handler (e.g. hdfs, local FS, s3 etc.)
    UnderFileSystem ufs = UnderFileSystem.get(ufsAddress, tachyonConf);

    if (!ufs.exists(ufsAddrRootPath.toString())) {
      throw new FileNotFoundException("ufs path " + ufsAddrRootPath + " not found.");
    }

    // directory name to load, either the path parent or the actual path if it is a directory
    TachyonURI directoryName;
    if (ufs.isFile(ufsAddrRootPath.toString())) {
      if ((ufsRootPath == null) || ufsRootPath.isEmpty() || ufsRootPath.equals("/")) {
        directoryName = TachyonURI.EMPTY_URI;
      } else {
        int lastSlashPos = ufsRootPath.lastIndexOf('/');
        if (lastSlashPos > 0) {
          directoryName = new TachyonURI(ufsRootPath.substring(0, lastSlashPos)); // trim the slash
        } else {
          directoryName = TachyonURI.EMPTY_URI;
        }
      }
    } else {
      directoryName = tachyonPath;
    }

    if (!directoryName.equals(TachyonURI.EMPTY_URI)) {
      if (!tfs.exist(directoryName)) {
        LOG.debug("Loading ufs. Make dir if needed for '" + directoryName + "'.");
        tfs.mkdir(directoryName);
      }
      // TODO(hy): Add the following.
      // if (tfs.mkdir(tfsRootPath)) {
      // LOG.info("directory " + tfsRootPath + " does not exist in Tachyon: created");
      // } else {
      // throw new IOException("Failed to create folder in Tachyon: " + tfsRootPath);
      // }
    }

    Queue ufsPathQueue = new LinkedList();
    if (excludePathPrefix.outList(ufsRootPath)) {
      ufsPathQueue.add(ufsAddrRootPath);
    }

    while (!ufsPathQueue.isEmpty()) {
      TachyonURI ufsPath = ufsPathQueue.poll(); // this is the absolute path
      LOG.debug("Loading: " + ufsPath);
      if (ufs.isFile(ufsPath.toString())) { // TODO(hy): Fix path matching issue.
        TachyonURI tfsPath = buildTFSPath(directoryName, ufsAddrRootPath, ufsPath);
        LOG.debug("Loading ufs. tfs path = " + tfsPath + ".");
        if (tfs.exist(tfsPath)) {
          LOG.debug("File " + tfsPath + " already exists in Tachyon.");
          continue;
        }
        long fileId = tfs.createFile(tfsPath, ufsPath);
        if (fileId == -1) {
          LOG.warn("Failed to create tachyon file: " + tfsPath);
        } else {
          LOG.debug("Create tachyon file " + tfsPath + " with file id " + fileId + " and "
              + "checkpoint location " + ufsPath);
        }
      } else { // ufsPath is a directory
        LOG.debug("Loading ufs. ufs path is a directory.");
        String[] files = ufs.list(ufsPath.toString()); // ufs.list() returns relative path
        if (files != null) {
          for (String filePath : files) {
            if (filePath.isEmpty()) { // Prevent infinite loops
              continue;
            }
            LOG.debug("Get: " + filePath);
            String aPath = PathUtils.concatPath(ufsPath, filePath);
            String checkPath = aPath.substring(ufsAddrRootPath.toString().length());
            if (checkPath.startsWith(TachyonURI.SEPARATOR)) {
              checkPath = checkPath.substring(TachyonURI.SEPARATOR.length());
            }
            if (excludePathPrefix.inList(checkPath)) {
              LOG.debug("excluded: " + checkPath);
            } else {
              ufsPathQueue.add(new TachyonURI(aPath));
            }
          }
        }
        // ufsPath is a directory, so only concat the tfsRoot with the relative path
        TachyonURI tfsPath = new TachyonURI(PathUtils.concatPath(
            tachyonPath, ufsPath.getPath().substring(ufsAddrRootPath.getPath().length())));
        LOG.debug("Loading ufs. ufs path is a directory. tfsPath = " + tfsPath + ".");
        if (!tfs.exist(tfsPath)) {
          LOG.debug("Loading ufs. ufs path is a directory. make dir = "
                  + tfsPath + ".");
          tfs.mkdir(tfsPath);
          // TODO(hy): Add the following.
          // if (tfs.mkdir(tfsPath)) {
          // LOG.info("Created mTachyonFS folder " + tfsPath + " with checkpoint location " +
          // ufsPath);
          // } else {
          // LOG.info("Failed to create tachyon folder: " + tfsPath);
          // }
        }
      }
    }
  }

  public static void main(String[] args) {
    if (!(args.length == 2 || args.length == 3)) {
      printUsage();
      System.exit(-1);
    }

    String exList = (args.length == 3) ? args[2] : "";

    try {
      loadUfs(new TachyonURI(args[0]), new TachyonURI(args[1]), exList, new TachyonConf());
    } catch (Exception e) {
      e.printStackTrace();
      printUsage();
      System.exit(-1);
    }

    System.exit(0);
  }

  public static void printUsage() {
    String cmd = "java -cp " + Constants.TACHYON_JAR + " tachyon.client.UfsUtils ";

    System.out.println("Usage: " + cmd + "  "
        + "[]");
    System.out.println("Example: " + cmd + "tachyon://127.0.0.1:19998/a hdfs://localhost:9000/b c");
    System.out.println("Example: " + cmd + "tachyon://127.0.0.1:19998/a file:///b c");
    System.out.println("Example: " + cmd + "tachyon://127.0.0.1:19998/a /b c");
    System.out.print("In the TFS, all files under local FS /b will be registered under /a, ");
    System.out.println("except for those with prefix c");
  }

  private UfsUtils() {} // prevent instantiation
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy