All Downloads are FREE. Search and download functionalities are using the official Maven repository.

tachyon.hadoop.AbstractTFS Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the University of California, Berkeley under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package tachyon.hadoop;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.Progressable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;

import tachyon.Constants;
import tachyon.TachyonURI;
import tachyon.client.ClientContext;
import tachyon.client.TachyonFS;
import tachyon.client.TachyonFile;
import tachyon.client.TachyonStorageType;
import tachyon.client.UnderStorageType;
import tachyon.client.WriteType;
import tachyon.conf.TachyonConf;
import tachyon.thrift.FileBlockInfo;
import tachyon.thrift.FileInfo;
import tachyon.thrift.NetAddress;
import tachyon.util.CommonUtils;

/**
 * Base class for Apache Hadoop based Tachyon {@link FileSystem}. This class really just delegates
 * to {@link tachyon.client.TachyonFS} for most operations.
 *
 * All implementing classes must define {@link #isZookeeperMode()} which states if fault tolerant is
 * used and {@link #getScheme()} for Hadoop's {@link java.util.ServiceLoader} support.
 */
abstract class AbstractTFS extends FileSystem {
  public static final String FIRST_COM_PATH = "tachyon_dep/";
  public static final String RECOMPUTE_PATH = "tachyon_recompute/";

  private static final Logger LOG = LoggerFactory.getLogger(Constants.LOGGER_TYPE);

  private String mUnderFSAddress;

  private URI mUri = null;
  private Path mWorkingDir = new Path(TachyonURI.SEPARATOR);
  private Statistics mStatistics = null;
  private TachyonFS mTFS = null;
  private String mTachyonHeader = null;
  private final TachyonConf mTachyonConf = ClientContext.getConf();

  @Override
  public FSDataOutputStream append(Path cPath, int bufferSize, Progressable progress)
      throws IOException {
    LOG.info("append(" + cPath + ", " + bufferSize + ", " + progress + ")");
    if (mStatistics != null) {
      mStatistics.incrementWriteOps(1);
    }
    TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(cPath));
    long fileId = mTFS.getFileId(path);
    TachyonFile file = mTFS.getFile(fileId);

    if (file.length() > 0) {
      LOG.warn("This maybe an error.");
    }

    return new FSDataOutputStream(file.getOutStream(), mStatistics);
  }

  @Override
  public void close() throws IOException {
    try {
      super.close();
    } finally {
      if (mTFS != null) {
        mTFS.close();
      }
    }
  }

  /**
   * Attempts to create a file. Overwrite will not succeed if the path exists and is a folder.
   *
   * @param cPath path to create
   * @param permission permissions of the created file/folder
   * @param overwrite overwrite if file exists
   * @param bufferSize the size in bytes of the buffer to be used
   * @param replication under filesystem replication factor
   * @param blockSize block size in bytes
   * @param progress queryable progress
   * @return an FSDataOutputStream created at the indicated path of a file
   * @throws IOException if overwrite is not specified and the path already exists or if the path is
   *         a folder
   */
  @Override
  public FSDataOutputStream create(Path cPath, FsPermission permission, boolean overwrite,
      int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
    LOG.info("create(" + cPath + ", " + permission + ", " + overwrite + ", " + bufferSize + ", "
        + replication + ", " + blockSize + ", " + progress + ")");
    if (mStatistics != null) {
      mStatistics.incrementWriteOps(1);
    }

    TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(cPath));
    if (mTFS.exist(path)) {
      if (overwrite && !mTFS.getFileStatus(-1, path).isFolder) {
        if (!mTFS.delete(path, false)) {
          throw new IOException("Failed to delete existing data " + cPath);
        }
      } else {
        throw new IOException(cPath.toString() + " already exists. Directories cannot be "
            + "overwritten with create.");
      }
    }
    long fileId = mTFS.createFile(path, blockSize);
    TachyonFile file = mTFS.getFile(fileId);
    file.setUFSConf(getConf());

    return new FSDataOutputStream(file.getOutStream(), mStatistics);
  }

  /**
   * Opens an FSDataOutputStream at the indicated Path with write-progress reporting. Same as
   * create(), except fails if parent directory doesn't already exist.
   *
   * TODO(hy): We need to refactor this method after having a new internal API support (TACHYON-46).
   *
   * @param cPath the file name to open
   * @param overwrite if a file with this name already exists, then if true, the file will be
   *        overwritten, and if false an error will be thrown.
   * @param bufferSize the size of the buffer to be used.
   * @param replication required block replication for the file.
   * @param blockSize the size in bytes of the buffer to be used.
   * @param progress queryable progress
   * @throws IOException if 1) overwrite is not specified and the path already exists, 2) if the
   *         path is a folder, or 3) the parent directory does not exist
   * @see #setPermission(Path, FsPermission)
   * @deprecated API only for 0.20-append
   */
  @Override
  @Deprecated
  public FSDataOutputStream createNonRecursive(Path cPath, FsPermission permission,
      boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress)
      throws IOException {
    TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(cPath.getParent()));
    if (!mTFS.exist(path)) {
      throw new FileNotFoundException("Parent directory does not exist!");
    }
    return this.create(cPath, permission, overwrite, bufferSize, replication, blockSize, progress);
  }

  @Override
  @Deprecated
  public boolean delete(Path path) throws IOException {
    return delete(path, true);
  }

  /**
   * Attempts to delete the file or directory with the specified path.
   *
   * @param cPath path to delete
   * @param recursive if true, will attempt to delete all children of the path
   * @return true if one or more files/directories were deleted; false otherwise
   * @throws IOException if the path failed to be deleted due to some constraint (ie. non empty
   *         directory with recursive flag disabled)
   */
  @Override
  public boolean delete(Path cPath, boolean recursive) throws IOException {
    LOG.info("delete(" + cPath + ", " + recursive + ")");
    if (mStatistics != null) {
      mStatistics.incrementWriteOps(1);
    }
    TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(cPath));
    if (!mTFS.exist(path)) {
      return false;
    }
    boolean rtn = mTFS.delete(path, recursive);
    if (mTFS.exist(path)) {
      throw new IOException("Failed to delete path " + path.toString());
    }
    return rtn;
  }

  @Override
  public long getDefaultBlockSize() {
    return mTachyonConf.getBytes(Constants.USER_BLOCK_SIZE_BYTES_DEFAULT);
  }

  @Override
  public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len)
      throws IOException {
    if (file == null) {
      return null;
    }
    if (mStatistics != null) {
      mStatistics.incrementReadOps(1);
    }

    TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(file.getPath()));
    long fileId = mTFS.getFileId(path);
    if (fileId == -1) {
      throw new FileNotFoundException("File does not exist: " + file.getPath());
    }

    List blockLocations = new ArrayList();
    List blocks = mTFS.getFileBlocks(fileId);
    for (int k = 0; k < blocks.size(); k ++) {
      FileBlockInfo info = blocks.get(k);
      long offset = info.getOffset();
      long end = offset + info.blockInfo.getLength();
      // Check if there is any overlapping between [start, start+len] and [offset, end]
      if (end >= start && offset <= start + len) {
        ArrayList names = new ArrayList();
        ArrayList hosts = new ArrayList();
        List addrs = Lists.newArrayList();
        // add the existing in-memory block locations first
        for (tachyon.thrift.BlockLocation location : info.getBlockInfo().getLocations()) {
          addrs.add(location.getWorkerAddress());
        }
        // then add under file system location
        addrs.addAll(info.getUfsLocations());
        for (NetAddress addr : addrs) {
          // Name format is "hostname:data transfer port"
          String name = addr.host + ":" + addr.dataPort;
          LOG.debug("getFileBlockLocations : adding name : '" + name + "");
          names.add(name);
          hosts.add(addr.host);
        }
        blockLocations.add(new BlockLocation(CommonUtils.toStringArray(names),
            CommonUtils.toStringArray(hosts), offset, info.blockInfo.getLength()));
      }
    }

    BlockLocation[] ret = new BlockLocation[blockLocations.size()];
    blockLocations.toArray(ret);
    return ret;
  }

  /**
   * {@inheritDoc}
   *
   * If the file does not exist in Tachyon, query it from HDFS.
   */
  @Override
  public FileStatus getFileStatus(Path path) throws IOException {
    TachyonURI tPath = new TachyonURI(Utils.getPathWithoutScheme(path));
    Path hdfsPath = Utils.getHDFSPath(tPath, mUnderFSAddress);

    LOG.info("getFileStatus(" + path + "): HDFS Path: " + hdfsPath + " TPath: " + mTachyonHeader
        + tPath);
    if (mStatistics != null) {
      mStatistics.incrementReadOps(1);
    }
    TachyonFile file;
    try {
      file = mTFS.getFile(tPath);
    } catch (IOException ioe) {
      LOG.info("File does not exist: " + path);
      throw new FileNotFoundException("File does not exist: " + path);
    }

    FileStatus ret =
        new FileStatus(file.length(), file.isDirectory(), file.getDiskReplication(),
            file.getBlockSizeByte(), file.getCreationTimeMs(), file.getCreationTimeMs(), null,
            null, null, new Path(mTachyonHeader + tPath));
    return ret;
  }

  /**
   * Gets the URI schema that maps to the FileSystem. This was introduced in Hadoop 2.x as a means
   * to make loading new FileSystems simpler. This doesn't exist in Hadoop 1.x, so cannot put
   * @Override.
   *
   * @return schema hadoop should map to
   *
   * @see org.apache.hadoop.fs.FileSystem#createFileSystem(java.net.URI,
   *      org.apache.hadoop.conf.Configuration)
   */
  public abstract String getScheme();

  /**
   * Returns an object implementing the Tachyon-specific client API.
   *
   * @return null if initialize() hasn't been called
   */
  public TachyonFS getTachyonFS() {
    return mTFS;
  }

  @Override
  public URI getUri() {
    return mUri;
  }

  @Override
  public Path getWorkingDirectory() {
    LOG.info("getWorkingDirectory: " + mWorkingDir);
    return mWorkingDir;
  }

  /**
   * {@inheritDoc}
   *
   * Sets up a lazy connection to Tachyon through mTFS.
   */
  @Override
  public void initialize(URI uri, Configuration conf) throws IOException {
    super.initialize(uri, conf);
    LOG.info("initialize(" + uri + ", " + conf + "). Connecting to Tachyon: " + uri.toString());
    Utils.addS3Credentials(conf);
    setConf(conf);
    mTachyonHeader = getScheme() + "://" + uri.getHost() + ":" + uri.getPort();

    // Set the statistics member. Use mStatistics instead of the parent class's variable.
    mStatistics = statistics;

    // Load TachyonConf if any and merge to the one in TachyonFS
    TachyonConf siteConf = ConfUtils.loadFromHadoopConfiguration(conf);
    if (siteConf != null) {
      mTachyonConf.merge(siteConf);
    }
    mTachyonConf.set(Constants.MASTER_HOSTNAME, uri.getHost());
    mTachyonConf.set(Constants.MASTER_PORT, Integer.toString(uri.getPort()));
    mTachyonConf.set(Constants.ZOOKEEPER_ENABLED, Boolean.toString(isZookeeperMode()));

    mTFS = TachyonFS.get(mTachyonConf);

    mUri = URI.create(mTachyonHeader);
    mUnderFSAddress = mTFS.getUfsAddress();
    LOG.info(mTachyonHeader + " " + mUri + " " + mUnderFSAddress);
  }

  /**
   * Determines if zookeeper should be used for the FileSystem. This method should only be used for
   * {@link #initialize(java.net.URI, org.apache.hadoop.conf.Configuration)}.
   *
   * @return true if zookeeper should be used
   */
  protected abstract boolean isZookeeperMode();

  @Override
  public FileStatus[] listStatus(Path path) throws IOException {
    TachyonURI tPath = new TachyonURI(Utils.getPathWithoutScheme(path));
    Path hdfsPath = Utils.getHDFSPath(tPath, mUnderFSAddress);
    LOG.info("listStatus(" + path + "): HDFS Path: " + hdfsPath);

    if (mStatistics != null) {
      mStatistics.incrementReadOps(1);
    }

    if (!mTFS.exist(tPath)) {
      throw new FileNotFoundException("File does not exist: " + path);
    }

    List files = mTFS.listStatus(tPath);
    FileStatus[] ret = new FileStatus[files.size()];
    for (int k = 0; k < files.size(); k ++) {
      FileInfo info = files.get(k);
      // TODO(hy): Replicate 3 with the number of disk replications.
      ret[k] =
          new FileStatus(info.getLength(), info.isFolder, 3, info.getBlockSizeBytes(),
              info.getCreationTimeMs(), info.getCreationTimeMs(), null, null, null, new Path(
                  mTachyonHeader + info.getPath()));
    }
    return ret;
  }

  /**
   * Attempts to create a folder with the specified path. Parent directories will be created. Mkdirs
   * will fail if the path already exists or a parent is a file.
   *
   * @param cPath path to create
   * @param permission permissions to grant the created folder
   * @return true if the indicated folder is created successfully, false otherwise
   * @throws IOException if the folder cannot be created (e.g., it already exists)
   */
  @Override
  public boolean mkdirs(Path cPath, FsPermission permission) throws IOException {
    LOG.info("mkdirs(" + cPath + ", " + permission + ")");
    if (mStatistics != null) {
      mStatistics.incrementWriteOps(1);
    }
    TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(cPath));
    return mTFS.mkdir(path);
  }

  /**
   * Attempts to open the specified file for reading.
   *
   * @param cPath the file name to open
   * @param bufferSize the size in bytes of the buffer to be used
   * @return an FSDataInputStream at the indicated path of a file
   * @throws IOException if the file cannot be opened (e.g., the path is a folder)
   */
  @Override
  public FSDataInputStream open(Path cPath, int bufferSize) throws IOException {
    LOG.info("open(" + cPath + ", " + bufferSize + ")");
    if (mStatistics != null) {
      mStatistics.incrementReadOps(1);
    }

    TachyonURI path = new TachyonURI(Utils.getPathWithoutScheme(cPath));
    long fileId = mTFS.getFileId(path);

    return new FSDataInputStream(new HdfsFileInputStream(mTFS, fileId, Utils.getHDFSPath(path,
        mUnderFSAddress), getConf(), bufferSize, mStatistics, mTachyonConf));
  }

  @Override
  public boolean rename(Path src, Path dst) throws IOException {
    LOG.info("rename(" + src + ", " + dst + ")");
    if (mStatistics != null) {
      mStatistics.incrementWriteOps(1);
    }

    TachyonURI srcPath = new TachyonURI(Utils.getPathWithoutScheme(src));
    TachyonURI dstPath = new TachyonURI(Utils.getPathWithoutScheme(dst));
    FileInfo info;
    try {
      info = mTFS.getFileStatus(-1, dstPath);
    } catch (IOException ioe) {
      info = null;
    }
    // If the destination is an existing folder, try to move the src into the folder
    if (info != null && info.isFolder) {
      dstPath = dstPath.join(srcPath.getName());
    }
    try {
      return mTFS.rename(srcPath, dstPath);
    } catch (IOException ioe) {
      LOG.error("Failed to rename {} to {}", src, dst, ioe);
      return false;
    }
  }

  @Override
  public void setWorkingDirectory(Path path) {
    LOG.info("setWorkingDirectory(" + path + ")");
    if (path.isAbsolute()) {
      mWorkingDir = path;
    } else {
      mWorkingDir = new Path(mWorkingDir, path);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy