All downloads are free. The search and download functionality uses the official Maven repository.

com.sap.hana.datalake.files.HdlfsFileSystem Maven / Gradle / Ivy

package com.sap.hana.datalake.files;

import com.sap.hana.datalake.files.enumeration.CreateOperationMode;
import com.sap.hana.datalake.files.util.LRUCache;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.SWebHdfsFileSystem;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.Progressable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Hadoop {@link FileSystem} registered under the {@code hdlfs} scheme that forwards its
 * operations to a WebHDFS-based delegate file system.
 *
 * <p>Paths that carry the {@code hdlfs} scheme are rewritten to the delegate's scheme and
 * authority before a call is forwarded, and statuses coming back from the delegate are wrapped
 * with {@code HdlfsFileStatus.create}.
 *
 * <p>{@link #mkdirs(Path, FsPermission)} is never sent to the delegate. Directories created
 * through it are only remembered in a small, static (shared by all instances) LRU cache so that
 * later {@code getFileStatus}/{@code listStatus} calls on them do not fail with
 * {@link FileNotFoundException}.
 *
 * <p>When the file output committer optimization is enabled, writes below a
 * {@code /_temporary/} folder are redirected to the output directory itself, and renames/deletes
 * of such temporary paths are skipped.
 */
public class HdlfsFileSystem extends FileSystem {
  public static final String SCHEME = "hdlfs";

  private static final Logger LOG = LoggerFactory.getLogger(HdlfsFileSystem.class);
  private static final String TEMPORARY_DIR = "/_temporary/";
  private static final Pattern AUTHORITY_PATTERN = Pattern.compile(HdlfsConstants.FS_HDLFS_AUTHORITY_REGEX);
  private static final String FILEOUTPUTCOMMITTER_VERSION = "2";
  // Keyed by the fully qualified path string; values are the synthetic directory statuses built in mkdirs().
  private static final LRUCache<String, FileStatus> LOCAL_MKDIR_CACHE = new LRUCache<>(50);
  private static final FileStatus[] NO_FILES = {};

  private FileSystem delegateFS;
  private URI uri;
  private CreateOperationMode createOperationMode;
  private boolean optimizeFileOutputCommit;
  private boolean fsUriRequiresPort;

  /**
   * Initializes this file system with a freshly created delegate: an {@link SWebHdfsFileSystem}
   * when {@code HdlfsConstants.FS_HDLFS_SSL_ENABLED_KEY} is {@code true} (the default used here),
   * a plain {@link WebHdfsFileSystem} otherwise.
   *
   * @param name file system URI
   * @param conf configuration to read the settings from
   * @throws IOException if the URI cannot be parsed or the delegate fails to initialize
   */
  @Override
  public void initialize(final URI name, final Configuration conf) throws IOException {
    final boolean sslEnabled = conf.getBoolean(HdlfsConstants.FS_HDLFS_SSL_ENABLED_KEY, true);
    this.initialize(name, conf, sslEnabled ? new SWebHdfsFileSystem() : new WebHdfsFileSystem());
  }

  /**
   * Initializes this file system on top of the given (not yet initialized) delegate.
   *
   * @param fsUri file system URI; its port (if any) is kept, otherwise 443 is used for an
   *              {@link SWebHdfsFileSystem} delegate and 80 for any other delegate
   * @param conf configuration to read the settings from; a copy of it is handed to the delegate
   * @param webHdfsFileSystem delegate that will receive all forwarded operations
   * @throws IOException if the file container / endpoint cannot be parsed from the URI, the
   *                     delegate URI cannot be built, or the delegate fails to initialize
   * @throws IllegalArgumentException if the file output committer optimization is enabled for a
   *                                  non-empty file container and the configured committer
   *                                  algorithm version is not {@code "2"}
   */
  public void initialize(final URI fsUri, final Configuration conf, final WebHdfsFileSystem webHdfsFileSystem) throws IOException {
    super.initialize(fsUri, conf);
    setConf(conf);

    String fileContainer = conf.get(HdlfsConstants.FS_HDLFS_FILECONTAINER_KEY);

    /* As a requirement,
       - If "fs.hdlfs.connection.id" property is specified, it should be complemented with fs.hdlfs.filecontainer -> "" for RMS.
       - If "fs.hdlfs.filecontainer" property is provided, simply use that value to set the header.
       - If "fs.hdlfs.filecontainer" property is not provided, we need to parse the file container from the URI to
         keep supporting previous configurations.
    */
    if (fileContainer == null) {
      final String[] parsedHost = this.parseURI(fsUri);
      fileContainer = parsedHost[0];
      final String hdlfsEndpoint = parsedHost[1];

      if (fileContainer == null || fileContainer.isEmpty()) {
        throw new IOException("Failed to parse File container from the URI.");
      }

      // the endpoint itself is not used further, it is only validated
      if (hdlfsEndpoint == null || hdlfsEndpoint.isEmpty()) {
        throw new IOException("Failed to parse HDLFS endpoint from the URI.");
      }
    }

    // by default, all optimizations made for file output committer following the stochator pattern will be disabled
    this.optimizeFileOutputCommit = conf.getBoolean(HdlfsConstants.FS_HDLFS_OPTIMIZE_FILEOUTPUTCOMMIT_ENABLED_KEY, false);

    /* We do not need the fileoutputcommitter validation in case we are connecting to the remote object store
       via the "fs.hdlfs.connection.id" property
       Also when connecting to a file container, we only check for file output committer version if the
       optimizations are enabled.
    */
    if (!fileContainer.isEmpty() && this.isOptimizeFileOutputCommitEnabled()) {
      final String fileOutputCommitterVersion = conf.get(HdlfsConstants.MAPREDUCE_FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_KEY);

      if (!FILEOUTPUTCOMMITTER_VERSION.equals(fileOutputCommitterVersion)) {
        throw new IllegalArgumentException(String.format("File Output Committer version is %s but should be %s",
                fileOutputCommitterVersion, FILEOUTPUTCOMMITTER_VERSION));
      }
    }

    this.uri = fsUri;

    final int uriPort = fsUri.getPort();
    this.fsUriRequiresPort = uriPort > 0;
    final int port = this.fsUriRequiresPort
        ? uriPort
        : ((webHdfsFileSystem instanceof SWebHdfsFileSystem) ? 443 : 80);

    final URI delegateFsUri;

    try {
      delegateFsUri = new URI(webHdfsFileSystem.getScheme(), fsUri.getUserInfo(), fsUri.getHost(),
          port, fsUri.getPath(), fsUri.getQuery(), fsUri.getFragment());
    } catch (final URISyntaxException ex) {
      throw new IOException(ex);
    }

    // the delegate works on its own copy so that the caller's configuration is not modified
    final Configuration delegateFsConf = new Configuration(conf);

    // we can still have an empty file container if connection-id property is used
    delegateFsConf.set(HdfsClientConfigKeys.DFS_WEBHDFS_CUSTOM_CONNECTION_CONFIGURATOR_IMPL, HdlfsConnectionConfigurator.class.getName());
    delegateFsConf.set(HdlfsConstants.FS_HDLFS_FILECONTAINER_KEY, fileContainer);
    delegateFsConf.setBoolean(WebHdfsFileSystem.DFS_OPERATION_LISTSTATUSBATCH_USE_WITH_PAGE_ID_KEY, true);

    this.createOperationMode = conf.getEnum(HdlfsConstants.FS_HDLFS_OPERATION_CREATE_MODE_KEY, CreateOperationMode.DEFAULT);
    this.delegateFS = webHdfsFileSystem;
    this.delegateFS.initialize(delegateFsUri, delegateFsConf);
  }

  /**
   * Closes this file system and the delegate it forwards to.
   *
   * @throws IOException if closing this instance or the delegate fails
   */
  @Override
  public void close() throws IOException {
    try {
      // Must run first: FileSystem.close() deletes paths queued via deleteOnExit(), and those
      // deletes are forwarded to the delegate, which therefore still has to be open.
      super.close();
    } finally {
      // delegateFS is null when close() is called on an instance that was never initialized
      if (this.delegateFS != null) {
        this.delegateFS.close();
      }
    }
  }

  /** Returns the {@code hdlfs} scheme. */
  @Override
  public String getScheme() {
    return SCHEME;
  }

  /** Returns the URI this file system was initialized with. */
  @Override
  public URI getUri() {
    return this.uri;
  }

  /**
   * Opens the given path for reading through the delegate.
   *
   * @param path file to open
   * @param bufferSize buffer size passed on to the delegate
   * @return input stream provided by the delegate
   * @throws IOException if the delegate fails to open the file
   */
  @Override
  public FSDataInputStream open(final Path path, final int bufferSize) throws IOException {
    LOG.debug("Performing OPEN on: {}", path);
    final Path delegateFsPath = this.rewritePathToDelegateFs(path);
    return this.delegateFS.open(delegateFsPath, bufferSize);
  }

  /**
   * Creates a file through the delegate.
   *
   * <p>With the file output committer optimization disabled the request is forwarded unchanged
   * (after {@link #checkCreateOperation(Path, boolean)}).
   *
   * <p>With the optimization enabled, a path containing {@code /_temporary/} is redirected: the
   * file is written directly into the directory preceding the first {@code /_temporary/}
   * segment and is named {@code <name of the parent folder>-<original file name>}. In this mode
   * the delegate is always called with {@code overwrite = true}; the caller's {@code overwrite}
   * flag is only honoured by {@link #checkCreateOperation(Path, boolean)}.
   *
   * @throws FileAlreadyExistsException if {@link #checkCreateOperation(Path, boolean)} rejects the request
   * @throws IOException if the delegate fails to create the file
   */
  @Override
  public FSDataOutputStream create(final Path path, final FsPermission fsPermission, final boolean overwrite, final int bufferSize,
                                   final short replication, final long blockSize, final Progressable progress) throws IOException {
    final Path delegateFsPath;

    if (!this.isOptimizeFileOutputCommitEnabled()) {
      // file output committer optimizations disabled, simply delegate the request
      LOG.debug("FileOutputCommitter optimizations disabled. Performing CREATE on: {}", path);
      this.checkCreateOperation(path, overwrite);
      delegateFsPath = this.rewritePathToDelegateFs(path);
      return this.delegateFS.create(delegateFsPath, fsPermission, overwrite, bufferSize, replication, blockSize, progress);
    }

    final String pathStr = path.toString();
    final int idxOfTempDir = pathStr.indexOf(TEMPORARY_DIR);
    final Path pathWithFile;

    if (idxOfTempDir >= 0) {
      // the last segment of the parent folder is used as the attempt id prefix of the file name
      final String parentFolder = path.getParent().toString();
      final int attemptFolderIdx = parentFolder.lastIndexOf("/");
      final String attemptId = parentFolder.substring(attemptFolderIdx + 1);
      final String fileName = attemptId + "-" + path.getName();
      // keep everything up to and including the '/' that starts "/_temporary/"
      final String outputDirWithFile = pathStr.substring(0, idxOfTempDir + 1) + fileName;

      pathWithFile = new Path(outputDirWithFile);
    } else {
      pathWithFile = path;
    }

    LOG.debug("Performing CREATE on: {}", pathWithFile);

    this.checkCreateOperation(pathWithFile, overwrite);

    delegateFsPath = this.rewritePathToDelegateFs(pathWithFile);
    return this.delegateFS.create(delegateFsPath, fsPermission, true, bufferSize, replication, blockSize, progress);
  }

  /**
   * Appends to the given file through the delegate.
   *
   * @throws IOException if the delegate fails to open the file for appending
   */
  @Override
  public FSDataOutputStream append(final Path path, final int bufferSize, final Progressable progress) throws IOException {
    LOG.debug("Performing APPEND on: {}", path);
    final Path delegateFsPath = this.rewritePathToDelegateFs(path);
    return this.delegateFS.append(delegateFsPath, bufferSize, progress);
  }

  /**
   * Renames {@code pathFrom} to {@code pathTo} through the delegate.
   *
   * <p>With the file output committer optimization enabled, a source path containing
   * {@code /_temporary/} is not renamed at all and {@code true} is returned.
   *
   * @return the delegate's result, or {@code true} when the rename was skipped
   * @throws IOException if the delegate fails
   */
  @Override
  public boolean rename(final Path pathFrom, final Path pathTo) throws IOException {
    if (!this.isOptimizeFileOutputCommitEnabled()) {
      // file output committer optimizations disabled, simply delegate the request
      LOG.debug("FileOutputCommitter optimizations disabled. Performing RENAME: {} to {}", pathFrom, pathTo);
      return this.delegateFS.rename(this.rewritePathToDelegateFs(pathFrom), this.rewritePathToDelegateFs(pathTo));
    }

    final String pathFromStr = pathFrom.toString();

    if (pathFromStr.contains(TEMPORARY_DIR)) {
      LOG.debug("Skipping RENAME on path as this is a temporary folder: {}", pathFromStr);
      return true;
    }

    LOG.debug("Performing RENAME: {} to {}", pathFrom, pathTo);
    return this.delegateFS.rename(this.rewritePathToDelegateFs(pathFrom), this.rewritePathToDelegateFs(pathTo));
  }

  /**
   * Deletes the given path through the delegate.
   *
   * <p>With the file output committer optimization enabled, a path containing
   * {@code /_temporary/} is not deleted and {@code true} is returned. In every case, including
   * when the delegate throws, the path is removed from the local mkdir cache.
   *
   * @return the delegate's result, or {@code true} when the delete was skipped
   * @throws IOException if the delegate fails
   */
  @Override
  public boolean delete(final Path path, final boolean recursive) throws IOException {
    try {
      if (!this.isOptimizeFileOutputCommitEnabled()) {
        // file output committer optimizations disabled, simply delegate the request
        LOG.debug("FileOutputCommitter optimizations disabled. Performing DELETE on: {}", path);
        return this.delegateFS.delete(this.rewritePathToDelegateFs(path), recursive);
      }

      final String pathStr = path.toString();

      if (pathStr.contains(TEMPORARY_DIR)) {
        LOG.debug("Skipping DELETE on path as this is a temporary folder: {}", pathStr);
        return true;
      }

      LOG.debug("Performing DELETE on: {}", pathStr);
      return this.delegateFS.delete(this.rewritePathToDelegateFs(path), recursive);
    } finally {
      LOCAL_MKDIR_CACHE.remove(this.mkdirCacheKey(path));
    }
  }

  /**
   * Lists the given path through the delegate.
   *
   * @return the delegate's entries wrapped via {@code HdlfsFileStatus.create}, or an empty array
   *         when the delegate does not know the path but it is present in the local mkdir cache
   * @throws FileNotFoundException if the delegate does not know the path and it is not cached
   * @throws IOException if the delegate fails
   */
  @Override
  public FileStatus[] listStatus(final Path path) throws IOException {
    LOG.debug("Performing LISTSTATUS on: {}", path);

    try {
      final Path delegateFsPath = this.rewritePathToDelegateFs(path);
      return Arrays.stream(this.delegateFS.listStatus(delegateFsPath))
              .map(status -> HdlfsFileStatus.create(status, this.fsUriRequiresPort))
              .toArray(FileStatus[]::new);
    } catch (final FileNotFoundException ex) {
      // a directory that only exists locally (see mkdirs) is reported as empty
      if (LOCAL_MKDIR_CACHE.has(this.mkdirCacheKey(path))) {
        return NO_FILES;
      }

      throw ex;
    }
  }

  /**
   * Lists one batch of the given path through the delegate.
   *
   * @return the delegate's batch with its entries replaced by their {@code HdlfsFileStatus}
   *         counterparts, or an empty, final batch when the delegate does not know the path but
   *         it is present in the local mkdir cache
   * @throws FileNotFoundException if the delegate does not know the path and it is not cached
   * @throws IOException if the delegate fails
   * @throws IllegalStateException if the delegate is not a {@link WebHdfsFileSystem}
   */
  @Override
  protected DirectoryEntries listStatusBatch(final Path path, final byte[] token) throws IOException {
    LOG.debug("Performing LISTSTATUS_BATCH on: {}", path);

    if (!(this.delegateFS instanceof WebHdfsFileSystem)) {
      throw new IllegalStateException("Delegate FileSystem must be of type WebHdfsFileSystem");
    }

    try {
      final Path delegateFsPath = this.rewritePathToDelegateFs(path);
      final DirectoryEntries directoryEntries = ((WebHdfsFileSystem) this.delegateFS).listStatusBatch(delegateFsPath, token);
      final FileStatus[] fileStatuses = directoryEntries.getEntries();

      // Entries are replaced in place and the same DirectoryEntries object is returned.
      // NOTE(review): this relies on getEntries() exposing the backing array rather than a copy - confirm.
      for (int idx = 0; idx < fileStatuses.length; idx++) {
        fileStatuses[idx] = HdlfsFileStatus.create(fileStatuses[idx], this.fsUriRequiresPort);
      }

      return directoryEntries;
    } catch (final FileNotFoundException ex) {
      // a directory that only exists locally (see mkdirs) is reported as empty
      if (LOCAL_MKDIR_CACHE.has(this.mkdirCacheKey(path))) {
        return new DirectoryEntries(NO_FILES, null, false);
      }

      throw ex;
    }
  }

  /** Sets the working directory on the delegate. */
  @Override
  public void setWorkingDirectory(final Path path) {
    final Path delegateFsPath = this.rewritePathToDelegateFs(path);
    this.delegateFS.setWorkingDirectory(delegateFsPath);
  }

  /** Returns the delegate's working directory, rewritten to the {@code hdlfs} scheme. */
  @Override
  public Path getWorkingDirectory() {
    final Path workingDirectory = this.delegateFS.getWorkingDirectory();
    return this.rewritePathFromDelegateFs(workingDirectory);
  }

  /**
   * Records the directory in the local mkdir cache; nothing is sent to the delegate.
   *
   * @param path directory to "create"
   * @param fp ignored
   * @return always {@code true}
   */
  @Override
  public boolean mkdirs(final Path path, final FsPermission fp) {
    LOG.debug("MKDIRS operation is not supported, doing nothing but caching the dir locally.");
    final Path qualifiedPath = this.makeQualified(path);
    // synthetic directory status: length 0, replication 0, block size 0, modified "now"
    final FileStatus fileStatus = new FileStatus(0, true, 0, 0, System.currentTimeMillis(), qualifiedPath);
    LOCAL_MKDIR_CACHE.put(qualifiedPath.toString(), fileStatus);
    return true;
  }

  /**
   * Returns the status of the given path.
   *
   * @return the delegate's status, or the locally cached directory status when the delegate does
   *         not know the path; either one wrapped via {@code HdlfsFileStatus.create}
   * @throws FileNotFoundException if the delegate does not know the path and it is not cached
   * @throws IOException if the delegate fails
   */
  @Override
  public FileStatus getFileStatus(final Path path) throws IOException {
    LOG.debug("Performing GETFILESTATUS on: {}", path);
    FileStatus fileStatus;

    final Path delegateFsPath = this.rewritePathToDelegateFs(path);

    try {
      fileStatus = this.delegateFS.getFileStatus(delegateFsPath);
    } catch (final FileNotFoundException ex) {
      final String cacheKey = this.mkdirCacheKey(path);

      if (!LOCAL_MKDIR_CACHE.has(cacheKey)) {
        throw ex;
      }

      fileStatus = LOCAL_MKDIR_CACHE.get(cacheKey);
    }

    return HdlfsFileStatus.create(fileStatus, this.fsUriRequiresPort);
  }

  /** Obtains delegation tokens from the delegate file system. */
  @Override
  public Token<?>[] addDelegationTokens(final String renewer, final Credentials credentials) throws IOException {
    return this.delegateFS.addDelegationTokens(renewer, credentials);
  }

  /**
   * Splits the authority of the given URI with {@link #AUTHORITY_PATTERN}.
   *
   * @param name URI to parse
   * @return a two element array: capture group 1 (file container) and capture group 2 (endpoint)
   * @throws IOException if the URI has no authority or the authority does not match the pattern
   */
  private String[] parseURI(final URI name) throws IOException {
    final String authority = name.getAuthority();

    // e.g. "hdlfs:///dir" has no authority; Pattern.matcher(null) would throw a NullPointerException
    if (authority == null) {
      throw new IOException("URI does not contain an authority");
    }

    final Matcher matcher = AUTHORITY_PATTERN.matcher(authority);

    if (!matcher.find()) {
      throw new IOException("Authority does not conform to pattern");
    }

    return new String[] { matcher.group(1), matcher.group(2) };
  }

  /** Returns the key under which the given path is tracked in the local mkdir cache. */
  private String mkdirCacheKey(final Path path) {
    return this.makeQualified(path).toString();
  }

  /**
   * Converts an {@code hdlfs} path into the equivalent path on the delegate file system.
   *
   * @param path path to convert
   * @return {@code path} itself when its scheme is not {@code hdlfs} (including no scheme at
   *         all); otherwise a path with the delegate's scheme and authority and the same path part
   */
  protected Path rewritePathToDelegateFs(final Path path) {
    final URI pathUri = path.toUri();

    if (!SCHEME.equals(pathUri.getScheme())) {
      return path;
    }

    final URI delegateFsUri = this.delegateFS.getUri();
    return new Path(delegateFsUri.getScheme(), delegateFsUri.getAuthority(), pathUri.getPath());
  }

  /**
   * Converts a delegate path back into an {@code hdlfs} path.
   *
   * @param path path to convert
   * @return {@code path} itself when it has no scheme or already uses {@code hdlfs}; otherwise an
   *         {@code hdlfs} path whose authority is the delegate's full authority when the
   *         original URI carried a port, and only the delegate's host otherwise
   */
  protected Path rewritePathFromDelegateFs(final Path path) {
    final URI pathUri = path.toUri();
    final String scheme = pathUri.getScheme();

    if (scheme == null || SCHEME.equals(scheme)) {
      return path;
    }

    final URI delegateFsUri = this.delegateFS.getUri();
    final String authority = this.fsUriRequiresPort ? delegateFsUri.getAuthority() : delegateFsUri.getHost();

    return new Path(HdlfsFileSystem.SCHEME, authority, pathUri.getPath());
  }

  /** Returns whether the file output committer optimization was enabled at initialization. */
  protected boolean isOptimizeFileOutputCommitEnabled() {
    return this.optimizeFileOutputCommit;
  }

  /**
   * Validates a CREATE request against the configured {@link CreateOperationMode}.
   *
   * <ul>
   *   <li>{@code OVERWRITE}: nothing is checked; the {@code overwrite} flag is ignored.</li>
   *   <li>{@code DEFAULT}: nothing is checked when {@code overwrite} is {@code true}; otherwise
   *       the request is rejected when the path exists and is not a directory.</li>
   *   <li>{@code COMPATIBLE}: the request is rejected when the path exists and either
   *       {@code overwrite} is {@code false} or the path is a directory.</li>
   * </ul>
   *
   * A path unknown to the delegate always passes.
   *
   * @param path path about to be created
   * @param overwrite overwrite flag of the CREATE request
   * @throws FileAlreadyExistsException if the request is rejected
   * @throws IOException if the status lookup on the delegate fails for another reason
   */
  protected void checkCreateOperation(final Path path, final boolean overwrite) throws IOException {
    if (this.createOperationMode == CreateOperationMode.OVERWRITE) {
      if (!overwrite) {
        LOG.debug("Operation CREATE(path={}); ignoring parameter [overwrite=false]", path);
      }

      return;
    }

    if (this.createOperationMode == CreateOperationMode.DEFAULT && overwrite) {
      return;
    }

    final boolean isDirectory;

    try {
      final Path delegateFsPath = this.rewritePathToDelegateFs(path);
      final FileStatus fileStatus = this.delegateFS.getFileStatus(delegateFsPath);
      isDirectory = fileStatus.isDirectory();
    } catch (final FileNotFoundException ex) {
      // the trailing exception argument is logged with its stack trace by SLF4J
      LOG.debug("Path not found: {}", path, ex);
      return;
    }

    if (this.createOperationMode == CreateOperationMode.DEFAULT) {
      if (!isDirectory) {
        throw new FileAlreadyExistsException(path.toString() + " already exists");
      }

      return;
    }

    if (this.createOperationMode == CreateOperationMode.COMPATIBLE && (!overwrite || isDirectory)) {
      final String errorMessage = path.toString() + (isDirectory ? " is a directory" : " already exists");
      throw new FileAlreadyExistsException(errorMessage);
    }

  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy