com.sap.hana.datalake.files.HdlfsFileSystem Maven / Gradle / Ivy

An implementation of org.apache.hadoop.fs.FileSystem targeting SAP HANA Data Lake Files.
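
A minimal usage sketch, assuming a standard Hadoop client setup. The endpoint and container names are hypothetical, the literal keys "fs.hdlfs.impl" and "fs.hdlfs.endpoint" are assumptions mirroring HdlfsConstants, and authentication settings are omitted:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdlfsListingExample {
  public static void main(final String[] args) throws Exception {
    final Configuration conf = new Configuration();

    // Hypothetical values; see HdlfsConstants for the authoritative key spellings.
    conf.set("fs.hdlfs.impl", "com.sap.hana.datalake.files.HdlfsFileSystem");
    conf.set("fs.hdlfs.endpoint", "my-instance.files.hdl.example.com");

    // With an endpoint configured, the URI authority ("my-container") is treated as the FileContainer.
    try (final FileSystem fs = FileSystem.get(URI.create("hdlfs://my-container/"), conf)) {
      for (final FileStatus status : fs.listStatus(new Path("/"))) {
        System.out.println(status.getPath());
      }
    }
  }
}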

// © 2021-2024 SAP SE or an SAP affiliate company. All rights reserved.
package com.sap.hana.datalake.files;

import com.sap.hana.datalake.files.directaccess.gcs.GcsFileSystem;
import com.sap.hana.datalake.files.directaccess.s3.S3FileSystem;
import com.sap.hana.datalake.files.directaccess.wasb.WasbFileSystem;
import com.sap.hana.datalake.files.enumeration.DirectAccessMode;
import com.sap.hana.datalake.files.enumeration.DirectAccessType;
import com.sap.hana.datalake.files.enumeration.OpenOperationMode;
import com.sap.hana.datalake.files.operations.create.ByteBufferPool;
import com.sap.hana.datalake.files.utils.http.HttpClientUtils;
import com.sap.hana.datalake.files.utils.threads.ThreadUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.SWebHdfsFileSystem;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.URLConnectionFactory;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.Progressable;
import org.apache.http.client.HttpClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collection;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class HdlfsFileSystem extends FileSystem implements HdlfsFileSystemCapabilities, AtomicWritingCapable {

  public static final String SCHEME = "hdlfs";

  private static final Pattern AUTHORITY_PATTERN = Pattern.compile(HdlfsConstants.FS_HDLFS_AUTHORITY_REGEX);
  private static final Logger LOG = LoggerFactory.getLogger(HdlfsFileSystem.class);

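  // Both resources are JVM-wide singletons: they are shared by every HdlfsFileSystem instance in the
  // process and are created at most once, by whichever instance initializes first.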
  private static volatile HttpClient directAccessHttpClient;
  private static volatile ExecutorService directAccessMultipartUploadThreadPool;

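  // Lazily creates the shared HTTP client via double-checked locking. Returns true only when this call
  // actually performed the initialization; false means it already existed and the given config is ignored.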
  private static boolean initializeDirectAccessHttpClient(final Configuration config) {
    if (directAccessHttpClient == null) {
      synchronized (HdlfsFileSystem.class) {
        if (directAccessHttpClient == null) {
          final HttpClientUtils.Builder builder = new HttpClientUtils.Builder()
                  .setConnectionsMaxCount(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_MAX_COUNT_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_MAX_COUNT_DEFAULT))
                  .setConnectionsKeepAliveSeconds(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_KEEP_ALIVE_SECONDS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_KEEP_ALIVE_SECONDS_DEFAULT))
                  .setConnectionsTtlSeconds(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_TTL_SECONDS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_TTL_SECONDS_DEFAULT))
                  .setValidateConnectionsAfterInactivityMs(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_VALIDATE_AFTER_INACTIVITY_MS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_VALIDATE_AFTER_INACTIVITY_MS_DEFAULT))
                  .setConnectTimeoutMs(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECT_TIMEOUT_MS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECT_TIMEOUT_MS_DEFAULT))
                  .setSocketTimeoutMs(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_SOCKET_TIMEOUT_MS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_SOCKET_TIMEOUT_MS_DEFAULT))
                  .setRetriesMaxCount(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRIES_MAX_COUNT_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRIES_MAX_COUNT_DEFAULT))
                  .setRetriesMinIntervalMs(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRIES_MIN_INTERVAL_MS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRIES_MIN_INTERVAL_MS_DEFAULT))
                  .setRetriesMaxIntervalMs(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRIES_MAX_INTERVAL_MS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRIES_MAX_INTERVAL_MS_DEFAULT))
                  .setRetrySentRequests(config.getBoolean(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRY_SENT_REQUESTS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRY_SENT_REQUESTS_DEFAULT))
                  .setNonRetriableExceptions(config.getClasses(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_NON_RETRIABLE_EXCEPTIONS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_NON_RETRIABLE_EXCEPTIONS_DEFAULT));

          directAccessHttpClient = HttpClientUtils.createHttpClient(builder);

          LOG.debug("DirectAccessHttpClient(builder={}) initialized", builder);

          return true;
        }
      }
    }

    return false;
  }

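  // Same initialize-once semantics as initializeDirectAccessHttpClient above, for the thread pool used
  // by WASB multipart uploads.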
  private static boolean initializeDirectAccessMultipartUploadThreadPool(final Configuration config) {
    if (directAccessMultipartUploadThreadPool == null) {
      synchronized (HdlfsFileSystem.class) {
        if (directAccessMultipartUploadThreadPool == null) {
          final int threadPoolSize = config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_MULTIPART_UPLOAD_THREAD_POOL_SIZE_KEY,
                  HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_MULTIPART_UPLOAD_THREAD_POOL_SIZE_DEFAULT);
          final int threadPoolQueueSize = config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_MULTIPART_UPLOAD_THREAD_POOL_QUEUE_SIZE_KEY,
                  HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_MULTIPART_UPLOAD_THREAD_POOL_QUEUE_SIZE_DEFAULT);
          final long threadPoolKeepAliveSecs = config.getLong(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_MULTIPART_UPLOAD_THREAD_POOL_KEEP_ALIVE_SECS_KEY,
                  HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_MULTIPART_UPLOAD_THREAD_POOL_KEEP_ALIVE_SECS_DEFAULT);

          directAccessMultipartUploadThreadPool = ThreadUtils.newDaemonThreadBlockingExecutor(
                  threadPoolSize,
                  threadPoolQueueSize,
                  /* allowCoreThreadTimeOut */ true,
                  threadPoolKeepAliveSecs, TimeUnit.SECONDS,
                  /* fairSemaphore */ false,
                  /* namePrefix */ "DirectAccessMPU-thread",
                  config);

          LOG.debug("DirectAccessMultipartUploadThreadPool(maxThreads={}, queueSize={}, keepAlive={}s) initialized", threadPoolSize,
                  threadPoolQueueSize, threadPoolKeepAliveSecs);

          return true;
        }
      }
    }

    return false;
  }

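  // Splits the URI authority into { fileContainer, endpointSuffix } according to AUTHORITY_PATTERN,
  // returning null when the authority does not match. Illustrative shape only; the authoritative
  // format is HdlfsConstants.FS_HDLFS_AUTHORITY_REGEX:
  //   "my-container.files.example.com"  ->  { "my-container", "files.example.com" }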
  static String[] parseFsUriAuthority(final URI fsUri) {
    final String authority = fsUri.getAuthority();
    final Matcher matcher = AUTHORITY_PATTERN.matcher(authority);

    if (matcher.find()) {
      final String container = matcher.group(1);
      final String endpoint = matcher.group(2);

      return new String[] { container, endpoint };
    } else {
      return null;
    }
  }

  static boolean isParsedHostValid(final String[] parsedHost) {
    if (parsedHost == null || parsedHost.length != 2) {
      return false;
    }

    final String fileContainer = parsedHost[0];
    final String suffix = parsedHost[1];

    return fileContainer != null && !fileContainer.isEmpty() && suffix != null && !suffix.isEmpty();
  }

  private HdlfsBaseFileSystem delegateFS;
  private DirectAccessType directAccessType;

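  // Initialization sequence: (1) configure the global ByteBufferPool, (2) build the underlying WebHDFS
  // filesystem, (3) resolve the Direct Access type from config plus the server's whoami response, and
  // (4) pick the delegate filesystem that every FileSystem operation forwards to.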
  @Override
  public void initialize(final URI fsUri, final Configuration conf) throws IOException {
    this.initializeGlobalByteBufferPool(conf);
    super.initialize(fsUri, conf);
    this.setConf(conf);

    final WebHdfsFileSystem webHdfsFileSystem = this.initializeWebHdfsFileSystem(fsUri, conf);

    this.directAccessType = this.resolveDirectAccessType(conf, webHdfsFileSystem);

    if (this.directAccessType != null) {
      if (!initializeDirectAccessHttpClient(conf)) {
        LOG.info("DirectAccessHttpClient had already been initialized; new config values will be ignored");
      }

      if (this.directAccessType == DirectAccessType.WASB && !initializeDirectAccessMultipartUploadThreadPool(conf)) {
        LOG.info("DirectAccessMultipartUploadThreadPool had already been initialized; new config values will be ignored");
      }
    }

    this.delegateFS = this.resolveDelegateFs(this.directAccessType, webHdfsFileSystem);
    this.delegateFS.initialize(fsUri, conf);
  }

  @Override
  public String getScheme() {
    return SCHEME;
  }

  @Override
  public URI getUri() {
    return this.delegateFS.getUri();
  }

  @Override
  public FSDataInputStream open(final Path path, final int bufferSize) throws IOException {
    return this.delegateFS.open(path, bufferSize);
  }

  @Override
  public FSDataOutputStream create(final Path path, final FsPermission fsPermission, final boolean overwrite, final int bufferSize,
                                   final short replication, final long blockSize, final Progressable progress) throws IOException {
    return this.delegateFS.create(path, fsPermission, overwrite, bufferSize, replication, blockSize, progress);
  }

  @Override
  public FSDataOutputStream create(final Path path, final FsPermission fsPermission, final boolean overwrite, final int bufferSize,
                                   final short replication, final long blockSize, final Progressable progress, final boolean atomic) throws IOException {
    LOG.debug("Parameter [atomic] will be ignored");

    return this.create(path, fsPermission, overwrite, bufferSize, replication, blockSize, progress);
  }

  @Override
  public FSDataOutputStream append(final Path path, final int bufferSize, final Progressable progress) throws IOException {
    return this.delegateFS.append(path, bufferSize, progress);
  }

  @Override
  public boolean rename(final Path pathFrom, final Path pathTo) throws IOException {
    return this.delegateFS.rename(pathFrom, pathTo);
  }

  @Override
  public boolean delete(final Path path, final boolean recursive) throws IOException {
    return this.delegateFS.delete(path, recursive);
  }

  @Override
  public FileStatus[] listStatus(final Path path) throws IOException {
    return this.delegateFS.listStatus(path);
  }

  @Override
  protected DirectoryEntries listStatusBatch(final Path path, final byte[] token) throws IOException {
    return this.delegateFS.listStatusBatch(path, token);
  }

  @Override
  public DirectoryEntries listStatusRecursive(final Path path, final byte[] token) throws IOException {
    return this.delegateFS.listStatusRecursive(path, token);
  }

  @Override
  public void setWorkingDirectory(final Path path) {
    this.delegateFS.setWorkingDirectory(path);
  }

  @Override
  public Path getWorkingDirectory() {
    return this.delegateFS.getWorkingDirectory();
  }

  @Override
  public boolean mkdirs(final Path path, final FsPermission fp) throws IOException {
    return this.delegateFS.mkdirs(path, fp);
  }

  @Override
  public FileStatus getFileStatus(final Path path) throws IOException {
    return this.delegateFS.getFileStatus(path);
  }

  @Override
  public Token[] addDelegationTokens(final String renewer, final Credentials credentials) throws IOException {
    return this.delegateFS.addDelegationTokens(renewer, credentials);
  }

  @Override
  public DeleteBatchResult deleteBatch(final Collection files) throws IOException {
    return this.delegateFS.deleteBatch(files);
  }

  @Override
  public DeleteBatchResult deleteBatch(final Collection files, final boolean shouldWaitForResult) throws IOException {
    return this.delegateFS.deleteBatch(files, shouldWaitForResult);
  }

  @Override
  public DeleteBatchResult completeDeleteBatch(final String token, final long waitTimeSeconds) throws IOException {
    return this.delegateFS.completeDeleteBatch(token, waitTimeSeconds);
  }

  @Override
  public MergeResult merge(final Path path, final Collection sources) throws IOException {
    return this.delegateFS.merge(path, sources);
  }

  @Override
  public CopyResult copy(final Path path, final Path destination) throws IOException {
    return this.delegateFS.copy(path, destination);
  }

  public Catalog getCatalog() {
    return this.delegateFS.getCatalog();
  }

  HdlfsBaseFileSystem getDelegateFS() {
    return this.delegateFS;
  }

  WebHdfsFileSystem getWebHdfsFileSystem() {
    return this.delegateFS.getWebHdfsFileSystem();
  }

  FsCache getFsCache() {
    return this.delegateFS.getFsCache();
  }

  RetryPolicy getRetryPolicy() {
    return this.delegateFS.getRetryPolicy();
  }

  DirectAccessType getDirectAccessType() {
    return this.directAccessType;
  }

  void addPathToPendingFilesCreated(final Path path) {
    this.delegateFS.addPathToPendingFilesCreated(path);
  }

  void addConsistentWritePathPatterns(final Collection patterns) {
    this.delegateFS.addConsistentWritePathPatterns(patterns);
  }

  void removeConsistentWritePathPatterns(final Collection patterns) {
    this.delegateFS.removeConsistentWritePathPatterns(patterns);
  }

  // Protected so that unit tests can override the WebHdfsFileSystem creation
  protected WebHdfsFileSystem createWebHdfsFileSystem(final boolean sslEnabled) {
    return sslEnabled ? new SWebHdfsFileSystem() : new WebHdfsFileSystem();
  }

  private void initializeGlobalByteBufferPool(final Configuration conf) {
    // The ByteBufferPool is the same one used by Storage Gateway and HDL Files, and it uses DIRECT buffers by default.
    // In the context of our Spark driver, we want the default to be HEAP buffers.
    final String bufferType = conf.get(HdlfsConstants.FS_GLOBAL_BUFFER_POOL_BUFFER_TYPE_KEY, HdlfsConstants.FS_GLOBAL_BUFFER_POOL_BUFFER_TYPE_DEFAULT);
    final String threadLocalBufferType = conf.get(HdlfsConstants.FS_GLOBAL_BUFFER_POOL_THREAD_LOCAL_BUFFER_TYPE_KEY, HdlfsConstants.FS_GLOBAL_BUFFER_POOL_THREAD_LOCAL_BUFFER_TYPE_DEFAULT);

    // Write the resolved values back so that ByteBufferPool.initialize picks them up instead of its built-in defaults
    conf.set(HdlfsConstants.FS_GLOBAL_BUFFER_POOL_BUFFER_TYPE_KEY, bufferType);
    conf.set(HdlfsConstants.FS_GLOBAL_BUFFER_POOL_THREAD_LOCAL_BUFFER_TYPE_KEY, threadLocalBufferType);

    ByteBufferPool.initialize(conf);
  }

  private WebHdfsFileSystem initializeWebHdfsFileSystem(final URI fsUri, final Configuration conf) throws IOException {
    final boolean sslEnabled = conf.getBoolean(HdlfsConstants.FS_HDLFS_SSL_ENABLED_KEY, true);
    final WebHdfsFileSystem webHdfsFileSystem = this.createWebHdfsFileSystem(sslEnabled);
    final int port = fsUri.getPort() > 0 ? fsUri.getPort() : (sslEnabled ? HdlfsConstants.HTTPS_PORT : HdlfsConstants.HTTP_PORT);

    URI webHdfsFsUri;
    String fileContainer;

    // We start by checking whether the endpoint was provided as a configuration property.
    // If it was, we inject it into the delegate FS URI as the new authority.
    // In that case, the old authority is treated as the FileContainer.
    final String endpoint = conf.get(HdlfsConstants.FS_HDLFS_ENDPOINT_KEY);
    final String[] parsedHost = parseFsUriAuthority(fsUri);

    if (endpoint != null && !endpoint.isEmpty()) {
      // If the endpoint parameter was specified but the URI is already fully qualified, the fully qualified URI takes precedence.
      if (isParsedHostValid(parsedHost)) {
        fileContainer = parsedHost[0];

        try {
          webHdfsFsUri = new URI(webHdfsFileSystem.getScheme(), fsUri.getUserInfo(), fsUri.getHost(), port, fsUri.getPath(), fsUri.getQuery(), fsUri.getFragment());
        } catch (final URISyntaxException ex) {
          throw new IOException(ex);
        }
      } else {
        // Otherwise, use the endpoint parameter as the host and treat the fsUri authority as the FileContainer
        fileContainer = fsUri.getAuthority();
        LOG.debug("Endpoint [{}] was provided, injecting it in the delegatedFS URI as the new authority and considering old authority [{}] as the FileContainer", endpoint, fileContainer);

        try {
          webHdfsFsUri = new URI(webHdfsFileSystem.getScheme(), endpoint, fsUri.getPath(), fsUri.getQuery(), fsUri.getFragment());
        } catch (final URISyntaxException ex) {
          throw new IOException(ex);
        }
      }
    } else {
      /* If the endpoint was not provided, we ensure that:
         - If the "fs.hdlfs.filecontainer" property is provided, simply use that value to set the header.
         - If the "fs.hdlfs.filecontainer" property is not provided, we parse the file container from the URI
           to keep supporting previous configurations.

         In both cases, the URI is used as-is.
      */
      fileContainer = conf.get(HdlfsConstants.FS_HDLFS_FILECONTAINER_KEY);

      if (fileContainer == null) {
        if (!isParsedHostValid(parsedHost)) {
          throw new IOException("No valid fully qualified URI, endpoint or FileContainer was provided");
        }

        // File container is null, so it will come from the URI.
        fileContainer = parsedHost[0];
      }

      try {
        webHdfsFsUri = new URI(webHdfsFileSystem.getScheme(), fsUri.getUserInfo(), fsUri.getHost(), port, fsUri.getPath(), fsUri.getQuery(), fsUri.getFragment());
      } catch (final URISyntaxException ex) {
        throw new IOException(ex);
      }
    }

    final Configuration webHdfsFsConf = new Configuration(conf);

    // Map the relevant fs.hdlfs retry configurations onto the WebHDFS client keys
    final boolean retryPolicyEnabled = conf.getBoolean(HdlfsConstants.FS_HDLFS_HTTP_CLIENT_RETRY_POLICY_ENABLED_KEY, HdlfsConstants.FS_HDLFS_HTTP_CLIENT_RETRY_POLICY_ENABLED_DEFAULT);
    webHdfsFsConf.setBoolean(HdfsClientConfigKeys.HttpClient.RETRY_POLICY_ENABLED_KEY, retryPolicyEnabled);

    final String retryPolicySpec = conf.get(HdlfsConstants.FS_HDLFS_HTTP_CLIENT_RETRY_POLICY_SPEC_KEY, HdlfsConstants.FS_HDLFS_HTTP_CLIENT_RETRY_POLICY_SPEC_DEFAULT);
    webHdfsFsConf.set(HdfsClientConfigKeys.HttpClient.RETRY_POLICY_SPEC_KEY, retryPolicySpec);

    final int retryMaxAttempts = conf.getInt(HdlfsConstants.FS_HDLFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_KEY, HdlfsConstants.FS_HDLFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT);
    webHdfsFsConf.setInt(HdfsClientConfigKeys.HttpClient.RETRY_MAX_ATTEMPTS_KEY, retryMaxAttempts);

    final int failoverMaxAttempts = conf.getInt(HdlfsConstants.FS_HDLFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY, HdlfsConstants.FS_HDLFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT);
    webHdfsFsConf.setInt(HdfsClientConfigKeys.HttpClient.FAILOVER_MAX_ATTEMPTS_KEY, failoverMaxAttempts);

    final int failoverSleepBase = conf.getInt(HdlfsConstants.FS_HDLFS_HTTP_CLIENT_FAILOVER_SLEEP_BASE_MS_KEY, HdlfsConstants.FS_HDLFS_HTTP_CLIENT_FAILOVER_SLEEP_BASE_MS_DEFAULT);
    webHdfsFsConf.setInt(HdfsClientConfigKeys.HttpClient.FAILOVER_SLEEPTIME_BASE_KEY, failoverSleepBase);

    final int failoverSleepMax = conf.getInt(HdlfsConstants.FS_HDLFS_HTTP_CLIENT_FAILOVER_SLEEP_MAX_MS_KEY, HdlfsConstants.FS_HDLFS_HTTP_CLIENT_FAILOVER_SLEEP_MAX_MS_DEFAULT);
    webHdfsFsConf.setInt(HdfsClientConfigKeys.HttpClient.FAILOVER_SLEEPTIME_MAX_KEY, failoverSleepMax);

    // Socket connect/read timeouts
    final int connectTimeout = conf.getInt("fs.hdlfs.socket.connect-timeout", URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT);
    webHdfsFsConf.setInt("dfs.webhdfs.socket.connect-timeout", connectTimeout);

    final int readTimeout = conf.getInt("fs.hdlfs.socket.read-timeout", URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT);
    webHdfsFsConf.setInt("dfs.webhdfs.socket.read-timeout", readTimeout);

    // We can still end up with an empty file container if the connection-id property is used
    webHdfsFsConf.set(HdfsClientConfigKeys.DFS_WEBHDFS_CUSTOM_CONNECTION_CONFIGURATOR_IMPL, HdlfsConnectionConfigurator.class.getName());
    webHdfsFsConf.set(HdlfsConstants.FS_HDLFS_FILECONTAINER_KEY, fileContainer);
    webHdfsFsConf.setBoolean(HdlfsFileSystemCapabilities.HDLFS_OPERATION_LISTSTATUSBATCH_USE_WITH_PAGE_ID_KEY, true);
    webHdfsFsConf.setBoolean(HdlfsFileSystemCapabilities.HDLFS_STATUS_CODE_FORBIDDEN_MUTATE_EXCEPTION_KEY, true);

    final OpenOperationMode openOperationMode = conf.getEnum(HdlfsConstants.FS_HDLFS_OPERATION_OPEN_MODE_KEY, OpenOperationMode.DEFAULT);

    if (openOperationMode == OpenOperationMode.DEFAULT) {
      webHdfsFsConf.setBoolean("fs.webhdfs.operation.open.chunked.enabled", true);
      final int chunkSize = conf.getInt(HdlfsConstants.FS_HDLFS_OPERATION_OPEN_CHUNK_SIZE, HdlfsConstants.FS_HDLFS_OPERATION_OPEN_CHUNK_SIZE_DEFAULT);
      webHdfsFsConf.setInt("fs.webhdfs.operation.open.chunk.size", chunkSize);
      webHdfsFsConf.setBoolean("fs.webhdfs.operation.open.bound.check.enabled", false);
      webHdfsFsConf.setBoolean("fs.webhdfs.operation.open.expect-redirect.enabled", false);
      webHdfsFsConf.setBoolean("fs.webhdfs.change.detection.enabled", true);
    } else if (openOperationMode == OpenOperationMode.COMPATIBLE) {
      webHdfsFsConf.setBoolean("fs.webhdfs.operation.open.chunked.enabled", false);
      webHdfsFsConf.setBoolean("fs.webhdfs.operation.open.bound.check.enabled", true);
      webHdfsFsConf.setBoolean("fs.webhdfs.operation.open.expect-redirect.enabled", true);
      webHdfsFsConf.setBoolean("fs.webhdfs.change.detection.enabled", false);
    }

    webHdfsFileSystem.initialize(webHdfsFsUri, webHdfsFsConf);

    return webHdfsFileSystem;
  }
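
  // Illustrative examples (hypothetical values) of the two configuration styles resolved above:
  //   (a) fs.hdlfs.endpoint set, URI "hdlfs://my-container/data"
  //       -> FileContainer = "my-container"; the endpoint becomes the WebHDFS authority.
  //   (b) no endpoint, fully qualified URI "hdlfs://my-container.files.example.com/data"
  //       -> FileContainer parsed from the authority; host and port taken from the URI as-is.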

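  // Mode semantics: DISABLED -> never use Direct Access; ACCEPT -> use it when the server's whoami
  // response advertises a recognized type, otherwise fall back to plain WebHDFS; EXPECT -> fail fast
  // when the server does not advertise a usable type.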
  private DirectAccessType resolveDirectAccessType(final Configuration conf, final WebHdfsFileSystem webHdfsFileSystem) throws IOException {
    final String directAccessModeConf = conf.get(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_MODE_KEY);

    if (directAccessModeConf == null) {
      return null;
    }

    final DirectAccessMode directAccessMode;

    try {
      directAccessMode = DirectAccessMode.valueOf(directAccessModeConf);
    } catch (final IllegalArgumentException ex) {
      throw new IllegalArgumentException(String.format("Direct Access mode configuration should be [%s], [%s] or [%s], but was [%s]", DirectAccessMode.DISABLED.name(), DirectAccessMode.ACCEPT.name(), DirectAccessMode.EXPECT.name(), directAccessModeConf), ex);
    }

    if (directAccessMode == DirectAccessMode.DISABLED) {
      return null;
    }

    final WhoamiResult whoamiResult = webHdfsFileSystem.whoami();

    final String directAccessTypeRawValue = whoamiResult.getOptions()
            .stream()
            .filter(option -> HdlfsConstants.WHOAMI_OPTIONS_DIRECT_ACCESS_TYPE_KEY.equals(option.getKey()))
            .map(WhoamiResultOption::getValue)
            .findFirst()
            .orElse(null);

    if (directAccessTypeRawValue == null) {
      if (directAccessMode == DirectAccessMode.EXPECT) {
        throw new IllegalArgumentException(String.format("Direct Access mode is set to [%s] but it is not supported by the server.", DirectAccessMode.EXPECT.name()));
      } else if (directAccessMode == DirectAccessMode.ACCEPT) {
        LOG.info("Direct Access is not available. Will proceed since Direct Access mode is [{}].", DirectAccessMode.ACCEPT.name());
      }

      return null;
    }

    DirectAccessType directAccessType = null;

    try {
      directAccessType = DirectAccessType.valueOf(directAccessTypeRawValue);
    } catch (final IllegalArgumentException ex) {
      if (directAccessMode == DirectAccessMode.EXPECT) {
        throw new IllegalArgumentException(String.format("Unrecognized Direct Access type [%s]", directAccessTypeRawValue), ex);
      } else if (directAccessMode == DirectAccessMode.ACCEPT) {
        LOG.warn("Unrecognized Direct Access type [{}]. Will proceed since Direct Access mode is [{}].", directAccessTypeRawValue, DirectAccessMode.ACCEPT.name());
      }
    }

    return directAccessType;
  }

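  // The object-store-specific delegates (GCS, S3, WASB) implement Direct Access against the backing
  // store; HdlfsBaseFileSystem is the fallback that goes through WebHDFS only.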
  private HdlfsBaseFileSystem resolveDelegateFs(final DirectAccessType directAccessType, final WebHdfsFileSystem webHdfsFileSystem) {
    if (directAccessType == DirectAccessType.GCS) {
      return new GcsFileSystem(webHdfsFileSystem, directAccessHttpClient);
    } else if (directAccessType == DirectAccessType.S3) {
      return new S3FileSystem(webHdfsFileSystem, directAccessHttpClient);
    } else if (directAccessType == DirectAccessType.WASB) {
      return new WasbFileSystem(webHdfsFileSystem, directAccessHttpClient, directAccessMultipartUploadThreadPool);
    } else {
      return new HdlfsBaseFileSystem(webHdfsFileSystem);
    }
  }

}
