com.sap.hana.datalake.files.HdlfsFileSystem (artifact: sap-hdlfs)
An implementation of org.apache.hadoop.fs.FileSystem targeting SAP HANA Data Lake Files.
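A minimal usage sketch, assuming the endpoint key is named "fs.hdlfs.endpoint" (a guess mirroring the "fs.hdlfs.filecontainer" key referenced in the source); the instance and container names are placeholders, and the TLS/credential settings a real deployment needs are omitted:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdlfsUsageSketch {
  public static void main(final String[] args) throws Exception {
    final Configuration conf = new Configuration();
    // Register the scheme through Hadoop's standard fs.<scheme>.impl mechanism.
    conf.set("fs.hdlfs.impl", "com.sap.hana.datalake.files.HdlfsFileSystem");
    // Hypothetical key and endpoint value; when an endpoint is configured, the URI
    // authority is treated as the file container (see initializeWebHdfsFileSystem below).
    conf.set("fs.hdlfs.endpoint", "my-instance.files.hdl.example.com");
    // newInstance bypasses Hadoop's FileSystem cache, so try-with-resources is safe.
    try (final FileSystem fs = FileSystem.newInstance(URI.create("hdlfs://my-container/"), conf)) {
      for (final FileStatus status : fs.listStatus(new Path("/"))) {
        System.out.println(status.getPath());
      }
    }
  }
}

Delegate selection (plain WebHDFS versus S3, GCS, or WASB direct access) happens inside initialize(), so callers only ever see the standard FileSystem API. The full source follows.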
// © 2021-2024 SAP SE or an SAP affiliate company. All rights reserved.
package com.sap.hana.datalake.files;
import com.sap.hana.datalake.files.directaccess.gcs.GcsFileSystem;
import com.sap.hana.datalake.files.directaccess.s3.S3FileSystem;
import com.sap.hana.datalake.files.directaccess.wasb.WasbFileSystem;
import com.sap.hana.datalake.files.enumeration.DirectAccessMode;
import com.sap.hana.datalake.files.enumeration.DirectAccessType;
import com.sap.hana.datalake.files.enumeration.OpenOperationMode;
import com.sap.hana.datalake.files.operations.create.ByteBufferPool;
import com.sap.hana.datalake.files.utils.http.HttpClientUtils;
import com.sap.hana.datalake.files.utils.threads.ThreadUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.SWebHdfsFileSystem;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.URLConnectionFactory;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.Progressable;
import org.apache.http.client.HttpClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collection;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class HdlfsFileSystem extends FileSystem implements HdlfsFileSystemCapabilities, AtomicWritingCapable {
public static final String SCHEME = "hdlfs";
private static final Pattern AUTHORITY_PATTERN = Pattern.compile(HdlfsConstants.FS_HDLFS_AUTHORITY_REGEX);
private static final Logger LOG = LoggerFactory.getLogger(HdlfsFileSystem.class);
private static volatile HttpClient directAccessHttpClient;
private static volatile ExecutorService directAccessMultipartUploadThreadPool;
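// Lazily builds the HTTP client shared by all direct-access delegate filesystems.
// Double-checked locking on the volatile static ensures one instance per JVM; the
// return value is true only for the caller that actually performed the initialization.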
private static boolean initializeDirectAccessHttpClient(final Configuration config) {
if (directAccessHttpClient == null) {
synchronized (HdlfsFileSystem.class) {
if (directAccessHttpClient == null) {
final HttpClientUtils.Builder builder = new HttpClientUtils.Builder()
.setConnectionsMaxCount(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_MAX_COUNT_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_MAX_COUNT_DEFAULT))
.setConnectionsKeepAliveSeconds(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_KEEP_ALIVE_SECONDS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_KEEP_ALIVE_SECONDS_DEFAULT))
.setConnectionsTtlSeconds(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_TTL_SECONDS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_TTL_SECONDS_DEFAULT))
.setValidateConnectionsAfterInactivityMs(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_VALIDATE_AFTER_INACTIVITY_MS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECTIONS_VALIDATE_AFTER_INACTIVITY_MS_DEFAULT))
.setConnectTimeoutMs(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECT_TIMEOUT_MS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_CONNECT_TIMEOUT_MS_DEFAULT))
.setSocketTimeoutMs(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_SOCKET_TIMEOUT_MS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_SOCKET_TIMEOUT_MS_DEFAULT))
.setRetriesMaxCount(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRIES_MAX_COUNT_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRIES_MAX_COUNT_DEFAULT))
.setRetriesMinIntervalMs(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRIES_MIN_INTERVAL_MS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRIES_MIN_INTERVAL_MS_DEFAULT))
.setRetriesMaxIntervalMs(config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRIES_MAX_INTERVAL_MS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRIES_MAX_INTERVAL_MS_DEFAULT))
.setRetrySentRequests(config.getBoolean(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRY_SENT_REQUESTS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_RETRY_SENT_REQUESTS_DEFAULT))
.setNonRetriableExceptions(config.getClasses(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_NON_RETRIABLE_EXCEPTIONS_KEY, HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_HTTP_CLIENT_NON_RETRIABLE_EXCEPTIONS_DEFAULT));
directAccessHttpClient = HttpClientUtils.createHttpClient(builder);
LOG.debug("DirectAccessHttpClient(builder={}) initialized", builder);
return true;
}
}
}
return false;
}
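// Same lazy-singleton pattern for the multipart-upload executor used by WASB direct
// access; judging by its name, the blocking executor throttles submitters once the
// configured queue size is reached.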
private static boolean initializeDirectAccessMultipartUploadThreadPool(final Configuration config) {
if (directAccessMultipartUploadThreadPool == null) {
synchronized (HdlfsFileSystem.class) {
if (directAccessMultipartUploadThreadPool == null) {
final int threadPoolSize = config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_MULTIPART_UPLOAD_THREAD_POOL_SIZE_KEY,
HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_MULTIPART_UPLOAD_THREAD_POOL_SIZE_DEFAULT);
final int threadPoolQueueSize = config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_MULTIPART_UPLOAD_THREAD_POOL_QUEUE_SIZE_KEY,
HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_MULTIPART_UPLOAD_THREAD_POOL_QUEUE_SIZE_DEFAULT);
final long threadPoolKeepAliveSecs = config.getLong(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_MULTIPART_UPLOAD_THREAD_POOL_KEEP_ALIVE_SECS_KEY,
HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_MULTIPART_UPLOAD_THREAD_POOL_KEEP_ALIVE_SECS_DEFAULT);
directAccessMultipartUploadThreadPool = ThreadUtils.newDaemonThreadBlockingExecutor(
threadPoolSize,
threadPoolQueueSize,
/* allowCoreThreadTimeOut */ true,
threadPoolKeepAliveSecs, TimeUnit.SECONDS,
/* fairSemaphore */ false,
/* namePrefix */ "DirectAccessMPU-thread",
config);
LOG.debug("DirectAccessMultipartUploadThreadPool(maxThreads={}, queueSize={}, keepAlive={}s) initialized", threadPoolSize,
threadPoolQueueSize, threadPoolKeepAliveSecs);
return true;
}
}
}
return false;
}
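// Splits the URI authority into { fileContainer, endpoint } according to
// FS_HDLFS_AUTHORITY_REGEX; returns null when the authority does not match.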
static String[] parseFsUriAuthority(final URI fsUri) {
final String authority = fsUri.getAuthority();
final Matcher matcher = AUTHORITY_PATTERN.matcher(authority);
if (matcher.find()) {
final String container = matcher.group(1);
final String endpoint = matcher.group(2);
return new String[] { container, endpoint };
} else {
return null;
}
}
static boolean isParsedHostValid(final String[] parsedHost) {
if (parsedHost == null || parsedHost.length != 2) {
return false;
}
final String fileContainer = parsedHost[0];
final String suffix = parsedHost[1];
return fileContainer != null && !fileContainer.isEmpty() && suffix != null && !suffix.isEmpty();
}
private HdlfsBaseFileSystem delegateFS;
private DirectAccessType directAccessType;
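// Initialization wires up the global buffer pool, the underlying (S)WebHDFS client,
// the optional shared direct-access resources, and finally the delegate filesystem
// that backs every FileSystem operation on this class.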
@Override
public void initialize(final URI fsUri, final Configuration conf) throws IOException {
this.initializeGlobalByteBufferPool(conf);
super.initialize(fsUri, conf);
this.setConf(conf);
final WebHdfsFileSystem webHdfsFileSystem = this.initializeWebHdfsFileSystem(fsUri, conf);
this.directAccessType = this.resolveDirectAccessType(conf, webHdfsFileSystem);
if (this.directAccessType != null) {
if (!initializeDirectAccessHttpClient(conf)) {
LOG.info("DirectAccessHttpClient had already been initialized; new config values will be ignored");
}
if (this.directAccessType == DirectAccessType.WASB && !initializeDirectAccessMultipartUploadThreadPool(conf)) {
LOG.info("DirectAccessMultipartUploadThreadPool had already been initialized; new config values will be ignored");
}
}
this.delegateFS = this.resolveDelegateFs(this.directAccessType, webHdfsFileSystem);
this.delegateFS.initialize(fsUri, conf);
}
@Override
public String getScheme() {
return SCHEME;
}
@Override
public URI getUri() {
return this.delegateFS.getUri();
}
@Override
public FSDataInputStream open(final Path path, final int bufferSize) throws IOException {
return this.delegateFS.open(path, bufferSize);
}
@Override
public FSDataOutputStream create(final Path path, final FsPermission fsPermission, final boolean overwrite, final int bufferSize,
final short replication, final long blockSize, final Progressable progress) throws IOException {
return this.delegateFS.create(path, fsPermission, overwrite, bufferSize, replication, blockSize, progress);
}
@Override
public FSDataOutputStream create(final Path path, final FsPermission fsPermission, final boolean overwrite, final int bufferSize,
final short replication, final long blockSize, final Progressable progress, final boolean atomic) throws IOException {
LOG.debug("Parameter [atomic] will be ignored");
return this.create(path, fsPermission, overwrite, bufferSize, replication, blockSize, progress);
}
@Override
public FSDataOutputStream append(final Path path, final int bufferSize, final Progressable progress) throws IOException {
return this.delegateFS.append(path, bufferSize, progress);
}
@Override
public boolean rename(final Path pathFrom, final Path pathTo) throws IOException {
return this.delegateFS.rename(pathFrom, pathTo);
}
@Override
public boolean delete(final Path path, final boolean recursive) throws IOException {
return this.delegateFS.delete(path, recursive);
}
@Override
public FileStatus[] listStatus(final Path path) throws IOException {
return this.delegateFS.listStatus(path);
}
@Override
protected DirectoryEntries listStatusBatch(final Path path, final byte[] token) throws IOException {
return this.delegateFS.listStatusBatch(path, token);
}
@Override
public DirectoryEntries listStatusRecursive(final Path path, final byte[] token) throws IOException {
return this.delegateFS.listStatusRecursive(path, token);
}
@Override
public void setWorkingDirectory(final Path path) {
this.delegateFS.setWorkingDirectory(path);
}
@Override
public Path getWorkingDirectory() {
return this.delegateFS.getWorkingDirectory();
}
@Override
public boolean mkdirs(final Path path, final FsPermission fp) throws IOException {
return this.delegateFS.mkdirs(path, fp);
}
@Override
public FileStatus getFileStatus(final Path path) throws IOException {
return this.delegateFS.getFileStatus(path);
}
@Override
public Token<?>[] addDelegationTokens(final String renewer, final Credentials credentials) throws IOException {
return this.delegateFS.addDelegationTokens(renewer, credentials);
}
@Override
public DeleteBatchResult deleteBatch(final Collection<Path> files) throws IOException {
return this.delegateFS.deleteBatch(files);
}
@Override
public DeleteBatchResult deleteBatch(final Collection<Path> files, final boolean shouldWaitForResult) throws IOException {
return this.delegateFS.deleteBatch(files, shouldWaitForResult);
}
@Override
public DeleteBatchResult completeDeleteBatch(final String token, final long waitTimeSeconds) throws IOException {
return this.delegateFS.completeDeleteBatch(token, waitTimeSeconds);
}
@Override
public MergeResult merge(final Path path, final Collection<Path> sources) throws IOException {
return this.delegateFS.merge(path, sources);
}
@Override
public CopyResult copy(final Path path, final Path destination) throws IOException {
return this.delegateFS.copy(path, destination);
}
public Catalog getCatalog() {
return this.delegateFS.getCatalog();
}
HdlfsBaseFileSystem getDelegateFS() {
return this.delegateFS;
}
WebHdfsFileSystem getWebHdfsFileSystem() {
return this.delegateFS.getWebHdfsFileSystem();
}
FsCache getFsCache() {
return this.delegateFS.getFsCache();
}
RetryPolicy getRetryPolicy() {
return this.delegateFS.getRetryPolicy();
}
DirectAccessType getDirectAccessType() {
return this.directAccessType;
}
void addPathToPendingFilesCreated(final Path path) {
this.delegateFS.addPathToPendingFilesCreated(path);
}
void addConsistentWritePathPatterns(final Collection<String> patterns) {
this.delegateFS.addConsistentWritePathPatterns(patterns);
}
void removeConsistentWritePathPatterns(final Collection<String> patterns) {
this.delegateFS.removeConsistentWritePathPatterns(patterns);
}
// Necessary for unit tests
protected WebHdfsFileSystem createWebHdfsFileSystem(final boolean sslEnabled) {
return sslEnabled ? new SWebHdfsFileSystem() : new WebHdfsFileSystem();
}
private void initializeGlobalByteBufferPool(final Configuration conf) {
// The ByteBufferPool is the same one used by Storage Gateway and HDL Files, and it uses DIRECT buffers by default
// In the context of our Spark driver, we want the default to be HEAP buffers
final String bufferType = conf.get(HdlfsConstants.FS_GLOBAL_BUFFER_POOL_BUFFER_TYPE_KEY, HdlfsConstants.FS_GLOBAL_BUFFER_POOL_BUFFER_TYPE_DEFAULT);
final String threadLocalBufferType = conf.get(HdlfsConstants.FS_GLOBAL_BUFFER_POOL_THREAD_LOCAL_BUFFER_TYPE_KEY, HdlfsConstants.FS_GLOBAL_BUFFER_POOL_THREAD_LOCAL_BUFFER_TYPE_DEFAULT);
// Write the resolved values back so that ByteBufferPool.initialize(conf) sees them as the effective defaults
conf.set(HdlfsConstants.FS_GLOBAL_BUFFER_POOL_BUFFER_TYPE_KEY, bufferType);
conf.set(HdlfsConstants.FS_GLOBAL_BUFFER_POOL_THREAD_LOCAL_BUFFER_TYPE_KEY, threadLocalBufferType);
ByteBufferPool.initialize(conf);
}
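// Builds the (S)WebHDFS delegate URI and configuration. Three cases are handled:
// an explicit endpoint combined with a fully qualified URI, an explicit endpoint with
// the URI authority reused as the file container, and no endpoint at all (container
// taken from "fs.hdlfs.filecontainer" or parsed from the URI authority).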
private WebHdfsFileSystem initializeWebHdfsFileSystem(final URI fsUri, final Configuration conf) throws IOException {
final boolean sslEnabled = conf.getBoolean(HdlfsConstants.FS_HDLFS_SSL_ENABLED_KEY, true);
final WebHdfsFileSystem webHdfsFileSystem = this.createWebHdfsFileSystem(sslEnabled);
final int port = fsUri.getPort() > 0 ? fsUri.getPort() : (sslEnabled ? HdlfsConstants.HTTPS_PORT : HdlfsConstants.HTTP_PORT);
URI webHdfsFsUri;
String fileContainer;
// We start by checking if the endpoint was provided as a configuration.
// If it was, then we inject it in the delegatedFS URI as the new authority.
// In this case, we consider the old authority as the fileContainer to be used.
final String endpoint = conf.get(HdlfsConstants.FS_HDLFS_ENDPOINT_KEY);
final String[] parsedHost = parseFsUriAuthority(fsUri);
if (endpoint != null && !endpoint.isEmpty()) {
// In case endpoint parameter was specified, but we have a fully qualified URI, the fully qualified URI will be used.
if (isParsedHostValid(parsedHost)) {
fileContainer = parsedHost[0];
try {
webHdfsFsUri = new URI(webHdfsFileSystem.getScheme(), fsUri.getUserInfo(), fsUri.getHost(), port, fsUri.getPath(), fsUri.getQuery(), fsUri.getFragment());
} catch (final URISyntaxException ex) {
throw new IOException(ex);
}
} else {
// Otherwise, use the endpoint parameter as the host and use the FsUri authority as the filecontainer
fileContainer = fsUri.getAuthority();
LOG.debug("Endpoint [{}] was provided, injecting it in the delegatedFS URI as the new authority and considering old authority [{}] as the FileContainer", endpoint, fileContainer);
try {
webHdfsFsUri = new URI(webHdfsFileSystem.getScheme(), endpoint, fsUri.getPath(), fsUri.getQuery(), fsUri.getFragment());
} catch (final URISyntaxException ex) {
throw new IOException(ex);
}
}
} else {
/* If the endpoint was not provided, we ensure that:
- If "fs.hdlfs.filecontainer" property is provided, simply use that value to set the header.
- If "fs.hdlfs.filecontainer" property is not provided, we need to parse the file container from the URI to
keep supporting previous configurations.
In both cases, we use the URI as it is.
*/
fileContainer = conf.get(HdlfsConstants.FS_HDLFS_FILECONTAINER_KEY);
if (fileContainer == null) {
if (!isParsedHostValid(parsedHost)) {
throw new IOException("No valid fully qualified URI, endpoint or FileContainer was provided");
}
// File container is null, so it will come from the URI.
fileContainer = parsedHost[0];
}
try {
webHdfsFsUri = new URI(webHdfsFileSystem.getScheme(), fsUri.getUserInfo(), fsUri.getHost(), port, fsUri.getPath(), fsUri.getQuery(), fsUri.getFragment());
} catch (final URISyntaxException ex) {
throw new IOException(ex);
}
}
final Configuration webHdfsFsConf = new Configuration(conf);
// All relevant fs.hdlfs configurations
final boolean retryPolicyEnabled = conf.getBoolean(HdlfsConstants.FS_HDLFS_HTTP_CLIENT_RETRY_POLICY_ENABLED_KEY, HdlfsConstants.FS_HDLFS_HTTP_CLIENT_RETRY_POLICY_ENABLED_DEFAULT);
webHdfsFsConf.setBoolean(HdfsClientConfigKeys.HttpClient.RETRY_POLICY_ENABLED_KEY, retryPolicyEnabled);
final String retryPolicySpec = conf.get(HdlfsConstants.FS_HDLFS_HTTP_CLIENT_RETRY_POLICY_SPEC_KEY, HdlfsConstants.FS_HDLFS_HTTP_CLIENT_RETRY_POLICY_SPEC_DEFAULT);
webHdfsFsConf.set(HdfsClientConfigKeys.HttpClient.RETRY_POLICY_SPEC_KEY, retryPolicySpec);
final int retryMaxAttempts = conf.getInt(HdlfsConstants.FS_HDLFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_KEY, HdlfsConstants.FS_HDLFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT);
webHdfsFsConf.setInt(HdfsClientConfigKeys.HttpClient.RETRY_MAX_ATTEMPTS_KEY, retryMaxAttempts);
final int failoverMaxAttempts = conf.getInt(HdlfsConstants.FS_HDLFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY, HdlfsConstants.FS_HDLFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT);
webHdfsFsConf.setInt(HdfsClientConfigKeys.HttpClient.FAILOVER_MAX_ATTEMPTS_KEY, failoverMaxAttempts);
final int failoverSleepBase = conf.getInt(HdlfsConstants.FS_HDLFS_HTTP_CLIENT_FAILOVER_SLEEP_BASE_MS_KEY, HdlfsConstants.FS_HDLFS_HTTP_CLIENT_FAILOVER_SLEEP_BASE_MS_DEFAULT);
webHdfsFsConf.setInt(HdfsClientConfigKeys.HttpClient.FAILOVER_SLEEPTIME_BASE_KEY, failoverSleepBase);
final int failoverSleepMax = conf.getInt(HdlfsConstants.FS_HDLFS_HTTP_CLIENT_FAILOVER_SLEEP_MAX_MS_KEY, HdlfsConstants.FS_HDLFS_HTTP_CLIENT_FAILOVER_SLEEP_MAX_MS_DEFAULT);
webHdfsFsConf.setInt(HdfsClientConfigKeys.HttpClient.FAILOVER_SLEEPTIME_MAX_KEY, failoverSleepMax);
// Timeout
final int connectTimeout = conf.getInt("fs.hdlfs.socket.connect-timeout", URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT);
webHdfsFsConf.setInt("dfs.webhdfs.socket.connect-timeout", connectTimeout);
final int readTimeout = conf.getInt("fs.hdlfs.socket.read-timeout", URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT);
webHdfsFsConf.setInt("dfs.webhdfs.socket.read-timeout", readTimeout);
// The file container may still be empty when the connection-id property is used
webHdfsFsConf.set(HdfsClientConfigKeys.DFS_WEBHDFS_CUSTOM_CONNECTION_CONFIGURATOR_IMPL, HdlfsConnectionConfigurator.class.getName());
webHdfsFsConf.set(HdlfsConstants.FS_HDLFS_FILECONTAINER_KEY, fileContainer);
webHdfsFsConf.setBoolean(HdlfsFileSystemCapabilities.HDLFS_OPERATION_LISTSTATUSBATCH_USE_WITH_PAGE_ID_KEY, true);
webHdfsFsConf.setBoolean(HdlfsFileSystemCapabilities.HDLFS_STATUS_CODE_FORBIDDEN_MUTATE_EXCEPTION_KEY, true);
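// Judging by the property names: DEFAULT open mode reads in fixed-size chunks without
// redirects and with change detection, while COMPATIBLE restores the classic WebHDFS
// behavior (bound checks and expect-redirect).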
final OpenOperationMode openOperationMode = conf.getEnum(HdlfsConstants.FS_HDLFS_OPERATION_OPEN_MODE_KEY, OpenOperationMode.DEFAULT);
if (openOperationMode == OpenOperationMode.DEFAULT) {
webHdfsFsConf.setBoolean("fs.webhdfs.operation.open.chunked.enabled", true);
final int chunkSize = conf.getInt(HdlfsConstants.FS_HDLFS_OPERATION_OPEN_CHUNK_SIZE, HdlfsConstants.FS_HDLFS_OPERATION_OPEN_CHUNK_SIZE_DEFAULT);
webHdfsFsConf.setInt("fs.webhdfs.operation.open.chunk.size", chunkSize);
webHdfsFsConf.setBoolean("fs.webhdfs.operation.open.bound.check.enabled", false);
webHdfsFsConf.setBoolean("fs.webhdfs.operation.open.expect-redirect.enabled", false);
webHdfsFsConf.setBoolean("fs.webhdfs.change.detection.enabled", true);
} else if (openOperationMode == OpenOperationMode.COMPATIBLE) {
webHdfsFsConf.setBoolean("fs.webhdfs.operation.open.chunked.enabled", false);
webHdfsFsConf.setBoolean("fs.webhdfs.operation.open.bound.check.enabled", true);
webHdfsFsConf.setBoolean("fs.webhdfs.operation.open.expect-redirect.enabled", true);
webHdfsFsConf.setBoolean("fs.webhdfs.change.detection.enabled", false);
}
webHdfsFileSystem.initialize(webHdfsFsUri, webHdfsFsConf);
return webHdfsFileSystem;
}
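// Combines the client-side Direct Access mode (DISABLED/ACCEPT/EXPECT) with the type
// advertised by the server's whoami response: EXPECT fails fast when the server offers
// no (or an unrecognized) type, while ACCEPT logs and falls back to plain WebHDFS.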
private DirectAccessType resolveDirectAccessType(final Configuration conf, final WebHdfsFileSystem webHdfsFileSystem) throws IOException {
final String directAccessModeConf = conf.get(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_MODE_KEY);
if (directAccessModeConf == null) {
return null;
}
final DirectAccessMode directAccessMode;
try {
directAccessMode = DirectAccessMode.valueOf(directAccessModeConf);
} catch (final IllegalArgumentException ex) {
throw new IllegalArgumentException(String.format("Direct Access mode configuration should be [%s], [%s] or [%s], but was [%s]", DirectAccessMode.DISABLED.name(), DirectAccessMode.ACCEPT.name(), DirectAccessMode.EXPECT.name(), directAccessModeConf), ex);
}
if (directAccessMode == DirectAccessMode.DISABLED) {
return null;
}
final WhoamiResult whoamiResult = webHdfsFileSystem.whoami();
final String directAccessTypeRawValue = whoamiResult.getOptions()
.stream()
.filter(option -> HdlfsConstants.WHOAMI_OPTIONS_DIRECT_ACCESS_TYPE_KEY.equals(option.getKey()))
.map(WhoamiResultOption::getValue)
.findFirst()
.orElse(null);
if (directAccessTypeRawValue == null) {
if (directAccessMode == DirectAccessMode.EXPECT) {
throw new IllegalArgumentException(String.format("Direct Access mode is set to [%s] but it is not supported by the server.", DirectAccessMode.EXPECT.name()));
} else if (directAccessMode == DirectAccessMode.ACCEPT) {
LOG.info("Direct Access is not available. Will proceed since Direct Access mode is [{}].", DirectAccessMode.ACCEPT.name());
}
return null;
}
DirectAccessType directAccessType = null;
try {
directAccessType = DirectAccessType.valueOf(directAccessTypeRawValue);
} catch (final IllegalArgumentException ex) {
if (directAccessMode == DirectAccessMode.EXPECT) {
throw new IllegalArgumentException(String.format("Unrecognized Direct Access type [%s]", directAccessTypeRawValue), ex);
} else if (directAccessMode == DirectAccessMode.ACCEPT) {
LOG.warn("Unrecognized Direct Access type [{}]. Will proceed since Direct Access mode is [{}].", directAccessTypeRawValue, DirectAccessMode.ACCEPT.name());
}
}
return directAccessType;
}
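// Maps the advertised direct-access type to its delegate implementation; the
// object-store delegates share the static HTTP client, and WASB additionally
// receives the multipart-upload thread pool.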
private HdlfsBaseFileSystem resolveDelegateFs(final DirectAccessType directAccessType, final WebHdfsFileSystem webHdfsFileSystem) {
if (directAccessType == DirectAccessType.GCS) {
return new GcsFileSystem(webHdfsFileSystem, directAccessHttpClient);
} else if (directAccessType == DirectAccessType.S3) {
return new S3FileSystem(webHdfsFileSystem, directAccessHttpClient);
} else if (directAccessType == DirectAccessType.WASB) {
return new WasbFileSystem(webHdfsFileSystem, directAccessHttpClient, directAccessMultipartUploadThreadPool);
} else {
return new HdlfsBaseFileSystem(webHdfsFileSystem);
}
}
}
// © 2021-2024 SAP SE or an SAP affiliate company. All rights reserved.