/*
 * com.sap.hana.datalake.files.HdlfsFileSystem Maven / Gradle / Ivy
 * Go to download
 * An implementation of org.apache.hadoop.fs.FileSystem targeting SAP HANA Data Lake Files.
 */
package com.sap.hana.datalake.files;
import com.sap.hana.datalake.files.enumeration.CreateOperationMode;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.SWebHdfsFileSystem;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.Progressable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Hadoop {@link FileSystem} for the {@code hdlfs} scheme.
 *
 * <p>Every operation is forwarded to a wrapped WebHDFS file system ({@code delegateFS}); paths are
 * translated between the {@code hdlfs} scheme and the delegate's scheme/authority on the way in and out.
 * When the FileOutputCommitter optimizations are enabled, writes below a {@code /_temporary/} directory
 * are redirected and renames/deletes of such paths are skipped.
 */
public class HdlfsFileSystem extends FileSystem {
/** URI scheme handled by this file system; returned by {@link #getScheme()}. */
public static final String SCHEME = "hdlfs";
private static final Logger LOG = LoggerFactory.getLogger(HdlfsFileSystem.class);
/** Path fragment used by create/rename/delete to recognise temporary paths when optimizations are enabled. */
private static final String TEMPORARY_DIR = "/_temporary/";
/** Pattern applied to the URI authority; group 1 is read as the file container, group 2 as the endpoint. */
private static final Pattern AUTHORITY_PATTERN = Pattern.compile(HdlfsConstants.FS_HDLFS_AUTHORITY_REGEX);
/** FileOutputCommitter algorithm version required when the optimizations are enabled for a file container. */
private static final String FILEOUTPUTCOMMITTER_VERSION = "2";
/** File system that actually performs the operations; assigned in initialize(). */
private FileSystem delegateFS;
/** The URI this file system was initialized with; returned by {@link #getUri()}. */
private URI uri;
/** Policy consulted by checkCreateOperation(); read from the configuration in initialize(). */
private CreateOperationMode createOperationMode;
/** Whether the FileOutputCommitter optimizations are enabled (configuration flag, default false). */
private boolean optimizeFileOutputCommit;
/** True when the initialization URI carried an explicit port (port > 0). */
private boolean fsUriRequiresPort;
/**
 * Initializes this file system, choosing the delegate implementation from the SSL setting.
 *
 * <p>SSL is considered enabled unless {@code HdlfsConstants.FS_HDLFS_SSL_ENABLED_KEY} is set to false.
 *
 * @param name URI of the file system
 * @param conf configuration to read the settings from
 * @throws IOException if the delegating initialization fails
 */
@Override
public void initialize(final URI name, final Configuration conf) throws IOException {
    final WebHdfsFileSystem delegate;
    if (conf.getBoolean(HdlfsConstants.FS_HDLFS_SSL_ENABLED_KEY, true)) {
        delegate = new SWebHdfsFileSystem();
    } else {
        delegate = new WebHdfsFileSystem();
    }
    this.initialize(name, conf, delegate);
}
/**
 * Initializes this file system on top of the given WebHDFS delegate.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Resolve the file container: taken from {@code HdlfsConstants.FS_HDLFS_FILECONTAINER_KEY} when set
 *       (it may be empty, e.g. when a connection id is used); otherwise parsed from the URI authority.</li>
 *   <li>Read the FileOutputCommitter optimization flag (off by default) and, when it is on and the file
 *       container is non-empty, require the committer algorithm version to be "2".</li>
 *   <li>Build the delegate URI (delegate scheme, explicit port or 443/80) and a copy of the configuration
 *       carrying the connection configurator class and the file container.</li>
 *   <li>Initialize the delegate file system.</li>
 * </ol>
 *
 * @param fsUri             URI of this file system
 * @param conf              configuration to read the settings from
 * @param webHdfsFileSystem delegate that will execute the operations
 * @throws IOException              if the file container or endpoint cannot be parsed from the URI, the
 *                                  delegate URI is invalid, or the delegate fails to initialize
 * @throws IllegalArgumentException if the optimizations are enabled for a file container and the configured
 *                                  FileOutputCommitter algorithm version is not "2"
 */
public void initialize(final URI fsUri, final Configuration conf, final WebHdfsFileSystem webHdfsFileSystem) throws IOException {
    super.initialize(fsUri, conf);

    String fileContainer = conf.get(HdlfsConstants.FS_HDLFS_FILECONTAINER_KEY);
    if (fileContainer == null) {
        // Property absent: fall back to the container/endpoint encoded in the URI authority
        // so that older configurations keep working.
        final String[] containerAndEndpoint = this.parseURI(fsUri);
        fileContainer = containerAndEndpoint[0];
        final String endpoint = containerAndEndpoint[1];

        if (fileContainer == null || fileContainer.isEmpty()) {
            throw new IOException("Failed to parse File container from the URI.");
        }
        if (endpoint == null || endpoint.isEmpty()) {
            throw new IOException("Failed to parse HDLFS endpoint from the URI.");
        }
    }

    // FileOutputCommitter optimizations are opt-in.
    this.optimizeFileOutputCommit = conf.getBoolean(HdlfsConstants.FS_HDLFS_OPTIMIZE_FILEOUTPUTCOMMIT_ENABLED_KEY, false);

    // The committer version only matters when talking to a file container (non-empty value)
    // with the optimizations switched on.
    final boolean mustValidateCommitterVersion = !fileContainer.isEmpty() && this.isOptimizeFileOutputCommitEnabled();
    if (mustValidateCommitterVersion) {
        final String configuredVersion = conf.get(HdlfsConstants.MAPREDUCE_FILEOUTPUTCOMMITTER_ALGORITHM_VERSION_KEY);
        if (!FILEOUTPUTCOMMITTER_VERSION.equals(configuredVersion)) {
            throw new IllegalArgumentException(String.format("File Output Committer version is %s but should be %s",
                    configuredVersion, FILEOUTPUTCOMMITTER_VERSION));
        }
    }

    final int configuredPort = fsUri.getPort();
    this.uri = fsUri;
    this.fsUriRequiresPort = configuredPort > 0;

    final int delegatePort;
    if (this.fsUriRequiresPort) {
        delegatePort = configuredPort;
    } else if (webHdfsFileSystem instanceof SWebHdfsFileSystem) {
        delegatePort = 443;
    } else {
        delegatePort = 80;
    }

    final Configuration delegateFsConf = new Configuration(conf);
    final URI delegateFsUri;
    try {
        delegateFsUri = new URI(webHdfsFileSystem.getScheme(), fsUri.getUserInfo(), fsUri.getHost(),
                delegatePort, fsUri.getPath(), fsUri.getQuery(), fsUri.getFragment());
    } catch (final URISyntaxException ex) {
        throw new IOException(ex);
    }

    // The file container may legitimately be empty here (connection-id based setups).
    delegateFsConf.set(HdfsClientConfigKeys.DFS_WEBHDFS_CUSTOM_CONNECTION_CONFIGURATOR_IMPL, HdlfsConnectionConfigurator.class.getName());
    delegateFsConf.set(HdlfsConstants.FS_HDLFS_FILECONTAINER_KEY, fileContainer);

    this.createOperationMode = conf.getEnum(HdlfsConstants.FS_HDLFS_OPERATION_CREATE_MODE_KEY, CreateOperationMode.DEFAULT);

    this.delegateFS = webHdfsFileSystem;
    this.delegateFS.initialize(delegateFsUri, delegateFsConf);
}
/**
 * Returns the URI scheme served by this file system.
 *
 * @return the value of {@link #SCHEME}
 */
@Override
public String getScheme() {
    return HdlfsFileSystem.SCHEME;
}
/**
 * Returns the URI this file system was initialized with.
 *
 * @return the stored file system URI (null until initialize has assigned it)
 */
@Override
public URI getUri() {
    final URI fileSystemUri = this.uri;
    return fileSystemUri;
}
/**
 * Opens the file at {@code path} through the delegate file system.
 *
 * @param path       file to open
 * @param bufferSize buffer size handed to the delegate
 * @return the input stream produced by the delegate
 * @throws IOException if the delegate fails to open the file
 */
@Override
public FSDataInputStream open(final Path path, final int bufferSize) throws IOException {
    LOG.debug("Performing OPEN on: {}", path.toString());
    return this.delegateFS.open(this.rewritePathToDelegateFs(path), bufferSize);
}
/**
 * Creates a file through the delegate file system.
 *
 * <p>With the FileOutputCommitter optimizations disabled the request is validated by
 * {@code checkCreateOperation} and forwarded unchanged.
 *
 * <p>With the optimizations enabled, a path containing {@code /_temporary/} is redirected: the file is
 * written into the directory that precedes the first {@code /_temporary/} and is named
 * {@code <name of the original parent directory>-<original file name>}. Other paths are used as given.
 * In this branch the delegate is always called with {@code overwrite = true}; the caller's
 * {@code overwrite} flag is only used for the {@code checkCreateOperation} validation.
 *
 * @param path         requested file path
 * @param fsPermission permission handed to the delegate
 * @param overwrite    whether an existing file may be replaced
 * @param bufferSize   buffer size handed to the delegate
 * @param replication  replication handed to the delegate
 * @param blockSize    block size handed to the delegate
 * @param progress     progress callback handed to the delegate
 * @return the output stream produced by the delegate
 * @throws IOException if validation fails or the delegate fails to create the file
 */
@Override
public FSDataOutputStream create(final Path path, final FsPermission fsPermission, final boolean overwrite, final int bufferSize,
        final short replication, final long blockSize, final Progressable progress) throws IOException {
    if (!this.isOptimizeFileOutputCommitEnabled()) {
        // Optimizations off: plain pass-through.
        LOG.debug("FileOutputCommitter optimizations disabled. Performing CREATE on: {}", path.toString());
        this.checkCreateOperation(path, overwrite);
        final Path passThroughPath = this.rewritePathToDelegateFs(path);
        return this.delegateFS.create(passThroughPath, fsPermission, overwrite, bufferSize, replication, blockSize, progress);
    }

    final String requestedPath = path.toString();
    final int tempDirStart = requestedPath.indexOf(TEMPORARY_DIR);

    final Path targetPath;
    if (tempDirStart < 0) {
        targetPath = path;
    } else {
        // Last segment of the parent directory becomes the prefix of the redirected file name.
        final String parentDir = path.getParent().toString();
        final String attemptId = parentDir.substring(parentDir.lastIndexOf("/") + 1);
        final String redirectedName = attemptId + "-" + path.getName();
        // Keep everything up to and including the slash that starts "/_temporary/".
        final String outputDir = requestedPath.substring(0, tempDirStart + 1);
        targetPath = new Path(outputDir + redirectedName);
    }

    LOG.debug("Performing CREATE on: {}", targetPath);
    this.checkCreateOperation(targetPath, overwrite);

    final Path delegateTarget = this.rewritePathToDelegateFs(targetPath);
    return this.delegateFS.create(delegateTarget, fsPermission, true, bufferSize, replication, blockSize, progress);
}
/**
 * Appends to the file at {@code path} through the delegate file system.
 *
 * @param path       file to append to
 * @param bufferSize buffer size handed to the delegate
 * @param progress   progress callback handed to the delegate
 * @return the output stream produced by the delegate
 * @throws IOException if the delegate fails
 */
@Override
public FSDataOutputStream append(final Path path, final int bufferSize, final Progressable progress) throws IOException {
    LOG.debug("Performing APPEND on: {}", path.toString());
    return this.delegateFS.append(this.rewritePathToDelegateFs(path), bufferSize, progress);
}
/**
 * Renames {@code pathFrom} to {@code pathTo} through the delegate file system.
 *
 * <p>When the FileOutputCommitter optimizations are enabled and the source path contains
 * {@code /_temporary/}, nothing is renamed and {@code true} is returned.
 *
 * @param pathFrom source path
 * @param pathTo   destination path
 * @return {@code true} for a skipped temporary path, otherwise the delegate's result
 * @throws IOException if the delegate fails
 */
@Override
public boolean rename(final Path pathFrom, final Path pathTo) throws IOException {
    if (this.isOptimizeFileOutputCommitEnabled()) {
        final String source = pathFrom.toString();
        if (source.contains(TEMPORARY_DIR)) {
            LOG.debug("Skipping RENAME on path as this is a temporary folder: {}", source);
            return true;
        }
        LOG.debug("Performing RENAME: {} to {}", pathFrom, pathTo);
    } else {
        // Optimizations off: plain pass-through.
        LOG.debug("FileOutputCommitter optimizations disabled. Performing RENAME: {} to {}", pathFrom.toString(), pathTo.toString());
    }

    final Path delegateSource = this.rewritePathToDelegateFs(pathFrom);
    final Path delegateDestination = this.rewritePathToDelegateFs(pathTo);
    return this.delegateFS.rename(delegateSource, delegateDestination);
}
/**
 * Deletes {@code path} through the delegate file system.
 *
 * <p>When the FileOutputCommitter optimizations are enabled and the path contains
 * {@code /_temporary/}, nothing is deleted and {@code true} is returned.
 *
 * @param path      path to delete
 * @param recursive recursion flag handed to the delegate
 * @return {@code true} for a skipped temporary path, otherwise the delegate's result
 * @throws IOException if the delegate fails
 */
@Override
public boolean delete(final Path path, final boolean recursive) throws IOException {
    if (this.isOptimizeFileOutputCommitEnabled()) {
        final String target = path.toString();
        if (target.contains(TEMPORARY_DIR)) {
            LOG.debug("Skipping DELETE on path as this is a temporary folder: {}", target);
            return true;
        }
        LOG.debug("Performing DELETE on: {}", target);
    } else {
        // Optimizations off: plain pass-through.
        LOG.debug("FileOutputCommitter optimizations disabled. Performing DELETE on: {}", path.toString());
    }

    return this.delegateFS.delete(this.rewritePathToDelegateFs(path), recursive);
}
/**
 * Lists {@code path} through the delegate file system and converts every entry with
 * {@code HdlfsFileStatus.create}.
 *
 * @param path path to list
 * @return the converted entries, in the order returned by the delegate
 * @throws IOException if the delegate fails
 */
@Override
public FileStatus[] listStatus(final Path path) throws IOException {
    LOG.debug("Performing LISTSTATUS on: {}", path.toString());

    final FileStatus[] delegateStatuses = this.delegateFS.listStatus(this.rewritePathToDelegateFs(path));

    // The result array is an HdlfsFileStatus[] at runtime, exactly as before.
    final FileStatus[] converted = new HdlfsFileStatus[delegateStatuses.length];
    for (int i = 0; i < delegateStatuses.length; i++) {
        converted[i] = HdlfsFileStatus.create(delegateStatuses[i], this.fsUriRequiresPort);
    }
    return converted;
}
/**
 * Sets the working directory on the delegate file system.
 *
 * @param path new working directory; rewritten to the delegate's scheme/authority when it uses
 *             the {@code hdlfs} scheme
 */
@Override
public void setWorkingDirectory(final Path path) {
    this.delegateFS.setWorkingDirectory(this.rewritePathToDelegateFs(path));
}
/**
 * Returns the delegate's working directory, rewritten by {@code rewritePathFromDelegateFs}.
 *
 * @return the current working directory
 */
@Override
public Path getWorkingDirectory() {
    return this.rewritePathFromDelegateFs(this.delegateFS.getWorkingDirectory());
}
/**
 * No-op: directory creation is not supported, so nothing is sent to the delegate.
 *
 * @param path ignored
 * @param fp   ignored
 * @return always {@code true}
 */
@Override
public boolean mkdirs(final Path path, final FsPermission fp) {
    // Report success without doing anything.
    LOG.debug("MKDIRS operation is not supported, doing nothing.");
    return true;
}
/**
 * Fetches the status of {@code path} from the delegate and converts it with
 * {@code HdlfsFileStatus.create}.
 *
 * @param path path to inspect
 * @return the converted file status
 * @throws IOException if the delegate fails (for example because the path does not exist)
 */
@Override
public FileStatus getFileStatus(final Path path) throws IOException {
    LOG.debug("Performing GETFILESTATUS on: {}", path.toString());
    final FileStatus delegateStatus = this.delegateFS.getFileStatus(this.rewritePathToDelegateFs(path));
    return HdlfsFileStatus.create(delegateStatus, this.fsUriRequiresPort);
}
@Override
public Token>[] addDelegationTokens(final String renewer, final Credentials credentials) throws IOException {
return this.delegateFS.addDelegationTokens(renewer, credentials);
}
/**
 * Extracts the file container and the endpoint from the authority of {@code name}.
 *
 * @param name URI whose authority is matched against {@code AUTHORITY_PATTERN}
 * @return a two-element array: index 0 is capture group 1 (file container), index 1 is capture
 *         group 2 (endpoint); either element may be null if the group did not participate in the match
 * @throws IOException if the URI has no authority or the authority does not match the pattern
 */
private String[] parseURI(final URI name) throws IOException {
    final String authority = name.getAuthority();
    if (authority == null) {
        // URI.getAuthority() returns null for URIs without an authority component; passing null to
        // Pattern.matcher would raise a NullPointerException instead of the declared IOException.
        throw new IOException("Authority does not conform to pattern");
    }

    final Matcher matcher = AUTHORITY_PATTERN.matcher(authority);
    if (!matcher.find()) {
        throw new IOException("Authority does not conform to pattern");
    }

    final String container = matcher.group(1);
    final String endpoint = matcher.group(2);
    return new String[] { container, endpoint };
}
/**
 * Translates an {@code hdlfs} path into the equivalent path on the delegate file system.
 *
 * @param path path to translate
 * @return {@code path} itself when its scheme is not {@code hdlfs} (including no scheme at all);
 *         otherwise a path with the delegate's scheme and authority and the same path component
 */
protected Path rewritePathToDelegateFs(final Path path) {
    final URI original = path.toUri();
    if (SCHEME.equals(original.getScheme())) {
        final URI delegateBase = this.delegateFS.getUri();
        return new Path(delegateBase.getScheme(), delegateBase.getAuthority(), original.getPath());
    }
    return path;
}
/**
 * Translates a delegate file system path back into an {@code hdlfs} path.
 *
 * @param path path to translate
 * @return {@code path} itself when it has no scheme or already uses {@code hdlfs}; otherwise an
 *         {@code hdlfs} path with the same path component whose authority is the delegate's full
 *         authority when the initialization URI had an explicit port, or only the delegate's host
 *         when it did not
 */
protected Path rewritePathFromDelegateFs(final Path path) {
    final URI original = path.toUri();
    final String scheme = original.getScheme();
    if (scheme == null || SCHEME.equals(scheme)) {
        return path;
    }

    final URI delegateBase = this.delegateFS.getUri();
    final String authority = this.fsUriRequiresPort ? delegateBase.getAuthority() : delegateBase.getHost();
    return new Path(HdlfsFileSystem.SCHEME, authority, original.getPath());
}
/**
 * Tells whether the FileOutputCommitter optimizations are enabled.
 *
 * @return the flag read from the configuration during initialization
 */
protected boolean isOptimizeFileOutputCommitEnabled() {
    final boolean enabled = this.optimizeFileOutputCommit;
    return enabled;
}
/**
 * Validates a CREATE request against the configured {@code CreateOperationMode}.
 *
 * <ul>
 *   <li>{@code OVERWRITE}: always accepted, without looking up the path.</li>
 *   <li>{@code DEFAULT}: accepted without a lookup when {@code overwrite} is true; otherwise rejected
 *       if the path exists and is not a directory.</li>
 *   <li>{@code COMPATIBLE}: rejected if the path exists and either {@code overwrite} is false or the
 *       path is a directory.</li>
 * </ul>
 * A path that does not exist is always accepted.
 *
 * @param path      path about to be created
 * @param overwrite overwrite flag supplied by the caller
 * @throws FileAlreadyExistsException if the request is rejected as described above
 * @throws IOException                if the status lookup on the delegate fails for another reason
 */
protected void checkCreateOperation(final Path path, final boolean overwrite) throws IOException {
    final CreateOperationMode mode = this.createOperationMode;

    if (mode == CreateOperationMode.OVERWRITE) {
        if (!overwrite) {
            LOG.debug("Operation CREATE(path={}); ignoring parameter [overwrite=false]", path);
        }
        return;
    }
    if (overwrite && mode == CreateOperationMode.DEFAULT) {
        return;
    }

    final FileStatus existing;
    try {
        existing = this.delegateFS.getFileStatus(this.rewritePathToDelegateFs(path));
    } catch (final FileNotFoundException ex) {
        // Nothing there yet, so the create cannot clash with anything.
        LOG.debug("Path not found: " + path.toString(), ex);
        return;
    }
    final boolean isDirectory = existing.isDirectory();

    if (mode == CreateOperationMode.DEFAULT && !isDirectory) {
        throw new FileAlreadyExistsException(path.toString() + " already exists");
    }
    if (mode == CreateOperationMode.COMPATIBLE && (isDirectory || !overwrite)) {
        final String reason = isDirectory ? " is a directory" : " already exists";
        throw new FileAlreadyExistsException(path.toString() + reason);
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy