com.sap.hana.datalake.files.directaccess.gcs.GcsFileSystem Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sap-hdlfs Show documentation
An implementation of org.apache.hadoop.fs.FileSystem targeting SAP HANA Data Lake Files.
// © 2023-2024 SAP SE or an SAP affiliate company. All rights reserved.
package com.sap.hana.datalake.files.directaccess.gcs;
import com.sap.hana.datalake.files.HdlfsBaseFileSystem;
import com.sap.hana.datalake.files.HdlfsConstants;
import com.sap.hana.datalake.files.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.util.Progressable;
import org.apache.http.client.HttpClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;
@InterfaceAudience.Private
public class GcsFileSystem extends HdlfsBaseFileSystem {

  private static final Logger LOG = LoggerFactory.getLogger(GcsFileSystem.class);

  // GCS limitation: https://cloud.google.com/storage/docs/performing-resumable-uploads#chunked-upload
  // NOTE(review): GCS also requires resumable-upload chunks to be a MULTIPLE of 256 KiB;
  // only the minimum is enforced here — confirm whether non-multiples are handled downstream.
  private static final int GCS_DIRECT_ACCESS_MINIMUM_CHUNK_SIZE = 262_144; // 256 KiB

  // Resolved from the Configuration in initialize(); not usable before that call.
  private int createChunkSize;
  private int openChunkSize;
  private int openSignedUrlExpirationSafetyMargin;
  private int createSignedUrlExpirationSafetyMargin;

  // Injected once at construction and never reassigned; shared by all direct-access streams.
  private final HttpClient httpClient;

  /**
   * Creates a GCS direct-access file system that delegates metadata operations to the
   * given WebHDFS file system and performs data transfers through {@code httpClient}.
   *
   * @param webHdfsFileSystem delegate used for signed-URL and metadata operations
   * @param httpClient        HTTP client used to read/write object data via signed URLs
   */
  public GcsFileSystem(final WebHdfsFileSystem webHdfsFileSystem, final HttpClient httpClient) {
    super(webHdfsFileSystem);
    this.httpClient = httpClient;
  }

  /**
   * Initializes the file system and reads the direct-access tuning properties
   * (chunk sizes and signed-URL expiration safety margins) from the configuration.
   *
   * @throws IOException              if the superclass initialization fails
   * @throws IllegalArgumentException if the configured create chunk size is below the
   *                                  256 KiB minimum mandated by GCS resumable uploads
   */
  @Override
  public void initialize(final URI fsUri, final Configuration conf) throws IOException {
    super.initialize(fsUri, conf);

    final Configuration config = this.getConf();
    this.openChunkSize = config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_OPEN_CHUNK_SIZE_BYTES_KEY,
            HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_OPEN_CHUNK_SIZE_BYTES_DEFAULT);
    this.openSignedUrlExpirationSafetyMargin = config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_OPEN_SIGNED_URL_EXPIRATION_SAFETY_MARGIN_SECONDS_KEY,
            HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_OPEN_SIGNED_URL_EXPIRATION_SAFETY_MARGIN_SECONDS_DEFAULT);
    this.createSignedUrlExpirationSafetyMargin = config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_CREATE_SIGNED_URL_EXPIRATION_SAFETY_MARGIN_SECONDS_KEY,
            HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_CREATE_SIGNED_URL_EXPIRATION_SAFETY_MARGIN_SECONDS_DEFAULT);
    this.createChunkSize = config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_GCS_CREATE_CHUNK_SIZE_BYTES_KEY,
            HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_GCS_CREATE_CHUNK_SIZE_BYTES_DEFAULT);

    // Fail fast on a misconfiguration GCS would reject at upload time anyway.
    if (this.createChunkSize < GCS_DIRECT_ACCESS_MINIMUM_CHUNK_SIZE) {
      throw new IllegalArgumentException(String.format("Illegal value for property [%s]. Value was [%s] but minimum is [%s]", HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_GCS_CREATE_CHUNK_SIZE_BYTES_KEY, this.createChunkSize, GCS_DIRECT_ACCESS_MINIMUM_CHUNK_SIZE));
    }
  }

  /**
   * Opens the object at {@code path} for reading through a GCS direct-access stream
   * backed by signed URLs.
   *
   * @param path       path of the object to open
   * @param bufferSize ignored; chunking is governed by the configured open chunk size
   * @throws IOException if the existence check or stream creation fails
   */
  @Override
  public FSDataInputStream open(final Path path, final int bufferSize) throws IOException {
    LOG.debug("Calling open implementation from {}", GcsFileSystem.class);

    final Path delegateFsPath = this.rewritePathToDelegateFs(path);
    this.assertObjectExistsIfNeeded(delegateFsPath);

    return new FSDataInputStream(new GcsDirectAccessInputStream(delegateFsPath, this.openChunkSize,
            this.openSignedUrlExpirationSafetyMargin, this.getWebHdfsFileSystem(), this.httpClient));
  }

  /**
   * Creates the object at {@code path} through a GCS direct-access resumable-upload stream.
   * The {@code bufferSize}, {@code replication}, {@code blockSize} and {@code progress}
   * arguments are not used by the direct-access implementation.
   *
   * @throws IOException if the create precondition check or stream creation fails
   */
  @Override
  public FSDataOutputStream create(final Path path, final FsPermission fsPermission, final boolean overwrite,
                                   final int bufferSize, final short replication, final long blockSize,
                                   final Progressable progress) throws IOException {
    LOG.debug("Calling create implementation from {}", GcsFileSystem.class);
    this.checkCreateOperation(path, overwrite);

    final Path delegateFsPath = this.rewritePathToDelegateFs(path);
    final GcsDirectAccessOutputStream gcsDirectAccessOutputStream = new GcsDirectAccessOutputStream(delegateFsPath, overwrite, this.createChunkSize, this.getWebHdfsFileSystem(), this.httpClient, path, this.createSignedUrlExpirationSafetyMargin);

    return new FSDataOutputStream(gcsDirectAccessOutputStream, /* stats */ null);
  }

  @Override
  protected void setupMultipartUpload(final Configuration conf) {
    // Multipart upload is not needed for the ManifestCommitter, which is used when Direct Access is enabled
    // As GcsFileSystem will only be used with Direct Access, we can skip this setup
  }

}
// © 2023-2024 SAP SE or an SAP affiliate company. All rights reserved.
© 2015 - 2025 Weber Informatics LLC | Privacy Policy