com.sap.hana.datalake.files.directaccess.gcs.GcsFileSystem


An implementation of org.apache.hadoop.fs.FileSystem targeting SAP HANA Data Lake Files.

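As a rough orientation before the source listing, the following non-authoritative sketch shows how the class might be wired up. It assumes an already configured shaded WebHdfsFileSystem delegate is available; the filesystem URI, file path, chunk size, default HttpClient, and the obtainDelegate() helper are purely illustrative and are not part of this artifact.

// Non-authoritative usage sketch. The URI, paths, chunk size, default HttpClient,
// and the obtainDelegate() helper are illustrative assumptions, not part of this library.
import com.sap.hana.datalake.files.HdlfsConstants;
import com.sap.hana.datalake.files.directaccess.gcs.GcsFileSystem;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.http.client.HttpClient;
import org.apache.http.impl.client.HttpClients;

import java.net.URI;
import java.nio.charset.StandardCharsets;

public class GcsFileSystemUsageSketch {

  public static void main(final String[] args) throws Exception {
    final Configuration conf = new Configuration();
    // The GCS create chunk size must be at least 262,144 bytes (256 KiB); initialize() rejects smaller values.
    conf.setInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_GCS_CREATE_CHUNK_SIZE_BYTES_KEY, 8 * 1024 * 1024);

    // Assumed to be provided by the surrounding HDLFS setup (endpoint, credentials, etc.).
    final WebHdfsFileSystem delegate = obtainDelegate(conf);
    final HttpClient httpClient = HttpClients.createDefault();

    final GcsFileSystem fs = new GcsFileSystem(delegate, httpClient);
    fs.initialize(URI.create("hdlfs://my-container.example.com/"), conf); // illustrative URI

    final Path file = new Path("/tmp/direct-access-example.txt");

    // Write through a GcsDirectAccessOutputStream (chunked upload via signed URLs).
    try (final FSDataOutputStream out = fs.create(file, FsPermission.getFileDefault(), /* overwrite */ true,
        /* bufferSize */ 4096, /* replication */ (short) 1, /* blockSize */ 128 * 1024 * 1024, /* progress */ null)) {
      out.write("hello".getBytes(StandardCharsets.UTF_8));
    }

    // Read back through a GcsDirectAccessInputStream.
    try (final FSDataInputStream in = fs.open(file, /* bufferSize */ 4096)) {
      System.out.println(in.read());
    }
  }

  // Hypothetical helper: constructing the shaded WebHdfsFileSystem delegate is out of scope here.
  private static WebHdfsFileSystem obtainDelegate(final Configuration conf) {
    throw new UnsupportedOperationException("Supply a configured WebHdfsFileSystem delegate");
  }
}

Whatever values are chosen, the create chunk size must stay at or above 262,144 bytes (256 KiB), mirroring the chunk granularity GCS enforces for resumable uploads; smaller values cause initialize() to throw an IllegalArgumentException.
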
// © 2023-2024 SAP SE or an SAP affiliate company. All rights reserved.
package com.sap.hana.datalake.files.directaccess.gcs;

import com.sap.hana.datalake.files.HdlfsBaseFileSystem;
import com.sap.hana.datalake.files.HdlfsConstants;
import com.sap.hana.datalake.files.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.util.Progressable;
import org.apache.http.client.HttpClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URI;

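/**
 * GCS Direct Access variant of the HDLFS {@code FileSystem}: {@code open} and {@code create}
 * return {@link GcsDirectAccessInputStream} / {@link GcsDirectAccessOutputStream} instances that
 * transfer data in chunks via signed URLs, using the chunk sizes and signed-URL expiration safety
 * margins read from the {@link Configuration} in {@link #initialize(URI, Configuration)}.
 */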
@InterfaceAudience.Private
public class GcsFileSystem extends HdlfsBaseFileSystem {

  private static final Logger LOG = LoggerFactory.getLogger(GcsFileSystem.class);

  // GCS limitation: https://cloud.google.com/storage/docs/performing-resumable-uploads#chunked-upload
  private static final int GCS_DIRECT_ACCESS_MINIMUM_CHUNK_SIZE = 262_144; // 256 KiB

  private int createChunkSize;
  private int openChunkSize;
  private int openSignedUrlExpirationSafetyMargin;
  private int createSignedUrlExpirationSafetyMargin;
  private HttpClient httpClient;

  public GcsFileSystem(final WebHdfsFileSystem webHdfsFileSystem, final HttpClient httpClient) {
    super(webHdfsFileSystem);
    this.httpClient = httpClient;
  }

  @Override
  public void initialize(final URI fsUri, final Configuration conf) throws IOException {
    super.initialize(fsUri, conf);
    final Configuration config = this.getConf();

    this.openChunkSize = config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_OPEN_CHUNK_SIZE_BYTES_KEY,
            HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_OPEN_CHUNK_SIZE_BYTES_DEFAULT);
    this.openSignedUrlExpirationSafetyMargin = config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_OPEN_SIGNED_URL_EXPIRATION_SAFETY_MARGIN_SECONDS_KEY,
            HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_OPEN_SIGNED_URL_EXPIRATION_SAFETY_MARGIN_SECONDS_DEFAULT);
    this.createSignedUrlExpirationSafetyMargin = config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_CREATE_SIGNED_URL_EXPIRATION_SAFETY_MARGIN_SECONDS_KEY,
            HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_CREATE_SIGNED_URL_EXPIRATION_SAFETY_MARGIN_SECONDS_DEFAULT);

    this.createChunkSize = config.getInt(HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_GCS_CREATE_CHUNK_SIZE_BYTES_KEY,
            HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_GCS_CREATE_CHUNK_SIZE_BYTES_DEFAULT);

    if (this.createChunkSize < GCS_DIRECT_ACCESS_MINIMUM_CHUNK_SIZE) {
      throw new IllegalArgumentException(String.format("Illegal value for property [%s]. Value was [%s] but minimum is [%s]", HdlfsConstants.FS_HDLFS_DIRECT_ACCESS_GCS_CREATE_CHUNK_SIZE_BYTES_KEY, this.createChunkSize, GCS_DIRECT_ACCESS_MINIMUM_CHUNK_SIZE));
    }
  }

  @Override
  public FSDataInputStream open(final Path path, final int bufferSize) throws IOException {
    LOG.debug("Calling open implementation from {}", GcsFileSystem.class);

    final Path delegateFsPath = this.rewritePathToDelegateFs(path);
    this.assertObjectExistsIfNeeded(delegateFsPath);

    return new FSDataInputStream(new GcsDirectAccessInputStream(delegateFsPath, this.openChunkSize,
            this.openSignedUrlExpirationSafetyMargin, this.getWebHdfsFileSystem(), this.httpClient));
  }

  @Override
  public FSDataOutputStream create(final Path path, final FsPermission fsPermission, final boolean overwrite,
                                   final int bufferSize, final short replication, final long blockSize,
                                   final Progressable progress) throws IOException {
    LOG.debug("Calling create implementation from {}", GcsFileSystem.class);

    this.checkCreateOperation(path, overwrite);

    final Path delegateFsPath = this.rewritePathToDelegateFs(path);
    final GcsDirectAccessOutputStream gcsDirectAccessOutputStream = new GcsDirectAccessOutputStream(delegateFsPath, overwrite, this.createChunkSize, this.getWebHdfsFileSystem(), this.httpClient, path, this.createSignedUrlExpirationSafetyMargin);

    return new FSDataOutputStream(gcsDirectAccessOutputStream, /* stats */ null);
  }

  @Override
  protected void setupMultipartUpload(final Configuration conf) {
    // Multipart upload is not needed for the ManifestCommitter, which is used when Direct Access is enabled
    // As GcsFileSystem will only be used with Direct Access, we can skip this setup
  }
}
