All downloads are free. Search and download functionality uses the official Maven repository.

com.sap.hana.datalake.files.directaccess.s3.S3DirectAccessInputStream Maven / Gradle / Ivy

Go to download

An implementation of org.apache.hadoop.fs.FileSystem targeting SAP HANA Data Lake Files.

There is a newer version: 3.0.27
Show newest version
// © 2024 SAP SE or an SAP affiliate company. All rights reserved.
package com.sap.hana.datalake.files.directaccess.s3;

import com.sap.hana.datalake.files.HdlfsFileSystemCapabilities;
import com.sap.hana.datalake.files.directaccess.BaseDirectAccessInputStream;
import com.sap.hana.datalake.files.directaccess.BaseSignedUrl;
import com.sap.hana.datalake.files.utils.http.HttpClientUtils;
import org.apache.hadoop.fs.Path;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * {@link BaseDirectAccessInputStream} specialization that obtains an {@link S3SignedUrl} through the
 * file system's direct access call and reads the stored file length from the {@code Content-Range}
 * response header.
 */
/* package-private */ class S3DirectAccessInputStream extends BaseDirectAccessInputStream {

  /** Name of the response header carrying the served byte range and the total length. */
  private static final String CONTENT_RANGE_HEADER = "Content-Range";

  /** Matches {@code bytes x-y/z}; capture group 1 is {@code z}. */
  private static final Pattern CONTENT_RANGE_TOTAL_LENGTH_PATTERN = Pattern.compile("^bytes \\d+-\\d+/(\\d+)$");

  /** Most recently obtained signed URL; {@code null} until the first call to {@link #getSignedUrl()}. */
  private S3SignedUrl signedUrl;

  /**
   * Creates the stream; all arguments are passed unchanged to the superclass constructor.
   *
   * @param path                            file to be read
   * @param chunkSize                       chunk size handed to the base stream
   * @param signedUrlExpirationSafetyMargin safety margin later used when building the signed URL
   * @param webHdfsFileSystem               file system used to open direct access for {@code path}
   * @param httpClient                      HTTP client handed to the base stream
   */
  /* package-private */ S3DirectAccessInputStream(final Path path,
          final int chunkSize,
          final int signedUrlExpirationSafetyMargin,
          final WebHdfsFileSystem webHdfsFileSystem,
          final HttpClient httpClient) {
    super(path, chunkSize, signedUrlExpirationSafetyMargin, webHdfsFileSystem, httpClient);
  }

  /**
   * Returns the cached signed URL, first replacing it with a newly requested one when none has been
   * obtained yet or the cached one reports itself as expired.
   *
   * @return the signed URL to use
   * @throws IOException if requesting a new signed URL fails
   */
  @Override
  protected synchronized BaseSignedUrl getSignedUrl() throws IOException {
    final boolean refreshNeeded = this.signedUrl == null || this.signedUrl.isExpired();

    if (refreshNeeded) {
      this.signedUrl = this.requestSignedUrl();
    }

    return this.signedUrl;
  }

  /**
   * Extracts the total stored content length from the {@code Content-Range} header of the response.
   *
   * @param response HTTP response to inspect
   * @return the number following the {@code /} in the header value
   * @throws IllegalStateException if the header value is not of the form {@code bytes x-y/z}
   */
  @Override
  protected long getBackendStoredContentLength(final HttpResponse response) {
    final String contentRange = HttpClientUtils.getRequiredHeaderValue(response, CONTENT_RANGE_HEADER);
    final Matcher contentRangeMatcher = CONTENT_RANGE_TOTAL_LENGTH_PATTERN.matcher(contentRange);

    if (!contentRangeMatcher.matches()) {
      // not expected to happen - S3 should always answer with `Content-Range: bytes x-y/z`, x,y,z being natural numbers
      throw new IllegalStateException(String.format("Could not extract stored file content length from [%s] header: [%s]", CONTENT_RANGE_HEADER, contentRange));
    }

    return Long.parseLong(contentRangeMatcher.group(1));
  }

  /** Opens direct access for the path and wraps the returned signed URL using the configured safety margin. */
  private S3SignedUrl requestSignedUrl() throws IOException {
    final HdlfsFileSystemCapabilities.S3DirectAccessProperties s3Properties =
            (HdlfsFileSystemCapabilities.S3DirectAccessProperties) this.webHdfsFileSystem.openDirectAccess(this.path).getProperties();

    return S3SignedUrl.from(s3Properties.getSignedUrl(), this.signedUrlExpirationSafetyMargin);
  }

}

// © 2024 SAP SE or an SAP affiliate company. All rights reserved.




© 2015 - 2025 Weber Informatics LLC | Privacy Policy