All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sap.hana.datalake.files.directaccess.wasb.WasbDirectAccessInputStream Maven / Gradle / Ivy

Go to download

An implementation of org.apache.hadoop.fs.FileSystem targeting SAP HANA Data Lake Files.

There is a newer version: 3.0.27
Show newest version
// © 2023 SAP SE or an SAP affiliate company. All rights reserved.
package com.sap.hana.datalake.files.directaccess.wasb;

import com.sap.hana.datalake.files.HdlfsFileSystemCapabilities;
import com.sap.hana.datalake.files.directaccess.BaseDirectAccessInputStream;
import com.sap.hana.datalake.files.directaccess.BaseSignedUrl;
import com.sap.hana.datalake.files.utils.http.HttpClientUtils;
import org.apache.hadoop.fs.Path;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * WASB (Azure) flavour of {@link BaseDirectAccessInputStream}.
 *
 * <p>This class contributes two things to the base stream: the signed URL to read from, which is
 * cached and re-requested once it reports itself as expired, and the extraction of the total stored
 * file length from the {@code Content-Range} header of a backend response.
 */
/* package-private */ class WasbDirectAccessInputStream extends BaseDirectAccessInputStream {

  private static final String CONTENT_RANGE_HEADER_NAME = "Content-Range";
  // Captures the total length `z` out of a `bytes x-y/z` header value.
  private static final Pattern BACKEND_STORED_CONTENT_LENGTH_PATTERN = Pattern.compile("^bytes \\d+-\\d+/(\\d+)$");

  // Most recently obtained signed URL; null until the first call to getSignedUrl().
  private WasbSignedUrl signedUrl;

  /**
   * Creates the stream; every argument is handed unchanged to the superclass constructor.
   *
   * @param path                            file the stream refers to
   * @param chunkSize                       forwarded to the base class
   * @param signedUrlExpirationSafetyMargin forwarded to the base class; also used here when building a signed URL
   * @param webHdfsFileSystem               file system used to request direct access
   * @param httpClient                      forwarded to the base class
   */
  /* package-private */ WasbDirectAccessInputStream(final Path path, final int chunkSize, final int signedUrlExpirationSafetyMargin, final WebHdfsFileSystem webHdfsFileSystem, final HttpClient httpClient) {
    super(path, chunkSize, signedUrlExpirationSafetyMargin, webHdfsFileSystem, httpClient);
  }

  /**
   * Returns the signed URL for this stream's path, requesting a new one only when needed.
   *
   * <p>The cached URL is reused as long as it exists and does not report itself as expired.
   * Otherwise direct access is requested again from the file system and the cache is replaced.
   * The method is {@code synchronized}, so the check and the refresh happen under this instance's monitor.
   *
   * @return the cached or freshly obtained signed URL
   * @throws IOException declared by the overridden method and by the direct access request
   */
  @Override
  protected synchronized BaseSignedUrl getSignedUrl() throws IOException {
    final boolean refreshRequired = this.signedUrl == null || this.signedUrl.isExpired();

    if (refreshRequired) {
      final HdlfsFileSystemCapabilities.DirectAccessResponse response = this.webHdfsFileSystem.openDirectAccess(this.path);
      final HdlfsFileSystemCapabilities.WasbDirectAccessProperties properties =
          (HdlfsFileSystemCapabilities.WasbDirectAccessProperties) response.getProperties();

      this.signedUrl = WasbSignedUrl.from(properties.getSignedUrl(), this.signedUrlExpirationSafetyMargin);
    }

    return this.signedUrl;
  }

  /**
   * Reads the total stored file length from the {@code Content-Range} header of {@code response}.
   *
   * @param response backend response carrying a {@code Content-Range: bytes x-y/z} header
   * @return the {@code z} component of the header, parsed as a {@code long}
   * @throws IllegalStateException if the header value does not have the {@code bytes x-y/z} shape
   */
  @Override
  protected long getBackendStoredContentLength(final HttpResponse response) {
    final String contentRange = HttpClientUtils.getRequiredHeaderValue(response, CONTENT_RANGE_HEADER_NAME);
    final Matcher totalLengthMatcher = BACKEND_STORED_CONTENT_LENGTH_PATTERN.matcher(contentRange);

    if (!totalLengthMatcher.matches()) {
      // Not expected in practice: Azure documents its response headers as HTTP/1.1 compliant
      // (https://learn.microsoft.com/en-us/rest/api/storageservices/get-blob?tabs=azure-ad#response-headers),
      // and for a 206 Partial Content that means `Content-Range: bytes x-y/z` with x, y and z all
      // natural numbers - a `*` in place of `z` is not allowed in that case.
      throw new IllegalStateException(String.format("Could not extract stored file content length from %s header", CONTENT_RANGE_HEADER_NAME));
    }

    return Long.parseLong(totalLengthMatcher.group(1));
  }

}

// © 2023 SAP SE or an SAP affiliate company. All rights reserved.




© 2015 - 2025 Weber Informatics LLC | Privacy Policy