com.sap.hana.datalake.files.directaccess.wasb.WasbDirectAccessInputStream Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sap-hdlfs Show documentation
Show all versions of sap-hdlfs Show documentation
An implementation of org.apache.hadoop.fs.FileSystem targeting SAP HANA Data Lake Files.
// © 2023 SAP SE or an SAP affiliate company. All rights reserved.
package com.sap.hana.datalake.files.directaccess.wasb;
import com.sap.hana.datalake.files.HdlfsFileSystemCapabilities;
import com.sap.hana.datalake.files.directaccess.BaseDirectAccessInputStream;
import com.sap.hana.datalake.files.directaccess.BaseSignedUrl;
import com.sap.hana.datalake.files.utils.http.HttpClientUtils;
import org.apache.hadoop.fs.Path;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * {@link BaseDirectAccessInputStream} specialization for the WASB (Azure Blob Storage) backend.
 *
 * <p>This class supplies the two backend-specific pieces the base class asks for: a
 * {@link WasbSignedUrl} obtained via {@code WebHdfsFileSystem#openDirectAccess}, and the total
 * stored content length parsed from the {@code Content-Range} response header.
 */
/* package-private */ class WasbDirectAccessInputStream extends BaseDirectAccessInputStream {

  private static final String CONTENT_RANGE_HEADER_NAME = "Content-Range";

  /** Matches {@code bytes <first>-<last>/<total>} and captures {@code <total>} as group 1. */
  private static final Pattern BACKEND_STORED_CONTENT_LENGTH_PATTERN = Pattern.compile("^bytes \\d+-\\d+/(\\d+)$");

  /** Most recently obtained signed URL; {@code null} until the first call to {@link #getSignedUrl()}. */
  private WasbSignedUrl signedUrl;

  /**
   * Creates the stream; every argument is forwarded unchanged to the base class constructor.
   *
   * @param path path of the file to read
   * @param chunkSize chunk size handed to the base class
   * @param signedUrlExpirationSafetyMargin safety margin later passed to {@code WasbSignedUrl.from}
   * @param webHdfsFileSystem file system used to request direct access to {@code path}
   * @param httpClient HTTP client handed to the base class
   */
  /* package-private */ WasbDirectAccessInputStream(final Path path, final int chunkSize, final int signedUrlExpirationSafetyMargin, final WebHdfsFileSystem webHdfsFileSystem, final HttpClient httpClient) {
    super(path, chunkSize, signedUrlExpirationSafetyMargin, webHdfsFileSystem, httpClient);
  }

  /**
   * Returns the cached signed URL, requesting a new one when none is cached yet or the cached
   * one reports itself as expired.
   *
   * <p>The method is synchronized, so the check-and-refresh of the cached URL is executed by one
   * thread at a time per instance.
   *
   * @return a signed URL that was not expired at the time of the check
   * @throws IOException declared by the overridden method; presumably raised by the direct-access
   *     request when it fails
   */
  @Override
  protected synchronized BaseSignedUrl getSignedUrl() throws IOException {
    if (this.signedUrl != null && !this.signedUrl.isExpired()) {
      return this.signedUrl;
    }

    final HdlfsFileSystemCapabilities.DirectAccessResponse directAccessResponse = this.webHdfsFileSystem.openDirectAccess(this.path);
    // NOTE(review): unchecked downcast - assumes the response for this stream always carries WASB properties
    final HdlfsFileSystemCapabilities.WasbDirectAccessProperties wasbDirectAccessProperties =
        (HdlfsFileSystemCapabilities.WasbDirectAccessProperties) directAccessResponse.getProperties();

    this.signedUrl = WasbSignedUrl.from(wasbDirectAccessProperties.getSignedUrl(), this.signedUrlExpirationSafetyMargin);

    return this.signedUrl;
  }

  /**
   * Extracts the total length of the stored object from the {@code Content-Range} header of
   * {@code response}.
   *
   * @param response backend response that carries a {@code Content-Range} header
   * @return the value after the slash in {@code bytes x-y/z}
   * @throws IllegalStateException if the header value is not of the form {@code bytes x-y/z};
   *     the message includes the offending value
   */
  @Override
  protected long getBackendStoredContentLength(final HttpResponse response) {
    final String contentRangeHeaderValue = HttpClientUtils.getRequiredHeaderValue(response, CONTENT_RANGE_HEADER_NAME);
    final Matcher matcher = BACKEND_STORED_CONTENT_LENGTH_PATTERN.matcher(contentRangeHeaderValue);

    if (matcher.matches()) {
      return Long.parseLong(matcher.group(1));
    } else {
      // should not happen - Azure should always return a header as `Content-Range: bytes x-y/z` where x,y,z are natural numbers
      // according to their docs https://learn.microsoft.com/en-us/rest/api/storageservices/get-blob?tabs=azure-ad#response-headers
      // their response headers are HTTP/1.1 compliant, and that says that a 206 Partial Content should always contain the `z` value,
      // and `*` is not allowed for that case.
      // The received value is appended so that an unexpected backend response can be diagnosed from the exception alone.
      throw new IllegalStateException(String.format("Could not extract stored file content length from %s header: [%s]",
          CONTENT_RANGE_HEADER_NAME, contentRangeHeaderValue));
    }
  }

}
// © 2023 SAP SE or an SAP affiliate company. All rights reserved.
© 2015 - 2025 Weber Informatics LLC | Privacy Policy