com.sap.hana.datalake.files.directaccess.s3.S3DirectAccessOutputStream
An implementation of org.apache.hadoop.fs.FileSystem targeting SAP HANA Data Lake Files.
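For orientation, the hypothetical snippet below shows how such a Hadoop FileSystem implementation is typically consumed through the standard org.apache.hadoop.fs.FileSystem API. The hdlfs:// URI, the endpoint, the class name HdlfsWriteSketch, and the omitted authentication settings are illustrative assumptions only; they are not defined by the source file that follows.

// Hypothetical usage sketch; the URI scheme/authority are placeholders and the
// sap-hdlfs specific configuration (endpoint, container, certificates) is omitted.
import java.net.URI;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdlfsWriteSketch {
  public static void main(final String[] args) throws Exception {
    // Supply the deployment-specific sap-hdlfs configuration properties here.
    final Configuration conf = new Configuration();
    // Placeholder URI; the actual scheme/authority depend on the deployment.
    final URI uri = URI.create("hdlfs://my-container@my-endpoint/");
    try (final FileSystem fs = FileSystem.get(uri, conf);
         final FSDataOutputStream out = fs.create(new Path("/tmp/example.txt"), true /* overwrite */)) {
      out.write("hello".getBytes(StandardCharsets.UTF_8));
    }
  }
}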
// © 2023-2024 SAP SE or an SAP affiliate company. All rights reserved.
package com.sap.hana.datalake.files.directaccess.s3;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.Abortable.AbortableResult; // presumed import for the abort() return type; missing from the extracted listing
import org.apache.hadoop.fs.Path;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.util.Preconditions;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpPut;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nonnull;
import java.io.IOException;
import java.io.InputStream;
import java.util.Base64;
import java.util.Map;
import java.util.regex.Pattern;
import com.sap.hana.datalake.files.HdlfsFileSystemCapabilities;
import com.sap.hana.datalake.files.HdlfsOutputStream;
import com.sap.hana.datalake.files.directaccess.BaseDirectAccessOutputStream;
import com.sap.hana.datalake.files.directaccess.BaseSignedUrl;
import com.sap.hana.datalake.files.utils.AbortableResultImpl;
import com.sap.hana.datalake.files.utils.ArrayUtils;
import com.sap.hana.datalake.files.utils.DataChunk;
import com.sap.hana.datalake.files.utils.http.HttpClientUtils;
import com.sap.hana.datalake.files.utils.http.RepeatableInputStreamEntity;
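/**
 * Output stream that writes a file to the S3-backed object store through pre-signed URLs
 * obtained via the WebHDFS "create direct access" API.
 *
 * Data is first buffered into a single {@link DataChunk}; if the stream is closed before the
 * chunk fills up, the file is uploaded with a single HTTP PUT. If the data outgrows one chunk,
 * the stream switches to a multipart upload and delegates all further writes to the
 * {@link HdlfsOutputStream} built from the supplied builder.
 */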
/* package-private */ class S3DirectAccessOutputStream extends BaseDirectAccessOutputStream {
private static final Logger LOG = LoggerFactory.getLogger(S3DirectAccessOutputStream.class);
private static final Pattern S3_SIGNATURE_TOKEN_PATTERN = Pattern.compile("([?&])(X-Amz-Security-Token|X-Amz-Signature|X-Amz-Credential)=[^\\s&?$]*", Pattern.CASE_INSENSITIVE);
private final int signedUrlExpirationSafetyMargin;
private final HdlfsOutputStream.Builder prebuiltOutputStream;
private HdlfsOutputStream hdlfsOutputStream;
private S3UploadSignedUrl signedUrl;
private boolean isMultipartUpload;
/* package-private */ S3DirectAccessOutputStream(final Path path,
final boolean overwrite,
final int chunkSize,
final int signedUrlExpirationSafetyMargin,
final WebHdfsFileSystem webHdfsFileSystem,
final HttpClient httpClient,
final HdlfsOutputStream.Builder prebuiltOutputStream) {
super(path, overwrite, chunkSize, webHdfsFileSystem, httpClient);
this.signedUrlExpirationSafetyMargin = signedUrlExpirationSafetyMargin;
this.prebuiltOutputStream = prebuiltOutputStream;
this.isMultipartUpload = false;
}
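/**
 * Buffers the given bytes into the current chunk. If the chunk runs out of capacity while data
 * is still pending, the stream switches to a multipart upload (seeded with the already buffered
 * chunk) and writes the remainder through the {@link HdlfsOutputStream}.
 */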
@Override
public synchronized void write(@Nonnull final byte[] data, final int offset, final int length) throws IOException {
LOG.trace("Started write operation into stream: {}", this);
Preconditions.checkNotNull(data, "data buffer cannot be null");
ArrayUtils.validateBoundaries(data, offset, length);
if (this.closed) {
throw new IllegalStateException("Stream already closed");
}
if (length == 0) {
return;
}
final int writtenBytes;
if (this.isMultipartUpload) {
this.hdlfsOutputStream.write(data, offset, length);
writtenBytes = length; // assume the full content was written; the HdlfsOutputStream manages its own DataChunks
} else {
final DataChunk chunk = this.getOrCreateCurrentChunk();
writtenBytes = chunk.write(data, offset, length);
this.md5Hasher.update(data, offset, writtenBytes);
LOG.trace("Wrote [{}] bytes to the current data chunk; the remaining capacity is [{}] bytes", writtenBytes, chunk.getRemainingCapacity());
}
if (writtenBytes < length) {
// The current chunk ran out of space and there is still data left to be written, so we move to HdlfsMultiPartUpload.
if (!this.isMultipartUpload) {
this.isMultipartUpload = true;
this.hdlfsOutputStream = this.prebuiltOutputStream.withInitialChunk(this.currentChunk).build();
this.currentChunk = null;
}
final int leftOverOffset = offset + writtenBytes;
final int leftOverLength = length - writtenBytes;
this.write(data, leftOverOffset, leftOverLength);
}
LOG.trace("Write operation into stream {} completed", this);
}
@Override
public synchronized void close() throws IOException {
if (this.closed) {
return;
}
this.closed = true;
try {
if (this.isMultipartUpload) {
// closing the HdlfsOutputStream completes the multipart upload
this.hdlfsOutputStream.close();
} else {
// the data never exceeded a single chunk, so it can be uploaded directly with one request
this.uploadSingleChunk();
}
} finally {
this.cleanUpOnClose();
}
}
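/**
 * Aborts the upload. If a multipart upload is in progress, aborting is delegated to the
 * {@link HdlfsOutputStream}; otherwise the buffered chunk is released so nothing is uploaded.
 */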
@Override
public synchronized AbortableResult abort() {
if (this.isMultipartUpload) {
return this.hdlfsOutputStream.abort();
} else {
// the data never exceeded a single chunk; simply release it so nothing is uploaded
if (this.closed) {
return new AbortableResultImpl(/* alreadyClosed */ true, /* cleanupException */ null);
}
this.cleanUpOnClose();
this.closed = true;
return new AbortableResultImpl(/* alreadyClosed */ false, /* cleanupException */ null);
}
}
@Override
protected boolean isFileAlreadyExistError(final HttpResponse httpResponse) {
// S3 does not natively support mutual exclusion for concurrent writes, so `overwrite=false` cannot be enforced here; the FSCache must provide this behavior instead
return false;
}
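/**
 * Returns a signed URL for uploading the current chunk. A previously fetched URL is reused as
 * long as it belongs to the current chunk and has not expired (within the configured safety
 * margin); otherwise a new URL is requested through the WebHDFS direct-access API using the
 * chunk's MD5 checksum.
 */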
protected synchronized BaseSignedUrl getSignedUrlForCurrentChunk() throws IOException {
final DataChunk chunk = this.getOrCreateCurrentChunk();
final boolean signedUrlNotNull = this.signedUrl != null;
final boolean isSignedUrlForCurrentChunk = signedUrlNotNull && this.signedUrl.getChunkId().equals(chunk.getId());
final boolean isSignedUrlExpired = signedUrlNotNull && this.signedUrl.isExpired();
if (isSignedUrlForCurrentChunk && !isSignedUrlExpired) {
return this.signedUrl;
}
final HdlfsFileSystemCapabilities.S3CreateDirectAccessOptions directAccessOptions =
HdlfsFileSystemCapabilities.S3CreateDirectAccessOptions.builder(chunk.getMd5Checksum()).build();
final HdlfsFileSystemCapabilities.DirectAccessResponse directAccessResponse = this.webHdfsFileSystem.createDirectAccess(this.path, directAccessOptions);
final HdlfsFileSystemCapabilities.S3DirectAccessProperties s3DirectAccessProperties =
(HdlfsFileSystemCapabilities.S3DirectAccessProperties) directAccessResponse.getProperties();
this.signedUrl = S3UploadSignedUrl.from(s3DirectAccessProperties.getSignedUrl(), this.signedUrlExpirationSafetyMargin, chunk.getId());
return this.signedUrl;
}
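/**
 * Uploads the whole file as a single chunk: computes the Content-MD5 of the buffered data,
 * obtains a matching signed URL and issues one HTTP PUT against the object store.
 */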
private void uploadSingleChunk() throws IOException {
if (this.currentChunk == null) {
this.currentChunk = this.getOrCreateCurrentChunk();
}
final String chunkMd5 = Base64.getEncoder().encodeToString(this.md5Hasher.digest());
this.currentChunk.setMd5Checksum(chunkMd5);
try {
final BaseSignedUrl signedUrlForCurrentChunk = this.getSignedUrlForCurrentChunk();
final HttpPut request = new HttpPut(signedUrlForCurrentChunk.getUrl());
this.setChunkRequestEntity(request, this.currentChunk);
this.setRequestHeaders(request, signedUrlForCurrentChunk);
// Execute the request
LOG.debug("Uploading {} of {} using signed URL [{}]", this.currentChunk, this, this.getRequestUrlWithNoAuthInfo(request));
final HttpResponse response = this.httpClient.execute(request);
this.eTag = this.getETagFromResponse(response);
this.validateUploadResponse(response, this.currentChunk);
} catch (final Exception ex) {
final String errorMessage = String.format("Could not upload %s", this.currentChunk);
throw new IOException(errorMessage, ex);
} finally {
IOUtils.closeQuietly(this.currentChunk);
}
}
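// Copies the headers required by the signed URL onto the PUT request. Content-MD5 must be
// present because the URL was signed for a specific payload checksum.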
private void setRequestHeaders(final HttpPut request, final BaseSignedUrl signedUrl) {
final Map<String, String> signedUrlHeaders = signedUrl.getHeaders();
if (!signedUrlHeaders.containsKey(HttpHeaders.CONTENT_MD5)) {
throw new IllegalStateException(String.format("Signed URL does not contain required header [%s]", HttpHeaders.CONTENT_MD5));
}
signedUrlHeaders.forEach(request::setHeader);
}
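// Redacts the AWS SigV4 query parameters (credential, signature, security token) so the signed
// URL can be logged without exposing authentication material.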
private String getRequestUrlWithNoAuthInfo(final HttpPut request) {
return S3_SIGNATURE_TOKEN_PATTERN.matcher(request.getURI().toString()).replaceAll("$1$2=[REDACTED]");
}
private void validateUploadResponse(final HttpResponse response, final Object object) throws IOException {
final int statusCode = response.getStatusLine().getStatusCode();
LOG.debug("{} upload request responded with status code: {}", object, statusCode);
if (!HttpClientUtils.isSuccessfulStatusCode(statusCode)) {
throw HttpClientUtils.getAndLogExceptionForUnsuccessfulResponse(response, String.format("Error uploading %s", object), LOG);
}
}
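// Wraps the chunk data in a repeatable entity so the request body can be re-sent if the HTTP
// client needs to repeat the request.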
private void setChunkRequestEntity(final HttpPut request, final DataChunk chunk) {
final int chunkDataSize = chunk.getDataSize();
final InputStream chunkInputStream = chunk.getInputStream();
request.setEntity(new RepeatableInputStreamEntity(chunkInputStream, chunkDataSize));
}
}