com.sap.hana.datalake.files.directaccess.BaseDirectAccessOutputStream Maven / Gradle / Ivy
An implementation of org.apache.hadoop.fs.FileSystem targeting SAP HANA Data Lake Files.
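For context, below is a minimal, hypothetical sketch of writing a file through this filesystem via the standard Hadoop API. The hdlfs:// URI, the endpoint name, and the omitted authentication settings are assumptions made for illustration only; the actual configuration keys and URI format are described in the SAP HANA Data Lake Files documentation.

// Hypothetical usage sketch: writing a file through the HDLFS Hadoop FileSystem.
// The "hdlfs://" URI below is an assumption for illustration; real deployments also
// require endpoint and authentication configuration that is omitted here.
import java.net.URI;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdlfsWriteExample {
  public static void main(final String[] args) throws Exception {
    final Configuration conf = new Configuration();
    // Assumed URI; the real form depends on the file container and endpoint configuration.
    final URI uri = URI.create("hdlfs://my-file-container.example-endpoint.com/");
    try (final FileSystem fs = FileSystem.get(uri, conf);
         final FSDataOutputStream out = fs.create(new Path("/data/example.txt"), true /* overwrite */)) {
      // Bytes written here are buffered into chunks by the underlying output stream.
      out.write("hello data lake".getBytes(StandardCharsets.UTF_8));
    }
  }
}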
// © 2023-2024 SAP SE or an SAP affiliate company. All rights reserved.
package com.sap.hana.datalake.files.directaccess;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.Abortable;
import org.apache.hadoop.fs.Path;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import java.io.IOException;
import java.io.OutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Base64;
import java.util.Locale;
import java.util.Optional;
import java.util.UUID;
import com.sap.hana.datalake.files.HasETag;
import com.sap.hana.datalake.files.HdlfsConstants;
import com.sap.hana.datalake.files.utils.DataChunk;
import com.sap.hana.datalake.files.utils.http.HttpClientUtils;
/**
 * Base class for direct-access output streams used by the HDLFS {@code FileSystem} implementation.
 * It buffers data into {@link DataChunk}s, provides an MD5 {@link MessageDigest} for checksumming,
 * and exposes the ETag reported by the backend via {@link HasETag}.
 */
public abstract class BaseDirectAccessOutputStream extends OutputStream implements HasETag, Abortable {

  protected static final String MD5_DIGEST_ALGORITHM = "MD5";
  protected static final String MD5_DIGEST_ALGORITHM_LOWERCASE = MD5_DIGEST_ALGORITHM.toLowerCase(Locale.getDefault());

  protected final byte[] singleByte = new byte[1];
  protected final Path path;
  protected final boolean overwrite;
  protected final int chunkSize;
  protected final WebHdfsFileSystem webHdfsFileSystem;
  protected final HttpClient httpClient;
  protected final MessageDigest md5Hasher;

  protected DataChunk currentChunk;
  protected int chunksCount;
  protected boolean closed;
  protected String eTag;

  private final String chunkIdPrefix;
  public BaseDirectAccessOutputStream(final Path path, final boolean overwrite, final int chunkSize,
                                      final WebHdfsFileSystem webHdfsFileSystem, final HttpClient httpClient) {
    this.path = path;
    this.overwrite = overwrite;
    this.chunkSize = chunkSize;
    this.webHdfsFileSystem = webHdfsFileSystem;
    this.httpClient = httpClient;
    this.closed = false;
    this.chunksCount = 0;
    this.chunkIdPrefix = UUID.randomUUID() + "-";

    try {
      this.md5Hasher = MessageDigest.getInstance(MD5_DIGEST_ALGORITHM);
    } catch (final NoSuchAlgorithmException ex) {
      throw new RuntimeException(String.format("Could not initialize %s", this.getClass().getName()), ex);
    }
  }
  @Override
  public String getETag() {
    return this.eTag;
  }

  @Override
  public synchronized void write(final int data) throws IOException {
    // Single-byte writes are funneled through the bulk write(byte[], int, int) provided by subclasses.
    this.singleByte[0] = (byte) data;
    this.write(this.singleByte, 0, 1);
  }

  protected void cleanUpOnClose() {
    IOUtils.closeQuietly(this.currentChunk);
    this.currentChunk = null;
  }

  protected synchronized DataChunk getOrCreateCurrentChunk() {
    if (this.currentChunk == null) {
      this.chunksCount++;
      this.currentChunk = DataChunk.newInstance(this.chunkSize, this.generateChunkId(this.chunksCount));
    }

    return this.currentChunk;
  }

  protected String getETagFromResponse(final HttpResponse response) {
    // The ETag header value is returned without its surrounding double quotes.
    return Optional.ofNullable(HttpClientUtils.getHeaderValue(response, HttpHeaders.ETAG))
        .map(s -> s.replace("\"", ""))
        .orElse(null);
  }

  protected String generateChunkId(final int chunkNumber) {
    // Chunk IDs are a per-stream UUID prefix plus a zero-padded chunk number, Base64-encoded.
    final String chunkId = String.format("%s%06d", this.chunkIdPrefix, chunkNumber);
    final byte[] chunkIdBytes = chunkId.getBytes(HdlfsConstants.DEFAULT_CHARSET);

    return Base64.getEncoder().encodeToString(chunkIdBytes);
  }

  protected abstract boolean isFileAlreadyExistError(final HttpResponse httpResponse);

}
// © 2023-2024 SAP SE or an SAP affiliate company. All rights reserved.
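To make the chunk-naming scheme above concrete, here is a small standalone sketch that mirrors generateChunkId(int): a random per-stream UUID prefix, a zero-padded chunk counter, and Base64 encoding. UTF-8 is assumed in place of HdlfsConstants.DEFAULT_CHARSET, whose value is not visible in this file.

// Standalone illustration of the chunk-ID scheme used by generateChunkId(int).
// UTF-8 stands in for HdlfsConstants.DEFAULT_CHARSET, which is not defined in this file.
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.UUID;

public class ChunkIdExample {
  public static void main(final String[] args) {
    final String chunkIdPrefix = UUID.randomUUID() + "-";
    for (int chunkNumber = 1; chunkNumber <= 3; chunkNumber++) {
      // Same UUID prefix for every chunk of one stream, with an increasing zero-padded counter.
      final String chunkId = String.format("%s%06d", chunkIdPrefix, chunkNumber);
      final String encoded = Base64.getEncoder().encodeToString(chunkId.getBytes(StandardCharsets.UTF_8));
      System.out.println(encoded);
    }
  }
}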