All downloads are free. Search and download functionality uses the official Maven repository.

com.sap.hana.datalake.files.directaccess.BaseDirectAccessOutputStream Maven / Gradle / Ivy

Go to download

An implementation of org.apache.hadoop.fs.FileSystem targeting SAP HANA Data Lake Files.

There is a newer version: 3.0.27
Show newest version
// © 2023-2024 SAP SE or an SAP affiliate company. All rights reserved.
package com.sap.hana.datalake.files.directaccess;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.Abortable;
import org.apache.hadoop.fs.Path;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;

import java.io.IOException;
import java.io.OutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Base64;
import java.util.Locale;
import java.util.Optional;
import java.util.UUID;

import com.sap.hana.datalake.files.HasETag;
import com.sap.hana.datalake.files.HdlfsConstants;
import com.sap.hana.datalake.files.utils.DataChunk;
import com.sap.hana.datalake.files.utils.http.HttpClientUtils;

/**
 * Shared state and helper methods for direct-access output streams.
 *
 * <p>This base class holds the target path, the chunking configuration, the HTTP client and an
 * MD5 {@link MessageDigest}, and offers helpers to lazily create the current {@link DataChunk},
 * to derive chunk identifiers and to read the ETag from an HTTP response.
 *
 * <p>Subclasses must override {@link #write(byte[], int, int)}: {@link #write(int)} delegates to
 * it, and the default implementation inherited from {@link OutputStream} would call back into
 * {@link #write(int)}. The {@code abort()} operation required by {@link Abortable} is not
 * implemented here and is likewise left to subclasses.
 */
public abstract class BaseDirectAccessOutputStream extends OutputStream implements HasETag, Abortable {

  /** Name of the digest algorithm passed to {@link MessageDigest#getInstance(String)}. */
  protected static final String MD5_DIGEST_ALGORITHM = "MD5";

  /**
   * Lower-case form of {@link #MD5_DIGEST_ALGORITHM}. Computed with {@link Locale#ROOT} so the
   * value never depends on the JVM's default locale.
   */
  protected static final String MD5_DIGEST_ALGORITHM_LOWERCASE = MD5_DIGEST_ALGORITHM.toLowerCase(Locale.ROOT);

  /** Reusable one-byte buffer backing {@link #write(int)}; only touched while holding the monitor. */
  protected final byte[] singleByte = new byte[1];

  /** Path given at construction time. */
  protected final Path path;
  /** Overwrite flag given at construction time. */
  protected final boolean overwrite;
  /** Size handed to {@link DataChunk#newInstance} whenever a new chunk is created. */
  protected final int chunkSize;
  /** File system instance given at construction time. */
  protected final WebHdfsFileSystem webHdfsFileSystem;
  /** HTTP client given at construction time. */
  protected final HttpClient httpClient;
  /** MD5 digest instance created in the constructor; not used by this base class itself. */
  protected final MessageDigest md5Hasher;

  /** Chunk currently in use, or {@code null} when none has been created yet or after clean-up. */
  protected DataChunk currentChunk;
  /** Number of chunks created so far through {@link #getOrCreateCurrentChunk()}. */
  protected int chunksCount;
  /** Starts as {@code false}; this base class never changes it. */
  protected boolean closed;
  /** Value returned by {@link #getETag()}; this base class never assigns it. */
  protected String eTag;

  /** Random per-instance prefix (UUID followed by {@code "-"}) used when building chunk IDs. */
  private final String chunkIdPrefix;

  /**
   * Stores the given collaborators and initialises the chunk counter, the chunk ID prefix and the
   * MD5 digest.
   *
   * @param path              path stored in {@link #path}
   * @param overwrite         flag stored in {@link #overwrite}
   * @param chunkSize         size used for every {@link DataChunk} created by this stream
   * @param webHdfsFileSystem file system stored in {@link #webHdfsFileSystem}
   * @param httpClient        HTTP client stored in {@link #httpClient}
   * @throws RuntimeException if the MD5 algorithm is not available in this JVM
   */
  public BaseDirectAccessOutputStream(final Path path, final boolean overwrite, final int chunkSize,
          final WebHdfsFileSystem webHdfsFileSystem, final HttpClient httpClient) {
    this.path = path;
    this.overwrite = overwrite;
    this.chunkSize = chunkSize;
    this.webHdfsFileSystem = webHdfsFileSystem;
    this.httpClient = httpClient;
    this.closed = false;
    this.chunksCount = 0;
    this.chunkIdPrefix = UUID.randomUUID() + "-";

    try {
      this.md5Hasher = MessageDigest.getInstance(MD5_DIGEST_ALGORITHM);
    } catch (final NoSuchAlgorithmException ex) {
      throw new RuntimeException(String.format("Could not initialize %s", this.getClass().getName()), ex);
    }
  }

  /**
   * Returns the current value of {@link #eTag}.
   *
   * @return the stored ETag, or {@code null} if none has been set
   */
  @Override
  public String getETag() {
    return this.eTag;
  }

  /**
   * Writes a single byte by placing it in {@link #singleByte} and delegating to
   * {@link #write(byte[], int, int)}.
   *
   * @param data the byte to write; only the low-order eight bits are used
   * @throws IOException if the delegated write fails
   */
  @Override
  public synchronized void write(final int data) throws IOException {
    this.singleByte[0] = (byte) data;
    this.write(this.singleByte, 0, 1);
  }

  /** Quietly closes the current chunk (if any) and clears the reference to it. */
  protected void cleanUpOnClose() {
    IOUtils.closeQuietly(this.currentChunk);
    this.currentChunk = null;
  }

  /**
   * Returns the current chunk, creating it first when there is none. Creating a chunk increments
   * {@link #chunksCount} and uses the new count as input for the chunk's ID.
   *
   * @return the existing or newly created chunk, never {@code null}
   */
  protected synchronized DataChunk getOrCreateCurrentChunk() {
    if (this.currentChunk == null) {
      this.chunksCount++;
      this.currentChunk = DataChunk.newInstance(this.chunkSize, this.generateChunkId(this.chunksCount));
    }

    return this.currentChunk;
  }

  /**
   * Reads the {@code ETag} header from the response and strips every double-quote character.
   *
   * @param response the HTTP response to inspect
   * @return the unquoted ETag, or {@code null} when the header value is {@code null}
   */
  protected String getETagFromResponse(final HttpResponse response) {
    return Optional.ofNullable(HttpClientUtils.getHeaderValue(response, HttpHeaders.ETAG))
            .map(s -> s.replace("\"", ""))
            .orElse(null);
  }

  /**
   * Builds the identifier for the chunk with the given number: the per-instance prefix followed
   * by the number zero-padded to at least six digits, Base64-encoded.
   *
   * @param chunkNumber number of the chunk
   * @return Base64 encoding of the prefixed, zero-padded chunk number
   */
  protected String generateChunkId(final int chunkNumber) {
    // Locale.ROOT keeps the digits ASCII; with the default locale, %d may emit localized digits
    // and the resulting ID would vary with the JVM's locale settings.
    final String chunkId = String.format(Locale.ROOT, "%s%06d", this.chunkIdPrefix, chunkNumber);
    final byte[] chunkIdBytes = chunkId.getBytes(HdlfsConstants.DEFAULT_CHARSET);

    return Base64.getEncoder().encodeToString(chunkIdBytes);
  }

  /**
   * Tells whether the given response represents a "file already exists" error.
   *
   * @param httpResponse the HTTP response to inspect
   * @return {@code true} if the response signals that the file already exists
   */
  protected abstract boolean isFileAlreadyExistError(final HttpResponse httpResponse);

}

// © 2023-2024 SAP SE or an SAP affiliate company. All rights reserved.




© 2015 - 2025 Weber Informatics LLC | Privacy Policy