All Downloads are FREE. Search and download functionality uses the official Maven repository.

com.sap.hana.datalake.files.HdlfsKeepPendingMultipartUpload Maven / Gradle / Ivy

Go to download

An implementation of org.apache.hadoop.fs.FileSystem targeting SAP HANA Data Lake Files.

There is a newer version: 3.0.27
Show newest version
// © 2022 SAP SE or an SAP affiliate company. All rights reserved.
package com.sap.hana.datalake.files;

import com.sap.hana.datalake.files.shaded.com.fasterxml.jackson.databind.JsonNode;
import com.sap.hana.datalake.files.shaded.com.fasterxml.jackson.databind.ObjectMapper;
import com.sap.hana.datalake.files.utils.HdlfsRetryUtils;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.util.Progressable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Multipart upload variant whose {@link #complete()} does not finalize the target file directly.
 * Instead, once all chunks are uploaded, it writes a JSON "pending file" describing the task
 * output (task attempt id, final output location, pending file location and chunk paths) and
 * registers the target path with the file system as having a pending file.
 *
 * <p>The target path is expected to contain both {@link HdlfsConstants#PENDING_PREFIX_NAME} and
 * {@link HdlfsConstants#BASE_PREFIX_NAME}; otherwise {@link #complete()} fails with an
 * {@link IOException}.
 */
class HdlfsKeepPendingMultipartUpload extends HdlfsMultipartUpload {
  private static final Logger LOG = LoggerFactory.getLogger(HdlfsKeepPendingMultipartUpload.class);
  private final ObjectMapper mapper;

  HdlfsKeepPendingMultipartUpload(final Path targetPath, final HdlfsBaseFileSystem fileSystem,
                                         final Config multipartUploadConfig, final Progressable progressable,
                                         final RetryPolicy retryPolicy) {
    super(targetPath, fileSystem, multipartUploadConfig, progressable, retryPolicy);
    this.mapper = new ObjectMapper();
  }

  /**
   * Waits for all chunks to be uploaded, writes the pending file, optionally records the pending
   * file in the FsCache (when {@code fsCacheEnabled} is set), and finally registers the target
   * path with the file system as having a pending file.
   *
   * @throws IOException if the pending file cannot be created or the FsCache cannot be updated
   */
  @Override
  public void complete() throws IOException {
    this.waitForChunksToBeUploaded();
    final Path pendingFilePath = this.createPendingFile(this.chunkPaths);

    if (this.fsCacheEnabled) {
      try {
        this.addPendingFileToFsCache(pendingFilePath.getParent(), pendingFilePath.getName());
      } catch (final IOException ex) {
        // Keep the original failure as the cause so the root problem is not lost.
        throw new IOException(
            String.format("Could not update FsCache with pending file from path %s", pendingFilePath), ex);
      }
    }

    this.fileSystem.addPathToPendingFilesCreated(this.targetPath);
  }

  /**
   * Builds a single-operation JSON patch that adds an entry for {@code filename} with
   * {@code markAsDeleted=false} and {@code isPrefix=false}.
   *
   * @param filename name of the file to add; used as the patch path {@code "/<filename>"}
   * @return the patch as a JSON tree (an array containing one operation)
   */
  private JsonNode createPatch(final String filename) {
    final Map<String, Object> entry = new HashMap<>();
    entry.put("markAsDeleted", false);
    entry.put("isPrefix", false);

    final Map<String, Object> patchOp = new HashMap<>();
    patchOp.put(HdlfsConstants.JSON_PATCH_OP_KEY, HdlfsConstants.JSON_PATCH_ADD_OP);
    patchOp.put(HdlfsConstants.JSON_PATCH_PATH_KEY, String.format("/%s", filename));
    patchOp.put(HdlfsConstants.JSON_PATCH_VALUE_KEY, entry);

    final List<Map<String, Object>> patch = new ArrayList<>();
    patch.add(patchOp);
    return this.mapper.valueToTree(patch);
  }

  /**
   * Applies an "add" patch for {@code filename} to the file system's FsCache under {@code prefix}.
   *
   * @param prefix   path the patch is applied to (the pending file's parent)
   * @param filename name of the pending file to add
   * @throws IOException if applying the patch fails
   */
  private void addPendingFileToFsCache(final Path prefix, final String filename) throws IOException {
    final JsonNode jsonPatch = this.createPatch(filename);
    final FsCache fsCache = this.fileSystem.getFsCache();
    final FsCache.FsCacheOperationResult result = fsCache.applyPatch(prefix, jsonPatch);
    // Pass the node itself so it is only stringified when debug logging is enabled.
    LOG.debug("Added pending file to FsCache, got result = [{}] from FsCache patch operation", result.getNode());
  }

  /**
   * Serializes an {@link HdlfsTaskOutputInfo} for this upload as JSON and writes it to the
   * pending file location (overwriting any existing file), using the configured retry policy.
   *
   * @param chunkPaths paths of the uploaded chunks to record in the pending file
   * @return the path of the pending file that was written
   * @throws IOException if the target path lacks the expected markers or the write fails
   */
  private Path createPendingFile(final List<Path> chunkPaths) throws IOException {
    LOG.debug("Creating pending file for path {}", this.targetPath);

    // NOTE: getFinalTaskOutputLocation() validates that both markers are present in the target
    // path; getTaskAttemptId() and getPendingFileLocation() rely on that check having run first.
    final String finalTaskOutputLocation = this.getFinalTaskOutputLocation();
    final String taskAttemptId = this.getTaskAttemptId();
    final Path pendingFilePath = new Path(this.getPendingFileLocation());

    final List<String> chunkPathStrings = chunkPaths.stream()
        .map(Path::toString)
        .collect(Collectors.toList());
    final HdlfsTaskOutputInfo info = new HdlfsTaskOutputInfo(
        taskAttemptId, finalTaskOutputLocation, pendingFilePath.toString(), chunkPathStrings);

    final WebHdfsFileSystem webHdfsFileSystem = this.fileSystem.getWebHdfsFileSystem();

    final String operationName = String.format("Creation of pending file %s", pendingFilePath);

    return HdlfsRetryUtils.execWithRetry(operationName, this.retryPolicy, true, () -> {
      try (final FSDataOutputStream out = webHdfsFileSystem.create(pendingFilePath, true)) {
        out.write(this.mapper.writeValueAsBytes(info));
      } catch (final Exception ex) {
        // Include the exception so the failure reason and stack trace reach the log.
        LOG.error("Failed to create pending file for path = [{}]", pendingFilePath, ex);
        throw ex;
      }

      return pendingFilePath;
    });
  }

  /**
   * Derives the final output location from the target path by concatenating everything before
   * the pending marker with everything after the base marker (skipping the single character that
   * follows the base marker, presumably the path separator).
   *
   * @return the URI path component of the derived location
   * @throws IOException if the target path lacks the pending or the base marker
   */
  private String getFinalTaskOutputLocation() throws IOException {
    final String targetPathAsString = this.targetPath.toString();
    final int pendingMarkerLocation = targetPathAsString.indexOf(HdlfsConstants.PENDING_PREFIX_NAME);
    final int baseMarkerLocation = targetPathAsString.indexOf(HdlfsConstants.BASE_PREFIX_NAME);

    if (pendingMarkerLocation < 0 || baseMarkerLocation < 0) {
      // Argument order matches the placeholders: path first, then the two marker names.
      final String message = String.format("Target path %s does not contain %s or %s marker",
          targetPathAsString, HdlfsConstants.PENDING_PREFIX_NAME, HdlfsConstants.BASE_PREFIX_NAME);
      throw new IOException(message);
    }

    final int baseMarkerLocationEnd = baseMarkerLocation + HdlfsConstants.BASE_PREFIX_NAME.length();
    final String taskOutputDirectory = targetPathAsString.substring(0, pendingMarkerLocation);
    final String taskOutputFileName = targetPathAsString.substring(baseMarkerLocationEnd + 1);

    return this.getRelativePathFromSchemaPath(taskOutputDirectory + taskOutputFileName);
  }

  /**
   * Derives the pending file location: the part of the target path after the base marker is
   * flattened into a single file name ({@code "/"} replaced by {@code "-"}) with
   * {@link HdlfsConstants#PENDING_SUFFIX} appended, and placed directly under the base marker.
   *
   * <p>Assumes the base marker is present in the target path (not checked here).
   *
   * @return the URI path component of the pending file location
   */
  private String getPendingFileLocation() {
    final String targetPathAsString = this.targetPath.toString();
    final int baseMarkerLocation = targetPathAsString.indexOf(HdlfsConstants.BASE_PREFIX_NAME);
    final int baseMarkerLocationEnd = baseMarkerLocation + HdlfsConstants.BASE_PREFIX_NAME.length();
    final String pendingFileName = targetPathAsString.substring(baseMarkerLocationEnd + 1).replace("/", "-") + HdlfsConstants.PENDING_SUFFIX;
    final String taskAttemptOutputLocation = targetPathAsString.substring(0, baseMarkerLocationEnd + 1);

    return this.getRelativePathFromSchemaPath(taskAttemptOutputLocation + pendingFileName);
  }

  /**
   * Returns the name of the last path component that precedes the base marker in the target
   * path, which is used as the task attempt id.
   *
   * <p>Assumes the base marker is present in the target path (not checked here).
   */
  private String getTaskAttemptId() {
    final String targetPathAsString = this.targetPath.toString();
    final int baseMarkerLocation = targetPathAsString.indexOf(HdlfsConstants.BASE_PREFIX_NAME);

    return new Path(targetPathAsString.substring(0, baseMarkerLocation)).getName();
  }

  /** Returns only the path component of {@code outputPath}'s URI (scheme and authority dropped). */
  private String getRelativePathFromSchemaPath(final String outputPath) {
    return new Path(outputPath).toUri().getPath();
  }
}

// © 2022 SAP SE or an SAP affiliate company. All rights reserved.





© 2015 - 2025 Weber Informatics LLC | Privacy Policy