All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sap.hana.datalake.files.HdlfsMultipartUpload Maven / Gradle / Ivy

Go to download

An implementation of org.apache.hadoop.fs.FileSystem targeting SAP HANA Data Lake Files.

There is a newer version: 3.0.27
Show newest version
// © 2022-2024 SAP SE or an SAP affiliate company. All rights reserved.
package com.sap.hana.datalake.files;

import com.sap.hana.datalake.files.classification.InterfaceAudience;
import com.sap.hana.datalake.files.utils.DataChunk;
import com.sap.hana.datalake.files.utils.HdlfsRetryUtils;
import com.sap.hana.datalake.files.utils.IOUtils;
import com.sap.hana.datalake.files.utils.threads.ThreadUtils;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.SemaphoredDelegatingExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

@InterfaceAudience.Private
public class HdlfsMultipartUpload implements HasETag {

  private static final Logger LOG = LoggerFactory.getLogger(HdlfsMultipartUpload.class);

  /** Maximum number of chunks a single multipart upload instance accepts. */
  public static final int MAX_CHUNKS = 50_000;

  /** Serializes the one-time creation of {@link #globalThreadPool}. */
  private static final Lock GLOBAL_THREAD_POOL_INITIALIZER_LOCK = new ReentrantLock();

  /** Name of the directory, sibling of the target file, under which chunk files are staged. */
  private static final String TEMPORARY_DIR_NAME = "_temporary";

  /*
   * Shared by all multipart uploads. It is read without holding the lock (null checks in
   * initializeGlobalThreadPool and in the constructor), so it must be volatile for those
   * readers to observe a fully constructed pool.
   */
  private static volatile ExecutorService globalThreadPool;

  /* Values the pool was created with; assigned before globalThreadPool is published. */
  private static int globalThreadPoolSize;
  private static int globalThreadPoolQueueSize;
  private static int globalThreadPoolKeepAliveSecs;

  /**
   * Creates the thread pool shared by all multipart uploads, if it does not exist yet.
   * When the pool already exists, the given configuration is only compared against the values
   * the pool was created with and a warning is logged if they differ; the pool is not rebuilt.
   *
   * @param multipartUploadConfig source of the pool size, queue size and keep-alive settings
   * @param fsConfig file system configuration handed to the executor factory
   */
  public static void initializeGlobalThreadPool(final Config multipartUploadConfig, final Configuration fsConfig) {
    if (globalThreadPool == null) {
      LOG.debug("Initializing GlobalThreadPool...");

      // Acquire the lock BEFORE entering the try block: if lock() itself failed, the finally
      // clause would otherwise call unlock() on a lock this thread does not hold.
      GLOBAL_THREAD_POOL_INITIALIZER_LOCK.lock();

      try {
        // Re-check under the lock; another thread may have created the pool in the meantime
        if (globalThreadPool == null) {
          globalThreadPoolSize = multipartUploadConfig.getGlobalThreadPoolSize();
          globalThreadPoolQueueSize = multipartUploadConfig.getGlobalThreadPoolQueueSize();
          globalThreadPoolKeepAliveSecs = multipartUploadConfig.getGlobalThreadPoolKeepAliveSecs();
          globalThreadPool = ThreadUtils.newDaemonThreadBlockingExecutor(
                  globalThreadPoolSize,
                  globalThreadPoolQueueSize,
                  /* allowCoreThreadTimeOut */ true,
                  globalThreadPoolKeepAliveSecs, TimeUnit.SECONDS,
                  /* fairSemaphore */ false,
                  /* namePrefix */ "HdlfsMPU-thread",
                  fsConfig);

          LOG.debug("GlobalThreadPool(maxThreads={}, queueSize={}, keepAlive={}s) initialized",
                  globalThreadPoolSize,
                  globalThreadPoolQueueSize,
                  globalThreadPoolKeepAliveSecs);
        }
      } finally {
        GLOBAL_THREAD_POOL_INITIALIZER_LOCK.unlock();
      }
    } else {
      final boolean isSameConfig =
              multipartUploadConfig.getGlobalThreadPoolSize() == globalThreadPoolSize &&
              multipartUploadConfig.getGlobalThreadPoolQueueSize() == globalThreadPoolQueueSize &&
              multipartUploadConfig.getGlobalThreadPoolKeepAliveSecs() == globalThreadPoolKeepAliveSecs;

      if (!isSameConfig) {
        LOG.warn("GlobalThreadPool(maxThreads={}, queueSize={}, keepAlive={}s) had already been initialized; " +
                "the new config values will be ignored", globalThreadPoolSize, globalThreadPoolQueueSize,
                globalThreadPoolKeepAliveSecs);
      }
    }
  }

  final boolean fsCacheEnabled;
  final boolean isMPUDeleteTempDirEnabled;
  protected List chunkPaths;
  protected final Path targetPath;
  protected final RetryPolicy retryPolicy;
  final HdlfsBaseFileSystem fileSystem;
  private final List> chunkUploadFutures;
  private final ListeningExecutorService executorService;
  private final String id;
  private final RetryPolicy mergeFileNotFoundRetryPolicy;
  private final RetryPolicy deleteMpuTempDirRetryPolicy;
  private final Progressable progressable;

  private boolean cleanupComplete = false;
  private String eTag = null;

  /**
   * Creates a multipart upload without a retry policy for the chunk uploads.
   * Delegates to the 5-argument constructor, passing {@code null} as the retry policy.
   *
   * @param targetPath path of the file the chunks are merged into
   * @param fileSystem file system used to reach the WebHDFS endpoint
   * @param multipartUploadConfig multipart upload settings
   * @param progressable optional progress callback; may be null
   */
  public HdlfsMultipartUpload(
          final Path targetPath,
          final HdlfsBaseFileSystem fileSystem,
          final Config multipartUploadConfig,
          final Progressable progressable) {
    this(targetPath, fileSystem, multipartUploadConfig, progressable, /* retryPolicy */ null);
  }

  public HdlfsMultipartUpload(final Path targetPath, final HdlfsBaseFileSystem fileSystem,
                              final Config multipartUploadConfig, final Progressable progressable,
                              final RetryPolicy retryPolicy) {
    Preconditions.checkNotNull(targetPath, "targetPath must not be null");
    Preconditions.checkNotNull(fileSystem, "fileSystem must not be null");
    Preconditions.checkNotNull(multipartUploadConfig, "multipartUploadConfig must not be null");
    Preconditions.checkArgument(multipartUploadConfig.isEnabled(), "multipartUploadConfig.isEnabled must be true");
    Preconditions.checkArgument(multipartUploadConfig.getMaxActiveChunks() > 0, "multipartUploadConfig.maxActiveChunks must be > 0");

    if (globalThreadPool == null) {
      final String errorMessage = "GlobalThreadPool is not ready";
      LOG.error(errorMessage);
      throw new IllegalStateException(errorMessage);
    }

    this.chunkUploadFutures = new ArrayList<>();
    this.chunkPaths = new ArrayList<>();
    this.fileSystem = fileSystem;
    this.executorService = MoreExecutors.listeningDecorator(
            new SemaphoredDelegatingExecutor(globalThreadPool, multipartUploadConfig.getMaxActiveChunks(), /* fair */ true));
    this.fsCacheEnabled = multipartUploadConfig.isFsCacheEnabled();
    this.isMPUDeleteTempDirEnabled = multipartUploadConfig.isMPUDeleteTempDirEnabled();
    this.id = UUID.randomUUID().toString();
    this.mergeFileNotFoundRetryPolicy = multipartUploadConfig.getMergeFileNotFoundRetryPolicy();
    this.deleteMpuTempDirRetryPolicy = multipartUploadConfig.getDeleteMpuTempDirRetryPolicy();
    this.progressable = progressable;
    this.retryPolicy = retryPolicy;
    this.targetPath = targetPath;

    this.logDebugWithTrackableId("MPU initialized");
  }

  /**
   * @return the ETag reported by the merge of the chunks, or {@code null} if no merge has
   *         succeeded yet
   */
  public String getETag() {
    return this.eTag;
  }

  /**
   * @return the random UUID string generated for this upload at construction time
   */
  public String getId() {
    return this.id;
  }

  /**
   * @return the path of the file the uploaded chunks are merged into
   */
  public Path getTargetPath() {
    return this.targetPath;
  }

  /**
   * @return {@code true} once a batch delete of the chunk files has reported success;
   *         {@code false} before any delete was attempted or if it was not successful
   */
  public boolean isCleanupComplete() {
    return this.cleanupComplete;
  }

  /**
   * @return {@code true} if an ETag has been recorded, i.e. the merge into the target file
   *         has completed
   */
  public boolean isTargetFileReady() {
    final boolean hasETag = this.eTag != null;
    return hasETag;
  }

  /**
   * Dispatches a data chunk to be uploaded asynchronously.
   * This method blocks the caller if the number of chunks being actively uploaded
   * has reached `HdlfsMultipartUpload.Config.maxActiveChunks`.
   * The chunk is closed by the upload task once the upload has finished, successfully or not.
   *
   * @param chunk Chunk object to be dispatched for upload
   * @param chunkIndex zero-based position of the chunk in the target file; must be equal to the
   *                   number of chunks submitted so far
   * @throws IllegalStateException if the total number of chunks in this multipart upload
   * instance has reached HdlfsMultipartUpload.MAX_CHUNKS, or if {@code chunkIndex} is not the
   * next expected index
   */
  public void uploadChunkAsync(final DataChunk chunk, final int chunkIndex) throws IllegalStateException {
    final int submittedChunkCount = this.chunkPaths.size();

    if (submittedChunkCount >= MAX_CHUNKS) {
      final String errorMessage = String.format("Number of chunks in the MPU exceeds the limit [%d]", MAX_CHUNKS);
      this.logErrorWithTrackableId(errorMessage);
      throw new IllegalStateException(errorMessage);
    }

    // chunkPaths holds one entry per chunk submitted so far, so the new chunk index must be exactly its length
    if (chunkIndex != submittedChunkCount) {
      // Report the index that actually failed the check (the chunk id is added for traceability)
      final String errorMessage = String.format("Wrong chunk index; expected [%d], got [%d] (chunk id [%s])",
              submittedChunkCount, chunkIndex, chunk.getId());
      this.logErrorWithTrackableId(errorMessage);
      throw new IllegalStateException(errorMessage);
    }

    final Path chunkFilePath = this.getPathForChunk(chunk);

    this.logDebugWithTrackableId("Submitting upload of Chunk(path=%s, size=%s)", chunkFilePath, chunk.getDataSize());

    /* Add the Chunk file path to a list that will be used for merge and cleanup */
    this.chunkPaths.add(chunkFilePath);
    this.chunkUploadFutures.add(this.executorService.submit(() -> {
      try {
        final String operationTitle = String.format("Upload Chunk(path=%s, size=%d)", chunkFilePath, chunk.getDataSize());
        this.logDebugWithTrackableId(operationTitle);

        // transform it into an InputStream - no more writing possible to this chunk
        final InputStream chunkInputStream = chunk.getInputStream();

        return HdlfsRetryUtils.execWithRetry(operationTitle, this.retryPolicy, /* idempotent */ true, () -> {
          final WebHdfsFileSystem webHdfsFileSystem = this.fileSystem.getWebHdfsFileSystem();
          /* Create new file for Chunk and copy data from Chunk to CREATE output stream */
          try (final OutputStream createOutputStream = webHdfsFileSystem.create(chunkFilePath, /* overwrite */ true)) {
            /* In case of failures, chunkInputStream will be kept open for retries - DataChunk.copyBytesTo marks and resets the stream */
            IOUtils.copyBytesTo(chunkInputStream, createOutputStream, chunk.getDataSize());

            if (this.progressable != null) {
              this.progressable.progress();
            }

            this.logDebugWithTrackableId("Successfully uploaded Chunk(path=%s)", chunkFilePath);
            return chunkFilePath;
          }
        });
      } catch (final Exception ex) {
        final String errorMessage = String.format("Could not upload Chunk(path=%s)", chunkFilePath);
        this.logErrorWithTrackableId(errorMessage, ex);
        throw new IOException(errorMessage, ex);
      } finally {
        /* Make sure the chunk is closed */
        chunk.close();
      }
    }));
  }

  /**
   * Finishes the multipart upload: waits for every submitted chunk upload, merges the chunks
   * into the target file and deletes the chunk files. When deletion of the temporary
   * directories is enabled, the chunks' parent directory and then the {@code _temporary}
   * directory are removed as well.
   *
   * @throws IOException if waiting, merging or any of the delete steps fails
   */
  public void complete() throws IOException {
    this.waitForChunksToBeUploaded();
    this.mergeChunks();
    this.deleteChunks();

    if (!this.isMPUDeleteTempDirEnabled) {
      return;
    }

    // Only when this upload's chunk directory is gone do we try to remove the _temporary folder.
    // If that delete fails, other chunks are still being processed there and a later
    // complete() call gets to try again.
    final boolean chunkDirRemoved = this.deleteChunkParentDirectory();
    if (chunkDirRemoved) {
      this.deleteTempDirectory();
    }
  }

  /**
   * Aborts the multipart upload: requests cancellation of every submitted chunk upload and
   * then removes the chunk files and, when enabled, the temporary directories.
   * Cleanup failures are logged as warnings and not propagated.
   */
  public void abort() {
    this.logDebugWithTrackableId("Aborting MultipartUpload...");

    /* Ask every submitted upload to stop, interrupting those already running */
    for (final ListenableFuture pendingUpload : this.chunkUploadFutures) {
      pendingUpload.cancel(/* mayInterruptIfRunning */ true);
    }

    /* Remove whatever chunks and temp directories were created so far */
    try {
      this.deleteChunks();

      if (this.isMPUDeleteTempDirEnabled && this.deleteChunkParentDirectory()) {
        this.deleteTempDirectory();
      }
    } catch (final IOException cleanupFailure) {
      this.logWarnWithTrackableId("Error while cleaning up created temp directories and chunks", cleanupFailure);
    }
  }

  /**
   * @return a short description made of the upload id, the target path and the ETag;
   *         this is the trackable prefix used by the logging helpers of this class
   */
  @Override
  public String toString() {
    final StringBuilder description = new StringBuilder("HdlfsMultipartUpload{");
    description.append("id='").append(this.id).append('\'');
    description.append(", targetPath=").append(this.targetPath);
    description.append(", eTag='").append(this.eTag).append('\'');
    description.append('}');
    return description.toString();
  }

  /**
   * Blocks until every submitted chunk upload has finished.
   * If any upload failed, the whole multipart upload is aborted (including cleanup) before
   * the failure is rethrown. If the waiting thread is interrupted, its interrupt flag is
   * restored and no abort is performed here.
   *
   * @return the paths of the uploaded chunk files, in submission order
   * @throws IOException if the wait was interrupted or any chunk upload failed
   */
  protected List<Path> waitForChunksToBeUploaded() throws IOException {
    this.logDebugWithTrackableId("Waiting for [%d] Chunks to be uploaded", this.chunkUploadFutures.size());

    try {
      // Named differently from the chunkPaths field to avoid shadowing it
      final List<Path> uploadedChunkPaths = Futures.allAsList(this.chunkUploadFutures).get();
      this.logDebugWithTrackableId("All Chunks uploaded");
      return uploadedChunkPaths;
    } catch (final InterruptedException ex) {
      final String errorMessage = "Thread interrupted while waiting for Chunks to be uploaded";
      this.logErrorWithTrackableId(errorMessage, ex);
      Thread.currentThread().interrupt();
      throw new IOException(errorMessage, ex);
    } catch (final ExecutionException ex) {
      final String errorMessage = "Error while waiting for Chunks to be uploaded; aborting and cleaning up";
      // Prefer the failure raised by the upload task over the ExecutionException wrapper
      final Throwable actualException = Optional.ofNullable(ex.getCause()).orElse(ex);

      this.logErrorWithTrackableId(errorMessage, actualException);
      this.abort();

      throw new IOException(errorMessage, actualException);
    }
  }

  /**
   * Merges all submitted chunk files into the target file, using the merge-specific retry
   * policy, and records the ETag reported by the merge.
   *
   * @return the result of the merge operation
   * @throws IOException if the merge fails
   */
  protected HdlfsFileSystemCapabilities.MergeResult mergeChunks() throws IOException {
    // Use the chunk COUNT in the title: formatting the list itself would print every chunk
    // path (up to MAX_CHUNKS of them) into each log line and retry message.
    final String operationTitle = String.format("Merge [%d] Chunks into target file (path=%s)", this.chunkPaths.size(), this.targetPath);
    this.logDebugWithTrackableId(operationTitle);

    try {
      return HdlfsRetryUtils.execWithRetry(operationTitle, this.mergeFileNotFoundRetryPolicy, /* idempotent */ true, () -> {
        final WebHdfsFileSystem webHdfsFileSystem = this.fileSystem.getWebHdfsFileSystem();
        final HdlfsFileSystemCapabilities.MergeResult mergeResult = webHdfsFileSystem.merge(this.targetPath, this.chunkPaths);

        if (this.progressable != null) {
          this.progressable.progress();
        }

        this.eTag = mergeResult.getETag();
        this.logDebugWithTrackableId("Merge of [%d] Chunks into target file (path=%s) is complete (ETag: %s)", this.chunkPaths.size(), this.targetPath, this.eTag);

        return mergeResult;
      });
    } catch (final Exception ex) {
      final String errorMessage = String.format("Failed to merge [%d] MultipartUpload Chunks into target file (path=[%s])", this.chunkPaths.size(), this.targetPath);
      this.logErrorWithTrackableId(errorMessage, ex);
      throw new IOException(errorMessage, ex);
    }
  }

  /**
   * Deletes all submitted chunk files with a single batch delete and records whether the
   * batch reported success (see {@link #isCleanupComplete()}).
   *
   * @return the result of the batch delete
   * @throws IOException if the batch delete request fails
   */
  protected HdlfsFileSystemCapabilities.DeleteBatchResult deleteChunks() throws IOException {
    this.logDebugWithTrackableId("Deleting [%d] Chunks", this.chunkPaths.size());

    try {
      final WebHdfsFileSystem webHdfsFileSystem = this.fileSystem.getWebHdfsFileSystem();
      final HdlfsFileSystemCapabilities.DeleteBatchResult deleteBatchResult = webHdfsFileSystem.deleteBatch(this.chunkPaths);

      this.cleanupComplete = deleteBatchResult.isSuccessful();

      // Only claim success when the batch result says so; a completed request is not
      // necessarily a successful delete.
      if (this.cleanupComplete) {
        this.logDebugWithTrackableId("Successfully deleted [%d] Chunks", this.chunkPaths.size());
      } else {
        LOG.warn("{}: {}", this,
                String.format("Batch delete of [%d] Chunks did not report success", this.chunkPaths.size()));
      }

      if (this.progressable != null) {
        this.progressable.progress();
      }

      return deleteBatchResult;
    } catch (final IOException ex) {
      final String errorMessage = String.format("Failed to delete [%d] MPU chunks", this.chunkPaths.size());
      this.logErrorWithTrackableId(errorMessage, ex);
      throw new IOException(errorMessage, ex);
    }
  }

  /**
   * Deletes, non-recursively, the directory that holds this upload's chunk files, retrying
   * according to the delete-temp-dir retry policy.
   *
   * @return {@code true} if no chunk was ever submitted, otherwise the outcome reported by
   *         the delete call
   * @throws IOException if the delete ultimately fails
   */
  protected boolean deleteChunkParentDirectory() throws IOException {
    final boolean nothingSubmitted = this.chunkPaths.isEmpty();
    if (nothingSubmitted) {
      return true;
    }

    /* All chunks of one upload share the same parent directory, so the first one is enough */
    final Path chunksDirectory = this.chunkPaths.get(0).getParent();
    final String retryTitle = String.format("Delete chunks directory [%s] ", chunksDirectory.toString());

    try {
      // The batch delete of the chunk files is asynchronous, so this directory may still
      // contain entries for a while; the retry policy keeps trying until it can be removed.
      return HdlfsRetryUtils.execWithRetry(retryTitle, this.deleteMpuTempDirRetryPolicy, false, () -> {
        final WebHdfsFileSystem webHdfs = this.fileSystem.getWebHdfsFileSystem();
        return webHdfs.delete(chunksDirectory, false);
      });
    } catch (final IOException deleteFailure) {
      final String failureMessage = String.format("Failed to delete directory [%s]", chunksDirectory);
      this.logErrorWithTrackableId(failureMessage, deleteFailure);
      throw new IOException(failureMessage, deleteFailure);
    }
  }

  /**
   * Deletes, non-recursively, the {@code _temporary} directory located next to the target file.
   * A {@link FileAlreadyExistsException} or {@link PathIsNotEmptyDirectoryException} is only
   * logged as a warning; any other I/O failure is rethrown.
   *
   * @throws IOException if the delete fails for a reason other than the two tolerated ones
   */
  protected void deleteTempDirectory() throws IOException {
    final Path path = new Path(this.targetPath.getParent(), TEMPORARY_DIR_NAME);
    final Path tempDir = this.makeAbsolutePath(path);
    final WebHdfsFileSystem webHdfsFileSystem = this.fileSystem.getWebHdfsFileSystem();

    try {
      // The debug helper formats with String.format, so placeholders must be %s; SLF4J-style
      // "{}" would be printed literally and the path would be dropped.
      this.logDebugWithTrackableId("Deleting directory [%s].", tempDir);
      final boolean deleted = webHdfsFileSystem.delete(tempDir, false);

      if (deleted) {
        this.logDebugWithTrackableId("Successfully deleted directory [%s]", tempDir);
      } else {
        this.logDebugWithTrackableId("Directory [%s] was not deleted", tempDir);
      }
    } catch (final FileAlreadyExistsException | PathIsNotEmptyDirectoryException e) {
      final String errorMessage = String.format("Failed to delete directory [%s]", tempDir);
      this.logWarnWithTrackableId(errorMessage, e);
    } catch (final IOException ex) {
      final String errorMessage = String.format("Failed to delete directory [%s]", tempDir);
      this.logErrorWithTrackableId(errorMessage, ex);
      throw new IOException(errorMessage, ex);
    }
  }

  /**
   * @return the live list of chunk file paths submitted so far, in chunk-index order
   *         (not a copy)
   */
  protected List<Path> getChunkPaths() {
    return this.chunkPaths;
  }

  protected List> getChunkUploadFutures() {
    return this.chunkUploadFutures;
  }

  /**
   * Builds the absolute path of the file a chunk is uploaded to.
   *
   * @param chunk chunk whose id becomes the suffix of the file name
   * @return an absolute path with all URI components other than the path removed
   */
  protected Path getPathForChunk(final DataChunk chunk) {
    /* Chunk file path format:
     *   <targetParent>/_temporary/<sha256(targetPath)>/<randomUuid>.chunk<chunkId>
     * NOTE: the path must be absolute, e.g.: /path/to/chunk
     */
    final String targetPathDigest = DigestUtils.sha256Hex(this.targetPath.toString());
    final String relativeChunkName = String.format("%s/%s/%s.chunk%s",
            TEMPORARY_DIR_NAME, targetPathDigest, UUID.randomUUID(), chunk.getId());
    final Path chunkLocation = new Path(this.targetPath.getParent(), relativeChunkName);

    return this.makeAbsolutePath(chunkLocation);
  }

  /**
   * Turns the given path into an absolute one and keeps only its path component.
   * A relative path is resolved against the working directory of the WebHDFS file system.
   *
   * @param path path to convert
   * @return a path built solely from the path component of the absolute URI
   */
  protected Path makeAbsolutePath(final Path path) {
    Path resolved = path;

    if (!path.isAbsolute()) {
      final Path workingDirectory = this.fileSystem.getWebHdfsFileSystem().getWorkingDirectory();
      resolved = new Path(workingDirectory, path);
    }

    // Drop every URI component except the path itself
    final String pathComponent = resolved.toUri().getPath();
    return new Path(pathComponent);
  }

  /**
   * @return the listening executor created in the constructor, to which the chunk uploads of
   *         this instance are submitted
   */
  protected ListeningExecutorService getExecutorService() {
    return this.executorService;
  }

  /**
   * Logs an error message prefixed with this upload's string representation.
   *
   * @param errorMessage message to log
   */
  private void logErrorWithTrackableId(final String errorMessage) {
    LOG.error("{}: {}", this, errorMessage);
  }

  /**
   * Logs an error message prefixed with this upload's string representation, passing the
   * throwable as the trailing logger argument.
   *
   * @param errorMessage message to log
   * @param ex failure associated with the message
   */
  private void logErrorWithTrackableId(final String errorMessage, final Throwable ex) {
    LOG.error("{}: {}", this, errorMessage, ex);
  }

  /**
   * Logs a warning prefixed with this upload's string representation, passing the throwable
   * as the trailing logger argument.
   *
   * @param errorMessage message to log
   * @param ex failure associated with the message
   */
  protected void logWarnWithTrackableId(final String errorMessage, final Throwable ex) {
    LOG.warn("{}: {}", this, errorMessage, ex);
  }

  /**
   * Logs a ready-made debug message prefixed with this upload's string representation.
   * The message is logged as is; no format specifiers are expanded.
   *
   * @param message message to log
   */
  private void logDebugWithTrackableId(final String message) {
    LOG.debug("{}: {}", this, message);
  }

  /**
   * Logs a debug message prefixed with this upload's string representation.
   * The message is built with {@link String#format(String, Object...)}, so the format must use
   * {@code %s}-style specifiers, not SLF4J {@code {}} placeholders.
   *
   * @param messageFormat {@code String.format} pattern
   * @param args arguments for the pattern
   */
  private void logDebugWithTrackableId(final String messageFormat, Object... args) {
    // Skip the eager String.format (and the toString() calls it triggers) when debug is off
    if (LOG.isDebugEnabled()) {
      LOG.debug("{}: {}", this, String.format(messageFormat, args));
    }
  }

  /**
   * Settings of the multipart upload, read once from a Hadoop {@link Configuration}.
   * When multipart upload is enabled, the numeric settings are validated at construction time.
   */
  public static class Config {

    /* ---- Configuration keys ---- */

    private static final String CONFIG_KEY_PREFIX = "fs.hdlfs.multipart.upload.";
    private static final String CONFIG_KEY_MPU_DELETE_TEMP_DIR_PREFIX = "fs.hdlfs.multipart.upload.delete-temp-dir.";
    private static final String CONFIG_CHUNKS_KEY_PREFIX = CONFIG_KEY_PREFIX + "chunks.";
    private static final String CONFIG_GLOBAL_THREAD_POOL_KEY_PREFIX = CONFIG_KEY_PREFIX + "threads.global.";

    protected static final String CONFIG_ENABLED_KEY = CONFIG_KEY_PREFIX + "enabled";
    protected static final String CONFIG_KEY_MPU_DELETE_TEMP_DIR = CONFIG_KEY_MPU_DELETE_TEMP_DIR_PREFIX + "enabled";
    protected static final String CONFIG_CHUNKS_SIZE_KEY = CONFIG_CHUNKS_KEY_PREFIX + "size";
    protected static final String CONFIG_CHUNKS_MAX_ACTIVE_KEY = CONFIG_CHUNKS_KEY_PREFIX + "max-active";
    protected static final String CONFIG_GLOBAL_THREAD_POOL_KEEP_ALIVE_SECS_KEY = CONFIG_GLOBAL_THREAD_POOL_KEY_PREFIX + "keep-alive-secs";
    protected static final String CONFIG_GLOBAL_THREAD_POOL_QUEUE_SIZE_KEY = CONFIG_GLOBAL_THREAD_POOL_KEY_PREFIX + "queue-size";
    protected static final String CONFIG_GLOBAL_THREAD_POOL_SIZE_KEY = CONFIG_GLOBAL_THREAD_POOL_KEY_PREFIX + "pool-size";

    /* ---- Defaults ---- */

    protected static final boolean DEFAULT_ENABLED = true;
    protected static final boolean DEFAULT_DISABLED = false;
    protected static final int DEFAULT_CHUNK_SIZE = 8 * 1024 * 1024; // 8 MiB
    protected static final int DEFAULT_GLOBAL_THREAD_POOL_KEEP_ALIVE_SECS = 60;
    protected static final int DEFAULT_GLOBAL_THREAD_POOL_QUEUE_SIZE = 128;
    protected static final int DEFAULT_GLOBAL_THREAD_POOL_SIZE = 16;
    protected static final int DEFAULT_MAX_ACTIVE_CHUNKS = 5;

    /* ---- Accepted ranges ---- */

    private static final int MAX_CHUNK_SIZE = 128 * 1024 * 1024; // 128 MiB

    private static final int MIN_CHUNK_SIZE = 8 * 1024 * 1024; // 8 MiB
    private static final int MIN_GLOBAL_THREAD_POOL_KEEP_ALIVE_SECS = 0;
    private static final int MIN_GLOBAL_THREAD_POOL_QUEUE_SIZE = 1;
    private static final int MIN_GLOBAL_THREAD_POOL_SIZE = 1;
    private static final int MIN_MAX_ACTIVE_CHUNKS = 1;

    /* ---- Values ---- */

    private final boolean enabled;
    private final boolean mpuDeleteTempDirEnabled;
    private final int chunkSize;
    private final boolean fsCacheEnabled;
    private final int globalThreadPoolKeepAliveSecs;
    private final int globalThreadPoolQueueSize;
    private final int globalThreadPoolSize;
    private final boolean hdlfsOutputCommitterEnabled;
    private final int maxActiveChunks;
    private final RetryPolicy mergeFileNotFoundRetryPolicy;
    private final RetryPolicy deleteMpuTempDirRetryPolicy;

    /**
     * Reads all multipart upload settings from the given configuration and validates them.
     *
     * @param fsConf configuration to read from
     * @throws IllegalArgumentException if multipart upload is enabled and a numeric setting is
     *                                  outside its accepted range
     */
    public Config(final Configuration fsConf) {
      this.enabled = fsConf.getBoolean(CONFIG_ENABLED_KEY, DEFAULT_ENABLED);
      this.mpuDeleteTempDirEnabled = fsConf.getBoolean(CONFIG_KEY_MPU_DELETE_TEMP_DIR, DEFAULT_DISABLED);
      this.chunkSize = fsConf.getInt(CONFIG_CHUNKS_SIZE_KEY, DEFAULT_CHUNK_SIZE);
      this.fsCacheEnabled = fsConf.getBoolean(HdlfsConstants.FSCACHE_ENABLED, HdlfsConstants.FSCACHE_ENABLED_DEFAULT);
      this.globalThreadPoolKeepAliveSecs = fsConf.getInt(CONFIG_GLOBAL_THREAD_POOL_KEEP_ALIVE_SECS_KEY, DEFAULT_GLOBAL_THREAD_POOL_KEEP_ALIVE_SECS);
      this.globalThreadPoolQueueSize = fsConf.getInt(CONFIG_GLOBAL_THREAD_POOL_QUEUE_SIZE_KEY, DEFAULT_GLOBAL_THREAD_POOL_QUEUE_SIZE);
      this.globalThreadPoolSize = fsConf.getInt(CONFIG_GLOBAL_THREAD_POOL_SIZE_KEY, DEFAULT_GLOBAL_THREAD_POOL_SIZE);

      // The HDLFS output committer counts as enabled only when the configured factory is
      // exactly HdlfsCommitterFactory; an unset key means disabled.
      final String configuredCommitterFactory = fsConf.get(HdlfsConstants.MAPREDUCE_OUTPUTCOMMITTER_FACTORY_SCHEME_HDLFS_KEY);
      this.hdlfsOutputCommitterEnabled = HdlfsCommitterFactory.class.getName().equals(configuredCommitterFactory);

      this.maxActiveChunks = fsConf.getInt(CONFIG_CHUNKS_MAX_ACTIVE_KEY, DEFAULT_MAX_ACTIVE_CHUNKS);
      this.mergeFileNotFoundRetryPolicy = HdlfsRetryPolicies.createMergeFileNotFoundRetryPolicy(fsConf);
      this.deleteMpuTempDirRetryPolicy = HdlfsRetryPolicies.createMPUDeleteTempDirRetryPolicy(fsConf);

      this.checkConfigValues();
    }

    /** @return whether the file system cache setting is enabled */
    boolean isFsCacheEnabled() {
      return this.fsCacheEnabled;
    }

    /** @return whether the configured output committer factory is {@code HdlfsCommitterFactory} */
    public boolean isHdlfsOutputCommitterEnabled() {
      return this.hdlfsOutputCommitterEnabled;
    }

    /** @return whether multipart upload is enabled */
    public boolean isEnabled() {
      return this.enabled;
    }

    /** @return whether the temporary directories are deleted after an upload */
    public boolean isMPUDeleteTempDirEnabled() {
      return this.mpuDeleteTempDirEnabled;
    }

    /** @return the configured chunk size, in bytes */
    public int getChunkSize() {
      return this.chunkSize;
    }

    /** @return the maximum number of chunks a single upload may have in flight */
    public int getMaxActiveChunks() {
      return this.maxActiveChunks;
    }

    /** @return the keep-alive time of the global thread pool, in seconds */
    public int getGlobalThreadPoolKeepAliveSecs() {
      return this.globalThreadPoolKeepAliveSecs;
    }

    /** @return the queue size of the global thread pool */
    public int getGlobalThreadPoolQueueSize() {
      return this.globalThreadPoolQueueSize;
    }

    /** @return the size of the global thread pool */
    public int getGlobalThreadPoolSize() {
      return this.globalThreadPoolSize;
    }

    /** @return the retry policy used when merging chunks */
    public RetryPolicy getMergeFileNotFoundRetryPolicy() {
      return this.mergeFileNotFoundRetryPolicy;
    }

    /** @return the retry policy used when deleting the chunks' parent directory */
    public RetryPolicy getDeleteMpuTempDirRetryPolicy() {
      return this.deleteMpuTempDirRetryPolicy;
    }

    /**
     * Validates the numeric settings; nothing is checked when multipart upload is disabled.
     * The checks run in a fixed order, so the first violated one determines the exception.
     */
    private void checkConfigValues() {
      if (!this.enabled) {
        return;
      }

      checkAtMost(CONFIG_CHUNKS_SIZE_KEY, this.chunkSize, MAX_CHUNK_SIZE);
      checkAtLeast(CONFIG_CHUNKS_SIZE_KEY, this.chunkSize, MIN_CHUNK_SIZE);
      checkAtLeast(CONFIG_GLOBAL_THREAD_POOL_KEEP_ALIVE_SECS_KEY, this.globalThreadPoolKeepAliveSecs, MIN_GLOBAL_THREAD_POOL_KEEP_ALIVE_SECS);
      checkAtLeast(CONFIG_GLOBAL_THREAD_POOL_QUEUE_SIZE_KEY, this.globalThreadPoolQueueSize, MIN_GLOBAL_THREAD_POOL_QUEUE_SIZE);
      checkAtLeast(CONFIG_GLOBAL_THREAD_POOL_SIZE_KEY, this.globalThreadPoolSize, MIN_GLOBAL_THREAD_POOL_SIZE);
      checkAtLeast(CONFIG_CHUNKS_MAX_ACTIVE_KEY, this.maxActiveChunks, MIN_MAX_ACTIVE_CHUNKS);
    }

    /** Fails with an IllegalArgumentException when {@code value} exceeds {@code maximum}. */
    private static void checkAtMost(final String key, final int value, final int maximum) {
      Preconditions.checkArgument(value <= maximum,
              String.format("Config [%s] too large: [%d]; must be at most [%d]", key, value, maximum));
    }

    /** Fails with an IllegalArgumentException when {@code value} is below {@code minimum}. */
    private static void checkAtLeast(final String key, final int value, final int minimum) {
      Preconditions.checkArgument(value >= minimum,
              String.format("Config [%s] too small: [%d]; must be at least [%d]", key, value, minimum));
    }
  }

}

// © 2022-2024 SAP SE or an SAP affiliate company. All rights reserved.




© 2015 - 2025 Weber Informatics LLC | Privacy Policy