com.sap.hana.datalake.files.HdlfsOutputCommitter
An implementation of org.apache.hadoop.fs.FileSystem targeting SAP HANA Data Lake Files.
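As background for readers wiring this committer into a job: Hadoop selects a PathOutputCommitter per filesystem scheme through its committer factory configuration. The sketch below only illustrates that standard mechanism; the factory class name is a hypothetical placeholder, as this listing does not show which factory the artifact ships.
// Illustrative sketch only: bind a committer factory to the "hdlfs" scheme.
// "mapreduce.outputcommitter.factory.scheme.<scheme>" is the standard Hadoop key;
// the factory class named here is a hypothetical placeholder.
import org.apache.hadoop.conf.Configuration;
public final class HdlfsCommitterWiringSketch {
public static void main(final String[] args) {
final Configuration conf = new Configuration();
conf.set("mapreduce.outputcommitter.factory.scheme.hdlfs",
"com.sap.hana.datalake.files.HdlfsOutputCommitterFactory"); // hypothetical class name
}
}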
// © 2022-2024 SAP SE or an SAP affiliate company. All rights reserved.
package com.sap.hana.datalake.files;
import com.sap.hana.datalake.files.shaded.com.fasterxml.jackson.databind.JsonNode;
import com.sap.hana.datalake.files.shaded.com.fasterxml.jackson.databind.ObjectMapper;
import com.sap.hana.datalake.files.utils.HdlfsRetryUtils;
import com.sap.hana.datalake.files.utils.threads.ThreadUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.PathOutputCommitter;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nonnull;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
public class HdlfsOutputCommitter extends PathOutputCommitter {
public static final String FS_HDLFS_JOBUUID = "fs.hdlfs.commit.jobuuid";
private static final String COMMITTER_THREADS_PREFIX = "HdlfsOutputCommitter-thread";
private static final String EMPTY_STRING = "";
private static final Logger LOG = LoggerFactory.getLogger(HdlfsOutputCommitter.class);
private static volatile ExecutorService globalThreadPool;
private final Path outputPath;
private final Path workPath;
private final HdlfsFileSystem destFS;
private final Path relativeOutputPath; // relative to the destFS
private final String jobId;
private final ListeningExecutorService executorService;
private final boolean fsCacheEnabled;
private final ObjectMapper mapper;
private final RetryPolicy retryPolicy;
private final RetryPolicy mergeFileNotFoundRetryPolicy;
private final boolean isDeleteBatchAsyncEnabled;
private final boolean shouldCompleteAsyncDeleteBatch;
private final int deleteBatchCompleteWaitTimeSeconds;
private final int deleteBatchSize;
private final int maxDeleteRetries;
private final int deleteRetryTimeoutMs;
public HdlfsOutputCommitter(final Path outputPath, final TaskAttemptContext context) throws IOException {
super(outputPath, context);
final Configuration conf = context.getConfiguration();
final FileSystem fs = outputPath.getFileSystem(conf);
if (!(fs instanceof HdlfsFileSystem)) {
throw new IllegalArgumentException("Destination FileSystem for HdlfsOutputCommitter must be an instance of HdlfsFileSystem");
}
this.jobId = this.getOrCreateJobId(conf, context.getJobID());
this.destFS = (HdlfsFileSystem) fs;
this.workPath = this.getTaskAttemptPath(context, this.jobId, outputPath);
this.outputPath = fs.makeQualified(outputPath);
this.relativeOutputPath = new Path(this.getRelativePathFromSchemaPath(outputPath));
LOG.debug("Initializing HdlfsOutputCommitter(jobId={}, outputPath={}, workPath={})", this.jobId, this.outputPath, this.workPath);
this.mapper = new ObjectMapper();
this.retryPolicy = this.destFS.getRetryPolicy();
final ExecutorService threadPool = getOrCreateGlobalThreadPool(conf);
this.executorService = MoreExecutors.listeningDecorator(threadPool);
this.mergeFileNotFoundRetryPolicy = HdlfsRetryPolicies.createMergeFileNotFoundRetryPolicy(conf);
this.fsCacheEnabled = conf.getBoolean(HdlfsConstants.FSCACHE_ENABLED, HdlfsConstants.FSCACHE_ENABLED_DEFAULT);
this.isDeleteBatchAsyncEnabled = conf.getBoolean(HdlfsConstants.FS_OPERATION_DELETE_BATCH_ASYNC_ENABLED_KEY, HdlfsConstants.FS_OPERATION_DELETE_BATCH_ASYNC_ENABLED_DEFAULT);
this.shouldCompleteAsyncDeleteBatch = conf.getBoolean(HdlfsConstants.FS_OPERATION_DELETE_BATCH_ASYNC_COMPLETE_ENABLED_KEY, HdlfsConstants.FS_OPERATION_DELETE_BATCH_ASYNC_COMPLETE_ENABLED_DEFAULT);
this.deleteBatchCompleteWaitTimeSeconds = conf.getInt(HdlfsConstants.FS_OPERATION_DELETE_BATCH_ASYNC_COMPLETE_WAIT_TIME_SECS_KEY, HdlfsConstants.FS_OPERATION_DELETE_BATCH_ASYNC_COMPLETE_WAIT_TIME_SECS_DEFAULT);
this.deleteBatchSize = conf.getInt(HdlfsConstants.HDLFS_OUTPUT_COMMITTER_DELETE_BATCH_SIZE_KEY, HdlfsConstants.HDLFS_OUTPUT_COMMITTER_DELETE_BATCH_SIZE_DEFAULT);
this.maxDeleteRetries = conf.getInt(HdlfsConstants.HDLFS_OUTPUT_COMMITTER_DELETE_BATCH_MAX_RETRIES_KEY, HdlfsConstants.HDLFS_OUTPUT_COMMITTER_DELETE_BATCH_MAX_RETRIES_DEFAULT);
this.deleteRetryTimeoutMs = conf.getInt(HdlfsConstants.HDLFS_OUTPUT_COMMITTER_DELETE_BATCH_RETRY_TIMEOUT_MS_KEY, HdlfsConstants.HDLFS_OUTPUT_COMMITTER_DELETE_BATCH_RETRY_TIMEOUT_MS_DEFAULT);
LOG.debug("HdlfsOutputCommitter was initialized successfully");
}
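/*
 * Lazily creates the JVM-wide committer thread pool using double-checked locking on the
 * volatile globalThreadPool field: the first committer instance on a node sizes the pool
 * from the configuration it was constructed with (defaulting to 4 * availableProcessors
 * active tasks and 4x that many waiting tasks); later instances reuse the same pool.
 */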
private static ExecutorService getOrCreateGlobalThreadPool(final Configuration conf) {
if (globalThreadPool == null) {
synchronized (HdlfsOutputCommitter.class) {
if (globalThreadPool == null) {
// The first instance of this class on a given Spark node creates the thread pool, sized from the node configuration or from the client configuration
final int availableCores = Runtime.getRuntime().availableProcessors();
final int threadPoolMaxActiveTasks = conf.getInt(HdlfsConstants.HDLFS_OUTPUT_COMMITTER_THREADPOOL_MAX_ACTIVE_TASKS_KEY, 4 * availableCores);
final int threadPoolMaxWaitingTasks = conf.getInt(HdlfsConstants.HDLFS_OUTPUT_COMMITTER_THREADPOOL_MAX_WAITING_TASKS_KEY, 4 * threadPoolMaxActiveTasks);
final int threadPoolKeepAliveSeconds = conf.getInt(HdlfsConstants.HDLFS_OUTPUT_COMMITTER_THREADPOOL_KEEP_ALIVE_SECS_KEY, HdlfsConstants.HDLFS_OUTPUT_COMMITTER_THREADPOOL_KEEP_ALIVE_SECS_DEFAULT);
globalThreadPool = ThreadUtils.newDaemonThreadBlockingExecutor(
threadPoolMaxActiveTasks,
threadPoolMaxWaitingTasks,
/* allowCoreThreadTimeOut */ true,
threadPoolKeepAliveSeconds, TimeUnit.SECONDS,
/* fairSemaphore */ false,
COMMITTER_THREADS_PREFIX,
conf);
}
}
}
return globalThreadPool;
}
@Override
public Path getOutputPath() {
return this.outputPath;
}
@Override
public Path getWorkPath() {
return this.workPath;
}
@Override
public void setupJob(final JobContext context) {
LOG.info("Setting up job with id = [{}]", this.jobId);
final Configuration conf = context.getConfiguration();
conf.set(FS_HDLFS_JOBUUID, this.jobId);
}
@Override
public void setupTask(final TaskAttemptContext context) {
// no op
LOG.info("Setting up task with id = [{}]", context.getTaskAttemptID());
}
// we only need to commit tasks that actually write output to the filesystem
@Override
public boolean needsTaskCommit(final TaskAttemptContext taskAttemptContext) throws IOException {
final Path taskAttemptDir = this.getTaskAttemptPath(taskAttemptContext, this.jobId, this.relativeOutputPath);
try {
this.destFS.getFileStatus(taskAttemptDir);
LOG.debug("Task attempt {} contains data and must be committed", taskAttemptContext.getTaskAttemptID());
return true;
} catch (final FileNotFoundException ex) {
LOG.debug("Task attempt {} does not contain data and will not be committed", taskAttemptContext.getTaskAttemptID());
return false;
}
}
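/*
 * Task commit: list the task attempt directory, collect every ".pending" file found there
 * into a single ".pendingset" file (written with retries), register the pendingset in
 * FsCache when the cache is enabled, then delete the individual ".pending" files and run
 * the FsCache cleanup for the task attempt directory.
 */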
@Override
public void commitTask(final TaskAttemptContext taskAttemptContext) throws IOException {
LOG.debug("Committing task with id = [{}]", taskAttemptContext.getTaskAttemptID());
final Path taskAttemptDir = this.getTaskAttemptPath(taskAttemptContext, this.jobId, this.relativeOutputPath);
final FileStatus[] taskAttemptFiles = this.destFS.listStatus(taskAttemptDir);
Preconditions.checkNotNull(taskAttemptFiles, "Task Attempt Directory can not be empty");
final Path pendingSetPath = this.getTaskPendingSetPath(taskAttemptContext);
final HdlfsTaskCommitInfo taskCommitInfo = new HdlfsTaskCommitInfo(this.getTaskAttemptName(taskAttemptContext));
final List<String> pendingFilesToBeDeleted = new ArrayList<>();
for (final FileStatus fs : taskAttemptFiles) {
final String relativePath = this.getRelativePathFromSchemaPath(fs.getPath());
if (fs.isFile() && relativePath.endsWith(HdlfsConstants.PENDING_SUFFIX)) {
LOG.debug("Reading pending file " + relativePath);
final HdlfsTaskOutputInfo outputInfo = this.readTaskOutputInfo(this.destFS, new Path(relativePath));
taskCommitInfo.addTaskOutputInfo(outputInfo);
pendingFilesToBeDeleted.add(relativePath);
LOG.debug("Pending file [{}] was added to pendingset file [{}]", relativePath, pendingSetPath);
}
}
LOG.debug("Creating pendingset file [{}]", pendingSetPath);
final String operationName = String.format("Creation of PendingSet %s", pendingSetPath);
HdlfsRetryUtils.execWithRetry(operationName, this.retryPolicy, true, () -> {
try (final FSDataOutputStream out = this.destFS.create(pendingSetPath, true)) {
out.write(this.mapper.writeValueAsBytes(taskCommitInfo));
} catch (final Exception ex) {
final String message = String.format("Exception when writing pendingset file %s output.", pendingSetPath);
throw new IOException(message, ex);
}
return pendingSetPath;
});
LOG.debug("Pendingset [{}] created", pendingSetPath);
// The pendingset file has been created; if FsCache is enabled, the cache must now be updated as well
if (this.fsCacheEnabled) {
final List<FsCacheEntry> entries = Collections.singletonList(new FsCacheEntry(pendingSetPath.getParent(), pendingSetPath.getName(), false, false));
this.addEntriesToFsCache(entries);
}
this.performDeleteBatch(pendingFilesToBeDeleted, false);
this.performFsCacheCleanup(taskAttemptDir);
LOG.debug("Task with id = [{}] committed successfully", taskAttemptContext.getTaskAttemptID());
}
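/*
 * Task abort: mark the task's ".pendingset" file (if any) as deleted in FsCache, recursively
 * collect every file under the task attempt directory, and delete them all in batches.
 */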
@Override
public void abortTask(final TaskAttemptContext taskAttemptContext) throws IOException {
final Path taskAttPath = this.getTaskAttemptPath(taskAttemptContext, this.jobId, this.relativeOutputPath);
final List<String> toDelete = new ArrayList<>();
// Delete the task's pendingset file, if one exists, and mark it as deleted in FsCache
final Path taskPendingSet = this.getTaskPendingSetPath(taskAttemptContext);
toDelete.add(taskPendingSet.toString());
if (this.fsCacheEnabled) {
final List<FsCacheEntry> entries = Collections.singletonList(new FsCacheEntry(taskPendingSet.getParent(), taskPendingSet.getName(), false, true));
this.addEntriesToFsCache(entries);
}
FileStatus[] taskAttFiles = null;
try {
taskAttFiles = this.destFS.listStatus(taskAttPath);
} catch (final FileNotFoundException ex) {
LOG.info("Task {} has not written any data under its task attempt directory, there is nothing to cleanup", taskAttemptContext.getTaskAttemptID());
}
if (taskAttFiles != null) {
for (final FileStatus fileStatus : taskAttFiles) {
this.innerAbortTask(fileStatus, toDelete);
}
}
try {
this.performDeleteBatch(toDelete, true);
} catch (final IOException ex) {
final String message = String.format("Failed to abort task attempt %s. Could not delete task prefix files", taskAttemptContext.getTaskAttemptID());
throw new IOException(message, ex);
}
}
private void innerAbortTask(final FileStatus taskAttPathFs, final List<String> toDelete) throws IOException {
final Path relativePath = new Path(this.getRelativePathFromSchemaPath(taskAttPathFs.getPath()));
if (taskAttPathFs.isFile()) {
toDelete.add(relativePath.toString());
return;
}
final FileStatus[] prefixFiles = this.destFS.listStatus(relativePath);
Preconditions.checkNotNull(prefixFiles, String.format("Path %s is marked as a directory and can not be empty", relativePath));
for (final FileStatus nextFs : prefixFiles) {
this.innerAbortTask(nextFs, toDelete);
}
}
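/*
 * Job commit: scan the job attempt directory for ".pendingset" files and commit each one via
 * a PendingSetCommitter task submitted to the shared executor, collecting the FsCache entries
 * they produce; once all futures complete, the collected entries are applied to FsCache and
 * the job attempt directory's cache entries are cleaned up.
 */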
@Override
public void commitJob(final JobContext context) throws IOException {
LOG.debug("Initiating job commit for Job(id={}) with output path = [{}]", this.jobId, this.relativeOutputPath);
final Path jobAttemptPath = this.getJobAttemptPath(this.jobId, this.relativeOutputPath);
// List all files under the job attempt path; we are looking for .pendingset files to commit
final FileStatus[] jobAttemptFiles = this.destFS.listStatus(jobAttemptPath);
Preconditions.checkNotNull(jobAttemptFiles, "Job attempt directory can not be empty");
final List<Future<?>> taskOutputCommitFutures = new ArrayList<>();
final Set<FsCacheEntry> fsCacheFiles = Collections.synchronizedSet(new TreeSet<>());
for (final FileStatus fs : jobAttemptFiles) {
final String relativePath = this.getRelativePathFromSchemaPath(fs.getPath());
if (fs.isFile() && relativePath.endsWith(HdlfsConstants.PENDINGSET_SUFFIX)) {
final PendingSetCommitter pendingSetCommitter = new PendingSetCommitter(relativePath, this.destFS, fsCacheFiles);
taskOutputCommitFutures.add(this.executorService.submit(pendingSetCommitter));
}
}
try {
for (final Future<?> taskOutputCommitFuture : taskOutputCommitFutures) {
taskOutputCommitFuture.get();
}
} catch (final Exception ex) {
throw new IOException("Failed to commit the output of the job.", ex);
}
this.addEntriesToFsCache(fsCacheFiles);
this.performFsCacheCleanup(jobAttemptPath);
LOG.debug("Job(id={}) with output path = [{}] committed successfully", this.jobId, this.relativeOutputPath);
}
// Collection must be sorted by prefix
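// Entries are grouped into one patch per prefix by comparing consecutive elements, e.g.
// [p1/a, p1/b, p2/c] yields one patch for prefix p1 with {a, b} and one for p2 with {c};
// with unsorted input, a single prefix could be split across multiple patches.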
void addEntriesToFsCache(final Collection<FsCacheEntry> entries) throws IOException {
if (entries == null) {
return;
}
final List<FsCacheEntry> currentBatch = new ArrayList<>();
for (final FsCacheEntry entry : entries) {
if (!currentBatch.isEmpty() && !currentBatch.get(currentBatch.size() - 1).getPrefix().equals(entry.getPrefix())) {
// dispatch current batch
final Path currentPrefix = currentBatch.get(0).getPrefix();
this.executeFsCachePatchOperation(currentPrefix, currentBatch);
// init next batch
currentBatch.clear();
}
currentBatch.add(entry);
}
if (!currentBatch.isEmpty()) {
final Path currentPrefix = currentBatch.get(0).getPrefix();
this.executeFsCachePatchOperation(currentPrefix, currentBatch);
}
}
private void executeFsCachePatchOperation(final Path path, final List<FsCacheEntry> entries) throws IOException {
final JsonNode patch = this.createFsCachePatchJsonNode(entries);
final FsCache fsCache = this.destFS.getFsCache();
try {
fsCache.applyPatch(path, patch);
} catch (final IOException ex) {
LOG.error("Failed to apply patch {} to path {} in FsCache", patch.toString(), path);
throw ex;
}
}
private JsonNode createFsCachePatchJsonNode(final List<FsCacheEntry> entries) {
final List