org.apache.hadoop.fs.s3a.commit.CommitConstants Maven / Gradle / Ivy
Show all versions of hadoop-aws Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.commit;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import static org.apache.hadoop.fs.s3a.Constants.XA_HEADER_PREFIX;
/**
 * Constants for working with the S3A committers: configuration option
 * names and their defaults, special path elements, file suffixes,
 * capability names and header names.
 * This is a constants holder only; it cannot be instantiated.
 */
@SuppressWarnings("unused")
@InterfaceAudience.Public
@InterfaceStability.Unstable
public final class CommitConstants {

  /** Constants holder: no instances. */
  private CommitConstants() {
  }

  /**
   * Path for "magic" writes: path and {@link #PENDING_SUFFIX} files:
   * {@value}.
   */
  public static final String MAGIC = "__magic";

  /**
   * Marker of the start of a directory tree for calculating
   * the final path names: {@value}.
   */
  public static final String BASE = "__base";

  /**
   * Suffix applied to pending commit metadata: {@value}.
   */
  public static final String PENDING_SUFFIX = ".pending";

  /**
   * Suffix applied to multiple pending commit metadata: {@value}.
   */
  public static final String PENDINGSET_SUFFIX = ".pendingset";

  /**
   * Prefix to use for config options: {@value}.
   */
  public static final String OPT_PREFIX = "fs.s3a.committer.";

  /**
   * Prefix of the configuration options of the magic committer;
   * {@link #MAGIC_COMMITTER_ENABLED} is built from it.
   * Value: {@value}.
   */
  public static final String MAGIC_COMMITTER_PREFIX
      = "fs.s3a.committer.magic";

  /**
   * Flag to indicate whether support for the Magic committer is enabled
   * in the filesystem.
   * Value: {@value}.
   */
  public static final String MAGIC_COMMITTER_ENABLED
      = MAGIC_COMMITTER_PREFIX + ".enabled";

  /**
   * Flag to indicate whether a stream is a magic output stream;
   * returned in {@code StreamCapabilities}.
   * Value: {@value}.
   */
  public static final String STREAM_CAPABILITY_MAGIC_OUTPUT
      = "fs.s3a.capability.magic.output.stream";

  /**
   * Flag to indicate that a store supports magic committers;
   * returned in {@code PathCapabilities}.
   * Value: {@value}.
   */
  public static final String STORE_CAPABILITY_MAGIC_COMMITTER
      = "fs.s3a.capability.magic.committer";

  /**
   * Older name of the flag to indicate whether a stream is a magic
   * output stream; returned in {@code StreamCapabilities}.
   * Value: {@value}.
   * @deprecated use {@link #STREAM_CAPABILITY_MAGIC_OUTPUT}.
   */
  @Deprecated
  public static final String STREAM_CAPABILITY_MAGIC_OUTPUT_OLD
      = "s3a:magic.output.stream";

  /**
   * Older name of the flag to indicate that a store supports magic
   * committers; returned in {@code PathCapabilities}.
   * Value: {@value}.
   * @deprecated use {@link #STORE_CAPABILITY_MAGIC_COMMITTER}.
   */
  @Deprecated
  public static final String STORE_CAPABILITY_MAGIC_COMMITTER_OLD
      = "s3a:magic.committer";

  /**
   * Default value for {@link #MAGIC_COMMITTER_ENABLED}: {@value}.
   */
  public static final boolean DEFAULT_MAGIC_COMMITTER_ENABLED = true;

  /**
   * This is the "Pending" directory of the {@code FileOutputCommitter};
   * data written here is, in that algorithm, renamed into place.
   * Value: {@value}.
   */
  public static final String TEMPORARY = "_temporary";

  /**
   * Temp data which is not auto-committed: {@value}.
   */
  public static final String TEMP_DATA = TEMPORARY;

  /**
   * Flag to trigger creation of a marker file on job completion:
   * {@value}.
   */
  public static final String CREATE_SUCCESSFUL_JOB_OUTPUT_DIR_MARKER
      = "mapreduce.fileoutputcommitter.marksuccessfuljobs";

  /**
   * Marker file to create on success: {@value}.
   */
  public static final String _SUCCESS = "_SUCCESS";

  /** Default job marker option: {@value}. */
  public static final boolean DEFAULT_CREATE_SUCCESSFUL_JOB_DIR_MARKER = true;

  /**
   * Key to set for the S3A scheme to use the specific committer:
   * {@value}.
   */
  // A plain literal (rather than a no-argument String.format call) keeps
  // this a compile-time constant, so {@value} above can be resolved.
  public static final String S3A_COMMITTER_FACTORY_KEY =
      "mapreduce.outputcommitter.factory.scheme.s3a";

  /**
   * S3 Committer factory: {@value}.
   * This uses the value of {@link #FS_S3A_COMMITTER_NAME}
   * to choose the final committer.
   */
  public static final String S3A_COMMITTER_FACTORY =
      S3ACommitterFactory.CLASSNAME;

  /**
   * Option to identify the S3A committer:
   * {@value}.
   */
  public static final String FS_S3A_COMMITTER_NAME =
      "fs.s3a.committer.name";

  /**
   * Option for {@link #FS_S3A_COMMITTER_NAME}:
   * classic/file output committer: {@value}.
   */
  public static final String COMMITTER_NAME_FILE = "file";

  /**
   * Option for {@link #FS_S3A_COMMITTER_NAME}:
   * magic output committer: {@value}.
   */
  public static final String COMMITTER_NAME_MAGIC = "magic";

  /**
   * Option for {@link #FS_S3A_COMMITTER_NAME}:
   * directory output committer: {@value}.
   */
  public static final String COMMITTER_NAME_DIRECTORY = "directory";

  /**
   * Option for {@link #FS_S3A_COMMITTER_NAME}:
   * partitioned output committer: {@value}.
   */
  public static final String COMMITTER_NAME_PARTITIONED = "partitioned";

  /**
   * Option for final files to have a uniqueness name through job attempt info,
   * falling back to a new UUID if there is no job attempt information to use.
   * {@value}.
   * When writing data with the "append" conflict option, this guarantees
   * that new data will not overwrite any existing data.
   */
  public static final String FS_S3A_COMMITTER_STAGING_UNIQUE_FILENAMES =
      "fs.s3a.committer.staging.unique-filenames";

  /**
   * Default value for {@link #FS_S3A_COMMITTER_STAGING_UNIQUE_FILENAMES}:
   * {@value}.
   */
  public static final boolean DEFAULT_STAGING_COMMITTER_UNIQUE_FILENAMES = true;

  /**
   * Staging committer conflict resolution policy: {@value}.
   * Supported: fail, append, replace.
   */
  public static final String FS_S3A_COMMITTER_STAGING_CONFLICT_MODE =
      "fs.s3a.committer.staging.conflict-mode";

  /** Conflict mode: {@value}. */
  public static final String CONFLICT_MODE_FAIL = "fail";

  /** Conflict mode: {@value}. */
  public static final String CONFLICT_MODE_APPEND = "append";

  /** Conflict mode: {@value}. */
  public static final String CONFLICT_MODE_REPLACE = "replace";

  /** Default conflict mode: {@value}. */
  public static final String DEFAULT_CONFLICT_MODE = CONFLICT_MODE_APPEND;

  /**
   * Number of threads in committers for parallel operations on files
   * (upload, commit, abort, delete...): {@value}.
   * Two thread pools this size are created, one for the outer
   * task-level parallelism, and one for parallel execution
   * within tasks (POSTs to commit individual uploads).
   * If the value is negative, it is inverted and then multiplied
   * by the number of cores in the CPU.
   */
  public static final String FS_S3A_COMMITTER_THREADS =
      "fs.s3a.committer.threads";

  /**
   * Default value for {@link #FS_S3A_COMMITTER_THREADS}: {@value}.
   */
  public static final int DEFAULT_COMMITTER_THREADS = 32;

  /**
   * Path in the cluster filesystem for temporary data: {@value}.
   * This is for HDFS, not the local filesystem.
   * It is only for the summary data of each file, not the actual
   * data being committed.
   */
  public static final String FS_S3A_COMMITTER_STAGING_TMP_PATH =
      "fs.s3a.committer.staging.tmp.path";

  /**
   * Should committers abort all pending uploads to the destination
   * directory?
   *
   * Value: {@value}.
   *
   * @deprecated switch to {@link #FS_S3A_COMMITTER_ABORT_PENDING_UPLOADS}.
   */
  @Deprecated
  public static final String FS_S3A_COMMITTER_STAGING_ABORT_PENDING_UPLOADS =
      "fs.s3a.committer.staging.abort.pending.uploads";

  /**
   * Should committers abort all pending uploads to the destination
   * directory?
   *
   * Value: {@value}.
   *
   * Change this if more than one committer is
   * writing to the same destination tree simultaneously; otherwise
   * the first job to complete will cancel all outstanding uploads from the
   * others. If disabled, configure the bucket lifecycle to remove uploads
   * after a time period, and/or set up a workflow to explicitly delete
   * entries. Otherwise there is a risk that uncommitted uploads may run up
   * bills.
   */
  public static final String FS_S3A_COMMITTER_ABORT_PENDING_UPLOADS =
      "fs.s3a.committer.abort.pending.uploads";

  /**
   * Default configuration value for
   * {@link #FS_S3A_COMMITTER_ABORT_PENDING_UPLOADS}.
   * Value: {@value}.
   */
  public static final boolean DEFAULT_FS_S3A_COMMITTER_ABORT_PENDING_UPLOADS =
      true;

  /**
   * The limit to the number of committed objects tracked during
   * job commits and saved to the _SUCCESS file: {@value}.
   */
  public static final int SUCCESS_MARKER_FILE_LIMIT = 100;

  /** Extra Data key for task attempt in pendingset files: {@value}. */
  public static final String TASK_ATTEMPT_ID = "task.attempt.id";

  /**
   * Require the spark UUID to be passed down: {@value}.
   * This is to verify that SPARK-33230 has been applied to spark, and that
   * {@link InternalCommitterConstants#SPARK_WRITE_UUID} is set.
   *
   * MUST ONLY BE SET WITH SPARK JOBS.
   */
  public static final String FS_S3A_COMMITTER_REQUIRE_UUID =
      "fs.s3a.committer.require.uuid";

  /**
   * Default value for {@link #FS_S3A_COMMITTER_REQUIRE_UUID}: {@value}.
   */
  public static final boolean DEFAULT_S3A_COMMITTER_REQUIRE_UUID =
      false;

  /**
   * Generate a UUID in job setup rather than fall back to
   * YARN Application attempt ID: {@value}.
   *
   * MUST ONLY BE SET WITH SPARK JOBS.
   */
  public static final String FS_S3A_COMMITTER_GENERATE_UUID =
      "fs.s3a.committer.generate.uuid";

  /**
   * Default value for {@link #FS_S3A_COMMITTER_GENERATE_UUID}: {@value}.
   */
  public static final boolean DEFAULT_S3A_COMMITTER_GENERATE_UUID =
      false;

  /**
   * Magic Marker header to declare final file length on magic uploads
   * marker objects: {@value}.
   */
  public static final String X_HEADER_MAGIC_MARKER =
      "x-hadoop-s3a-magic-data-length";

  /**
   * XAttr name of magic marker, with "header." prefix: {@value}.
   */
  public static final String XA_MAGIC_MARKER = XA_HEADER_PREFIX
      + X_HEADER_MAGIC_MARKER;

  /**
   * Task Attempt ID query header: {@value}.
   */
  public static final String PARAM_TASK_ATTEMPT_ID = "ta";

  /**
   * Directory for saving job summary reports.
   * These are the _SUCCESS files, but are saved even on
   * job failures.
   * Value: {@value}.
   */
  public static final String OPT_SUMMARY_REPORT_DIR =
      OPT_PREFIX + "summary.report.directory";

  /**
   * Experimental feature to collect thread level IO statistics.
   * When set the committers will reset the statistics in
   * task setup and propagate to the job committer.
   * The job committer will include those and its own statistics.
   * Do not use if the execution engine is collecting statistics,
   * as the multiple reset() operations will result in incomplete
   * statistics.
   * Value: {@value}.
   */
  public static final String S3A_COMMITTER_EXPERIMENTAL_COLLECT_IOSTATISTICS =
      OPT_PREFIX + "experimental.collect.iostatistics";

  /**
   * Default value for {@link #S3A_COMMITTER_EXPERIMENTAL_COLLECT_IOSTATISTICS}.
   * Value: {@value}.
   */
  public static final boolean S3A_COMMITTER_EXPERIMENTAL_COLLECT_IOSTATISTICS_DEFAULT =
      false;
}