org.apache.hadoop.tools.DistCpOptionSwitch Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hadoop-distcp Show documentation
Show all versions of hadoop-distcp Show documentation
Apache Hadoop Distributed Copy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools;
import org.apache.commons.cli.Option;
import org.apache.hadoop.conf.Configuration;
/**
* Enumeration mapping configuration keys to distcp command line
* options.
*/
public enum DistCpOptionSwitch {
/**
* Ignores any failures during copy, and continues with rest.
* Logs failures in a file
*/
IGNORE_FAILURES(DistCpConstants.CONF_LABEL_IGNORE_FAILURES,
new Option("i", false, "Ignore failures during copy")),
/**
* Preserves status of file/path in the target.
* Default behavior with -p, is to preserve replication,
* block size, user, group, permission, checksum type and timestamps on the
* target file. Note that when preserving checksum type, block size is also
* preserved.
*
* If any of the optional switches are present among rbugpcaxt, then
* only the corresponding file attribute is preserved.
*/
PRESERVE_STATUS(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
new Option("p", true, "preserve status (rbugpcaxt)(replication, " +
"block-size, user, group, permission, checksum-type, ACL, XATTR, " +
"timestamps). If -p is specified with no , then preserves " +
"replication, block size, user, group, permission, checksum type " +
"and timestamps. " +
"raw.* xattrs are preserved when both the source and destination " +
"paths are in the /.reserved/raw hierarchy (HDFS only). raw.* xattr" +
"preservation is independent of the -p flag. " +
"Refer to the DistCp documentation for more details.")),
/**
* Update target location by copying only files that are missing
* in the target. This can be used to periodically sync two folders
* across source and target. Typically used with DELETE_MISSING
* Incompatible with ATOMIC_COMMIT
*/
SYNC_FOLDERS(DistCpConstants.CONF_LABEL_SYNC_FOLDERS,
new Option("update", false, "Update target, copying only missing" +
"files or directories")),
/**
* Deletes missing files in target that are missing from source
* This allows the target to be in sync with the source contents
* Typically used in conjunction with SYNC_FOLDERS
* Incompatible with ATOMIC_COMMIT
*/
DELETE_MISSING(DistCpConstants.CONF_LABEL_DELETE_MISSING,
new Option("delete", false, "Delete from target, " +
"files missing in source. Delete is applicable only"
+ " with update or overwrite options")),
/**
* Configuration file to use with hftps:// for securely copying
* files across clusters. Typically the configuration file contains
* truststore/keystore information such as location, password and type
*/
SSL_CONF(DistCpConstants.CONF_LABEL_SSL_CONF,
new Option("mapredSslConf", true, "Configuration for ssl config file" +
", to use with hftps://. Must be in the classpath.")),
/**
* Number of threads for building source file listing (before map-reduce
* phase, max one listStatus per thread at a time).
*/
NUM_LISTSTATUS_THREADS(DistCpConstants.CONF_LABEL_LISTSTATUS_THREADS,
new Option("numListstatusThreads", true, "Number of threads to " +
"use for building file listing (max " +
DistCpOptions.maxNumListstatusThreads + ").")),
/**
* Max number of maps to use during copy. DistCp will split work
* as equally as possible among these maps
*/
MAX_MAPS(DistCpConstants.CONF_LABEL_MAX_MAPS,
new Option("m", true, "Max number of concurrent maps to use for copy")),
/**
* Source file listing can be provided to DistCp in a file.
* This allows DistCp to copy random list of files from source
* and copy them to target
*/
SOURCE_FILE_LISTING(DistCpConstants.CONF_LABEL_SOURCE_LISTING,
new Option("f", true, "List of files that need to be copied")),
/**
* Copy all the source files and commit them atomically to the target
* This is typically useful in cases where there is a process
* polling for availability of a file/dir. This option is incompatible
* with SYNC_FOLDERS and DELETE_MISSING
*/
ATOMIC_COMMIT(DistCpConstants.CONF_LABEL_ATOMIC_COPY,
new Option("atomic", false, "Commit all changes or none")),
/**
* Work path to be used only in conjunction in Atomic commit
*/
WORK_PATH(DistCpConstants.CONF_LABEL_WORK_PATH,
new Option("tmp", true, "Intermediate work path to be used for atomic commit")),
/**
* Log path where distcp output logs are written to
*/
LOG_PATH(DistCpConstants.CONF_LABEL_LOG_PATH,
new Option("log", true, "Folder on DFS where distcp execution logs are saved")),
/**
* Log additional info (path, size) in the SKIP/COPY log.
*/
VERBOSE_LOG(DistCpConstants.CONF_LABEL_VERBOSE_LOG,
new Option("v", false,
"Log additional info (path, size) in the SKIP/COPY log")),
/**
* Copy strategy is use. This could be dynamic or uniform size etc.
* DistCp would use an appropriate input format based on this.
*/
COPY_STRATEGY(DistCpConstants.CONF_LABEL_COPY_STRATEGY,
new Option("strategy", true, "Copy strategy to use. Default is " +
"dividing work based on file sizes")),
/**
* Skip CRC checks between source and target, when determining what
* files need to be copied.
*/
SKIP_CRC(DistCpConstants.CONF_LABEL_SKIP_CRC,
new Option("skipcrccheck", false, "Whether to skip CRC checks between " +
"source and target paths.")),
/**
* Overwrite target-files unconditionally.
*/
OVERWRITE(DistCpConstants.CONF_LABEL_OVERWRITE,
new Option("overwrite", false, "Choose to overwrite target files " +
"unconditionally, even if they exist.")),
APPEND(DistCpConstants.CONF_LABEL_APPEND,
new Option("append", false,
"Reuse existing data in target files and append new data to them if possible")),
DIFF(DistCpConstants.CONF_LABEL_DIFF,
new Option("diff", false,
"Use snapshot diff report to identify the difference between source and target"),
2),
RDIFF(DistCpConstants.CONF_LABEL_RDIFF,
new Option("rdiff", false,
"Use target snapshot diff report to identify changes made on target"),
2),
/**
* Should DisctpExecution be blocking
*/
BLOCKING("",
new Option("async", false, "Should distcp execution be blocking")),
FILE_LIMIT("",
new Option("filelimit", true, "(Deprecated!) Limit number of files " +
"copied to <= n")),
SIZE_LIMIT("",
new Option("sizelimit", true, "(Deprecated!) Limit number of files " +
"copied to <= n bytes")),
BLOCKS_PER_CHUNK(DistCpConstants.CONF_LABEL_BLOCKS_PER_CHUNK,
new Option("blocksperchunk", true, "If set to a positive value, files"
+ "with more blocks than this value will be split into chunks of "
+ " blocks to be transferred in parallel, and "
+ "reassembled on the destination. By default, is "
+ "0 and the files will be transmitted in their entirety without "
+ "splitting. This switch is only applicable when the source file "
+ "system implements getBlockLocations method and the target file "
+ "system implements concat method")),
/**
* Configurable copy buffer size.
*/
COPY_BUFFER_SIZE(DistCpConstants.CONF_LABEL_COPY_BUFFER_SIZE,
new Option("copybuffersize", true, "Size of the copy buffer to use. "
+ "By default is "
+ DistCpConstants.COPY_BUFFER_SIZE_DEFAULT + "B.")),
/**
* Specify bandwidth per map in MB
*/
BANDWIDTH(DistCpConstants.CONF_LABEL_BANDWIDTH_MB,
new Option("bandwidth", true, "Specify bandwidth per map in MB")),
/**
* Path containing a list of strings, which when found in the path of
* a file to be copied excludes that file from the copy job.
*/
FILTERS(DistCpConstants.CONF_LABEL_FILTERS_FILE,
new Option("filters", true, "The path to a file containing a list of"
+ " strings for paths to be excluded from the copy."));
public static final String PRESERVE_STATUS_DEFAULT = "-prbugpct";
private final String confLabel;
private final Option option;
DistCpOptionSwitch(String confLabel, Option option) {
this.confLabel = confLabel;
this.option = option;
}
DistCpOptionSwitch(String confLabel, Option option, int argNum) {
this(confLabel, option);
this.option.setArgs(argNum);
}
/**
* Get Configuration label for the option
* @return configuration label name
*/
public String getConfigLabel() {
return confLabel;
}
/**
* Get CLI Option corresponding to the distcp option
* @return option
*/
public Option getOption() {
return option;
}
/**
* Get Switch symbol
* @return switch symbol char
*/
public String getSwitch() {
return option.getOpt();
}
@Override
public String toString() {
return super.name() + " {" +
"confLabel='" + confLabel + '\'' +
", option=" + option + '}';
}
/**
* Helper function to add an option to hadoop configuration object
* @param conf - Configuration object to include the option
* @param option - Option to add
* @param value - Value
*/
public static void addToConf(Configuration conf,
DistCpOptionSwitch option,
String value) {
conf.set(option.getConfigLabel(), value);
}
/**
* Helper function to set an option to hadoop configuration object
* @param conf - Configuration object to include the option
* @param option - Option to add
*/
public static void addToConf(Configuration conf,
DistCpOptionSwitch option) {
conf.set(option.getConfigLabel(), "true");
}
}