// org.apache.hadoop.util.GenericOptionsParser (Maven / Gradle / Ivy)
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * <code>GenericOptionsParser</code> is a utility to parse command line
 * arguments generic to the Hadoop framework.
 *
 * <p><code>GenericOptionsParser</code> recognizes several standard command
 * line arguments, enabling applications to easily specify a namenode, a
 * ResourceManager, additional configuration resources etc.</p>
 *
 * <h3>Generic Options</h3>
 *
 * <p>The supported generic options are:</p>
 * <pre>
 *     -conf &lt;configuration file&gt;     specify a configuration file
 *     -D &lt;property=value&gt;            use value for given property
 *     -fs &lt;local|namenode:port&gt;      specify a namenode
 *     -jt &lt;local|resourcemanager:port&gt;    specify a ResourceManager
 *     -files &lt;comma separated list of files&gt;    specify comma separated
 *                            files to be copied to the map reduce cluster
 *     -libjars &lt;comma separated list of jars&gt;   specify comma separated
 *                            jar files to include in the classpath.
 *     -archives &lt;comma separated list of archives&gt;    specify comma
 *             separated archives to be unarchived on the compute machines.
 * </pre>
 *
 * <p>The general command line syntax is:</p>
 * <pre>
 * bin/hadoop command [genericOptions] [commandOptions]
 * </pre>
 *
 * <p>Generic command line arguments <strong>might</strong> modify
 * <code>Configuration</code> objects, given to constructors.</p>
 *
 * <p>The functionality is implemented using Commons CLI.</p>
 *
 * <p>Examples:</p>
 * <pre>
 * $ bin/hadoop dfs -fs darwin:8020 -ls /data
 * list /data directory in dfs with namenode darwin:8020
 *
 * $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
 * list /data directory in dfs with namenode darwin:8020
 *
 * $ bin/hadoop dfs -conf core-site.xml -conf hdfs-site.xml -ls /data
 * list /data directory in dfs with multiple conf files specified.
 *
 * $ bin/hadoop job -D yarn.resourcemanager.address=darwin:8032 -submit job.xml
 * submit a job to ResourceManager darwin:8032
 *
 * $ bin/hadoop job -jt darwin:8032 -submit job.xml
 * submit a job to ResourceManager darwin:8032
 *
 * $ bin/hadoop job -jt local -submit job.xml
 * submit a job to local runner
 *
 * $ bin/hadoop jar -libjars testlib.jar
 * -archives test.tgz -files file.txt inputjar args
 * job submission with libjars, files and archives
 * </pre>
 *
 * @see Tool
 * @see ToolRunner
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class GenericOptionsParser {
// Class-wide SLF4J logger.
private static final Logger LOG =
LoggerFactory.getLogger(GenericOptionsParser.class);
// Configuration handed to the constructor; the generic options are applied
// to this object in place.
private Configuration conf;
// Result of parsing the arguments; remains null when parsing raised a
// ParseException.
private CommandLine commandLine;
// True only when parsing and generic-option processing both completed
// without a ParseException.
private final boolean parseSuccessful;
/**
* Create an options parser with the given options to parse the args.
* A new {@link Configuration} is created and receives the generic options.
*
* @param opts the options
* @param args the command line arguments
* @throws IOException if applying a generic option fails with an I/O error
*/
public GenericOptionsParser(Options opts, String[] args)
throws IOException {
this(new Configuration(), opts, args);
}
/**
* Create an options parser to parse the args.
* A new {@link Configuration} and an empty {@link Options} set are used, so
* only the generic Hadoop options are recognized.
*
* @param args the command line arguments
* @throws IOException if applying a generic option fails with an I/O error
*/
public GenericOptionsParser(String[] args)
throws IOException {
this(new Configuration(), new Options(), args);
}
/**
* Create a <code>GenericOptionsParser</code> to parse only the generic
* Hadoop arguments.
*
* The array of string arguments other than the generic arguments can be
* obtained by {@link #getRemainingArgs()}.
*
* @param conf the <code>Configuration</code> to modify.
* @param args command-line arguments.
* @throws IOException if applying a generic option fails with an I/O error
*/
public GenericOptionsParser(Configuration conf, String[] args)
throws IOException {
this(conf, new Options(), args);
}
/**
 * Builds a parser that understands the caller-supplied options in addition
 * to the generic Hadoop options, and applies the generic ones to
 * {@code conf}.
 *
 * The parsed <code>CommandLine</code> can afterwards be fetched with
 * {@link #getCommandLine()}, and {@link #isParseSuccessful()} reports
 * whether parsing succeeded.
 *
 * @param conf the configuration to modify
 * @param options options built by the caller
 * @param args User-specified arguments
 * @throws IOException if applying a generic option fails with an I/O error
 */
public GenericOptionsParser(Configuration conf,
    Options options, String[] args) throws IOException {
  this.conf = conf;
  this.parseSuccessful = parseGeneralOptions(options, args);
}
/**
 * Returns the arguments that were left over after option parsing, i.e. the
 * application-specific arguments.
 *
 * @return the un-parsed arguments, or an empty array when no command line
 *         is available (parsing did not produce one)
 */
public String[] getRemainingArgs() {
  if (commandLine != null) {
    return commandLine.getArgs();
  }
  return new String[0];
}
/**
 * Get the configuration this parser was given, including whatever the
 * generic options changed in it.
 *
 * @return the configuration that has the modified parameters.
 */
public Configuration getConfiguration() {
  return this.conf;
}
/**
 * Returns the commons-cli <code>CommandLine</code> object holding the
 * parsed arguments.
 *
 * Note: If the object is created with
 * {@link #GenericOptionsParser(Configuration, String[])}, then returned
 * object will only contain parsed generic options.
 *
 * @return <code>CommandLine</code> representing the list of arguments
 *         parsed against the Options descriptor; {@code null} if parsing
 *         failed before a command line was produced
 */
public CommandLine getCommandLine() {
  return this.commandLine;
}
/**
 * Reports whether the command line was parsed without error.
 *
 * @return true if parsing the CLI was successful
 */
public boolean isParseSuccessful() {
  return this.parseSuccessful;
}
/**
 * Registers every generic Hadoop option (fs, jt, conf, D, libjars, files,
 * archives, tokenCacheFile) on the supplied option set.
 *
 * Important: as {@link OptionBuilder} is not thread safe, subclasses
 * must synchronize use on {@code OptionBuilder.class}
 *
 * @param opts the option set to extend
 * @return the same {@code opts} instance, now containing the generic options
 */
@SuppressWarnings("static-access")
protected Options buildGeneralOptions(Options opts) {
  // Each option is built and registered in one step; the lock is held for
  // the whole sequence because OptionBuilder is not thread safe.
  synchronized (OptionBuilder.class) {
    opts.addOption(OptionBuilder
        .withArgName("file:///|hdfs://namenode:port")
        .hasArg()
        .withDescription("specify default filesystem URL to use, "
            + "overrides 'fs.defaultFS' property from configurations.")
        .create("fs"));

    opts.addOption(OptionBuilder
        .withArgName("local|resourcemanager:port")
        .hasArg()
        .withDescription("specify a ResourceManager")
        .create("jt"));

    opts.addOption(OptionBuilder
        .withArgName("configuration file")
        .hasArg()
        .withDescription("specify an application configuration file")
        .create("conf"));

    opts.addOption(OptionBuilder
        .withArgName("property=value")
        .hasArg()
        .withDescription("use value for given property")
        .create('D'));

    opts.addOption(OptionBuilder
        .withArgName("paths")
        .hasArg()
        .withDescription(
            "comma separated jar files to include in the classpath.")
        .create("libjars"));

    opts.addOption(OptionBuilder
        .withArgName("paths")
        .hasArg()
        .withDescription("comma separated files to be copied to the " +
            "map reduce cluster")
        .create("files"));

    opts.addOption(OptionBuilder
        .withArgName("paths")
        .hasArg()
        .withDescription("comma separated archives to be unarchived" +
            " on the compute machines.")
        .create("archives"));

    // file with security tokens
    opts.addOption(OptionBuilder
        .withArgName("tokensFile")
        .hasArg()
        .withDescription("name of the file with the tokens")
        .create("tokenCacheFile"));

    return opts;
  }
}
/**
 * Applies the user-specified generic options to the configuration.
 *
 * Depending on which options are present this sets the default file
 * system URI, the ResourceManager address, extra configuration resources,
 * individual properties, and the {@code tmpjars}, {@code tmpfiles} and
 * {@code tmparchives} entries. It also installs class loaders for local
 * libjars and loads credentials from a token cache file.
 *
 * @param line User-specified generic options
 * @throws IOException if a referenced file cannot be validated or read
 */
private void processGeneralOptions(CommandLine line) throws IOException {
  if (line.hasOption("fs")) {
    FileSystem.setDefaultUri(conf, line.getOptionValue("fs"));
  }

  if (line.hasOption("jt")) {
    final String resourceManager = line.getOptionValue("jt");
    // "local" additionally selects the local framework
    if (resourceManager.equalsIgnoreCase("local")) {
      conf.set("mapreduce.framework.name", resourceManager);
    }
    conf.set("yarn.resourcemanager.address", resourceManager,
        "from -jt command line option");
  }

  if (line.hasOption("conf")) {
    for (String resource : line.getOptionValues("conf")) {
      conf.addResource(new Path(resource));
    }
  }

  if (line.hasOption('D')) {
    for (String definition : line.getOptionValues('D')) {
      final String[] pair = definition.split("=", 2);
      // entries without '=' are skipped
      if (pair.length != 2) {
        continue;
      }
      conf.set(pair[0], pair[1], "from command line");
    }
  }

  if (line.hasOption("libjars")) {
    // for libjars, we allow expansion of wildcards
    final String jarList =
        validateFiles(line.getOptionValue("libjars"), true);
    conf.set("tmpjars", jarList, "from -libjars command line option");
    // setting libjars in client classpath
    final URL[] jarUrls = getLibJars(conf);
    if (jarUrls != null && jarUrls.length > 0) {
      conf.setClassLoader(
          new URLClassLoader(jarUrls, conf.getClassLoader()));
      final Thread current = Thread.currentThread();
      current.setContextClassLoader(
          new URLClassLoader(jarUrls, current.getContextClassLoader()));
    }
  }

  if (line.hasOption("files")) {
    final String fileList = validateFiles(line.getOptionValue("files"));
    conf.set("tmpfiles", fileList, "from -files command line option");
  }

  if (line.hasOption("archives")) {
    final String archiveList =
        validateFiles(line.getOptionValue("archives"));
    conf.set("tmparchives", archiveList,
        "from -archives command line option");
  }

  conf.setBoolean("mapreduce.client.genericoptionsparser.used", true);

  // tokensFile
  if (line.hasOption("tokenCacheFile")) {
    final String tokenFileName = line.getOptionValue("tokenCacheFile");
    final FileSystem local = FileSystem.getLocal(conf);
    final Path tokenPath = local.makeQualified(new Path(tokenFileName));
    // check if the local file exists; the returned status itself is unused
    local.getFileStatus(tokenPath);
    if (LOG.isDebugEnabled()) {
      LOG.debug("setting conf tokensFile: " + tokenFileName);
    }
    UserGroupInformation.getCurrentUser().addCredentials(
        Credentials.readTokenStorageFile(tokenPath, conf));
    conf.set("mapreduce.job.credentials.binary", tokenPath.toString(),
        "from -tokenCacheFile command line option");
  }
}
/**
 * If libjars are set in the conf (key {@code tmpjars}), parse them into
 * URLs.
 *
 * Only entries that resolve to the local file system are returned; any
 * other entry is logged with a warning and left out.
 *
 * @param conf the configuration to read {@code tmpjars} from
 * @return libjar urls, or {@code null} when {@code tmpjars} is unset or
 *         blank
 * @throws IOException if a file system for an entry cannot be obtained
 */
public static URL[] getLibJars(Configuration conf) throws IOException {
  String jars = conf.get("tmpjars");
  if (jars == null || jars.trim().isEmpty()) {
    return null;
  }
  String[] files = jars.split(",");
  // Typed list: with a raw List, toArray(new URL[0]) would yield Object[].
  List<URL> cp = new ArrayList<>();
  for (String file : files) {
    Path tmp = new Path(file);
    if (tmp.getFileSystem(conf).equals(FileSystem.getLocal(conf))) {
      cp.add(FileSystem.getLocal(conf).pathToFile(tmp).toURI().toURL());
    } else {
      LOG.warn("The libjars file " + tmp + " is not on the local " +
          "filesystem. It will not be added to the local classpath.");
    }
  }
  return cp.toArray(new URL[0]);
}
/**
 * Validates a comma separated list of files without wildcard expansion.
 *
 * Each entry is checked for existence; entries without a scheme default to
 * file:///. The result is the list of qualified path URIs, so an input of
 * /home/user/file1,/home/user/file2 would return
 * file:///home/user/file1,file:///home/user/file2.
 *
 * This method does not recognize wildcards.
 *
 * @param files the input files argument
 * @return a comma-separated list of validated and qualified paths, or null
 * if the input files argument is null
 * @throws IOException if an entry cannot be validated
 */
private String validateFiles(String files) throws IOException {
  final boolean expandWildcard = false;
  return validateFiles(files, expandWildcard);
}
/**
 * Takes input as a comma separated list of files and verifies that they
 * exist. Entries without a scheme default to file:///. The result is the
 * list of qualified path URIs, so an input of
 * /home/user/file1,/home/user/file2 would return
 * file:///home/user/file1,file:///home/user/file2.
 *
 * @param files the input files argument
 * @param expandWildcard whether a wildcard entry is allowed and expanded. If
 * true, any directory followed by a wildcard is a valid entry and is replaced
 * with the list of jars in that directory. It is used to support the wildcard
 * notation in a classpath.
 * @return a comma-separated list of validated and qualified paths, or null
 * if the input files argument is null
 * @throws IllegalArgumentException if an entry is empty, is not a valid
 * URI, or if no path remains after expansion
 * @throws IOException if the existence check or wildcard expansion fails
 */
private String validateFiles(String files, boolean expandWildcard)
    throws IOException {
  if (files == null) {
    return null;
  }
  String[] fileArr = files.split(",");
  if (fileArr.length == 0) {
    throw new IllegalArgumentException("File name can't be empty string");
  }
  final String wildcard = "*";
  List<String> finalPaths = new ArrayList<>(fileArr.length);
  for (String entry : fileArr) {
    if (entry.isEmpty()) {
      throw new IllegalArgumentException("File name can't be empty string");
    }
    boolean isWildcard = expandWildcard && entry.endsWith(wildcard);
    // strip the wildcard
    String location = isWildcard
        ? entry.substring(0, entry.length() - 1)
        : entry;
    URI pathURI;
    try {
      // handle the case where a wildcard alone ("*") or the wildcard on the
      // current directory ("./*") is specified
      pathURI = matchesCurrentDirectory(location)
          ? new File(Path.CUR_DIR).toURI()
          : new URI(location);
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
    Path path = new Path(pathURI);
    // No scheme: default to the local file system. Otherwise resolve the
    // file system the path names; the local one is not needed in that case.
    FileSystem fs = (pathURI.getScheme() == null)
        ? FileSystem.getLocal(conf)
        : path.getFileSystem(conf);
    // existence check; the returned status itself is unused
    fs.getFileStatus(path);
    if (isWildcard) {
      expandWildcard(finalPaths, path, fs);
    } else {
      finalPaths.add(path.makeQualified(fs.getUri(),
          fs.getWorkingDirectory()).toString());
    }
  }
  // Wildcard directories without jars contribute nothing, so the list can
  // still be empty here.
  if (finalPaths.isEmpty()) {
    throw new IllegalArgumentException("Path " + files + " cannot be empty.");
  }
  return StringUtils.join(",", finalPaths);
}
/**
 * Tells whether the given path text denotes the current directory: an
 * empty string, {@code Path.CUR_DIR}, or {@code Path.CUR_DIR} followed by
 * the platform file separator.
 *
 * @param path the path text to test
 * @return true if the text refers to the current directory
 */
private boolean matchesCurrentDirectory(String path) {
  if (path.isEmpty()) {
    return true;
  }
  if (path.equals(Path.CUR_DIR)) {
    return true;
  }
  return path.equals(Path.CUR_DIR + File.separator);
}
/**
 * Replaces a wildcard directory entry with the jars found in that
 * directory, appending each qualified jar path to {@code finalPaths}.
 * A directory without jars is logged and contributes nothing.
 *
 * @param finalPaths the list that receives the qualified jar paths
 * @param path the directory the wildcard was attached to
 * @param fs the file system that {@code path} belongs to
 * @throws FileNotFoundException if {@code path} is not a directory
 * @throws IOException if the directory cannot be inspected
 */
private void expandWildcard(List<String> finalPaths, Path path, FileSystem fs)
    throws IOException {
  FileStatus status = fs.getFileStatus(path);
  if (!status.isDirectory()) {
    throw new FileNotFoundException(path + " is not a directory.");
  }
  // get all the jars in the directory; the flag tells FileUtil whether fs
  // is the local file system
  List<Path> jars = FileUtil.getJarsInDirectory(path.toString(),
      fs.equals(FileSystem.getLocal(conf)));
  if (jars.isEmpty()) {
    LOG.warn(path + " does not have jars in it. It will be ignored.");
    return;
  }
  for (Path jar : jars) {
    finalPaths.add(jar.makeQualified(fs.getUri(),
        fs.getWorkingDirectory()).toString());
  }
}
/**
 * Windows powershell and cmd can parse key=value themselves, because
 * /pkey=value is same as /pkey value under windows. However this is not
 * compatible with how we get arbitrary key values in -Dkey=value format.
 * Under windows -D key=value or -Dkey=value might be passed as
 * [-Dkey, value] or [-D, key, value]. This method undoes that split and
 * returns a modified args list in which [-D, key, value] becomes
 * [-D, key=value] and [-Dkey, value] becomes [-Dkey=value].
 *
 * On other platforms the arguments are returned untouched. Null elements
 * of {@code args} are dropped.
 *
 * @param args command line arguments
 * @return fixed command line arguments that GnuParser can parse, or
 *         {@code null} if {@code args} is null
 */
private String[] preProcessForWindows(String[] args) {
  if (!Shell.WINDOWS) {
    return args;
  }
  if (args == null) {
    return null;
  }
  // Typed list: with a raw List, toArray(new String[0]) would yield Object[].
  List<String> newArgs = new ArrayList<>(args.length);
  for (int i = 0; i < args.length; i++) {
    if (args[i] == null) {
      continue;
    }
    String prop = null;
    if (args[i].equals("-D")) {
      // bare -D: the property is the next argument, if there is one
      newArgs.add(args[i]);
      if (i < args.length - 1) {
        prop = args[++i];
      }
    } else if (args[i].startsWith("-D")) {
      prop = args[i];
    } else {
      newArgs.add(args[i]);
    }
    if (prop != null) {
      // The shell split "key=value"; glue the following argument back on
      // as the value when one is available.
      if (!prop.contains("=") && i < args.length - 1) {
        prop += "=" + args[++i];
      }
      newArgs.add(prop);
    }
  }
  return newArgs.toArray(new String[0]);
}
/**
 * Parses the user-specified arguments against the caller's options plus
 * the generic options, then applies the generic options to the
 * configuration.
 *
 * A {@link ParseException} is not propagated: it is logged, the help text
 * is printed, and the method reports failure.
 *
 * @param opts Options to use for parsing args.
 * @param args User-specified arguments
 * @return true if the parse was successful
 * @throws IOException if applying a generic option fails with an I/O error
 */
private boolean parseGeneralOptions(Options opts, String[] args)
    throws IOException {
  final Options allOptions = buildGeneralOptions(opts);
  final CommandLineParser parser = new GnuParser();
  try {
    commandLine =
        parser.parse(allOptions, preProcessForWindows(args), true);
    processGeneralOptions(commandLine);
    return true;
  } catch (ParseException e) {
    LOG.warn("options parsing failed: "+e.getMessage());
    new HelpFormatter().printHelp("general options are: ", allOptions);
    return false;
  }
}
/**
 * Print the usage message for generic command-line options supported.
 *
 * Every option is printed together with its argument placeholder (the same
 * argument names the option definitions use), followed by its description,
 * and finally the general command line syntax.
 *
 * @param out stream to print the usage message to.
 */
public static void printGenericCommandUsage(PrintStream out) {
  // {option with argument placeholder, description}
  final String[][] usage = {
      {"-conf <configuration file>",
          "specify an application configuration file"},
      {"-D <property=value>",
          "define a value for a given property"},
      {"-fs <file:///|hdfs://namenode:port>",
          "specify default filesystem URL to use, overrides "
              + "'fs.defaultFS' property from configurations."},
      {"-jt <local|resourcemanager:port>",
          "specify a ResourceManager"},
      {"-files <paths>",
          "specify a comma-separated list of files to be copied to the map "
              + "reduce cluster"},
      {"-libjars <paths>",
          "specify a comma-separated list of jar files to be included in the "
              + "classpath"},
      {"-archives <paths>",
          "specify a comma-separated list of archives to be unarchived on the "
              + "compute machines"},
  };

  out.println("Generic options supported are:");
  for (String[] entry : usage) {
    // pad the option column to the widest entry so descriptions line up
    out.println(String.format("%-35s %s", entry[0], entry[1]));
  }
  out.println();
  out.println("The general command line syntax is:");
  out.println("command [genericOptions] [commandOptions]");
  out.println();
}
}