org.apache.hadoop.util.GenericOptionsParser Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import java.net.URL;
import java.net.URLClassLoader;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
* <code>GenericOptionsParser</code> is a utility to parse command line
* arguments generic to the Hadoop framework.
*
* <code>GenericOptionsParser</code> recognizes several standard command
* line arguments, enabling applications to easily specify a namenode, a
* jobtracker, additional configuration resources etc.
*
* Generic Options
*
* The supported generic options are:
*
* -conf <configuration file> specify a configuration file
* -D <property=value> use value for given property
* -fs <local|namenode:port> specify a namenode
* -jt <local|jobtracker:port> specify a job tracker
* -files <comma separated list of files> specify comma separated
* files to be copied to the map reduce cluster
* -libjars <comma separated list of jars> specify comma separated
* jar files to include in the classpath.
* -archives <comma separated list of archives> specify comma
* separated archives to be unarchived on the compute machines.
*
*
* The general command line syntax is:
*
* bin/hadoop command [genericOptions] [commandOptions]
*
*
* Generic command line arguments might modify
* <code>Configuration</code> objects, given to constructors.
*
* The functionality is implemented using Commons CLI.
*
* Examples:
*
* $ bin/hadoop dfs -fs darwin:8020 -ls /data
* list /data directory in dfs with namenode darwin:8020
*
* $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
* list /data directory in dfs with namenode darwin:8020
*
* $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
* list /data directory in dfs with conf specified in hadoop-site.xml
*
* $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
* submit a job to job tracker darwin:50020
*
* $ bin/hadoop job -jt darwin:50020 -submit job.xml
* submit a job to job tracker darwin:50020
*
* $ bin/hadoop job -jt local -submit job.xml
* submit a job to local runner
*
* $ bin/hadoop jar -libjars testlib.jar
* -archives test.tgz -files file.txt inputjar args
* job submission with libjars, files and archives
*
*
* @see Tool
* @see ToolRunner
*/
public class GenericOptionsParser {
// Logger for this class.
private static final Log LOG = LogFactory.getLog(GenericOptionsParser.class);
// Configuration handed to the constructor; exposed through getConfiguration().
private Configuration conf;
// Parsed command line exposed through getCommandLine(). getRemainingArgs()
// treats a null value as "no remaining arguments".
private CommandLine commandLine;
/**
 * Create an options parser with the given options to parse the args.
 *
 * A fresh {@link Configuration} is created for the parser; it can be
 * retrieved afterwards with {@link #getConfiguration()}.
 *
 * @param opts the application-specific options to parse in addition to the
 *             generic Hadoop options; <code>null</code> is treated as
 *             "no application-specific options"
 * @param args the command line arguments
 */
public GenericOptionsParser(Options opts, String[] args) {
  // Forward the caller's options so they are actually parsed. A null opts
  // falls back to an empty Options so callers passing null keep working.
  this(new Configuration(), (opts == null) ? new Options() : opts, args);
}
/**
 * Create an options parser that uses a fresh {@link Configuration} and
 * registers no application-specific options.
 *
 * @param args the command line arguments
 */
public GenericOptionsParser(String[] args) {
  // The (Configuration, String[]) constructor supplies the empty Options.
  this(new Configuration(), args);
}
/**
 * Create a <code>GenericOptionsParser</code> to parse only the generic Hadoop
 * arguments.
 *
 * The array of string arguments other than the generic arguments can be
 * obtained by {@link #getRemainingArgs()}.
 *
 * @param conf the <code>Configuration</code> to modify.
 * @param args command-line arguments.
 */
public GenericOptionsParser(Configuration conf, String[] args) {
// Delegate with an empty Options: no application-specific options are registered.
this(conf, new Options(), args);
}
/**
 * Create a <code>GenericOptionsParser</code> to parse given options as well
 * as generic Hadoop options.
 *
 * The resulting <code>CommandLine</code> object can be obtained by
 * {@link #getCommandLine()}.
 *
 * @param conf the configuration to modify
 * @param options options built by the caller
 * @param args User-specified arguments
 */
public GenericOptionsParser(Configuration conf, Options options, String[] args) {
// NOTE(review): parseGeneralOptions is defined outside the readable part of
// this file; presumably it parses args and applies the generic options to
// conf -- confirm before reordering these two statements.
parseGeneralOptions(options, conf, args);
this.conf = conf;
}
/**
 * Returns the arguments reported by the parsed command line as left over,
 * i.e. the application-specific arguments.
 *
 * @return array of <code>String</code>s containing the un-parsed arguments,
 *         or an empty array if no command line is available.
 */
public String[] getRemainingArgs() {
  if (commandLine != null) {
    return commandLine.getArgs();
  }
  // No parsed command line: report "nothing left" rather than null.
  return new String[0];
}
/**
 * Get the configuration this parser was constructed with.
 *
 * @return the configuration that has the modified parameters.
 */
public Configuration getConfiguration() {
  return this.conf;
}
/**
 * Returns the commons-cli <code>CommandLine</code> object
 * to process the parsed arguments.
 *
 * Note: If the object is created with
 * {@link #GenericOptionsParser(Configuration, String[])}, then returned
 * object will only contain parsed generic options.
 *
 * @return <code>CommandLine</code> representing list of arguments
 *         parsed against Options descriptor.
 */
public CommandLine getCommandLine() {
  return this.commandLine;
}
/**
 * Registers the generic Hadoop options on the supplied option set.
 *
 * Every option is declared with a single argument. They are added in this
 * order: <code>fs</code>, <code>jt</code>, <code>conf</code>, <code>D</code>,
 * <code>libjars</code>, <code>files</code>, <code>archives</code>.
 *
 * @param opts the option set to extend; it is modified in place
 * @return the same <code>opts</code> instance, now containing the generic options
 */
@SuppressWarnings("static-access")
private static Options buildGeneralOptions(Options opts) {
  // OptionBuilder is used through its static fluent API (hence the
  // suppressed warning); each chain is finished by create(...) before the
  // next one starts.
  opts.addOption(OptionBuilder.withArgName("local|namenode:port")
      .hasArg()
      .withDescription("specify a namenode")
      .create("fs"));

  opts.addOption(OptionBuilder.withArgName("local|jobtracker:port")
      .hasArg()
      .withDescription("specify a job tracker")
      .create("jt"));

  opts.addOption(OptionBuilder.withArgName("configuration file")
      .hasArg()
      .withDescription("specify an application configuration file")
      .create("conf"));

  opts.addOption(OptionBuilder.withArgName("property=value")
      .hasArg()
      .withDescription("use value for given property")
      .create('D'));

  opts.addOption(OptionBuilder.withArgName("paths")
      .hasArg()
      .withDescription("comma separated jar files to include in the classpath.")
      .create("libjars"));

  opts.addOption(OptionBuilder.withArgName("paths")
      .hasArg()
      .withDescription("comma separated files to be copied to the " +
          "map reduce cluster")
      .create("files"));

  opts.addOption(OptionBuilder.withArgName("paths")
      .hasArg()
      .withDescription("comma separated archives to be unarchived" +
          " on the compute machines.")
      .create("archives"));

  return opts;
}
/**
 * Modify configuration according to user-specified generic options.
 *
 * Options are applied in this order:
 * <code>fs</code> sets the default file system URI;
 * <code>jt</code> sets <code>mapred.job.tracker</code>;
 * each <code>conf</code> value is added as a configuration resource;
 * <code>libjars</code>, <code>files</code> and <code>archives</code> store the
 * result of <code>validateFiles</code> under <code>tmpjars</code>,
 * <code>tmpfiles</code> and <code>tmparchives</code>;
 * each <code>D</code> value of the form <code>key=value</code> is set as a
 * property (values without <code>=</code> are ignored).
 *
 * An <code>IOException</code> raised while handling libjars/files/archives is
 * printed to standard error and the remainder of that group is skipped; the
 * <code>D</code> options are still processed afterwards.
 *
 * @param conf Configuration to be modified
 * @param line User-specified generic options
 */
private void processGeneralOptions(Configuration conf,
    CommandLine line) {
  if (line.hasOption("fs")) {
    String defaultFs = line.getOptionValue("fs");
    FileSystem.setDefaultUri(conf, defaultFs);
  }

  if (line.hasOption("jt")) {
    String jobTracker = line.getOptionValue("jt");
    conf.set("mapred.job.tracker", jobTracker);
  }

  if (line.hasOption("conf")) {
    String[] resources = line.getOptionValues("conf");
    for (int i = 0; i < resources.length; i++) {
      conf.addResource(new Path(resources[i]));
    }
  }

  try {
    if (line.hasOption("libjars")) {
      conf.set("tmpjars",
          validateFiles(line.getOptionValue("libjars"), conf));
      // Make the jars visible on the client side too: both the
      // configuration's class loader and the current thread's context
      // class loader get wrapped in a loader that knows the jar URLs.
      URL[] jarUrls = getLibJars(conf);
      boolean haveJars = jarUrls != null && jarUrls.length > 0;
      if (haveJars) {
        ClassLoader confLoader =
            new URLClassLoader(jarUrls, conf.getClassLoader());
        conf.setClassLoader(confLoader);

        Thread current = Thread.currentThread();
        ClassLoader contextLoader =
            new URLClassLoader(jarUrls, current.getContextClassLoader());
        current.setContextClassLoader(contextLoader);
      }
    }

    if (line.hasOption("files")) {
      conf.set("tmpfiles",
          validateFiles(line.getOptionValue("files"), conf));
    }

    if (line.hasOption("archives")) {
      conf.set("tmparchives",
          validateFiles(line.getOptionValue("archives"), conf));
    }
  } catch (IOException ioe) {
    // Best effort: report the problem and carry on with the -D options.
    System.err.println(StringUtils.stringifyException(ioe));
  }

  if (line.hasOption('D')) {
    String[] overrides = line.getOptionValues('D');
    for (int i = 0; i < overrides.length; i++) {
      // Split on the first '=' only, so the value itself may contain '='.
      String[] pair = overrides[i].split("=", 2);
      if (pair.length != 2) {
        continue;
      }
      conf.set(pair[0], pair[1]);
    }
  }

  // Marker recording that the generic options parser has been applied.
  conf.setBoolean("mapred.used.genericoptionsparser", true);
}
/**
* If libjars are set in the conf, parse the libjars.
* @param conf
* @return libjar urls
* @throws IOException
*/
public static URL[] getLibJars(Configuration conf) throws IOException {
String jars = conf.get("tmpjars");
if(jars==null) {
return null;
}
String[] files = jars.split(",");
URL[] cp = new URL[files.length];
for (int i=0;i specify an application configuration file");
out.println("-D use value for given property");
out.println("-fs specify a namenode");
out.println("-jt specify a job tracker");
out.println("-files " +
"specify comma separated files to be copied to the map reduce cluster");
out.println("-libjars " +
"specify comma separated jar files to include in the classpath.");
out.println("-archives " +
"specify comma separated archives to be unarchived" +
" on the compute machines.\n");
out.println("The general command line syntax is");
out.println("bin/hadoop command [genericOptions] [commandOptions]\n");
}
}