All downloads are free. The search and download features use the official Maven repository.

org.apache.hadoop.util.GenericOptionsParser Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import java.net.URL;
import java.net.URLClassLoader;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * GenericOptionsParser is a utility to parse command line
 * arguments generic to the Hadoop framework. 
 * 
 * GenericOptionsParser recognizes several standard command 
 * line arguments, enabling applications to easily specify a namenode, a 
 * jobtracker, additional configuration resources etc.
 * 
 * 

 * <h4 id="GenericOptions">Generic Options</h4>
 *
 * <p>The supported generic options are:</p>
 * <blockquote><pre>
 *     -conf &lt;configuration file&gt;     specify a configuration file
 *     -D &lt;property=value&gt;            use value for given property
 *     -fs &lt;local|namenode:port&gt;      specify a namenode
 *     -jt &lt;local|jobtracker:port&gt;    specify a job tracker
 *     -files &lt;comma separated list of files&gt;    specify comma separated
 *                            files to be copied to the map reduce cluster
 *     -libjars &lt;comma separated list of jars&gt;   specify comma separated
 *                            jar files to include in the classpath.
 *     -archives &lt;comma separated list of archives&gt;    specify comma
 *             separated archives to be unarchived on the compute machines.
 * </pre></blockquote>
 *

 * <p>The general command line syntax is:</p>
 * <blockquote><pre>
 * bin/hadoop command [genericOptions] [commandOptions]
 * </pre></blockquote>
 *

 * <p>Generic command line arguments might modify
 * <code>Configuration</code> objects, given to constructors.</p>
 *
 * <p>The functionality is implemented using Commons CLI.</p>
 *

 * <p>Examples:</p>
 * <blockquote><pre>
 * $ bin/hadoop dfs -fs darwin:8020 -ls /data
 * list /data directory in dfs with namenode darwin:8020
 * 
 * $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
 * list /data directory in dfs with namenode darwin:8020
 *     
 * $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
 * list /data directory in dfs with conf specified in hadoop-site.xml
 *     
 * $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
 * submit a job to job tracker darwin:50020
 *     
 * $ bin/hadoop job -jt darwin:50020 -submit job.xml
 * submit a job to job tracker darwin:50020
 *     
 * $ bin/hadoop job -jt local -submit job.xml
 * submit a job to local runner
 * 
 * $ bin/hadoop jar -libjars testlib.jar 
 * -archives test.tgz -files file.txt inputjar args
 * job submission with libjars, files and archives
 * </pre></blockquote>

* * @see Tool * @see ToolRunner */ public class GenericOptionsParser { private static final Log LOG = LogFactory.getLog(GenericOptionsParser.class); private Configuration conf; private CommandLine commandLine; /** * Create an options parser with the given options to parse the args. * @param opts the options * @param args the command line arguments */ public GenericOptionsParser(Options opts, String[] args) { this(new Configuration(), new Options(), args); } /** * Create an options parser to parse the args. * @param args the command line arguments */ public GenericOptionsParser(String[] args) { this(new Configuration(), new Options(), args); } /** * Create a GenericOptionsParser to parse only the generic Hadoop * arguments. * * The array of string arguments other than the generic arguments can be * obtained by {@link #getRemainingArgs()}. * * @param conf the Configuration to modify. * @param args command-line arguments. */ public GenericOptionsParser(Configuration conf, String[] args) { this(conf, new Options(), args); } /** * Create a GenericOptionsParser to parse given options as well * as generic Hadoop options. * * The resulting CommandLine object can be obtained by * {@link #getCommandLine()}. * * @param conf the configuration to modify * @param options options built by the caller * @param args User-specified arguments */ public GenericOptionsParser(Configuration conf, Options options, String[] args) { parseGeneralOptions(options, conf, args); this.conf = conf; } /** * Returns an array of Strings containing only application-specific arguments. * * @return array of Strings containing the un-parsed arguments * or empty array if commandLine was not defined. */ public String[] getRemainingArgs() { return (commandLine == null) ? new String[]{} : commandLine.getArgs(); } /** * Get the modified configuration * @return the configuration that has the modified parameters. 
*/
  public Configuration getConfiguration() {
    return conf;
  }

  /**
   * Returns the commons-cli <code>CommandLine</code> object
   * to process the parsed arguments.
   *
   * Note: If the object is created with
   * {@link #GenericOptionsParser(Configuration, String[])}, then returned
   * object will only contain parsed generic options.
   *
   * @return <code>CommandLine</code> representing list of arguments
   *         parsed against Options descriptor.
   */
  public CommandLine getCommandLine() {
    return commandLine;
  }

  /**
   * Registers every generic option (fs, jt, conf, D, libjars, files,
   * archives) on the supplied <code>Options</code> and hands it back.
   * Each option takes exactly one argument.
   *
   * @param opts the options object to add the generic options to
   * @return the same <code>opts</code> instance, for convenience
   */
  @SuppressWarnings("static-access")
  private static Options buildGeneralOptions(Options opts) {
    // Each option is built and registered in a single statement; the
    // registration order matches the order the options are declared in.
    opts.addOption(OptionBuilder.withArgName("local|namenode:port")
        .hasArg()
        .withDescription("specify a namenode")
        .create("fs"));

    opts.addOption(OptionBuilder.withArgName("local|jobtracker:port")
        .hasArg()
        .withDescription("specify a job tracker")
        .create("jt"));

    opts.addOption(OptionBuilder.withArgName("configuration file")
        .hasArg()
        .withDescription("specify an application configuration file")
        .create("conf"));

    opts.addOption(OptionBuilder.withArgName("property=value")
        .hasArg()
        .withDescription("use value for given property")
        .create('D'));

    opts.addOption(OptionBuilder.withArgName("paths")
        .hasArg()
        .withDescription("comma separated jar files to include in the classpath.")
        .create("libjars"));

    opts.addOption(OptionBuilder.withArgName("paths")
        .hasArg()
        .withDescription("comma separated files to be copied to the "
            + "map reduce cluster")
        .create("files"));

    opts.addOption(OptionBuilder.withArgName("paths")
        .hasArg()
        .withDescription("comma separated archives to be unarchived"
            + " on the compute machines.")
        .create("archives"));

    return opts;
  }

  /**
   * Modify configuration according to user-specified generic options
   * @param conf Configuration to be modified
   * @param line User-specified generic options
   */
  private void
processGeneralOptions(Configuration conf, CommandLine line) { if (line.hasOption("fs")) { FileSystem.setDefaultUri(conf, line.getOptionValue("fs")); } if (line.hasOption("jt")) { conf.set("mapred.job.tracker", line.getOptionValue("jt")); } if (line.hasOption("conf")) { String[] values = line.getOptionValues("conf"); for(String value : values) { conf.addResource(new Path(value)); } } try { if (line.hasOption("libjars")) { conf.set("tmpjars", validateFiles(line.getOptionValue("libjars"), conf)); //setting libjars in client classpath URL[] libjars = getLibJars(conf); if(libjars!=null && libjars.length>0) { conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } if (line.hasOption("files")) { conf.set("tmpfiles", validateFiles(line.getOptionValue("files"), conf)); } if (line.hasOption("archives")) { conf.set("tmparchives", validateFiles(line.getOptionValue("archives"), conf)); } } catch (IOException ioe) { System.err.println(StringUtils.stringifyException(ioe)); } if (line.hasOption('D')) { String[] property = line.getOptionValues('D'); for(String prop : property) { String[] keyval = prop.split("=", 2); if (keyval.length == 2) { conf.set(keyval[0], keyval[1]); } } } conf.setBoolean("mapred.used.genericoptionsparser", true); } /** * If libjars are set in the conf, parse the libjars. 
* @param conf * @return libjar urls * @throws IOException */ public static URL[] getLibJars(Configuration conf) throws IOException { String jars = conf.get("tmpjars"); if(jars==null) { return null; } String[] files = jars.split(","); URL[] cp = new URL[files.length]; for (int i=0;i specify an application configuration file"); out.println("-D use value for given property"); out.println("-fs specify a namenode"); out.println("-jt specify a job tracker"); out.println("-files " + "specify comma separated files to be copied to the map reduce cluster"); out.println("-libjars " + "specify comma separated jar files to include in the classpath."); out.println("-archives " + "specify comma separated archives to be unarchived" + " on the compute machines.\n"); out.println("The general command line syntax is"); out.println("bin/hadoop command [genericOptions] [commandOptions]\n"); } }