org.apache.mahout.driver.MahoutDriver Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mahout-mr Show documentation
Show all versions of mahout-mr Show documentation
Scalable machine learning libraries
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.driver;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import com.google.common.io.Closeables;
import org.apache.hadoop.util.ProgramDriver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* General-purpose driver class for Mahout programs. Utilizes org.apache.hadoop.util.ProgramDriver to run
* main methods of other classes, but first loads up default properties from a properties file.
*
* To run locally:
*
* $MAHOUT_HOME/bin/mahout run shortJobName [over-ride ops]
*
* Works like this: by default, the file "driver.classes.props" is loaded from the classpath, which
* defines a mapping between short names like "vectordump" and fully qualified class names.
* The format of driver.classes.props is like so:
*
*
* fully.qualified.class.name = shortJobName : descriptive string
*
* The default properties to be applied to the program run is pulled out of, by default, ".props"
* (also off of the classpath).
*
* The format of the default properties files is as follows:
*
i|input = /path/to/my/input
o|output = /path/to/my/output
m|jarFile = /path/to/jarFile
# etc - each line is shortArg|longArg = value
*
* The next argument to the Driver is supposed to be the short name of the class to be run (as defined in the
* driver.classes.props file).
*
* Then the class which will be run will have it's main called with
*
* main(new String[] { "--input", "/path/to/my/input", "--output", "/path/to/my/output" });
*
* After all the "default" properties are loaded from the file, any further command-line arguments are taken in,
* and over-ride the defaults.
*
* So if your driver.classes.props looks like so:
*
* org.apache.mahout.utils.vectors.VectorDumper = vecDump : dump vectors from a sequence file
*
* and you have a file core/src/main/resources/vecDump.props which looks like
*
o|output = /tmp/vectorOut
s|seqFile = /my/vector/sequenceFile
*
* And you execute the command-line:
*
* $MAHOUT_HOME/bin/mahout run vecDump -s /my/otherVector/sequenceFile
*
* Then org.apache.mahout.utils.vectors.VectorDumper.main() will be called with arguments:
* {"--output", "/tmp/vectorOut", "-s", "/my/otherVector/sequenceFile"}
*/
public final class MahoutDriver {
private static final Logger log = LoggerFactory.getLogger(MahoutDriver.class);
private MahoutDriver() {
}
public static void main(String[] args) throws Throwable {
Properties mainClasses = loadProperties("driver.classes.props");
if (mainClasses == null) {
mainClasses = loadProperties("driver.classes.default.props");
}
if (mainClasses == null) {
throw new IOException("Can't load any properties file?");
}
boolean foundShortName = false;
ProgramDriver programDriver = new ProgramDriver();
for (Object key : mainClasses.keySet()) {
String keyString = (String) key;
if (args.length > 0 && shortName(mainClasses.getProperty(keyString)).equals(args[0])) {
foundShortName = true;
}
if (args.length > 0 && keyString.equalsIgnoreCase(args[0]) && isDeprecated(mainClasses, keyString)) {
log.error(desc(mainClasses.getProperty(keyString)));
return;
}
if (isDeprecated(mainClasses, keyString)) {
continue;
}
addClass(programDriver, keyString, mainClasses.getProperty(keyString));
}
if (args.length < 1 || args[0] == null || "-h".equals(args[0]) || "--help".equals(args[0])) {
programDriver.driver(args);
return;
}
String progName = args[0];
if (!foundShortName) {
addClass(programDriver, progName, progName);
}
shift(args);
Properties mainProps = loadProperties(progName + ".props");
if (mainProps == null) {
log.warn("No {}.props found on classpath, will use command-line arguments only", progName);
mainProps = new Properties();
}
Map argMap = new HashMap<>();
int i = 0;
while (i < args.length && args[i] != null) {
List argValues = new ArrayList<>();
String arg = args[i];
i++;
if (arg.startsWith("-D")) { // '-Dkey=value' or '-Dkey=value1,value2,etc' case
String[] argSplit = arg.split("=");
arg = argSplit[0];
if (argSplit.length == 2) {
argValues.add(argSplit[1]);
}
} else { // '-key [values]' or '--key [values]' case.
while (i < args.length && args[i] != null) {
if (args[i].startsWith("-")) {
break;
}
argValues.add(args[i]);
i++;
}
}
argMap.put(arg, argValues.toArray(new String[argValues.size()]));
}
// Add properties from the .props file that are not overridden on the command line
for (String key : mainProps.stringPropertyNames()) {
String[] argNamePair = key.split("\\|");
String shortArg = '-' + argNamePair[0].trim();
String longArg = argNamePair.length < 2 ? null : "--" + argNamePair[1].trim();
if (!argMap.containsKey(shortArg) && (longArg == null || !argMap.containsKey(longArg))) {
argMap.put(longArg, new String[] {mainProps.getProperty(key)});
}
}
// Now add command-line args
List argsList = new ArrayList<>();
argsList.add(progName);
for (Map.Entry entry : argMap.entrySet()) {
String arg = entry.getKey();
if (arg.startsWith("-D")) { // arg is -Dkey - if value for this !isEmpty(), then arg -> -Dkey + "=" + value
String[] argValues = entry.getValue();
if (argValues.length > 0 && !argValues[0].trim().isEmpty()) {
arg += '=' + argValues[0].trim();
}
argsList.add(1, arg);
} else {
argsList.add(arg);
for (String argValue : Arrays.asList(argMap.get(arg))) {
if (!argValue.isEmpty()) {
argsList.add(argValue);
}
}
}
}
long start = System.currentTimeMillis();
programDriver.driver(argsList.toArray(new String[argsList.size()]));
if (log.isInfoEnabled()) {
log.info("Program took {} ms (Minutes: {})", System.currentTimeMillis() - start,
(System.currentTimeMillis() - start) / 60000.0);
}
}
private static boolean isDeprecated(Properties mainClasses, String keyString) {
return "deprecated".equalsIgnoreCase(shortName(mainClasses.getProperty(keyString)));
}
private static Properties loadProperties(String resource) throws IOException {
InputStream propsStream = Thread.currentThread().getContextClassLoader().getResourceAsStream(resource);
if (propsStream != null) {
try {
Properties properties = new Properties();
properties.load(propsStream);
return properties;
} finally {
Closeables.close(propsStream, true);
}
}
return null;
}
private static String[] shift(String[] args) {
System.arraycopy(args, 1, args, 0, args.length - 1);
args[args.length - 1] = null;
return args;
}
private static String shortName(String valueString) {
return valueString.contains(":") ? valueString.substring(0, valueString.indexOf(':')).trim() : valueString;
}
private static String desc(String valueString) {
return valueString.contains(":") ? valueString.substring(valueString.indexOf(':')).trim() : valueString;
}
private static void addClass(ProgramDriver driver, String classString, String descString) {
try {
Class> clazz = Class.forName(classString);
driver.addClass(shortName(descString), clazz, desc(descString));
} catch (Throwable t) {
log.warn("Unable to add class: {}", classString, t);
}
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy