All downloads are free. The search and download features use the official Maven repository.

com.twitter.elephantbird.pig.util.SequenceFileConfig Maven / Gradle / Ivy

There is a newer version: 4.17
Show newest version
package com.twitter.elephantbird.pig.util;

import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.UnrecognizedOptionException;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.pig.impl.PigContext;

import com.google.common.collect.Lists;
import com.twitter.elephantbird.pig.load.SequenceFileLoader;
import com.twitter.elephantbird.pig.store.SequenceFileStorage;

/**
 * Configuration supporting Pig load and store function implementations for reading and writing
 * Hadoop {@link SequenceFile}s.
 *
 * @author Andy Schlaikjer
 * @see SequenceFileLoader
 * @see SequenceFileStorage
 * @see WritableConverter
 */
public class SequenceFileConfig {
  public static final String CONVERTER_PARAM = "converter";
  public final CommandLine keyArguments;
  public final CommandLine valueArguments;
  public final CommandLine genericArguments;
  public final WritableConverter keyConverter;
  public final WritableConverter valueConverter;

  /**
   * Parses options from argument strings. Available options for key and value argument strings
   * include:
   *
   * 
*
-c|--converter cls
*
{@link WritableConverter} implementation class to use for conversion of data. Defaults to * {@link TextConverter} for both key and value.
*
* * Any extra arguments found will be treated as String arguments for the WritableConverter * constructor. For instance, the argument string {@code "-c MyConverter 123 abc"} specifies * WritableConverter class {@code MyConverter} along with two constructor arguments {@code "123"} * and {@code "abc"}. This will cause SequenceFileLoader to attempt to invoke the following * constructors, in order, to create a new instance of MyConverter: * *
    *
  1. MyConverter(String arg1, String arg2) -- constructor arguments are passed as * explicit arguments.
  2. *
  3. MyConverter(String[] args) -- constructor arguments are passed within a String * array.
  4. *
  5. MyConverter(String... args) -- same as above, with var args syntax.
  6. *
  7. MyConverter(String argString) -- constructor arguments are joined with space * char to create {@code argString}.
  8. *
* * If none of these constructors exist, a RuntimeException will be thrown. * *

* Note that WritableConverter constructor arguments prefixed by one or more hyphens will be * interpreted as options for SequenceFileLoader itself, resulting in an * {@link UnrecognizedOptionException}. To avoid this, place these values after a {@code --} * (double-hyphen) token: * *

   * A = LOAD '$data' USING com.twitter.elephantbird.pig.load.SequenceFileLoader (
   *   '-c ...IntWritableConverter',
   *   '-c ...MyComplexWritableConverter basic options here -- --complex -options here'
   * );
   * 
* * No generic options are exposed by default. {@link SequenceFileLoader} and * {@link SequenceFileStorage} may include more options. * * @param keyArgs argument string containing key options. * @param valueArgs argument string containing value options. * @param genericArgs argument string containing generic options. * @throws ParseException * @throws IOException */ public SequenceFileConfig(String keyArgs, String valueArgs, String genericArgs) throws ParseException, IOException { // parse key, value arguments Options keyValueOptions = getKeyValueOptions(); Options genericOptions = getGenericOptions(); keyArguments = parseArguments(keyValueOptions, keyArgs); valueArguments = parseArguments(keyValueOptions, valueArgs); genericArguments = parseArguments(genericOptions, genericArgs); // construct key, value converters keyConverter = getWritableConverter(keyArguments); valueConverter = getWritableConverter(valueArguments); // initialize key, value converters initialize(); } /** * Constructor without other arguments (backwards compatible). * * @throws ParseException * @throws IOException */ public SequenceFileConfig(String keyArgs, String valueArgs) throws ParseException, IOException { this(keyArgs, valueArgs, ""); } /** * Default constructor. Defaults used for all options. * * @throws ParseException * @throws IOException */ public SequenceFileConfig() throws ParseException, IOException { this("", ""); } /** * @return Options instance containing valid key/value options. */ protected Options getKeyValueOptions() { @SuppressWarnings("static-access") Option converterOption = OptionBuilder .withLongOpt(CONVERTER_PARAM) .hasArg() .withArgName("cls") .withDescription( String.format("Converter type to use for conversion of data. Defaults to '%s'.", TextConverter.class.getName())).create("c"); return new Options().addOption(converterOption); } /** * @return Options instance containing valid global options. 
*/ protected Options getGenericOptions() { return new Options(); } /** * @param args * @return CommandLine instance containing options parsed from argument string. * @throws ParseException */ private static CommandLine parseArguments(Options options, String args) throws ParseException { CommandLine cmdline = null; try { cmdline = new GnuParser().parse(options, args.split(" ")); } catch (ParseException e) { new HelpFormatter().printHelp(SequenceFileStorage.class.getName() + "(keyArgs, valueArgs)", options); throw e; } return cmdline; } /** * @param arguments * @return new WritableConverter instance constructed using given arguments. */ @SuppressWarnings("unchecked") private static WritableConverter getWritableConverter( CommandLine arguments) { // get remaining non-empty argument strings from commandline String[] converterArgs = removeEmptyArgs(arguments.getArgs()); try { // get converter classname String converterClassName = arguments.getOptionValue(CONVERTER_PARAM, TextConverter.class.getName()); // get converter class Class> converterClass = PigContext.resolveClassName(converterClassName); // construct converter instance if (converterArgs == null || converterArgs.length == 0) { // use default ctor return converterClass.newInstance(); } else { try { // look up ctor having explicit number of String arguments Class[] parameterTypes = new Class[converterArgs.length]; Arrays.fill(parameterTypes, String.class); Constructor> ctor = converterClass.getConstructor(parameterTypes); return ctor.newInstance((Object[]) converterArgs); } catch (NoSuchMethodException e) { try { // look up ctor having single String[] (or String... 
varargs) argument Constructor> ctor = converterClass.getConstructor(new Class[] { String[].class }); return ctor.newInstance((Object) converterArgs); } catch (NoSuchMethodException e2) { // look up ctor having single String argument and join args together Constructor> ctor = converterClass.getConstructor(new Class[] { String.class }); StringBuilder sb = new StringBuilder(converterArgs[0]); for (int i = 1; i < converterArgs.length; ++i) { sb.append(" ").append(converterArgs[i]); } return ctor.newInstance(sb.toString()); } } } } catch (Exception e) { throw new RuntimeException("Failed to create WritableConverter instance", e); } } /** * @param args * @return new String[] containing non-empty values from args. */ private static String[] removeEmptyArgs(String[] args) { List converterArgsFiltered = Lists.newArrayList(); for (String arg : args) { if (arg == null || arg.isEmpty()) continue; converterArgsFiltered.add(arg); } return converterArgsFiltered.toArray(new String[0]); } /** * Initializes key, value WritableConverters. * * @throws IOException */ protected void initialize() throws IOException { keyConverter.initialize(null); valueConverter.initialize(null); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy