// com.ontology2.bakemono.mapreduce.SelfAwareTool (Maven / Gradle / Ivy)
package com.ontology2.bakemono.mapreduce;
import com.google.common.base.Function;
import static com.google.common.collect.Iterables.*;
import static com.google.common.collect.Lists.newArrayList;
import static com.google.common.collect.Maps.newHashMap;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import com.ontology2.bakemono.joins.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.springframework.beans.factory.BeanNameAware;
import javax.annotation.Nullable;
import java.lang.reflect.Field;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
import java.util.List;
import java.util.Map;
public class SelfAwareTool extends SingleJobTool implements BeanNameAware {
// Commons-logging logger for this tool; getInputPaths() uses it to warn about
// @InputPath fields that reflection is not allowed to read.
Log LOG= LogFactory.getLog(SelfAwareTool.class);
// Bean name received through setBeanName(); getName() returns it as the tool's name.
String beanName;
/**
 * Converts a path string into a Hadoop {@link Path} by calling {@code new Path(input)}.
 * Used by {@code getInputPaths()} and {@code getOutputPath()} to turn option values
 * into paths.
 *
 * The type arguments are spelled out explicitly: with a raw {@code Function} the
 * {@code apply(String)} method below would not override {@code apply(Object)}.
 */
static final Function<String, Path> STRING2PATH = new Function<String, Path>() {
    @Nullable @Override
    public Path apply(@Nullable String input) {
        return new Path(input);
    }
};
/**
 * Reads the value of a public field, looked up by name through reflection.
 *
 * @param that object whose field is read; must not be null
 * @param name name of a public field visible on the runtime class of {@code that}
 * @param <T>  type the caller expects; the cast is unchecked, so a mismatch shows up
 *             as a {@link ClassCastException} where the result is assigned
 * @return the field's current value, or {@code null} when the runtime class has no
 *         public field with that name or the field cannot be accessed
 */
@SuppressWarnings("unchecked")
public static <T> T readField(Object that, String name) {
    try {
        Field f = that.getClass().getField(name);
        return (T) f.get(that);
    } catch (NoSuchFieldException | IllegalAccessException ignored) {
        // Deliberate best effort: a missing or unreadable field is reported as null,
        // and the callers in this class check for null themselves.
        return null;
    }
}
//
// Try to instantiate this class without creating a subclass and something awful will
// happen!
//
/**
 * Creates the tool. The class is meant to be subclassed: getMapperClass() and
 * getReducerClass() derive class names from {@code getClass().getName()}, and
 * getOptionsClass() sniffs type parameters starting from {@code getClass()}.
 */
public SelfAwareTool() {
}
/**
 * Returns the name of this tool, which is the bean name stored by
 * {@code setBeanName(String)}.
 */
@Override
protected String getName() {
    return this.beanName;
}
@Override
protected Class extends Mapper> getMapperClass() {
String thisClass=getClass().getName();
if(thisClass.endsWith("Tool")) {
thisClass=thisClass.substring(0,thisClass.length()-4);
}
String tryMapper=thisClass+"Mapper";
try {
return (Class extends Mapper>) Class.forName(tryMapper);
} catch(ClassNotFoundException x) {
return null;
}
}
@Override
protected Class extends Reducer> getReducerClass() {
String thisClass=getClass().getName();
if(thisClass.endsWith("Tool")) {
thisClass=thisClass.substring(0,thisClass.length()-4);
}
String tryMapper=thisClass+"Reducer";
try {
return (Class extends Reducer>) Class.forName(tryMapper);
} catch(ClassNotFoundException x) {
return super.getReducerClass(); // necessary because some jobs don't have a reducer
}
}
public Class extends Writable> getMapInputKeyClass() {
Type[] parameters= TypeDetective.sniffTypeParameters(getMapperClass(), Mapper.class);
return toWritableClass(parameters[0]);
}
public Class extends Writable> getMapInputValueClass() {
Type[] parameters= TypeDetective.sniffTypeParameters(getMapperClass(), Mapper.class);
return toWritableClass(parameters[1]);
}
@Override
public Class extends Writable> getMapOutputKeyClass() {
Type[] parameters= TypeDetective.sniffTypeParameters(getMapperClass(), Mapper.class);
return toWritableClass(parameters[2]);
}
@Override
public Class extends Writable> getMapOutputValueClass() {
Type[] parameters= TypeDetective.sniffTypeParameters(getMapperClass(), Mapper.class);
return toWritableClass(parameters[3]);
}
@Override
public Class extends Writable> getOutputKeyClass() {
Type[] parameters= TypeDetective.sniffTypeParameters(getReducerClass(), Reducer.class);
return toWritableClass(parameters[2]);
}
@Override
public Class extends Writable> getOutputValueClass() {
Class mapperClass=getReducerClass();
Type[] parameters= TypeDetective.sniffTypeParameters(getReducerClass(), Reducer.class);
return toWritableClass(parameters[3]);
}
/**
 * Resolves a reflective {@link Type} to a class.
 *
 * A plain {@code Class} is returned unchanged. A parameterized type resolves to its
 * raw type, with one exception: {@code TaggedItem} whose first type argument is
 * {@code Text} resolves to {@code TaggedTextItem}.
 *
 * @param t the type to resolve
 * @return the class standing for {@code t}
 * @throws RuntimeException when {@code t} is neither a class nor a parameterized type
 */
public static Class toWritableClass(Type t) {
    if (t instanceof Class) {
        return (Class) t;
    }
    if (!(t instanceof ParameterizedType)) {
        throw new RuntimeException("Can't identify type ["+t+"] as a class");
    }

    final ParameterizedType parameterized = (ParameterizedType) t;
    final Type rawType = parameterized.getRawType();

    // Some day this gets generalized and spun out into its own class, possibly wired
    // up through Spring. The gist: for a given generic type there is an "official"
    // concrete subclass to use.
    final boolean taggedText = TaggedItem.class.equals(rawType)
            && parameterized.getActualTypeArguments()[0].equals(Text.class);
    if (taggedText) {
        return TaggedTextItem.class;
    }
    return (Class) rawType;
}
// Maps each @InputPath annotation value to the paths read from the annotated option
// field(s). Starts empty; getInputPaths() replaces it when more than one option field
// carries @InputPath.
protected Multimap tagMap=HashMultimap.create();
//
// Note that this has the side effect of setting the tagMap
//
/**
 * Collects the job's input paths from the options object.
 *
 * When more than one public field of the options class carries {@code @InputPath},
 * every such field is read. A {@code String} value contributes one path, an
 * {@code Iterable} value contributes one path per element, and any other value
 * (including null) is skipped. Each path is also recorded in {@code tagMap} under
 * the annotation's value. Otherwise the paths come from a public field named
 * "input".
 *
 * NOTE(review): exactly one annotated field falls through to the "input" lookup
 * because the test is {@code size() > 1}. Confirm this is intended.
 *
 * @return the input paths, or {@code null} when the fallback "input" field is
 *         missing or null
 */
@Override
@SuppressWarnings("unchecked")
public Iterable<Path> getInputPaths() {
    Map<Field, String> declaredInputPaths = searchForInputPaths(getOptionsClass());
    if (declaredInputPaths.size() > 1) {
        List<Path> allPaths = newArrayList();
        tagMap = HashMultimap.create();
        for (Map.Entry<Field, String> pair : declaredInputPaths.entrySet()) {
            try {
                Object o = pair.getKey().get(options);
                if (o instanceof String) {
                    Path that = STRING2PATH.apply((String) o);
                    allPaths.add(that);
                    tagMap.put(pair.getValue(), that);
                } else if (o instanceof Iterable) {
                    // Unchecked: the elements are assumed to be path strings.
                    for (Path that : transform((Iterable<String>) o, STRING2PATH)) {
                        allPaths.add(that);
                        tagMap.put(pair.getValue(), that);
                    }
                }
            } catch (IllegalAccessException iae) {
                // Skip the unreadable field but keep the cause in the log.
                LOG.warn("Java access controls blocked access to @InputPath on field "+pair.getKey(), iae);
            }
        }
        return allPaths;
    }
    Iterable<String> s = readField(options, "input");
    if (s == null) {
        return null;
    }
    return transform(s, STRING2PATH);
}
/**
 * Returns the current tag-to-path multimap. It is only populated after
 * {@code getInputPaths()} has processed more than one {@code @InputPath} field.
 */
@Override
public Multimap getTagMap() {
    return this.tagMap;
}
/**
 * Returns the number of reduce tasks, read from the public "reducerCount" field of
 * the options object.
 *
 * @return the field's value, or 0 when the field is missing, unreadable or null
 */
@Override
public int getNumReduceTasks() {
    final Integer configured = readField(options, "reducerCount");
    if (configured != null) {
        return configured;
    }
    return 0;
}
/**
 * Returns the job's output path, built from the public "output" field of the
 * options object.
 *
 * @return the output path, or {@code null} when the field is missing, unreadable
 *         or null
 */
@Override
protected Path getOutputPath() {
    final String output = readField(options, "output");
    return output == null ? null : STRING2PATH.apply(output);
}
@Override
public Class extends InputFormat> getInputFormatClass() {
Class inKey=getMapInputKeyClass();
Class inValue=getMapInputValueClass();
if ( inValue==Text.class) {
if(inKey==LongWritable.class) {
return TextInputFormat.class;
} else if(inKey==Text.class) {
return KeyValueTextInputFormat.class;
}
}
return SequenceFileInputFormat.class;
}
@Override
public Class extends OutputFormat> getOutputFormatClass() {
Class outKey=getOutputKeyClass();
Class outValue=getOutputValueClass();
if (outKey==Text.class) {
if (outValue==Text.class || outValue==NullWritable.class)
return TextOutputFormat.class;
} else if (outValue==Text.class && outKey==NullWritable.class)
return TextOutputFormat.class;
return SequenceFileOutputFormat.class;
}
/**
 * Returns the options class: element 0 of the type parameters that
 * {@code TypeDetective.sniffTypeParameters} reports for this tool's runtime class
 * against {@code SelfAwareTool}, cast to {@code Class}.
 */
@Override
public Class getOptionsClass() {
    final Type[] typeArguments = TypeDetective.sniffTypeParameters(getClass(), SelfAwareTool.class);
    return (Class) typeArguments[0];
}
/**
 * {@code BeanNameAware} callback: stores the bean name, which {@code getName()}
 * later returns as the tool's name.
 *
 * @param s the bean name to store
 */
@Override
public void setBeanName(String s) {
    this.beanName = s;
}
/**
 * An {@link IllegalArgumentException} subtype named for the case where generic type
 * information is not available. It offers the same four constructors as its parent.
 */
public static class NoGenericTypeInformationAvailable extends IllegalArgumentException {

    /** Creates the exception with no message and no cause. */
    public NoGenericTypeInformationAvailable() {
        super();
    }

    /**
     * Creates the exception with a detail message.
     *
     * @param s the detail message
     */
    public NoGenericTypeInformationAvailable(String s) {
        super(s);
    }

    /**
     * Creates the exception with a detail message and a cause.
     *
     * @param message the detail message
     * @param cause the underlying cause
     */
    public NoGenericTypeInformationAvailable(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates the exception with a cause only.
     *
     * @param cause the underlying cause
     */
    public NoGenericTypeInformationAvailable(Throwable cause) {
        super(cause);
    }
}
//
// Note the following is not really correct, but it's tricky to make sense of
// what we're getting back from reflection. In particular, when we see the type
// parameters of Mapper, we see a type variable rather than the actual type. I
// think we could figure out the type because the type variable is filled in down
// the inheritance stack, but this will get the app working for now.
//
public Class extends RawComparator> getGroupingComparatorClass() {
Class mapInput=getMapOutputKeyClass();
if(TaggedItem.class.isAssignableFrom(mapInput)) {
return TaggedTextKeyGroupComparator.class;
}
return super.getGroupingComparatorClass();
}
public Class extends Partitioner> getPartitionerClass() {
Class mapInput=getMapOutputKeyClass();
if(TaggedItem.class.isAssignableFrom(mapInput)) {
return TaggedKeyPartitioner.class;
}
return super.getPartitionerClass();
}
public Class extends RawComparator> getSortComparatorClass() {
Class mapInput=getMapOutputKeyClass();
if(TaggedItem.class.isAssignableFrom(mapInput)) {
return TaggedTextKeySortComparator.class;
}
return super.getGroupingComparatorClass();
}
/**
 * Finds the public fields of an options class that carry {@code @InputPath}.
 *
 * @param optionClass class whose public fields (as returned by
 *                    {@link Class#getFields()}) are inspected
 * @return a new map from each annotated field to its annotation's {@code value()};
 *         empty when no field is annotated
 */
public static Map<Field, String> searchForInputPaths(Class<?> optionClass) {
    Map<Field, String> annotated = newHashMap();
    for (Field f : optionClass.getFields()) {
        InputPath p = f.getAnnotation(InputPath.class);
        if (p != null) {
            annotated.put(f, p.value());
        }
    }
    return annotated;
}
}