// org.archive.hadoop.ResourceInputFormat (Maven / Gradle / Ivy)
package org.archive.hadoop;
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.archive.resource.MetaData;
/**
 * A {@link FileInputFormat} whose records are produced by a
 * {@code ResourceRecordReader}, and whose input files are never split.
 *
 * <p>NOTE(review): {@code FileInputFormat} and {@code RecordReader} are used
 * as raw types here. The key/value type arguments presumably match whatever
 * {@code ResourceRecordReader} declares -- confirm against that class before
 * parameterizing.
 */
public class ResourceInputFormat extends FileInputFormat {

    /**
     * Reports that input files must not be split.
     *
     * @param context  the job context (not consulted)
     * @param filename the input file being considered (not consulted)
     * @return always {@code false}
     * @see org.apache.hadoop.mapreduce.lib.input.FileInputFormat#isSplitable(org.apache.hadoop.mapreduce.JobContext, org.apache.hadoop.fs.Path)
     */
    @Override
    protected boolean isSplitable(JobContext context, Path filename) {
        // TODO: confirm whether splitting could safely be enabled; the
        // original note suspected records might be lost between splits.
        return false;
    }

    /**
     * Supplies the reader used to iterate over the records of a split.
     *
     * @param inputSplit the split to be read (not consulted here)
     * @param context    the task attempt context (not consulted here)
     * @return a newly constructed {@code ResourceRecordReader}
     * @throws IOException          declared by the overridden method; not thrown by this body
     * @throws InterruptedException declared by the overridden method; not thrown by this body
     */
    @Override
    public RecordReader createRecordReader(InputSplit inputSplit,
            TaskAttemptContext context) throws IOException, InterruptedException {
        final RecordReader reader = new ResourceRecordReader();
        return reader;
    }
}