All downloads are free. Search and download functionality uses the official Maven repository.

com.thinkaurelius.titan.hadoop.formats.script.ScriptInputFormat Maven / Gradle / Ivy

There is a newer version: 1.0.0
Show newest version
package com.thinkaurelius.titan.hadoop.formats.script;

import com.thinkaurelius.titan.hadoop.FaunusVertex;
import com.thinkaurelius.titan.hadoop.formats.VertexQueryFilter;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import java.io.IOException;

import static com.thinkaurelius.titan.hadoop.compat.HadoopCompatLoader.DEFAULT_COMPAT;

/**
 * ScriptInputFormat supports the arbitrary parsing of a \n-based file format.
 * Each line of the file is passed to the Gremlin/Groovy script identified by the titan.hadoop.input.script.file property.
 * The Gremlin/Groovy file must have a method with the following signature:
 * 

* def boolean read(HadoopVertex vertex, String line) { ... } *

* The HadoopVertex argument is a reusable object to avoid object creation (see HadoopVertex.reuse(long)). * The String argument is the \n-line out of the file at the titan.hadoop.input.location. * The boolean denotes whether or not the provided line yielded a successful creation of a HadoopVertex. * * @author Marko A. Rodriguez (http://markorodriguez.com) */ public class ScriptInputFormat extends FileInputFormat implements Configurable { private VertexQueryFilter vertexQuery; private Configuration config; @Override public RecordReader createRecordReader(final InputSplit split, final TaskAttemptContext context) throws IOException { return new ScriptRecordReader(this.vertexQuery, context); } @Override protected boolean isSplitable(final JobContext context, final Path file) { return null == new CompressionCodecFactory(DEFAULT_COMPAT.getJobContextConfiguration(context)).getCodec(file); } @Override public void setConf(final Configuration config) { this.config = config; this.vertexQuery = VertexQueryFilter.create(config); } @Override public Configuration getConf() { return this.config; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy