All downloads are free. The search and download features use the official Maven repository.

com.ontology2.bakemono.sieve3.Sieve3Mapper Maven / Gradle / Ivy

There is a newer version: 3.2
Show newest version
package com.ontology2.bakemono.sieve3;

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.springframework.context.ApplicationContext;

import com.google.common.collect.Maps;
import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.graph.Node_URI;
import com.hp.hpl.jena.graph.Triple;
import com.ontology2.bakemono.abstractions.KeyValueAcceptor;
import com.ontology2.bakemono.abstractions.NamedKeyValueAcceptor;
import com.ontology2.bakemono.abstractions.PrimaryKeyValueAcceptor;
import com.ontology2.bakemono.abstractions.Spring;
import com.ontology2.bakemono.jena.WritableTriple;
import com.ontology2.bakemono.mapred.RealMultipleOutputs;
import com.ontology2.bakemono.primitiveTriples.PrimitiveTriple;
import com.ontology2.bakemono.primitiveTriples.PrimitiveTripleCodec;
import com.ontology2.bakemono.pse3.PSE3Counters;
import com.ontology2.centipede.Codec;
import com.ontology2.bakemono.sieve3.Sieve3Configuration.Rule;

public class Sieve3Mapper extends Mapper {

    private static final LongWritable ONE = new LongWritable(1);
    
    Sieve3Configuration sieve3conf;
    RealMultipleOutputs mos;
    KeyValueAcceptor other;
    Map> outputs=Maps.newHashMap();

    private ApplicationContext applicationContext;
    final static Codec primitiveTripleCodec=new PrimitiveTripleCodec(); 
    
    @Override
    public void setup(Context context) throws IOException,
    InterruptedException {
        applicationContext=Spring.getApplicationContext(context.getConfiguration());
        mos=new RealMultipleOutputs(context);
        super.setup(context);
        other=new PrimaryKeyValueAcceptor(context);
        sieve3conf = Sieve3Tool.createDefaultConfiguration(applicationContext);
        
        for(Rule r:sieve3conf.getRules())
            outputs.put(r.getOutputName(), new NamedKeyValueAcceptor(mos,r.getOutputName()));
        
    }
    
    //
    // We might be able to ditch a hashtable lookup here to speed things up
    //
    
    @Override
    public void map(LongWritable arg0, Text line, Context c) throws IOException, InterruptedException {
        PrimitiveTriple row3=primitiveTripleCodec.decode(line.toString());
        for(Rule r:sieve3conf.getRules())
            if(r.getCondition().apply(row3)) {
                outputs.get(r.getOutputName()).write(row3,ONE,c);
                return;
            }

        other.write(row3, ONE, c);
    }
    
    @Override
    protected void cleanup(org.apache.hadoop.mapreduce.Mapper.Context context)
            throws IOException, InterruptedException {
        super.cleanup(context);
        mos.close();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy