All downloads are free. The search and download functionality uses the official Maven repository.

io.github.repir.Retriever.MapReduce.RetrieverMRReduce Maven / Gradle / Ivy

   package io.github.repir.Retriever.MapReduce;

import java.io.IOException;
import java.util.HashSet;
import io.github.repir.MapReduceTools.RRConfiguration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
import io.github.repir.Strategy.Collector.Collector;
import io.github.repir.Strategy.Collector.CollectorCachable;
import io.github.repir.Repository.Repository;
import io.github.repir.Strategy.Strategy;
import io.github.repir.Retriever.Query;
import io.github.repir.tools.lib.Log;

/**
 * The reducer is generic, using the passed query with the name of the retrieval
 * model to aggregate the results that were collected by each mapper. Each
 * reducer reduces only a single query. The incoming Query object is used to
 * reconstruct the same retrieval model in every location (mappers and reducer),
 * so that the retrieval model can process the map and writeReduce steps similar to
 * retrieval on a single machine.
 * 

* @author jeroen */ public class RetrieverMRReduce extends Reducer { public static Log log = new Log(RetrieverMRReduce.class); RRConfiguration conf; CollectorCachable collector; Repository repository; Retriever retriever; String reducers[]; Strategy strategy; @Override protected void setup(Context context) throws IOException, InterruptedException { repository = new Repository(context.getConfiguration()); conf = repository.getConf(); retriever = new Retriever(repository, context); reducers = conf.getStrings("retriever.reducers"); } @Override public void reduce(CollectorKey key, Iterable tfs, Context context) throws IOException, InterruptedException { // topicrun.outfile is set to some unique name, which is used to write the query // output to if ( strategy == null ) { String reducer = reducers[ key.getReducer() ]; log.info("reducer %s %b", reducer, key.isQuery); if (key.isQuery) { Query q = key.getQuery(); q.setRepository(repository); strategy = retriever.constructStrategy(q); strategy.prepareAggregation(); for (CollectorValue v : tfs) { Collector aggregator = strategy.collectors.get(v.collectorid); v.collector.setStrategy(strategy); v.collector.decode(); aggregator.aggregate(v.collector); } strategy.collectors.finishReduce(); q = strategy.finishReduceTask(); //log.info("%d %s %d", q.id, q.query, q.queryresults.length); strategy.prepareWriteReduce(q); strategy.writeReduce(q); strategy.finishWriteReduce(); } else { HashSet partitionaggregated = new HashSet(); log.info("new collector %d %s %s", key.reducer, key.collector, retriever); Collector aggregator = key.collector; aggregator.setRetriever(retriever); if (collector == null && aggregator instanceof CollectorCachable) { collector = (CollectorCachable)aggregator; collector.startAppend(); } for (CollectorValue v : tfs) { if (partitionaggregated.contains(v.partition)) aggregator.aggregateDuplicatePartition(v.collector); else { aggregator.aggregate(v.collector); partitionaggregated.add(v.partition); } } if (collector 
!= null) ((CollectorCachable)aggregator).streamappend(collector); } } } @Override protected void cleanup(Context context) throws IOException, InterruptedException { if (collector != null) collector.finishAppend(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy