// io.github.repir.Strategy.Strategy (Maven / Gradle / Ivy)
// The newest version!
package io.github.repir.Strategy;
import io.github.repir.Repository.Repository;
import io.github.repir.Retriever.Query;
import io.github.repir.Retriever.Retriever;
import io.github.repir.Strategy.Collector.Collector;
import io.github.repir.Strategy.Collector.MasterCollector;
import io.github.repir.Strategy.Operator.Operator;
import io.github.repir.tools.io.Datafile;
import io.github.repir.tools.lib.ClassTools;
import io.github.repir.tools.lib.Log;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.Collection;
import java.util.HashSet;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* The Strategy contains the logic for the retrieval/analysis of the task,
* which is independent of the Query given. In an abstract sense, a strategy
* is always a process that collects results by inspecting stored features.
* Two specialization branches exist: a {@link RetrievalModel} results in
* a list of ranked {@link Document}s, and a second branch whose description
* is truncated in this comment (TODO: restore the missing {@code @link} target).
* <p>
* A Strategy is created by {@link #create(Retriever, Query)},
* which uses a {@link Query} request that is parsed into a {@link GraphRoot}. For standard
* retrieval, the default Strategy should work fine, but alternatively, a Strategy can
* alter the GraphRoot (e.g. {@link RetrievalModelRM3}).
*
* @author jeroen
*/
public abstract class Strategy {

    /** Logger shared by this class and used for all diagnostics below. */
    public static Log log = new Log(Strategy.class);
    /** Retriever this strategy was constructed with. */
    public Retriever retriever;
    /** Output file for reduced results; assigned by {@link #prepareWriteReduce(Query)}. */
    public Datafile fileout;
    /** Query assigned through {@link #setQuery(Query)}. */
    public Query query;
    /** Repository obtained from the retriever in the constructor. */
    public Repository repository;
    /** Partition number; it is not assigned anywhere inside this class. */
    public int partition;
    /** Collectors of this strategy; the constructor binds them to {@link #repository}. */
    public MasterCollector collectors;

    /**
     * Use {@link #create(Retriever, Query)} instead of calling a constructor directly.
     * Stores the retriever, takes its repository and sets up an empty
     * {@link MasterCollector} bound to that repository.
     *
     * @param retriever the retriever that supplies the repository
     */
    public Strategy(Retriever retriever) {
        this.retriever = retriever;
        repository = retriever.getRepository();
        collectors = new MasterCollector();
        collectors.setRepository(repository);
    }

    /**
     * Override to set a different collector for the retrieval model.
     */
    public abstract void setCollector();

    /**
     * RetrievalModels should be instantiated using this function on a Query request. The Query
     * request contains the specific classes for construction of a RetrievalModel and parses the
     * query String into a GraphRoot.
     * <p>
     * The strategy class named by the query is resolved relative to this class's package,
     * instantiated reflectively with the retriever as the only constructor argument, and then
     * given the query through {@link #setQuery(Query)}. Every failure is reported through
     * {@code log.fatalexception}.
     *
     * @param retriever       passed to the constructor of the strategy class
     * @param queryrequest    supplies the strategy class name and becomes the strategy's query
     * @param assignableClass type handed to {@code ClassTools.getAssignableConstructor} together
     *                        with the resolved class (presumably the type the resolved class must
     *                        be assignable to; confirm against ClassTools)
     * @return the new Strategy, or {@code null} when construction failed and
     *         {@code log.fatalexception} returned normally
     */
    public static Strategy create(Retriever retriever, Query queryrequest, Class assignableClass) {
        Strategy strategy = null;
        try {
            Class<?> clazz = ClassTools.toClass(queryrequest.getStrategyClass(),
                    Strategy.class.getPackage().getName());
            Constructor<?> cons = ClassTools.getAssignableConstructor(clazz, assignableClass, Retriever.class);
            strategy = (Strategy) cons.newInstance(retriever);
            strategy.setQuery(queryrequest);
        } catch (ClassNotFoundException ex) {
            // The format string needs a placeholder, otherwise the class name is never logged.
            log.fatalexception(ex, "create() invalid StrategyClass %s", queryrequest.getStrategyClass());
        } catch (InstantiationException ex) {
            log.fatalexception(ex, "create( %s, %s )", retriever, queryrequest);
        } catch (IllegalAccessException ex) {
            log.fatalexception(ex, "create( %s, %s )", retriever, queryrequest);
        } catch (IllegalArgumentException ex) {
            log.fatalexception(ex, "create( %s, %s )", retriever, queryrequest);
        } catch (InvocationTargetException ex) {
            log.fatalexception(ex, "create( %s, %s )", retriever, queryrequest);
        }
        return strategy;
    }

    /**
     * Same as {@link #create(Retriever, Query, Class)} with {@code Strategy.class} as the
     * assignable class.
     *
     * @param retriever    passed to the constructor of the strategy class
     * @param queryrequest supplies the strategy class name and becomes the strategy's query
     * @return the new Strategy, see {@link #create(Retriever, Query, Class)}
     */
    public static Strategy create(Retriever retriever, Query queryrequest) {
        return create(retriever, queryrequest, Strategy.class);
    }

    /**
     * Assigns the query and adds the query's configuration to the repository.
     *
     * @param q the query this strategy works on
     */
    public final void setQuery(Query q) {
        this.query = q;
        repository.addConfiguration(q.getConfiguration());
    }

    /**
     * Used to setup the Strategy so that results can be collected and aggregated.
     * This is typically used in the Reducer to create a Strategy for the
     * aggregation of results collected per segments.
     * <p>
     * Calls, in this order, {@link #prepareAggregationDetail()}, {@link #setCollector()} and
     * {@code collectors.prepareAggregation()}.
     */
    public final void prepareAggregation() {
        prepareAggregationDetail();
        setCollector();
        collectors.prepareAggregation();
    }

    /**
     * Delegates to {@code collectors.prepareRetrieval()}.
     */
    public void prepareRetrieval() {
        collectors.prepareRetrieval();
    }

    /**
     * Strategy-specific preparation that runs before {@link #setCollector()} in both
     * {@link #prepareAggregation()} and {@link #reducerList()}.
     */
    public abstract void prepareAggregationDetail();

    /**
     * Runs {@link #prepareAggregationDetail()} and {@link #setCollector()}, then gathers the
     * reducer IDs of every collector into one set without duplicates.
     *
     * @return the union of {@code getReducerIDs()} over all collectors
     */
    public Collection reducerList() {
        prepareAggregationDetail();
        setCollector();
        // Element type of getReducerIDs() is not visible here, so the set is typed on Object.
        HashSet<Object> reducers = new HashSet<Object>();
        for (Collector c : collectors) {
            reducers.addAll(c.getReducerIDs());
        }
        return reducers;
    }

    /**
     * The work of this strategy in the map phase; defined by subclasses.
     */
    public abstract void doMapTask();

    /**
     * After the collectors are shuffled and sorted to the reducers, they are
     * aggregated if the collectors are equals(). Between aggregation and sending
     * the results to the master process (using prepareWriteReduce(), writeReduce(),
     * and finishWriteReduce()), this hook is called to allow processing in the reducer.
     *
     * @return a Query produced by the subclass (presumably the one later passed to
     *         {@link #writeReduce(Query)}; confirm against the caller)
     */
    public abstract Query finishReduceTask();

    /**
     * Opens the output file for the given query. The path is the configured
     * {@code topicrun.outfile} string followed by {@code _<query id>_<variant id>}; the
     * canonical path is logged before the file is opened for writing.
     *
     * @param q the query whose ID and variant ID name the output file
     */
    public void prepareWriteReduce(Query q) {
        String outfile = repository.configuredString("topicrun.outfile") + "_" + q.getID() + "_" + q.getVariantID();
        fileout = new Datafile(repository.getFS(), outfile);
        log.info("outfile %s", fileout.getCanonicalPath());
        fileout.openWrite();
    }

    /**
     * Writes the query to the buffer of {@link #fileout}; requires that
     * {@link #prepareWriteReduce(Query)} assigned {@link #fileout} first.
     *
     * @param q the query to write
     */
    public void writeReduce(Query q) {
        q.write(fileout.rwbuffer);
    }

    /**
     * Closes {@link #fileout} for writing.
     */
    public void finishWriteReduce() {
        fileout.closeWrite();
    }

    /**
     * Hook to modify stored features when they are cloned for the next cycle. This default
     * implementation returns {@code f.clone(newmodel)} and ignores {@code cycle}.
     *
     * @param f        the feature to clone
     * @param newmodel the graph root passed to the clone
     * @param cycle    cycle number, unused by this default implementation
     * @return the clone of {@code f}
     */
    public Operator cloneFeature(Operator f, GraphRoot newmodel, int cycle) {
        return f.clone(newmodel);
    }

    /**
     * @return querystring that is used to construct a processing Graph. This is
     * a one time conversion, which should be based on Query.stemmedquery and is
     * set as Query.query which is used operational. A RetrievalModel can override
     * this method to modify the query string used.
     */
    public String getQueryToRetrieve() {
        return query.query;
    }

    /**
     * @return the {@code documentlimit} field of the current query
     */
    public int getDocumentLimit() {
        return query.documentlimit;
    }

    /**
     * @return the score function class reported by the current query
     */
    public String getScorefunctionClass() {
        return query.getScorefunctionClass();
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy