All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.github.repir.Retriever.Query Maven / Gradle / Ivy

The newest version!
package io.github.repir.Retriever;

import io.github.repir.tools.io.struct.StructureReader;
import io.github.repir.tools.io.struct.StructureWriter;
import io.github.repir.Repository.Feature;
import io.github.repir.Repository.Repository;
import io.github.repir.tools.lib.Log;
import io.github.repir.Strategy.Strategy;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import io.github.repir.Strategy.RetrievalModel;
import io.github.repir.tools.io.buffer.BufferSerializable;
import io.github.repir.tools.io.EOCException;
import io.github.repir.tools.lib.ClassTools;
import java.util.Collections;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * A Query object contains the query request, settings such as retrievalmodel,
 * scorefunction, retrieved features, query specific parameter settings,
 * and optionally contains the query results after retrieval. 
 * Query itself does not contain much logic, but is more a data container.
 * 

* @author jeroen */ public class Query implements BufferSerializable,Comparable { public static Log log = new Log(Query.class); public Strategy strategy; public Repository repository; /** * The document class used to construct new Document objects. */ public String documentclass; private Constructor documentconstructor; private Constructor documentconstructor2; public String documentcomparatorclass; private Comparator documentcomparator; /** * The original query string can be reformulated, by the tokenizer used, * which will remove any meaningless code, or by the retrieval model. */ public String originalquery; public String query; /** * If true then stopwords are removed. This is usually done during the * preRetrieval phase, meaning that the InferenceModel initially contains * stopwords, so that for instances literal phrases that contain stopwords * can set their span to a correct width. When the stopwords are removed from * the InferenceModel, the phrase spans are already set. */ public boolean removeStopwords; /** * The maximum number of documents to readValue. */ public int documentlimit; /** * Query ID that can be used by the caller to identify results. */ public int id; public String domain; /** * The returned ranked set of documents. Although this is commonly returned, * a custom Strategy could also return custom data through the * Collector. */ private Document queryresults[] = new Document[0]; /** * The requestedfeatures that are required to be fetched in order to * score/report each document. If the Boolean is true, the Feature will be * reported */ public ArrayList reportedFeatures = new ArrayList(); public int partition; public ArrayList resultsarraylist = new ArrayList(); public ArrayList variants = new ArrayList(); ArrayList dovariants = new ArrayList(); /** * A common way to construct a Query object is to use {@link IndexReader.IndexReader#constructQueryRequest(int, java.lang.String) * } * which sets all parameter to the repository defaults. */ public Query() { } public Query(Repository repository, int queryid, String query) { addVariant(new Variant( repository.getConf().get("retriever.strategy", "RetrievalModel"), repository.getConf().get("retriever.scorefunction", "ScoreFunctionKLD"), null)); this.repository = repository; this.id = queryid; this.originalquery = query; documentlimit = repository.getConf().getInt("retriever.documentlimit", 10); documentclass = repository.getConf().get("retriever.documentclass", Document.class.getCanonicalName()); documentcomparatorclass = repository.getConf().get("retriever.documentcomparatorclass", DocumentComparator.class.getCanonicalName()); removeStopwords = repository.getConf().getBoolean("retriever.removestopwords", false); } /** * @param queryid Query ID that can be used by the caller to identify results * @param query String that is used to readValue documents * @param strategy name of the Strategy to use * @param documentlimit maximum number retrieved documents */ public Query(Repository repository, int queryid, String query, String retrievalmodel, int limit) { this(repository, queryid, query); this.setStrategyClassname(retrievalmodel); this.documentlimit = limit; } public Query(Query q) { repository = q.repository; id = q.id; originalquery = q.originalquery; query = q.query; documentlimit = q.documentlimit; documentclass = q.documentclass; documentcomparatorclass = q.documentcomparatorclass; for (String f : q.reportedFeatures) reportedFeatures.add(f); removeStopwords = q.removeStopwords; partition = q.partition; domain = q.domain; for (Variant v : q.variants) variants.add(new Variant(v.retrievalmodelclass, v.scorefunctionclass, v.configuration)); dovariants.addAll(q.dovariants); } public void setRepository(Repository repository) { this.repository = repository; } public RetrievalModel getRetrievalModel() { return (RetrievalModel)strategy; } public boolean done() { return getStrategyClass() == null; } public void addFeature(String featurename) { if (!reportedFeatures.contains(featurename)) { reportedFeatures.add(featurename); } } public void addCollectionID() { String name = repository.getCollectionIDFeature().getCanonicalName(); if (!reportedFeatures.contains(name)) { reportedFeatures.add(name); } } public void addFeatureClass(Class featureclass, String ... param) { addFeature(Feature.canonicalName(featureclass, param)); } public void clearFeatures() { reportedFeatures = new ArrayList(); } public String getConfiguration() { return variants.get(getVariantID()).configuration; } public String getStrategyClass() { return variants.get(getVariantID()).retrievalmodelclass; } public String getScorefunctionClass() { return variants.get(getVariantID()).scorefunctionclass; } public void setStrategyClassname(String strategyclass) { variants.get(getVariantID()).retrievalmodelclass = strategyclass; } public void setStrategyClass(Class strategyclass) { variants.get(getVariantID()).retrievalmodelclass = strategyclass.getSimpleName(); } public void setScorefunctionClassname(String scorefunctionclass) { variants.get(getVariantID()).scorefunctionclass = scorefunctionclass; } public void setScorefunctionClass(Class scorefunctionclass) { variants.get(getVariantID()).scorefunctionclass = scorefunctionclass.getSimpleName(); } public void setConfiguration(String configuration) { variants.get(getVariantID()).configuration = configuration; } public void addFeature(Feature feature) { String name = feature.getLabel(); addFeature(name); } public void add(Document d) { this.resultsarraylist.add(d); } public void clearResults() { resultsarraylist.clear(); queryresults = null; } public Document[] getQueryResults() { if (queryresults == null || (resultsarraylist != null && queryresults.length != resultsarraylist.size())) { Collections.sort(resultsarraylist, new DocumentComparator()); queryresults = this.resultsarraylist.toArray(new Document[resultsarraylist.size()]); } return queryresults; } @Override public void write(StructureWriter writer) { writer.write(partition); writer.write(id); writer.write(domain); writer.write(originalquery); writer.write(query); writer.write(documentlimit); writer.write(documentclass); writer.write(documentcomparatorclass); writer.write(removeStopwords); writer.writeStr(reportedFeatures); writer.write(variants.size()); for (Variant v : variants) { v.write(writer); } writer.writeC(dovariants); writer.write(getQueryResults().length); for (Document doc : getQueryResults()) { doc.write(writer); } } @Override public void read(StructureReader reader) throws EOCException { readHeader(reader); int results = reader.readInt(); queryresults = new Document[results]; for (int i = 0; i < results; i++) { Document doc = createDocument(); doc.read(reader); queryresults[i] = doc; } } public void readHeader(StructureReader reader) throws EOCException { partition = reader.readInt(); id = reader.readInt(); domain = reader.readString(); originalquery = reader.readString(); query = reader.readString(); documentlimit = reader.readInt(); documentclass = reader.readString(); documentcomparatorclass = reader.readString(); removeStopwords = reader.readBoolean(); reportedFeatures = reader.readStrArrayList(); int variants = reader.readInt(); for (int i = 0; i < variants; i++) { Variant v = new Variant(); v.read(reader); this.variants.add(v); } dovariants = reader.readCIntArrayList(); } /** * for debug purposes */ public void print(String feature) { int rank = 1; for (Document d : getQueryResults()) { log.printf("%d %5d#%3d %f", rank++, d.docid, d.partition, d.score); if (rank > 10) { break; } } } public int compareTo(Query o) { return (this.id < o.id)?-1:1; } public Document createDocument() { if (documentconstructor == null) { try { Class dclass = ClassTools.toClass(this.documentclass, Document.class.getPackage().getName()); documentconstructor = ClassTools.getAssignableConstructor(dclass, Document.class); } catch (ClassNotFoundException ex) { log.fatalexception(ex, "createDocument(%s) invalid Document class", documentclass); } } Document d = (Document)ClassTools.construct(documentconstructor); return d; } public Document createDocument(RetrievalModel retrievalmodel, int id, int partition) { if (documentconstructor2 == null) { try { Class dclass = ClassTools.toClass(this.documentclass, Document.class.getPackage().getName()); documentconstructor2 = ClassTools.getAssignableConstructor(dclass, Document.class, RetrievalModel.class, int.class, int.class); } catch (ClassNotFoundException ex) { log.fatalexception(ex, "createDocument(%s) invalid Document class", documentclass); } } Document d = (Document)ClassTools.construct(documentconstructor2, retrievalmodel, id, partition); return d; } public Comparator getDocumentComparator() { if (documentcomparator == null) { try { Class clazz = ClassTools.toClass(documentcomparatorclass, DocumentComparator.class.getPackage().getName()); Constructor c = ClassTools.getAssignableConstructor(clazz, Comparator.class); documentcomparator = (Comparator)ClassTools.construct(c); } catch (ClassNotFoundException ex) { log.fatalexception(ex, "createDocumentComparator(%s) invalid DocumentComparator class", documentcomparatorclass); } } return documentcomparator; } public void setVariantID( int variant ) { id = createVariantID( variant ); } private int createVariantID( int variant ) { int newid = getID(id); newid |= (variant << 10); return newid; } public int getVariantID() { return getVariantID(id); } public int getID() { return getID(id); } public static int getID( int queryid ) { return queryid & 1023; } public static int getVariantID( int queryid ) { return queryid >> 10; } public Collection getReducerID() { ArrayList reducers = new ArrayList(); for (int i : dovariants) { reducers.add(Integer.toString(createVariantID(i))); } return reducers; } public void initVariants() { variants = new ArrayList(); dovariants = new ArrayList(); } public void addVariant( String retrievalmodelclass, String scorefunctionclass, String settings ) { addVariant(new Variant( retrievalmodelclass, scorefunctionclass, settings)); } public void addVariant( Variant v ) { dovariants.add(variants.size()); variants.add( v ); } public int variantCount() { return dovariants.size(); } public Iterable variantIterator() { return new VariantIter(); } public Query splitVariants() { Query q = new Query( this ); q.dovariants = new ArrayList(); int count = variantCount() / 2; for (int i = 0; i < count; i++) q.dovariants.add(dovariants.remove(0)); return q; } public static class Variant implements BufferSerializable { /** * The name of the next Strategy to use. In Query request, this is the * initial Strategy, which can be followed by consecutive * RetrievalModels for multi-pass retrieval strategies. Query objects that * are returned with results have a null-value for the strategyclass, * indicating that no next retrieval pass is necessary. */ public String retrievalmodelclass; public String scorefunctionclass; public String configuration; public Variant() {} public Variant(String retrievalmodelclass, String scorefunctionclass, String settings ) { this.retrievalmodelclass = retrievalmodelclass; this.scorefunctionclass = scorefunctionclass; this.configuration = settings; } @Override public void read(StructureReader reader) throws EOCException { retrievalmodelclass = reader.readString(); scorefunctionclass = reader.readString(); configuration = reader.readString(); } @Override public void write(StructureWriter writer) { writer.write(retrievalmodelclass); writer.write(scorefunctionclass); writer.write(configuration); } } class VariantIter implements Iterator, Iterable { Iterator iter; public VariantIter() { iter = dovariants.iterator(); } @Override public boolean hasNext() { return iter.hasNext(); } @Override public Query next() { int v = iter.next(); setVariantID(v); return Query.this; } @Override public void remove() { throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. } @Override public Iterator iterator() { return this; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy