All downloads are free. Search and download functionality uses the official Maven repository.

io.github.repir.Repository.ProximityStats Maven / Gradle / Ivy

The newest version!
package io.github.repir.Repository;

import io.github.repir.Repository.ProximityStats.File;
import io.github.repir.Repository.ProximityStats.Record;
import io.github.repir.tools.io.Datafile;
import io.github.repir.tools.io.struct.StructuredFileKeyValue;
import io.github.repir.tools.io.struct.StructuredFileKeyValueRecord;
import io.github.repir.tools.lib.ArrayTools;
import io.github.repir.tools.lib.Log;
import io.github.repir.tools.lib.MathTools;
import io.github.repir.tools.lib.PrintTools;

/**
 * This feature caches co-occurrence statistics (collection frequency and document 
 * frequency) that were retrieved, to speedup retrieval when the same co-occurrence
 * statistics are needed again. This is used by the default FeatureProximity operators. 
 * They first look if the co-occurrence statistics for a proximity operator are
 * already available, if not a CollectorProximity is used to fetch these in a pre-pass
 * which are automatically stored for reuse. 
 * 

* Note that this only applies to simple * Proximity operators on query, RR can also handle complex proximity operators * that operate for instance on other proximity operators or synonyms, but these * are not cached but retrieved in a pre-pass and passed, and the statistics are * non-persistently passed on to the next (final) retrieval pass. * @author jer */ public class ProximityStats extends StoredDynamicFeature { public static Log log = new Log(ProximityStats.class); private ProximityStats(Repository repository) { super(repository); } public static ProximityStats get(Repository repository) { String label = canonicalName(ProximityStats.class); ProximityStats proximitystats = (ProximityStats)repository.getStoredFeature(label); if (proximitystats == null) { proximitystats = new ProximityStats(repository); repository.storeFeature(label, proximitystats); } return proximitystats; } @Override public File createFile(Datafile df) { return new File(df); } public class File extends StructuredFileKeyValue { public StringField query = this.addString("query"); public CLongField cf = this.addCLong("cf"); public CIntField df = this.addCInt("df"); public File(Datafile df) { super(df); } @Override public Record newRecord() { return new Record(); } @Override public Record closingRecord() { Record r = new Record(); r.query = ""; r.cf=-1; r.df=-1; return r; } } public class Record implements StructuredFileKeyValueRecord { public String query; public long cf; public int df; public String toString() { return PrintTools.sprintf("bucketindex=%d query=%s cf=%d df=%d", this.hashCode(), query, cf, df); } @Override public int hashCode() { return MathTools.finishHash(MathTools.hashCode(query.hashCode())); } @Override public boolean equals(Object r) { if (r instanceof Record) { Record record = (Record)r; return query.equals( record.query ); } return false; } @Override public void write(File file) { file.query.write(query); file.cf.write(cf); file.df.write(df); } @Override public void 
read(File file) { query = file.query.value; cf = file.cf.value; df = file.df.value; } public void convert(StructuredFileKeyValueRecord record) { Record r = (Record)record; r.query = query; r.cf = cf; r.df = df; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy