All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.github.repir.Repository.SynStats Maven / Gradle / Ivy

The newest version!
package io.github.repir.Repository;

import io.github.repir.Repository.SynStats.File;
import io.github.repir.Repository.SynStats.Record;
import io.github.repir.tools.io.Datafile;
import io.github.repir.tools.io.struct.StructuredFileKeyValue;
import io.github.repir.tools.io.struct.StructuredFileKeyValueRecord;
import io.github.repir.tools.lib.Log;
import io.github.repir.tools.lib.MathTools;
import io.github.repir.tools.lib.PrintTools;

/**
 * This feature caches statistics for synonym sets (collection frequency and document 
 * frequency) that were retrieved, to speedup retrieval when the same synonym
 * statistics are needed again. This is used by the default FeatureSynonym operator. 
 * They first look if the synonym statistics are already available, if not a 
 * Collector is used to fetch these in a pre-pass
 * which are automatically stored for reuse. 
 * 

* Note that in contrast to ProximityStats, this feature will also store complex * synonyms that operate for instance on other proximity operators. For this, * the query-set is sorted and stored as a query string. * @author jer */ public class SynStats extends StoredDynamicFeature { public static Log log = new Log(SynStats.class); private SynStats(Repository repository) { super(repository); } public static SynStats get(Repository repository) { String label = canonicalName(SynStats.class); SynStats termid = (SynStats)repository.getStoredFeature(label); if (termid == null) { termid = new SynStats(repository); repository.storeFeature(label, termid); } return termid; } @Override public File createFile(Datafile df) { return new File(df); } public class File extends StructuredFileKeyValue { public StringField query = this.addString("query"); public CLongField cf = this.addCLong("cf"); public CLongField df = this.addCLong("df"); public File(Datafile df) { super(df); } @Override public Record newRecord() { return new Record(); } @Override public Record closingRecord() { Record r = new Record(); r.query = ""; r.cf=-1; r.df=-1; return r; } } public class Record implements StructuredFileKeyValueRecord { public String query; public long cf; public long df; public String toString() { return PrintTools.sprintf("bucketindex=%d query=%s cf=%d df=%d", this.hashCode(), query, cf, df); } @Override public int hashCode() { return MathTools.finishHash(MathTools.hashCode(query.hashCode())); } @Override public boolean equals(Object r) { if (r instanceof Record) { Record record = (Record)r; return query.equals( record.query ); } return false; } @Override public void write(File file) { file.query.write(query); file.cf.write(cf); file.df.write(df); } @Override public void read(File file) { query = file.query.value; cf = file.cf.value; df = file.df.value; } public void convert(StructuredFileKeyValueRecord record) { Record r = (Record)record; r.query = query; r.cf = cf; r.df = df; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy