
// io.github.repir.Repository.TermDF Maven / Gradle / Ivy
// The newest version!
package io.github.repir.Repository;
import java.util.ArrayList;
import java.util.HashMap;
import io.github.repir.tools.io.Datafile;
import io.github.repir.tools.io.struct.StructuredFileByteJumpTable;
import io.github.repir.Repository.TermDF.File;
import io.github.repir.tools.lib.Const;
import io.github.repir.tools.lib.Log;
/**
 * Stores the document frequency of terms in the Vocabulary, which can be accessed
 * by {@link #readValue(int)} using the termID.
 * <p>
 * Values that have been read are kept in the in-memory {@link #cache}, keyed by
 * termID, unless caching was switched off with {@link #dontCache()}. The cache is
 * seeded from the configuration keys {@code repository.cachedtermids} and
 * {@code repository.cachedtermdfs} on construction, and can be written back to
 * those keys with {@link #writeCache()}.
 *
 * @author jer
 */
// NOTE(review): the <File> type argument is restored on the assumption that
// StoredUnreportableFeature is generic in its file type; the inherited 'file'
// field must be typed as TermDF.File for 'file.df' to resolve - confirm.
public class TermDF extends StoredUnreportableFeature<File> implements DictionaryFeature {

    public static Log log = new Log(TermDF.class);
    int keyid = 0;
    /** Maps termID to document frequency; {@code null} after {@link #dontCache()}. */
    public HashMap<Integer, Long> cache = new HashMap<Integer, Long>();

    /**
     * Creates the feature and seeds the cache from the configuration.
     * Instances are obtained through {@link #get(Repository)}.
     */
    private TermDF(Repository repository) {
        super(repository);
        readCache();
    }

    /**
     * Returns the TermDF feature stored in the repository under the canonical
     * name of this class, creating and storing a new one when none is present.
     *
     * @param repository the repository to look the feature up in
     * @return the stored or newly created TermDF instance
     */
    public static TermDF get(Repository repository) {
        String label = canonicalName(TermDF.class);
        TermDF termdf = (TermDF) repository.getStoredFeature(label);
        if (termdf == null) {
            termdf = new TermDF(repository);
            repository.storeFeature(label, termdf);
        }
        return termdf;
    }

    /**
     * Fills the cache from the configured lists {@code repository.cachedtermids}
     * and {@code repository.cachedtermdfs}, pairing entries by position. Entries
     * whose document frequency equals {@link Const#NULLLONG} are skipped.
     * Does nothing when caching has been disabled.
     */
    public void readCache() {
        if (cache == null) {
            // dontCache() was called; there is no cache to fill.
            return;
        }
        ArrayList<Integer> termids = repository.configuredIntList("repository.cachedtermids");
        if (termids.size() > 0) {
            ArrayList<Long> df = repository.configuredLongList("repository.cachedtermdfs");
            // The two lists are parallel; stop at the shorter one.
            for (int i = 0; i < termids.size() && i < df.size(); i++) {
                Long value = df.get(i);
                // Compare the unboxed value so this is never a reference comparison.
                if (value != null && value.longValue() != Const.NULLLONG) {
                    cache.put(termids.get(i), value);
                }
            }
        }
    }

    /**
     * Writes the cache to the configuration. The configured termID list is
     * extended with every cached termID it does not yet contain, and a parallel
     * list of document frequencies is stored next to it; termIDs without a
     * cached value get {@link Const#NULLLONG}.
     * Does nothing when caching has been disabled.
     */
    public void writeCache() {
        if (cache == null) {
            // dontCache() was called; nothing to persist, and leave the
            // configured lists untouched rather than overwrite them with nulls.
            return;
        }
        ArrayList<Integer> termids = repository.configuredIntList("repository.cachedtermids");
        for (Integer termid : cache.keySet()) {
            if (!termids.contains(termid)) {
                termids.add(termid);
            }
        }
        if (termids.size() > 0) {
            ArrayList<Long> df = new ArrayList<Long>(termids.size());
            for (Integer termid : termids) {
                Long value = cache.get(termid);
                if (value == null) {
                    value = Const.NULLLONG;
                }
                df.add(value);
            }
            repository.getConf().setIntList("repository.cachedtermids", termids);
            repository.getConf().setLongList("repository.cachedtermdfs", df);
        }
    }

    /**
     * Disables caching by discarding the cache; subsequent reads always go to
     * the file.
     */
    public void dontCache() {
        cache = null;
    }

    /**
     * Writes one document frequency to the file's {@code df} field.
     *
     * @param df the document frequency to write
     */
    public void write(Long df) {
        file.df.write(df);
    }

    /**
     * Returns the document frequency for the given termID. A cached value is
     * returned when available; otherwise the file is opened for reading if
     * needed, the record for {@code id} is read, and the value is added to the
     * cache when caching is enabled.
     *
     * @param id the termID
     * @return the document frequency of the term
     */
    public long readValue(int id) {
        Long value;
        if (cache != null) {
            value = cache.get(id);
            if (value != null) {
                return value;
            }
        }
        if (file == null) {
            openRead();
        }
        file.read(id);
        value = getValue();
        if (cache != null) {
            cache.put(id, value);
        }
        return value;
    }

    /**
     * @return the value currently held by the file's {@code df} field
     */
    public Long getValue() {
        return file.df.value;
    }

    /**
     * Opens the file for reading and asks it to load its contents into memory.
     */
    public void loadMem() {
        openRead();
        file.loadMem();
    }

    /**
     * Asks the file to release what {@link #loadMem()} loaded. Does nothing when
     * no file has been opened.
     */
    public void unloadMem() {
        if (file != null) {
            file.unloadMem();
        }
    }

    /**
     * Creates the structured file that backs this feature.
     *
     * @param datafile the underlying data file
     * @return a new {@link File} on top of {@code datafile}
     */
    @Override
    public File createFile(Datafile datafile) {
        return new File(datafile);
    }

    /**
     * Receives one term during the reduce phase and stores its document
     * frequency; the other arguments are not used by this feature.
     */
    @Override
    public void reduceInput(int id, String term, long cf, long df) {
        write(df);
    }

    /**
     * Opens the file for writing at the start of the reduce phase; the corpus
     * statistics passed in are not used by this feature.
     */
    @Override
    public void startReduce(long corpustermfreq, int corpusdocumentfrequency) {
        openWrite();
    }

    /**
     * Closes the file that was opened for writing by {@link #startReduce(long, int)}.
     */
    @Override
    public void finishReduce() {
        closeWrite();
    }

    /**
     * File layout of this feature: a single field {@code df} per record.
     */
    public static class File extends StructuredFileByteJumpTable {

        public CLongField df = this.addCLong("df");

        public File(Datafile df) {
            super(df);
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy