
// io.github.repir.Repository.VocabularyToID Maven / Gradle / Ivy
// The newest version!
package io.github.repir.Repository;
import io.github.repir.tools.extract.ExtractChannel;
import io.github.repir.tools.io.struct.StructuredFile;
import io.github.repir.tools.lib.Log;
import java.util.ArrayList;
import io.github.repir.tools.lib.ArrayTools;
/**
 * Abstract class for looking up the TermID in a vocabulary based on the stemmed
 * term string. This adds {@link #getContent(ExtractChannel)}, which is used by
 * the standard extractor to convert an ExtractChannel consisting of string
 * tokens to an array of TermIDs.
 *
 * @author jer
 */
public abstract class VocabularyToID extends StoredUnreportableFeature implements DictionaryFeature {

    public static Log log = new Log(VocabularyToID.class);

    /**
     * Passes the repository on to the superclass constructor.
     *
     * @param repository the repository handed to the superclass
     */
    protected VocabularyToID(Repository repository) {
        super(repository);
    }

    /**
     * Looks up the TermID of a term.
     * <p>
     * Both {@link #getContent(ExtractChannel)} and {@link #exists(String)}
     * treat a negative return value as "term unknown", so implementations are
     * expected to follow that convention.
     *
     * @param term the term string to look up
     * @return the TermID of the term; a negative value is interpreted by this
     * class as "not in the vocabulary"
     */
    public abstract int get(String term);

    /**
     * Converts the tokens of a channel to an array of TermIDs, in iteration
     * order. Tokens for which {@link #get(String)} returns a negative value
     * are skipped, so the result can be shorter than the channel.
     *
     * @param dc the channel whose tokens are converted; may be {@code null},
     * in which case no tokens are converted
     * @return the TermIDs of the known tokens, as produced by
     * {@code ArrayTools.toIntArray} from the collected list
     */
    public int[] getContent(ExtractChannel dc) {
        // Declared as ArrayList<Integer> (not raw) so add() is type-checked.
        ArrayList<Integer> termIds = new ArrayList<Integer>();
        if (dc != null) {
            for (String chunk : dc) {
                int termId = get(chunk);
                // Unknown tokens (negative id) are dropped silently.
                if (termId >= 0) {
                    termIds.add(termId);
                }
            }
        }
        return ArrayTools.toIntArray(termIds);
    }

    /**
     * Tells whether a term is known, i.e. whether {@link #get(String)} returns
     * a non-negative TermID for it.
     *
     * @param term the term string to look up
     * @return {@code true} if {@code get(term) >= 0}
     */
    public boolean exists(String term) {
        return get(term) >= 0;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy