All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.github.repir.Repository.VocabularyToID Maven / Gradle / Ivy

The newest version!
package io.github.repir.Repository;

import io.github.repir.tools.extract.ExtractChannel;
import io.github.repir.tools.io.struct.StructuredFile;
import io.github.repir.tools.lib.Log;
import java.util.ArrayList;
import io.github.repir.tools.lib.ArrayTools;

/**
 * Abstract class for looking up the TermID in a vocabulary based on the stemmed
 * term string. This adds {@link #getContent(Extractor.EntityChannel)} which is 
 * used by the standard extractor to convert an EntityChannel consisting of 
 * string tokens to an array of TermID's.
 * 

* @author jer * @param */ public abstract class VocabularyToID extends StoredUnreportableFeature implements DictionaryFeature { public static Log log = new Log(VocabularyToID.class); protected VocabularyToID(Repository repository) { super(repository); } public abstract int get(String term); public int[] getContent(ExtractChannel dc) { ArrayList r = new ArrayList(); int p = 0; if (dc != null) { for (String chunk : dc) { int termid = get(chunk.toString()); if (termid >= 0) { r.add(termid); } else { //log.info("unknown word %s", chunk); } } } return ArrayTools.toIntArray(r); } public boolean exists(String term) { return get(term) >= 0; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy