All downloads are free. The search and download functionality uses the official Maven repository.

justhalf.nlp.lemmatizer.BioLemmatizer Maven / Gradle / Ivy

package justhalf.nlp.lemmatizer;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.StringFormatterMessageFactory;

import edu.ucdenver.ccp.nlp.biolemmatizer.LemmataEntry;

/**
 * An implementation of {@link Lemmatizer} using BioLemmatizer
* * This assumes a medical domain. */ public class BioLemmatizer extends EnglishLemmatizer { public static final Logger LOGGER = LogManager.getLogger(BioLemmatizer.class, StringFormatterMessageFactory.INSTANCE); edu.ucdenver.ccp.nlp.biolemmatizer.BioLemmatizer lemmatizer; public BioLemmatizer() { LOGGER.info("Loading BioLemmatizer..."); long start = System.nanoTime(); lemmatizer = new edu.ucdenver.ccp.nlp.biolemmatizer.BioLemmatizer(); long end = System.nanoTime(); LOGGER.info("Loading BioLemmatizer done in %.3fs", (end-start)/1e9); } @Override public String lemmatize(String word) { for(String pos: new String[]{"VBZ", "NNS", "VBD", "VBG", "JJR", "JJS", "RBR", "RBS"}){ String lemma = lemmatize(word, pos); if(!lemma.equals(word)){ return lemma; } } return word; } @Override public String lemmatize(String word, String pos) { LemmataEntry entry = lemmatizer.lemmatizeByLexiconAndRules(word, pos); return entry.getLemmas().iterator().next().getLemma(); } @Override public boolean isThreadSafe() { return true; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy