// justhalf.nlp.depparser.MedicalDepParser Maven / Gradle / Ivy
package justhalf.nlp.depparser;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.StringFormatterMessageFactory;
import edu.emory.clir.clearnlp.component.mode.dep.AbstractDEPParser;
import edu.emory.clir.clearnlp.component.mode.dep.DEPConfiguration;
import edu.emory.clir.clearnlp.component.utils.GlobalLexica;
import edu.emory.clir.clearnlp.component.utils.NLPUtils;
import edu.emory.clir.clearnlp.dependency.DEPFeat;
import edu.emory.clir.clearnlp.dependency.DEPNode;
import edu.emory.clir.clearnlp.dependency.DEPTree;
import edu.emory.clir.clearnlp.util.lang.TLanguage;
import edu.stanford.nlp.international.Language;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.trees.EnglishGrammaticalRelations;
import edu.stanford.nlp.trees.EnglishGrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalRelation;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.TreeGraphNode;
import edu.stanford.nlp.trees.TypedDependency;
/**
* An implementation of {@link DepParser} backed by ClearNLP.
*
* <p>The underlying dependency parser was trained on the medical domain.
* See https://clearnlp.wikispaces.com/models for more details.
*/
public class MedicalDepParser implements DepParser {
/**
* Class logger. It is created with {@link StringFormatterMessageFactory}, and the
* log calls in this class use printf-style placeholders ({@code %s}, {@code %.3f}).
*/
public static final Logger LOGGER = LogManager.getLogger(MedicalDepParser.class, StringFormatterMessageFactory.INSTANCE);
/** Name of the ClearNLP configuration resource used by the no-argument constructor. */
public static final String DEFAULT_CONFIG_FILE = "clearnlp-config_decode_med_dep.xml";
/** ClearNLP dependency parser; assigned in the two-argument constructor and used by {@code parse}. */
public AbstractDEPParser parser;
/**
* Language passed to the constructor.
* NOTE(review): in the code visible here it is only stored; the ClearNLP model is
* always requested with {@code TLanguage.ENGLISH} - confirm whether this is intended.
*/
public Language language;
/**
* Creates a parser configured from {@link #DEFAULT_CONFIG_FILE} with
* {@link Language#English}, by delegating to the two-argument constructor.
*/
public MedicalDepParser(){
this(DEFAULT_CONFIG_FILE, Language.English);
}
/**
 * Creates a parser from the given ClearNLP configuration.
 *
 * <p>The configuration is opened twice: once to build the {@link DEPConfiguration}
 * and once to initialise {@link GlobalLexica}. Each stream is closed as soon as it
 * has been consumed. The model {@code "medical-en-dep.xz"} is then loaded through
 * {@link NLPUtils#getDEPParser}.
 *
 * <p>Initialisation failures are propagated instead of being printed and ignored.
 * Previously an {@link IOException} left {@link #parser} {@code null}, and the
 * problem only surfaced later as a {@link NullPointerException} while parsing.
 *
 * @param configFile location of the ClearNLP decode configuration, resolved by
 *        {@code IOUtils.getInputStreamFromURLOrClasspathOrFileSystem}
 * @param language language recorded on this instance. NOTE(review): the model is
 *        always loaded with {@code TLanguage.ENGLISH} regardless of this value
 * @throws UncheckedIOException if the configuration or the model cannot be read
 */
public MedicalDepParser(String configFile, Language language) {
    LOGGER.info("Initializing MedicalDepParser using the config %s", configFile);
    long start = System.nanoTime();
    this.language = language;
    try {
        DEPConfiguration config;
        // Closing is harmless even if the consumer already closed the stream:
        // Closeable.close() on a closed stream has no effect.
        try (InputStream configStream = IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(configFile)) {
            config = new DEPConfiguration(configStream);
        }
        try (InputStream lexicaStream = IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(configFile)) {
            GlobalLexica.init(lexicaStream);
        }
        parser = NLPUtils.getDEPParser(TLanguage.ENGLISH, "medical-en-dep.xz", config);
    } catch (IOException e) {
        // Fail fast with the cause attached rather than leaving a half-built instance.
        throw new UncheckedIOException("Failed to initialize MedicalDepParser using the config " + configFile, e);
    }
    long end = System.nanoTime();
    LOGGER.info("Initializing MedicalDepParser done in %.3fs", (end-start)/1e9);
}
/**
 * Reports whether a single instance may be shared between threads.
 *
 * @return always {@code true}
 */
@Override
public boolean isThreadSafe() {
    return true;
}
/**
* Runs the ClearNLP dependency parser over one sentence.
*
* Every token must already carry a POS tag: {@code check} throws
* {@link IllegalStateException} when a tag is null or empty.
*
* NOTE(review): this copy of the source is damaged. Generic type arguments are
* missing (the loop below iterates the elements as {@link CoreLabel}), and the
* conversion loop near the end is truncated, so the return statement of this
* method is not visible here. Restore the method from the original repository
* before relying on this documentation.
*/
@Override
public List parse(List sentence) {
// Reject sentences that contain untagged tokens before doing any work.
check(sentence);
List words = new ArrayList();
// Node ids start at 1; id is incremented before each node is created.
int id=0;
for(CoreLabel wordLabel: sentence){
id++;
DEPNode node = new DEPNode(id, wordLabel.word(), wordLabel.tag(), new DEPFeat());
words.add(node);
}
DEPTree tree = new DEPTree(words);
parser.process(tree);
// The rest of this method is to convert into Stanford TypedDependency
// A synthetic "ROOT" word with index 0 is created for the Stanford side.
IndexedWord root = new IndexedWord(new Word("ROOT"));
root.setIndex(0);
List dependencies = new ArrayList();
// NOTE(review): the next line is corrupted. The text between "i" and
// "dependencies" is missing, which fuses the end of parse(...) with the
// signature of a following method that takes (dependencies, rootNode) and
// returns a new EnglishGrammaticalStructure. Do not guess; recover the original.
for(int i=0; i dependencies, TreeGraphNode rootNode) {
return new EnglishGrammaticalStructure(dependencies, rootNode);
}
/**
 * Verifies that every token in the sentence carries a POS tag.
 *
 * <p>The parameter is typed as {@code List<CoreLabel>}; with a raw {@code List}
 * the for-each loop over {@link CoreLabel} elements is ill-typed.
 *
 * @param sentence tokens to validate
 * @throws IllegalStateException if any token's tag is {@code null} or empty
 */
private void check(List<CoreLabel> sentence){
    for (CoreLabel word : sentence) {
        String tag = word.tag();
        if (tag == null || tag.isEmpty()) {
            throw new IllegalStateException("MedicalDepParser requires every word in the sentence to have a POS tag");
        }
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy