All downloads are free. Search and download functionality uses the official Maven repository.

edu.utah.bmi.nlp.fastcner.uima.FastCNER_AE_General Maven / Gradle / Ivy

/*
 * Copyright  2017  Department of Biomedical Informatics, University of Utah
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.utah.bmi.nlp.fastcner.uima;

import edu.utah.bmi.nlp.core.*;
import edu.utah.bmi.nlp.fastcner.FastCNER;
import edu.utah.bmi.nlp.fastner.uima.FastNER_AE_General;
import edu.utah.bmi.nlp.uima.ae.RuleBasedAEInf;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.examples.SourceDocumentInformation;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;

import java.util.*;

/**
 * This is a demo to use FastCNER.java in UIMA AE. The type system is implemented through reflection.
 *
 * <p>Compared with its parent {@link FastNER_AE_General}, this annotator builds a
 * {@link FastCNER} engine and exposes two extra configuration parameters,
 * {@link #PARAM_REPLICATION_SUPPORT} and {@link #PARAM_MAXREPEATLENGTH}.
 *
 * @author Jianlin Shi
 */
public class FastCNER_AE_General extends FastNER_AE_General {

    /** Configuration parameter name; when the parameter is absent, {@code true} is used. */
    public static final String PARAM_REPLICATION_SUPPORT = "ReplicationSupport";
    protected boolean replicationSupport;

    /** Configuration parameter name; when the parameter is absent, {@code 50} is used. */
    public static final String PARAM_MAXREPEATLENGTH = "MaxRepeatLength";
    protected int maxRepeatLength;

    /**
     * Delegates initialization entirely to the parent annotator.
     *
     * @param cont the UIMA context handed over by the framework
     */
    @Override
    public void initialize(UimaContext cont) {
        super.initialize(cont);
    }

    /**
     * Reads the FastCNER specific parameters from the context, builds the
     * {@link FastCNER} engine from the rule string and stores it in {@code fastNER}.
     *
     * <p>NOTE(review): the return type is kept raw exactly as it appears in this copy of
     * the file; its type arguments may have been lost -- confirm against the parent class.
     *
     * @param cont    the UIMA context to read configuration parameters from
     * @param ruleStr the rule definition passed to the {@link FastCNER} constructor
     * @return the type definitions reported by the newly created engine
     */
    protected LinkedHashMap initFastNER(UimaContext cont, String ruleStr) {
        Object replicationValue = cont.getConfigParameterValue(PARAM_REPLICATION_SUPPORT);
        if (replicationValue == null) {
            replicationSupport = true;
        } else {
            replicationSupport = (Boolean) replicationValue;
        }

        Object repeatLengthValue = cont.getConfigParameterValue(PARAM_MAXREPEATLENGTH);
        if (repeatLengthValue == null) {
            maxRepeatLength = 50;
        } else {
            maxRepeatLength = (Integer) repeatLengthValue;
        }

        FastCNER cner = new FastCNER(ruleStr);
        cner.setReplicationSupport(replicationSupport);
        cner.setMaxRepeatLength(maxRepeatLength);
        fastNER = cner;
        if (markPseudo) {
            fastNER.setRemovePseudo(false);
        }
        return fastNER.getTypeDefinitions();
    }

    /**
     * Runs FastCNER over the document held by {@code jcas}.
     *
     * <p>Sentence annotations are grouped by the name of the section that contains them.
     * When no sections were indexed, all sentences are grouped under the simple name of
     * {@link SourceDocumentInformation}; when sections were indexed, sentences that fall
     * outside every section are skipped. Concepts found in a section that is not selected
     * by {@code includeSections}/{@code excludeSections} are stored through
     * {@code saveOutsideScopeConcepts}, all others through {@code saveConcepts}.
     *
     * <p>If the document has no sentence annotation at all, the text is segmented with
     * {@link SimpleParser}, "Sentence" and "Token" annotations are saved for the result,
     * and every sentence span is processed without a section name.
     *
     * @param jcas the CAS to read sentences from and to write concepts to
     * @throws AnalysisEngineProcessException declared by the annotator contract
     */
    public void process(JCas jcas)
            throws AnalysisEngineProcessException {
        FastCNER cner = (FastCNER) fastNER;

        IntervalST<String> sectionTree = new IntervalST<>();
        int totalSections = 0;
        if (assignSection || forceAssignSections) {
            totalSections = indexSections(jcas, sectionTree);
        }

        // Group sentences by section name, preserving the order of first appearance.
        LinkedHashMap<String, ArrayList<Annotation>> sentences = new LinkedHashMap<>();
        FSIndex annoIndex = jcas.getAnnotationIndex(SentenceType);
        Iterator annoIter = annoIndex.iterator();
        int totalSentences = 0;
        while (annoIter.hasNext()) {
            Annotation sentence = (Annotation) annoIter.next();
            // Counted before the section check, so skipped sentences still count.
            totalSentences++;
            String sectionName = sectionTree.get(new Interval1D(sentence.getBegin(), sentence.getEnd()));
            if (sectionName == null) {
                if (totalSections != 0) {
                    // Sections exist, but this sentence is outside of all of them.
                    continue;
                }
                sectionName = SourceDocumentInformation.class.getSimpleName();
            }
            ArrayList<Annotation> sectionSentences = sentences.get(sectionName);
            if (sectionSentences == null) {
                sectionSentences = new ArrayList<>();
                sentences.put(sectionName, sectionSentences);
            }
            sectionSentences.add(sentence);
        }

        if (totalSentences > 0) {
            for (Map.Entry<String, ArrayList<Annotation>> section : sentences.entrySet()) {
                String sectionName = section.getKey();
                // An include list, when present, takes precedence over the exclude list.
                boolean inScope;
                if (includeSections.size() > 0) {
                    inScope = includeSections.contains(sectionName);
                } else {
                    inScope = excludeSections.size() == 0 || !excludeSections.contains(sectionName);
                }
                for (Annotation sentence : section.getValue()) {
                    HashMap<String, ArrayList<Span>> concepts = cner.processAnnotation(sentence);
                    // store found concepts in annotation
                    if (concepts.size() > 0) {
                        if (inScope) {
                            saveConcepts(jcas, concepts, sectionName);
                        } else {
                            saveOutsideScopeConcepts(jcas, concepts, sectionName);
                        }
                    }
                }
            }
        } else {
            Collection<SourceDocumentInformation> docAnnotation =
                    JCasUtil.select(jcas, SourceDocumentInformation.class);
            if (docAnnotation != null && docAnnotation.size() > 0) {
                logger.info("Document: " + docAnnotation.iterator().next().getUri()
                        + " has not been properly sentence segmented. Use simple segmenter instead.");
            }
            String text = jcas.getDocumentText();
            ArrayList<ArrayList<Span>> simpleSentences = SimpleParser.tokenizeDecimalSmartWSentences(text, true);
            for (ArrayList<Span> sentence : simpleSentences) {
                if (sentence.isEmpty()) {
                    // Nothing to anchor a sentence span on; get(0) would throw.
                    continue;
                }
                Span sentenceSpan = new Span(sentence.get(0).begin, sentence.get(sentence.size() - 1).end);
                sentenceSpan.text = text.substring(sentenceSpan.begin, sentenceSpan.end);
                saveConcept(jcas, "Sentence", sentenceSpan.begin, sentenceSpan.end, null);
                for (Span token : sentence) {
                    saveConcept(jcas, "Token", token.begin, token.end, null);
                }
                HashMap<String, ArrayList<Span>> concepts = cner.processSpan(sentenceSpan);
                // store found concepts in annotation
                if (concepts.size() > 0) {
                    saveConcepts(jcas, concepts, null);
                }
            }
        }
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy