All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.utah.bmi.nlp.fastner.FastRuleWOG Maven / Gradle / Ivy

/*
 * Copyright  2017  Department of Biomedical Informatics, University of Utah
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.utah.bmi.nlp.fastner;

import edu.utah.bmi.nlp.core.NERRule;
import edu.utah.bmi.nlp.core.NERSpan;
import edu.utah.bmi.nlp.core.Rule;
import edu.utah.bmi.nlp.core.Span;
import edu.utah.bmi.nlp.fastcner.UnicodeChecker;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.function.BiFunction;

import static edu.utah.bmi.nlp.core.NERSpan.byRuleLength;
import static edu.utah.bmi.nlp.core.NERSpan.scorewidth;

/**
 * A {@link FastRule} variant that treats the full rule as the match; it does not
 * consider group capturing.
 *
 * <p>Rules are stored in the nested-map trie {@code rulesMap}: each whitespace-separated
 * rule token is a key leading to the next level, and the key {@code END} marks a level
 * where one or more rules terminate, mapping each rule name to its rule id.
 * {@code rulesMap}, {@code END}, {@code ruleStore}, {@code removePseudo}, {@code logger},
 * {@code initiate(...)}, {@code getRule(...)}, {@code removePseudoMatches(...)} and the
 * text/offset accessor functions are not declared in this file and are expected to come
 * from {@link FastRule}.
 *
 * <p>NOTE(review): the generic type arguments in this file were reconstructed from how
 * the values are used here (String match keys, {@link Span} list elements, Integer rule
 * ids and offsets). Confirm that every signature agrees exactly with the corresponding
 * declaration in {@link FastRule}, in particular the {@code contextTokens} parameter
 * types and the {@code BiFunction} type arguments.
 *
 * @author Jianlin Shi
 */
@SuppressWarnings("rawtypes")
public class FastRuleWOG extends FastRule {

    // Other fields are defined in the parent class.

    /** Number of whitespace-separated tokens in each rule, keyed by rule id. */
    protected HashMap<Integer, Integer> ruleLengths = new HashMap<Integer, Integer>();

    /** Comparison method handed to every {@link NERSpan} this class creates. */
    protected String spanCompareMethod = scorewidth;

    /** Width comparison method handed to every {@link NERSpan} this class creates. */
    protected String widthCompareMethod = byRuleLength;

    /** Creates an instance without loading any rule. */
    public FastRuleWOG() {
    }

    /**
     * Creates an instance by calling {@code initiate(ruleStr, false)}.
     *
     * @param ruleStr rule definition passed through to {@code initiate}
     */
    public FastRuleWOG(String ruleStr) {
        initiate(ruleStr, false);
    }

    /**
     * Creates an instance by calling {@code initiate(ruleStr, caseSensitive)}.
     *
     * @param ruleStr       rule definition passed through to {@code initiate}
     * @param caseSensitive case-sensitivity flag passed through to {@code initiate}
     */
    public FastRuleWOG(String ruleStr, boolean caseSensitive) {
        initiate(ruleStr, caseSensitive);
    }

    /**
     * Creates an instance by calling {@code initiate(ruleStore)}.
     *
     * @param ruleStore rules keyed by rule id
     */
    public FastRuleWOG(HashMap<Integer, NERRule> ruleStore) {
        initiate(ruleStore);
    }

    /**
     * Sets the comparison method applied to spans created after this call.
     *
     * @param method value later passed to {@link NERSpan#setCompareMethod}
     */
    public void setCompareMethod(String method) {
        this.spanCompareMethod = method;
    }

    /**
     * Sets the width comparison method applied to spans created after this call.
     *
     * @param widthCompareMethod value later passed to {@link NERSpan#setWidthCompareMethod}
     */
    public void setWidthCompareMethod(String widthCompareMethod) {
        this.widthCompareMethod = widthCompareMethod;
    }

    /**
     * Inserts a rule into the {@code rulesMap} trie and records its token count in
     * {@link #ruleLengths}.
     *
     * <p>The rule text is split on whitespace. The existing trie is followed for as long
     * as it already contains the rule's tokens; the remaining tokens are appended as a new
     * chain that ends in an {@code END} entry mapping the rule name to the rule id. If the
     * whole token chain already exists, the rule is added to (or creates) the {@code END}
     * entry at that level; an existing entry with the same rule name is overwritten.
     *
     * @param rule the rule to add; its {@code rule}, {@code ruleName} and {@code id}
     *             fields are read
     * @return always {@code true}
     */
    protected boolean addRule(Rule rule) {
        String[] ruleContent = rule.rule.split("\\s+");
        int length = ruleContent.length;
        ruleLengths.put(rule.id, length);

        // Follow the prefix of the rule that the trie already holds.
        HashMap node = rulesMap;
        int i = 0;
        while (i < length && node != null && node.containsKey(ruleContent[i])) {
            node = (HashMap) node.get(ruleContent[i]);
            i++;
        }

        if (i == length) {
            // The whole token chain exists; only the terminal entry needs updating.
            if (node.containsKey(END)) {
                ((HashMap) node.get(END)).put(rule.ruleName, rule.id);
            } else {
                HashMap terminal = new HashMap();
                terminal.put(rule.ruleName, rule.id);
                node.put(END, terminal);
            }
            return true;
        }

        // Build the missing tail from the last token backwards: the innermost level is
        // {END -> {ruleName -> id}}, and each remaining token wraps the level below it.
        HashMap terminal = new HashMap();
        terminal.put(rule.ruleName, rule.id);
        HashMap tail = new HashMap();
        tail.put(END, terminal);
        for (int j = length - 1; j > i; j--) {
            HashMap wrapper = new HashMap();
            wrapper.put(ruleContent[j], tail);
            tail = wrapper;
        }
        node.put(ruleContent[i], tail);
        return true;
    }

    /**
     * Matches the rules against a list of tokens, trying every position as a match start.
     *
     * <p>Offsets of the returned spans are produced by {@code getBeginId} and
     * {@code getEndId}. If {@code removePseudo} is set, {@code removePseudoMatches} is
     * applied to the result before it is returned.
     *
     * @param contextTokens the tokens to scan
     * @return matched spans grouped by rule name; empty if nothing matched
     */
    public HashMap<String, ArrayList<Span>> processTokens(ArrayList<String> contextTokens) {
        HashMap<String, ArrayList<Span>> matches = new HashMap<String, ArrayList<Span>>();
        for (int i = 0; i < contextTokens.size(); i++) {
            // -1 means that no explicit match end has been fixed yet.
            processTokens(contextTokens, rulesMap, i, -1, i, matches);
        }
        if (removePseudo) {
            removePseudoMatches(matches);
        }
        return matches;
    }

    /**
     * Runs {@link #process} over a token list using the {@code getStringText},
     * {@code getBeginId} and {@code getEndId} accessors.
     *
     * @param contextTokens   the tokens to scan
     * @param rule            the trie level to match from
     * @param matchBegin      index of the element where the current match attempt started
     * @param matchEnd        index of the last matched element, or {@code -1} if not fixed
     * @param currentPosition index of the element to examine next
     * @param matches         accumulator for matched spans, grouped by rule name
     */
    protected void processTokens(ArrayList<String> contextTokens, HashMap rule, int matchBegin, int matchEnd,
                                 int currentPosition, HashMap<String, ArrayList<Span>> matches) {
        process(contextTokens, getStringText, getBeginId, getEndId, rule,
                matchBegin, matchEnd, currentPosition, matches);
    }

    /**
     * Matches the rules against a list of spans, trying every position as a match start.
     *
     * <p>Offsets of the returned spans are produced by {@code getSpanBegin} and
     * {@code getSpanEnd}. If {@code removePseudo} is set, {@code removePseudoMatches} is
     * applied to the result before it is returned.
     *
     * @param contextTokens the spans to scan
     * @return matched spans grouped by rule name; empty if nothing matched
     */
    public HashMap<String, ArrayList<Span>> processSpans(ArrayList<Span> contextTokens) {
        HashMap<String, ArrayList<Span>> matches = new HashMap<String, ArrayList<Span>>();
        for (int i = 0; i < contextTokens.size(); i++) {
            // -1 means that no explicit match end has been fixed yet.
            processSpans(contextTokens, rulesMap, i, -1, i, matches);
        }
        if (removePseudo) {
            removePseudoMatches(matches);
        }
        return matches;
    }

    /**
     * Runs {@link #process} over a span list using the {@code getSpanText},
     * {@code getSpanBegin} and {@code getSpanEnd} accessors.
     *
     * @param contextTokens   the spans to scan
     * @param rule            the trie level to match from
     * @param matchBegin      index of the element where the current match attempt started
     * @param matchEnd        index of the last matched element, or {@code -1} if not fixed
     * @param currentPosition index of the element to examine next
     * @param matches         accumulator for matched spans, grouped by rule name
     */
    protected void processSpans(ArrayList<Span> contextTokens, HashMap rule, int matchBegin, int matchEnd,
                                int currentPosition, HashMap<String, ArrayList<Span>> matches) {
        process(contextTokens, getSpanText, getSpanBegin, getSpanEnd, rule,
                matchBegin, matchEnd, currentPosition, matches);
    }

    /**
     * Recursively walks the rule trie along the input, recording a match at every trie
     * level that contains {@code END}.
     *
     * <p>At each position the branches are tried in this order, and every applicable one
     * is followed: the {@code \w+} key (taken for any element), the {@code END} entry, the
     * element's own text as a literal key, and the {@code \d+} key (taken only when
     * {@link UnicodeChecker#isNumber} accepts the text). The order determines the order in
     * which spans reach {@link #addDeterminants} and therefore how overlaps are resolved.
     *
     * @param contextTokens   the elements to scan
     * @param getText         returns the text of the element at an index
     * @param getBegin        returns the begin offset for the element at an index
     * @param getEnd          returns the end offset for the element at an index
     * @param rule            the trie level to match from
     * @param matchBegin      index of the element where the current match attempt started
     * @param matchEnd        index of the last matched element, or {@code -1} to use the
     *                        element just before {@code currentPosition}
     * @param currentPosition index of the element to examine next
     * @param matches         accumulator for matched spans, grouped by rule name
     */
    protected void process(ArrayList<?> contextTokens,
                           BiFunction<ArrayList, Integer, String> getText,
                           BiFunction<ArrayList, Integer, Integer> getBegin,
                           BiFunction<ArrayList, Integer, Integer> getEnd,
                           HashMap rule, int matchBegin, int matchEnd, int currentPosition,
                           HashMap<String, ArrayList<Span>> matches) {
        int size = contextTokens.size();
        if (currentPosition < size) {
            String thisToken = getText.apply(contextTokens, currentPosition);

            // Wildcard: any element is consumed.
            if (rule.containsKey("\\w+")) {
                process(contextTokens, getText, getBegin, getEnd, (HashMap) rule.get("\\w+"),
                        matchBegin, matchEnd, currentPosition + 1, matches);
            }
            // One or more rules end at this level; the match covers the elements consumed so far.
            if (rule.containsKey(END)) {
                int lastMatched = matchEnd == -1 ? currentPosition - 1 : matchEnd;
                addDeterminants(rule, matches,
                        getBegin.apply(contextTokens, matchBegin),
                        getEnd.apply(contextTokens, lastMatched));
            }
            // Literal: the element text itself is a key at this level.
            if (rule.containsKey(thisToken)) {
                process(contextTokens, getText, getBegin, getEnd, (HashMap) rule.get(thisToken),
                        matchBegin, matchEnd, currentPosition + 1, matches);
            }
            // Digit wildcard: consumed only by numeric elements.
            if (rule.containsKey("\\d+") && UnicodeChecker.isNumber(thisToken)) {
                process(contextTokens, getText, getBegin, getEnd, (HashMap) rule.get("\\d+"),
                        matchBegin, matchEnd, currentPosition + 1, matches);
            }
        } else if (currentPosition == size && rule.containsKey(END)) {
            // The input is exhausted exactly where a rule ends.
            int lastMatched = matchEnd == -1 ? currentPosition - 1 : matchEnd;
            addDeterminants(rule, matches,
                    getBegin.apply(contextTokens, matchBegin),
                    getEnd.apply(contextTokens, lastMatched));
        }
    }

    /**
     * Records one span per rule that terminates at the given trie level, resolving
     * overlaps against the most recently recorded span of the same rule name.
     *
     * <p>For each rule name under {@code END}, a {@link NERSpan} is created from the given
     * offsets, the rule id, the rule's token count and the rule's score from
     * {@code ruleStore}. If spans already exist for that name, the new span is compared
     * with the last one only:
     * <ul>
     *   <li>if the new span ends before the last span ends, it is dropped;</li>
     *   <li>otherwise, if the last span ends after the new span begins, the new span
     *       replaces the last span when {@code compareTo} is positive and is dropped
     *       otherwise;</li>
     *   <li>otherwise the new span is appended.</li>
     * </ul>
     *
     * @param rule       the trie level that contains the {@code END} entry
     * @param matches    accumulator for matched spans, grouped by rule name
     * @param matchBegin begin offset of the match
     * @param matchEnd   end offset of the match
     */
    @SuppressWarnings("unchecked")
    protected void addDeterminants(HashMap rule, HashMap<String, ArrayList<Span>> matches,
                                   int matchBegin, int matchEnd) {
        HashMap<String, Integer> deterRule = (HashMap<String, Integer>) rule.get(END);
        for (String key : deterRule.keySet()) {
            int ruleId = deterRule.get(key);
            NERSpan currentSpan = new NERSpan(matchBegin, matchEnd, ruleId,
                    ruleLengths.get(ruleId), ruleStore.get(ruleId).score, "");
            currentSpan.setCompareMethod(spanCompareMethod);
            currentSpan.setWidthCompareMethod(widthCompareMethod);
            logger.finest(getRule(currentSpan.ruleId).toString());

            ArrayList<Span> currentSpanList = matches.get(key);
            if (currentSpanList == null) {
                currentSpanList = new ArrayList<Span>();
                matches.put(key, currentSpanList);
            } else {
                // The input is scanned left to right, so only the last recorded span
                // is checked for overlap with the new one.
                Span lastSpan = currentSpanList.get(currentSpanList.size() - 1);
                if (currentSpan.end < lastSpan.end) {
                    // The new span ends before the last span does.
                    continue;
                }
                if (lastSpan.end > currentSpan.begin) {
                    // Overlap: keep whichever span has priority.
                    if (currentSpan.compareTo((NERSpan) lastSpan) > 0) {
                        currentSpanList.remove(currentSpanList.size() - 1);
                    } else {
                        continue;
                    }
                }
            }
            currentSpanList.add(currentSpan);
        }
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy