opennlp.uima.namefind.AbstractNameFinder Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.uima.namefind;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import opennlp.tools.util.Span;
import opennlp.uima.util.AnnotationComboIterator;
import opennlp.uima.util.AnnotationIteratorPair;
import opennlp.uima.util.AnnotatorUtil;
import opennlp.uima.util.UimaUtil;
abstract class AbstractNameFinder extends CasAnnotator_ImplBase {
protected final String name;
protected Type mSentenceType;
protected Type mTokenType;
protected Type mNameType;
protected Map mNameTypeMapping = Collections.emptyMap();
protected UimaContext context;
protected Logger mLogger;
private Boolean isRemoveExistingAnnotations;
AbstractNameFinder(String name) {
this.name = name;
}
protected void initialize() throws ResourceInitializationException {
}
public final void initialize(UimaContext context) throws ResourceInitializationException {
super.initialize(context);
this.context = context;
mLogger = context.getLogger();
if (mLogger.isLoggable(Level.INFO)) {
mLogger.log(Level.INFO, "Initializing the " + name + ".");
}
isRemoveExistingAnnotations = AnnotatorUtil.getOptionalBooleanParameter(
context, UimaUtil.IS_REMOVE_EXISTINGS_ANNOTAIONS);
if (isRemoveExistingAnnotations == null) {
isRemoveExistingAnnotations = false;
}
initialize();
}
/**
* Initializes the type system.
*/
public void typeSystemInit(TypeSystem typeSystem)
throws AnalysisEngineProcessException {
// sentence type
mSentenceType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem,
UimaUtil.SENTENCE_TYPE_PARAMETER);
// token type
mTokenType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem,
UimaUtil.TOKEN_TYPE_PARAMETER);
// name type
mNameType = AnnotatorUtil.getOptionalTypeParameter(context, typeSystem,
NameFinder.NAME_TYPE_PARAMETER);
String typeMapString = (String) context.getConfigParameterValue(
NameFinder.NAME_TYPE_MAP_PARAMETER);
if (typeMapString != null) {
Map nameTypeMap = new HashMap<>();
String[] mappings = typeMapString.split(",");
for (String mapping : mappings) {
String[] parts = mapping.split(":");
if (parts.length == 2) {
nameTypeMap.put(parts[0].trim(), typeSystem.getType(parts[1].trim()));
}
else {
mLogger.log(Level.WARNING,
String.format("Failed to parse a part of the type mapping [%s]", mapping));
}
}
mNameTypeMapping = Collections.unmodifiableMap(nameTypeMap);
}
if (mNameType == null && mNameTypeMapping.size() == 0) {
throw new AnalysisEngineProcessException(
new Exception("No name type or valid name type mapping configured!"));
}
}
protected void postProcessAnnotations(Span[] detectedNames,
AnnotationFS[] nameAnnotations) {
}
/**
* Called if the current document is completely processed.
*/
protected void documentDone(CAS cas) {
}
protected abstract Span[] find(CAS cas, String[] tokens);
/**
* Performs name finding on the given cas object.
*/
public final void process(CAS cas) {
if (isRemoveExistingAnnotations) {
final AnnotationComboIterator sentenceNameCombo = new AnnotationComboIterator(cas,
mSentenceType, mNameType);
List removeAnnotations = new LinkedList<>();
for (AnnotationIteratorPair annotationIteratorPair : sentenceNameCombo) {
for (AnnotationFS nameAnnotation : annotationIteratorPair.getSubIterator()) {
removeAnnotations.add(nameAnnotation);
}
}
for (AnnotationFS annotation : removeAnnotations) {
cas.removeFsFromIndexes(annotation);
}
}
final AnnotationComboIterator sentenceTokenCombo = new AnnotationComboIterator(cas,
mSentenceType, mTokenType);
for (AnnotationIteratorPair annotationIteratorPair : sentenceTokenCombo) {
final List sentenceTokenAnnotationList = new LinkedList<>();
final List sentenceTokenList = new LinkedList<>();
for (AnnotationFS tokenAnnotation : annotationIteratorPair.getSubIterator()) {
sentenceTokenAnnotationList.add(tokenAnnotation);
sentenceTokenList.add(tokenAnnotation.getCoveredText());
}
Span[] names = find(cas,
sentenceTokenList.toArray(new String[sentenceTokenList.size()]));
AnnotationFS[] nameAnnotations = new AnnotationFS[names.length];
for (int i = 0; i < names.length; i++) {
int startIndex = sentenceTokenAnnotationList.get(
names[i].getStart()).getBegin();
int endIndex = sentenceTokenAnnotationList.get(
names[i].getEnd() - 1).getEnd();
Type nameType = mNameTypeMapping.get(names[i].getType());
if (nameType == null) {
nameType = mNameType;
}
// Types in the model which are not mapped should be ignored,
// this allows the usage of only some types in the model
if (nameType != null) {
nameAnnotations[i] = cas.createAnnotation(nameType, startIndex, endIndex);
cas.getIndexRepository().addFS(nameAnnotations[i]);
}
}
postProcessAnnotations(names, nameAnnotations);
}
documentDone(cas);
}
}