// opennlp.uima.util.AnnotationComboIterator (Maven / Gradle / Ivy artifact listing header)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.uima.util;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
/**
* UIMA Annotation iterator combination of super- and subiterator.
*
*
* This class supports a common idiom in UIMA annotation iteration, where you need to iterate over
* two kinds of annotations in lock-step. For example, you often want to iterate over all sentences,
* then do something on each sentence and all tokens in that sentence. Here's how to do this with
* this class.
*
*
* <pre>
* CAS cas = ...
* Type sentenceType = ..., tokenType = ...
*
* // Init with CAS, upper and lower type.
* AnnotationComboIterator it = new AnnotationComboIterator(cas, sentenceType, tokenType);
*
* // Iterate over sentences
* for (AnnotationIteratorPair aiPair : it) {
*   // Obtain sentence annotation
*   AnnotationFS sentence = aiPair.getAnnotation();
*   // Do something with sentence...
*
*   // Iterate over tokens
*   for (AnnotationFS token : aiPair.getSubIterator()) {
*     // Do something with tokens...
*   }
* }
* </pre>
*
*
* The combo iterator returns in its {@code next()} method a pair of an annotation of the upper
* type (e.g., sentence), and an iterator over annotations of the lower type (e.g., tokens). Note
* that both the upper and lower iterator also implement the Iterable interface and can be used
* directly in for-loops.
*
*
* Note that only this usage is safe. To keep the implementation efficient, the combo iterator keeps
* two iterators internally that it increments in lock-step. Do not attempt, for example, to collect
* more than one of the subiterators (token iterator in our example). Do not use this class if your
* intended usage does not fall into this pattern.
*/
public class AnnotationComboIterator implements Iterable<AnnotationIteratorPair>,
    Iterator<AnnotationIteratorPair> {

  /**
   * Internal implementation of the subiterator (e.g., the token iterator).
   *
   * <p>Instances hold no state of their own: the cursor ({@code lowerIt}), the current upper
   * bounds and the look-ahead flags all live on the enclosing combo iterator. Every subiterator
   * handed out therefore advances the same underlying lower iterator.
   */
  private class AnnotationIterator implements Iterable<AnnotationFS>, Iterator<AnnotationFS> {

    private AnnotationIterator() {
      super();
    }

    /**
     * Returns this object so the subiterator can be used directly in a for-each loop.
     *
     * @return this iterator
     */
    @Override
    public AnnotationIterator iterator() {
      return this;
    }

    /**
     * Checks whether the lower iterator currently points at an annotation that lies within the
     * current upper annotation. The result is cached until {@link #next()} consumes it or the
     * enclosing iterator moves on to the next upper annotation.
     *
     * @return {@code true} if a lower annotation within the current upper bounds is available
     */
    @Override
    public boolean hasNext() {
      // Answer from the cached look-ahead if we already computed it.
      if (nextLowerChecked) {
        return nextLowerAvailable;
      }
      nextLowerChecked = true;
      nextLowerAvailable = false;
      if (lowerIt.isValid()) {
        AnnotationFS lowerFS = lowerIt.get();
        int lowerBegin = lowerFS.getBegin();
        // Skip lower annotations that start before the current upper annotation.
        while (lowerBegin < upperBegin) {
          lowerIt.moveToNext();
          if (lowerIt.isValid()) {
            lowerFS = lowerIt.get();
            lowerBegin = lowerFS.getBegin();
          } else {
            // Lower iterator exhausted; the cached answer stays "not available".
            return false;
          }
        }
        // Only accept the candidate if it also ends within the upper annotation.
        if (upperEnd >= lowerFS.getEnd()) {
          nextLowerAvailable = true;
        }
      }
      return nextLowerAvailable;
    }

    /**
     * Returns the lower annotation the iterator currently points at and advances the shared
     * lower iterator by one position.
     *
     * @return the next lower annotation within the current upper annotation
     * @throws NoSuchElementException if no such annotation is available
     */
    @Override
    public AnnotationFS next() {
      if (nextLowerChecked) {
        if (!nextLowerAvailable) {
          throw new NoSuchElementException();
        }
      } else if (!hasNext()) {
        throw new NoSuchElementException();
      }
      // Invalidate the look-ahead: the cursor is about to move.
      nextLowerChecked = false;
      final AnnotationFS rv = lowerIt.get();
      lowerIt.moveToNext();
      return rv;
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }
  }

  // The upper iterator (e.g., sentence iterator)
  private final FSIterator<AnnotationFS> upperIt;

  // The lower iterator (e.g., token iterator)
  private final FSIterator<AnnotationFS> lowerIt;

  // Start position of current upper (e.g., sentence) annotation. Together with the end position,
  // this determines the boundaries for the lower iterator.
  private int upperBegin;

  // End position of current upper annotation.
  private int upperEnd;

  // Have we already checked that a next lower annotation is available? Premature optimization...
  private boolean nextLowerChecked = false;

  // State variable that holds the status of the lower iterator only in case that nextLowerChecked
  // is true.
  private boolean nextLowerAvailable = false;

  /**
   * Create a new combo iterator.
   *
   * @param cas
   *          The CAS we're operating on.
   * @param upper
   *          The type of the upper iterator, e.g., sentence.
   * @param lower
   *          The type of the lower iterator, e.g., token.
   */
  public AnnotationComboIterator(CAS cas, Type upper, Type lower) {
    this.upperIt = cas.getAnnotationIndex(upper).iterator();
    this.lowerIt = cas.getAnnotationIndex(lower).iterator();
    this.upperIt.moveToFirst();
    this.lowerIt.moveToFirst();
    if (this.upperIt.isValid()) {
      final AnnotationFS upperFS = this.upperIt.get();
      this.upperBegin = upperFS.getBegin();
      this.upperEnd = upperFS.getEnd();
    } else {
      // No upper annotation at all: mark the lower look-ahead as already done, with
      // nextLowerAvailable left at false.
      this.nextLowerChecked = true;
    }
  }

  /**
   * Checks whether another upper annotation is available.
   *
   * @return {@code true} if the upper iterator has a further annotation
   */
  @Override
  public boolean hasNext() {
    return this.upperIt.hasNext();
  }

  /**
   * Advances to the next upper annotation and pairs it with a subiterator over the lower
   * annotations inside it. Resets the lower look-ahead so the subiterator re-evaluates against
   * the new upper bounds.
   *
   * @return the next upper annotation together with its subiterator
   * @throws NoSuchElementException if there is no further upper annotation
   */
  @Override
  public AnnotationIteratorPair next() {
    if (!this.upperIt.hasNext()) {
      throw new NoSuchElementException();
    }
    final AnnotationFS upperFS = this.upperIt.next();
    this.upperBegin = upperFS.getBegin();
    this.upperEnd = upperFS.getEnd();
    this.nextLowerChecked = false;
    return new AnnotationIteratorPair(upperFS, new AnnotationIterator());
  }

  /**
   * Returns this object so the combo iterator can be used directly in a for-each loop.
   *
   * @return this iterator
   */
  @Override
  public Iterator<AnnotationIteratorPair> iterator() {
    return this;
  }

  /**
   * Not supported.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }
}