All Downloads are FREE. Search and download functionalities are using the official Maven repository.

opennlp.uima.util.AnnotationComboIterator Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package opennlp.uima.util;

import java.util.Iterator;
import java.util.NoSuchElementException;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;

/**
 * UIMA Annotation iterator combination of super- and subiterator.
 * 
 * 

* This class supports a common idiom in UIMA annotation iteration, where you need to iterate over * two kinds of annotations in lock-step. For example, you often want to iterate over all sentences, * then do something on each sentence and all tokens in that sentence. Here's how to do this with * this class. * *

 * CAS cas = ...
 * Type sentenceType = ..., tokenType = ...
 * // Init with CAS, upper and lower type.
 * AnnotationComboIterator it = new AnnotationComboIterator(cas, sentenceType, tokenType);
 * // Iterate over sentences
 * for (AnnotationIteratorPair aiPair : it) {
 *   // Obtain sentence annotation
 *   AnnotationFS sentence = aiPair.getAnnotation();
 *   // Do something with sentence...
 * 
 *   // Iterate over tokens
 *   for (AnnotationFS token : aiPair.getSubIterator()) {
 *     // Do something with tokens...
 *   }
 * }
 * 
* * The combo iterator returns in its next() method a pair of an annotation of the upper * type (e.g., sentence), and an iterator over annotations of the lower type (e.g., tokens). Note * that both the upper and lower iterator also implement the Iterable interface and can be use * directly in for-loops. * *

* Note that only this usage is safe. To keep the implementation efficient, the combo iterator keeps * two iterators internally that it increments in lock-step. Do not attempt, for example, to collect * more than one of the subiterators (token iterator in our example). Do not use this class if your * intended usage does not fall into this pattern. */ public class AnnotationComboIterator implements Iterable, Iterator { // Internal implementation of subiterator private class AnnotationIterator implements Iterable, Iterator { private AnnotationIterator() { super(); } public AnnotationIterator iterator() { return this; } public boolean hasNext() { if (AnnotationComboIterator.this.nextLowerChecked) { return AnnotationComboIterator.this.nextLowerAvailable; } AnnotationComboIterator.this.nextLowerChecked = true; AnnotationComboIterator.this.nextLowerAvailable = false; if (AnnotationComboIterator.this.lowerIt.isValid()) { AnnotationFS lowerFS = (AnnotationFS) AnnotationComboIterator.this.lowerIt.get(); int lowerBegin = lowerFS.getBegin(); while (lowerBegin < AnnotationComboIterator.this.upperBegin) { AnnotationComboIterator.this.lowerIt.moveToNext(); if (AnnotationComboIterator.this.lowerIt.isValid()) { lowerFS = (AnnotationFS) AnnotationComboIterator.this.lowerIt.next(); lowerBegin = lowerFS.getBegin(); } else { return false; } } if (AnnotationComboIterator.this.upperEnd >= lowerFS.getEnd()) { AnnotationComboIterator.this.nextLowerAvailable = true; } } return AnnotationComboIterator.this.nextLowerAvailable; } public AnnotationFS next() { if (AnnotationComboIterator.this.nextLowerChecked) { if (!AnnotationComboIterator.this.nextLowerAvailable) { throw new NoSuchElementException(); } } else if (!hasNext()) { throw new NoSuchElementException(); } AnnotationComboIterator.this.nextLowerChecked = false; final AnnotationFS rv = (AnnotationFS) AnnotationComboIterator.this.lowerIt.get(); AnnotationComboIterator.this.lowerIt.moveToNext(); return rv; } public void remove() { throw new UnsupportedOperationException(); } } // The upper iterator (e.g., sentence iterator) private final FSIterator upperIt; // The lower iterator (e.g., token iterator) private final FSIterator lowerIt; // Start position of current upper (e.g., sentence) annotation. Together with the end position, // this determines the boundaries for the lower iterator. private int upperBegin; // End position of current upper annotation. private int upperEnd; // Have we already checked that a next lower annotation is available? Premature optimization... private boolean nextLowerChecked = false; // State variable that holds the status of the lower iterator only in case that nextLowerChecked // is true. private boolean nextLowerAvailable = false; /** * Create a new combo iterator. * * @param cas * The CAS we're operating on. * @param upper * The type of the upper iterator, e.g., sentence. * @param lower * The type of the lower iterator, e.g., token. */ public AnnotationComboIterator(CAS cas, Type upper, Type lower) { this.upperIt = cas.getAnnotationIndex(upper).iterator(); this.lowerIt = cas.getAnnotationIndex(lower).iterator(); this.upperIt.moveToFirst(); this.lowerIt.moveToFirst(); if (this.upperIt.isValid()) { final AnnotationFS upperFS = (AnnotationFS) this.upperIt.get(); this.upperBegin = upperFS.getBegin(); this.upperEnd = upperFS.getEnd(); } else { this.nextLowerChecked = true; } } public boolean hasNext() { return this.upperIt.hasNext(); } public AnnotationIteratorPair next() { if (!this.upperIt.hasNext()) { throw new NoSuchElementException(); } final AnnotationFS upperFS = (AnnotationFS) this.upperIt.next(); this.upperBegin = upperFS.getBegin(); this.upperEnd = upperFS.getEnd(); this.nextLowerChecked = false; return new AnnotationIteratorPair(upperFS, new AnnotationIterator()); } public Iterator iterator() { return this; } /** * Not supported. */ public void remove() { throw new UnsupportedOperationException(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy