/*
 * gate.alignment.gui.DefaultIteratingMethod
 *
 * Part of the "alignment" artifact: a selection of tools for processing
 * parallel texts.
 *
 * Note: a newer version of this artifact is available (8.6.1).
 */
package gate.alignment.gui;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.compound.CompoundDocument;
import gate.util.GateRuntimeException;
import gate.util.InvalidOffsetException;
import gate.util.OffsetComparator;

/**
 * Default implementation of the IteratingMethod interface. Purpose of
 * the IteratingMethod is to allow users to define their own iterating
 * sequence for the alignment editor. For example, it could be a
 * sentence alignment algorithm that decides which sentence in the
 * source language should be paired with which sentence in the target
 * language. Alignment editor takes one pair of annotations (one from
 * the source document and one from the target document) and displays
 * them.
 * 
 * In this implementation, annotations from source and target documents
 * are paired in order of their occurrence in the document. If the
 * unitAnnotationType is set to Sentence for both the source and target
 * documents, Sentence annotations in both the documents are sorted
 * using the gate.util.OffsetComparator. Then the first sentence from
 * the source document is paired with the first sentence in the target
 * document. The same is true for the second, third and for the rest of
 * the sentences in both documents. Iteration stops as soon as either
 * document runs out of unit annotations.
 * 
 * {@link #init} must be called before any other method is used; until
 * then the internal sequences are not set.
 * 
 * @author niraj
 */
@SuppressWarnings("serial")
public class DefaultIteratingMethod implements IteratingMethod {

  /**
   * Offset comparator used for sorting annotations
   */
  private final OffsetComparator comparator;

  /**
   * Unit of alignment in the source document (e.g. Token)
   */
  private String srcTokenAnnotationType;

  /**
   * Unit of alignment in the target document (e.g. Token)
   */
  private String tgtTokenAnnotationType;

  /**
   * ID of the source document
   */
  private String srcDocumentID;

  /**
   * ID of the target document
   */
  private String tgtDocumentID;

  /**
   * Internal instance of AASequence that is used for obtaining
   * annotations from the source document one by one.
   */
  private AASequence srcSequence;

  /**
   * Internal instance of AASequence that is used for obtaining
   * annotations from the target document one by one.
   */
  private AASequence tgtSequence;

  /**
   * The document that is being aligned in the Alignment Editor.
   */
  private CompoundDocument compoundDocument;

  /**
   * Cache for the pair most recently returned by {@link #next()} or
   * {@link #previous()}; null until one of them has been called.
   */
  private Pair currentPair;

  /**
   * Constructor
   */
  public DefaultIteratingMethod() {
    comparator = new OffsetComparator();
  }

  /**
   * This method, given necessary parameters, initialises the internal
   * resources.
   * 
   * @param alignedDocument - compound document that contains both the
   *          source and the target document being aligned
   * @param srcDocumentId - id of the source document
   * @param tgtDocumentId - id of the target document
   * @param srcInputAS - name of the annotation set in the source
   *          document which to take annotations from; null, empty or
   *          blank selects the document's default annotation set
   * @param tgtInputAS - name of the annotation set in the target
   *          document which to take annotations from; null, empty or
   *          blank selects the document's default annotation set
   * @param srcTokenAnnotationType - e.g. Token for word alignment
   * @param tgtTokenAnnotationType - e.g. Token for word alignment
   * @param srcUnitAnnotationType - e.g. Sentence for sentence alignment
   * @param tgtUnitAnnotationType - e.g. Sentence for sentence alignment
   * @throws IteratingMethodException - could throw exception if
   *           something is wrong with the parameters or with the source
   *           and target documents.
   */
  public void init(CompoundDocument alignedDocument, String srcDocumentId,
          String tgtDocumentId, String srcInputAS, String tgtInputAS,
          String srcTokenAnnotationType, String tgtTokenAnnotationType,
          String srcUnitAnnotationType, String tgtUnitAnnotationType)
          throws IteratingMethodException {

    this.compoundDocument = alignedDocument;
    this.srcDocumentID = srcDocumentId;
    this.tgtDocumentID = tgtDocumentId;
    this.srcTokenAnnotationType = srcTokenAnnotationType;
    this.tgtTokenAnnotationType = tgtTokenAnnotationType;

    Document srcDoc = compoundDocument.getDocument(srcDocumentId);
    srcSequence = new AASequence(srcDoc, selectAnnotationSet(srcDoc,
            srcInputAS), srcUnitAnnotationType);

    Document tgtDoc = compoundDocument.getDocument(tgtDocumentId);
    tgtSequence = new AASequence(tgtDoc, selectAnnotationSet(tgtDoc,
            tgtInputAS), tgtUnitAnnotationType);
  }

  /**
   * Picks the annotation set to read from: the default set when the
   * given name is null, empty or only whitespace, the named set
   * otherwise.
   */
  private AnnotationSet selectAnnotationSet(Document doc, String setName) {
    if(setName == null || setName.trim().length() == 0) {
      return doc.getAnnotations();
    }
    return doc.getAnnotations(setName);
  }

  /**
   * Retrieves the underlying text for the given annotation in the
   * document with given document id.
   * 
   * @param annot - annotation whose start and end offsets delimit the
   *          text
   * @param documentId - id of the source or the target document
   * @return the text spanned by the annotation, or null if documentId
   *         is neither the source nor the target document id
   * @throws GateRuntimeException wrapping the InvalidOffsetException
   *           raised while reading the document content
   */
  public String getText(Annotation annot, String documentId) {

    try {
      if(documentId.equals(srcDocumentID)) {
        return srcSequence.getText(annot);
      }
      else if(documentId.equals(tgtDocumentID)) {
        return tgtSequence.getText(annot);
      }
    }
    catch(InvalidOffsetException ioe) {
      throw new GateRuntimeException(ioe);
    }

    return null;
  }

  /**
   * Similar to getContained method of the gate.AnnotationSet.
   * 
   * @param annot - this method uses annot.getStartNode().getOffset()
   *          and annot.getEndNode().getOffset() to decide boundaries.
   * @param documentId - id of the document to be used in the compound
   *          document.
   * @param tokenAnnotationType - type of the annotations to be
   *          retrieved; when null, the token annotation type supplied
   *          to init for that document is used instead
   * @return gate.AnnotationSet with annotations of the requested type,
   *         or null if documentId is neither the source nor the target
   *         document id
   */
  public AnnotationSet getUnderlyingAnnotations(Annotation annot,
          String documentId, String tokenAnnotationType) {
    if(documentId.equals(srcDocumentID)) {
      return srcSequence.getUnderlyingAnnotations(annot,
              tokenAnnotationType == null
                      ? this.srcTokenAnnotationType
                      : tokenAnnotationType);
    }
    else if(documentId.equals(tgtDocumentID)) {
      return tgtSequence.getUnderlyingAnnotations(annot,
              tokenAnnotationType == null
                      ? this.tgtTokenAnnotationType
                      : tokenAnnotationType);
    }
    return null;
  }

  /**
   * Retrieves the next possible pair and remembers it as the current
   * pair.
   * 
   * @throws IndexOutOfBoundsException if {@link #hasNext()} is false;
   *           in that case neither sequence is advanced
   */
  public Pair next() {
    // Check both sides before moving either one: otherwise the source
    // cursor could advance while the target fails, leaving the two
    // sequences permanently out of step.
    if(!hasNext()) {
      throw new IndexOutOfBoundsException("No next pair available");
    }
    Pair pair = new Pair(srcDocumentID, srcSequence.next(), tgtDocumentID,
            tgtSequence.next());
    this.currentPair = pair;
    return pair;
  }

  /**
   * Retrieves the previous possible pair and remembers it as the
   * current pair.
   * 
   * @throws IndexOutOfBoundsException if {@link #hasPrevious()} is
   *           false; in that case neither sequence is moved
   */
  public Pair previous() {
    // Same reasoning as in next(): never move one side on its own.
    if(!hasPrevious()) {
      throw new IndexOutOfBoundsException("No previous pair available");
    }
    Pair pair = new Pair(srcDocumentID, srcSequence.previous(), tgtDocumentID,
            tgtSequence.previous());
    this.currentPair = pair;
    return pair;
  }

  /**
   * Returns the current pair, i.e. the one most recently returned by
   * next() or previous(); null if neither has been called yet.
   */
  public Pair current() {
    return currentPair;
  }

  /**
   * Returns true if there is any next pair available to return, which
   * requires both documents to have a further unit annotation.
   */
  public boolean hasNext() {
    return srcSequence.hasNext() && tgtSequence.hasNext();
  }

  /**
   * Returns true if there is any previous pair available to return,
   * which requires both documents to have an earlier unit annotation.
   */
  public boolean hasPrevious() {
    return srcSequence.hasPrevious() && tgtSequence.hasPrevious();
  }

  /**
   * Internal class used for maintaining annotation sequences. It holds
   * the annotations of one type from one annotation set, sorted with
   * the enclosing instance's comparator, together with a cursor.
   */
  class AASequence {
    Document document;
    AnnotationSet set;
    List<Annotation> annotations;

    /**
     * Index of the annotation returned last; -1 means that iteration
     * has not started yet.
     */
    int counter = -1;

    /**
     * @param doc - document the annotations belong to
     * @param set - annotation set to read from
     * @param parentType - type of the unit annotations (e.g. Sentence)
     *          to iterate over
     */
    public AASequence(Document doc, AnnotationSet set, String parentType) {
      this.document = doc;
      this.set = set;
      // collecting all sentences for example
      AnnotationSet units = set.get(parentType);
      // NOTE(review): defensive null check - whether AnnotationSet.get
      // can return null for an unknown type depends on the GATE
      // version; confirm against the version in use.
      annotations = units == null
              ? new ArrayList<Annotation>()
              : new ArrayList<Annotation>(units);
      Collections.sort(annotations, comparator);
    }

    /**
     * Returns true if there is an annotation after the current one.
     */
    public boolean hasNext() {
      return counter + 1 < annotations.size();
    }

    /**
     * Moves the cursor forward and returns the annotation found there.
     * 
     * @throws IndexOutOfBoundsException if there is no next annotation;
     *           the cursor is left untouched in that case
     */
    public Annotation next() {
      // validate before moving so a failed call cannot corrupt counter
      if(!hasNext()) {
        throw new IndexOutOfBoundsException("No annotation after index "
                + counter + " (size " + annotations.size() + ")");
      }
      counter++;
      return annotations.get(counter);
    }

    /**
     * Moves the cursor backward and returns the annotation found
     * there.
     * 
     * @throws IndexOutOfBoundsException if there is no previous
     *           annotation; the cursor is left untouched in that case
     */
    public Annotation previous() {
      // validate before moving so a failed call cannot corrupt counter
      if(!hasPrevious()) {
        throw new IndexOutOfBoundsException("No annotation before index "
                + counter);
      }
      counter--;
      return annotations.get(counter);
    }

    /**
     * Returns true if there is an annotation before the current one.
     */
    public boolean hasPrevious() {
      return counter - 1 >= 0;
    }

    /**
     * Puts the cursor back in front of the first annotation.
     */
    public void reset() {
      counter = -1;
    }

    /**
     * Returns the annotations of the given type that lie within the
     * start and end offsets of the given parent annotation.
     */
    public AnnotationSet getUnderlyingAnnotations(Annotation parentAnnot,
            String annotationType) {
      return set.getContained(parentAnnot.getStartNode().getOffset(),
              parentAnnot.getEndNode().getOffset()).get(annotationType);
    }

    /**
     * Returns the document text between the start and end offsets of
     * the given annotation.
     */
    public String getText(Annotation ann) throws InvalidOffsetException {
      return document.getContent().getContent(ann.getStartNode().getOffset(),
              ann.getEndNode().getOffset()).toString();
    }
  }

}