All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gate.composite.impl.SegmentProcessingPR Maven / Gradle / Ivy

There is a newer version: 8.6.1
Show newest version
package gate.composite.impl;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Corpus;
import gate.CorpusController;
import gate.Document;
import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
import gate.LanguageAnalyser;
import gate.ProcessingResource;
import gate.composite.CombiningMethod;
import gate.composite.CombiningMethodException;
import gate.composite.CompositeDocument;
import gate.compound.CompoundDocument;
import gate.compound.impl.CompoundDocumentImpl;
import gate.corpora.DocumentImpl;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.util.GateRuntimeException;
import gate.util.OffsetComparator;
import gate.util.Out;

/**
 * As the name suggests, this PR is useful for processing segments of a text.
 * Given an analyser, a segment annotation type and a document, the PR creates
 * a composite document for every annotation of the type given by
 * {@code segmentAnnotationType}. Since the composite documents are linked with
 * their original document, when the analyser processes a composite document,
 * the composite document takes care of transferring the relevant annotations
 * back to the original document. This is a good way of processing just a
 * segment of a document.
 * 
 * @author niraj
 */
public class SegmentProcessingPR extends AbstractLanguageAnalyser implements
                                                                 ProcessingResource {

  private static final long serialVersionUID = 8528040629940314055L;

  /**
   * Controller that should be used to process segments.
   */
  private LanguageAnalyser analyser;

  /**
   * annotation type that the segment is annotated with.
   */
  private String segmentAnnotationType;

  /**
   * Only the annotation that has the specified feature should be considered for
   * annotating.
   */
  private String segmentAnnotationFeatureName;

  /**
   * Only the annotation that has the feature specified as
   * segmentAnnotationFeatureName with the value of
   * segmentAnnotaitonFeatureValue then only the annotation is considered for
   * annotating.
   */
  private String segmentAnnotationFeatureValue;

  /**
   * Annotation set that contains the segment annotation and the annotations to
   * be copied to the composite document.
   */
  private String inputASName;

  private boolean debug = false;

  /**
   * Should be called to execute this PR on a document.
   */
  public void execute() throws ExecutionException {
    // if no document provided
    if(document == null) { throw new ExecutionException("Document is null!"); }
    // annotation set to use
    AnnotationSet set =
        inputASName == null || inputASName.trim().length() == 0 ? document
            .getAnnotations() : document.getAnnotations(inputASName);
    AnnotationSet segmentSet = set.get(segmentAnnotationType);
    if(set.isEmpty()) {
      Out.prln("Could not find annotations of type: " + segmentAnnotationType
          + " in the document: " + document.getName());
      return;
    }
    
    // a combining method that creates a composite document with the
    // annotation as identified by the annotation id
    CombiningMethod combiningMethodInst = new CombineFromAnnotID();
    
    CompoundDocument compoundDoc = new CompoundDocumentImpl();
    // initializing an empty compound document
    try {
      compoundDoc.init();
    } catch(ResourceInstantiationException e) {
      throw new ExecutionException(e);
    }
    
    String originalDocument = document.getName();
    if(document instanceof CompoundDocument) {
      if(debug) {
        System.out
            .println("Document is a compound document and using the memeber \""
                + document.getName() + "\" for processing");
      }
      compoundDoc.addDocument(document.getName(),
          ((CompoundDocument)document).getCurrentDocument());
    } else {
      if(debug) {
        System.out.println("Document is a normal GATE document with name \""
            + document.getName() + "\"");
      }
      // add the current document as a member of the compound document
      compoundDoc.addDocument(document.getName(), document);
    }
    Corpus tempCorpus = null;
    Corpus oldCorpus = analyser.getCorpus();
    Document oldDoc = analyser.getDocument();

    try {
      Map map = new HashMap();
      map.put(CombineFromAnnotID.INPUT_AS_NAME_FEATURE_NAME, inputASName);
      map.put(CombineFromAnnotID.DOCUMENT_ID_FEATURE_NAME, document.getName());
      FeatureMap hideMap = Factory.newFeatureMap();
      Gate.setHiddenAttribute(hideMap, true);
      tempCorpus =
          (Corpus)Factory.createResource("gate.corpora.CorpusImpl",
              Factory.newFeatureMap(), hideMap, "compoundDocCorpus");
      tempCorpus.add(compoundDoc);
      analyser.setDocument(compoundDoc);
      analyser.setCorpus(tempCorpus);
      List segmentList = new ArrayList(segmentSet);
      Collections.sort(segmentList, new OffsetComparator());
      for(Annotation annotation : segmentList) {
        if(debug) {
          System.out.println("Processing annotation" + annotation.getType()
              + "=>" + annotation.getId());
        }
        // only consider the annotation if it has a specific feature and a value
        if(segmentAnnotationFeatureName != null
            && segmentAnnotationFeatureName.length() != 0
            && segmentAnnotationFeatureValue != null
            && segmentAnnotationFeatureValue.length() != 0) {
          Object value =
              annotation.getFeatures().get(segmentAnnotationFeatureName);
          if(value == null || !value.equals(segmentAnnotationFeatureValue)) {
            continue;
          }
        }
        String nameForCompositeDoc = "Composite" + Gate.genSym();
        map.put(CombineFromAnnotID.ANNOTATION_ID_FEATURE_NAME,
            annotation.getId());
        CompositeDocument compositeDoc = null;
        try {
          if(debug) {
            System.out.println("Creating temp composite document:"
                + nameForCompositeDoc);
          }
          compositeDoc = combiningMethodInst.combine(compoundDoc, map);
          compositeDoc.setName(nameForCompositeDoc);
          
          // try and make sure any annotations created in the segment will have
          // IDs that are valid in the original document
          if(document instanceof DocumentImpl) {
            ((CompositeDocumentImpl)compositeDoc)
                .setNextAnnotationId(((DocumentImpl)document)
                    .peakAtNextAnnotationId());
          }
          
          compoundDoc.addDocument(nameForCompositeDoc, compositeDoc);
          // change focus to composite document
          compoundDoc.setCurrentDocument(nameForCompositeDoc);
          // now run the application on the composite document
          analyser.execute();
        } catch(CombiningMethodException e) {
          throw new ExecutionException(e);
        } finally {
                    
          // finally get rid of the composite document
          compoundDoc.removeDocument(nameForCompositeDoc);
          if(compositeDoc != null) {
            gate.Factory.deleteResource(compositeDoc);
          }
        }
      }
    } catch(ResourceInstantiationException e) {
      throw new ExecutionException(e);
    } finally {
      // make sure you are resetting the reference
      analyser.setCorpus(oldCorpus);
      analyser.setDocument(oldDoc);
      
      Factory.deleteResource(compoundDoc);

      compoundDoc.removeDocument(originalDocument);
      if(tempCorpus != null) {
        // clear the corpus before deleting it
        tempCorpus.clear();
        gate.Factory.deleteResource(tempCorpus);
      }
    }
  }

  /**
   * Gets the set analyser. The analyser is used for processing the segmented
   * document.
   */
  public LanguageAnalyser getAnalyser() {
    return analyser;
  }

  /**
   * Sets the analyser. The analyser is used for processing the segmented
   * document.
   * 
   * @param analyser
   */
  public void setAnalyser(LanguageAnalyser analyser) {
    this.analyser = analyser;
  }

  /**
   * Sets the analyser. The analyser is used for processing the segmented
   * document.
   */
  @Deprecated
  public void setController(CorpusController controller) {
    if(!(controller instanceof LanguageAnalyser)) { throw new GateRuntimeException(
        "controller must be of type LanguageAnalyser"); }
    setAnalyser((LanguageAnalyser)controller);
  }

  /**
   * Annotation type that has been used for segmenting the document. The PR uses
   * annotations of this type to create new composite documents and process them
   * individually.
   */
  public String getSegmentAnnotationType() {
    return segmentAnnotationType;
  }

  /**
   * Annotation type that has been used for segmenting the document. The PR uses
   * annotations of this type to create new composite documents and process them
   * individually.
   */
  public void setSegmentAnnotationType(String segmentAnnotationType) {
    this.segmentAnnotationType = segmentAnnotationType;
  }

  /**
   * Annotation set to use for obtaining segment annotations and the annotations
   * to copy into the composite document.
   */
  public String getInputASName() {
    return inputASName;
  }

  /**
   * Annotation set to use for obtaining segment annotations and the annotations
   * to copy into the composite document.
   */
  public void setInputASName(String inputAS) {
    this.inputASName = inputAS;
  }

  public String getSegmentAnnotationFeatureName() {
    return segmentAnnotationFeatureName;
  }

  public void setSegmentAnnotationFeatureName(
      String segmentAnnotationFeatureName) {
    this.segmentAnnotationFeatureName = segmentAnnotationFeatureName;
  }

  public String getSegmentAnnotationFeatureValue() {
    return segmentAnnotationFeatureValue;
  }

  public void setSegmentAnnotationFeatureValue(
      String segmentAnnotationFeatureValue) {
    this.segmentAnnotationFeatureValue = segmentAnnotationFeatureValue;
  }

} // class SegmentProcessingPR




© 2015 - 2024 Weber Informatics LLC | Privacy Policy