All downloads are free. The search and download functionality uses the official Maven repository.

gate.creole.coref.Coreferencer Maven / Gradle / Ivy

Go to download

ANNIE is a general purpose information extraction system that provides the building blocks of many other GATE applications.

There is a newer version: 9.1
Show newest version
/*
 *  Coreferencer.java
 *
 *  Copyright (c) 1995-2012, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Marin Dimitrov, 18/Dec/2001
 *
 *  $Id: Coreferencer.java 19742 2016-11-16 17:58:23Z markagreenwood $
 */

package gate.creole.coref;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.log4j.Logger;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.Factory;
import gate.FeatureMap;
import gate.Resource;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.util.GateRuntimeException;
import gate.util.SimpleFeatureMapImpl;

@CreoleResource(name="ANNIE Pronominal Coreferencer", comment="Pronominal Coreference resolution component.", helpURL="http://gate.ac.uk/userguide/sec:annie:pronom-coref", icon="pronominal-coreferencer")
public class Coreferencer extends AbstractLanguageAnalyser {

  private static final long serialVersionUID = -2343178168872843239L;

  public static final String COREF_DOCUMENT_PARAMETER_NAME = "document";

  public static final String COREF_ANN_SET_PARAMETER_NAME = "annotationSetName";

  public static final String COREF_TYPE_FEATURE_NAME = "ENTITY_MENTION_TYPE";

  public static final String COREF_ANTECEDENT_FEATURE_NAME = "antecedent_offset";

  /** --- */
  private static final boolean DEBUG = false;

  private static final Logger log = Logger.getLogger(Coreferencer.class);
  
  /** --- */
  private PronominalCoref pronominalModule;

  /** --- */
  public Coreferencer() {
    this.pronominalModule = new PronominalCoref();
  }

  /** Initialise this resource, and return it. */
  @Override
  public Resource init() throws ResourceInstantiationException {

    // load all submodules
    //pronominalModule.init();
	pronominalModule = (PronominalCoref)Factory.createResource("gate.creole.coref.PronominalCoref");

    return this;
  } // init()

  /**
   * Reinitialises the processing resource. After calling this method
   * the resource should be in the state it is after calling init. If
   * the resource depends on external resources (such as rules files)
   * then the resource will re-read those resources. If the data used to
   * create the resource has changed since the resource has been created
   * then the resource will change too after calling reInit().
   */
  @Override
  public void reInit() throws ResourceInstantiationException {
    //init();
	  pronominalModule.reInit();
  } // reInit()

  /** Set the document to run on. */
  @Override
  public void setDocument(Document newDocument) {

    // Assert.assertNotNull(newDocument);

    this.pronominalModule.setDocument(newDocument);
    super.setDocument(newDocument);
  }
  
  @Override
  public void cleanup() {
    super.cleanup();
    pronominalModule.cleanup();
  }

  /** --- */
  @RunTime
  @Optional
  @CreoleParameter(comment="The annotation set to be used for the generated annotations")
  public void setAnnotationSetName(String annotationSetName) {
    this.pronominalModule.setAnnotationSetName(annotationSetName);
  }

  /** --- */
  public String getAnnotationSetName() {
    return this.pronominalModule.getAnnotationSetName();
  }

  /** --- */
  @RunTime
  @Optional
  @CreoleParameter(comment="Whether or not to resolve it pronouns", defaultValue="false")
  public void setResolveIt(Boolean newValue) {
    this.pronominalModule.setResolveIt(newValue);
  }

  /** --- */
  public Boolean getResolveIt() {
    return this.pronominalModule.getResolveIt();
  }

  /**
   * This method runs the coreferencer. It assumes that all the needed
   * parameters are set. If they are not, an exception will be fired.
   */
  @Override
  public void execute() throws ExecutionException {

    fireStatusChanged("Pronominal Coreferencer processing: "
            + document.getName());
    this.pronominalModule.execute();
    generateCorefChains();
    fireStatusChanged("Pronominal Coreferencer completed");
  }

  /** --- */
  private void generateCorefChains() throws GateRuntimeException {

    // 1. get the resolved corefs
    Map ana2ant = this.pronominalModule.getResolvedAnaphora();

    // 2. get the outout annotation set
    String asName = getAnnotationSetName();
    AnnotationSet outputSet = null;

    if(null == asName || asName.equals("")) {
      outputSet = getDocument().getAnnotations();
    }
    else {
      outputSet = getDocument().getAnnotations(asName);
    }

    // 3. generate new annotations
    Iterator> it = ana2ant.entrySet().iterator();
    while(it.hasNext()) {
      Map.Entry currLink = it.next();
      Annotation anaphor = currLink.getKey();
      Annotation antecedent = currLink.getValue();

      if(DEBUG) {
        AnnotationSet corefSet = getDocument().getAnnotations("COREF");
        Long antOffset = new Long(0);

        if(null != antecedent) {
          antOffset = antecedent.getStartNode().getOffset();
        }

        FeatureMap features = new SimpleFeatureMapImpl();
        features.put("antecedent", antOffset);
        corefSet.add(anaphor.getStartNode(), anaphor.getEndNode(), "COREF",
                features);
      }

      // do we have antecedent?
      if(null == antecedent) {
        continue;
      }

      // get the ortho-matches of the antecedent
      
      Object matchesObj =
              antecedent.getFeatures().get(ANNOTATION_COREF_FEATURE_NAME);
      @SuppressWarnings("unchecked")
      List matches =
              matchesObj instanceof List ? (List)matchesObj : null;
      if(matchesObj != null && matches == null) {
        log.warn("Illegal value for " + ANNOTATION_COREF_FEATURE_NAME
                + " feature was ignored.");

      }
        
      if(null == matches) {
        matches = new ArrayList();
        matches.add(antecedent.getId());
        antecedent.getFeatures().put(ANNOTATION_COREF_FEATURE_NAME, matches);
        // check if the document has a list of matches
        // if yes, simply add the new list to it
        // if not, create it and add the list of matches to it
        if(document.getFeatures().containsKey(DOCUMENT_COREF_FEATURE_NAME)) {
          @SuppressWarnings("unchecked")
          Map>> matchesMap = (Map>>)document.getFeatures().get(
                  DOCUMENT_COREF_FEATURE_NAME);
          List> matchesList = matchesMap.get(getAnnotationSetName());
          if(matchesList == null) {
            matchesList = new ArrayList>();
            matchesMap.put(getAnnotationSetName(), matchesList);
          }
          matchesList.add(matches);
        }
        else {
          Map>> matchesMap = new HashMap>>();
          List> matchesList = new ArrayList>();
          matchesMap.put(getAnnotationSetName(), matchesList);
          matchesList.add(matches);
          document.getFeatures().put(DOCUMENT_COREF_FEATURE_NAME, matchesMap);
        }// if else
      }// if matches == null

      FeatureMap features = new SimpleFeatureMapImpl();
      features.put(COREF_TYPE_FEATURE_NAME, "PRONOUN");
      features.put(ANNOTATION_COREF_FEATURE_NAME, matches);
      features.put(COREF_ANTECEDENT_FEATURE_NAME, antecedent.getStartNode()
              .getOffset());

      //see if the annotation we want to add already exists
      AnnotationSet existing = outputSet.get(antecedent.getType(), anaphor
              .getStartNode().getOffset(), anaphor.getEndNode().getOffset());

      if(existing.size() > 0) {
        //if it exists simply update the existing annotation
        Annotation annot = existing.iterator().next();
        annot.getFeatures().putAll(features);
        matches.add(annot.getId());
      }
      else {
        //if it doesn't exist create a new annotation
        matches.add(outputSet.add(anaphor.getStartNode(), anaphor.getEndNode(),
                antecedent.getType(), features));
      }
    }
  }

  public String getInanimatedEntityTypes() {
    return this.pronominalModule.getInanimatedEntityTypes();
  }

  @RunTime
  @Optional
  @CreoleParameter(comment="List of annotation types for non animated entities", defaultValue="Organization;Location")
  public void setInanimatedEntityTypes(String inanimatedEntityTypes) {
    this.pronominalModule.setInanimatedEntityTypes(inanimatedEntityTypes);
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy