All downloads are free. Search and download functionality uses the official Maven repository.

edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceDomReader Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version

package edu.stanford.nlp.ie.machinereading.domains.ace.reader; 
import edu.stanford.nlp.util.logging.Redwood;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import edu.stanford.nlp.ie.machinereading.common.DomReader;

/**
 * DOM reader for an ACE specification.
 *
 * <p>Reads an ACE annotation XML file into an {@link AceDocument}. Entities
 * (with their mentions) are read first, then relations, then events, because
 * relation and event mention arguments are resolved against the entity
 * mentions already registered in the document.
 *
 * @author David McClosky
 */
public class AceDomReader extends DomReader  {

  /** A logger for this class */
  private static final Redwood.RedwoodChannels log = Redwood.channels(AceDomReader.class);

  /**
   * Builds a character sequence from the {@code charseq} child of the given node.
   *
   * @param node a node (e.g. an {@code extent} or {@code head} element) that
   *             contains a {@code charseq} child
   * @return the char sequence holding the text of the {@code charseq} element's
   *         first child node and its START/END attributes parsed as integers
   * @throws NumberFormatException if START or END cannot be parsed as an int
   */
  private static AceCharSeq parseCharSeq(Node node) {
    Node child = getChildByName(node, "charseq");
    String start = getAttributeValue(child, "START");
    String end = getAttributeValue(child, "END");
    String text = child.getFirstChild().getNodeValue();
    return new AceCharSeq(text,
                          Integer.parseInt(start),
                          Integer.parseInt(end));
  }

  /**
   * Extracts one entity mention.
   *
   * @param node an {@code entity_mention} element
   * @return the mention built from the ID, TYPE and LDCTYPE attributes and the
   *         {@code extent} and {@code head} character sequences
   */
  private static AceEntityMention parseEntityMention(Node node) {
    String id = getAttributeValue(node, "ID");
    String type = getAttributeValue(node, "TYPE");
    String ldctype = getAttributeValue(node, "LDCTYPE");
    AceCharSeq extent = parseCharSeq(getChildByName(node, "extent"));
    AceCharSeq head = parseCharSeq(getChildByName(node, "head"));
    return new AceEntityMention(id, type, ldctype, extent, head);
  }

  /**
   * Extracts info about one relation mention.
   *
   * @param node a {@code relation_mention} element
   * @param doc  the document whose already-registered entity mentions are used
   *             to resolve the REFID of each argument
   * @return the relation mention; arguments whose REFID does not resolve to an
   *         entity mention are skipped
   * @throws RuntimeException if a resolved argument has a role other than
   *         "arg-1" or "arg-2" (compared case-insensitively)
   */
  private static AceRelationMention parseRelationMention(Node node,
                                                         AceDocument doc) {
    String id = getAttributeValue(node, "ID");
    AceCharSeq extent = parseCharSeq(getChildByName(node, "extent"));
    String lc = getAttributeValue(node, "LEXICALCONDITION");

    // create the mention
    AceRelationMention mention = new AceRelationMention(id, extent, lc);

    // find the mention args
    List<Node> args = getChildrenByName(node, "relation_mention_argument");
    for (Node arg : args) {
      String role = getAttributeValue(arg, "ROLE");
      String refid = getAttributeValue(arg, "REFID");
      AceEntityMention am = doc.getEntityMention(refid);

      if (am != null) {
        am.addRelationMention(mention);
        // Constant-first comparison: should ROLE come back as null, it is
        // reported below as an invalid role rather than as a bare NPE.
        if ("arg-1".equalsIgnoreCase(role)) {
          mention.getArgs()[0] = new AceRelationMentionArgument(role, am);
        } else if ("arg-2".equalsIgnoreCase(role)) {
          mention.getArgs()[1] = new AceRelationMentionArgument(role, am);
        } else {
          throw new RuntimeException("Invalid relation mention argument role: " + role);
        }
      }
    }

    return mention;
  }

  /**
   * Extracts info about one event mention.
   *
   * @param node an {@code event_mention} element
   * @param doc  the document whose already-registered entity mentions are used
   *             to resolve the REFID of each argument
   * @return the event mention; arguments whose REFID does not resolve to an
   *         entity mention are skipped
   */
  private static AceEventMention parseEventMention(Node node,
                                                   AceDocument doc) {
    String id = getAttributeValue(node, "ID");
    AceCharSeq extent = parseCharSeq(getChildByName(node, "extent"));
    AceCharSeq anchor = parseCharSeq(getChildByName(node, "anchor"));

    // create the mention
    AceEventMention mention = new AceEventMention(id, extent, anchor);

    // find the mention args
    List<Node> args = getChildrenByName(node, "event_mention_argument");
    for (Node arg : args) {
      String role = getAttributeValue(arg, "ROLE");
      String refid = getAttributeValue(arg, "REFID");
      AceEntityMention am = doc.getEntityMention(refid);

      if (am != null) {
        am.addEventMention(mention);
        mention.addArg(am, role);
      }
    }

    return mention;
  }

  /**
   * Parses one ACE specification.
   *
   * @param f the ACE XML file to read
   * @return the document populated with the entities, relations and events
   *         (and their mentions) found in the file
   * @throws IOException propagated from reading the file
   * @throws SAXException propagated from XML parsing
   * @throws ParserConfigurationException propagated from XML parser setup
   */
  public static AceDocument parseDocument(File f)
    throws IOException, SAXException, ParserConfigurationException {

    // parse the Dom document
    Document document = readDocument(f);

    // create the ACE document object from the first <document> element
    Node docElement = document.getElementsByTagName("document").item(0);
    AceDocument aceDoc =
      new AceDocument(getAttributeValue(docElement, "DOCID"));

    // Order matters: relation and event mentions look up entity mentions
    // that must already have been added to aceDoc.
    readEntities(document, aceDoc);
    readRelations(document, aceDoc);
    readEvents(document, aceDoc);

    return aceDoc;
  }

  /** Reads every {@code entity} element and its mentions into {@code aceDoc}. */
  private static void readEntities(Document document, AceDocument aceDoc) {
    NodeList entities = document.getElementsByTagName("entity");
    for (int i = 0; i < entities.getLength(); i++) {
      Node node = entities.item(i);

      // the entity type, subtype, and class
      String id = getAttributeValue(node, "ID");
      String type = getAttributeValue(node, "TYPE");
      String subtype = getAttributeValue(node, "SUBTYPE");
      String cls = getAttributeValue(node, "CLASS");

      // create the entity
      AceEntity entity = new AceEntity(id, type, subtype, cls);
      aceDoc.addEntity(entity);

      // fetch and parse all mentions of this entity
      List<Node> mentions = getChildrenByName(node, "entity_mention");
      for (Node mentionNode : mentions) {
        AceEntityMention mention = parseEntityMention(mentionNode);
        entity.addMention(mention);
        aceDoc.addEntityMention(mention);
      }
    }
  }

  /** Reads every {@code relation} element and its mentions into {@code aceDoc}. */
  private static void readRelations(Document document, AceDocument aceDoc) {
    NodeList relations = document.getElementsByTagName("relation");
    for (int i = 0; i < relations.getLength(); i++) {
      Node node = relations.item(i);

      // the relation type, subtype, tense, and modality
      String id = getAttributeValue(node, "ID");
      String type = getAttributeValue(node, "TYPE");
      String subtype = getAttributeValue(node, "SUBTYPE");
      String modality = getAttributeValue(node, "MODALITY");
      String tense = getAttributeValue(node, "TENSE");

      // create the relation
      AceRelation relation = new AceRelation(id, type, subtype,
                                             modality, tense);
      aceDoc.addRelation(relation);

      // XXX: fetch relation_arguments here!

      // fetch and parse all mentions of this relation
      List<Node> mentions = getChildrenByName(node, "relation_mention");
      for (Node mentionNode : mentions) {
        AceRelationMention mention = parseRelationMention(mentionNode, aceDoc);
        relation.addMention(mention);
        aceDoc.addRelationMention(mention);
      }
    }
  }

  /** Reads every {@code event} element and its mentions into {@code aceDoc}. */
  private static void readEvents(Document document, AceDocument aceDoc) {
    NodeList events = document.getElementsByTagName("event");
    for (int i = 0; i < events.getLength(); i++) {
      Node node = events.item(i);

      // the event type, subtype, modality, polarity, genericity, and tense
      String id = getAttributeValue(node, "ID");
      String type = getAttributeValue(node, "TYPE");
      String subtype = getAttributeValue(node, "SUBTYPE");
      String modality = getAttributeValue(node, "MODALITY");
      String polarity = getAttributeValue(node, "POLARITY");
      String genericity = getAttributeValue(node, "GENERICITY");
      String tense = getAttributeValue(node, "TENSE");

      // create the event
      AceEvent event = new AceEvent(id, type, subtype,
                                    modality, polarity, genericity, tense);
      aceDoc.addEvent(event);

      // fetch and parse all mentions of this event
      List<Node> mentions = getChildrenByName(node, "event_mention");
      for (Node mentionNode : mentions) {
        AceEventMention mention = parseEventMention(mentionNode, aceDoc);
        event.addMention(mention);
        aceDoc.addEventMention(mention);
      }
    }
  }

  /**
   * Command-line entry point: parses the single file given as argument and
   * prints the resulting document and the size of its relation mention list.
   *
   * @param argv exactly one element, the path of the ACE file to parse
   * @throws Exception any failure raised while parsing the file
   */
  public static void main(String [] argv) throws Exception {
    if (argv.length != 1) {
      log.info("Usage: java AceDomReader <APF file>");
      System.exit(1);
    }

    File f = new File(argv[0]);
    AceDocument doc = parseDocument(f);
    System.out.println("Processed ACE document:\n" + doc);
    // size() is called directly so no assumption is made here about the
    // exact collection type returned by getAllRelationMentions().
    System.out.println("size: " + doc.getAllRelationMentions().size());
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy