/*
 * edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceDomReader Maven / Gradle / Ivy
 * Go to download
 * Show more of this group Show more artifacts with this name
 * Show all versions of stanford-corenlp Show documentation
 *
 * Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher-level text understanding applications.
 */
package edu.stanford.nlp.ie.machinereading.domains.ace.reader;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import edu.stanford.nlp.ie.machinereading.common.DomReader;
/**
 * DOM reader for an ACE specification.
 * <p>
 * Reads an XML file into a DOM {@link Document} and builds an
 * {@link AceDocument} from its {@code entity}, {@code relation} and
 * {@code event} elements together with their mentions.
 *
 * @author David McClosky
 */
public class AceDomReader extends DomReader {

  /** A logger for this class */
  private static final Redwood.RedwoodChannels log = Redwood.channels(AceDomReader.class);

  /**
   * Builds a character sequence from the {@code charseq} child of the given node.
   * <p>
   * The text is taken from the first child node of the {@code charseq} element,
   * and the offsets from its {@code START} and {@code END} attributes.
   *
   * @param node the element that contains a {@code charseq} child
   *             (e.g. an {@code extent}, {@code head} or {@code anchor} element)
   * @return the parsed character sequence
   * @throws NumberFormatException if {@code START} or {@code END} is not an integer
   */
  private static AceCharSeq parseCharSeq(Node node) {
    Node child = getChildByName(node, "charseq");
    String start = getAttributeValue(child, "START");
    String end = getAttributeValue(child, "END");
    // NOTE(review): assumes the charseq element exists and has a text child;
    // otherwise this line throws a NullPointerException.
    String text = child.getFirstChild().getNodeValue();
    return new AceCharSeq(text,
                          Integer.parseInt(start),
                          Integer.parseInt(end));
  }

  /**
   * Extracts one entity mention.
   *
   * @param node an {@code entity_mention} element
   * @return the mention built from the node's {@code ID}, {@code TYPE} and
   *         {@code LDCTYPE} attributes and its {@code extent} and {@code head} children
   */
  private static AceEntityMention parseEntityMention(Node node) {
    String id = getAttributeValue(node, "ID");
    String type = getAttributeValue(node, "TYPE");
    String ldctype = getAttributeValue(node, "LDCTYPE");
    AceCharSeq extent = parseCharSeq(getChildByName(node, "extent"));
    AceCharSeq head = parseCharSeq(getChildByName(node, "head"));
    return new AceEntityMention(id, type, ldctype, extent, head);
  }

  /**
   * Extracts info about one relation mention.
   * <p>
   * Arguments whose {@code REFID} does not resolve to an entity mention already
   * registered in {@code doc} are skipped.
   *
   * @param node a {@code relation_mention} element
   * @param doc  the document used to resolve argument references to entity mentions
   * @return the relation mention with its resolved arguments filled in
   * @throws RuntimeException if a resolved argument has a role other than
   *                          {@code arg-1} or {@code arg-2} (case-insensitive)
   */
  private static AceRelationMention parseRelationMention(Node node,
                                                         AceDocument doc) {
    String id = getAttributeValue(node, "ID");
    AceCharSeq extent = parseCharSeq(getChildByName(node, "extent"));
    String lc = getAttributeValue(node, "LEXICALCONDITION");

    // create the mention
    AceRelationMention mention = new AceRelationMention(id, extent, lc);

    // find the mention args
    List<Node> args = getChildrenByName(node, "relation_mention_argument");
    for (Node arg : args) {
      String role = getAttributeValue(arg, "ROLE");
      String refid = getAttributeValue(arg, "REFID");
      AceEntityMention am = doc.getEntityMention(refid);

      if (am != null) {
        am.addRelationMention(mention);
        // Constant-first comparison: a missing ROLE attribute ends up in the
        // "invalid role" branch instead of raising a NullPointerException.
        if ("arg-1".equalsIgnoreCase(role)) {
          mention.getArgs()[0] = new AceRelationMentionArgument(role, am);
        } else if ("arg-2".equalsIgnoreCase(role)) {
          mention.getArgs()[1] = new AceRelationMentionArgument(role, am);
        } else {
          throw new RuntimeException("Invalid relation mention argument role: " + role);
        }
      }
    }

    return mention;
  }

  /**
   * Extracts info about one event mention.
   * <p>
   * Arguments whose {@code REFID} does not resolve to an entity mention already
   * registered in {@code doc} are skipped.
   *
   * @param node an {@code event_mention} element
   * @param doc  the document used to resolve argument references to entity mentions
   * @return the event mention with its resolved arguments added
   */
  private static AceEventMention parseEventMention(Node node,
                                                   AceDocument doc) {
    String id = getAttributeValue(node, "ID");
    AceCharSeq extent = parseCharSeq(getChildByName(node, "extent"));
    AceCharSeq anchor = parseCharSeq(getChildByName(node, "anchor"));

    // create the mention
    AceEventMention mention = new AceEventMention(id, extent, anchor);

    // find the mention args
    List<Node> args = getChildrenByName(node, "event_mention_argument");
    for (Node arg : args) {
      String role = getAttributeValue(arg, "ROLE");
      String refid = getAttributeValue(arg, "REFID");
      AceEntityMention am = doc.getEntityMention(refid);

      if (am != null) {
        am.addEventMention(mention);
        mention.addArg(am, role);
      }
    }

    return mention;
  }

  /**
   * Reads every {@code entity} element and its {@code entity_mention} children
   * into the given ACE document.
   */
  private static void parseEntities(Document document, AceDocument aceDoc) {
    NodeList entities = document.getElementsByTagName("entity");
    for (int i = 0; i < entities.getLength(); i++) {
      Node node = entities.item(i);

      // the entity type, subtype, and class
      String id = getAttributeValue(node, "ID");
      String type = getAttributeValue(node, "TYPE");
      String subtype = getAttributeValue(node, "SUBTYPE");
      String cls = getAttributeValue(node, "CLASS");

      // create the entity
      AceEntity entity = new AceEntity(id, type, subtype, cls);
      aceDoc.addEntity(entity);

      // fetch and parse all mentions of this entity
      List<Node> mentions = getChildrenByName(node, "entity_mention");
      for (Node mentionNode : mentions) {
        AceEntityMention mention = parseEntityMention(mentionNode);
        entity.addMention(mention);
        aceDoc.addEntityMention(mention);
      }
    }
  }

  /**
   * Reads every {@code relation} element and its {@code relation_mention}
   * children into the given ACE document.
   */
  private static void parseRelations(Document document, AceDocument aceDoc) {
    NodeList relations = document.getElementsByTagName("relation");
    for (int i = 0; i < relations.getLength(); i++) {
      Node node = relations.item(i);

      // the relation type, subtype, tense, and modality
      String id = getAttributeValue(node, "ID");
      String type = getAttributeValue(node, "TYPE");
      String subtype = getAttributeValue(node, "SUBTYPE");
      String modality = getAttributeValue(node, "MODALITY");
      String tense = getAttributeValue(node, "TENSE");

      // create the relation
      AceRelation relation = new AceRelation(id, type, subtype,
                                             modality, tense);
      aceDoc.addRelation(relation);

      // XXX: fetch relation_arguments here!

      // fetch and traverse all mentions of this relation
      List<Node> mentions = getChildrenByName(node, "relation_mention");
      for (Node mentionNode : mentions) {
        AceRelationMention mention = parseRelationMention(mentionNode, aceDoc);
        relation.addMention(mention);
        aceDoc.addRelationMention(mention);
      }
    }
  }

  /**
   * Reads every {@code event} element and its {@code event_mention} children
   * into the given ACE document.
   */
  private static void parseEvents(Document document, AceDocument aceDoc) {
    NodeList events = document.getElementsByTagName("event");
    for (int i = 0; i < events.getLength(); i++) {
      Node node = events.item(i);

      // the event type, subtype, modality, polarity, genericity, and tense
      String id = getAttributeValue(node, "ID");
      String type = getAttributeValue(node, "TYPE");
      String subtype = getAttributeValue(node, "SUBTYPE");
      String modality = getAttributeValue(node, "MODALITY");
      String polarity = getAttributeValue(node, "POLARITY");
      String genericity = getAttributeValue(node, "GENERICITY");
      String tense = getAttributeValue(node, "TENSE");

      // create the event
      AceEvent event = new AceEvent(id, type, subtype,
                                    modality, polarity, genericity, tense);
      aceDoc.addEvent(event);

      // fetch and traverse all mentions of this event
      List<Node> mentions = getChildrenByName(node, "event_mention");
      for (Node mentionNode : mentions) {
        AceEventMention mention = parseEventMention(mentionNode, aceDoc);
        event.addMention(mention);
        aceDoc.addEventMention(mention);
      }
    }
  }

  /**
   * Parses one ACE specification.
   *
   * @param f the XML file to read
   * @return the ACE document holding the entities, relations and events found
   *         in the file, together with all of their mentions
   * @throws IOException                  propagated from reading the file
   * @throws SAXException                 propagated from parsing the XML
   * @throws ParserConfigurationException propagated from setting up the XML parser
   */
  public static AceDocument parseDocument(File f)
      throws IOException, SAXException, ParserConfigurationException {

    // parse the Dom document
    Document document = readDocument(f);

    // create the ACE document object
    Node docElement = document.getElementsByTagName("document").item(0);
    AceDocument aceDoc =
        new AceDocument(getAttributeValue(docElement, "DOCID"));

    // Entities must be read first: relation and event mention arguments are
    // resolved against the entity mentions already registered in aceDoc.
    parseEntities(document, aceDoc);
    parseRelations(document, aceDoc);
    parseEvents(document, aceDoc);

    return aceDoc;
  }

  /**
   * Command-line entry point: parses the file named by the single argument and
   * prints the resulting document and the size of its relation-mention list.
   *
   * @param argv exactly one element, the path of the file to parse
   * @throws Exception if the file cannot be read or parsed
   */
  public static void main(String[] argv) throws Exception {
    if (argv.length != 1) {
      log.info("Usage: java AceDomReader <APF file>");
      System.exit(1);
    }

    File f = new File(argv[0]);
    AceDocument doc = parseDocument(f);
    System.out.println("Processed ACE document:\n" + doc);
    System.out.println("size: " + doc.getAllRelationMentions().size());
  }
}