edu.stanford.nlp.ie.machinereading.common.DomReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-corenlp Show documentation
Show all versions of stanford-corenlp Show documentation
Stanford CoreNLP provides a set of natural language analysis tools that can take raw English-language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher-level text-understanding applications.
package edu.stanford.nlp.ie.machinereading.common;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
* Generic DOM reader for an XML file
*/
public class DomReader {
/**
* Searches (recursively) for the first child that has the given name
*/
protected static Node getChildByName(Node node, String name) {
NodeList children = node.getChildNodes();
// this node matches
if (node.getNodeName().equals(name))
return node;
// search children
for (int i = 0; i < children.getLength(); i++) {
Node found = getChildByName(children.item(i), name);
if (found != null)
return found;
}
// failed
return null;
}
/**
* Searches for all immediate children with the given name
*/
protected static List getChildrenByName(Node node, String name) {
List matches = new ArrayList<>();
NodeList children = node.getChildNodes();
// search children
for (int i = 0; i < children.getLength(); i++) {
Node child = children.item(i);
if (child.getNodeName().equals(name)) {
matches.add(child);
}
}
return matches;
}
/**
* Searches for children that have the given attribute
*/
protected static Node getChildByAttribute(Node node, String attributeName, String attributeValue) {
NodeList children = node.getChildNodes();
NamedNodeMap attribs = node.getAttributes();
Node attribute = null;
// this node matches
if (attribs != null && (attribute = attribs.getNamedItem(attributeName)) != null
&& attribute.getNodeValue().equals(attributeValue))
return node;
// search children
for (int i = 0; i < children.getLength(); i++) {
Node found = getChildByAttribute(children.item(i), attributeName, attributeValue);
if (found != null)
return found;
}
// failed
return null;
}
/**
* Searches for children that have the given name and attribute
*/
protected static Node getChildByNameAndAttribute(Node node, String name, String attributeName, String attributeValue) {
NodeList children = node.getChildNodes();
NamedNodeMap attribs = node.getAttributes();
Node attribute = null;
// this node matches
if (node.getNodeName().equals(name) && attribs != null
&& (attribute = attribs.getNamedItem(attributeName)) != null
&& attribute.getNodeValue().equals(attributeValue))
return node;
// search children
for (int i = 0; i < children.getLength(); i++) {
Node found = getChildByAttribute(children.item(i), attributeName, attributeValue);
if (found != null)
return found;
}
// failed
return null;
}
/**
* Fetches the value of a given attribute
*/
public static String getAttributeValue(Node node, String attributeName) {
try {
return node.getAttributes().getNamedItem(attributeName).getNodeValue();
} catch (Exception e) {
}
return null;
}
/**
* Constructs one Document from an XML file
*/
public static Document readDocument(File f) throws IOException, SAXException, ParserConfigurationException {
Document document = null;
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
// factory.setValidating(true);
// factory.setNamespaceAware(true);
try {
DocumentBuilder builder = factory.newDocumentBuilder();
document = builder.parse(f);
// displayDocument(document);
} catch (SAXException sxe) {
// Error generated during parsing)
Exception x = sxe;
if (sxe.getException() != null)
x = sxe.getException();
x.printStackTrace();
throw sxe;
} catch (ParserConfigurationException pce) {
// Parser with specified options can't be built
pce.printStackTrace();
throw pce;
} catch (IOException ioe) {
// I/O error
ioe.printStackTrace();
throw ioe;
}
return document;
} // readDocument
}