/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.rio.rdfxml;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.Stack;
import javax.xml.transform.sax.SAXResult;
import org.apache.commons.io.input.BOMInputStream;
import org.eclipse.rdf4j.common.net.ParsedIRI;
import org.eclipse.rdf4j.common.xml.XMLUtil;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RioSetting;
import org.eclipse.rdf4j.rio.helpers.AbstractRDFParser;
import org.eclipse.rdf4j.rio.helpers.XMLParserSettings;
import org.eclipse.rdf4j.rio.helpers.XMLReaderBasedParser;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
/**
* A parser for XML-serialized RDF. This parser operates directly on the SAX events generated by a SAX-enabled XML
* parser. The XML parser should be compliant with SAX2. You should specify which SAX parser should be used by setting
* the org.xml.sax.driver property. This parser is not thread-safe, therefore its public methods are
* synchronized.
*
* To parse a document using this parser:
*
* Create an instance of RDFXMLParser, optionally supplying it with your own ValueFactory.
* Set the RDFHandler.
* Optionally, set the ParseErrorListener and/or ParseLocationListener.
* Optionally, specify whether the parser should verify the data it parses and whether it should stop immediately
* when it finds an error in the data (both default to true ).
* Call the parse method.
*
* Example code:
*
*
* // Use the SAX2-compliant Xerces parser:
* System.setProperty("org.xml.sax.driver", "org.apache.xerces.parsers.SAXParser");
*
* RDFParser parser = new RDFXMLParser();
* parser.setRDFHandler(myRDFHandler);
* parser.setParseErrorListener(myParseErrorListener);
* parser.setVerifyData(true);
* parser.stopAtFirstError(false);
*
* // Parse the data from inputStream, resolving any
* // relative URIs against http://foo/bar:
* parser.parse(inputStream, "http://foo/bar");
*
*
* Note that JAXP entity expansion limits may apply. Check the JAXP documentation on processing limits and on using
* the jaxp.properties file if you get one of the following errors:
*
*
*
* JAXP00010001: The parser has encountered more than "64000" entity expansions in this document
* JAXP00010004: The accumulated size of entities is ... that exceeded the "50,000,000" limit
*
*
* As a work-around, try passing -Djdk.xml.totalEntitySizeLimit=0 -DentityExpansionLimit=0 to the JVM.
*
* @author Arjohn Kampman
* @see org.eclipse.rdf4j.model.ValueFactory
* @see org.eclipse.rdf4j.rio.RDFHandler
* @see org.eclipse.rdf4j.rio.ParseErrorListener
* @see org.eclipse.rdf4j.rio.ParseLocationListener
*/
public class RDFXMLParser extends XMLReaderBasedParser implements ErrorHandler {
/*-----------*
* Variables *
*-----------*/
/**
 * A filter filtering calls to SAX methods specifically for this parser.
 */
private final SAXFilter saxFilter;

/**
 * The base URI of the document. It is assigned from the system ID of the input source at the start of a parse
 * run, or from the argument of {@link #getSAXResult(String)}.
 */
private String documentURI;

/**
 * The language of literal values as can be specified using xml:lang attributes. This variable is set/modified by
 * the SAXFilter during parsing such that it always represents the language of the context in which elements are
 * reported.
 */
private String xmlLang;

/**
 * A stack of node- and property elements. Entries are {@link NodeElement} or {@link PropertyElement} instances;
 * the element type is Object because the two classes share no common supertype.
 */
private final Stack<Object> elementStack = new Stack<>();

/**
 * A set containing URIs that have been generated as a result of rdf:ID attributes. These URIs should be unique
 * within a single document.
 */
private final Set<IRI> usedIDs = new HashSet<>();
/*--------------*
* Constructors *
*--------------*/
/**
* Creates a new RDFXMLParser that will use a {@link SimpleValueFactory} to create RDF model objects. Delegates to
* {@link #RDFXMLParser(ValueFactory)} with the result of {@code SimpleValueFactory.getInstance()}.
*/
public RDFXMLParser() {
this(SimpleValueFactory.getInstance());
}
/**
 * Creates a new RDFXMLParser that builds its RDF model objects with the given factory.
 *
 * @param valueFactory the ValueFactory handed to the superclass constructor.
 */
public RDFXMLParser(ValueFactory valueFactory) {
	super(valueFactory);

	// The SAX filter performs some filtering and verification of the SAX events on behalf of this parser.
	this.saxFilter = new SAXFilter(this);
}
/*---------*
* Methods *
*---------*/
/**
* Returns the RDF format handled by this parser.
*
* @return always {@link RDFFormat#RDFXML}.
*/
@Override
public final RDFFormat getRDFFormat() {
return RDFFormat.RDFXML;
}
/**
* Sets the parser in a mode to parse stand-alone RDF documents. In stand-alone RDF documents, the enclosing
* rdf:RDF root element is optional if this root element contains just one element (e.g. rdf:Description).
* The flag is stored in the parser configuration under {@link XMLParserSettings#PARSE_STANDALONE_DOCUMENTS}.
*
* @param standAloneDocs the new value of the stand-alone documents setting.
*/
public void setParseStandAloneDocuments(boolean standAloneDocs) {
getParserConfig().set(XMLParserSettings.PARSE_STANDALONE_DOCUMENTS, standAloneDocs);
}
/**
* Returns whether the parser is currently in a mode to parse stand-alone RDF documents.
*
* @return the value of {@link XMLParserSettings#PARSE_STANDALONE_DOCUMENTS} in the parser configuration.
* @see #setParseStandAloneDocuments
*/
public boolean getParseStandAloneDocuments() {
return getParserConfig().get(XMLParserSettings.PARSE_STANDALONE_DOCUMENTS);
}
/**
 * Parses RDF/XML data from the supplied input stream. The stream is wrapped in a {@link BOMInputStream} and the
 * base URI is installed as the system ID of the SAX input source that is handed to the internal parse routine.
 *
 * @param in      the stream to read the document from; must not be null.
 * @param baseURI the base URI of the document. Note that, unlike the Reader-based overload, this method does not
 *                reject a null value itself.
 * @throws IllegalArgumentException if {@code in} is null.
 * @throws IOException              propagated from the internal parse routine.
 * @throws RDFParseException        propagated from the internal parse routine.
 * @throws RDFHandlerException      propagated from the internal parse routine.
 */
@Override
public synchronized void parse(InputStream in, String baseURI)
		throws IOException, RDFParseException, RDFHandlerException {
	if (in == null) {
		throw new IllegalArgumentException("Input stream cannot be 'null'");
	}

	InputStream bomAwareStream = new BOMInputStream(in, false);
	InputSource source = new InputSource(bomAwareStream);
	source.setSystemId(baseURI);

	parse(source);
}
/**
 * Parses RDF/XML data from the supplied reader. The base URI is installed as the system ID of the SAX input
 * source that is handed to the internal parse routine.
 *
 * @param reader  the reader to read the document from; must not be null.
 * @param baseURI the base URI of the document; must not be null.
 * @throws IllegalArgumentException if {@code reader} or {@code baseURI} is null (the reader is checked first).
 * @throws IOException              propagated from the internal parse routine.
 * @throws RDFParseException        propagated from the internal parse routine.
 * @throws RDFHandlerException      propagated from the internal parse routine.
 */
@Override
public synchronized void parse(Reader reader, String baseURI)
		throws IOException, RDFParseException, RDFHandlerException {
	if (reader == null) {
		throw new IllegalArgumentException("Reader cannot be 'null'");
	}
	if (baseURI == null) {
		throw new IllegalArgumentException("Base URI cannot be 'null'");
	}

	InputSource source = new InputSource(reader);
	source.setSystemId(baseURI);

	parse(source);
}
/**
* Runs one parse over the given SAX input source. The SAX filter is installed as content handler and this parser
* as error handler on the XML reader obtained from getXMLReader(). SAX exceptions are translated into fatal parse
* errors, except that wrapped RDFParseException/RDFHandlerException instances inside a plain SAXException are
* rethrown unchanged. Per-document state is reset when the method exits, whether it succeeds or fails.
*
* @param inputSource the SAX input source; its system ID is used as the document URI.
* @throws IOException         if the XML reader fails to read the input.
* @throws RDFParseException   if a parse error is reported as fatal, or one was wrapped in a SAXException.
* @throws RDFHandlerException if one was wrapped in a SAXException.
*/
private void parse(InputSource inputSource) throws IOException, RDFParseException, RDFHandlerException {
// Reset inherited parser state before starting on a new document.
clear();
try {
documentURI = inputSource.getSystemId();
saxFilter.setParseStandAloneDocuments(getParserConfig().get(XMLParserSettings.PARSE_STANDALONE_DOCUMENTS));
// saxFilter.clear();
saxFilter.setDocumentURI(documentURI);
XMLReader xmlReader = getXMLReader();
xmlReader.setContentHandler(saxFilter);
xmlReader.setErrorHandler(this);
xmlReader.parse(inputSource);
} catch (SAXParseException e) {
// Report with the location carried by the SAX exception; prefer the wrapped cause when there is one.
Exception wrappedExc = e.getException();
if (wrappedExc == null) {
reportFatalError(e, e.getLineNumber(), e.getColumnNumber());
} else {
reportFatalError(wrappedExc, e.getLineNumber(), e.getColumnNumber());
}
} catch (SAXException e) {
// RDF exceptions that were wrapped in a SAXException are unwrapped and rethrown as-is;
// anything else is reported as a fatal error.
Exception wrappedExc = e.getException();
if (wrappedExc == null) {
reportFatalError(e);
} else if (wrappedExc instanceof RDFParseException) {
throw (RDFParseException) wrappedExc;
} else if (wrappedExc instanceof RDFHandlerException) {
throw (RDFHandlerException) wrappedExc;
} else {
reportFatalError(wrappedExc);
}
} finally {
// Clean up
saxFilter.clear();
xmlLang = null;
elementStack.clear();
usedIDs.clear();
clear();
}
}
@Override
public Collection> getSupportedSettings() {
// Override to add RDF/XML specific supported settings
Set> results = new HashSet<>(super.getSupportedSettings());
results.addAll(getCompulsoryXmlPropertySettings());
results.addAll(getCompulsoryXmlFeatureSettings());
results.addAll(getOptionalXmlPropertySettings());
results.addAll(getOptionalXmlFeatureSettings());
results.add(XMLParserSettings.CUSTOM_XML_READER);
results.add(XMLParserSettings.FAIL_ON_DUPLICATE_RDF_ID);
results.add(XMLParserSettings.FAIL_ON_INVALID_NCNAME);
results.add(XMLParserSettings.FAIL_ON_INVALID_QNAME);
results.add(XMLParserSettings.FAIL_ON_MISMATCHED_TAGS);
results.add(XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES);
results.add(XMLParserSettings.FAIL_ON_SAX_NON_FATAL_ERRORS);
results.add(XMLParserSettings.PARSE_STANDALONE_DOCUMENTS);
return results;
}
/**
* Creates a {@link SAXResult} that is backed by this parser's SAX filter, after recording the given base URI as
* the document URI on both this parser and the filter.
*
* @param baseURI the base URI of the document; must not be null.
* @return a new SAXResult wrapping the SAX filter.
* @throws IllegalArgumentException if baseURI is null.
*/
public SAXResult getSAXResult(String baseURI) {
if (baseURI == null) {
throw new IllegalArgumentException("Base URI cannot be 'null'");
}
documentURI = baseURI;
saxFilter.setDocumentURI(baseURI);
return new SAXResult(saxFilter);
}
/**
* Signals the start of the document: calls startRDF() on the configured RDF handler, if there is one.
*/
void startDocument() throws RDFParseException, RDFHandlerException {
if (rdfHandler != null) {
rdfHandler.startRDF();
}
}
/**
* Signals the end of the document: calls endRDF() on the configured RDF handler, if there is one.
*/
void endDocument() throws RDFParseException, RDFHandlerException {
if (rdfHandler != null) {
rdfHandler.endRDF();
}
}
/*-----------------------------*
* Methods called by SAXFilter *
*-----------------------------*/
/**
* Sets the base URI by delegating to the superclass implementation.
*
* @param baseURI the new base URI, passed through unchanged.
*/
@Override
protected void setBaseURI(ParsedIRI baseURI) {
// Note: we need to override this method to allow SAXFilter to access it
super.setBaseURI(baseURI);
}
/**
* Sets the base URI by delegating to the superclass implementation.
*
* @param baseURI the new base URI as a string, passed through unchanged.
*/
@Override
protected void setBaseURI(String baseURI) {
// Note: we need to override this method to allow SAXFilter to access it
super.setBaseURI(baseURI);
}
/**
 * Records the xml:lang value that applies to the elements reported next. An empty string is stored as null.
 *
 * @param xmlLang the language tag in scope; may be null or empty.
 */
void setXMLLang(String xmlLang) {
	this.xmlLang = "".equals(xmlLang) ? null : xmlLang;
}
/**
 * Handles the start of a non-empty element. Depending on what is on top of the element stack, the element is
 * processed either as a node element or as a property element.
 */
void startElement(String namespaceURI, String localName, String qName, Atts atts)
		throws RDFParseException, RDFHandlerException {
	if (!topIsProperty()) {
		// the enclosing element is a node, so this element represents a property
		processPropertyElt(namespaceURI, localName, qName, atts, false);
		return;
	}

	// this element represents the subject and/or object of a statement
	processNodeElt(namespaceURI, localName, qName, atts, false);
}
/**
 * Handles the end of an element by unwinding the element stack. A volatile node element on top of the stack is
 * popped in addition to the regular pop. For a property element in collection mode, the list is terminated: an
 * empty list is reported as rdf:nil (with reification handling), a non-empty list gets its final rdf:rest
 * rdf:nil statement.
 */
void endElement(String namespaceURI, String localName, String qName) throws RDFParseException, RDFHandlerException {
	Object top = peekStack(0);

	if (top instanceof NodeElement) {
		// A 'volatile' node has no start- and end element of its own, so it is removed here together with
		// the element that is being closed.
		NodeElement node = (NodeElement) top;
		if (node.isVolatile()) {
			elementStack.pop();
		}
	} else {
		// top instanceof PropertyElement
		PropertyElement property = (PropertyElement) top;

		if (property.parseCollection()) {
			Resource listTail = property.getLastListResource();

			if (listTail != null) {
				// Generate the final tail of the list.
				reportStatement(listTail, RDF.REST, RDF.NIL);
			} else {
				// no last list resource, list must have been empty.
				NodeElement owner = (NodeElement) peekStack(1);
				reportStatement(owner.getResource(), property.getURI(), RDF.NIL);
				handleReification(RDF.NIL);
			}
		}
	}

	elementStack.pop();
}
/**
 * Handles an empty element. Depending on what is on top of the element stack, the element is processed either
 * as an empty node element or as an empty property element.
 */
void emptyElement(String namespaceURI, String localName, String qName, Atts atts)
		throws RDFParseException, RDFHandlerException {
	if (!topIsProperty()) {
		// the enclosing element is a node, so this element represents a property
		processPropertyElt(namespaceURI, localName, qName, atts, true);
		return;
	}

	// this element represents the subject and/or object of a statement
	processNodeElt(namespaceURI, localName, qName, atts, true);
}
/**
 * Handles character data. The text becomes a literal object of the property element on top of the stack, using
 * that property's datatype and the xml:lang value currently in scope; the subject is the node element directly
 * below. If the top of the stack is not a property element, an error is reported and the text is ignored.
 */
void text(String text) throws RDFParseException, RDFHandlerException {
	if (!topIsProperty()) {
		reportError("unexpected literal", XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES);
		return;
	}

	PropertyElement property = (PropertyElement) peekStack(0);
	Literal literal = createLiteral(text, xmlLang, property.getDatatype());

	NodeElement owner = (NodeElement) peekStack(1);
	reportStatement(owner.getResource(), property.getURI(), literal);

	handleReification(literal);
}
/*------------------------*
* RDF processing methods *
*------------------------*/
/**
* Processes a node element (can be both subject and object). Reports, in this order: the statement (or rdf:List
* link) connecting the node to the enclosing property element, an rdf:type statement derived from the element
* name unless the element is rdf:Description, an rdf:type statement for an rdf:type attribute, and one statement
* per remaining attribute. Non-empty elements are pushed on the element stack.
*
* @param isEmptyElt true if the element has no content; such elements are not pushed on the stack.
*/
private void processNodeElt(String namespaceURI, String localName, String qName, Atts atts, boolean isEmptyElt)
throws RDFParseException, RDFHandlerException {
if (getParserConfig().get(XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES)) {
// Check the element name
checkNodeEltName(namespaceURI, localName, qName);
}
// Determines the node's resource; this also removes rdf:ID, rdf:about and rdf:nodeID from atts.
Resource nodeResource = getNodeResource(atts);
NodeElement nodeElement = new NodeElement(nodeResource);
if (!elementStack.isEmpty()) {
// node can be object of a statement, or part of an rdf:List
NodeElement subject = (NodeElement) peekStack(1);
PropertyElement predicate = (PropertyElement) peekStack(0);
if (predicate.parseCollection()) {
Resource lastListRes = predicate.getLastListResource();
Resource newListRes = createNode();
if (lastListRes == null) {
// first element in the list
reportStatement(subject.getResource(), predicate.getURI(), newListRes);
handleReification(newListRes);
} else {
// not the first element in the list
reportStatement(lastListRes, RDF.REST, newListRes);
}
reportStatement(newListRes, RDF.FIRST, nodeResource);
predicate.setLastListResource(newListRes);
} else {
reportStatement(subject.getResource(), predicate.getURI(), nodeResource);
handleReification(nodeResource);
}
}
if (!localName.equals("Description") || !namespaceURI.equals(RDF.NAMESPACE)) {
// element name is uri's type
IRI className;
if ("".equals(namespaceURI)) {
// No namespace, use base URI
className = buildResourceFromLocalName(localName);
} else {
className = createURI(namespaceURI + localName);
}
reportStatement(nodeResource, RDF.TYPE, className);
}
Att type = atts.removeAtt(RDF.NAMESPACE, "type");
if (type != null) {
// rdf:type attribute, value is a URI-reference
IRI className = resolveURI(type.getValue());
reportStatement(nodeResource, RDF.TYPE, className);
}
if (getParserConfig().get(XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES)) {
checkRDFAtts(atts);
}
// Whatever attributes are left are reported as literal-valued properties of the node.
processSubjectAtts(nodeElement, atts);
if (!isEmptyElt) {
elementStack.push(nodeElement);
}
}
/**
 * Determines the resource of a node element (subject or object) from its rdf:ID, rdf:about and rdf:nodeID
 * attributes, all three of which are removed from the attribute list. rdf:ID takes precedence over rdf:about,
 * which takes precedence over rdf:nodeID; if none is present a fresh bNode is generated. When
 * FAIL_ON_NON_STANDARD_ATTRIBUTES is enabled, the presence of more than one of them is reported as an error.
 *
 * @return a resource or a bNode.
 */
private Resource getNodeResource(Atts atts) throws RDFParseException {
	Att id = atts.removeAtt(RDF.NAMESPACE, "ID");
	Att about = atts.removeAtt(RDF.NAMESPACE, "about");
	Att nodeID = atts.removeAtt(RDF.NAMESPACE, "nodeID");

	if (getParserConfig().get(XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES)) {
		int present = (id != null ? 1 : 0) + (about != null ? 1 : 0) + (nodeID != null ? 1 : 0);

		if (present > 1) {
			reportError("Only one of the attributes rdf:ID, rdf:about or rdf:nodeID can be used here",
					XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES);
		}
	}

	if (id != null) {
		return buildURIFromID(id.getValue());
	}
	if (about != null) {
		return resolveURI(about.getValue());
	}
	if (nodeID != null) {
		return createNode(nodeID.getValue());
	}

	// No resource specified, generate a bNode
	return createNode();
}
/**
 * Reports one statement for every attribute left in the attribute list: the node element's resource is the
 * subject, the attribute's URI the predicate, and the attribute value a plain literal tagged with the xml:lang
 * value currently in scope.
 *
 * @param nodeElt the node element whose resource is used as subject.
 * @param atts    the remaining (property) attributes of the element.
 */
private void processSubjectAtts(NodeElement nodeElt, Atts atts) throws RDFParseException, RDFHandlerException {
	Resource subject = nodeElt.getResource();

	// The iterator must be typed: with a raw Iterator, next() yields Object and cannot be assigned to Att.
	Iterator<Att> iter = atts.iterator();

	while (iter.hasNext()) {
		Att att = iter.next();

		IRI predicate = createURI(att.getURI());
		Literal lit = createLiteral(att.getValue(), xmlLang, null);

		reportStatement(subject, predicate, lit);
	}
}
/**
* Processes a property element. The property is pushed on the element stack first, so that the helper methods
* that inspect the stack (e.g. handleReification) see it on top with the subject node directly below it; for
* empty elements it is popped again at the end of the method. What is reported depends on rdf:parseType, on
* whether the element is empty, and on the attributes that are present.
*
* @param isEmptyElt true if the element has no content.
*/
private void processPropertyElt(String namespaceURI, String localName, String qName, Atts atts, boolean isEmptyElt)
throws RDFParseException, RDFHandlerException {
if (getParserConfig().get(XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES)) {
checkPropertyEltName(namespaceURI, localName, qName, XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES);
}
// Get the URI of the property
IRI propURI;
if (namespaceURI.isEmpty()) {
// no namespace URI
reportError("unqualified property element <" + qName + "> not allowed",
XMLParserSettings.FAIL_ON_INVALID_QNAME);
// Use base URI as namespace:
propURI = buildResourceFromLocalName(localName);
} else {
propURI = createURI(namespaceURI + localName);
}
// List expansion rule
if (propURI.equals(RDF.LI)) {
// rdf:li is replaced by rdf:_n, where n comes from the counter of the enclosing node element.
NodeElement subject = (NodeElement) peekStack(0);
propURI = createURI(RDF.NAMESPACE + "_" + subject.getNextLiCounter());
}
// Push the property on the stack.
PropertyElement predicate = new PropertyElement(propURI);
elementStack.push(predicate);
// Check if property has a reification ID
Att id = atts.removeAtt(RDF.NAMESPACE, "ID");
if (id != null) {
IRI reifURI = buildURIFromID(id.getValue());
predicate.setReificationURI(reifURI);
}
// Check for presence of rdf:parseType attribute
Att parseType = atts.removeAtt(RDF.NAMESPACE, "parseType");
if (parseType != null) {
if (getParserConfig().get(XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES)) {
checkNoMoreAtts(atts);
}
String parseTypeValue = parseType.getValue();
if (parseTypeValue.equals("Resource")) {
Resource objectResource = createNode();
NodeElement subject = (NodeElement) peekStack(1);
reportStatement(subject.getResource(), propURI, objectResource);
if (isEmptyElt) {
handleReification(objectResource);
} else {
// Push an implicit ('volatile') node element for the bNode; endElement() pops it together with
// this property element.
NodeElement object = new NodeElement(objectResource);
object.setIsVolatile(true);
elementStack.push(object);
}
} else if (parseTypeValue.equals("Collection")) {
if (isEmptyElt) {
NodeElement subject = (NodeElement) peekStack(1);
reportStatement(subject.getResource(), propURI, RDF.NIL);
handleReification(RDF.NIL);
} else {
predicate.setParseCollection(true);
}
} else {
// other parseType
// Any value other than "Literal" triggers a warning, but is handled like "Literal".
if (!parseTypeValue.equals("Literal")) {
reportWarning("unknown parseType: " + parseType.getValue());
}
if (isEmptyElt) {
NodeElement subject = (NodeElement) peekStack(1);
Literal lit = createLiteral("", null, RDF.XMLLITERAL);
reportStatement(subject.getResource(), propURI, lit);
handleReification(lit);
} else {
// The next string is an rdf:XMLLiteral
predicate.setDatatype(RDF.XMLLITERAL);
saxFilter.setParseLiteralMode();
}
}
}
// parseType == null
else if (isEmptyElt) {
// empty element without an rdf:parseType attribute
// Note: we handle rdf:datatype attributes here to allow datatyped
// empty strings in documents. NOTE(review): the original note read "The current spec does have a
// production rule that matches this, which is likely to be an omission on its part"; presumably
// "does not have" was meant - confirm against the RDF/XML syntax specification.
Att datatype = atts.getAtt(RDF.NAMESPACE, "datatype");
if (atts.size() == 0 || atts.size() == 1 && datatype != null) {
// element had no attributes, or only the optional
// rdf:ID and/or rdf:datatype attributes.
NodeElement subject = (NodeElement) peekStack(1);
IRI dtURI = null;
if (datatype != null) {
dtURI = createURI(datatype.getValue());
}
Literal lit = createLiteral("", xmlLang, dtURI);
reportStatement(subject.getResource(), propURI, lit);
handleReification(lit);
} else {
// Create resource for the statement's object.
Resource resourceRes = getPropertyResource(atts);
// All special rdf attributes have been checked/removed.
if (getParserConfig().get(XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES)) {
checkRDFAtts(atts);
}
NodeElement resourceElt = new NodeElement(resourceRes);
NodeElement subject = (NodeElement) peekStack(1);
reportStatement(subject.getResource(), propURI, resourceRes);
handleReification(resourceRes);
Att type = atts.removeAtt(RDF.NAMESPACE, "type");
if (type != null) {
// rdf:type attribute, value is a URI-reference
IRI className = resolveURI(type.getValue());
reportStatement(resourceRes, RDF.TYPE, className);
}
// Remaining attributes become literal-valued properties of the object resource.
processSubjectAtts(resourceElt, atts);
}
} else {
// Not an empty element, sub elements will follow.
// Check for rdf:datatype attribute
Att datatype = atts.removeAtt(RDF.NAMESPACE, "datatype");
if (datatype != null) {
IRI dtURI = resolveURI(datatype.getValue());
predicate.setDatatype(dtURI);
}
// No more attributes are expected.
if (getParserConfig().get(XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES)) {
checkNoMoreAtts(atts);
}
}
if (isEmptyElt) {
// Empty element has been pushed on the stack
// at the start of this method, remove it.
elementStack.pop();
}
}
/**
 * Determines the object resource of a property element from its rdf:resource and rdf:nodeID attributes, both of
 * which are removed from the attribute list. rdf:resource takes precedence over rdf:nodeID; if neither is
 * present a fresh bNode is generated. When FAIL_ON_NON_STANDARD_ATTRIBUTES is enabled, the presence of both
 * attributes is reported as an error.
 *
 * @return a resource or a bNode.
 */
private Resource getPropertyResource(Atts atts) throws RDFParseException {
	Att resource = atts.removeAtt(RDF.NAMESPACE, "resource");
	Att nodeID = atts.removeAtt(RDF.NAMESPACE, "nodeID");

	if (getParserConfig().get(XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES)) {
		boolean bothPresent = resource != null && nodeID != null;

		if (bothPresent) {
			reportError("Only one of the attributes rdf:resource or rdf:nodeID can be used here",
					XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES);
		}
	}

	if (resource != null) {
		return resolveURI(resource.getValue());
	}
	if (nodeID != null) {
		return createNode(nodeID.getValue());
	}

	// No resource specified, generate a bNode
	return createNode();
}
/**
 * Generates the reified statements for a property element that carries an rdf:ID attribute. This method assumes
 * that a PropertyElement is on top of the stack and a NodeElement directly below it. Nothing is reported when
 * the property element has no reification URI.
 *
 * @param value the object of the statement that is being reified.
 */
private void handleReification(Value value) throws RDFParseException, RDFHandlerException {
	PropertyElement property = (PropertyElement) peekStack(0);

	if (!property.isReified()) {
		return;
	}

	NodeElement owner = (NodeElement) peekStack(1);
	reifyStatement(property.getReificationURI(), owner.getResource(), property.getURI(), value);
}
/**
* Reports the four statements that reify a statement: rdf:type rdf:Statement, rdf:subject, rdf:predicate and
* rdf:object, in that order, all with reifNode as subject.
*
* @param reifNode the resource identifying the reified statement.
* @param subj the subject of the reified statement.
* @param pred the predicate of the reified statement.
* @param obj the object of the reified statement.
*/
private void reifyStatement(Resource reifNode, Resource subj, IRI pred, Value obj)
throws RDFParseException, RDFHandlerException {
reportStatement(reifNode, RDF.TYPE, RDF.STATEMENT);
reportStatement(reifNode, RDF.SUBJECT, subj);
reportStatement(reifNode, RDF.PREDICATE, pred);
reportStatement(reifNode, RDF.OBJECT, obj);
}
/**
* Builds an IRI from a non-qualified local name by resolving the fragment "#" + localName via resolveURI.
*
* @param localName the local name of an element that has no namespace.
* @return the resolved IRI.
*/
private IRI buildResourceFromLocalName(String localName) throws RDFParseException {
return resolveURI("#" + localName);
}
/**
 * Builds an IRI from the value of an rdf:ID attribute by resolving the fragment "#" + id. When
 * FAIL_ON_INVALID_NCNAME is enabled, an id that is not a legal NCName is reported as an error. When
 * FAIL_ON_DUPLICATE_RDF_ID is enabled, the resulting IRI is recorded and a repeated IRI is reported as an
 * error.
 *
 * @param id the value of the rdf:ID attribute.
 * @return the resolved IRI; it is returned even if an error was reported without an exception being thrown.
 */
private IRI buildURIFromID(String id) throws RDFParseException {
	boolean checkNCName = getParserConfig().get(XMLParserSettings.FAIL_ON_INVALID_NCNAME);
	if (checkNCName && !XMLUtil.isNCName(id)) {
		reportError("Not an XML Name: " + id, XMLParserSettings.FAIL_ON_INVALID_NCNAME);
	}

	IRI uri = resolveURI("#" + id);

	// ID (URI) should be unique in the current document; add() returns false when the set already
	// contained an equal URI. The set is only updated when the duplicate check is enabled.
	boolean checkDuplicates = getParserConfig().get(XMLParserSettings.FAIL_ON_DUPLICATE_RDF_ID);
	if (checkDuplicates && !usedIDs.add(uri)) {
		reportError("ID '" + id + "' has already been defined", XMLParserSettings.FAIL_ON_DUPLICATE_RDF_ID);
	}

	return uri;
}
/**
 * Creates a bNode for the given rdf:nodeID value via the superclass. When FAIL_ON_INVALID_NCNAME is enabled, a
 * node ID that is not a legal NCName is reported as an error first.
 *
 * @param nodeID the value of the rdf:nodeID attribute.
 * @return the resource created by the superclass implementation.
 */
@Override
protected Resource createNode(String nodeID) throws RDFParseException {
	boolean checkNCName = getParserConfig().get(XMLParserSettings.FAIL_ON_INVALID_NCNAME);
	if (checkNCName && !XMLUtil.isNCName(nodeID)) {
		reportError("Not an XML Name: " + nodeID, XMLParserSettings.FAIL_ON_INVALID_NCNAME);
	}

	return super.createNode(nodeID);
}
/**
 * Returns an entry of the element stack without removing it.
 *
 * @param distFromTop distance from the top of the stack; 0 is the top element, 1 the one below it, etc.
 * @return the stack entry at that position.
 */
private Object peekStack(int distFromTop) {
	int index = elementStack.size() - 1 - distFromTop;
	return elementStack.get(index);
}
/**
 * Tells whether the next element is to be treated as a node element.
 *
 * @return true if the element stack is empty or has a PropertyElement on top.
 */
private boolean topIsProperty() {
	if (elementStack.isEmpty()) {
		return true;
	}
	return peekStack(0) instanceof PropertyElement;
}
/**
 * Checks whether a node element name from the RDF namespace is allowed as a node element. Names outside the RDF
 * namespace are not checked. Disallowed names (li, RDF, ID, about, parseType, resource, nodeID, datatype) and
 * obsolete names (bagID, aboutEach, aboutEachPrefix) are reported as errors; names that are neither known nor
 * starting with an underscore produce a warning.
 */
private void checkNodeEltName(String namespaceURI, String localName, String qName) throws RDFParseException {
	if (!RDF.NAMESPACE.equals(namespaceURI)) {
		return;
	}

	switch (localName) {
	case "Description":
	case "Seq":
	case "Bag":
	case "Alt":
	case "Statement":
	case "Property":
	case "List":
	case "subject":
	case "predicate":
	case "object":
	case "type":
	case "value":
	case "first":
	case "rest":
	case "nil":
		// These are OK
		return;
	case "li":
	case "RDF":
	case "ID":
	case "about":
	case "parseType":
	case "resource":
	case "nodeID":
	case "datatype":
		reportError("<" + qName + "> not allowed as node element",
				XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES);
		return;
	case "bagID":
	case "aboutEach":
	case "aboutEachPrefix":
		reportError(qName + " is no longer a valid RDF name",
				XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES);
		return;
	default:
		// Names starting with an underscore (rdf:_1, rdf:_2, ...) are OK as well.
		if (!localName.startsWith("_")) {
			reportWarning("unknown rdf element <" + qName + ">");
		}
	}
}
/**
 * Checks whether a property element name from the RDF namespace is allowed as a property element. Names outside
 * the RDF namespace are not checked. Disallowed names (Description, RDF, ID, about, parseType, resource, nodeID,
 * datatype) and obsolete names (bagID, aboutEach, aboutEachPrefix) are reported as errors; names that are
 * neither known nor starting with an underscore produce a warning.
 *
 * @param setting the setting that is passed along with any error that is reported.
 */
private void checkPropertyEltName(String namespaceURI, String localName, String qName, RioSetting setting)
		throws RDFParseException {
	if (!RDF.NAMESPACE.equals(namespaceURI)) {
		return;
	}

	switch (localName) {
	case "li":
	case "Seq":
	case "Bag":
	case "Alt":
	case "Statement":
	case "Property":
	case "List":
	case "subject":
	case "predicate":
	case "object":
	case "type":
	case "value":
	case "first":
	case "rest":
	case "nil":
		// These are OK
		return;
	case "Description":
	case "RDF":
	case "ID":
	case "about":
	case "parseType":
	case "resource":
	case "nodeID":
	case "datatype":
		reportError("<" + qName + "> not allowed as property element", setting);
		return;
	case "bagID":
	case "aboutEach":
	case "aboutEachPrefix":
		reportError(qName + " is no longer a valid RDF name", setting);
		return;
	default:
		// Names starting with an underscore (rdf:_1, rdf:_2, ...) are OK as well.
		if (!localName.startsWith("_")) {
			reportWarning("unknown rdf element <" + qName + ">");
		}
	}
}
/**
 * Checks whether 'atts' contains attributes from the RDF namespace that are not allowed as attributes.
 * Disallowed names (Description, li, RDF, ID, about, parseType, resource, nodeID, datatype) are reported as
 * errors and removed from 'atts'. Obsolete names (bagID, aboutEach, aboutEachPrefix) are reported as errors but
 * left in 'atts'. Any other unknown name from the RDF namespace that does not start with an underscore produces
 * a warning. Attributes outside the RDF namespace are not checked.
 *
 * @param atts the attribute list to check; may be modified.
 */
private void checkRDFAtts(Atts atts) throws RDFParseException {
	// The iterator must be typed: with a raw Iterator, next() yields Object and cannot be assigned to Att.
	Iterator<Att> iter = atts.iterator();

	while (iter.hasNext()) {
		Att att = iter.next();

		if (RDF.NAMESPACE.equals(att.getNamespace())) {
			String localName = att.getLocalName();

			if (localName.equals("Seq") || localName.equals("Bag") || localName.equals("Alt")
					|| localName.equals("Statement") || localName.equals("Property") || localName.equals("List")
					|| localName.equals("subject") || localName.equals("predicate") || localName.equals("object")
					|| localName.equals("type") || localName.equals("value") || localName.equals("first")
					|| localName.equals("rest") || localName.equals("nil") || localName.startsWith("_")) {
				// These are OK
			} else if (localName.equals("Description") || localName.equals("li") || localName.equals("RDF")
					|| localName.equals("ID") || localName.equals("about") || localName.equals("parseType")
					|| localName.equals("resource") || localName.equals("nodeID") || localName.equals("datatype")) {
				reportError("'" + att.getQName() + "' not allowed as attribute name",
						XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES);
				// Only reached when the error did not abort parsing; drop the attribute so that it is not
				// turned into a statement later on.
				iter.remove();
			} else if (localName.equals("bagID") || localName.equals("aboutEach")
					|| localName.equals("aboutEachPrefix")) {
				reportError(att.getQName() + " is no longer a valid RDF name",
						XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES);
			} else {
				reportWarning("unknown rdf attribute '" + att.getQName() + "'");
			}
		}
	}
}
/**
 * Checks whether 'atts' is empty. If this is not the case, an error is reported for each attribute that is
 * still present, and the attribute is removed from 'atts'.
 *
 * @param atts the attribute list that is expected to be empty; may be modified.
 */
private void checkNoMoreAtts(Atts atts) throws RDFParseException {
	if (atts.size() > 0) {
		// The iterator must be typed: with a raw Iterator, next() yields Object and cannot be assigned to Att.
		Iterator<Att> iter = atts.iterator();

		while (iter.hasNext()) {
			Att att = iter.next();

			reportError("unexpected attribute '" + att.getQName() + "'",
					XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES);
			iter.remove();
		}
	}
}
/**
* Creates a statement from the supplied values and reports it to the configured RDFHandler, if there is one. The
* statement is created even when no handler is configured.
*
* @param subject The statement's subject.
* @param predicate The statement's predicate.
* @param object The statement's object.
* @throws RDFParseException If creating the statement fails.
* @throws RDFHandlerException If the configured RDFHandler throws an RDFHandlerException.
*/
private void reportStatement(Resource subject, IRI predicate, Value object)
throws RDFParseException, RDFHandlerException {
Statement st = createStatement(subject, predicate, object);
if (rdfHandler != null) {
rdfHandler.handleStatement(st);
}
}
/**
 * Creates a literal, passing along the line- and column number reported by the SAX filter's locator, or -1 for
 * both when no locator is available.
 */
@Override
protected Literal createLiteral(String label, String lang, IRI datatype) throws RDFParseException {
	Locator locator = saxFilter.getLocator();
	int line = locator == null ? -1 : locator.getLineNumber();
	int column = locator == null ? -1 : locator.getColumnNumber();

	return createLiteral(label, lang, datatype, line, column);
}
/**
 * Overrides {@link AbstractRDFParser#reportWarning(String)}, adding the line- and column number from the SAX
 * filter's locator to the warning (-1 for both when no locator is available).
 */
@Override
protected void reportWarning(String msg) {
	Locator locator = saxFilter.getLocator();
	int line = locator == null ? -1 : locator.getLineNumber();
	int column = locator == null ? -1 : locator.getColumnNumber();

	reportWarning(msg, line, column);
}
/**
 * Overrides {@link AbstractRDFParser#reportError(String, RioSetting)}, adding the line- and column number from
 * the SAX filter's locator to the error (-1 for both when no locator is available).
 */
@Override
protected void reportError(String msg, RioSetting setting) throws RDFParseException {
	Locator locator = saxFilter.getLocator();
	int line = locator == null ? -1 : locator.getLineNumber();
	int column = locator == null ? -1 : locator.getColumnNumber();

	reportError(msg, line, column, setting);
}
/**
 * Overrides {@link AbstractRDFParser#reportError(Exception, RioSetting)}, adding the line- and column number
 * from the SAX filter's locator to the error (-1 for both when no locator is available).
 */
@Override
protected void reportError(Exception e, RioSetting setting) throws RDFParseException {
	Locator locator = saxFilter.getLocator();
	int line = locator == null ? -1 : locator.getLineNumber();
	int column = locator == null ? -1 : locator.getColumnNumber();

	reportError(e, line, column, setting);
}
/**
 * Overrides {@link AbstractRDFParser#reportFatalError(String)}, adding the line- and column number from the SAX
 * filter's locator to the error (-1 for both when no locator is available).
 */
@Override
protected void reportFatalError(String msg) throws RDFParseException {
	Locator locator = saxFilter.getLocator();
	int line = locator == null ? -1 : locator.getLineNumber();
	int column = locator == null ? -1 : locator.getColumnNumber();

	reportFatalError(msg, line, column);
}
/**
 * Overrides {@link AbstractRDFParser#reportFatalError(Exception)}, adding the line- and column number from the
 * SAX filter's locator to the error (-1 for both when no locator is available).
 */
@Override
protected void reportFatalError(Exception e) throws RDFParseException {
	Locator locator = saxFilter.getLocator();
	int line = locator == null ? -1 : locator.getLineNumber();
	int column = locator == null ? -1 : locator.getColumnNumber();

	reportFatalError(e, line, column);
}
/*-----------------------------------------------*
* Inner classes NodeElement and PropertyElement *
*-----------------------------------------------*/
/**
 * Element stack entry for a node element, i.e. an element that represents the subject and/or object of
 * statements.
 */
static class NodeElement {

	/** The resource (IRI or bNode) this node element stands for. */
	private final Resource resource;

	/** Whether this node has no start- and end element of its own in the document. */
	private boolean isVolatile = false;

	/** The number to use for the next rdf:li child of this node; starts at 1. */
	private int liCounter = 1;

	public NodeElement(Resource resource) {
		this.resource = resource;
	}

	public Resource getResource() {
		return this.resource;
	}

	public void setIsVolatile(boolean isVolatile) {
		this.isVolatile = isVolatile;
	}

	public boolean isVolatile() {
		return this.isVolatile;
	}

	/**
	 * Returns the current rdf:li counter value and advances the counter by one.
	 */
	public int getNextLiCounter() {
		int current = this.liCounter;
		this.liCounter = current + 1;
		return current;
	}
}
/**
 * Element stack entry for a property element, i.e. an element that represents the predicate of statements.
 */
static class PropertyElement {

	/** The property URI. */
	private final IRI uri;

	/** An optional reification identifier; null when the property element is not reified. */
	private IRI reificationURI;

	/** An optional datatype for the literal value of this property. */
	private IRI datatype;

	/** Flag indicating whether this PropertyElement has an attribute rdf:parseType="Collection". */
	private boolean parseCollection = false;

	/** The resource that was used to append the last part of an rdf:List. */
	private Resource lastListResource;

	public PropertyElement(IRI uri) {
		this.uri = uri;
	}

	public IRI getURI() {
		return this.uri;
	}

	/**
	 * Returns true if a reification URI has been set on this property element.
	 */
	public boolean isReified() {
		return this.reificationURI != null;
	}

	public IRI getReificationURI() {
		return this.reificationURI;
	}

	public void setReificationURI(IRI reifURI) {
		this.reificationURI = reifURI;
	}

	public IRI getDatatype() {
		return this.datatype;
	}

	public void setDatatype(IRI datatype) {
		this.datatype = datatype;
	}

	public boolean parseCollection() {
		return this.parseCollection;
	}

	public void setParseCollection(boolean parseCollection) {
		this.parseCollection = parseCollection;
	}

	public Resource getLastListResource() {
		return this.lastListResource;
	}

	public void setLastListResource(Resource resource) {
		this.lastListResource = resource;
	}
}
/**
* Implementation of SAX ErrorHandler.warning. The message of the SAX exception is reported as a parser warning.
*
* @param exception the warning raised by the XML reader.
*/
@Override
public void warning(SAXParseException exception) throws SAXException {
this.reportWarning(exception.getMessage());
}
/**
* Implementation of SAX ErrorHandler.error. The exception is reported as a parse error governed by the
* {@link XMLParserSettings#FAIL_ON_SAX_NON_FATAL_ERRORS} setting.
*
* @param exception the recoverable error raised by the XML reader.
* @throws SAXException wrapping the RDFParseException, if reporting the error raises one.
*/
@Override
public void error(SAXParseException exception) throws SAXException {
try {
this.reportError(exception, XMLParserSettings.FAIL_ON_SAX_NON_FATAL_ERRORS);
} catch (RDFParseException rdfpe) {
// The SAX callback can only throw SAXException, so the RDF exception travels as its wrapped cause.
throw new SAXException(rdfpe);
}
}
/**
* Implementation of SAX ErrorHandler.fatalError. The exception is reported as a fatal parse error.
*
* @param exception the non-recoverable error raised by the XML reader.
* @throws SAXException wrapping the RDFParseException, if reporting the error raises one.
*/
@Override
public void fatalError(SAXParseException exception) throws SAXException {
try {
this.reportFatalError(exception);
} catch (RDFParseException rdfpe) {
// The SAX callback can only throw SAXException, so the RDF exception travels as its wrapped cause.
throw new SAXException(rdfpe);
}
}
}