
// Source listing of org.eclipse.rdf4j.rio.trix.TriXParser (Maven / Gradle / Ivy artifact rdf4j-rio-trix).
// Artifact description: Rio parser and writer implementation for the TriX file format.
/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*******************************************************************************/
package org.eclipse.rdf4j.rio.trix;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.BNODE_TAG;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.CONTEXT_TAG;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.DATATYPE_ATT;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.LANGUAGE_ATT;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.PLAIN_LITERAL_TAG;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.TRIPLE_TAG;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.TYPED_LITERAL_TAG;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.URI_TAG;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.input.BOMInputStream;
import org.eclipse.rdf4j.common.xml.SimpleSAXAdapter;
import org.eclipse.rdf4j.common.xml.SimpleSAXParser;
import org.eclipse.rdf4j.common.xml.XMLReaderFactory;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RioSetting;
import org.eclipse.rdf4j.rio.helpers.AbstractRDFParser;
import org.eclipse.rdf4j.rio.helpers.TriXParserSettings;
import org.eclipse.rdf4j.rio.helpers.XMLParserSettings;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
/**
 * A parser that can parse RDF files that are in the TriX format.
 * <p>
 * The XML input is read through a {@link SimpleSAXParser}; the inner {@code TriXSAXHandler} turns the
 * element events into RDF values and reports one statement per completed triple element to the
 * configured RDF handler. This class also acts as the SAX {@link ErrorHandler} of the underlying
 * {@link XMLReader}, mapping SAX warnings and errors onto the parser's own reporting methods.
 * <p>
 * The SAX parser of the parse in progress is kept in an instance field, so a single instance should not
 * be used for several parses at the same time.
 *
 * @author Arjohn Kampman
 */
public class TriXParser extends AbstractRDFParser implements ErrorHandler {

    /*-----------*
     * Variables *
     *-----------*/

    /**
     * The SAX parser of the parse that is currently in progress, or {@code null} when no parse is
     * running. Only used to obtain line/column information for reported problems.
     */
    private SimpleSAXParser saxParser;

    /*--------------*
     * Constructors *
     *--------------*/

    /**
     * Creates a new TriXParser that will use a {@link SimpleValueFactory} to create objects for resources,
     * bNodes, literals and statements.
     */
    public TriXParser() {
        this(SimpleValueFactory.getInstance());
    }

    /**
     * Creates a new TriXParser that will use the supplied ValueFactory to create objects for resources,
     * bNodes, literals and statements.
     *
     * @param valueFactory
     *        A ValueFactory.
     */
    public TriXParser(ValueFactory valueFactory) {
        super(valueFactory);
    }

    /*---------*
     * Methods *
     *---------*/

    /**
     * Returns the format handled by this parser.
     *
     * @return {@link RDFFormat#TRIX}.
     */
    @Override
    public final RDFFormat getRDFFormat() {
        return RDFFormat.TRIX;
    }

    /**
     * Parses the data from the supplied InputStream, using the supplied baseURI to resolve any relative URI
     * references.
     *
     * @param in
     *        The InputStream from which to read the data, must not be null.
     * @param baseURI
     *        The URI associated with the data in the InputStream, must not be null.
     * @throws IOException
     *         If an I/O error occurred while data was read from the InputStream.
     * @throws RDFParseException
     *         If the parser has found an unrecoverable parse error.
     * @throws RDFHandlerException
     *         If the configured statement handler encountered an unrecoverable error.
     * @throws IllegalArgumentException
     *         If the supplied input stream or base URI is null.
     */
    @Override
    public void parse(InputStream in, String baseURI)
        throws IOException, RDFParseException, RDFHandlerException
    {
        if (in == null) {
            throw new IllegalArgumentException("Input stream cannot be 'null'");
        }
        if (baseURI == null) {
            throw new IllegalArgumentException("Base URI cannot be 'null'");
        }

        // Wrap the stream so that a leading byte order mark is not handed to the XML parser.
        InputSource inputSource = new InputSource(new BOMInputStream(in, false));
        inputSource.setSystemId(baseURI);

        parse(inputSource);
    }

    /**
     * Parses the data from the supplied Reader, using the supplied baseURI to resolve any relative URI
     * references.
     *
     * @param reader
     *        The Reader from which to read the data, must not be null.
     * @param baseURI
     *        The URI associated with the data in the Reader, must not be null.
     * @throws IOException
     *         If an I/O error occurred while data was read from the Reader.
     * @throws RDFParseException
     *         If the parser has found an unrecoverable parse error.
     * @throws RDFHandlerException
     *         If the configured statement handler has encountered an unrecoverable error.
     * @throws IllegalArgumentException
     *         If the supplied reader or base URI is null.
     */
    @Override
    public void parse(Reader reader, String baseURI)
        throws IOException, RDFParseException, RDFHandlerException
    {
        if (reader == null) {
            throw new IllegalArgumentException("Reader cannot be 'null'");
        }
        if (baseURI == null) {
            throw new IllegalArgumentException("Base URI cannot be 'null'");
        }

        InputSource inputSource = new InputSource(reader);
        inputSource.setSystemId(baseURI);

        parse(inputSource);
    }

    /**
     * Runs the actual SAX parse on the supplied input source, bracketed by {@code startRDF()} and
     * {@code endRDF()} calls on the configured RDF handler (if any).
     * <p>
     * SAX exceptions are translated: a wrapped {@link RDFParseException} or {@link RDFHandlerException}
     * is rethrown as-is, anything else is reported as a fatal error.
     *
     * @param inputStreamOrReader
     *        The input source wrapping either a stream or a reader.
     * @throws IOException
     *         If an I/O error occurred while reading the input.
     * @throws RDFParseException
     *         If the parser has found an unrecoverable parse error.
     * @throws RDFHandlerException
     *         If the configured statement handler encountered an unrecoverable error.
     */
    private void parse(InputSource inputStreamOrReader)
        throws IOException, RDFParseException, RDFHandlerException
    {
        if (rdfHandler != null) {
            rdfHandler.startRDF();
        }

        try {
            // NOTE(review): no entity/DTD restrictions are configured on the XMLReader in this method;
            // confirm whether XMLReaderFactory or the custom reader takes care of XXE hardening.
            XMLReader xmlReader;
            if (getParserConfig().isSet(XMLParserSettings.CUSTOM_XML_READER)) {
                xmlReader = getParserConfig().get(XMLParserSettings.CUSTOM_XML_READER);
            }
            else {
                xmlReader = XMLReaderFactory.createXMLReader();
            }

            xmlReader.setErrorHandler(this);

            saxParser = new SimpleSAXParser(xmlReader);
            // Keep whitespace in element text untouched; presumably significant for literal labels.
            saxParser.setPreserveWhitespace(true);
            saxParser.setListener(new TriXSAXHandler());

            saxParser.parse(inputStreamOrReader);
        }
        catch (SAXParseException e) {
            // Prefer the wrapped cause, but keep the position carried by the SAX exception.
            Exception wrappedExc = e.getException();
            if (wrappedExc == null) {
                reportFatalError(e, e.getLineNumber(), e.getColumnNumber());
            }
            else {
                reportFatalError(wrappedExc, e.getLineNumber(), e.getColumnNumber());
            }
        }
        catch (SAXException e) {
            Exception wrappedExc = e.getException();
            if (wrappedExc == null) {
                reportFatalError(e);
            }
            else if (wrappedExc instanceof RDFParseException) {
                throw (RDFParseException)wrappedExc;
            }
            else if (wrappedExc instanceof RDFHandlerException) {
                throw (RDFHandlerException)wrappedExc;
            }
            else {
                reportFatalError(wrappedExc);
            }
        }
        finally {
            // Drop the per-parse SAX parser so that a later parse (or a stray report call) can never
            // pick up line/column information from this, already finished, run.
            saxParser = null;
            clear();
        }

        if (rdfHandler != null) {
            rdfHandler.endRDF();
        }
    }

    /**
     * Returns the locator of the parse in progress.
     *
     * @return The current SAX locator, or {@code null} if no parse is running (for example when the
     *         XML reader could not even be created) or the SAX parser has no locator.
     */
    private Locator currentLocator() {
        SimpleSAXParser parser = saxParser;
        return parser == null ? null : parser.getLocator();
    }

    /**
     * Creates a literal, adding line- and column number information (or -1 when unavailable) to any
     * problem reported while doing so.
     */
    @Override
    protected Literal createLiteral(String label, String lang, IRI datatype)
        throws RDFParseException
    {
        Locator locator = currentLocator();
        if (locator != null) {
            return createLiteral(label, lang, datatype, locator.getLineNumber(), locator.getColumnNumber());
        }
        else {
            return createLiteral(label, lang, datatype, -1, -1);
        }
    }

    /**
     * Overrides {@link AbstractRDFParser#reportWarning(String)}, adding line- and column number information
     * to the warning.
     */
    @Override
    protected void reportWarning(String msg) {
        Locator locator = currentLocator();
        if (locator != null) {
            reportWarning(msg, locator.getLineNumber(), locator.getColumnNumber());
        }
        else {
            reportWarning(msg, -1, -1);
        }
    }

    /**
     * Overrides {@link AbstractRDFParser#reportError(String, RioSetting)}, adding line- and column number
     * information to the error.
     */
    @Override
    protected void reportError(String msg, RioSetting<Boolean> setting)
        throws RDFParseException
    {
        Locator locator = currentLocator();
        if (locator != null) {
            reportError(msg, locator.getLineNumber(), locator.getColumnNumber(), setting);
        }
        else {
            reportError(msg, -1, -1, setting);
        }
    }

    /**
     * Overrides {@link AbstractRDFParser#reportFatalError(String)}, adding line- and column number
     * information to the error.
     */
    @Override
    protected void reportFatalError(String msg)
        throws RDFParseException
    {
        Locator locator = currentLocator();
        if (locator != null) {
            reportFatalError(msg, locator.getLineNumber(), locator.getColumnNumber());
        }
        else {
            reportFatalError(msg, -1, -1);
        }
    }

    /**
     * Overrides {@link AbstractRDFParser#reportFatalError(Exception)}, adding line- and column number
     * information to the error.
     */
    @Override
    protected void reportFatalError(Exception e)
        throws RDFParseException
    {
        Locator locator = currentLocator();
        if (locator != null) {
            reportFatalError(e, locator.getLineNumber(), locator.getColumnNumber());
        }
        else {
            reportFatalError(e, -1, -1);
        }
    }

    /*----------------------------*
     * Inner class TriXSAXHandler *
     *----------------------------*/

    /**
     * SAX listener that collects the values found in the document and assembles them into statements.
     * <p>
     * Values are appended to {@link #valueList} as their elements start. Inside a context element the
     * values seen before the first triple form the context identifier; the values seen inside a triple
     * element form subject, predicate and object.
     */
    private class TriXSAXHandler extends SimpleSAXAdapter {

        /** Context of the graph being parsed, {@code null} when none was specified. */
        private Resource currentContext;

        /** True from the start of a context element until its first triple element starts. */
        private boolean parsingContext;

        /** Values collected so far for the context identifier or for the current triple. */
        private final List<Value> valueList;

        public TriXSAXHandler() {
            currentContext = null;
            valueList = new ArrayList<Value>(3);
        }

        /**
         * Converts value elements into RDF values and, on the first triple of a context, resolves the
         * context identifier from the values collected so far.
         *
         * @throws SAXException
         *         Wrapping any {@link RDFParseException} raised while creating or validating values.
         */
        @Override
        public void startTag(String tagName, Map<String, String> atts, String text)
            throws SAXException
        {
            try {
                if (tagName.equals(URI_TAG)) {
                    valueList.add(createURI(text));
                }
                else if (tagName.equals(BNODE_TAG)) {
                    valueList.add(createBNode(text));
                }
                else if (tagName.equals(PLAIN_LITERAL_TAG)) {
                    String lang = atts.get(LANGUAGE_ATT);
                    valueList.add(createLiteral(text, lang, null));
                }
                else if (tagName.equals(TYPED_LITERAL_TAG)) {
                    String datatype = atts.get(DATATYPE_ATT);

                    if (datatype == null) {
                        reportError(DATATYPE_ATT + " attribute missing for typed literal",
                                TriXParserSettings.FAIL_ON_TRIX_MISSING_DATATYPE);
                        // Only reached when the setting above is non-fatal: fall back to a literal
                        // without datatype.
                        valueList.add(createLiteral(text, null, null));
                    }
                    else {
                        IRI dtURI = createURI(datatype);
                        valueList.add(createLiteral(text, null, dtURI));
                    }
                }
                else if (tagName.equals(TRIPLE_TAG)) {
                    if (parsingContext) {
                        resolveContext();
                    }
                }
                else if (tagName.equals(CONTEXT_TAG)) {
                    parsingContext = true;
                }
            }
            catch (RDFParseException e) {
                throw new SAXException(e);
            }
        }

        /**
         * Determines the current context from the values collected before the first triple of a
         * context element, then resets the collection state for the triple itself.
         */
        private void resolveContext()
            throws RDFParseException
        {
            try {
                // First triple in a context, valueList can contain context information
                if (valueList.size() > 1) {
                    reportError("At most 1 resource can be specified for the context",
                            TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT);
                }
                else if (valueList.size() == 1) {
                    Value contextValue = valueList.get(0);
                    if (contextValue instanceof Resource) {
                        currentContext = (Resource)contextValue;
                    }
                    else {
                        reportError("Context identifier should be a URI or blank node",
                                TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT);
                    }
                }
            }
            finally {
                parsingContext = false;
                valueList.clear();
            }
        }

        /**
         * Reports a statement when a triple element ends and resets the context state when a context
         * element ends.
         *
         * @throws SAXException
         *         Wrapping any {@link RDFParseException} or {@link RDFHandlerException} raised while
         *         reporting the statement.
         */
        @Override
        public void endTag(String tagName)
            throws SAXException
        {
            try {
                if (tagName.equals(TRIPLE_TAG)) {
                    reportStatement();
                }
                else if (tagName.equals(CONTEXT_TAG)) {
                    currentContext = null;
                    // A context without any triple never reaches resolveContext(); without this reset
                    // its identifier would stay in valueList and be counted against the next context.
                    parsingContext = false;
                    valueList.clear();
                }
            }
            catch (RDFParseException e) {
                throw new SAXException(e);
            }
            catch (RDFHandlerException e) {
                throw new SAXException(e);
            }
        }

        /**
         * Builds a statement from the three collected values and hands it to the RDF handler. Invalid
         * triples are reported via {@code reportError} and skipped when that error is non-fatal. The
         * collected values are always cleared afterwards.
         */
        private void reportStatement()
            throws RDFParseException, RDFHandlerException
        {
            try {
                if (valueList.size() != 3) {
                    reportError("exactly 3 values are required for a triple",
                            TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT);
                    return;
                }

                Value subjValue = valueList.get(0);
                if (!(subjValue instanceof Resource)) {
                    reportError("First value for a triple should be a URI or blank node",
                            TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT);
                    return;
                }

                Value predValue = valueList.get(1);
                if (!(predValue instanceof IRI)) {
                    reportError("Second value for a triple should be a URI",
                            TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT);
                    return;
                }

                Resource subj = (Resource)subjValue;
                IRI pred = (IRI)predValue;
                Value obj = valueList.get(2);

                Statement st = createStatement(subj, pred, obj, currentContext);
                if (rdfHandler != null) {
                    rdfHandler.handleStatement(st);
                }
            }
            finally {
                valueList.clear();
            }
        }
    } // end inner class TriXSAXHandler

    /**
     * Implementation of SAX ErrorHandler.warning: forwards the message as a parser warning.
     */
    @Override
    public void warning(SAXParseException exception)
        throws SAXException
    {
        this.reportWarning(exception.getMessage());
    }

    /**
     * Implementation of SAX ErrorHandler.error: reports the message as an error governed by
     * {@link XMLParserSettings#FAIL_ON_SAX_NON_FATAL_ERRORS}.
     */
    @Override
    public void error(SAXParseException exception)
        throws SAXException
    {
        try {
            this.reportError(exception.getMessage(), XMLParserSettings.FAIL_ON_SAX_NON_FATAL_ERRORS);
        }
        catch (RDFParseException rdfpe) {
            throw new SAXException(rdfpe);
        }
    }

    /**
     * Implementation of SAX ErrorHandler.fatalError: reports the message as a fatal parse error.
     */
    @Override
    public void fatalError(SAXParseException exception)
        throws SAXException
    {
        try {
            this.reportFatalError(exception.getMessage());
        }
        catch (RDFParseException rdfpe) {
            throw new SAXException(rdfpe);
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy