All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.eclipse.rdf4j.rio.trix.TriXParser Maven / Gradle / Ivy

There is a newer version: 5.1.0
Show newest version
/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *******************************************************************************/
package org.eclipse.rdf4j.rio.trix;

import static org.eclipse.rdf4j.rio.trix.TriXConstants.BNODE_TAG;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.CONTEXT_TAG;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.DATATYPE_ATT;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.LANGUAGE_ATT;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.PLAIN_LITERAL_TAG;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.TRIPLE_TAG;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.TYPED_LITERAL_TAG;
import static org.eclipse.rdf4j.rio.trix.TriXConstants.URI_TAG;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.input.BOMInputStream;
import org.eclipse.rdf4j.common.xml.SimpleSAXAdapter;
import org.eclipse.rdf4j.common.xml.SimpleSAXParser;
import org.eclipse.rdf4j.common.xml.XMLReaderFactory;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RioSetting;
import org.eclipse.rdf4j.rio.helpers.AbstractRDFParser;
import org.eclipse.rdf4j.rio.helpers.TriXParserSettings;
import org.eclipse.rdf4j.rio.helpers.XMLParserSettings;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;

/**
 * A parser that can parse RDF files that are in the TriX format
 * .
 * 
 * @author Arjohn Kampman
 */
public class TriXParser extends AbstractRDFParser implements ErrorHandler {

	/*--------------*
	 * Constructors *
	 *--------------*/

	private SimpleSAXParser saxParser;

	/**
	 * Creates a new TriXParser that will use a {@link SimpleValueFactory} to create objects for resources,
	 * bNodes, literals and statements.
	 */
	public TriXParser() {
		this(SimpleValueFactory.getInstance());
	}

	/**
	 * Creates a new TriXParser that will use the supplied ValueFactory to create objects for resources,
	 * bNodes, literals and statements.
	 * 
	 * @param valueFactory
	 *        A ValueFactory.
	 */
	public TriXParser(ValueFactory valueFactory) {
		super(valueFactory);
	}

	/*---------*
	 * Methods *
	 *---------*/

	@Override
	public final RDFFormat getRDFFormat() {
		return RDFFormat.TRIX;
	}

	/**
	 * Parses the data from the supplied InputStream, using the supplied baseURI to resolve any relative URI
	 * references.
	 * 
	 * @param in
	 *        The InputStream from which to read the data, must not be null.
	 * @param baseURI
	 *        The URI associated with the data in the InputStream, must not be null.
	 * @throws IOException
	 *         If an I/O error occurred while data was read from the InputStream.
	 * @throws RDFParseException
	 *         If the parser has found an unrecoverable parse error.
	 * @throws RDFHandlerException
	 *         If the configured statement handler encountered an unrecoverable error.
	 * @throws IllegalArgumentException
	 *         If the supplied input stream or base URI is null.
	 */
	@Override
	public void parse(InputStream in, String baseURI)
		throws IOException, RDFParseException, RDFHandlerException
	{
		if (in == null) {
			throw new IllegalArgumentException("Input stream cannot be 'null'");
		}
		if (baseURI == null) {
			throw new IllegalArgumentException("Base URI cannot be 'null'");
		}

		InputSource inputSource = new InputSource(new BOMInputStream(in, false));
		inputSource.setSystemId(baseURI);

		parse(inputSource);
	}

	/**
	 * Parses the data from the supplied Reader, using the supplied baseURI to resolve any relative URI
	 * references.
	 * 
	 * @param reader
	 *        The Reader from which to read the data, must not be null.
	 * @param baseURI
	 *        The URI associated with the data in the InputStream, must not be null.
	 * @throws IOException
	 *         If an I/O error occurred while data was read from the InputStream.
	 * @throws RDFParseException
	 *         If the parser has found an unrecoverable parse error.
	 * @throws RDFHandlerException
	 *         If the configured statement handler has encountered an unrecoverable error.
	 * @throws IllegalArgumentException
	 *         If the supplied reader or base URI is null.
	 */
	@Override
	public void parse(Reader reader, String baseURI)
		throws IOException, RDFParseException, RDFHandlerException
	{
		if (reader == null) {
			throw new IllegalArgumentException("Reader cannot be 'null'");
		}
		if (baseURI == null) {
			throw new IllegalArgumentException("Base URI cannot be 'null'");
		}

		InputSource inputSource = new InputSource(reader);
		inputSource.setSystemId(baseURI);

		parse(inputSource);
	}

	private void parse(InputSource inputStreamOrReader)
		throws IOException, RDFParseException, RDFHandlerException
	{
		if (rdfHandler != null) {
			rdfHandler.startRDF();
		}

		try {
			XMLReader xmlReader;

			if (getParserConfig().isSet(XMLParserSettings.CUSTOM_XML_READER)) {
				xmlReader = getParserConfig().get(XMLParserSettings.CUSTOM_XML_READER);
			}
			else {
				xmlReader = XMLReaderFactory.createXMLReader();
			}

			xmlReader.setErrorHandler(this);

			saxParser = new SimpleSAXParser(xmlReader);
			saxParser.setPreserveWhitespace(true);
			saxParser.setListener(new TriXSAXHandler());

			saxParser.parse(inputStreamOrReader);
		}
		catch (SAXParseException e) {
			Exception wrappedExc = e.getException();

			if (wrappedExc == null) {
				reportFatalError(e, e.getLineNumber(), e.getColumnNumber());
			}
			else {
				reportFatalError(wrappedExc, e.getLineNumber(), e.getColumnNumber());
			}
		}
		catch (SAXException e) {
			Exception wrappedExc = e.getException();

			if (wrappedExc == null) {
				reportFatalError(e);
			}
			else if (wrappedExc instanceof RDFParseException) {
				throw (RDFParseException)wrappedExc;
			}
			else if (wrappedExc instanceof RDFHandlerException) {
				throw (RDFHandlerException)wrappedExc;
			}
			else {
				reportFatalError(wrappedExc);
			}
		}
		finally {
			clear();
		}

		if (rdfHandler != null) {
			rdfHandler.endRDF();
		}
	}

	@Override
	protected Literal createLiteral(String label, String lang, IRI datatype)
		throws RDFParseException
	{
		Locator locator = saxParser.getLocator();
		if (locator != null) {
			return createLiteral(label, lang, datatype, locator.getLineNumber(), locator.getColumnNumber());
		}
		else {
			return createLiteral(label, lang, datatype, -1, -1);
		}
	}

	/**
	 * Overrides {@link AbstractRDFParser#reportWarning(String)}, adding line- and column number information
	 * to the error.
	 */
	@Override
	protected void reportWarning(String msg) {
		Locator locator = saxParser.getLocator();
		if (locator != null) {
			reportWarning(msg, locator.getLineNumber(), locator.getColumnNumber());
		}
		else {
			reportWarning(msg, -1, -1);
		}
	}

	/**
	 * Overrides {@link AbstractRDFParser#reportError(String, RioSetting)}, adding line- and column number
	 * information to the error.
	 */
	@Override
	protected void reportError(String msg, RioSetting setting)
		throws RDFParseException
	{
		Locator locator = saxParser.getLocator();
		if (locator != null) {
			reportError(msg, locator.getLineNumber(), locator.getColumnNumber(), setting);
		}
		else {
			reportError(msg, -1, -1, setting);
		}
	}

	/**
	 * Overrides {@link AbstractRDFParser#reportFatalError(String)}, adding line- and column number
	 * information to the error.
	 */
	@Override
	protected void reportFatalError(String msg)
		throws RDFParseException
	{
		Locator locator = saxParser.getLocator();
		if (locator != null) {
			reportFatalError(msg, locator.getLineNumber(), locator.getColumnNumber());
		}
		else {
			reportFatalError(msg, -1, -1);
		}
	}

	/**
	 * Overrides {@link AbstractRDFParser#reportFatalError(Exception)}, adding line- and column number
	 * information to the error.
	 */
	@Override
	protected void reportFatalError(Exception e)
		throws RDFParseException
	{
		Locator locator = saxParser.getLocator();
		if (locator != null) {
			reportFatalError(e, locator.getLineNumber(), locator.getColumnNumber());
		}
		else {
			reportFatalError(e, -1, -1);
		}
	}

	/*----------------------------*
	 * Inner class TriXSAXHandler *
	 *----------------------------*/

	private class TriXSAXHandler extends SimpleSAXAdapter {

		private Resource currentContext;

		private boolean parsingContext;

		private List valueList;

		public TriXSAXHandler() {
			currentContext = null;
			valueList = new ArrayList(3);
		}

		@Override
		public void startTag(String tagName, Map atts, String text)
			throws SAXException
		{
			try {
				if (tagName.equals(URI_TAG)) {
					valueList.add(createURI(text));
				}
				else if (tagName.equals(BNODE_TAG)) {
					valueList.add(createBNode(text));
				}
				else if (tagName.equals(PLAIN_LITERAL_TAG)) {
					String lang = atts.get(LANGUAGE_ATT);
					valueList.add(createLiteral(text, lang, null));
				}
				else if (tagName.equals(TYPED_LITERAL_TAG)) {
					String datatype = atts.get(DATATYPE_ATT);

					if (datatype == null) {
						reportError(DATATYPE_ATT + " attribute missing for typed literal",
								TriXParserSettings.FAIL_ON_TRIX_MISSING_DATATYPE);
						valueList.add(createLiteral(text, null, null));
					}
					else {
						IRI dtURI = createURI(datatype);
						valueList.add(createLiteral(text, null, dtURI));
					}
				}
				else if (tagName.equals(TRIPLE_TAG)) {
					if (parsingContext) {
						try {
							// First triple in a context, valueList can contain
							// context information
							if (valueList.size() > 1) {
								reportError("At most 1 resource can be specified for the context",
										TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT);
							}
							else if (valueList.size() == 1) {
								try {
									currentContext = (Resource)valueList.get(0);
								}
								catch (ClassCastException e) {
									reportError("Context identifier should be a URI or blank node",
											TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT);
								}
							}
						}
						finally {
							parsingContext = false;
							valueList.clear();
						}
					}
				}
				else if (tagName.equals(CONTEXT_TAG)) {
					parsingContext = true;
				}
			}
			catch (RDFParseException e) {
				throw new SAXException(e);
			}
		}

		@Override
		public void endTag(String tagName)
			throws SAXException
		{
			try {
				if (tagName.equals(TRIPLE_TAG)) {
					reportStatement();
				}
				else if (tagName.equals(CONTEXT_TAG)) {
					currentContext = null;
				}
			}
			catch (RDFParseException e) {
				throw new SAXException(e);
			}
			catch (RDFHandlerException e) {
				throw new SAXException(e);
			}
		}

		private void reportStatement()
			throws RDFParseException, RDFHandlerException
		{
			try {
				if (valueList.size() != 3) {
					reportError("exactly 3 values are required for a triple",
							TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT);
					return;
				}

				Resource subj;
				IRI pred;
				Value obj;

				try {
					subj = (Resource)valueList.get(0);
				}
				catch (ClassCastException e) {
					reportError("First value for a triple should be a URI or blank node",
							TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT);
					return;
				}

				try {
					pred = (IRI)valueList.get(1);
				}
				catch (ClassCastException e) {
					reportError("Second value for a triple should be a URI",
							TriXParserSettings.FAIL_ON_TRIX_INVALID_STATEMENT);
					return;
				}

				obj = valueList.get(2);

				Statement st = createStatement(subj, pred, obj, currentContext);
				if (rdfHandler != null) {
					rdfHandler.handleStatement(st);
				}
			}
			finally {
				valueList.clear();
			}
		}
	} // end inner class TriXSAXHandler

	/**
	 * Implementation of SAX ErrorHandler.warning
	 */
	@Override
	public void warning(SAXParseException exception)
		throws SAXException
	{
		this.reportWarning(exception.getMessage());
	}

	/**
	 * Implementation of SAX ErrorHandler.error
	 */
	@Override
	public void error(SAXParseException exception)
		throws SAXException
	{
		try {
			this.reportError(exception.getMessage(), XMLParserSettings.FAIL_ON_SAX_NON_FATAL_ERRORS);
		}
		catch (RDFParseException rdfpe) {
			throw new SAXException(rdfpe);
		}
	}

	/**
	 * Implementation of SAX ErrorHandler.fatalError
	 */
	@Override
	public void fatalError(SAXParseException exception)
		throws SAXException
	{
		try {
			this.reportFatalError(exception.getMessage());
		}
		catch (RDFParseException rdfpe) {
			throw new SAXException(rdfpe);
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy