All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openrdf.rio.helpers.AbstractRDFParser Maven / Gradle / Ivy

/* 
 * Licensed to Aduna under one or more contributor license agreements.  
 * See the NOTICE.txt file distributed with this work for additional 
 * information regarding copyright ownership. 
 *
 * Aduna licenses this file to you under the terms of the Aduna BSD 
 * License (the "License"); you may not use this file except in compliance 
 * with the License. See the LICENSE.txt file distributed with this work 
 * for the full License.
 *
 * Unless required by applicable law or agreed to in writing, software 
 * distributed under the License is distributed on an "AS IS" BASIS, 
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
 * implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */
package org.openrdf.rio.helpers;

import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

import javax.xml.bind.annotation.adapters.HexBinaryAdapter;

import info.aduna.net.ParsedURI;

import org.openrdf.model.BNode;
import org.openrdf.model.IRI;
import org.openrdf.model.Literal;
import org.openrdf.model.Namespace;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.SimpleValueFactory;
import org.openrdf.rio.ParseErrorListener;
import org.openrdf.rio.ParseLocationListener;
import org.openrdf.rio.ParserConfig;
import org.openrdf.rio.RDFHandler;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.RDFWriter;
import org.openrdf.rio.RioSetting;

/**
 * Base class for {@link RDFParser}s offering common functionality for RDF
 * parsers.
 * 
 * @author Arjohn Kampman
 */
public abstract class AbstractRDFParser implements RDFParser {

	/*-----------*
	 * Variables *
	 *-----------*/

	/**
	 * MD5 digest used by {@link #createBNode(String)} to replace blank node
	 * identifiers longer than 32 characters with a fixed-length hash.
	 */
	private final MessageDigest md5;

	/**
	 * The RDFHandler that will handle the parsed RDF.
	 */
	protected RDFHandler rdfHandler;

	/**
	 * An optional ParseErrorListener to report parse errors to.
	 */
	private ParseErrorListener errListener;

	/**
	 * An optional ParseLocationListener to report parse progress in the form of
	 * line- and column numbers to.
	 */
	private ParseLocationListener locationListener;

	/**
	 * The ValueFactory to use for creating RDF model objects.
	 */
	protected ValueFactory valueFactory;

	/**
	 * The base URI for resolving relative URIs.
	 */
	private ParsedURI baseURI;

	/**
	 * Enables a consistent global mapping of blank node identifiers without
	 * using a map, but concatenating this as a prefix for the blank node
	 * identifiers supplied by the parser.
	 */
	private String nextBNodePrefix;

	/**
	 * Mapping from namespace prefixes to namespace names.
	 */
	private Map<String, String> namespaceTable;

	/**
	 * A collection of configuration options for this parser.
	 */
	private ParserConfig parserConfig;

	/*--------------*
	 * Constructors *
	 *--------------*/

	/**
	 * Creates a new AbstractRDFParser that will use a {@link SimpleValueFactory}
	 * to create RDF model objects. Delegates to
	 * {@link #AbstractRDFParser(ValueFactory)}.
	 */
	public AbstractRDFParser() {
		this(SimpleValueFactory.getInstance());
	}

	/**
	 * Creates a new AbstractRDFParser that will use the supplied ValueFactory to
	 * create RDF model objects.
	 * <p>
	 * Besides storing the value factory, this obtains the MD5 digest used for
	 * hashing long blank node identifiers, creates an empty namespace table,
	 * generates a fresh blank node prefix and installs a default
	 * {@link ParserConfig}.
	 * 
	 * @param valueFactory
	 *        A ValueFactory.
	 * @throws RuntimeException
	 *         if no MD5 {@link MessageDigest} implementation is available.
	 */
	public AbstractRDFParser(ValueFactory valueFactory) {
		try {
			md5 = MessageDigest.getInstance("MD5");
		}
		catch (NoSuchAlgorithmException e) {
			throw new RuntimeException(e);
		}

		// The namespace table has to exist before setParserConfig is called,
		// because that call copies the configured namespaces into it.
		namespaceTable = new HashMap<String, String>(16);
		nextBNodePrefix = createUniqueBNodePrefix();
		setValueFactory(valueFactory);
		setParserConfig(new ParserConfig());
	}

	/*---------*
	 * Methods *
	 *---------*/

	/**
	 * Replaces the ValueFactory used for creating RDF model objects.
	 * 
	 * @param valueFactory
	 *        the factory to use from now on.
	 * @return this parser.
	 */
	@Override
	public RDFParser setValueFactory(ValueFactory valueFactory) {
		final ValueFactory replacement = valueFactory;
		this.valueFactory = replacement;
		return this;
	}

	/**
	 * Sets the RDFHandler that will handle the parsed RDF.
	 * 
	 * @param handler
	 *        the handler to store.
	 * @return this parser.
	 */
	@Override
	public RDFParser setRDFHandler(RDFHandler handler) {
		this.rdfHandler = handler;
		return this;
	}

	/**
	 * Gets the RDFHandler that was last set on this parser.
	 * 
	 * @return the current handler; may be null if none has been set.
	 */
	public RDFHandler getRDFHandler() {
		return this.rdfHandler;
	}

	/**
	 * Sets the optional ParseErrorListener that parse errors are reported to.
	 * 
	 * @param el
	 *        the listener to store.
	 * @return this parser.
	 */
	@Override
	public RDFParser setParseErrorListener(ParseErrorListener el) {
		this.errListener = el;
		return this;
	}

	/**
	 * Gets the ParseErrorListener that was last set on this parser.
	 * 
	 * @return the current error listener; may be null if none has been set.
	 */
	public ParseErrorListener getParseErrorListener() {
		return this.errListener;
	}

	/**
	 * Sets the optional ParseLocationListener that parse progress is reported
	 * to.
	 * 
	 * @param el
	 *        the listener to store.
	 * @return this parser.
	 */
	@Override
	public RDFParser setParseLocationListener(ParseLocationListener el) {
		this.locationListener = el;
		return this;
	}

	/**
	 * Gets the ParseLocationListener that was last set on this parser.
	 * 
	 * @return the current location listener; may be null if none has been set.
	 */
	public ParseLocationListener getParseLocationListener() {
		return this.locationListener;
	}

	/**
	 * Replaces this parser's configuration and copies the namespaces configured
	 * in it into the namespace table.
	 * 
	 * @param config
	 *        the configuration to use from now on.
	 * @return this parser.
	 */
	@Override
	public RDFParser setParserConfig(ParserConfig config) {
		parserConfig = config;
		// Entries already in the namespace table are not removed here; the
		// configured namespaces are added on top of them.
		initializeNamespaceTableFromConfiguration();
		return this;
	}

	/**
	 * Gets the collection of configuration options used by this parser.
	 * 
	 * @return the current parser configuration.
	 */
	@Override
	public ParserConfig getParserConfig() {
		return parserConfig;
	}

	/*
	 * Default implementation, specific parsers are encouraged to override this method as necessary.
	 */
	@Override
	public Collection> getSupportedSettings() {
		Collection> result = new HashSet>();

		// Supported in RDFParserHelper.createLiteral
		result.add(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES);
		result.add(BasicParserSettings.VERIFY_DATATYPE_VALUES);
		result.add(BasicParserSettings.NORMALIZE_DATATYPE_VALUES);
		result.add(BasicParserSettings.DATATYPE_HANDLERS);

		// Supported in RDFParserHelper.createLiteral
		result.add(BasicParserSettings.FAIL_ON_UNKNOWN_LANGUAGES);
		result.add(BasicParserSettings.VERIFY_LANGUAGE_TAGS);
		result.add(BasicParserSettings.NORMALIZE_LANGUAGE_TAGS);
		result.add(BasicParserSettings.LANGUAGE_HANDLERS);

		// Supported in RDFParserBase.resolveURI
		result.add(BasicParserSettings.VERIFY_RELATIVE_URIS);

		// Supported in RDFParserBase.createBNode(String)
		result.add(BasicParserSettings.PRESERVE_BNODE_IDS);

		result.add(BasicParserSettings.NAMESPACES);

		return result;
	}

	/**
	 * Sets a single setting on this parser's {@link ParserConfig}.
	 * 
	 * @param setting
	 *        the setting to change.
	 * @param value
	 *        the value to store for the setting.
	 * @return this parser.
	 */
	@Override
	public <T> RDFParser set(RioSetting<T> setting, T value) {
		getParserConfig().set(setting, value);
		return this;
	}
	
	/**
	 * Stores the supplied flag as the value of
	 * {@link BasicParserSettings#VERIFY_RELATIVE_URIS} in the parser
	 * configuration.
	 * 
	 * @param verifyData
	 *        the value to store for the setting.
	 */
	@Override
	public void setVerifyData(boolean verifyData) {
		parserConfig.set(BasicParserSettings.VERIFY_RELATIVE_URIS, verifyData);
	}

	/**
	 * Returns whatever {@link ParserConfig#verifyData()} reports for the
	 * current configuration.
	 * 
	 * @deprecated Use specific settings instead.
	 */
	@Deprecated
	public boolean verifyData() {
		return parserConfig.verifyData();
	}

	/**
	 * Stores the supplied flag as the value of
	 * {@link BasicParserSettings#PRESERVE_BNODE_IDS} in the parser
	 * configuration.
	 * 
	 * @param preserveBNodeIDs
	 *        the value to store for the setting.
	 */
	@Override
	public void setPreserveBNodeIDs(boolean preserveBNodeIDs) {
		parserConfig.set(BasicParserSettings.PRESERVE_BNODE_IDS, preserveBNodeIDs);
	}

	/**
	 * Reads {@link BasicParserSettings#PRESERVE_BNODE_IDS} from the parser
	 * configuration.
	 * 
	 * @return the configured value of the setting.
	 */
	public boolean preserveBNodeIDs() {
		return parserConfig.get(BasicParserSettings.PRESERVE_BNODE_IDS);
	}

	@Deprecated
	@Override
	public void setStopAtFirstError(boolean stopAtFirstError) {
		getParserConfig().set(NTriplesParserSettings.FAIL_ON_NTRIPLES_INVALID_LINES, stopAtFirstError);
		if (!stopAtFirstError) {
			getParserConfig().addNonFatalError(NTriplesParserSettings.FAIL_ON_NTRIPLES_INVALID_LINES);
		}
		else {
			// TODO: Add a ParserConfig.removeNonFatalError function to avoid
			// this
			Set> set = new HashSet>(getParserConfig().getNonFatalErrors());
			set.remove(NTriplesParserSettings.FAIL_ON_NTRIPLES_INVALID_LINES);
			getParserConfig().setNonFatalErrors(set);
		}
	}

	/**
	 * Returns whatever {@link ParserConfig#stopAtFirstError()} reports for the
	 * current configuration.
	 * 
	 * @deprecated Check specific settings instead.
	 */
	@Deprecated
	public boolean stopAtFirstError() {
		return parserConfig.stopAtFirstError();
	}

	/**
	 * Translates the legacy {@link DatatypeHandling} mode into the individual
	 * datatype settings of the parser configuration.
	 * <ul>
	 * <li>VERIFY enables {@link BasicParserSettings#VERIFY_DATATYPE_VALUES} and
	 * {@link BasicParserSettings#FAIL_ON_UNKNOWN_DATATYPES}.</li>
	 * <li>NORMALIZE enables those two and
	 * {@link BasicParserSettings#NORMALIZE_DATATYPE_VALUES}.</li>
	 * <li>Any other mode disables all three, but only if none of them has been
	 * explicitly set before.</li>
	 * </ul>
	 * 
	 * @param datatypeHandling
	 *        the legacy datatype handling mode.
	 */
	@SuppressWarnings("deprecation")
	@Override
	public void setDatatypeHandling(DatatypeHandling datatypeHandling) {
		if (datatypeHandling == DatatypeHandling.VERIFY) {
			this.parserConfig.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
			this.parserConfig.set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true);
		}
		else if (datatypeHandling == DatatypeHandling.NORMALIZE) {
			this.parserConfig.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
			this.parserConfig.set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true);
			this.parserConfig.set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, true);
		}
		else {
			// Only ignore if they have not explicitly set any of the relevant
			// settings before this point. All three settings are checked; the
			// previous code tested NORMALIZE_DATATYPE_VALUES twice and never
			// VERIFY_DATATYPE_VALUES, so an explicit verify flag was overwritten.
			if (!this.parserConfig.isSet(BasicParserSettings.VERIFY_DATATYPE_VALUES)
					&& !this.parserConfig.isSet(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES)
					&& !this.parserConfig.isSet(BasicParserSettings.NORMALIZE_DATATYPE_VALUES))
			{
				this.parserConfig.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, false);
				this.parserConfig.set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, false);
				this.parserConfig.set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, false);
			}
		}
	}

	/**
	 * Returns whatever {@link ParserConfig#datatypeHandling()} reports for the
	 * current configuration.
	 * 
	 * @deprecated Use {@link BasicParserSettings#VERIFY_DATATYPE_VALUES} and
	 *             {@link BasicParserSettings#FAIL_ON_UNKNOWN_DATATYPES} and
	 *             {@link BasicParserSettings#NORMALIZE_DATATYPE_VALUES} instead.
	 */
	@Deprecated
	public DatatypeHandling datatypeHandling() {
		return parserConfig.datatypeHandling();
	}

	/**
	 * Parses the supplied URI-string, normalizes the result and installs it as
	 * the base URI for resolving relative URIs.
	 * 
	 * @param uriSpec
	 *        the base URI in string form.
	 */
	protected void setBaseURI(String uriSpec) {
		ParsedURI normalizedBase = new ParsedURI(uriSpec);
		normalizedBase.normalize();
		// Hand over to the ParsedURI overload, which stores the value
		setBaseURI(normalizedBase);
	}

	/**
	 * Sets the base URI for resolving relative URIs. The supplied object is
	 * stored as-is.
	 * 
	 * @param baseURI
	 *        the parsed base URI.
	 */
	protected void setBaseURI(ParsedURI baseURI) {
		final ParsedURI newBase = baseURI;
		this.baseURI = newBase;
	}

	/**
	 * Associates the specified prefix with the specified namespace, replacing
	 * any namespace previously stored for that prefix.
	 * 
	 * @param prefix
	 *        the namespace prefix.
	 * @param namespace
	 *        the namespace name.
	 */
	protected void setNamespace(String prefix, String namespace) {
		this.namespaceTable.put(prefix, namespace);
	}

	/**
	 * Looks up the namespace registered for the specified prefix. When the
	 * prefix is unknown, a fatal error is reported and an
	 * {@link RDFParseException} is thrown.
	 * 
	 * @param prefix
	 *        the namespace prefix to look up; the empty string denotes the
	 *        default namespace.
	 * @return the namespace stored for the prefix.
	 * @throws RDFParseException
	 *         if no namespace is associated with this prefix
	 */
	protected String getNamespace(String prefix)
		throws RDFParseException
	{
		if (namespaceTable.containsKey(prefix)) {
			return namespaceTable.get(prefix);
		}

		// The default namespace gets its own wording
		final String msg = "".equals(prefix)
				? "Default namespace used but not defined"
				: "Namespace prefix '" + prefix + "' used but not defined";

		reportFatalError(msg);
		throw new RDFParseException(msg);
	}

	/**
	 * Resets the per-document state: the base URI is dropped, a new blank node
	 * prefix is generated and the namespace table is reset to the namespaces
	 * from the parser configuration. This method must be called by subclasses
	 * when finishing the parse process.
	 */
	protected void clear() {
		this.baseURI = null;
		this.nextBNodePrefix = createUniqueBNodePrefix();

		// Empty the table first, then re-add the configured namespaces
		this.namespaceTable.clear();
		initializeNamespaceTableFromConfiguration();
	}

	/**
	 * Copies every namespace found under
	 * {@link BasicParserSettings#NAMESPACES} in the parser configuration into
	 * the namespace table, keyed by prefix. Existing entries for other prefixes
	 * are left untouched.
	 */
	protected void initializeNamespaceTableFromConfiguration() {
		for (Namespace configured : getParserConfig().get(BasicParserSettings.NAMESPACES)) {
			final String prefix = configured.getPrefix();
			final String name = configured.getName();
			namespaceTable.put(prefix, name);
		}
	}

	/**
	 * Formerly cleared the map that kept track of blank nodes that had been
	 * parsed. This implementation does nothing; the method is retained so that
	 * existing subclasses calling it keep compiling.
	 * 
	 * @deprecated Map is no longer used.
	 */
	@Deprecated
	protected void clearBNodeIDMap() {
	}

	/**
	 * Turns a URI-string into an {@link IRI}, resolving it against the base URI
	 * first when it is relative.
	 * 
	 * @param uriSpec
	 *        an absolute or relative URI in string form.
	 * @return the IRI created for the (resolved) URI.
	 * @throws RDFParseException
	 *         if a relative URI is supplied while no base URI has been set, if
	 *         relative URI verification reports an error, or if the IRI cannot
	 *         be created.
	 */
	protected IRI resolveURI(String uriSpec)
		throws RDFParseException
	{
		ParsedURI parsed = new ParsedURI(uriSpec);

		if (!parsed.isRelative()) {
			// Nothing to resolve
			return createURI(parsed.toString());
		}

		// Resolve relative URIs against base URI
		if (baseURI == null) {
			reportFatalError("Unable to resolve URIs, no base URI has been set");
		}

		if (getParserConfig().get(BasicParserSettings.VERIFY_RELATIVE_URIS) && parsed.isRelative()
				&& !parsed.isSelfReference() && baseURI.isOpaque())
		{
			reportError("Relative URI '" + uriSpec + "' cannot be resolved using the opaque base URI '"
					+ baseURI + "'", BasicParserSettings.VERIFY_RELATIVE_URIS);
		}

		ParsedURI resolved = baseURI.resolve(parsed);
		return createURI(resolved.toString());
	}

	/**
	 * Asks the value factory for an {@link IRI} for the specified URI-string.
	 * Any exception raised by the factory is passed to
	 * {@link #reportFatalError(Exception)}.
	 * 
	 * @param uri
	 *        the URI in string form.
	 * @return the created IRI.
	 * @throws RDFParseException
	 *         if the value factory fails to create the IRI.
	 */
	protected IRI createURI(String uri)
		throws RDFParseException
	{
		IRI created = null;
		try {
			created = valueFactory.createIRI(uri);
		}
		catch (Exception e) {
			// created stays null if reportFatalError returns normally
			reportFatalError(e);
		}
		return created;
	}

	/**
	 * Asks the value factory for a new {@link BNode}. Any exception raised by
	 * the factory is passed to {@link #reportFatalError(Exception)}.
	 * 
	 * @return the created blank node.
	 * @throws RDFParseException
	 *         if the value factory fails to create the blank node.
	 */
	protected BNode createBNode()
		throws RDFParseException
	{
		BNode created = null;
		try {
			created = valueFactory.createBNode();
		}
		catch (Exception e) {
			// created stays null if reportFatalError returns normally
			reportFatalError(e);
		}
		return created;
	}

	/**
	 * Creates a {@link BNode} object for the specified identifier.
	 * <p>
	 * When {@link #preserveBNodeIDs()} is true the identifier is used as-is.
	 * Otherwise the identifier is prefixed with the current blank node prefix;
	 * identifiers longer than 32 characters are first replaced by the
	 * hexadecimal MD5 hash of their UTF-8 bytes so that the generated id has a
	 * bounded length.
	 * 
	 * @param nodeID
	 *        the blank node identifier supplied by the document.
	 * @return the created blank node.
	 * @throws RDFParseException
	 *         declared for subclasses; not thrown by this implementation.
	 */
	protected BNode createBNode(String nodeID)
		throws RDFParseException
	{
		// If we are preserving blank node ids then we do not prefix them to
		// make them globally unique
		if (preserveBNodeIDs()) {
			return valueFactory.createBNode(nodeID);
		}

		// Prefix the node ID with a unique UUID prefix to reduce
		// cross-document clashes. This is consistent as long as
		// nextBNodePrefix is not modified between parser runs
		String toAppend = nodeID;
		if (nodeID.length() > 32) {
			// we only hash the node ID if it is longer than the hash string
			// itself would be. We use an MD5 hash rather than the node ID
			// itself to get a fixed-length generated id, rather than an
			// ever-growing one (see SES-2171)
			byte[] chars = nodeID.getBytes(StandardCharsets.UTF_8);
			toAppend = (new HexBinaryAdapter()).marshal(md5.digest(chars));
		}

		return valueFactory.createBNode(nextBNodePrefix + toAppend);
	}

	/**
	 * Creates a {@link Literal} from the supplied label, language and datatype
	 * by delegating to {@link RDFParserHelper}, passing along this parser's
	 * configuration, error listener and value factory.
	 * 
	 * @param label
	 *        the literal's label.
	 * @param lang
	 *        the literal's language tag.
	 * @param datatype
	 *        the literal's datatype.
	 * @return the literal produced by the helper.
	 * @throws RDFParseException
	 *         if the helper rejects the literal.
	 */
	protected Literal createLiteral(String label, String lang, IRI datatype)
		throws RDFParseException
	{
		final ParserConfig config = getParserConfig();
		final ParseErrorListener listener = getParseErrorListener();
		return RDFParserHelper.createLiteral(label, lang, datatype, config, listener, valueFactory);
	}

	/**
	 * Creates a {@link Literal} from the supplied label, language and datatype
	 * by delegating to {@link RDFParserHelper}, additionally passing the lineNo
	 * and columnNo so that error messages or exceptions generated during the
	 * creation of the literal can refer to them.
	 * 
	 * @param label
	 *        the literal's label.
	 * @param lang
	 *        the literal's language tag.
	 * @param datatype
	 *        the literal's datatype.
	 * @param lineNo
	 *        line number to pass to the helper.
	 * @param columnNo
	 *        column number to pass to the helper.
	 * @return the literal produced by the helper.
	 * @throws RDFParseException
	 *         if the helper rejects the literal.
	 * @since 2.7.4
	 * @see org.openrdf.rio.helpers.RDFParserHelper#createLiteral(String, String,
	 *      IRI, ParserConfig, ParseErrorListener, ValueFactory, long, long)
	 */
	protected Literal createLiteral(String label, String lang, IRI datatype, long lineNo, long columnNo)
		throws RDFParseException
	{
		final ParserConfig config = getParserConfig();
		final ParseErrorListener listener = getParseErrorListener();
		return RDFParserHelper.createLiteral(label, lang, datatype, config, listener, valueFactory, lineNo,
				columnNo);
	}

	/**
	 * Asks the value factory for a {@link Statement} with the supplied subject,
	 * predicate and object. Any exception raised by the factory is passed to
	 * {@link #reportFatalError(Exception)}.
	 * 
	 * @return the created statement.
	 * @throws RDFParseException
	 *         if the value factory fails to create the statement.
	 */
	protected Statement createStatement(Resource subj, IRI pred, Value obj)
		throws RDFParseException
	{
		Statement created = null;
		try {
			created = valueFactory.createStatement(subj, pred, obj);
		}
		catch (Exception e) {
			// created stays null if reportFatalError returns normally
			reportFatalError(e);
		}
		return created;
	}

	/**
	 * Asks the value factory for a {@link Statement} with the supplied subject,
	 * predicate, object and context. Any exception raised by the factory is
	 * passed to {@link #reportFatalError(Exception)}.
	 * 
	 * @return the created statement.
	 * @throws RDFParseException
	 *         if the value factory fails to create the statement.
	 */
	protected Statement createStatement(Resource subj, IRI pred, Value obj, Resource context)
		throws RDFParseException
	{
		Statement created = null;
		try {
			created = valueFactory.createStatement(subj, pred, obj, context);
		}
		catch (Exception e) {
			// created stays null if reportFatalError returns normally
			reportFatalError(e);
		}
		return created;
	}

	/**
	 * Forwards the specified line- and column number to the registered
	 * {@link ParseLocationListener}. Does nothing when no listener is
	 * registered.
	 * 
	 * @param lineNo
	 *        the current line number.
	 * @param columnNo
	 *        the current column number.
	 */
	protected void reportLocation(long lineNo, long columnNo) {
		if (locationListener == null) {
			return;
		}
		locationListener.parseLocationUpdate(lineNo, columnNo);
	}

	/**
	 * Reports a warning without location information. This method simply calls
	 * {@link #reportWarning(String,long,long)} supplying -1 for the line- and
	 * column number.
	 * 
	 * @param msg
	 *        the warning message.
	 */
	protected void reportWarning(String msg) {
		final long unknown = -1;
		reportWarning(msg, unknown, unknown);
	}

	/**
	 * Forwards a warning with associated line- and column number to the
	 * registered ParseErrorListener. Does nothing when no listener is
	 * registered.
	 * 
	 * @param msg
	 *        the warning message.
	 * @param lineNo
	 *        the line number the warning relates to.
	 * @param columnNo
	 *        the column number the warning relates to.
	 */
	protected void reportWarning(String msg, long lineNo, long columnNo) {
		if (errListener == null) {
			return;
		}
		errListener.warning(msg, lineNo, columnNo);
	}

	/**
	 * Reports an error to the registered ParseErrorListener, if the given
	 * setting has been set to true. No line- or column number is supplied.
	 * <p>
	 * This method also throws an {@link RDFParseException} when the given
	 * setting has been set to true and it is not a nonFatalError. The actual
	 * decision is made by {@link RDFParserHelper}, to which this method
	 * delegates using this parser's configuration and error listener.
	 * 
	 * @param msg
	 *        The message to use for
	 *        {@link ParseErrorListener#error(String, long, long)} and for
	 *        {@link RDFParseException#RDFParseException(String, long, long)}.
	 * @param relevantSetting
	 *        The boolean setting that will be checked to determine if this is
	 *        an issue that we need to look at at all.
	 * @throws RDFParseException
	 *         If {@link ParserConfig#get(RioSetting)} returns true for the
	 *         given setting and the setting is not a non-fatal error (see
	 *         {@link ParserConfig#isNonFatalError(RioSetting)}).
	 */
	protected void reportError(String msg, RioSetting<Boolean> relevantSetting)
		throws RDFParseException
	{
		RDFParserHelper.reportError(msg, relevantSetting, getParserConfig(), getParseErrorListener());
	}

	/**
	 * Reports an error with associated line- and column number to the
	 * registered ParseErrorListener, if the given setting has been set to true.
	 * <p>
	 * This method also throws an {@link RDFParseException} when the given
	 * setting has been set to true and it is not a nonFatalError. The actual
	 * decision is made by {@link RDFParserHelper}, to which this method
	 * delegates using this parser's configuration and error listener.
	 * 
	 * @param msg
	 *        The message to use for
	 *        {@link ParseErrorListener#error(String, long, long)} and for
	 *        {@link RDFParseException#RDFParseException(String, long, long)}.
	 * @param lineNo
	 *        Optional line number, should default to setting this as -1 if not
	 *        known.
	 * @param columnNo
	 *        Optional column number, should default to setting this as -1 if
	 *        not known.
	 * @param relevantSetting
	 *        The boolean setting that will be checked to determine if this is
	 *        an issue that we need to look at at all.
	 * @throws RDFParseException
	 *         If {@link ParserConfig#get(RioSetting)} returns true for the
	 *         given setting and the setting is not a non-fatal error (see
	 *         {@link ParserConfig#isNonFatalError(RioSetting)}).
	 */
	protected void reportError(String msg, long lineNo, long columnNo, RioSetting<Boolean> relevantSetting)
		throws RDFParseException
	{
		RDFParserHelper.reportError(msg, lineNo, columnNo, relevantSetting, getParserConfig(),
				getParseErrorListener());
	}

	/**
	 * Reports an error, described by an exception, with associated line- and
	 * column number to the registered ParseErrorListener, if the given setting
	 * has been set to true.
	 * <p>
	 * This method also throws an {@link RDFParseException} when the given
	 * setting has been set to true and it is not a nonFatalError. The actual
	 * decision is made by {@link RDFParserHelper}, to which this method
	 * delegates using this parser's configuration and error listener.
	 * 
	 * @param e
	 *        The exception whose message will be used for
	 *        {@link ParseErrorListener#error(String, long, long)} and for
	 *        {@link RDFParseException#RDFParseException(String, long, long)}.
	 * @param lineNo
	 *        Optional line number, should default to setting this as -1 if not
	 *        known.
	 * @param columnNo
	 *        Optional column number, should default to setting this as -1 if
	 *        not known.
	 * @param relevantSetting
	 *        The boolean setting that will be checked to determine if this is
	 *        an issue that we need to look at at all.
	 * @throws RDFParseException
	 *         If {@link ParserConfig#get(RioSetting)} returns true for the
	 *         given setting and the setting is not a non-fatal error (see
	 *         {@link ParserConfig#isNonFatalError(RioSetting)}).
	 */
	protected void reportError(Exception e, long lineNo, long columnNo, RioSetting<Boolean> relevantSetting)
		throws RDFParseException
	{
		RDFParserHelper.reportError(e, lineNo, columnNo, relevantSetting, getParserConfig(),
				getParseErrorListener());
	}

	/**
	 * Reports a fatal error to the registered ParseErrorListener, if any, and
	 * throws an {@link RDFParseException} afterwards. No line- or column number
	 * is supplied; the work is delegated to {@link RDFParserHelper}.
	 * 
	 * @param msg
	 *        the error message.
	 * @throws RDFParseException
	 *         to signal the fatal error.
	 */
	protected void reportFatalError(String msg)
		throws RDFParseException
	{
		RDFParserHelper.reportFatalError(msg, getParseErrorListener());
	}

	/**
	 * Reports a fatal error with associated line- and column number to the
	 * registered ParseErrorListener, if any, and throws an
	 * {@link RDFParseException} afterwards. The work is delegated to
	 * {@link RDFParserHelper}.
	 * 
	 * @param msg
	 *        the error message.
	 * @param lineNo
	 *        the line number the error relates to.
	 * @param columnNo
	 *        the column number the error relates to.
	 * @throws RDFParseException
	 *         to signal the fatal error.
	 */
	protected void reportFatalError(String msg, long lineNo, long columnNo)
		throws RDFParseException
	{
		RDFParserHelper.reportFatalError(msg, lineNo, columnNo, getParseErrorListener());
	}

	/**
	 * Reports a fatal error to the registered ParseErrorListener, if any, and
	 * throws an {@link RDFParseException} afterwards. An exception is made for
	 * the case where the supplied exception is a {@link RDFParseException}; in
	 * that case the supplied exception is not wrapped in another
	 * RDFParseException and the error message is not reported to the
	 * ParseErrorListener, assuming that it has already been reported when the
	 * original exception was thrown.
	 * <p>
	 * No line- or column number is supplied; the work is delegated to
	 * {@link RDFParserHelper}.
	 * 
	 * @param e
	 *        the exception describing the fatal error.
	 * @throws RDFParseException
	 *         to signal the fatal error.
	 */
	protected void reportFatalError(Exception e)
		throws RDFParseException
	{
		RDFParserHelper.reportFatalError(e, getParseErrorListener());
	}

	/**
	 * Reports a fatal error with associated line- and column number to the
	 * registered ParseErrorListener, if any, and throws an
	 * {@link RDFParseException} wrapping the supplied exception afterwards. An
	 * exception is made for the case where the supplied exception is a
	 * {@link RDFParseException}; in that case the supplied exception is not
	 * wrapped in another RDFParseException and the error message is not
	 * reported to the ParseErrorListener, assuming that it has already been
	 * reported when the original exception was thrown. The work is delegated to
	 * {@link RDFParserHelper}.
	 * 
	 * @param e
	 *        the exception describing the fatal error.
	 * @param lineNo
	 *        the line number the error relates to.
	 * @param columnNo
	 *        the column number the error relates to.
	 * @throws RDFParseException
	 *         to signal the fatal error.
	 */
	protected void reportFatalError(Exception e, long lineNo, long columnNo)
		throws RDFParseException
	{
		RDFParserHelper.reportFatalError(e, lineNo, columnNo, getParseErrorListener());
	}

	/**
	 * Builds a new blank node prefix of the form {@code genid-<uuid>-}, where
	 * {@code <uuid>} is a random UUID with its dashes removed.
	 * 
	 * @return the newly generated prefix.
	 */
	private String createUniqueBNodePrefix() {
		// Plain-text replace: no regular expression is needed to drop dashes
		return "genid-" + UUID.randomUUID().toString().replace("-", "") + "-";
	}
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy