// Source listing: org.openrdf.rio.helpers.AbstractRDFParser (Maven / Gradle / Ivy)
/*
* Licensed to Aduna under one or more contributor license agreements.
* See the NOTICE.txt file distributed with this work for additional
* information regarding copyright ownership.
*
* Aduna licenses this file to you under the terms of the Aduna BSD
* License (the "License"); you may not use this file except in compliance
* with the License. See the LICENSE.txt file distributed with this work
* for the full License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing permissions
* and limitations under the License.
*/
package org.openrdf.rio.helpers;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import javax.xml.bind.annotation.adapters.HexBinaryAdapter;
import info.aduna.net.ParsedURI;
import org.openrdf.model.BNode;
import org.openrdf.model.IRI;
import org.openrdf.model.Literal;
import org.openrdf.model.Namespace;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.SimpleValueFactory;
import org.openrdf.rio.ParseErrorListener;
import org.openrdf.rio.ParseLocationListener;
import org.openrdf.rio.ParserConfig;
import org.openrdf.rio.RDFHandler;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.RDFWriter;
import org.openrdf.rio.RioSetting;
/**
* Base class for {@link RDFParser}s offering common functionality for RDF
* parsers.
*
* @author Arjohn Kampman
*/
public abstract class AbstractRDFParser implements RDFParser {
private final MessageDigest md5;
/*-----------*
* Variables *
*-----------*/
/**
* The RDFHandler that will handle the parsed RDF.
*/
protected RDFHandler rdfHandler;
/**
* An optional ParseErrorListener to report parse errors to.
*/
private ParseErrorListener errListener;
/**
* An optional ParseLocationListener to report parse progress in the form of
* line- and column numbers to.
*/
private ParseLocationListener locationListener;
/**
* The ValueFactory to use for creating RDF model objects.
*/
protected ValueFactory valueFactory;
/**
* The base URI for resolving relative URIs.
*/
private ParsedURI baseURI;
/**
* Enables a consistent global mapping of blank node identifiers without
* using a map, but concatenating this as a prefix for the blank node
* identifiers supplied by the parser.
*/
private String nextBNodePrefix;
/**
* Mapping from namespace prefixes to namespace names.
*/
private Map namespaceTable;
/**
* A collection of configuration options for this parser.
*/
private ParserConfig parserConfig;
/*--------------*
* Constructors *
*--------------*/
/**
* Creates a new RDFParserBase that will use a {@link SimpleValueFactory} to
* create RDF model objects.
*/
public AbstractRDFParser() {
this(SimpleValueFactory.getInstance());
}
/**
* Creates a new RDFParserBase that will use the supplied ValueFactory to
* create RDF model objects.
*
* @param valueFactory
* A ValueFactory.
*/
public AbstractRDFParser(ValueFactory valueFactory) {
try {
md5 = MessageDigest.getInstance("MD5");
}
catch (NoSuchAlgorithmException e) {
throw new RuntimeException(e);
}
namespaceTable = new HashMap(16);
nextBNodePrefix = createUniqueBNodePrefix();
setValueFactory(valueFactory);
setParserConfig(new ParserConfig());
}
/*---------*
* Methods *
*---------*/
@Override
public RDFParser setValueFactory(ValueFactory valueFactory) {
this.valueFactory = valueFactory;
return this;
}
@Override
public RDFParser setRDFHandler(RDFHandler handler) {
rdfHandler = handler;
return this;
}
public RDFHandler getRDFHandler() {
return rdfHandler;
}
@Override
public RDFParser setParseErrorListener(ParseErrorListener el) {
errListener = el;
return this;
}
public ParseErrorListener getParseErrorListener() {
return errListener;
}
@Override
public RDFParser setParseLocationListener(ParseLocationListener el) {
locationListener = el;
return this;
}
public ParseLocationListener getParseLocationListener() {
return locationListener;
}
@Override
public RDFParser setParserConfig(ParserConfig config) {
this.parserConfig = config;
initializeNamespaceTableFromConfiguration();
return this;
}
@Override
public ParserConfig getParserConfig() {
return this.parserConfig;
}
/*
* Default implementation, specific parsers are encouraged to override this method as necessary.
*/
@Override
public Collection> getSupportedSettings() {
Collection> result = new HashSet>();
// Supported in RDFParserHelper.createLiteral
result.add(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES);
result.add(BasicParserSettings.VERIFY_DATATYPE_VALUES);
result.add(BasicParserSettings.NORMALIZE_DATATYPE_VALUES);
result.add(BasicParserSettings.DATATYPE_HANDLERS);
// Supported in RDFParserHelper.createLiteral
result.add(BasicParserSettings.FAIL_ON_UNKNOWN_LANGUAGES);
result.add(BasicParserSettings.VERIFY_LANGUAGE_TAGS);
result.add(BasicParserSettings.NORMALIZE_LANGUAGE_TAGS);
result.add(BasicParserSettings.LANGUAGE_HANDLERS);
// Supported in RDFParserBase.resolveURI
result.add(BasicParserSettings.VERIFY_RELATIVE_URIS);
// Supported in RDFParserBase.createBNode(String)
result.add(BasicParserSettings.PRESERVE_BNODE_IDS);
result.add(BasicParserSettings.NAMESPACES);
return result;
}
@Override
public RDFParser set(RioSetting setting, T value) {
getParserConfig().set(setting, value);
return this;
}
@Override
public void setVerifyData(boolean verifyData) {
this.parserConfig.set(BasicParserSettings.VERIFY_RELATIVE_URIS, verifyData);
}
/**
* @deprecated Use specific settings instead.
*/
@Deprecated
public boolean verifyData() {
return this.parserConfig.verifyData();
}
@Override
public void setPreserveBNodeIDs(boolean preserveBNodeIDs) {
this.parserConfig.set(BasicParserSettings.PRESERVE_BNODE_IDS, preserveBNodeIDs);
}
public boolean preserveBNodeIDs() {
return this.parserConfig.get(BasicParserSettings.PRESERVE_BNODE_IDS);
}
@Deprecated
@Override
public void setStopAtFirstError(boolean stopAtFirstError) {
getParserConfig().set(NTriplesParserSettings.FAIL_ON_NTRIPLES_INVALID_LINES, stopAtFirstError);
if (!stopAtFirstError) {
getParserConfig().addNonFatalError(NTriplesParserSettings.FAIL_ON_NTRIPLES_INVALID_LINES);
}
else {
// TODO: Add a ParserConfig.removeNonFatalError function to avoid
// this
Set> set = new HashSet>(getParserConfig().getNonFatalErrors());
set.remove(NTriplesParserSettings.FAIL_ON_NTRIPLES_INVALID_LINES);
getParserConfig().setNonFatalErrors(set);
}
}
/**
* @deprecated Check specific settings instead.
*/
@Deprecated
public boolean stopAtFirstError() {
return this.parserConfig.stopAtFirstError();
}
@SuppressWarnings("deprecation")
@Override
public void setDatatypeHandling(DatatypeHandling datatypeHandling) {
if (datatypeHandling == DatatypeHandling.VERIFY) {
this.parserConfig.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
this.parserConfig.set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true);
}
else if (datatypeHandling == DatatypeHandling.NORMALIZE) {
this.parserConfig.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
this.parserConfig.set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true);
this.parserConfig.set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, true);
}
else {
// Only ignore if they have not explicitly set any of the relevant
// settings before this point
if (!this.parserConfig.isSet(BasicParserSettings.NORMALIZE_DATATYPE_VALUES)
&& !this.parserConfig.isSet(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES)
&& !this.parserConfig.isSet(BasicParserSettings.NORMALIZE_DATATYPE_VALUES))
{
this.parserConfig.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, false);
this.parserConfig.set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, false);
this.parserConfig.set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, false);
}
}
}
/**
* @deprecated Use {@link BasicParserSettings#VERIFY_DATATYPE_VALUES} and
* {@link BasicParserSettings#FAIL_ON_UNKNOWN_DATATYPES} and
* {@link BasicParserSettings#NORMALIZE_DATATYPE_VALUES} instead.
*/
@Deprecated
public DatatypeHandling datatypeHandling() {
return this.parserConfig.datatypeHandling();
}
/**
* Parses and normalizes the supplied URI-string and sets it as the base URI
* for resolving relative URIs.
*/
protected void setBaseURI(String uriSpec) {
// Store normalized base URI
ParsedURI baseURI = new ParsedURI(uriSpec);
baseURI.normalize();
setBaseURI(baseURI);
}
/**
* Sets the base URI for resolving relative URIs.
*/
protected void setBaseURI(ParsedURI baseURI) {
this.baseURI = baseURI;
}
/**
* Associates the specified prefix to the specified namespace.
*/
protected void setNamespace(String prefix, String namespace) {
namespaceTable.put(prefix, namespace);
}
/**
* Gets the namespace that is associated with the specified prefix or throws
* an {@link RDFParseException}.
*
* @throws RDFParseException
* if no namespace is associated with this prefix
*/
protected String getNamespace(String prefix)
throws RDFParseException
{
if (namespaceTable.containsKey(prefix))
return namespaceTable.get(prefix);
String msg = "Namespace prefix '" + prefix + "' used but not defined";
if ("".equals(prefix)) {
msg = "Default namespace used but not defined";
}
reportFatalError(msg);
throw new RDFParseException(msg);
}
/**
* Clears any information that has been collected while parsing. This method
* must be called by subclasses when finishing the parse process.
*/
protected void clear() {
baseURI = null;
nextBNodePrefix = createUniqueBNodePrefix();
namespaceTable.clear();
initializeNamespaceTableFromConfiguration();
}
protected void initializeNamespaceTableFromConfiguration() {
for (Namespace aNS : getParserConfig().get(BasicParserSettings.NAMESPACES)) {
namespaceTable.put(aNS.getPrefix(), aNS.getName());
}
}
/**
* Clears the map that keeps track of blank nodes that have been parsed.
* Normally, this map is clear when the document has been parsed completely,
* but subclasses can clear the map at other moments too, for example when a
* bnode scope ends.
*
* @deprecated Map is no longer used.
*/
@Deprecated
protected void clearBNodeIDMap() {
}
/**
* Resolves a URI-string against the base URI and creates a {@link IRI}
* object for it.
*/
protected IRI resolveURI(String uriSpec)
throws RDFParseException
{
// Resolve relative URIs against base URI
ParsedURI uri = new ParsedURI(uriSpec);
if (uri.isRelative()) {
if (baseURI == null) {
reportFatalError("Unable to resolve URIs, no base URI has been set");
}
if (getParserConfig().get(BasicParserSettings.VERIFY_RELATIVE_URIS)) {
if (uri.isRelative() && !uri.isSelfReference() && baseURI.isOpaque()) {
reportError("Relative URI '" + uriSpec + "' cannot be resolved using the opaque base URI '"
+ baseURI + "'", BasicParserSettings.VERIFY_RELATIVE_URIS);
}
}
uri = baseURI.resolve(uri);
}
return createURI(uri.toString());
}
/**
* Creates a {@link IRI} object for the specified URI-string.
*/
protected IRI createURI(String uri)
throws RDFParseException
{
try {
return valueFactory.createIRI(uri);
}
catch (Exception e) {
reportFatalError(e);
return null; // required by compiler
}
}
/**
* Creates a new {@link BNode} object.
*/
protected BNode createBNode()
throws RDFParseException
{
try {
return valueFactory.createBNode();
}
catch (Exception e) {
reportFatalError(e);
return null; // required by compiler
}
}
/**
* Creates a {@link BNode} object for the specified identifier.
*/
protected BNode createBNode(String nodeID)
throws RDFParseException
{
// If we are preserving blank node ids then we do not prefix them to
// make
// them globally unique
if (preserveBNodeIDs()) {
return valueFactory.createBNode(nodeID);
}
else {
// Prefix the node ID with a unique UUID prefix to reduce
// cross-document clashes
// This is consistent as long as nextBNodePrefix is not modified
// between parser runs
String toAppend = nodeID;
if (nodeID.length() > 32) {
// we only hash the node ID if it is longer than the hash string
// itself would be.
byte[] chars = null;
try {
chars = nodeID.getBytes("UTF-8");
}
catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
// we use an MD5 hash rather than the node ID itself to get a
// fixed-length generated id, rather than
// an ever-growing one (see SES-2171)
toAppend = (new HexBinaryAdapter()).marshal(md5.digest(chars));
}
return valueFactory.createBNode(nextBNodePrefix + toAppend);
}
}
/**
* Creates a {@link Literal} object with the supplied parameters.
*/
protected Literal createLiteral(String label, String lang, IRI datatype)
throws RDFParseException
{
return RDFParserHelper.createLiteral(label, lang, datatype, getParserConfig(), getParseErrorListener(),
valueFactory);
}
/**
* Creates a {@link Literal} object with the supplied parameters, using the
* lineNo and columnNo to enhance error messages or exceptions that may be
* generated during the creation of the literal.
*
* @since 2.7.4
* @see org.openrdf.rio.helpers.RDFParserHelper#createLiteral(String, String,
* IRI, ParserConfig, ParseErrorListener, ValueFactory, long, long)
*/
protected Literal createLiteral(String label, String lang, IRI datatype, long lineNo, long columnNo)
throws RDFParseException
{
return RDFParserHelper.createLiteral(label, lang, datatype, getParserConfig(), getParseErrorListener(),
valueFactory, lineNo, columnNo);
}
/**
* Creates a new {@link Statement} object with the supplied components.
*/
protected Statement createStatement(Resource subj, IRI pred, Value obj)
throws RDFParseException
{
try {
return valueFactory.createStatement(subj, pred, obj);
}
catch (Exception e) {
reportFatalError(e);
return null; // required by compiler
}
}
/**
* Creates a new {@link Statement} object with the supplied components.
*/
protected Statement createStatement(Resource subj, IRI pred, Value obj, Resource context)
throws RDFParseException
{
try {
return valueFactory.createStatement(subj, pred, obj, context);
}
catch (Exception e) {
reportFatalError(e);
return null; // required by compiler
}
}
/**
* Reports the specified line- and column number to the registered
* {@link ParseLocationListener}, if any.
*/
protected void reportLocation(long lineNo, long columnNo) {
if (locationListener != null) {
locationListener.parseLocationUpdate(lineNo, columnNo);
}
}
/**
* Reports a warning to the registered ParseErrorListener, if any. This
* method simply calls {@link #reportWarning(String,long,long)} supplying
* -1 for the line- and column number.
*/
protected void reportWarning(String msg) {
reportWarning(msg, -1, -1);
}
/**
* Reports a warning with associated line- and column number to the
* registered ParseErrorListener, if any.
*/
protected void reportWarning(String msg, long lineNo, long columnNo) {
if (errListener != null) {
errListener.warning(msg, lineNo, columnNo);
}
}
/**
* Reports an error with associated line- and column number to the registered
* ParseErrorListener, if the given setting has been set to true.
*
* This method also throws an {@link RDFParseException} when the given
* setting has been set to true and it is not a nonFatalError.
*
* @param msg
* The message to use for
* {@link ParseErrorListener#error(String, long, long)} and for
* {@link RDFParseException#RDFParseException(String, long, long)}.
* @param relevantSetting
* The boolean setting that will be checked to determine if this is an
* issue that we need to look at at all. If this setting is true, then
* the error listener will receive the error, and if
* {@link ParserConfig#isNonFatalError(RioSetting)} returns true an
* exception will be thrown.
* @throws RDFParseException
* If {@link ParserConfig#get(RioSetting)} returns true, and
* {@link ParserConfig#isNonFatalError(RioSetting)} returns true for
* the given setting.
*/
protected void reportError(String msg, RioSetting relevantSetting)
throws RDFParseException
{
RDFParserHelper.reportError(msg, relevantSetting, getParserConfig(), getParseErrorListener());
}
/**
* Reports an error with associated line- and column number to the registered
* ParseErrorListener, if the given setting has been set to true.
*
* This method also throws an {@link RDFParseException} when the given
* setting has been set to true and it is not a nonFatalError.
*
* @param msg
* The message to use for
* {@link ParseErrorListener#error(String, long, long)} and for
* {@link RDFParseException#RDFParseException(String, long, long)}.
* @param lineNo
* Optional line number, should default to setting this as -1 if not
* known. Used for
* {@link ParseErrorListener#error(String, long, long)} and for
* {@link RDFParseException#RDFParseException(String, long, long)}.
* @param columnNo
* Optional column number, should default to setting this as -1 if not
* known. Used for
* {@link ParseErrorListener#error(String, long, long)} and for
* {@link RDFParseException#RDFParseException(String, long, long)}.
* @param relevantSetting
* The boolean setting that will be checked to determine if this is an
* issue that we need to look at at all. If this setting is true, then
* the error listener will receive the error, and if
* {@link ParserConfig#isNonFatalError(RioSetting)} returns true an
* exception will be thrown.
* @throws RDFParseException
* If {@link ParserConfig#get(RioSetting)} returns true, and
* {@link ParserConfig#isNonFatalError(RioSetting)} returns true for
* the given setting.
*/
protected void reportError(String msg, long lineNo, long columnNo, RioSetting relevantSetting)
throws RDFParseException
{
RDFParserHelper.reportError(msg, lineNo, columnNo, relevantSetting, getParserConfig(),
getParseErrorListener());
}
/**
* Reports an error with associated line- and column number to the registered
* ParseErrorListener, if the given setting has been set to true.
*
* This method also throws an {@link RDFParseException} when the given
* setting has been set to true and it is not a nonFatalError.
*
* @param e
* The exception whose message will be used for
* {@link ParseErrorListener#error(String, long, long)} and for
* {@link RDFParseException#RDFParseException(String, long, long)}.
* @param lineNo
* Optional line number, should default to setting this as -1 if not
* known. Used for
* {@link ParseErrorListener#error(String, long, long)} and for
* {@link RDFParseException#RDFParseException(String, long, long)}.
* @param columnNo
* Optional column number, should default to setting this as -1 if not
* known. Used for
* {@link ParseErrorListener#error(String, long, long)} and for
* {@link RDFParseException#RDFParseException(String, long, long)}.
* @param relevantSetting
* The boolean setting that will be checked to determine if this is an
* issue that we need to look at at all. If this setting is true, then
* the error listener will receive the error, and if
* {@link ParserConfig#isNonFatalError(RioSetting)} returns true an
* exception will be thrown.
* @throws RDFParseException
* If {@link ParserConfig#get(RioSetting)} returns true, and
* {@link ParserConfig#isNonFatalError(RioSetting)} returns true for
* the given setting.
*/
protected void reportError(Exception e, long lineNo, long columnNo, RioSetting relevantSetting)
throws RDFParseException
{
RDFParserHelper.reportError(e, lineNo, columnNo, relevantSetting, getParserConfig(),
getParseErrorListener());
}
/**
* Reports a fatal error to the registered ParseErrorListener, if any, and
* throws a ParseException afterwards. This method simply calls
* {@link #reportFatalError(String,long,long)} supplying -1 for the
* line- and column number.
*/
protected void reportFatalError(String msg)
throws RDFParseException
{
RDFParserHelper.reportFatalError(msg, getParseErrorListener());
}
/**
* Reports a fatal error with associated line- and column number to the
* registered ParseErrorListener, if any, and throws a
* ParseException afterwards.
*/
protected void reportFatalError(String msg, long lineNo, long columnNo)
throws RDFParseException
{
RDFParserHelper.reportFatalError(msg, lineNo, columnNo, getParseErrorListener());
}
/**
* Reports a fatal error to the registered ParseErrorListener, if any, and
* throws a ParseException afterwards. An exception is made for the
* case where the supplied exception is a {@link RDFParseException}; in that
* case the supplied exception is not wrapped in another ParseException and
* the error message is not reported to the ParseErrorListener, assuming that
* it has already been reported when the original ParseException was thrown.
*
* This method simply calls {@link #reportFatalError(Exception,long,long)}
* supplying -1 for the line- and column number.
*/
protected void reportFatalError(Exception e)
throws RDFParseException
{
RDFParserHelper.reportFatalError(e, getParseErrorListener());
}
/**
* Reports a fatal error with associated line- and column number to the
* registered ParseErrorListener, if any, and throws a
* ParseException wrapped the supplied exception afterwards. An
* exception is made for the case where the supplied exception is a
* {@link RDFParseException}; in that case the supplied exception is not
* wrapped in another ParseException and the error message is not reported to
* the ParseErrorListener, assuming that it has already been reported when
* the original ParseException was thrown.
*/
protected void reportFatalError(Exception e, long lineNo, long columnNo)
throws RDFParseException
{
RDFParserHelper.reportFatalError(e, lineNo, columnNo, getParseErrorListener());
}
private final String createUniqueBNodePrefix() {
return "genid-" + UUID.randomUUID().toString().replaceAll("-", "") + "-";
}
}