// org.eclipse.rdf4j.rio.helpers.RDFParserHelper Maven / Gradle / Ivy
/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.rio.helpers;
import java.util.Optional;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.base.CoreDatatype;
import org.eclipse.rdf4j.model.util.LiteralUtilException;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.rio.DatatypeHandler;
import org.eclipse.rdf4j.rio.LanguageHandler;
import org.eclipse.rdf4j.rio.ParseErrorListener;
import org.eclipse.rdf4j.rio.ParserConfig;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.RioSetting;
/**
 * Helper methods that may be used by {@link RDFParser} implementations.
 * <p>
 * This class contains reference implementations of the workflows for {@link ParseErrorListener},
 * {@link RDFParseException}, {@link ParserConfig}, {@link DatatypeHandler} and {@link LanguageHandler} related
 * methods.
 *
 * @author Peter Ansell
 */
public class RDFParserHelper {

    /**
     * Create a literal using the given parameters, including iterative verification and normalization by any
     * {@link DatatypeHandler} or {@link LanguageHandler} implementations that are found in the {@link ParserConfig}.
     * <p>
     * This overload delegates to
     * {@link #createLiteral(String, String, IRI, ParserConfig, ParseErrorListener, ValueFactory, long, long)} with
     * {@code -1} for both the line and the column number.
     *
     * @param label        The value for {@link Literal#getLabel()}, which may be iteratively normalized.
     * @param lang         If this is not null, and the datatype is either null or equal to {@link RDF#LANGSTRING},
     *                     then a language literal will be created.
     * @param datatype     If datatype is not null, and the datatype is not equal to {@link RDF#LANGSTRING} with a
     *                     non-null lang, then a datatype literal will be created.
     * @param parserConfig The source of parser settings, including the desired list of {@link DatatypeHandler} and
     *                     {@link LanguageHandler}s to use for verification and normalization of datatype and language
     *                     literals respectively.
     * @param errListener  The {@link ParseErrorListener} to use for signalling errors; may be null. It is notified
     *                     when the setting relevant to a problem is set to true in the {@link ParserConfig}, after
     *                     which the error triggers an {@link RDFParseException} unless the setting is present in
     *                     {@link ParserConfig#getNonFatalErrors()}.
     * @param valueFactory The {@link ValueFactory} to use for creating new {@link Literal}s using this method.
     * @return A {@link Literal} created based on the given parameters.
     * @throws RDFParseException    If there was an error during the process that could not be recovered from, based
     *                              on settings in the given parser config.
     * @throws NullPointerException If {@code label} is null.
     */
    public static final Literal createLiteral(String label, String lang, IRI datatype, ParserConfig parserConfig,
            ParseErrorListener errListener, ValueFactory valueFactory) throws RDFParseException {
        return createLiteral(label, lang, datatype, parserConfig, errListener, valueFactory, -1, -1);
    }

    /**
     * Create a literal using the given parameters, including iterative verification and normalization by any
     * {@link DatatypeHandler} or {@link LanguageHandler} implementations that are found in the {@link ParserConfig}.
     * <p>
     * Processing order:
     * <ol>
     * <li>If {@code lang} is not null and {@code datatype} is null or {@link RDF#LANGSTRING}, every
     * {@link LanguageHandler} in {@link BasicParserSettings#LANGUAGE_HANDLERS} that recognises the language is applied:
     * verification when {@link BasicParserSettings#VERIFY_LANGUAGE_TAGS} is true, normalization when
     * {@link BasicParserSettings#NORMALIZE_LANGUAGE_TAGS} is true. If no handler recognises the language, an error is
     * reported against {@link BasicParserSettings#FAIL_ON_UNKNOWN_LANGUAGES}.</li>
     * <li>Otherwise, if {@code datatype} is not null, the same workflow runs with the {@link DatatypeHandler}s in
     * {@link BasicParserSettings#DATATYPE_HANDLERS} and the settings {@link BasicParserSettings#VERIFY_DATATYPE_VALUES},
     * {@link BasicParserSettings#NORMALIZE_DATATYPE_VALUES} and {@link BasicParserSettings#FAIL_ON_UNKNOWN_DATATYPES}.
     * </li>
     * <li>If no handler produced a normalized literal, the literal is created directly with the
     * {@link ValueFactory}.</li>
     * </ol>
     *
     * @param label        The value for {@link Literal#getLabel()}, which may be iteratively normalized.
     * @param lang         If this is not null, and the datatype is either null or equal to {@link RDF#LANGSTRING},
     *                     then a language literal will be created.
     * @param datatype     If datatype is not null, and the datatype is not equal to {@link RDF#LANGSTRING} with a
     *                     non-null lang, then a datatype literal will be created.
     * @param parserConfig The source of parser settings, including the desired list of {@link DatatypeHandler} and
     *                     {@link LanguageHandler}s to use for verification and normalization of datatype and language
     *                     literals respectively.
     * @param errListener  The {@link ParseErrorListener} to use for signalling errors; may be null. It is notified
     *                     when the setting relevant to a problem is set to true in the {@link ParserConfig}, after
     *                     which the error triggers an {@link RDFParseException} unless the setting is present in
     *                     {@link ParserConfig#getNonFatalErrors()}.
     * @param valueFactory The {@link ValueFactory} to use for creating new {@link Literal}s using this method.
     * @param lineNo       Optional line number, should default to setting this as -1 if not known. Used for
     *                     {@link ParseErrorListener#error(String, long, long)} and for
     *                     {@link RDFParseException#RDFParseException(String, long, long)}.
     * @param columnNo     Optional column number, should default to setting this as -1 if not known. Used for
     *                     {@link ParseErrorListener#error(String, long, long)} and for
     *                     {@link RDFParseException#RDFParseException(String, long, long)}.
     * @return A {@link Literal} created based on the given parameters.
     * @throws RDFParseException    If there was an error during the process that could not be recovered from, based
     *                              on settings in the given parser config.
     * @throws NullPointerException If {@code label} is null.
     */
    public static Literal createLiteral(String label, String lang, IRI datatype, ParserConfig parserConfig,
            ParseErrorListener errListener, ValueFactory valueFactory, long lineNo, long columnNo)
            throws RDFParseException {
        if (label == null) {
            throw new NullPointerException("Cannot create a literal using a null label");
        }

        // "result" stays null until a handler successfully normalizes the literal; the working copies carry the
        // output of one handler into the next so normalization is applied iteratively.
        Literal result = null;
        String workingLabel = label;
        Optional<String> workingLang = Optional.ofNullable(lang);
        IRI workingDatatype = datatype;

        // In RDF-1.1 we must do lang check first as language literals will all
        // have datatype RDF.LANGSTRING, but only language literals would have a
        // non-null lang
        if (workingLang.isPresent() && (workingDatatype == null || RDF.LANGSTRING.equals(workingDatatype))) {
            boolean recognisedLanguage = false;
            for (LanguageHandler nextHandler : parserConfig.get(BasicParserSettings.LANGUAGE_HANDLERS)) {
                if (nextHandler.isRecognizedLanguage(workingLang.get())) {
                    recognisedLanguage = true;
                    if (parserConfig.get(BasicParserSettings.VERIFY_LANGUAGE_TAGS)) {
                        try {
                            if (!nextHandler.verifyLanguage(workingLabel, workingLang.get())) {
                                reportError("'" + lang + "' is not a valid language tag ", lineNo, columnNo,
                                        BasicParserSettings.VERIFY_LANGUAGE_TAGS, parserConfig, errListener);
                            }
                        } catch (LiteralUtilException e) {
                            reportError("'" + label
                                    + "' could not be verified by a language handler that recognised it. language was "
                                    + lang, lineNo, columnNo, BasicParserSettings.VERIFY_LANGUAGE_TAGS, parserConfig,
                                    errListener);
                        }
                    }
                    if (parserConfig.get(BasicParserSettings.NORMALIZE_LANGUAGE_TAGS)) {
                        try {
                            result = nextHandler.normalizeLanguage(workingLabel, workingLang.get(), valueFactory);
                            workingLabel = result.getLabel();
                            workingLang = result.getLanguage();
                            workingDatatype = result.getDatatype();
                        } catch (LiteralUtilException e) {
                            reportError(
                                    "'" + label + "' did not have a valid value for language " + lang + ": "
                                            + e.getMessage() + " and could not be normalised",
                                    lineNo, columnNo, BasicParserSettings.NORMALIZE_LANGUAGE_TAGS, parserConfig,
                                    errListener);
                        }
                    }
                }
            }
            if (!recognisedLanguage) {
                reportError("'" + label
                        + "' was not recognised as a language literal, and could not be verified, with language "
                        + lang, lineNo, columnNo, BasicParserSettings.FAIL_ON_UNKNOWN_LANGUAGES, parserConfig,
                        errListener);
            }
        } else if (workingDatatype != null) {
            boolean recognisedDatatype = false;
            for (DatatypeHandler nextHandler : parserConfig.get(BasicParserSettings.DATATYPE_HANDLERS)) {
                if (nextHandler.isRecognizedDatatype(workingDatatype)) {
                    recognisedDatatype = true;
                    if (parserConfig.get(BasicParserSettings.VERIFY_DATATYPE_VALUES)) {
                        try {
                            if (!nextHandler.verifyDatatype(workingLabel, workingDatatype)) {
                                reportError("'" + label + "' is not a valid value for datatype " + datatype, lineNo,
                                        columnNo, BasicParserSettings.VERIFY_DATATYPE_VALUES, parserConfig,
                                        errListener);
                            }
                        } catch (LiteralUtilException e) {
                            reportError("'" + label
                                    + "' could not be verified by a datatype handler that recognised it. datatype was "
                                    + datatype, lineNo, columnNo, BasicParserSettings.VERIFY_DATATYPE_VALUES,
                                    parserConfig, errListener);
                        }
                    }
                    if (parserConfig.get(BasicParserSettings.NORMALIZE_DATATYPE_VALUES)) {
                        try {
                            result = nextHandler.normalizeDatatype(workingLabel, workingDatatype, valueFactory);
                            workingLabel = result.getLabel();
                            workingLang = result.getLanguage();
                            workingDatatype = result.getDatatype();
                        } catch (LiteralUtilException e) {
                            reportError(
                                    "'" + label + "' is not a valid value for datatype " + datatype + ": "
                                            + e.getMessage() + " and could not be normalised",
                                    lineNo, columnNo, BasicParserSettings.NORMALIZE_DATATYPE_VALUES, parserConfig,
                                    errListener);
                        }
                    }
                }
            }
            if (!recognisedDatatype) {
                reportError("'" + label + "' was not recognised, and could not be verified, with datatype " + datatype,
                        lineNo, columnNo, BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, parserConfig, errListener);
            }
        }

        // No handler normalized the literal (or normalization was disabled/failed non-fatally): build it directly.
        if (result == null) {
            try {
                // Removes datatype for langString datatype with no language tag when VERIFY_DATATYPE_VALUES is False.
                if ((workingDatatype == null || RDF.LANGSTRING.equals(workingDatatype))
                        && (!workingLang.isPresent() || workingLang.get().isEmpty())
                        && !parserConfig.get(BasicParserSettings.VERIFY_DATATYPE_VALUES)) {
                    workingLang = Optional.empty();
                    workingDatatype = null;
                }

                if (workingLang.isPresent() && (workingDatatype == null || RDF.LANGSTRING.equals(workingDatatype))) {
                    // Backup for unnormalised language literal creation
                    result = valueFactory.createLiteral(workingLabel, workingLang.get().intern());
                } else if (workingDatatype != null) {
                    // Backup for unnormalised datatype literal creation; when the IRI maps to a known core
                    // datatype, the core datatype's own IRI instance is passed to the factory instead.
                    CoreDatatype coreDatatype = CoreDatatype.from(workingDatatype);
                    result = valueFactory.createLiteral(workingLabel,
                            coreDatatype != CoreDatatype.NONE ? coreDatatype.getIri() : workingDatatype, coreDatatype);
                } else {
                    // Neither language nor datatype: plain xsd:string literal
                    result = valueFactory.createLiteral(workingLabel, CoreDatatype.XSD.STRING);
                }
            } catch (Exception e) {
                // Always throws, so "result" is never returned as null from this path.
                reportFatalError(e, lineNo, columnNo, errListener);
            }
        }

        return result;
    }

    /**
     * Reports an error to the registered ParseErrorListener, if the given setting has been set to true.
     * <p>
     * This method also throws an {@link RDFParseException} when the given setting has been set to true and it is not
     * a nonFatalError. It delegates to
     * {@link #reportError(String, long, long, RioSetting, ParserConfig, ParseErrorListener)} with {@code -1} for both
     * the line and the column number.
     *
     * @param msg             The message to use for {@link ParseErrorListener#error(String, long, long)} and for
     *                        {@link RDFParseException#RDFParseException(String, long, long)}.
     * @param relevantSetting The boolean setting that will be checked to determine if this is an issue that we need to
     *                        look at at all. If this setting is true, then the error listener will receive the error,
     *                        and if {@link ParserConfig#isNonFatalError(RioSetting)} returns false an exception will
     *                        be thrown.
     * @param parserConfig    The {@link ParserConfig} to use for determining if the error is first sent to the
     *                        ParseErrorListener, and whether it is then also non-fatal to avoid throwing an
     *                        {@link RDFParseException}.
     * @param errListener     The {@link ParseErrorListener} that will be sent messages about errors that are enabled;
     *                        may be null.
     * @throws RDFParseException If {@link ParserConfig#get(RioSetting)} returns true, and
     *                           {@link ParserConfig#isNonFatalError(RioSetting)} returns false for the given setting.
     */
    public static void reportError(String msg, RioSetting<Boolean> relevantSetting, ParserConfig parserConfig,
            ParseErrorListener errListener) throws RDFParseException {
        reportError(msg, -1, -1, relevantSetting, parserConfig, errListener);
    }

    /**
     * Reports an error with associated line- and column number to the registered ParseErrorListener, if the given
     * setting has been set to true.
     * <p>
     * This method also throws an {@link RDFParseException} when the given setting has been set to true and it is not
     * a nonFatalError. When the setting is false, nothing happens.
     *
     * @param msg             The message to use for {@link ParseErrorListener#error(String, long, long)} and for
     *                        {@link RDFParseException#RDFParseException(String, long, long)}.
     * @param lineNo          Optional line number, should default to setting this as -1 if not known. Used for
     *                        {@link ParseErrorListener#error(String, long, long)} and for
     *                        {@link RDFParseException#RDFParseException(String, long, long)}.
     * @param columnNo        Optional column number, should default to setting this as -1 if not known. Used for
     *                        {@link ParseErrorListener#error(String, long, long)} and for
     *                        {@link RDFParseException#RDFParseException(String, long, long)}.
     * @param relevantSetting The boolean setting that will be checked to determine if this is an issue that we need to
     *                        look at at all. If this setting is true, then the error listener will receive the error,
     *                        and if {@link ParserConfig#isNonFatalError(RioSetting)} returns false an exception will
     *                        be thrown.
     * @param parserConfig    The {@link ParserConfig} to use for determining if the error is first sent to the
     *                        ParseErrorListener, and whether it is then also non-fatal to avoid throwing an
     *                        {@link RDFParseException}.
     * @param errListener     The {@link ParseErrorListener} that will be sent messages about errors that are enabled;
     *                        may be null.
     * @throws RDFParseException If {@link ParserConfig#get(RioSetting)} returns true, and
     *                           {@link ParserConfig#isNonFatalError(RioSetting)} returns false for the given setting.
     */
    public static void reportError(String msg, long lineNo, long columnNo, RioSetting<Boolean> relevantSetting,
            ParserConfig parserConfig, ParseErrorListener errListener) throws RDFParseException {
        if (parserConfig.get(relevantSetting)) {
            // Notify the listener first so it sees the error even when the exception below aborts parsing.
            if (errListener != null) {
                errListener.error(msg, lineNo, columnNo);
            }

            if (!parserConfig.isNonFatalError(relevantSetting)) {
                throw new RDFParseException(msg, lineNo, columnNo);
            }
        }
    }

    /**
     * Reports an error with associated line- and column number to the registered ParseErrorListener, if the given
     * setting has been set to true. The listener receives the message of the supplied exception.
     * <p>
     * This method also throws an {@link RDFParseException} when the given setting has been set to true and it is not
     * a nonFatalError. If the supplied exception already is an {@link RDFParseException} it is rethrown unchanged;
     * otherwise it is wrapped in a new {@link RDFParseException} carrying the given line and column number.
     *
     * @param e               The exception whose message to use for
     *                        {@link ParseErrorListener#error(String, long, long)}, and which is rethrown or wrapped
     *                        when the error is fatal.
     * @param lineNo          Optional line number, should default to setting this as -1 if not known. Used for
     *                        {@link ParseErrorListener#error(String, long, long)} and for the wrapping
     *                        {@link RDFParseException}.
     * @param columnNo        Optional column number, should default to setting this as -1 if not known. Used for
     *                        {@link ParseErrorListener#error(String, long, long)} and for the wrapping
     *                        {@link RDFParseException}.
     * @param relevantSetting The boolean setting that will be checked to determine if this is an issue that we need to
     *                        look at at all. If this setting is true, then the error listener will receive the error,
     *                        and if {@link ParserConfig#isNonFatalError(RioSetting)} returns false an exception will
     *                        be thrown.
     * @param parserConfig    The {@link ParserConfig} to use for determining if the error is first sent to the
     *                        ParseErrorListener, and whether it is then also non-fatal to avoid throwing an
     *                        {@link RDFParseException}.
     * @param errListener     The {@link ParseErrorListener} that will be sent messages about errors that are enabled;
     *                        may be null.
     * @throws RDFParseException If {@link ParserConfig#get(RioSetting)} returns true, and
     *                           {@link ParserConfig#isNonFatalError(RioSetting)} returns false for the given setting.
     */
    public static void reportError(Exception e, long lineNo, long columnNo, RioSetting<Boolean> relevantSetting,
            ParserConfig parserConfig, ParseErrorListener errListener) throws RDFParseException {
        if (parserConfig.get(relevantSetting)) {
            if (errListener != null) {
                errListener.error(e.getMessage(), lineNo, columnNo);
            }

            if (!parserConfig.isNonFatalError(relevantSetting)) {
                if (e instanceof RDFParseException) {
                    throw (RDFParseException) e;
                } else {
                    throw new RDFParseException(e, lineNo, columnNo);
                }
            }
        }
    }

    /**
     * Reports a fatal error to the registered ParseErrorListener, if any, and throws an {@link RDFParseException}
     * afterwards. This method simply calls {@link #reportFatalError(String, long, long, ParseErrorListener)} supplying
     * {@code -1} for the line- and column number.
     *
     * @param msg         The message to report and to use for the thrown exception.
     * @param errListener The listener to notify; may be null.
     * @throws RDFParseException Always.
     */
    public static void reportFatalError(String msg, ParseErrorListener errListener) throws RDFParseException {
        reportFatalError(msg, -1, -1, errListener);
    }

    /**
     * Reports a fatal error with associated line- and column number to the registered ParseErrorListener, if any, and
     * throws an {@link RDFParseException} afterwards.
     *
     * @param msg         The message to report and to use for the thrown exception.
     * @param lineNo      Line number of the error, or -1 if not known.
     * @param columnNo    Column number of the error, or -1 if not known.
     * @param errListener The listener to notify; may be null.
     * @throws RDFParseException Always.
     */
    public static void reportFatalError(String msg, long lineNo, long columnNo, ParseErrorListener errListener)
            throws RDFParseException {
        if (errListener != null) {
            errListener.fatalError(msg, lineNo, columnNo);
        }

        throw new RDFParseException(msg, lineNo, columnNo);
    }

    /**
     * Reports a fatal error to the registered ParseErrorListener, if any, and throws an {@link RDFParseException}
     * afterwards. An exception is made for the case where the supplied exception is a {@link RDFParseException}; in
     * that case the supplied exception is not wrapped in another RDFParseException and the error message is not
     * reported to the ParseErrorListener, assuming that it has already been reported when the original
     * RDFParseException was thrown.
     * <p>
     * This method simply calls {@link #reportFatalError(Exception, long, long, ParseErrorListener)} supplying
     * {@code -1} for the line- and column number.
     *
     * @param e           The exception to rethrow or wrap.
     * @param errListener The listener to notify; may be null.
     * @throws RDFParseException Always.
     */
    public static void reportFatalError(Exception e, ParseErrorListener errListener) throws RDFParseException {
        reportFatalError(e, -1, -1, errListener);
    }

    /**
     * Reports a fatal error with associated line- and column number to the registered ParseErrorListener, if any, and
     * throws an {@link RDFParseException} wrapping the supplied exception afterwards. An exception is made for the
     * case where the supplied exception is a {@link RDFParseException}; in that case the supplied exception is not
     * wrapped in another RDFParseException and the error message is not reported to the ParseErrorListener, assuming
     * that it has already been reported when the original RDFParseException was thrown.
     *
     * @param e           The exception to rethrow or wrap; its message is what the listener receives.
     * @param lineNo      Line number of the error, or -1 if not known.
     * @param columnNo    Column number of the error, or -1 if not known.
     * @param errListener The listener to notify; may be null.
     * @throws RDFParseException Always.
     */
    public static void reportFatalError(Exception e, long lineNo, long columnNo, ParseErrorListener errListener)
            throws RDFParseException {
        if (e instanceof RDFParseException) {
            throw (RDFParseException) e;
        } else {
            if (errListener != null) {
                errListener.fatalError(e.getMessage(), lineNo, columnNo);
            }

            throw new RDFParseException(e, lineNo, columnNo);
        }
    }

    /**
     * Reports a fatal error with associated line- and column number to the registered ParseErrorListener, if any, and
     * throws an {@link RDFParseException} with the given message, wrapping the supplied exception, afterwards. An
     * exception is made for the case where the supplied exception is a {@link RDFParseException}; in that case the
     * supplied exception is rethrown as-is, the given message is not used, and nothing is reported to the
     * ParseErrorListener, assuming that it has already been reported when the original RDFParseException was thrown.
     *
     * @param message     The message to report to the listener and to use for the wrapping exception.
     * @param e           The exception to rethrow or wrap.
     * @param lineNo      Line number of the error, or -1 if not known.
     * @param columnNo    Column number of the error, or -1 if not known.
     * @param errListener The listener to notify; may be null.
     * @throws RDFParseException Always.
     */
    public static void reportFatalError(String message, Exception e, long lineNo, long columnNo,
            ParseErrorListener errListener) throws RDFParseException {
        if (e instanceof RDFParseException) {
            throw (RDFParseException) e;
        } else {
            if (errListener != null) {
                errListener.fatalError(message, lineNo, columnNo);
            }

            throw new RDFParseException(message, e, lineNo, columnNo);
        }
    }

    /**
     * Protected constructor to prevent direct instantiation; this class only offers static helper methods.
     */
    protected RDFParserHelper() {
    }
}