All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openrdf.rio.helpers.BasicParserSettings Maven / Gradle / Ivy

/* 
 * Licensed to Aduna under one or more contributor license agreements.  
 * See the NOTICE.txt file distributed with this work for additional 
 * information regarding copyright ownership. 
 *
 * Aduna licenses this file to you under the terms of the Aduna BSD 
 * License (the "License"); you may not use this file except in compliance 
 * with the License. See the LICENSE.txt file distributed with this work 
 * for the full License.
 *
 * Unless required by applicable law or agreed to in writing, software 
 * distributed under the License is distributed on an "AS IS" BASIS, 
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
 * implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */
package org.openrdf.rio.helpers;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;

import org.openrdf.model.impl.SimpleNamespace;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.openrdf.model.Namespace;
import org.openrdf.model.impl.NamespaceImpl;
import org.openrdf.rio.DatatypeHandler;
import org.openrdf.rio.DatatypeHandlerRegistry;
import org.openrdf.rio.LanguageHandler;
import org.openrdf.rio.LanguageHandlerRegistry;
import org.openrdf.rio.RDFHandler;
import org.openrdf.rio.RioSetting;

/**
 * A class encapsulating the basic parser settings that most parsers may
 * support.
 * 
 * @author Peter Ansell
 * @since 2.7.0
 */
public class BasicParserSettings {

	/**
	 * Vocabulary Prefixes of W3C Documents (Recommendations or Notes)
	 *
	 * @see http://www.w3.org/2011/rdfa-context/rdfa-1.1
	 */
	private static final Set defaultPrefix;
	static {
		Set aNamespaces = new HashSet();

		aNamespaces.add(new SimpleNamespace("cat", "http://www.w3.org/ns/dcat#"));
		aNamespaces.add(new SimpleNamespace("qb", "http://purl.org/linked-data/cube#"));
		aNamespaces.add(new SimpleNamespace("grddl", "http://www.w3.org/2003/g/data-view#"));
		aNamespaces.add(new SimpleNamespace("ma", "http://www.w3.org/ns/ma-ont#"));
		aNamespaces.add(new SimpleNamespace("owl", "http://www.w3.org/2002/07/owl#"));
		aNamespaces.add(new SimpleNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"));
		aNamespaces.add(new SimpleNamespace("rdfa", "http://www.w3.org/ns/rdfa#"));
		aNamespaces.add(new SimpleNamespace("rdfs", "http://www.w3.org/2000/01/rdf-schema#"));
		aNamespaces.add(new SimpleNamespace("rif", "http://www.w3.org/2007/rif#"));
		aNamespaces.add(new SimpleNamespace("rr", "http://www.w3.org/ns/r2rml#"));
		aNamespaces.add(new SimpleNamespace("skos", "http://www.w3.org/2004/02/skos/core#"));
		aNamespaces.add(new SimpleNamespace("skosxl", "http://www.w3.org/2008/05/skos-xl#"));
		aNamespaces.add(new SimpleNamespace("wdr", "http://www.w3.org/2007/05/powder#"));
		aNamespaces.add(new SimpleNamespace("void", "http://rdfs.org/ns/void#"));
		aNamespaces.add(new SimpleNamespace("wdrs", "http://www.w3.org/2007/05/powder-s#"));
		aNamespaces.add(new SimpleNamespace("xhv", "http://www.w3.org/1999/xhtml/vocab#"));
		aNamespaces.add(new SimpleNamespace("xml", "http://www.w3.org/XML/1998/namespace"));
		aNamespaces.add(new SimpleNamespace("xsd", "http://www.w3.org/2001/XMLSchema#"));
		aNamespaces.add(new SimpleNamespace("prov", "http://www.w3.org/ns/prov#"));
		aNamespaces.add(new SimpleNamespace("sd", "http://www.w3.org/ns/sparql-service-description#"));
		aNamespaces.add(new SimpleNamespace("org", "http://www.w3.org/ns/org#"));
		aNamespaces.add(new SimpleNamespace("gldp", "http://www.w3.org/ns/people#"));
		aNamespaces.add(new SimpleNamespace("cnt", "http://www.w3.org/2008/content#"));
		aNamespaces.add(new SimpleNamespace("dcat", "http://www.w3.org/ns/dcat#"));
		aNamespaces.add(new SimpleNamespace("earl", "http://www.w3.org/ns/earl#"));
		aNamespaces.add(new SimpleNamespace("ht", "http://www.w3.org/2006/http#"));
		aNamespaces.add(new SimpleNamespace("ptr", "http://www.w3.org/2009/pointers#"));
		aNamespaces.add(new SimpleNamespace("cc", "http://creativecommons.org/ns#"));
		aNamespaces.add(new SimpleNamespace("ctag", "http://commontag.org/ns#"));
		aNamespaces.add(new SimpleNamespace("dc", "http://purl.org/dc/terms/"));
		aNamespaces.add(new SimpleNamespace("dc11", "http://purl.org/dc/elements/1.1/"));
		aNamespaces.add(new SimpleNamespace("dcterms", "http://purl.org/dc/terms/"));
		aNamespaces.add(new SimpleNamespace("foaf", "http://xmlns.com/foaf/0.1/"));
		aNamespaces.add(new SimpleNamespace("gr", "http://purl.org/goodrelations/v1#"));
		aNamespaces.add(new SimpleNamespace("ical", "http://www.w3.org/2002/12/cal/icaltzd#"));
		aNamespaces.add(new SimpleNamespace("og", "http://ogp.me/ns#"));
		aNamespaces.add(new SimpleNamespace("rev", "http://purl.org/stuff/rev#"));
		aNamespaces.add(new SimpleNamespace("sioc", "http://rdfs.org/sioc/ns#"));
		aNamespaces.add(new SimpleNamespace("v", "http://rdf.data-vocabulary.org/#"));
		aNamespaces.add(new SimpleNamespace("vcard", "http://www.w3.org/2006/vcard/ns#"));
		aNamespaces.add(new SimpleNamespace("schema", "http://schema.org/"));

		defaultPrefix = Collections.unmodifiableSet(aNamespaces);
	}

	private final static Logger log = LoggerFactory.getLogger(BasicParserSettings.class);

	/**
	 * Boolean setting for parser to determine whether values for recognised
	 * datatypes are to be verified.
	 * 

* Verification is performed using registered DatatypeHandlers. *

* Defaults to false since 2.8.0, defaulted to true in 2.7. * * @since 2.7.0 */ public static final RioSetting VERIFY_DATATYPE_VALUES = new RioSettingImpl( "org.openrdf.rio.verifydatatypevalues", "Verify recognised datatype values", Boolean.FALSE); /** * Boolean setting for parser to determine whether to fail parsing if * datatypes are not recognised. *

* Datatypes are recognised based on matching one of the registered * {@link DatatypeHandler}s. *

* Defaults to false. * * @since 2.7.0 */ public static final RioSetting FAIL_ON_UNKNOWN_DATATYPES = new RioSettingImpl( "org.openrdf.rio.failonunknowndatatypes", "Fail on unknown datatypes", Boolean.FALSE); /** * Boolean setting for parser to determine whether recognised datatypes need * to have their values be normalized. *

* Normalization is performed using registered DatatypeHandlers. *

* Defaults to false. * * @since 2.7.0 */ public static final RioSetting NORMALIZE_DATATYPE_VALUES = new RioSettingImpl( "org.openrdf.rio.normalizedatatypevalues", "Normalize recognised datatype values", Boolean.FALSE); /** * Setting used to specify which {@link DatatypeHandler} implementations are * to be used for a given parser configuration. *

* Defaults to an XMLSchema DatatypeHandler implementation based on * {@link DatatypeHandler#XMLSCHEMA} and an RDF DatatypeHandler * implementation based on {@link DatatypeHandler#RDFDATATYPES}. * * @since 2.7.0 */ public static final RioSetting> DATATYPE_HANDLERS; /** * Boolean setting for parser to determine whether to fail parsing if * languages are not recognised. *

* Languages are recognised based on matching one of the registered * {@link LanguageHandler}s. *

* Defaults to false. * * @since 2.7.0 */ public static final RioSetting FAIL_ON_UNKNOWN_LANGUAGES = new RioSettingImpl( "org.openrdf.rio.failonunknownlanguages", "Fail on unknown languages", Boolean.FALSE); /** * Boolean setting for parser to determine whether languages are to be * verified based on a given set of definitions for valid languages. *

* Verification is performed using registered {@link LanguageHandler}s. *

* Defaults to true. * * @since 2.7.0 */ public static final RioSetting VERIFY_LANGUAGE_TAGS = new RioSettingImpl( "org.openrdf.rio.verifylanguagevalues", "Verify language tags", Boolean.TRUE); /** * Boolean setting for parser to determine whether languages need to be * normalized, and to which format they should be normalised. *

* Normalization is performed using registered {@link LanguageHandler}s. *

* Defaults to false. * * @since 2.7.0 */ public static final RioSetting NORMALIZE_LANGUAGE_TAGS = new RioSettingImpl( "org.openrdf.rio.normalizelanguagevalues", "Normalize recognised language tags", Boolean.FALSE); /** * Setting used to specify which {@link LanguageHandler} implementations are * to be used for a given parser configuration. *

* Defaults to an RFC3066 LanguageHandler implementation based on * {@link LanguageHandler#RFC3066}. * * @since 2.7.0 */ public static final RioSetting> LANGUAGE_HANDLERS; /** * Boolean setting for parser to determine whether relative URIs are * verified. *

* Defaults to true. * * @since 2.7.0 */ public static final RioSetting VERIFY_RELATIVE_URIS = new RioSettingImpl( "org.openrdf.rio.verifyrelativeuris", "Verify relative URIs", Boolean.TRUE); /** * Boolean setting for parser to determine if URIs should be verified to * contain only legal characters. *

* Defaults to {@code true}. If set to {@code false}, the parser will report * syntactically illegal URIs to the {@link RDFHandler}. * * @since 2.9.0 */ public static final RioSetting VERIFY_URI_SYNTAX = new RioSettingImpl( "org.openrdf.rio.verifyurisyntax", "Verify URI syntax", Boolean.TRUE); /** * Boolean setting for parser to determine whether parser should attempt to * preserve identifiers for blank nodes. If the blank node did not have an * identifier in the document a new identifier will be generated for it. *

* Defaults to false. * * @since 2.7.0 */ public static final RioSetting PRESERVE_BNODE_IDS = new RioSettingImpl( "org.openrdf.rio.preservebnodeids", "Preserve blank node identifiers", Boolean.FALSE); /** * Boolean setting for parser to determine whether parser should preserve, * truncate, drop, or otherwise manipulate statements that contain long * literals. The maximum length of literals if this setting is set to * truncate or drop is configured using {@link #LARGE_LITERALS_LIMIT}. *

* Defaults to {@link LargeLiteralHandling#PRESERVE}. * * @since 2.7.0 */ public static final RioSetting LARGE_LITERALS_HANDLING = new RioSettingImpl( "org.openrdf.rio.largeliterals", "Large literals handling", LargeLiteralHandling.PRESERVE); /** * If {@link #LARGE_LITERALS_HANDLING} is set to * {@link LargeLiteralHandling#PRESERVE}, which it is by default, then the * value of this setting is not used. *

* If {@link #LARGE_LITERALS_HANDLING} is set to * {@link LargeLiteralHandling#DROP} , then the value of this setting * corresponds to the maximum number of bytes for a literal before the * statement it is a part of is dropped silently by the parser. *

* If {@link #LARGE_LITERALS_HANDLING} is set to * {@link LargeLiteralHandling#TRUNCATE} , then the value of this setting * corresponds to the maximum number of bytes for a literal before the value * is truncated. *

* Defaults to 1048576 bytes, which is equivalent to 1 megabyte. * * @since 2.7.0 */ public static final RioSetting LARGE_LITERALS_LIMIT = new RioSettingImpl( "org.openrdf.rio.largeliteralslimit", "Size limit for large literals", 1048576L); /** *

* Setting to provide a collection of {@link Namespace} objects which will be * used when parsing RDF as the basis for the default set of namespaces of * the document. *

*

* Namespaces specified within the RDF document being parsed will override * these defaults *

*

* Defaults to this * list. *

*

* * @since 2.8.5 */ public static final RioSetting> NAMESPACES = new RioSettingImpl>( "org.openrdf.rio.namespaces", "Collection of default namespaces to use for parsing", defaultPrefix); static { List defaultDatatypeHandlers = new ArrayList(5); try { DatatypeHandlerRegistry registry = DatatypeHandlerRegistry.getInstance(); for (String nextHandler : Arrays.asList(DatatypeHandler.XMLSCHEMA, DatatypeHandler.RDFDATATYPES, DatatypeHandler.DBPEDIA, DatatypeHandler.VIRTUOSOGEOMETRY, DatatypeHandler.GEOSPARQL)) { Optional nextdt = registry.get(nextHandler); if (nextdt.isPresent()) { defaultDatatypeHandlers.add(nextdt.get()); } else { log.warn("Could not find DatatypeHandler : {}", nextHandler); } } } catch (Exception e) { // Ignore exceptions so that service loading failures do not cause // class initialization errors. log.warn("Found an error loading DatatypeHandler services", e); } DATATYPE_HANDLERS = new RioSettingImpl>("org.openrdf.rio.datatypehandlers", "Datatype Handlers", Collections.unmodifiableList(defaultDatatypeHandlers)); List defaultLanguageHandlers = new ArrayList(1); try { LanguageHandlerRegistry registry = LanguageHandlerRegistry.getInstance(); for (String nextHandler : Arrays.asList(LanguageHandler.RFC3066)) { Optional nextlang = registry.get(nextHandler); if (nextlang.isPresent()) { defaultLanguageHandlers.add(nextlang.get()); } else { log.warn("Could not find LanguageHandler : {}", nextHandler); } } } catch (Exception e) { // Ignore exceptions so that service loading failures do not cause // class initialization errors. log.warn("Found an error loading LanguageHandler services", e); } LANGUAGE_HANDLERS = new RioSettingImpl>("org.openrdf.rio.languagehandlers", "Language Handlers", Collections.unmodifiableList(defaultLanguageHandlers)); } /** * Private default constructor. */ private BasicParserSettings() { } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy