All downloads are free. Search and download functionality uses the official Maven repository.

org.eclipse.rdf4j.rio.helpers.BasicParserSettings Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *******************************************************************************/
package org.eclipse.rdf4j.rio.helpers;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.Set;

import org.eclipse.rdf4j.model.Namespace;
import org.eclipse.rdf4j.model.util.Namespaces;
import org.eclipse.rdf4j.rio.DatatypeHandler;
import org.eclipse.rdf4j.rio.DatatypeHandlerRegistry;
import org.eclipse.rdf4j.rio.LanguageHandler;
import org.eclipse.rdf4j.rio.LanguageHandlerRegistry;
import org.eclipse.rdf4j.rio.RDFHandler;
import org.eclipse.rdf4j.rio.RioSetting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A class encapsulating the basic parser settings that most parsers may support.
 *
 * @author Peter Ansell
 */
public class BasicParserSettings {

	private final static Logger log = LoggerFactory.getLogger(BasicParserSettings.class);

	/**
	 * Boolean setting for parser to determine whether values for recognised datatypes are to be verified.
	 * 

* Verification is performed using registered DatatypeHandlers. *

* Defaults to false. *

* Can be overridden by setting system property {@code org.eclipse.rdf4j.rio.verify_datatype_values}. */ public static final BooleanRioSetting VERIFY_DATATYPE_VALUES = new BooleanRioSetting( "org.eclipse.rdf4j.rio.verify_datatype_values", "Verify recognised datatype values", Boolean.FALSE); /** * Boolean setting for parser to determine whether to fail parsing if datatypes are not recognised. *

* Datatypes are recognised based on matching one of the registered {@link DatatypeHandler}s. *

* Defaults to false. *

* Can be overridden by setting system property {@code org.eclipse.rdf4j.rio.fail_on_unknown_datatypes}. */ public static final BooleanRioSetting FAIL_ON_UNKNOWN_DATATYPES = new BooleanRioSetting( "org.eclipse.rdf4j.rio.fail_on_unknown_datatypes", "Fail on unknown datatypes", Boolean.FALSE); /** * Boolean setting for parser to determine whether recognised datatypes need to have their values be normalized. *

* Normalization is performed using registered DatatypeHandlers. *

* Defaults to false. *

* Can be overridden by setting system property {@code org.eclipse.rdf4j.rio.normalize_datatype_values}. */ public static final BooleanRioSetting NORMALIZE_DATATYPE_VALUES = new BooleanRioSetting( "org.eclipse.rdf4j.rio.normalize_datatype_values", "Normalize recognised datatype values", Boolean.FALSE); /** * Setting used to specify which {@link DatatypeHandler} implementations are to be used for a given parser * configuration. *

* Defaults to an XMLSchema DatatypeHandler implementation based on {@link DatatypeHandler#XMLSCHEMA} and an RDF * DatatypeHandler implementation based on {@link DatatypeHandler#RDFDATATYPES}. */ public static final RioSetting> DATATYPE_HANDLERS; /** * Boolean setting for parser to determine whether to fail parsing if languages are not recognized. *

* Languages are recognized based on matching one of the registered {@link LanguageHandler}s. *

* Defaults to false. *

* Can be overridden by setting system property {@code org.eclipse.rdf4j.rio.fail_on_unknown_languages}. */ public static final BooleanRioSetting FAIL_ON_UNKNOWN_LANGUAGES = new BooleanRioSetting( "org.eclipse.rdf4j.rio.fail_on_unknown_languages", "Fail on unknown languages", Boolean.FALSE); /** * Boolean setting for parser to determine whether languages are to be verified based on a given set of definitions * for valid languages. *

* Verification is performed using registered {@link LanguageHandler}s. *

* Defaults to true. *

* Can be overridden by setting system property {@code org.eclipse.rdf4j.rio.verify_language_tags}. */ public static final BooleanRioSetting VERIFY_LANGUAGE_TAGS = new BooleanRioSetting( "org.eclipse.rdf4j.rio.verify_language_tags", "Verify language tags", Boolean.TRUE); /** * Boolean setting for parser to determine whether languages need to be normalized. *

* Normalization is performed using registered {@link LanguageHandler}s. *

* Defaults to false. *

* Can be overridden by setting system property {@code org.eclipse.rdf4j.rio.normalize_language_tags}. */ public static final BooleanRioSetting NORMALIZE_LANGUAGE_TAGS = new BooleanRioSetting( "org.eclipse.rdf4j.rio.normalize_language_tags", "Normalize recognised language tags", Boolean.FALSE); /** * Setting used to specify which {@link LanguageHandler} implementations are to be used for a given parser * configuration. *

* Defaults to an BCP47 LanguageHandler implementation based on {@link LanguageHandler#BCP47}. */ public static final RioSetting> LANGUAGE_HANDLERS; /** * Boolean setting for parser to determine whether relative URIs are verified. *

* Defaults to true.. *

* Can be overridden by setting system property {@code org.eclipse.rdf4j.rio.verify_relative_uris}. */ public static final BooleanRioSetting VERIFY_RELATIVE_URIS = new BooleanRioSetting( "org.eclipse.rdf4j.rio.verify_relative_uris", "Verify relative URIs", Boolean.TRUE); /** * Boolean setting for parser to determine if URIs should be verified to contain only legal characters. *

* Defaults to {@code true}. If set to {@code false}, the parser will report syntactically illegal URIs to the * {@link RDFHandler}. *

* Can be overridden by setting system property {@code org.eclipse.rdf4j.rio.verify_uri_syntax}. */ public static final BooleanRioSetting VERIFY_URI_SYNTAX = new BooleanRioSetting( "org.eclipse.rdf4j.rio.verify_uri_syntax", "Verify URI syntax", Boolean.TRUE); /** * Boolean setting for parser to determine whether parser should attempt to preserve identifiers for blank nodes. If * the blank node did not have an identifier in the document a new identifier will be generated for it. *

* Defaults to false. *

* Can be overridden by setting system property {@code org.eclipse.rdf4j.rio.preserve_bnode_ids}. */ public static final BooleanRioSetting PRESERVE_BNODE_IDS = new BooleanRioSetting( "org.eclipse.rdf4j.rio.preserve_bnode_ids", "Preserve blank node identifiers", Boolean.FALSE); /** * Scheme and authority of new mint Skolem IRIs that should replace Blank Nodes. For example a value of * "http://example.com" might cause a blank node to be replaced with an IRI of * "http://example.com/.well-known/genid/d26a2d0e98334696f4ad70a677abc1f6" *

* Defaults to null (disabled). *

* Can be overridden by setting system property {@code org.eclipse.rdf4j.rio.skolem_origin}. */ public static final StringRioSetting SKOLEMIZE_ORIGIN = new StringRioSetting( "org.eclipse.rdf4j.rio.skolem_origin", "Replace blank nodes with well known genid IRIs using this scheme and authority", null); /** * Boolean setting for parser to determine whether parser should preserve, truncate, drop, or otherwise manipulate * statements that contain long literals. The maximum length of literals if this setting is set to truncate or drop * is configured using {@link #LARGE_LITERALS_LIMIT}. *

* Defaults to {@link LargeLiteralHandling#PRESERVE}. */ public static final RioSetting LARGE_LITERALS_HANDLING = new RioSettingImpl<>( "org.eclipse.rdf4j.rio.large_literals", "Large literals handling", LargeLiteralHandling.PRESERVE); /** * If {@link #LARGE_LITERALS_HANDLING} is set to {@link LargeLiteralHandling#PRESERVE}, which it is by default, then * the value of this setting is not used. *

* If {@link #LARGE_LITERALS_HANDLING} is set to {@link LargeLiteralHandling#DROP} , then the value of this setting * corresponds to the maximum number of bytes for a literal before the statement it is a part of is dropped silently * by the parser. *

* If {@link #LARGE_LITERALS_HANDLING} is set to {@link LargeLiteralHandling#TRUNCATE} , then the value of this * setting corresponds to the maximum number of bytes for a literal before the value is truncated. *

* Defaults to 1048576 bytes, which is equivalent to 1 megabyte. *

* Can be overridden by setting system property {@code org.eclipse.rdf4j.rio.large_literals_limit}. */ public static final LongRioSetting LARGE_LITERALS_LIMIT = new LongRioSetting( "org.eclipse.rdf4j.rio.large_literals_limit", "Size limit for large literals", 1048576L); /** *

* Setting to provide a collection of {@link Namespace} objects which will be used when parsing RDF as the basis for * the default set of namespaces of the document. *

*

* Namespaces specified within the RDF document being parsed will override these defaults *

*

* Defaults to {@link Namespaces.DEFAULT_RDF4J} the RDFa 1.1 initial context + some additional prefixes. *

*/ public static final RioSetting> NAMESPACES = new RioSettingImpl<>( "org.eclipse.rdf4j.rio.namespaces", "Collection of default namespaces to use for parsing", Namespaces.DEFAULT_RDF4J); /** * Boolean setting for parser to determine whether it should process RDF-star triples encoded as RDF-compatible * special IRIs back to RDF-star values. These IRIs start with urn:rdf4j:triple: followed by the base64-encoding of * the N-Triples serialization of the RDF-star triple value. *

* Parsers that support RDF-star natively will honour this setting too. *

* Defaults to true. *

* Can be overridden by setting system property {@code org.eclipse.rdf4j.rio.process_encoded_rdf_star}. */ public static final BooleanRioSetting PROCESS_ENCODED_RDF_STAR = new BooleanRioSetting( "org.eclipse.rdf4j.rio.process_encoded_rdf_star", "Converts RDF-star triples encoded as RDF-compatible IRIs back to triple values", Boolean.TRUE); static { List defaultDatatypeHandlers = new ArrayList<>(5); try { DatatypeHandlerRegistry registry = DatatypeHandlerRegistry.getInstance(); for (String nextDatatype : Arrays.asList(DatatypeHandler.XMLSCHEMA, DatatypeHandler.RDFDATATYPES, DatatypeHandler.DBPEDIA, DatatypeHandler.VIRTUOSOGEOMETRY, DatatypeHandler.GEOSPARQL)) { Optional nextDatatypeHandler = registry.get(nextDatatype); if (nextDatatypeHandler.isPresent()) { defaultDatatypeHandlers.add(nextDatatypeHandler.get()); } else { log.warn("Could not find DatatypeHandler : {}", nextDatatype); } } } catch (Exception e) { // Ignore exceptions so that service loading failures do not cause // class initialization errors. log.warn("Found an error loading DatatypeHandler services", e); } DATATYPE_HANDLERS = new RioSettingImpl<>("org.eclipse.rdf4j.rio.datatype_handlers", "Datatype Handlers", Collections.unmodifiableList(defaultDatatypeHandlers)); List defaultLanguageHandlers = new ArrayList<>(1); try { LanguageHandlerRegistry registry = LanguageHandlerRegistry.getInstance(); String nextLanguageTagScheme = LanguageHandler.BCP47; if (registry.has(nextLanguageTagScheme)) { Optional nextLanguageHandler = registry.get(nextLanguageTagScheme); if (nextLanguageHandler.isPresent()) { defaultLanguageHandlers.add(nextLanguageHandler.get()); } else { log.warn("Could not find LanguageHandler : {}", nextLanguageTagScheme); } } } catch (Exception e) { // Ignore exceptions so that service loading failures do not cause // class initialization errors. 
log.warn("Found an error loading LanguageHandler services", e); } LANGUAGE_HANDLERS = new RioSettingImpl<>("org.eclipse.rdf4j.rio.language_handlers", "Language Handlers", Collections.unmodifiableList(defaultLanguageHandlers)); } /** * Private default constructor. */ private BasicParserSettings() { } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy