
org.eclipse.rdf4j.sail.inferencer.fc.SchemaCachingRDFSInferencer Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright (c) 2016 Eclipse RDF4J contributors.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *******************************************************************************/

package org.eclipse.rdf4j.sail.inferencer.fc;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.lang3.time.StopWatch;
import org.eclipse.rdf4j.common.transaction.IsolationLevel;
import org.eclipse.rdf4j.common.transaction.IsolationLevels;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.RDFS;
import org.eclipse.rdf4j.repository.Repository;
import org.eclipse.rdf4j.repository.RepositoryConnection;
import org.eclipse.rdf4j.sail.NotifyingSail;
import org.eclipse.rdf4j.sail.SailException;
import org.eclipse.rdf4j.sail.helpers.NotifyingSailWrapper;
import org.eclipse.rdf4j.sail.inferencer.InferencerConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * <p>
 * The SchemaCachingRDFSInferencer is an RDFS reasoner that caches all schema (TBox) statements and calculates an
 * inference map to quickly determine inferred statements. The reasoner can also be instantiated with a predefined
 * schema for improved performance.
 * </p>
 * <p>
 * This reasoner is not a rule-based reasoner and will be up to 80x faster than the
 * {@link ForwardChainingRDFSInferencer}, as well as being more complete.
 * </p>
 * <p>
 * The sail puts no limitations on the isolation level for read transactions; however, all write/delete/update
 * transactions are serialized with exclusive locks. This limits write/delete/update transactions to one transaction
 * at a time.
 * </p>
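 * <p>
 * An illustrative usage sketch (the in-memory store and the example IRIs are placeholders, not requirements):
 * </p>
 *
 * <pre>
 * NotifyingSail store = new MemoryStore();
 * Repository repo = new SailRepository(new SchemaCachingRDFSInferencer(store));
 * try (RepositoryConnection conn = repo.getConnection()) {
 * 	ValueFactory vf = conn.getValueFactory();
 * 	IRI person = vf.createIRI("http://example.org/Person");
 * 	IRI agent = vf.createIRI("http://example.org/Agent");
 * 	IRI alice = vf.createIRI("http://example.org/alice");
 * 	conn.add(person, RDFS.SUBCLASSOF, agent);
 * 	conn.add(alice, RDF.TYPE, person);
 * 	// true: the reasoner has inferred that alice is also an Agent
 * 	boolean inferred = conn.hasStatement(alice, RDF.TYPE, agent, true);
 * }
 * </pre>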

 *
 * @author Håvard Mikkelsen Ottestad
 */
public class SchemaCachingRDFSInferencer extends NotifyingSailWrapper {

	private static final Logger logger = LoggerFactory.getLogger(SchemaCachingRDFSInferencer.class);

	private static final Resource[] DEFAULT_CONTEXT = { null };

	// An optional predefined schema that the user has provided
	Repository predefinedSchema;

	// Exclusive lock for modifying the schema cache.
	private final ReentrantLock exclusiveWriteLock = new ReentrantLock(true);

	// If false, the inferencer will skip some RDFS rules.
	boolean useAllRdfsRules = true;

	// The SPIN sail will add inferred statements that it wants to be used for further inference.
	volatile protected boolean useInferredToCreateSchema;

	// Schema cache
	private Collection<Resource> properties = new HashSet<>();
	private Collection<Resource> types = new HashSet<>();
	private Collection<Statement> subClassOfStatements = new HashSet<>();
	private Collection<Statement> subPropertyOfStatements = new HashSet<>();
	private Collection<Statement> rangeStatements = new HashSet<>();
	private Collection<Statement> domainStatements = new HashSet<>();

	// Forward chained schema cache as lookup tables
	private Map<Resource, Set<Resource>> calculatedTypes = new HashMap<>();
	private Map<Resource, Set<Resource>> calculatedProperties = new HashMap<>();
	private Map<Resource, Set<Resource>> calculatedRange = new HashMap<>();
	private Map<Resource, Set<Resource>> calculatedDomain = new HashMap<>();

	// The inferencer has been instantiated from another inferencer and shares its schema with that one
	private boolean sharedSchema;

	// Inferred statements can either be added to the default context
	// or to the context that the original inserted statement has
	private boolean addInferredStatementsToDefaultContext = false;

	private volatile boolean unmodifiable;

	/**
	 * Instantiate a new SchemaCachingRDFSInferencer
	 */
	public SchemaCachingRDFSInferencer() {
		super();
		predefinedSchema = null;
	}

	/**
	 * Instantiate a SchemaCachingRDFSInferencer.
	 *
	 * @param data Base sail for storing data.
	 */
	public SchemaCachingRDFSInferencer(NotifyingSail data) {
		super(data);
		predefinedSchema = null;
	}

	/**
	 * Instantiate a SchemaCachingRDFSInferencer with a predefined schema. The schema will be used for inference; all
	 * other schema statements added will be ignored and no schema statements can be removed. Using a predefined
	 * schema significantly improves performance.
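	 * <p>
	 * A minimal sketch of supplying a predefined schema (the in-memory repository and the example vocabulary IRIs
	 * are placeholders):
	 * </p>
	 *
	 * <pre>
	 * Repository schema = new SailRepository(new MemoryStore());
	 * try (RepositoryConnection schemaConnection = schema.getConnection()) {
	 * 	ValueFactory vf = schemaConnection.getValueFactory();
	 * 	schemaConnection.add(vf.createIRI("http://example.org/Dog"), RDFS.SUBCLASSOF,
	 * 			vf.createIRI("http://example.org/Animal"));
	 * }
	 * SchemaCachingRDFSInferencer inferencer = new SchemaCachingRDFSInferencer(new MemoryStore(), schema);
	 * </pre>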
	 *
	 * @param data             Base sail for storing data.
	 * @param predefinedSchema Repository containing the schema.
	 */
	public SchemaCachingRDFSInferencer(NotifyingSail data, Repository predefinedSchema) {
		super(data);
		this.predefinedSchema = predefinedSchema;
	}

	/**
	 * Instantiate a SchemaCachingRDFSInferencer.
	 *
	 * @param data            Base sail for storing data.
	 * @param useAllRdfsRules Use all RDFS rules. If set to false, rules rdfs4a and rdfs4b will be ignored.
	 */
	public SchemaCachingRDFSInferencer(NotifyingSail data, boolean useAllRdfsRules) {
		super(data);
		predefinedSchema = null;
		this.useAllRdfsRules = useAllRdfsRules;
	}

	/**
	 * Instantiate a SchemaCachingRDFSInferencer with a predefined schema. The schema will be used for inference; all
	 * other schema statements added will be ignored and no schema statements can be removed. Using a predefined
	 * schema significantly improves performance.
	 *
	 * @param data             Base sail for storing data.
	 * @param predefinedSchema Repository containing the schema.
	 * @param useAllRdfsRules  Use all RDFS rules. If set to false, rules rdfs4a and rdfs4b will be ignored.
	 */
	public SchemaCachingRDFSInferencer(NotifyingSail data, Repository predefinedSchema, boolean useAllRdfsRules) {
		super(data);
		this.predefinedSchema = predefinedSchema;
		this.useAllRdfsRules = useAllRdfsRules;
	}

	void clearInferenceTables() {
		logger.debug("Clear inference tables");
		acquireExclusiveWriteLock();
		properties.clear();
		types.clear();
		subClassOfStatements.clear();
		subPropertyOfStatements.clear();
		rangeStatements.clear();
		domainStatements.clear();

		calculatedTypes.clear();
		calculatedProperties.clear();
		calculatedRange.clear();
		calculatedDomain.clear();
	}

	/**
	 * Tries to obtain an exclusive write lock on this store. This method will block until either the lock is
	 * obtained or an interrupt signal is received.
	 *
	 * @throws SailException if the thread is interrupted while waiting to obtain the lock.
	 */
	void acquireExclusiveWriteLock() {
		if (exclusiveWriteLock.isHeldByCurrentThread()) {
			return;
		}

		try {
			exclusiveWriteLock.lockInterruptibly();
		} catch (InterruptedException e) {
			throw new SailException(e);
		}
	}

	/**
	 * Releases the exclusive write lock.
	 */
	void releaseExclusiveWriteLock() {
		while (exclusiveWriteLock.isHeldByCurrentThread()) {
			exclusiveWriteLock.unlock();
		}
	}

	@Override
	public void init() throws SailException {
		super.init();

		if (sharedSchema) {
			return;
		}

		try (final SchemaCachingRDFSInferencerConnection conn = getConnection()) {
			conn.begin();
			conn.addAxiomStatements();

			List<Statement> tboxStatments = new ArrayList<>();
			if (predefinedSchema != null) {
				logger.debug("Initializing with a predefined schema.");
				try (RepositoryConnection schemaConnection = predefinedSchema.getConnection()) {
					schemaConnection.begin();
					try (Stream<Statement> stream = schemaConnection.getStatements(null, null, null).stream()) {
						tboxStatments = stream
								.peek(conn::processForSchemaCache)
								.collect(Collectors.toList());
					}
					schemaConnection.commit();
				}
			}

			calculateInferenceMaps(conn, true);

			if (predefinedSchema != null) {
				tboxStatments.forEach(statement -> conn.addStatement(statement.getSubject(),
						statement.getPredicate(), statement.getObject(), statement.getContext()));
			}

			conn.commit();
		}
	}

	@Override
	public SchemaCachingRDFSInferencerConnection getConnection() throws SailException {
		InferencerConnection e = (InferencerConnection) super.getConnection();
		return new SchemaCachingRDFSInferencerConnection(this, e);
	}

	@Override
	public ValueFactory getValueFactory() {
		return getBaseSail().getValueFactory();
	}

	/**
	 * Instantiate a new SchemaCachingRDFSInferencer from an existing one. Fast instantiation extracts the schema
	 * lookup tables generated by the existing sail and uses them to populate the lookup tables of a new reasoner.
	 * Schema triples can not be queried in the SchemaCachingRDFSInferencer returned by this method.
	 *
	 * @param sailToInstantiateFrom The SchemaCachingRDFSInferencer to extract the lookup tables from.
	 * @param store                 Base sail for storing data.
	 * @return inferencer
	 */
	static public SchemaCachingRDFSInferencer fastInstantiateFrom(
			SchemaCachingRDFSInferencer sailToInstantiateFrom, NotifyingSail store) {
		return fastInstantiateFrom(sailToInstantiateFrom, store, true);
	}

	/**
	 * Instantiate a new SchemaCachingRDFSInferencer from an existing one. Fast instantiation extracts the schema
	 * lookup tables generated by the existing sail and uses them to populate the lookup tables of a new reasoner.
	 * Schema triples can not be queried in the SchemaCachingRDFSInferencer returned by this method.
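	 * <p>
	 * A rough sketch of reusing an already initialised inferencer for a second store ({@code schemaRepository} is
	 * assumed to hold the shared schema):
	 * </p>
	 *
	 * <pre>
	 * SchemaCachingRDFSInferencer original = new SchemaCachingRDFSInferencer(new MemoryStore(), schemaRepository);
	 * original.init(); // builds the schema lookup tables once
	 *
	 * SchemaCachingRDFSInferencer copy = SchemaCachingRDFSInferencer.fastInstantiateFrom(original, new MemoryStore());
	 * Repository secondRepo = new SailRepository(copy);
	 * </pre>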
	 *
	 * @param sailToInstantiateFrom The SchemaCachingRDFSInferencer to extract the lookup tables from.
	 * @param store                 Base sail for storing data.
	 * @param useAllRdfsRules       Use all RDFS rules. If set to false, rules rdfs4a and rdfs4b will be ignored.
	 * @return inferencer
	 */
	static public SchemaCachingRDFSInferencer fastInstantiateFrom(
			SchemaCachingRDFSInferencer sailToInstantiateFrom, NotifyingSail store, boolean useAllRdfsRules) {
		sailToInstantiateFrom.getConnection().close();
		sailToInstantiateFrom.makeUnmodifiable();

		SchemaCachingRDFSInferencer ret = new SchemaCachingRDFSInferencer(store,
				sailToInstantiateFrom.predefinedSchema, useAllRdfsRules);

		ret.sharedSchema = true;

		sailToInstantiateFrom.calculatedTypes.forEach((key, value) -> value.forEach(v -> {
			if (!ret.calculatedTypes.containsKey(key)) {
				ret.calculatedTypes.put(key, new HashSet<>(Set.of(v)));
			} else {
				ret.calculatedTypes.get(key).add(v);
			}
		}));

		sailToInstantiateFrom.calculatedProperties.forEach((key, value) -> value.forEach(v -> {
			if (!ret.calculatedProperties.containsKey(key)) {
				ret.calculatedProperties.put(key, new HashSet<>(Set.of(v)));
			} else {
				ret.calculatedProperties.get(key).add(v);
			}
		}));

		sailToInstantiateFrom.calculatedRange.forEach((key, value) -> value.forEach(v -> {
			if (!ret.calculatedRange.containsKey(key)) {
				ret.calculatedRange.put(key, new HashSet<>(Set.of(v)));
			} else {
				ret.calculatedRange.get(key).add(v);
			}
		}));

		sailToInstantiateFrom.calculatedDomain.forEach((key, value) -> value.forEach(v -> {
			if (!ret.calculatedDomain.containsKey(key)) {
				ret.calculatedDomain.put(key, new HashSet<>(Set.of(v)));
			} else {
				ret.calculatedDomain.get(key).add(v);
			}
		}));

		return ret;
	}

	private void makeUnmodifiable() {
		if (unmodifiable) {
			return;
		}
		synchronized (this) {
			if (!unmodifiable) {
				unmodifiable = true;

				if (properties.isEmpty()) {
					properties = Collections.emptySet();
				} else {
					properties = Set.copyOf(properties);
				}

				if (types.isEmpty()) {
					types = Collections.emptySet();
				} else {
					types = Set.copyOf(types);
				}

				if (subClassOfStatements.isEmpty()) {
					subClassOfStatements = Collections.emptySet();
				} else {
					subClassOfStatements = Set.copyOf(subClassOfStatements);
				}

				if (subPropertyOfStatements.isEmpty()) {
					subPropertyOfStatements = Collections.emptySet();
				} else {
					subPropertyOfStatements = Set.copyOf(subPropertyOfStatements);
				}

				if (rangeStatements.isEmpty()) {
					rangeStatements = Collections.emptySet();
				} else {
					rangeStatements = Set.copyOf(rangeStatements);
				}

				if (domainStatements.isEmpty()) {
					domainStatements = Collections.emptySet();
				} else {
					domainStatements = Set.copyOf(domainStatements);
				}

				calculatedTypes.replaceAll((k, v) -> Set.copyOf(v));
				calculatedProperties.replaceAll((k, v) -> Set.copyOf(v));
				calculatedRange.replaceAll((k, v) -> Set.copyOf(v));
				calculatedDomain.replaceAll((k, v) -> Set.copyOf(v));

				if (calculatedTypes.isEmpty()) {
					calculatedTypes = Collections.emptyMap();
				} else {
					calculatedTypes = Map.copyOf(calculatedTypes);
				}

				if (calculatedProperties.isEmpty()) {
					calculatedProperties = Collections.emptyMap();
				} else {
					calculatedProperties = Map.copyOf(calculatedProperties);
				}

				if (calculatedRange.isEmpty()) {
					calculatedRange = Collections.emptyMap();
				} else {
					calculatedRange = Map.copyOf(calculatedRange);
				}

				if (calculatedDomain.isEmpty()) {
					calculatedDomain = Collections.emptyMap();
				} else {
					calculatedDomain = Map.copyOf(calculatedDomain);
				}
			}
		}
	}

	void calculateInferenceMaps(SchemaCachingRDFSInferencerConnection conn, boolean addInferred) {
		logger.debug("Calculate inference maps.");
		calculateSubClassOf(subClassOfStatements);
		properties.forEach(predicate -> {
			if (addInferred) {
				conn.addInferredStatementInternal(predicate, RDF.TYPE, RDF.PROPERTY, DEFAULT_CONTEXT);
			}
			calculatedProperties.put(predicate, ConcurrentHashMap.newKeySet());
		});
		calculateSubPropertyOf(subPropertyOfStatements);

		calculateDomainAndRange(rangeStatements, calculatedRange);
		calculateDomainAndRange(domainStatements, calculatedDomain);

		if (addInferred) {
			logger.debug("Add inferred rdfs:subClassOf statements");
			calculatedTypes.forEach((subClass, superClasses) -> {
				superClasses.forEach(superClass -> {
					conn.addInferredStatementInternal(subClass, RDFS.SUBCLASSOF, superClass, DEFAULT_CONTEXT);
				});
			});
		}

		if (addInferred) {
			logger.debug("Add inferred rdfs:subPropertyOf statements");
			calculatedProperties.forEach((sub, sups) -> {
				sups.forEach(sup -> {
					conn.addInferredStatementInternal(sub, RDFS.SUBPROPERTYOF, sup, DEFAULT_CONTEXT);
				});
			});
		}
	}

	void addSubClassOfStatement(Statement st) {
		if (!st.getObject().isResource()) {
			throw new SailException("Object of rdfs:subClassOf should be a resource! " + st);
		}
		subClassOfStatements.add(st);
		types.add(st.getSubject());
		types.add((Resource) st.getObject());
	}

	void addSubPropertyOfStatement(Statement st) {
		if (!st.getObject().isResource()) {
			throw new SailException("Object of rdfs:subPropertyOf should be a resource! " + st);
		}
		subPropertyOfStatements.add(st);
		properties.add(st.getSubject());
		properties.add((Resource) st.getObject());
	}

	void addRangeStatement(Statement st) {
		if (!st.getObject().isResource()) {
			throw new SailException("Object of rdfs:range should be a resource! " + st);
		}
		rangeStatements.add(st);
		properties.add(st.getSubject());
		types.add((Resource) st.getObject());
	}

	void addDomainStatement(Statement st) {
		if (!st.getObject().isResource()) {
			throw new SailException("Object of rdfs:domain should be a resource! " + st);
		}
		domainStatements.add(st);
		properties.add(st.getSubject());
		types.add((Resource) st.getObject());
	}

	boolean hasType(Resource r) {
		return types.contains(r);
	}

	void addType(Resource r) {
		types.add(r);
	}

	boolean hasProperty(Resource property) {
		return properties.contains(property);
	}

	void addProperty(Resource property) {
		properties.add(property);
	}

	Set<Resource> resolveTypes(Resource value) {
		return calculatedTypes.getOrDefault(value, Collections.emptySet());
	}

	Set<Resource> resolveProperties(Resource predicate) {
		return calculatedProperties.getOrDefault(predicate, Collections.emptySet());
	}

	Set<Resource> resolveRangeTypes(IRI predicate) {
		return calculatedRange.getOrDefault(predicate, Collections.emptySet());
	}

	Set<Resource> resolveDomainTypes(IRI predicate) {
		return calculatedDomain.getOrDefault(predicate, Collections.emptySet());
	}

	private void calculateSubClassOf(Collection<Statement> subClassOfStatements) {
		logger.debug("Calculate rdfs:subClassOf inference map.");
		StopWatch stopWatch = null;
		if (logger.isDebugEnabled()) {
			stopWatch = StopWatch.createStarted();
		}

		logger.debug("Fill initial maps");
		types.forEach(type -> {
			if (!calculatedTypes.containsKey(type)) {
				Set<Resource> values = ConcurrentHashMap.newKeySet();
				values.add(RDFS.RESOURCE);
				values.add(type);
				calculatedTypes.put(type, values);
			} else {
				calculatedTypes.get(type).add(type);
			}
		});

		subClassOfStatements.forEach(s -> {
			if (!s.getObject().isResource()) {
				throw new SailException("Object of rdfs:subClassOf should be a resource! " + s);
			}
			Resource subClass = s.getSubject();
			Resource superClass = (Resource) s.getObject();

			if (!calculatedTypes.containsKey(subClass)) {
				Set<Resource> values = ConcurrentHashMap.newKeySet();
				values.add(RDFS.RESOURCE);
				values.add(subClass);
				values.add(superClass);
				calculatedTypes.put(subClass, values);
			} else {
				calculatedTypes.get(subClass).add(superClass);
			}
		});

		// Fixed point approach to finding all sub-classes.
		// prevSize is the size of the previous application of the function
		// newSize is the size of the current application of the function
		// Fixed point is reached when they are the same.
		// Eg. Two consecutive applications return the same number of subclasses
		logger.debug("Run until fixed point");
		long prevSize = 0;
		long newSize = -1;
		while (prevSize != newSize) {
			prevSize = newSize;

			newSize = getStream(calculatedTypes)
					.map(Map.Entry::getValue)
					.mapToInt(value -> {
						List<Set<Resource>> forwardChainedSets = new ArrayList<>(value.size());
						for (Resource resource : value) {
							if (resource != RDFS.RESOURCE) {
								forwardChainedSets.add(resolveTypes(resource));
							}
						}

						addAll(value, forwardChainedSets);

						return value.size();
					})
					.sum();

			logger.debug("Fixed point iteration new size {}", newSize);
		}

		if (logger.isDebugEnabled()) {
			assert stopWatch != null;
			stopWatch.stop();
			logger.debug("Took: " + stopWatch);
		}
	}

	private Stream<Map.Entry<Resource, Set<Resource>>> getStream(Map<Resource, Set<Resource>> map) {
		Set<Map.Entry<Resource, Set<Resource>>> entries = map.entrySet();
		if (entries.size() > 100) {
			return entries.parallelStream().peek(ent -> {
				assert ent.getValue() instanceof ConcurrentHashMap.KeySetView;
			});
		} else {
			return entries.stream();
		}
	}

	private void calculateSubPropertyOf(Collection<Statement> subPropertyOfStatemenets) {
		logger.debug("Calculate rdfs:subPropertyOf inference map.");
		StopWatch stopWatch = null;
		if (logger.isDebugEnabled()) {
			stopWatch = StopWatch.createStarted();
		}
		subPropertyOfStatemenets.forEach(s -> {
			if (!s.getObject().isResource()) {
				throw new SailException("Object of rdfs:subPropertyOf should be a resource! " + s);
			}
			Resource subProperty = s.getSubject();
			Resource superProperty = (Resource) s.getObject();
			if (!calculatedProperties.containsKey(subProperty)) {
				calculatedProperties.put(subProperty, ConcurrentHashMap.newKeySet());
			}

			if (!calculatedProperties.containsKey(superProperty)) {
				calculatedProperties.put(superProperty, ConcurrentHashMap.newKeySet());
			}

			calculatedProperties.get(subProperty).add(superProperty);
		});

		calculatedProperties.forEach((k, v) -> v.add(k));

		// Fixed point approach to finding all sub-properties.
		// prevSize is the size of the previous application of the function
		// newSize is the size of the current application of the function
		// Fixed point is reached when they are the same.
		logger.debug("Run until fixed point");
		long prevSize = 0;
		long newSize = -1;
		while (prevSize != newSize) {
			prevSize = newSize;
			newSize = getStream(calculatedProperties)
					.map(Map.Entry::getValue)
					.mapToInt(value -> {
						List<Set<Resource>> forwardChainedSets = new ArrayList<>(value.size());
						for (Resource resource : value) {
							forwardChainedSets.add(resolveProperties(resource));
						}

						addAll(value, forwardChainedSets);
						return value.size();
					})
					.sum();
			logger.debug("Fixed point iteration new size {}", newSize);
		}

		if (logger.isDebugEnabled()) {
			assert stopWatch != null;
			stopWatch.stop();
			logger.debug("Took: " + stopWatch);
		}
	}

	private void addAll(Set<Resource> res, List<Set<Resource>> from) {
		if (from.size() == 1) {
			var forwardChained = from.get(0);
			if (forwardChained.size() != res.size()) {
				res.addAll(forwardChained);
			}
		} else {
			for (var forwardChainedSet : from) {
				res.addAll(forwardChainedSet);
			}
		}
	}

	private void calculateDomainAndRange(Collection<Statement> rangeOrDomainStatements,
			Map<Resource, Set<Resource>> calculatedRangeOrDomain) {
		StopWatch stopWatch = null;
		if (logger.isDebugEnabled()) {
			stopWatch = StopWatch.createStarted();
		}

		logger.debug("Calculate rdfs:domain and rdfs:range inference map.");

		rangeOrDomainStatements.forEach(s -> {
			if (!s.getObject().isResource()) {
				throw new SailException("Object of rdfs:range or rdfs:domain should be a resource! " + s);
			}
			Resource predicate = s.getSubject();
			Resource object = (Resource) s.getObject();
			if (!calculatedProperties.containsKey(predicate)) {
				calculatedProperties.put(predicate, ConcurrentHashMap.newKeySet());
			}

			if (!calculatedRangeOrDomain.containsKey(predicate)) {
				calculatedRangeOrDomain.put(predicate, ConcurrentHashMap.newKeySet());
			}

			calculatedRangeOrDomain.get(predicate).add(object);

			if (!calculatedTypes.containsKey(object)) {
				calculatedTypes.put(object, ConcurrentHashMap.newKeySet());
			}
		});

		calculatedProperties.keySet()
				.stream()
				.filter(key -> !calculatedRangeOrDomain.containsKey(key))
				.forEach(key -> calculatedRangeOrDomain.put(key, ConcurrentHashMap.newKeySet()));

		// Fixed point approach to finding all ranges or domains.
		// prevSize is the size of the previous application of the function
		// newSize is the size of the current application of the function
		// Fixed point is reached when they are the same.
		logger.debug("Run until fixed point");
		long prevSize = 0;
		final long[] newSize = { -1 };
		while (prevSize != newSize[0]) {
			prevSize = newSize[0];
			newSize[0] = 0;

			calculatedRangeOrDomain.forEach((key, value) -> {
				List<Resource> resolvedBySubProperty = new ArrayList<>();
				resolveProperties(key).forEach(newPredicate -> {
					Set<Resource> iris = calculatedRangeOrDomain.get(newPredicate);
					if (iris != null) {
						resolvedBySubProperty.addAll(iris);
					}
				});

				List<Resource> resolvedBySubClass = new ArrayList<>();
				value.addAll(resolvedBySubProperty);

				value.stream().map(this::resolveTypes).forEach(resolvedBySubClass::addAll);

				value.addAll(resolvedBySubClass);

				newSize[0] += value.size();
			});
			logger.debug("Fixed point iteration new size {}", newSize[0]);
		}

		if (logger.isDebugEnabled()) {
			assert stopWatch != null;
			stopWatch.stop();
			logger.debug("Took: " + stopWatch);
		}
	}

	@Override
	public IsolationLevel getDefaultIsolationLevel() {
		IsolationLevel level = super.getDefaultIsolationLevel();
		if (level.isCompatibleWith(IsolationLevels.READ_COMMITTED)) {
			return level;
		} else {
			List<IsolationLevel> supported = this.getSupportedIsolationLevels();
			return IsolationLevels.getCompatibleIsolationLevel(IsolationLevels.READ_COMMITTED, supported);
		}
	}
	/**
	 * <p>
	 * Inferred statements can either be added to the default context or to the context that the original inserted
	 * statement has.
	 * </p>
	 */
	public boolean isAddInferredStatementsToDefaultContext() {
		return addInferredStatementsToDefaultContext;
	}

	/**
	 * <p>
	 * Inferred statements can either be added to the default context or to the context that the original inserted
	 * statement has. setAddInferredStatementsToDefaultContext(true) will add all inferred statements to the default
	 * context.
	 * </p>
	 * <p>
	 * Which context a TBox statement is added to is undefined.
	 * </p>
	 * <p>
	 * Before 3.0 the default value for addInferredStatementsToDefaultContext was true. From 3.0 the default value is
	 * false.
	 * </p>
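	 * <p>
	 * A minimal sketch of enabling this behaviour (the in-memory store is a placeholder):
	 * </p>
	 *
	 * <pre>
	 * SchemaCachingRDFSInferencer inferencer = new SchemaCachingRDFSInferencer(new MemoryStore());
	 * // route every inferred statement to the default context rather than the context of the triggering statement
	 * inferencer.setAddInferredStatementsToDefaultContext(true);
	 * </pre>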

	 *
	 * @param addInferredStatementsToDefaultContext
	 */
	public void setAddInferredStatementsToDefaultContext(boolean addInferredStatementsToDefaultContext) {
		this.addInferredStatementsToDefaultContext = addInferredStatementsToDefaultContext;
	}

	boolean usesPredefinedSchema() {
		return predefinedSchema != null || sharedSchema;
	}

}



