All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.fortytwo.sesametools.RdfListUtil Maven / Gradle / Ivy

The newest version!
package net.fortytwo.sesametools;

import org.openrdf.OpenRDFUtil;
import org.openrdf.model.IRI;
import org.openrdf.model.Model;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.SimpleValueFactory;
import org.openrdf.model.vocabulary.RDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

/**
 * A utility for translating RDF lists to and from native Java lists.
 *
 * @author Peter Ansell [email protected]
 */
public class RdfListUtil {
    private static final Logger log = LoggerFactory
            .getLogger(RdfListUtil.class);

    /**
     * The default value for checkCycles if no other value is given.
     */
    public final static boolean DEFAULT_CHECK_CYCLES = true;

    /**
     * The default value for checkIncomplete if no other value is given.
     */
    public final static boolean DEFAULT_CHECK_INCOMPLETE = true;

    /**
     * The default value for useIterativeOnError if no other value is given.
     */
    public final static boolean DEFAULT_USE_ITERATIVE_ON_ERROR = true;

    private static final ValueFactory valueFactory = SimpleValueFactory.getInstance();

    /**
     * If enabled, this causes the getLists method to throw RuntimeExceptions if cyclic lists are found.
     * 

* Disabling this property should not cause infinite loops, * as otherwise simple cyclic loops would always cause OutOfMemoryExceptions or StackOverflowExceptions. *

* Disabling this property may result in missing lists from results. *

* NOTE: Tests will fail if you disable this property. *

* Defaults to the constant defined in RdfListUtil.DEFAULT_CHECK_CYCLES */ private final boolean checkCycles; /** * If enabled, this causes the getLists method to throw RuntimeExceptions * when incomplete or invalid lists are found. *

* Some of the cases checked include: *

*

    *
  • whether RDF.REST predicates all map to Resource Objects
  • *
  • whether all of the given heads are Resources
  • *
  • whether RDF.REST predicates map to Resource objects * that contain both RDF.FIRST and valid RDF.REST statements
  • *
*

* Disabling this check may cause unexpected results, including incomplete and missing lists. *

* NOTE: Tests will fail if you disable this property. *

* Defaults to the constant defined in RdfListUtil.DEFAULT_CHECK_INCOMPLETE */ private final boolean checkIncomplete; /** * If enabled, this causes the getLists method to switch from the recursive * method to the iterative method when the hardcoded recursion limit is * reached for a list. *

* The iterative approach is slower in general than the recursive approach, * but can handle much deeper and wider lists. *

* Defaults to the constant defined in RdfListUtil.DEFAULT_USE_ITERATIVE_ON_ERROR */ private final boolean useIterativeOnError; /** * Constructs an instance of the RDF List Processing Utility using the * default error checking and redundancy values. */ public RdfListUtil() { this(DEFAULT_CHECK_CYCLES, DEFAULT_CHECK_INCOMPLETE, DEFAULT_USE_ITERATIVE_ON_ERROR); } /** * Constructs an instance of the RDF List Processing Utility using the * given values to define operational checking and redundancy parameters. * * @param checkCycles Defines whether to check for cycles in lists. * @param checkIncomplete Defines whether to check for properly ended lists. * @param useIterativeOnError Defines whether to use iterative approach when recursive * approach fails with out of memory or stack overflow. */ public RdfListUtil(boolean checkCycles, boolean checkIncomplete, boolean useIterativeOnError) { this.checkCycles = checkCycles; this.checkIncomplete = checkIncomplete; this.useIterativeOnError = useIterativeOnError; } /** * Adds an RDF List with the given elements to a graph. * * @param head the head resource of the list * @param nextValues the list to add. If this list is empty, no statements will be * written * @param graphToAddTo the Model to add the resulting list to * @param contexts the Model contexts into which to add the new statements. If no * contexts are given, statements will be added to the default * (null) context. */ public void addList(final Resource head, final List nextValues, final Model graphToAddTo, final Resource... contexts) { OpenRDFUtil.verifyContextNotNull(contexts); Resource aCurr = head; int i = 0; for (final Value nextValue : nextValues) { // increment counter i++; final Resource aNext = valueFactory.createBNode(); graphToAddTo.add(aCurr, RDF.FIRST, nextValue, contexts); if (i < nextValues.size()) { graphToAddTo.add(aCurr, RDF.REST, aNext, contexts); } else // assign the rest to the rdf:nil object { graphToAddTo.add(aCurr, RDF.REST, RDF.NIL, contexts); } aCurr = aNext; } } /** * Return the contents of the list serialized as an RDF list * * @param subject the subject of a new statement pointing to the head of the * list * @param predicate the predicate of a new statement pointing to the head of the * list * @param nextValues the list to add. If this list is empty, only the pointer * statement will be written. * @param graphToAddTo the Model to add the resulting list to * @param contexts the Model contexts into which to add the new statements. If no * contexts are given, statements will be added to the default * (null) context. */ public void addListAtNode(final Resource subject, final IRI predicate, final List nextValues, final Model graphToAddTo, final Resource... contexts) { OpenRDFUtil.verifyContextNotNull(contexts); final Resource aHead = valueFactory.createBNode(); if (!nextValues.isEmpty()) { graphToAddTo.add(subject, predicate, aHead, contexts); } this.addList(aHead, nextValues, graphToAddTo, contexts); } /** * Fetches a simple (non-branching) list from a graph. * * @param head the head of the list * @param graphToSearch the Model from which the list is to be fetched * @param contexts the Model contexts from which the list is to be fetched * @return the contents of the list */ public List getList(final Resource head, final Model graphToSearch, final Resource... contexts) { OpenRDFUtil.verifyContextNotNull(contexts); final Collection> results = this.getLists(Collections.singleton(head), graphToSearch, contexts); if (results.size() > 1) { throw new RuntimeException( "Found more than one list, possibly due to forking"); } if (results.size() == 1) { return results.iterator().next(); } // no lists found, return empty collection return Collections.emptyList(); } /** * Fetches a single headed list from the Model based on the given subject * and predicate *

* Note: We silently fail if no list is detected at all and return null *

*

* In addition, only the first triple matching the subject-predicate * combination is used to detect the head of the list. *

* * @param subject the subject of a statement pointing to the list * @param predicate the predicate of a statement pointing to the list * @param graphToSearch the Model from which the list is to be fetched * @param contexts the Model contexts from which the list is to be fetched * @return the contents of the list * @throws RuntimeException if the list structure was not complete, or it had cycles */ public List getListAtNode(final Resource subject, final IRI predicate, final Model graphToSearch, final Resource... contexts) { OpenRDFUtil.verifyContextNotNull(contexts); final Collection> allLists = this.getListsAtNode( subject, predicate, graphToSearch, contexts); if (allLists.size() > 1) { throw new RuntimeException( "Found more than one list, possibly due to forking"); } if (allLists.size() == 1) { return allLists.iterator().next(); } // no lists found, return empty collection return Collections.emptyList(); } /** * Fetches a collection of generalized lists, where lists are allowed to * branch from head to tail. * * @param heads the heads of the lists to fetch * @param graphToSearch the Model from which the list is to be fetched * @param contexts the Model contexts from which the list is to be fetched * @return all matching lists. If no matching lists are found, an empty * collection is returned. */ //* public Collection> getListsIterative(final Set heads, final Model graphToSearch, final Resource... contexts) { OpenRDFUtil.verifyContextNotNull(contexts); final List> results = new ArrayList<>(heads.size()); List> completedPointerTrails = new ArrayList<>( heads.size()); for (final Resource nextHead : heads) { if (nextHead == null || nextHead.equals(RDF.NIL)) { throw new RuntimeException( "List structure contains nulls or RDF.NIL in a head position"); } followPointerTrails(nextHead, graphToSearch, completedPointerTrails, contexts); results.addAll(getValuesForPointerTrails(graphToSearch, completedPointerTrails, contexts)); completedPointerTrails.clear(); } // results = getValuesForPointerTrails( // graphToSearch, completedPointerTrails, contexts); return results; } //*/ //* public Collection> getLists(final Set heads, final Model graphToSearch, final Resource... contexts) { OpenRDFUtil.verifyContextNotNull(contexts); Collection> matches = new LinkedList<>(); try { for (Resource h : heads) { matches.addAll(getListsRecursive(h, graphToSearch, contexts)); } } catch (RuntimeException rex) { if (this.getUseIterativeOnError() && rex.getMessage().contains("List was too long")) { matches.clear(); matches = getListsIterative(heads, graphToSearch, contexts); } else { throw rex; } } return matches; } //*/ public Collection> getListsRecursive(final Resource head, final Model graph, final Resource... contexts) { OpenRDFUtil.verifyContextNotNull(contexts); Collection> matches = new LinkedList<>(); Set prev = new HashSet<>(); // The length of this buffer corresponds to both the longest list and the // maximum number of iterations that are supported by this implementation // Attempting to process a list longer than this will throw a RuntimeException // after the maximum number of iterations, as it is not possible to know how // long the longest list will be in advance Value[] buffer = new Value[1000]; matchLists(head, graph, matches, prev, buffer, 0, contexts); return matches; } private void matchLists(final Resource head, final Model graph, final Collection> matches, final Set prev, final Value[] buffer, final int i, final Resource... contexts) { if (head.equals(RDF.NIL)) { // End of list List finalisedList = new ArrayList<>(i); for (int j = 0; j < i; j++) { finalisedList.add(j, buffer[j]); } matches.add(finalisedList); } else if (this.getCheckIncomplete() && !(head instanceof Resource)) { throw new RuntimeException("List structure was not complete"); } else if (!prev.contains(head)) { // List continues, no cycle so far. prev.add(head); Iterator first = graph.filter(head, RDF.FIRST, null, contexts).iterator(); if (this.getCheckIncomplete() && !first.hasNext()) { throw new RuntimeException("List structure was not complete"); } while (first.hasNext()) { buffer[i] = first.next().getObject(); Iterator rest = graph.filter(head, RDF.REST, null, contexts).iterator(); if (this.getCheckIncomplete() && !rest.hasNext()) { throw new RuntimeException("List structure was not complete"); } while (rest.hasNext()) { Value r = rest.next().getObject(); if (r instanceof Resource) { if ((i + 1) >= buffer.length) { throw new RuntimeException(String.format( "List was too long, maximum is %d elements long", buffer.length)); } matchLists((Resource) r, graph, matches, prev, buffer, i + 1); } else if (this.getCheckIncomplete()) { throw new RuntimeException("List structure was not complete"); } } } prev.remove(head); } else if (prev.contains(head) && this.getCheckCycles()) { throw new RuntimeException("List cannot contain cycles"); } else if (this.getCheckIncomplete()) { throw new RuntimeException("List structure was not complete"); } } private List> getValuesForPointerTrails( final Model graphToSearch, List> completedPointerTrails, final Resource... contexts) { final List> results = new ArrayList<>( completedPointerTrails.size()); // Go through the pointer trails finding the corresponding // RDF.FIRST/Value combinations to generate the result lists for (List nextPointerTrail : completedPointerTrails) { final List nextResult = new ArrayList<>(); for (int i = 0; i < nextPointerTrail.size(); i++) { Resource nextPointer = nextPointerTrail.get(i); // Check to make sure that the last element is RDF.NIL if (i == (nextPointerTrail.size() - 1)) { if (!nextPointer.equals(RDF.NIL)) { throw new RuntimeException( "Did not find RDF.NIL as the terminating element of a list"); } } else { if (nextPointer.equals(RDF.NIL)) { throw new RuntimeException( "Found RDF.NIL inside a list trail"); } Value nextValue = null; final Iterator valueMatch = graphToSearch.filter( nextPointer, RDF.FIRST, null, contexts).iterator(); if (valueMatch.hasNext()) { final Statement nextValueMatch = valueMatch.next(); nextValue = nextValueMatch.getObject(); if (valueMatch.hasNext()) { Statement errorValueMatch = valueMatch.next(); log.error("Found multiple rdf:first items nextValueMatch=" + nextValueMatch + " errorValueMatch=" + errorValueMatch); throw new RuntimeException( "List structure cannot contain multiple values" + " for rdf:first items for a given subject resource"); } } if (nextValue == null) { throw new RuntimeException( "List structure was not complete"); } nextResult.add(nextValue); } } if (!nextResult.isEmpty()) { results.add(nextResult); } } return results; } private void followPointerTrails(Resource nextHead, Model graphToSearch, List> completedPointerTrails, Resource... contexts) { OpenRDFUtil.verifyContextNotNull(contexts); List firstPointerTrail = new ArrayList<>(); // add the first head to the currentPointerTrail firstPointerTrail.add(nextHead); // start off our currentPointerTrail marker list with the contents of // the firstPointerTrail List currentPointerTrail = new ArrayList<>( firstPointerTrail); List> uncompletedPointerTrails = new ArrayList<>(); Resource nextPointer = nextHead; boolean allDone; do { // start off thinking all are done, and then set to false as // necessary before the end of the loop allDone = true; // match the nextPointer with RDF.REST predicate to find next hops final Iterator nextMatch = graphToSearch.filter( nextPointer, RDF.REST, null, contexts).iterator(); // if there are no matches complain and throw a runtime exception if (!nextMatch.hasNext()) { throw new RuntimeException("List structure was not complete"); } allDone = resolveNextMatch(completedPointerTrails, currentPointerTrail, uncompletedPointerTrails, allDone, nextMatch); if (nextMatch.hasNext()) { // start a loop to add all of the matches for each of the forks // to uncompleted list while (nextMatch.hasNext()) { if (!resolveNextMatch(completedPointerTrails, currentPointerTrail, uncompletedPointerTrails, allDone, nextMatch)) { allDone = false; } } } // TODO: is allDone needed above or can we rely completely on // uncompletedPointerTrails if (uncompletedPointerTrails.isEmpty()) { currentPointerTrail = null; nextPointer = null; allDone = true; } else { allDone = false; // TODO what is the best, or different strategies for choosing // the next pointer trail currentPointerTrail = uncompletedPointerTrails .remove(uncompletedPointerTrails.size() - 1); nextPointer = currentPointerTrail.get(currentPointerTrail .size() - 1); } } while (!allDone); } private boolean resolveNextMatch( List> completedPointerTrails, List currentPointerTrail, List> uncompletedPointerTrails, boolean allDone, final Iterator nextMatch) { Statement nextMatchStatement = nextMatch.next(); Value nextValue = nextMatchStatement.getObject(); if (nextValue instanceof Resource) { Resource nextResource = (Resource) nextValue; if (this.getCheckCycles() && currentPointerTrail.contains(nextResource)) { throw new RuntimeException("List cannot contain cycles"); } ArrayList nextTrail = new ArrayList<>( currentPointerTrail); nextTrail.add(nextResource); if (nextResource.equals(RDF.NIL)) { // uncompletedPointerTrails.remove(currentPointerTrail); completedPointerTrails.add(nextTrail); } else { allDone = false; uncompletedPointerTrails.add(nextTrail); } } else { throw new RuntimeException("List structure not valid"); } return allDone; } /** * Fetches a collection of generalized lists based on the given subject and * predicate, where lists are allowed to branch from head to tail. * * @param subject the subject of a statement pointing to the list * @param predicate the predicate of a statement pointing to the list * @param graphToSearch the Model from which the list is to be fetched * @param contexts the Model contexts from which the list is to be fetched * @return all matching lists. If no matching lists are found, an empty * collection is returned. */ public Collection> getListsAtNode( final Resource subject, final IRI predicate, final Model graphToSearch, final Resource... contexts) { OpenRDFUtil.verifyContextNotNull(contexts); Collection> results; final Iterator headStatementMatches = graphToSearch.filter( subject, predicate, null, contexts).iterator(); final Set heads = new HashSet<>(); while (headStatementMatches.hasNext()) { final Statement nextHeadStatement = headStatementMatches.next(); if (nextHeadStatement.getObject() instanceof Resource) { heads.add((Resource) nextHeadStatement.getObject()); } } results = this.getLists(heads, graphToSearch, contexts); return results; } /** * @return True if this utility is setup to check for cycles and throw * exceptions if it finds cycles in lists. */ public boolean getCheckCycles() { return checkCycles; } /** * @return True if this utility is setup to check for incomplete, * unterminated lists, and throw exceptions if it finds any. */ public boolean getCheckIncomplete() { return checkIncomplete; } /** * @return True if this utility is setup to use a slower iterative approach * then a recursive approach fails. */ public boolean getUseIterativeOnError() { return useIterativeOnError; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy