All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.tinkerpop.blueprints.oupls.sail.GraphSail Maven / Gradle / Ivy

package com.tinkerpop.blueprints.oupls.sail;

import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.KeyIndexableGraph;
import com.tinkerpop.blueprints.TransactionalGraph;
import com.tinkerpop.blueprints.Vertex;
import com.tinkerpop.blueprints.util.wrappers.WrapperGraph;
import org.openrdf.model.BNode;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.ValueFactoryImpl;
import org.openrdf.rio.RDFHandler;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.Rio;
import org.openrdf.sail.NotifyingSailConnection;
import org.openrdf.sail.SailException;
import org.openrdf.sail.helpers.NotifyingSailBase;

import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/**
 * An RDF storage interface for any graph database with a Blueprints KeyIndexableGraph implementation.  It models
 * RDF graphs as property graphs which can be easily traversed and manipulated with other Blueprints-compatible tools.
 * At the same time, it can be used with OpenRDF-based tools to power a SPARQL endpoint, an RDF reasoner, etc.
 *
 * RDF resources are stored as vertices, RDF statements as edges using the Blueprints default (automatic) indices.
 * Namespaces are stored at a special vertex whose "value" property is "urn:com.tinkerpop.blueprints.pgm.oupls.sail:namespaces".
 *
 * This Sail is as transactional as the underlying graph database: if the provided Graph implements TransactionalGraph
 * and is in manual transaction mode, then the SailConnection's commit and rollback methods will be used correspondingly.
 *
 * Retrieval of RDF statements from the store involves both "index-based" and "graph-based" matching, as follows.
 * For each new statement edge which is added to the store, "p" (predicate), "c" (context), and "pc" (predicate and context)
 * property values are added and indexed.  These allow the statement to be quickly retrieved in a query where only the
 * predicate and/or context is specified.  However, GraphSail will additionally index on any triple pattern which
 * is supplied to the constructor, boosting query reactivity at the expense of additional storage overhead.
 * For example, if a "so" pattern is supplied, each new statement edge will also receive an "so" property value which stores the
 * combination of subject and object of the statement.  A subsequent call such as getStatements(john, null, jane) will
 * match both values simultaneously.  This may succeed more quickly than the corresponding graph-based match, which picks
 * either the john or jane vertex as a starting point and filters on adjacent edges.  Graph-based matches are used for
 * all of the triple patterns s,o,sp,so,sc,po,oc,spo,spc,soc,poc,spoc which have not been explicitly flagged for
 * index-based matching.
 *
 * Note: this implementation attaches no semantics to Vertex and Edge IDs, so as to be compatible with Graph
 * implementations which do not allow IDs to be chosen.
 *
 * @author Joshua Shinavier (http://fortytwo.net)
 */
public class GraphSail<T extends KeyIndexableGraph> extends NotifyingSailBase implements WrapperGraph<T> {

    private static final Logger LOGGER = Logger.getLogger(GraphSail.class.getName());

    /** Separator placed between the parts of a value's native string form. */
    public static final String SEPARATOR = " ";

    /** Edge property keys for the predicate and the context of a statement. */
    public static final String
            PREDICATE_PROP = "p",
            CONTEXT_PROP = "c";

    /** Single-character prefixes which identify the kind of value in a native string form. */
    public static final char
            URI_PREFIX = 'U',
            BLANK_NODE_PREFIX = 'B',
            PLAIN_LITERAL_PREFIX = 'P',
            TYPED_LITERAL_PREFIX = 'T',
            LANGUAGE_TAG_LITERAL_PREFIX = 'L',
            NULL_CONTEXT_PREFIX = 'N';

    /** Shape of a triple pattern: any subset of "s", "p", "o", "c", in that order. */
    public static final Pattern INDEX_PATTERN = Pattern.compile("s?p?o?c?");

    /** Property keys and property values used on vertices and edges. */
    public static final String
            BNODE = "bnode",
            INFERRED = "inferred",
            KIND = "kind",
            LANG = "lang",
            LITERAL = "literal",
            TYPE = "type",
            URI = "uri",
            VALUE = "value";

    public static final String DEFAULT_NAMESPACE_PREFIX_KEY = "default-namespace";

    /** Native string form of the null context. */
    public static final String NULL_CONTEXT_NATIVE = "" + NULL_CONTEXT_PREFIX;

    // For each triple pattern (first element of a row), the patterns whose matchers may stand in for it,
    // in order of preference.  The empty pattern "" refers to the trivial matcher.
    private static final String[][] ALTERNATIVES = {
            {"s", ""},
            {"p", ""},
            {"o", ""},
            {"c", ""},
            {"sp", "s", "p"},
            {"so", "s", "o"},
            {"sc", "s", "c"},
            {"po", "o", "p"},
            {"pc", "p", "c"},
            {"oc", "o", "c"},
            {"spo", "so", "sp", "po"},
            {"spc", "sc", "sp", "pc"},
            {"soc", "so", "sc", "oc"},
            {"poc", "po", "oc", "pc"},
            {"spoc", "spo", "soc", "spc", "poc"},
    };

    // Value of the "value" property which identifies the vertex at which namespaces are stored.
    private static final String NAMESPACES_VERTEX_ID = "urn:com.tinkerpop.blueprints.pgm.oupls.sail:namespaces";

    // Context shared between this Sail and the connections it creates.
    private final DataStore store = new DataStore();

    /**
     * Create a new RDF store using the provided Blueprints graph.  Default edge indices ("p,c,pc") will be used.
     *
     * @param graph the storage layer.  If the provided graph implements TransactionalGraph and is in manual transaction
     *              mode, then this Sail will also be transactional.
     */
    public GraphSail(final T graph) {
        this(graph, "p,c,pc");
    }

    /**
     * Create a new RDF store using the provided Blueprints graph.  Additionally, create edge indices for the provided
     * triple patterns (potentially speeding up certain queries, while increasing storage overhead).
     *
     * @param graph           the storage layer.  If the provided graph implements TransactionalGraph and is in manual transaction
     *                        mode, then this Sail will also be transactional.
     *                        Any vertices and edges in the graph should have been previously created with GraphSail.
     * @param indexedPatterns a comma-delimited list of triple patterns for index-based statement matching.
     *                        The "p" and "c" patterns are necessary for efficient answering of certain queries, but are not required.
     *                        The default list of patterns is "p,c,pc".
     *                        To use GraphSail with a base Graph which does not support edge indices, provide "" as the argument.
     * @throws IllegalArgumentException if indexedPatterns is null
     */
    public GraphSail(final T graph, final String indexedPatterns) {
        store.sail = this;
        store.graph = graph;

        store.isTransactional = store.graph instanceof TransactionalGraph;

        // Vertices are looked up by their "value" property, so make sure that key is indexed.
        if (!store.graph.getIndexedKeys(Vertex.class).contains(VALUE)) {
            store.graph.createKeyIndex(VALUE, Vertex.class);
        }

        // Slot 0 is the pattern in which none of subject, predicate, object and context is bound.
        store.matchers[0] = new TrivialMatcher(graph);

        createTripleIndices(indexedPatterns);
        assignUnassignedTriplePatterns();

        store.namespaces = store.getReferenceVertex();
        if (null == store.namespaces) {
            boolean created = false;
            try {
                store.namespaces = store.addVertex(NAMESPACES_VERTEX_ID);
                created = true;
            } finally {
                if (store.isTransactional) {
                    TransactionalGraph tx = (TransactionalGraph) graph;
                    // Only persist the namespaces vertex if it was actually created;
                    // otherwise discard whatever partial state the failed attempt left behind.
                    if (created) {
                        tx.commit();
                    } else {
                        tx.rollback();
                    }
                }
            }
        }
    }

    /**
     * Returns the Blueprints graph which backs this Sail.
     *
     * @return the graph held by this Sail's shared data store
     */
    public T getBaseGraph() {
        final T base = store.getGraph();
        return base;
    }

    /**
     * Initialization hook.  This implementation performs no work.
     *
     * @throws SailException never thrown by this implementation
     */
    public void initializeInternal() throws SailException {
        // Intentionally empty.
    }

    /**
     * Shuts down the underlying Blueprints graph.
     *
     * @throws SailException declared by the signature; not thrown directly by this method
     */
    public void shutDownInternal() throws SailException {
        final T base = store.graph;
        base.shutdown();
    }

    /**
     * Reports whether this Sail accepts writes.
     *
     * @return always true; the store is currently assumed to be writable
     */
    public boolean isWritable() throws SailException {
        final boolean assumedWritable = true;
        return assumedWritable;
    }

    /**
     * Creates a new connection which operates on this Sail's shared data store.
     *
     * @return a newly constructed GraphSailConnection
     */
    public NotifyingSailConnection getConnectionInternal() throws SailException {
        final NotifyingSailConnection connection = new GraphSailConnection(store);
        return connection;
    }

    /**
     * Returns the factory used by this Sail to create RDF values.
     *
     * @return the value factory held by the shared data store
     */
    public ValueFactory getValueFactory() {
        final ValueFactory factory = store.valueFactory;
        return factory;
    }

    /**
     * Switches the use of efficient, short-lived ("volatile") statements on or off for the iterators returned by
     * GraphSailConnection.getStatements() and GraphSailConnection.evaluate().
     * The feature is off by default; in typical usage scenarios, Java compiler optimization makes it superfluous.
     * It may nevertheless confer a performance advantage when a single thread consumes the iterator,
     * inspecting and then immediately discarding each statement.
     *
     * @param flag whether to use volatile statements.
     *             Only iterators created after this call are affected.
     */
    public void useVolatileStatements(final boolean flag) {
        this.store.volatileStatements = flag;
    }

    /**
     * Switches enforcement of a unique statements policy on or off (it is off by default).
     * Under this policy, no new statement is added which is identical
     * (in all of its subject, predicate, object and context) to an existing statement:
     * any existing statements identical to the to-be-added statement are removed first,
     * and the new statement is added afterwards.
     * This comes at the cost of some querying overhead.
     *
     * @param flag whether this policy should be enforced
     */
    public void enforceUniqueStatements(final boolean flag) {
        this.store.uniqueStatements = flag;
    }

    /**
     * Looks up the Vertex which corresponds to an RDF value, if the graph contains one.
     *
     * @param value the RDF value to find
     * @return the corresponding Vertex, or null if no vertex matches.
     * A Vertex will be found if an RDF statement has been added to the graph in which the provided value is
     * the subject or object, or if the value has been explicitly added as a Vertex using {@link #addVertex(org.openrdf.model.Value)}
     */
    public Vertex getVertex(final Value value) {
        final Vertex found = store.getVertex(value);
        return found;
    }

    /**
     * Adds a vertex to the store.
     * <p/>
     * This is useful for adding vertices to a graph before adding
     * edges. Since adding edges involves trying to find each vertex,
     * this method can be used in order to avoid the overhead of lookup misses.
     *
     * @param value The RDF value represented by the vertex. It will be used to
     *              set other property on the vertex such as the {@value #KIND}
     *              and {@value #LANG}.
     * @return The newly created vertex.
     */
    public Vertex addVertex(final Value value) {
        return store.addVertex(value);
    }

    /**
     * Returns "graphsail[" followed by the lower-cased simple class name of the base graph and "]".
     */
    @Override
    public String toString() {
        // Locale.ROOT keeps the result independent of the default locale.
        String type = store.graph.getClass().getSimpleName().toLowerCase(java.util.Locale.ROOT);
        return "graphsail[" + type + "]";
    }

    ////////////////////////////////////////////////////////////////////////////

    /**
     * A context object which is shared between the Sail and its Connections
     */
    class DataStore {
        public T graph;
        public NotifyingSailBase sail;

        // We don't need a special ValueFactory implementation.
        public final ValueFactory valueFactory = new ValueFactoryImpl();

        public final Collection<IndexingMatcher> indexers = new LinkedList<IndexingMatcher>();

        // A triple pattern matcher for each spoc combination
        public final Matcher[] matchers = new Matcher[16];

        public boolean isTransactional;
        public boolean volatileStatements = false;
        public boolean uniqueStatements = false;

        public Vertex namespaces;

        /**
         * Finds the vertex at which namespaces are stored.
         *
         * @return the first vertex whose "value" property equals the namespaces URN, or null if there is none
         */
        public Vertex getReferenceVertex() {
            // TODO: close the iterable returned by getVertices() once iteration is finished
            Iterator<Vertex> iter = graph.getVertices(VALUE, NAMESPACES_VERTEX_ID).iterator();
            return iter.hasNext() ? iter.next() : null;
        }

        /**
         * Creates a new vertex which represents the given RDF value.
         *
         * @param value a URI, Literal or BNode
         * @return the newly created vertex, with its "kind" and "value" properties set
         *         (and "type" and/or "lang" for literals which have them)
         * @throws IllegalStateException if the value is of any other type; no vertex is created in that case
         */
        public Vertex addVertex(final Value value) {
            // Validate before touching the graph, so that a rejected value does not leave an orphan vertex behind.
            if (!(value instanceof URI || value instanceof Literal || value instanceof BNode)) {
                throw new IllegalStateException("value of unexpected type: " + value);
            }

            Vertex v = graph.addVertex(null);

            if (value instanceof URI) {
                v.setProperty(KIND, URI);
                v.setProperty(VALUE, value.stringValue());
            } else if (value instanceof Literal) {
                Literal l = (Literal) value;
                v.setProperty(KIND, LITERAL);
                v.setProperty(VALUE, l.getLabel());
                if (null != l.getDatatype()) {
                    v.setProperty(TYPE, l.getDatatype().stringValue());
                }
                if (null != l.getLanguage()) {
                    v.setProperty(LANG, l.getLanguage());
                }
            } else {
                BNode b = (BNode) value;
                v.setProperty(KIND, BNODE);
                v.setProperty(VALUE, b.getID());
            }

            return v;
        }

        /**
         * Finds the vertex which represents the given RDF value.
         *
         * @param value the RDF value to look up
         * @return the first vertex with a matching "value" property which also passes
         *         {@link #matches(Vertex, Value)}, or null if there is none
         */
        public Vertex getVertex(final Value value) {
            for (Vertex v : graph.getVertices(VALUE, value.stringValue())) {
                if (matches(v, value)) {
                    return v;
                }
            }

            return null;
        }

        /**
         * Tests whether a vertex represents the given RDF value.
         * A vertex which lacks a "kind" or "value" property (such as the namespaces vertex) matches nothing.
         *
         * @param vertex the candidate vertex
         * @param value  the RDF value to compare against
         * @return whether the vertex's kind, value and (for literals) datatype or language agree with the value
         * @throws IllegalStateException if the value is not a URI, Literal or BNode
         */
        public boolean matches(final Vertex vertex, final Value value) {
            String kind = vertex.getProperty(KIND);
            String val = vertex.getProperty(VALUE);

            // Constants go on the left of equals() so that a missing property yields "no match" rather than an NPE.
            // Note: in expression position, URI refers to the String constant, not to the org.openrdf.model.URI type.
            if (value instanceof URI) {
                return URI.equals(kind) && value.stringValue().equals(val);
            } else if (value instanceof Literal) {
                if (!LITERAL.equals(kind)) {
                    return false;
                }

                Literal l = (Literal) value;
                if (!l.getLabel().equals(val)) {
                    return false;
                }

                String type = vertex.getProperty(TYPE);
                String lang = vertex.getProperty(LANG);
                URI vType = l.getDatatype();
                String vLang = l.getLanguage();

                return null == type && null == vType && null == lang && null == vLang
                        || null != type && null != vType && type.equals(vType.stringValue())
                        || null != lang && null != vLang && lang.equals(vLang);
            } else if (value instanceof BNode) {
                return BNODE.equals(kind) && ((BNode) value).getID().equals(val);
            } else {
                throw new IllegalStateException("value of unexpected kind: " + value);
            }
        }

        /**
         * Creates a new vertex whose "value" property is the given string.  No other property is set.
         *
         * @param id the string to store in the "value" property
         * @return the newly created vertex
         */
        public Vertex addVertex(final String id) {
            Vertex v = graph.addVertex(null);
            v.setProperty(VALUE, id);
            return v;
        }

        /**
         * @param v a vertex
         * @return the "value" property of the vertex
         */
        public String getValueOf(final Vertex v) {
            return (String) v.getProperty(VALUE);
        }

        /**
         * @return the underlying graph
         */
        public T getGraph() {
            return this.graph;
        }

        /**
         * Converts an RDF value to its native string form.
         *
         * @param value a Resource, a Literal, or null (which stands for the null context)
         * @return the native string form of the value
         * @throws IllegalStateException if the value is of any other type
         */
        public String valueToNative(final Value value) {
            if (null == value) {
                return NULL_CONTEXT_NATIVE;
            } else if (value instanceof Resource) {
                return resourceToNative((Resource) value);
            } else if (value instanceof Literal) {
                return literalToNative((Literal) value);
            } else {
                throw new IllegalStateException("Value has unfamiliar type: " + value);
            }
        }

        /**
         * Converts a URI or blank node to its native string form.
         *
         * @throws IllegalStateException if the resource is neither a URI nor a BNode
         */
        public String resourceToNative(final Resource value) {
            if (value instanceof URI) {
                return uriToNative((URI) value);
            } else if (value instanceof BNode) {
                return bnodeToNative((BNode) value);
            } else {
                throw new IllegalStateException("Resource has unfamiliar type: " + value);
            }
        }

        /**
         * @return the URI prefix, the separator and the URI's string form, concatenated
         */
        public String uriToNative(final URI value) {
            return GraphSail.URI_PREFIX + GraphSail.SEPARATOR + value.toString();
        }

        /**
         * @return the blank node prefix, the separator and the blank node's ID, concatenated
         */
        public String bnodeToNative(final BNode value) {
            return GraphSail.BLANK_NODE_PREFIX + GraphSail.SEPARATOR + value.getID();
        }

        /**
         * Converts a literal to its native string form: a prefix which depends on whether the literal is plain,
         * language-tagged or typed, followed by the language or datatype (if any) and the label,
         * all joined by the separator.
         */
        public String literalToNative(final Literal literal) {
            URI datatype = literal.getDatatype();

            if (null == datatype) {
                String language = literal.getLanguage();

                if (null == language) {
                    return GraphSail.PLAIN_LITERAL_PREFIX + GraphSail.SEPARATOR + literal.getLabel();
                } else {
                    return GraphSail.LANGUAGE_TAG_LITERAL_PREFIX + GraphSail.SEPARATOR + language + GraphSail.SEPARATOR + literal.getLabel();
                }
            } else {
                // FIXME
                return "" + GraphSail.TYPED_LITERAL_PREFIX + GraphSail.SEPARATOR + datatype + GraphSail.SEPARATOR + literal.getLabel();
            }
        }
    }

    /**
     * Creates an edge key index (where missing) and an indexing matcher for each pattern in a comma-delimited list.
     * Blank entries are ignored and duplicates are collapsed.
     *
     * @param tripleIndexes a comma-delimited list of triple patterns such as "p,c,pc"
     * @throws IllegalArgumentException if the list is null
     * @throws IllegalStateException    if a pattern contains a character other than 's', 'p', 'o' or 'c'
     */
    private void createTripleIndices(final String tripleIndexes) {
        if (null == tripleIndexes) {
            throw new IllegalArgumentException("index list, if supplied, must be non-null");
        }

        Set<String> u = new HashSet<String>();

        for (String s : tripleIndexes.split(",")) {
            String pattern = s.trim();
            if (pattern.length() > 0) {
                u.add(pattern);
            }
        }

        if (!u.contains("p")) {
            LOGGER.warning("no (?s p ?o ?c) index. Certain query operations will be inefficient");
        }
        if (!u.contains("c")) {
            LOGGER.warning("no (?s ?p ?o c) index. Certain query operations will be inefficient");
        }

        for (String key : u) {
            if (!store.graph.getIndexedKeys(Edge.class).contains(key)) {
                store.graph.createKeyIndex(key, Edge.class);
            }

            createIndexingMatcher(key);
        }
    }

    /**
     * Makes sure every one of the 16 triple pattern slots has a matcher, using graph-based matchers where possible
     * and otherwise falling back on the matchers of the alternative patterns listed in ALTERNATIVES.
     */
    private void assignUnassignedTriplePatterns() {
        // As a first pass, fill in all suitable patterns (those containing
        // subject and/or object) not already assigned to indexing matchers,
        // with graph-based matchers.
        for (int i = 0; i < 16; i++) {
            if (null == store.matchers[i]
                    && ((0 != (i & 0x1)) || (0 != (i & 0x4)))) {
                store.matchers[i] = new GraphBasedMatcher((0 != (i & 0x1)), (0 != (i & 0x2)), (0 != (i & 0x4)), (0 != (i & 0x8)), store);
            }
        }

        // Now fill in any remaining patterns with alternative indexing matchers.
        Matcher[] n = new Matcher[16];
        n[0] = store.matchers[0];
        for (String[] alts : ALTERNATIVES) {
            String p = alts[0];
            int i = indexFor(p);

            Matcher m = store.matchers[i];

            // if no matcher has been assigned for this pattern
            if (null == m) {
                // try primary alternatives in the order they are specified
                for (int j = 1; j < alts.length; j++) {
                    m = store.matchers[indexFor(alts[j])];
                    if (null != m) {
                        break;
                    }
                }

                // if no primary alternatives are assigned, choose the first secondary alternative
                if (null == m) {
                    m = n[1];
                }
            }

            n[i] = m;
        }

        System.arraycopy(n, 0, store.matchers, 0, 16);
    }

    /**
     * Computes the matcher slot for a combination of bound statement positions:
     * bit 0 is the subject, bit 1 the predicate, bit 2 the object and bit 3 the context.
     */
    private int indexFor(final boolean s, final boolean p, final boolean o, final boolean c) {
        int index = 0;

        if (s) {
            index |= 0x1;
        }
        if (p) {
            index |= 0x2;
        }
        if (o) {
            index |= 0x4;
        }
        if (c) {
            index |= 0x8;
        }

        return index;
    }

    /**
     * Computes the matcher slot for a triple pattern such as "sp" or "poc".  The empty pattern maps to slot 0.
     *
     * @throws IllegalStateException if the pattern contains a character other than 's', 'p', 'o' or 'c'
     */
    private int indexFor(final String pattern) {
        boolean s = false, p = false, o = false, c = false;

        // Iterate over chars rather than bytes, so that the result does not depend on the platform charset.
        for (char ch : pattern.toCharArray()) {
            switch (ch) {
                case 's':
                    s = true;
                    break;
                case 'p':
                    p = true;
                    break;
                case 'o':
                    o = true;
                    break;
                case 'c':
                    c = true;
                    break;
                default:
                    throw new IllegalStateException("invalid character '" + ch + "' in triple pattern: " + pattern);
            }
        }

        return indexFor(s, p, o, c);
    }

    /**
     * Creates an indexing matcher for the given triple pattern, installs it in the pattern's matcher slot
     * and registers it with the store's indexers.
     *
     * @throws IllegalStateException if the pattern contains a character other than 's', 'p', 'o' or 'c'
     */
    private void createIndexingMatcher(final String pattern) {
        int index = indexFor(pattern);

        // Recover the individual flags from the slot number instead of parsing the pattern a second time.
        boolean s = 0 != (index & 0x1);
        boolean p = 0 != (index & 0x2);
        boolean o = 0 != (index & 0x4);
        boolean c = 0 != (index & 0x8);

        IndexingMatcher m = new IndexingMatcher(s, p, o, c, store);
        store.matchers[index] = m;
        store.indexers.add(m);
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy