org.apache.jena.tdb.solver.SolverLib Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jena-tdb Show documentation
TDB is a storage subsystem for Jena and ARQ, it is a native triple store providing persistent storage of triples/quads.
There is a newer version: 4.10.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.tdb.solver;

import static org.apache.jena.atlas.lib.tuple.TupleFactory.tuple ;
import static org.apache.jena.tdb.lib.Lib2.printAbbrev ;

import java.util.* ;
import java.util.function.Function;
import java.util.function.Predicate;

import org.apache.jena.atlas.iterator.Iter ;
import org.apache.jena.atlas.iterator.IteratorWrapper ;
import org.apache.jena.atlas.lib.tuple.Tuple ;
import org.apache.jena.atlas.lib.tuple.TupleFactory ;
import org.apache.jena.graph.Node ;
import org.apache.jena.graph.Triple ;
import org.apache.jena.query.QueryCancelledException ;
import org.apache.jena.sparql.core.BasicPattern ;
import org.apache.jena.sparql.core.Quad ;
import org.apache.jena.sparql.core.Var ;
import org.apache.jena.sparql.engine.ExecutionContext ;
import org.apache.jena.sparql.engine.QueryIterator ;
import org.apache.jena.sparql.engine.binding.Binding ;
import org.apache.jena.sparql.engine.binding.BindingFactory ;
import org.apache.jena.sparql.engine.binding.BindingMap ;
import org.apache.jena.sparql.engine.iterator.QueryIterNullIterator ;
import org.apache.jena.tdb.TDBException ;
import org.apache.jena.tdb.lib.NodeLib ;
import org.apache.jena.tdb.store.DatasetGraphTDB ;
import org.apache.jena.tdb.store.GraphTDB ;
import org.apache.jena.tdb.store.NodeId ;
import org.apache.jena.tdb.store.nodetable.NodeTable ;
import org.apache.jena.tdb.store.nodetupletable.NodeTupleTable ;
import org.apache.jena.tdb.sys.TDBInternal ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;

/** Utilities used within the TDB BGP solver : local TDB store */
public class SolverLib
{
    private static Logger log = LoggerFactory.getLogger(SolverLib.class) ; 
    
    /** Non-reordering execution of a basic graph pattern, given a iterator of bindings as input */ 
    public static QueryIterator execute(GraphTDB graph, BasicPattern pattern, 
                                        QueryIterator input, Predicate> filter,
                                        ExecutionContext execCxt)
    {
        // Maybe default graph or named graph.
        NodeTupleTable ntt = graph.getNodeTupleTable() ;
        return execute(ntt, graph.getGraphName(), pattern, input, filter, execCxt) ;
    }
    
    /** Non-reordering execution of a quad pattern, given a iterator of bindings as input.
     *  GraphNode is Node.ANY for execution over the union of named graphs.
     *  GraphNode is null for execution over the real default graph.
     */ 
    public static QueryIterator execute(DatasetGraphTDB ds, Node graphNode, BasicPattern pattern,
                                        QueryIterator input, Predicate> filter,
                                        ExecutionContext execCxt)
    {
        NodeTupleTable ntt = ds.chooseNodeTupleTable(graphNode) ;
        return execute(ntt, graphNode, pattern, input, filter, execCxt) ;
    }
    
    public static Iterator convertToIds(Iterator iterBindings, NodeTable nodeTable)
    { return Iter.map(iterBindings, convFromBinding(nodeTable)) ; }
    
    /** Convert from Iterator to Iterator, conversion "on demand" 
     * (in convToBinding(BindingNodeId, NodeTable)
     */
    public static Iterator convertToNodes(Iterator iterBindingIds, NodeTable nodeTable)
    { return Iter.map(iterBindingIds, bindingNodeIds -> convToBinding(bindingNodeIds, nodeTable)) ; }
    
    // The worker.  Callers choose the NodeTupleTable.  
    //     graphNode may be Node.ANY, meaning we should make triples unique.
    //     graphNode may be null, meaning default graph

    private static QueryIterator execute(NodeTupleTable nodeTupleTable, Node graphNode, BasicPattern pattern, 
                                         QueryIterator input, Predicate> filter,
                                         ExecutionContext execCxt)
    {
        if ( Quad.isUnionGraph(graphNode) )
            graphNode = Node.ANY ;
        if ( Quad.isDefaultGraph(graphNode) )
            graphNode = null ;
        
        List triples = pattern.getList() ;
        boolean anyGraph = (graphNode==null ? false : (Node.ANY.equals(graphNode))) ;

        int tupleLen = nodeTupleTable.getTupleTable().getTupleLen() ;
        if ( graphNode == null ) {
            if ( 3 != tupleLen )
                throw new TDBException("SolverLib: Null graph node but tuples are of length "+tupleLen) ;
        } else {
            if ( 4 != tupleLen )
                throw new TDBException("SolverLib: Graph node specified but tuples are of length "+tupleLen) ;
        }
        
        // Convert from a QueryIterator (Bindings of Var/Node) to BindingNodeId
        NodeTable nodeTable = nodeTupleTable.getNodeTable() ;
        
        Iterator chain = Iter.map(input, SolverLib.convFromBinding(nodeTable)) ;
        List killList = new ArrayList<>() ;
        
        for ( Triple triple : triples )
        {
            Tuple tuple = null ;
            if ( graphNode == null )
                // 3-tuples
                tuple = tuple(triple.getSubject(), triple.getPredicate(), triple.getObject()) ;
            else
                // 4-tuples.
                tuple = tuple(graphNode, triple.getSubject(), triple.getPredicate(), triple.getObject()) ;
            chain = solve(nodeTupleTable, tuple, anyGraph, chain, filter, execCxt) ;
            chain = makeAbortable(chain, killList) ; 
        }
        
        // DEBUG POINT
        if ( false )
        {
            if ( chain.hasNext())
                chain = Iter.debug(chain) ;
            else
                System.out.println("No results") ;
        }
        
        // Timeout wrapper ****
        // QueryIterTDB gets called async.
        // Iter.abortable?
        // Or each iterator has a place to test.
        // or pass in a thing to test?
        
        
        // Need to make sure the bindings here point to parent.
        Iterator iterBinding = convertToNodes(chain, nodeTable) ;
        
        // "input" will be closed by QueryIterTDB but is otherwise unused.
        // "killList" will be aborted on timeout.
        return new QueryIterTDB(iterBinding, killList, input, execCxt) ;
    }
    
    /** Create an abortable iterator, storing it in the killList.
     *  Just return the input iterator if kilList is null. 
     */
    static  Iterator makeAbortable(Iterator iter, List killList)
    {
        if ( killList == null )
            return iter ;
        IterAbortable k = new IterAbortable<>(iter) ;
        killList.add(k) ;
        return k ;
    }
    
    /** Iterator that adds an abort operation which can be called
     *  at any time, including from another thread, and causes the
     *  iterator to throw an exception when next touched (hasNext, next).  
     */
    static class IterAbortable extends IteratorWrapper implements Abortable
    {
        volatile boolean abortFlag = false ;
        
        public IterAbortable(Iterator iterator)
        {
            super(iterator) ;
        }
        
        /** Can call asynchronously at anytime */
        @Override
        public void abort() { 
            abortFlag = true ;
        }
        
        @Override
        public boolean hasNext()
        {
            if ( abortFlag )
                throw new QueryCancelledException() ;
            return iterator.hasNext() ; 
        }
        
        @Override
        public T next()
        {
            if ( abortFlag )
                throw new QueryCancelledException() ;
            return iterator.next() ; 
        }
    }
    
    public static Iterator solve(NodeTupleTable nodeTupleTable, 
                                                Tuple tuple,
                                                boolean anyGraph,
                                                Iterator chain, Predicate> filter,
                                                ExecutionContext execCxt)
    {
        return new StageMatchTuple(nodeTupleTable, chain, tuple, anyGraph, filter, execCxt) ;
    }

    public static Binding convToBinding(BindingNodeId bindingNodeIds, NodeTable nodeTable) {
        if ( true )
            return new BindingTDB(bindingNodeIds, nodeTable) ;
        else {
            // Makes nodes immediately. Causing unnecessary NodeTable accesses
            // (e.g. project)
            BindingMap b = BindingFactory.create() ;
            for (Var v : bindingNodeIds) {
                NodeId id = bindingNodeIds.get(v) ;
                Node n = nodeTable.getNodeForNodeId(id) ;
                b.add(v, n) ;
            }
            return b ;
        }
    }
    
    // Transform : Binding ==> BindingNodeId
    public static Function convFromBinding(final NodeTable nodeTable)
    {
        return binding -> SolverLib.convert(binding, nodeTable);
    }
    
    /** Binding {@literal ->} BindingNodeId, given a NodeTable */
    public static BindingNodeId convert(Binding binding, NodeTable nodeTable) 
    {
        if ( binding instanceof BindingTDB )
            return ((BindingTDB)binding).getBindingId() ;

        BindingNodeId b = new BindingNodeId(binding) ;
        // and copy over, getting NodeIds.
        Iterator vars = binding.vars() ;

        for ( ; vars.hasNext() ; )
        {
            Var v = vars.next() ;
            Node n = binding.get(v) ;  
            if ( n == null )
                // Variable mentioned in the binding but not actually defined.
                // Can occur with BindingProject
                continue ;

            // Rely on the node table cache for efficency - we will likely be
            // repeatedly looking up the same node in different bindings.
            NodeId id = nodeTable.getNodeIdForNode(n) ;
            // Optional: whether to put in "known missing"
            // Currently, we do. The rest of the code should work with either choice.
            //if ( ! NodeId.isDoesNotExist(id) )
            b.put(v, id) ;
        }
        return b ;
    }
    
    /** Find whether a specific graph name is in the quads table. */
    public static QueryIterator testForGraphName(DatasetGraphTDB ds, Node graphNode, QueryIterator input,
                                                 Predicate> filter, ExecutionContext execCxt) {
        NodeId nid = TDBInternal.getNodeId(ds, graphNode) ;
        boolean exists = !NodeId.isDoesNotExist(nid) ;
        if ( exists ) {
            // Node exists but is it used in the quad position?
            NodeTupleTable ntt = ds.getQuadTable().getNodeTupleTable() ;
            // Don't worry about abortable - this iterator should be fast
            // (with normal indexing - at least one G???).
            // Either it finds a starting point, or it doesn't.  We are only 
            // interested in the first .hasNext.
            Iterator> iter1 = ntt.find(nid, NodeId.NodeIdAny, NodeId.NodeIdAny, NodeId.NodeIdAny) ;
            if ( filter != null )
                iter1 = Iter.filter(iter1, filter) ;
            exists = iter1.hasNext() ;
        }

        if ( exists )
            return input ;
        else {
            input.close() ;
            return QueryIterNullIterator.create(execCxt) ;
        }
    }

    /** Find all the graph names in the quads table. */
    public static QueryIterator graphNames(DatasetGraphTDB ds, Node graphNode, QueryIterator input,
                                           Predicate> filter, ExecutionContext execCxt) {
        List killList = new ArrayList<>() ;
        Iterator> iter1 = ds.getQuadTable().getNodeTupleTable().find(NodeId.NodeIdAny, NodeId.NodeIdAny,
                                                                                   NodeId.NodeIdAny, NodeId.NodeIdAny) ;
        if ( filter != null )
            iter1 = Iter.filter(iter1, filter) ;

        Iterator iter2 = Iter.map(iter1, (t) -> t.get(0)) ;
        iter2 = makeAbortable(iter2, killList) ;

        Iterator iter3 = Iter.distinct(iter2) ;
        iter3 = makeAbortable(iter3, killList) ;

        Iterator iter4 = NodeLib.nodes(ds.getQuadTable().getNodeTupleTable().getNodeTable(), iter3) ;

        final Var var = Var.alloc(graphNode) ;
        Iterator iterBinding = Iter.map(iter4, node -> BindingFactory.binding(var, node)) ;
        // Not abortable.
        return new QueryIterTDB(iterBinding, killList, input, execCxt) ;
    }
    
    /** Turn a BasicPattern into an abbreviated string for debugging */  
    public static String strPattern(BasicPattern pattern)
    {
        List triples = pattern.getList() ;
        String x = Iter.asString(triples, "\n  ") ;
        return printAbbrev(x) ; 
    }

    public static Set convertToNodeIds(Collection nodes, DatasetGraphTDB dataset)
    {
        Set graphIds = new HashSet<>() ;
        NodeTable nt = dataset.getQuadTable().getNodeTupleTable().getNodeTable() ;
        for ( Node n : nodes )
            graphIds.add(nt.getNodeIdForNode(n)) ;
        return graphIds ;
    }

    public static Iterator> unionGraph(NodeTupleTable ntt)
    {
        Iterator> iter = ntt.find((NodeId)null, null, null, null) ;
        iter = Iter.map(iter, quadsToAnyTriples) ;
        //iterMatches = Iter.distinct(iterMatches) ;
        
        // This depends on the way indexes are choose and
        // the indexing pattern. It assumes that the index 
        // chosen ends in G so same triples are adjacent 
        // in a union query.
        /// See TupleTable.scanAllIndex that ensures this.
        iter = Iter.distinctAdjacent(iter) ;
        return iter ;
    }
    
    
    // -- Mutating "transform in place"
    private static Function, Tuple> quadsToAnyTriples = item -> {
        return TupleFactory.create4(NodeId.NodeIdAny, item.get(1), item.get(2), item.get(3) ) ;
    } ;
}