org.apache.jena.tdb.solver.SolverLib Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jena-tdb Show documentation
Show all versions of jena-tdb Show documentation
TDB is a storage subsystem for Jena and ARQ. It is a native triple store providing persistent storage of triples/quads.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.tdb.solver;
import static org.apache.jena.atlas.lib.tuple.TupleFactory.tuple ;
import static org.apache.jena.tdb.lib.Lib2.printAbbrev ;
import java.util.* ;
import java.util.function.Function;
import java.util.function.Predicate;
import org.apache.jena.atlas.iterator.Iter ;
import org.apache.jena.atlas.iterator.IteratorWrapper ;
import org.apache.jena.atlas.lib.tuple.Tuple ;
import org.apache.jena.atlas.lib.tuple.TupleFactory ;
import org.apache.jena.graph.Node ;
import org.apache.jena.graph.Triple ;
import org.apache.jena.query.QueryCancelledException ;
import org.apache.jena.sparql.core.BasicPattern ;
import org.apache.jena.sparql.core.Quad ;
import org.apache.jena.sparql.core.Var ;
import org.apache.jena.sparql.engine.ExecutionContext ;
import org.apache.jena.sparql.engine.QueryIterator ;
import org.apache.jena.sparql.engine.binding.Binding ;
import org.apache.jena.sparql.engine.binding.BindingFactory ;
import org.apache.jena.sparql.engine.binding.BindingMap ;
import org.apache.jena.sparql.engine.iterator.QueryIterNullIterator ;
import org.apache.jena.tdb.TDBException ;
import org.apache.jena.tdb.lib.NodeLib ;
import org.apache.jena.tdb.store.DatasetGraphTDB ;
import org.apache.jena.tdb.store.GraphTDB ;
import org.apache.jena.tdb.store.NodeId ;
import org.apache.jena.tdb.store.nodetable.NodeTable ;
import org.apache.jena.tdb.store.nodetupletable.NodeTupleTable ;
import org.apache.jena.tdb.sys.TDBInternal ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
/** Utilities used within the TDB BGP solver : local TDB store */
public class SolverLib
{
/** SLF4J logger for this class; never reassigned, hence final. */
private static final Logger log = LoggerFactory.getLogger(SolverLib.class) ;
/** Non-reordering execution of a basic graph pattern, given a iterator of bindings as input */
public static QueryIterator execute(GraphTDB graph, BasicPattern pattern,
QueryIterator input, Predicate> filter,
ExecutionContext execCxt)
{
// Maybe default graph or named graph.
NodeTupleTable ntt = graph.getNodeTupleTable() ;
return execute(ntt, graph.getGraphName(), pattern, input, filter, execCxt) ;
}
/** Non-reordering execution of a quad pattern, given a iterator of bindings as input.
* GraphNode is Node.ANY for execution over the union of named graphs.
* GraphNode is null for execution over the real default graph.
*/
public static QueryIterator execute(DatasetGraphTDB ds, Node graphNode, BasicPattern pattern,
QueryIterator input, Predicate> filter,
ExecutionContext execCxt)
{
NodeTupleTable ntt = ds.chooseNodeTupleTable(graphNode) ;
return execute(ntt, graphNode, pattern, input, filter, execCxt) ;
}
/**
 * Convert from {@code Iterator<Binding>} to {@code Iterator<BindingNodeId>} by mapping
 * each binding through the function returned by {@code convFromBinding(nodeTable)}.
 *
 * @param iterBindings the bindings to convert
 * @param nodeTable    node table handed to {@code convFromBinding}
 * @return an iterator over the converted bindings
 */
public static Iterator<BindingNodeId> convertToIds(Iterator<Binding> iterBindings, NodeTable nodeTable)
{ return Iter.map(iterBindings, convFromBinding(nodeTable)) ; }
/**
 * Convert from {@code Iterator<BindingNodeId>} to {@code Iterator<Binding>}, conversion
 * "on demand" (in {@code convToBinding(BindingNodeId, NodeTable)}).
 *
 * @param iterBindingIds the NodeId-level bindings to convert
 * @param nodeTable      node table handed to {@code convToBinding} for every element
 * @return an iterator that maps each element through {@code convToBinding}
 */
public static Iterator<Binding> convertToNodes(Iterator<BindingNodeId> iterBindingIds, NodeTable nodeTable)
{ return Iter.map(iterBindingIds, bindingNodeIds -> convToBinding(bindingNodeIds, nodeTable)) ; }
// The worker. Callers choose the NodeTupleTable.
// graphNode may be Node.ANY, meaning we should make triples unique.
// graphNode may be null, meaning default graph
private static QueryIterator execute(NodeTupleTable nodeTupleTable, Node graphNode, BasicPattern pattern,
QueryIterator input, Predicate> filter,
ExecutionContext execCxt)
{
if ( Quad.isUnionGraph(graphNode) )
graphNode = Node.ANY ;
if ( Quad.isDefaultGraph(graphNode) )
graphNode = null ;
List triples = pattern.getList() ;
boolean anyGraph = (graphNode==null ? false : (Node.ANY.equals(graphNode))) ;
int tupleLen = nodeTupleTable.getTupleTable().getTupleLen() ;
if ( graphNode == null ) {
if ( 3 != tupleLen )
throw new TDBException("SolverLib: Null graph node but tuples are of length "+tupleLen) ;
} else {
if ( 4 != tupleLen )
throw new TDBException("SolverLib: Graph node specified but tuples are of length "+tupleLen) ;
}
// Convert from a QueryIterator (Bindings of Var/Node) to BindingNodeId
NodeTable nodeTable = nodeTupleTable.getNodeTable() ;
Iterator chain = Iter.map(input, SolverLib.convFromBinding(nodeTable)) ;
List killList = new ArrayList<>() ;
for ( Triple triple : triples )
{
Tuple tuple = null ;
if ( graphNode == null )
// 3-tuples
tuple = tuple(triple.getSubject(), triple.getPredicate(), triple.getObject()) ;
else
// 4-tuples.
tuple = tuple(graphNode, triple.getSubject(), triple.getPredicate(), triple.getObject()) ;
chain = solve(nodeTupleTable, tuple, anyGraph, chain, filter, execCxt) ;
chain = makeAbortable(chain, killList) ;
}
// DEBUG POINT
if ( false )
{
if ( chain.hasNext())
chain = Iter.debug(chain) ;
else
System.out.println("No results") ;
}
// Timeout wrapper ****
// QueryIterTDB gets called async.
// Iter.abortable?
// Or each iterator has a place to test.
// or pass in a thing to test?
// Need to make sure the bindings here point to parent.
Iterator iterBinding = convertToNodes(chain, nodeTable) ;
// "input" will be closed by QueryIterTDB but is otherwise unused.
// "killList" will be aborted on timeout.
return new QueryIterTDB(iterBinding, killList, input, execCxt) ;
}
/**
 * Create an abortable iterator, storing it in the killList.
 * Just return the input iterator if killList is null.
 *
 * @param <T>      element type of the iterator
 * @param iter     the iterator to wrap
 * @param killList list that receives the new wrapper; may be null
 * @return a new {@code IterAbortable} around {@code iter}, or {@code iter} itself when
 *         {@code killList} is null
 */
static <T> Iterator<T> makeAbortable(Iterator<T> iter, List<Abortable> killList)
{
    if ( killList == null )
        return iter ;
    IterAbortable<T> k = new IterAbortable<>(iter) ;
    killList.add(k) ;
    return k ;
}
/**
 * Iterator that adds an abort operation which can be called
 * at any time, including from another thread, and causes the
 * iterator to throw an exception when next touched (hasNext, next).
 *
 * @param <T> element type of the wrapped iterator
 */
static class IterAbortable<T> extends IteratorWrapper<T> implements Abortable
{
    // volatile: the flag is written by abort() and read by hasNext()/next(),
    // which may run on different threads.
    volatile boolean abortFlag = false ;

    /** Wrap {@code iterator}; the wrapper starts in the non-aborted state. */
    public IterAbortable(Iterator<T> iterator)
    {
        super(iterator) ;
    }

    /** Can call asynchronously at anytime */
    @Override
    public void abort() {
        abortFlag = true ;
    }

    /**
     * Delegates to the wrapped iterator.
     * @throws QueryCancelledException if {@link #abort()} has been called
     */
    @Override
    public boolean hasNext()
    {
        if ( abortFlag )
            throw new QueryCancelledException() ;
        return iterator.hasNext() ;
    }

    /**
     * Delegates to the wrapped iterator.
     * @throws QueryCancelledException if {@link #abort()} has been called
     */
    @Override
    public T next()
    {
        if ( abortFlag )
            throw new QueryCancelledException() ;
        return iterator.next() ;
    }
}
public static Iterator solve(NodeTupleTable nodeTupleTable,
Tuple tuple,
boolean anyGraph,
Iterator chain, Predicate> filter,
ExecutionContext execCxt)
{
return new StageMatchTuple(nodeTupleTable, chain, tuple, anyGraph, filter, execCxt) ;
}
/**
 * BindingNodeId {@literal ->} Binding, given a NodeTable.
 * The result is a {@code BindingTDB} built from the two arguments. An alternative that
 * looks every NodeId up in the node table straight away is retained behind a
 * compile-time switch that is turned off.
 *
 * @param bindingNodeIds the NodeId-level binding to convert
 * @param nodeTable      the node table used to turn NodeIds into Nodes
 * @return the converted binding
 */
public static Binding convToBinding(BindingNodeId bindingNodeIds, NodeTable nodeTable) {
    // Compile-time switch; false selects the BindingTDB form.
    final boolean makeNodesNow = false ;
    if ( makeNodesNow ) {
        // Makes nodes immediately. Causing unnecessary NodeTable accesses
        // (e.g. project)
        BindingMap materialised = BindingFactory.create() ;
        for ( Var variable : bindingNodeIds ) {
            NodeId nodeId = bindingNodeIds.get(variable) ;
            materialised.add(variable, nodeTable.getNodeForNodeId(nodeId)) ;
        }
        return materialised ;
    }
    return new BindingTDB(bindingNodeIds, nodeTable) ;
}
// Transform : Binding ==> BindingNodeId
/**
 * Function form of {@link #convert(Binding, NodeTable)} with the node table fixed.
 *
 * @param nodeTable node table captured by the returned function
 * @return a function that applies {@code SolverLib.convert(binding, nodeTable)}
 */
public static Function<Binding, BindingNodeId> convFromBinding(final NodeTable nodeTable)
{
    return binding -> SolverLib.convert(binding, nodeTable);
}
/**
 * Binding {@literal ->} BindingNodeId, given a NodeTable.
 * A {@code BindingTDB} already carries its NodeId form, which is returned directly.
 * Any other binding is copied variable by variable, looking each node up in the node table.
 *
 * @param binding   the binding to convert
 * @param nodeTable node table used to obtain the NodeId of each bound node
 * @return the NodeId-level binding
 */
public static BindingNodeId convert(Binding binding, NodeTable nodeTable)
{
    if ( binding instanceof BindingTDB )
        return ((BindingTDB)binding).getBindingId() ;

    BindingNodeId b = new BindingNodeId(binding) ;
    // and copy over, getting NodeIds.
    Iterator<Var> vars = binding.vars() ;
    while ( vars.hasNext() )
    {
        Var v = vars.next() ;
        Node n = binding.get(v) ;
        if ( n == null )
            // Variable mentioned in the binding but not actually defined.
            // Can occur with BindingProject
            continue ;

        // Rely on the node table cache for efficiency - we will likely be
        // repeatedly looking up the same node in different bindings.
        NodeId id = nodeTable.getNodeIdForNode(n) ;
        // Optional: whether to put in "known missing"
        // Currently, we do. The rest of the code should work with either choice.
        //if ( ! NodeId.isDoesNotExist(id) )
        b.put(v, id) ;
    }
    return b ;
}
/** Find whether a specific graph name is in the quads table. */
public static QueryIterator testForGraphName(DatasetGraphTDB ds, Node graphNode, QueryIterator input,
Predicate> filter, ExecutionContext execCxt) {
NodeId nid = TDBInternal.getNodeId(ds, graphNode) ;
boolean exists = !NodeId.isDoesNotExist(nid) ;
if ( exists ) {
// Node exists but is it used in the quad position?
NodeTupleTable ntt = ds.getQuadTable().getNodeTupleTable() ;
// Don't worry about abortable - this iterator should be fast
// (with normal indexing - at least one G???).
// Either it finds a starting point, or it doesn't. We are only
// interested in the first .hasNext.
Iterator> iter1 = ntt.find(nid, NodeId.NodeIdAny, NodeId.NodeIdAny, NodeId.NodeIdAny) ;
if ( filter != null )
iter1 = Iter.filter(iter1, filter) ;
exists = iter1.hasNext() ;
}
if ( exists )
return input ;
else {
input.close() ;
return QueryIterNullIterator.create(execCxt) ;
}
}
/** Find all the graph names in the quads table. */
public static QueryIterator graphNames(DatasetGraphTDB ds, Node graphNode, QueryIterator input,
Predicate> filter, ExecutionContext execCxt) {
List killList = new ArrayList<>() ;
Iterator> iter1 = ds.getQuadTable().getNodeTupleTable().find(NodeId.NodeIdAny, NodeId.NodeIdAny,
NodeId.NodeIdAny, NodeId.NodeIdAny) ;
if ( filter != null )
iter1 = Iter.filter(iter1, filter) ;
Iterator iter2 = Iter.map(iter1, (t) -> t.get(0)) ;
iter2 = makeAbortable(iter2, killList) ;
Iterator iter3 = Iter.distinct(iter2) ;
iter3 = makeAbortable(iter3, killList) ;
Iterator iter4 = NodeLib.nodes(ds.getQuadTable().getNodeTupleTable().getNodeTable(), iter3) ;
final Var var = Var.alloc(graphNode) ;
Iterator iterBinding = Iter.map(iter4, node -> BindingFactory.binding(var, node)) ;
// Not abortable.
return new QueryIterTDB(iterBinding, killList, input, execCxt) ;
}
/**
 * Turn a BasicPattern into an abbreviated string for debugging.
 * The triples are joined with {@code Iter.asString} and the result is passed
 * through {@code printAbbrev}.
 *
 * @param pattern the pattern to render
 * @return the abbreviated string
 */
public static String strPattern(BasicPattern pattern)
{
    List<Triple> triples = pattern.getList() ;
    String x = Iter.asString(triples, "\n ") ;
    return printAbbrev(x) ;
}
/**
 * Look up the NodeId of every node in {@code nodes}, using the node table of the
 * dataset's quad table.
 *
 * @param nodes   the nodes to look up
 * @param dataset dataset supplying the node table
 * @return a new HashSet holding the result of {@code getNodeIdForNode} for each node
 */
public static Set<NodeId> convertToNodeIds(Collection<Node> nodes, DatasetGraphTDB dataset)
{
    Set<NodeId> graphIds = new HashSet<>() ;
    NodeTable nt = dataset.getQuadTable().getNodeTupleTable().getNodeTable() ;
    for ( Node n : nodes )
        graphIds.add(nt.getNodeIdForNode(n)) ;
    return graphIds ;
}
public static Iterator> unionGraph(NodeTupleTable ntt)
{
Iterator> iter = ntt.find((NodeId)null, null, null, null) ;
iter = Iter.map(iter, quadsToAnyTriples) ;
//iterMatches = Iter.distinct(iterMatches) ;
// This depends on the way indexes are choose and
// the indexing pattern. It assumes that the index
// chosen ends in G so same triples are adjacent
// in a union query.
/// See TupleTable.scanAllIndex that ensures this.
iter = Iter.distinctAdjacent(iter) ;
return iter ;
}
// -- Mutating "transform in place"
private static Function, Tuple> quadsToAnyTriples = item -> {
return TupleFactory.create4(NodeId.NodeIdAny, item.get(1), item.get(2), item.get(3) ) ;
} ;
}