org.apache.jena.tdb.solver.OpExecutorTDB1 Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jena-tdb Show documentation
TDB is a storage subsystem for Jena and ARQ, it is a native triple store providing persistent storage of triples/quads.
There is a newer version: 4.10.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.tdb.solver;

import java.util.function.Predicate;

import org.apache.jena.atlas.lib.tuple.Tuple ;
import org.apache.jena.atlas.logging.Log ;
import org.apache.jena.graph.Graph ;
import org.apache.jena.graph.Node ;
import org.apache.jena.sparql.ARQInternalErrorException ;
import org.apache.jena.sparql.algebra.Op ;
import org.apache.jena.sparql.algebra.op.* ;
import org.apache.jena.sparql.algebra.optimize.TransformFilterPlacement ;
import org.apache.jena.sparql.core.BasicPattern ;
import org.apache.jena.sparql.core.Quad ;
import org.apache.jena.sparql.core.Substitute ;
import org.apache.jena.sparql.core.Var ;
import org.apache.jena.sparql.engine.ExecutionContext ;
import org.apache.jena.sparql.engine.QueryIterator ;
import org.apache.jena.sparql.engine.iterator.QueryIterPeek ;
import org.apache.jena.sparql.engine.main.OpExecutor ;
import org.apache.jena.sparql.engine.main.OpExecutorFactory ;
import org.apache.jena.sparql.engine.main.QC ;
import org.apache.jena.sparql.engine.main.iterator.QueryIterGraph ;
import org.apache.jena.sparql.engine.optimizer.reorder.ReorderProc ;
import org.apache.jena.sparql.engine.optimizer.reorder.ReorderTransformation ;
import org.apache.jena.sparql.expr.ExprList ;
import org.apache.jena.sparql.mgt.Explain ;
import org.apache.jena.tdb.store.DatasetGraphTDB ;
import org.apache.jena.tdb.store.GraphTDB ;
import org.apache.jena.tdb.store.NodeId;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;

/** TDB executor for algebra expressions.  It is the standard ARQ executor
 *  except for basic graph patterns and filtered basic graph patterns (currently).  
 * 
 * See also: StageGeneratorDirectTDB, a non-reordering 
 */
public class OpExecutorTDB1 extends OpExecutor
{
    private static final Logger log = LoggerFactory.getLogger(OpExecutorTDB1.class) ;
    
    public final static OpExecutorFactory OpExecFactoryTDB = new OpExecutorFactory()
    {
        @Override
        public OpExecutor create(ExecutionContext execCxt)
        { return new OpExecutorTDB1(execCxt) ; }
    } ;
    
    private final boolean isForTDB ;
    
    // A new compile object is created for each op compilation.
    // So the execCxt is changing as we go through the query-compile-execute process  
    public OpExecutorTDB1(ExecutionContext execCxt)
    {
        super(execCxt) ;
        // NB. The dataset may be a TDB one, or a general one.
        // Any merged union graph magic (for a TDB dataset was handled
        // in QueryEngineTDB).
        
        isForTDB = (execCxt.getActiveGraph() instanceof GraphTDB) ;
    }

    @Override
    protected QueryIterator exec(Op op, QueryIterator input) {
        if ( level < 0 )
            // Print only at top level (and we're called before level++) 
            Explain.explain("TDB", op, super.execCxt.getContext()) ;
        return super.exec(op, input) ;
    } 
    
    @Override
    protected QueryIterator execute(OpFilter opFilter, QueryIterator input)
    {
        if ( ! isForTDB )
            return super.execute(opFilter, input) ;
        
        // If the filter does not apply to the input??
        // Where does ARQ catch this?
        
        // (filter (bgp ...))
        if ( OpBGP.isBGP(opFilter.getSubOp()) )
        {
            // Still may be a TDB graph in a non-TDB dataset (e.g. a named model)
            GraphTDB graph = (GraphTDB)execCxt.getActiveGraph() ;
            OpBGP opBGP = (OpBGP)opFilter.getSubOp() ;
            return executeBGP(graph, opBGP, input, opFilter.getExprs(), execCxt) ;
        }
        
        // (filter (quadpattern ...))
        if ( opFilter.getSubOp() instanceof OpQuadPattern )
        {
            OpQuadPattern quadPattern = (OpQuadPattern)opFilter.getSubOp() ;
            DatasetGraphTDB ds = (DatasetGraphTDB)execCxt.getDataset() ;
            return optimizeExecuteQuads(ds, input,
                                        quadPattern.getGraphNode(), quadPattern.getBasicPattern(),
                                        opFilter.getExprs(), execCxt) ;
        }
    
        // (filter (anything else))
        return super.execute(opFilter, input) ;
        }

    // ---- Triple patterns
    
    @Override
    protected QueryIterator execute(OpBGP opBGP, QueryIterator input)
    {
        if ( ! isForTDB )
            return super.execute(opBGP, input) ;
        
        GraphTDB graph = (GraphTDB)execCxt.getActiveGraph() ;
        return executeBGP(graph, opBGP, input, null, execCxt) ;
       
    }

    @Override
    protected QueryIterator execute(OpQuadPattern quadPattern, QueryIterator input)
    {
        if ( ! isForTDB )
            return super.execute(quadPattern, input) ;
            
    //        DatasetGraph dg = execCxt.getDataset() ;
    //        if ( ! ( dg instanceof DatasetGraphTDB ) )
    //            throw new InternalErrorException("Not a TDB backed dataset in quad pattern execution") ;
        
        DatasetGraphTDB ds = (DatasetGraphTDB)execCxt.getDataset() ;
        BasicPattern bgp = quadPattern.getBasicPattern() ;
        Node gn = quadPattern.getGraphNode() ;
        return optimizeExecuteQuads(ds, input, gn, bgp, null, execCxt) ;
    }

    @Override
    protected QueryIterator execute(OpGraph opGraph, QueryIterator input)
    {
        // Path evaluation or dataset sets which do not go straight to the DatasetGraphTDB  
        return new QueryIterGraph(input, opGraph, execCxt) ;
    }

    /** Execute a BGP (and filters) on a TDB graph, which may be in default storage or it may be a named graph */ 
    private static QueryIterator executeBGP(GraphTDB graph, OpBGP opBGP, QueryIterator input, ExprList exprs, 
                                            ExecutionContext execCxt)
    {
        DatasetGraphTDB dsgtdb = graph.getDatasetGraphTDB() ;
        // Is it the real default graph (normal route or explicitly named)?
        if ( ! isDefaultGraphStorage(graph.getGraphName()))
        {
            // Not default storage - it's a named graph in storage. 
            return optimizeExecuteQuads(dsgtdb, input, graph.getGraphName(), opBGP.getPattern(), exprs, execCxt) ;
        }
        
        // Execute a BGP on the real default graph
        return optimizeExecuteTriples(dsgtdb, input, opBGP.getPattern(), exprs, execCxt) ;
    }

    /** Execute, with optimization, a basic graph pattern on the default graph storage */
    private static QueryIterator optimizeExecuteTriples(DatasetGraphTDB dsgtdb, QueryIterator input,
                                                        BasicPattern pattern, ExprList exprs,
                                                        ExecutionContext execCxt) 
    {
        if ( ! input.hasNext() )
            return input ;
    
        // -- Input
        // Must pass this iterator into the next stage.
        if ( pattern.size() >= 2 )
        {
            // Must be 2 or triples to reorder. 
            ReorderTransformation transform = dsgtdb.getReorderTransform() ;
            if ( transform != null )
            {
                QueryIterPeek peek = QueryIterPeek.create(input, execCxt) ;
                input = peek ; // Must pass on
                pattern = reorder(pattern, peek, transform) ;
            }
        }
        // -- Filter placement
            
        Op op = null ;
        if ( exprs != null )
            op = TransformFilterPlacement.transform(exprs, pattern) ;
        else
            op = new OpBGP(pattern) ;
        
        return plainExecute(op, input, execCxt) ;
    }

    /** Execute, with optimization, a quad pattern */
    private static QueryIterator optimizeExecuteQuads(DatasetGraphTDB ds, 
                                                      QueryIterator input, 
                                                      Node gn, BasicPattern bgp,
                                                      ExprList exprs, ExecutionContext execCxt)
    {
        if ( ! input.hasNext() )
            return input ;

        // ---- Graph names with special meaning. 

        gn = decideGraphNode(gn, execCxt) ;
        if ( gn == null )
            return optimizeExecuteTriples(ds, input, bgp, exprs, execCxt) ;
        
        // ---- Execute quads+filters
        if ( bgp.size() >= 2 )
        {
            ReorderTransformation transform = ds.getReorderTransform() ;
    
            if ( transform != null )
            {
                QueryIterPeek peek = QueryIterPeek.create(input, execCxt) ;
                input = peek ; // Original input now invalid.
                bgp = reorder(bgp, peek, transform) ;
            }
        }
        // -- Filter placement
        Op op = null ;
        if ( exprs != null )
            op = TransformFilterPlacement.transform(exprs, gn, bgp) ;
        else
            op = new OpQuadPattern(gn, bgp) ;

        return plainExecute(op, input, execCxt) ;
    }

    /** Execute without modification of the op - does not apply special graph name translations */ 
    private static QueryIterator plainExecute(Op op, QueryIterator input, ExecutionContext execCxt)
    {
        // -- Execute
        // Switch to a non-reordering executor
        // The Op may be a sequence due to TransformFilterPlacement
        // so we need to do a full execution step, not go straight to the SolverLib.
        
        ExecutionContext ec2 = new ExecutionContext(execCxt) ;
        ec2.setExecutor(plainFactory) ;

        // Solve without going through this executor again.
        // There would be issues of nested patterns but this is only a
        // (filter (bgp...)) or (filter (quadpattern ...)) or sequences of these.
        // so there are no nested patterns to reorder.
        return QC.execute(op, input, ec2) ;
    }

    private static BasicPattern reorder(BasicPattern pattern, QueryIterPeek peek, ReorderTransformation transform)
    {
        if ( transform != null )
        {
            // This works by getting one result from the peek iterator,
            // and creating the more gounded BGP. The tranform is used to
            // determine the best order and the transformation is returned. This
            // transform is applied to the unsubstituted pattern (which will be
            // substituted as part of evaluation.
            
            if ( ! peek.hasNext() )
                throw new ARQInternalErrorException("Peek iterator is already empty") ;
 
            BasicPattern pattern2 = Substitute.substitute(pattern, peek.peek() ) ;
            // Calculate the reordering based on the substituted pattern.
            ReorderProc proc = transform.reorderIndexes(pattern2) ;
            // Then reorder original patten
            pattern = proc.reorder(pattern) ;
        }
        return pattern ;
    }
    
    /** Handle special graph node names.  
     * Returns null for default graph in storage (use the triple table).
     * Returns Node.ANY for the union graph
     */
    public static Node decideGraphNode(Node gn, ExecutionContext execCxt)
    {
     // ---- Graph names with special meaning. 
    
        // Graph names with special meaning:
        //   Quad.defaultGraphIRI -- the IRI used in GRAPH <> to mean the default graph.
        //   Quad.defaultGraphNodeGenerated -- the internal marker node used for the quad form of queries.
        //   Quad.unionGraph -- the IRI used in GRAPH <> to mean the union of named graphs
    
        if ( isDefaultGraphStorage(gn) ) 
        {
            // Storage concrete, default graph. 
            // Either outside GRAPH (no implicit union)
            // or using the "name" of the default graph
            return null ;
        }

        // Not default storage graph.
        // ---- Union (RDF Merge) of named graphs

        if ( Quad.isUnionGraph(gn) ) 
            return Node.ANY ;
        return gn ;
    }

    // Is this a query against the real default graph in the storage (in a 3-tuple table). 
    private static boolean isDefaultGraphStorage(Node gn)
    {
        if ( gn == null )
            return true ;
        
        // Is it the implicit name for default graph.
        if ( Quad.isDefaultGraph(gn) )
            // Not accessing the union of named graphs as the default graph
            // and pattern is directed to the default graph.
            return true ;
    
        return false ;
    }
    
    @Override
    protected QueryIterator execute(OpDatasetNames dsNames, QueryIterator input)
    { 
        DatasetGraphTDB ds = (DatasetGraphTDB)execCxt.getDataset() ;
        Predicate> filter = QC2.getFilter(execCxt.getContext()) ;
        Node gn = dsNames.getGraphNode() ;
        if ( Var.isVar(gn) )
            return SolverLib.graphNames(ds, dsNames.getGraphNode(), input, filter, execCxt) ;
        else
            return SolverLib.testForGraphName(ds, dsNames.getGraphNode(), input, filter, execCxt) ;
    }

    // ---- OpExecute factories and plain executor.
    
    private static OpExecutorFactory plainFactory = new OpExecutorPlainFactoryTDB() ;
    private static class OpExecutorPlainFactoryTDB implements OpExecutorFactory
    {
        @Override
        public OpExecutor create(ExecutionContext execCxt)
        {
            return new OpExecutorPlainTDB(execCxt) ;
        }
    }

    /** An op executor that simply executes a BGP or QuadPattern without any reordering */ 
    private static class OpExecutorPlainTDB extends OpExecutor
    {
        Predicate> filter = null ;
        
        public OpExecutorPlainTDB(ExecutionContext execCxt)
        {
            super(execCxt) ;
            filter = QC2.getFilter(execCxt.getContext()) ;
        }
        
        @Override
        public QueryIterator execute(OpBGP opBGP, QueryIterator input)
        {
            Graph g = execCxt.getActiveGraph() ;
            
            if ( g instanceof GraphTDB )
            {
                BasicPattern bgp = opBGP.getPattern() ;
                Explain.explain("Execute", bgp, execCxt.getContext()) ;
                // Triple-backed (but may be named as explicit default graph).
                //return SolverLib.execute((GraphTDB)g, bgp, input, filter, execCxt) ;
                GraphTDB gtdb = (GraphTDB)g ;
                Node gn = decideGraphNode(gtdb.getGraphName(), execCxt) ;
                return SolverLib.execute(gtdb.getDatasetGraphTDB(), gn, bgp, input, filter, execCxt) ;
            }
            Log.warn(this, "Non-GraphTDB passed to OpExecutorPlainTDB") ;
            return super.execute(opBGP, input) ;
        }
        
        @Override
        public QueryIterator execute(OpQuadPattern opQuadPattern, QueryIterator input)
        {
            Node gn = opQuadPattern.getGraphNode() ;
            gn = decideGraphNode(gn, execCxt) ;
            
            if ( execCxt.getDataset() instanceof DatasetGraphTDB )
            {
                DatasetGraphTDB ds = (DatasetGraphTDB)execCxt.getDataset() ;
                Explain.explain("Execute", opQuadPattern.getPattern(), execCxt.getContext()) ;
                BasicPattern bgp = opQuadPattern.getBasicPattern() ;
                return SolverLib.execute(ds, gn, bgp, input, filter, execCxt) ;
            }
            // Maybe a TDB named graph inside a non-TDB dataset.
            Graph g = execCxt.getActiveGraph() ;
            if ( g instanceof GraphTDB )
            {
                // Triples graph from TDB (which is the default graph of the dataset),
                // used a named graph in a composite dataset.
                BasicPattern bgp = opQuadPattern.getBasicPattern() ;
                Explain.explain("Execute", bgp, execCxt.getContext()) ;
                // Don't pass in G -- gn may be different.
                return SolverLib.execute(((GraphTDB)g).getDatasetGraphTDB(), gn, bgp, input, filter, execCxt) ;
            }
            Log.warn(this, "Non-DatasetGraphTDB passed to OpExecutorPlainTDB") ;
            return super.execute(opQuadPattern, input) ;
        }

    }
}