com.bigdata.bop.joinGraph.rto.JoinGraph Maven / Gradle / Ivy

Go to download
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
/*
 * Created on Aug 16, 2010
 */

package com.bigdata.bop.joinGraph.rto;

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;

import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IQueryAttributes;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.ap.SampleIndex;
import com.bigdata.bop.ap.SampleIndex.SampleType;
import com.bigdata.bop.controller.AbstractSubqueryOp;
import com.bigdata.bop.engine.AbstractRunningQuery;
import com.bigdata.bop.engine.IRunningQuery;
import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.rdf.sparql.ast.JoinGroupNode;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpRTO;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpUtility;
import com.bigdata.util.NT;
import com.bigdata.util.concurrent.Haltable;

import cutthecrap.utils.striterators.ICloseableIterator;

/**
 * A join graph with annotations for estimated cardinality and other details in
 * support of runtime query optimization. A join graph is a collection of access
 * paths reading on relations (the vertices of the join graph) and joins which
 * connect those relations (the edges of the join graph). This boils down to a
 * collection of {@link IPredicate}s (access paths reading on on relations),
 * shared variables (which identify joins), and {@link IConstraint}s (which may
 * reject some solutions for those joins). Operators other than standard joins
 * (including optional joins, sort, order by, etc.) must be handled downstream
 * from the join graph in a "tail plan".
 * 
 * The {@link JoinGraph} operator works in two phases. On its first invocation,
 * it constructs a {@link JGraph join graph} and identifies a join path having a
 * low cost join ordering. This join path is converted into a query plan and set
 * as the {@link Attributes#QUERY_PLAN} attribute on the {@link IRunningQuery}.
 * The upstream solutions are then flooded into sub-query that executes the
 * chosen query plan. The solutions from the sub-query are simply copied to the
 * output sink of the {@link JoinGraph} operator. Once the query plan has been
 * identified by the first invocation, subsequent invocations of this operator
 * simply push more data into the sub-query using the pre-identified query plan.
 * 
 * TODO This approach amounts to bottom-up evaluation of the {@link JGraph}.
 * Thus, the RTO is not using information from the upstream query when it
 * decides on a query plan. Therefore, we could lift-out the RTO sections of the
 * query into named subqueries, run them first in parallel, and then INCLUDE
 * their results into the main query. This would require an AST optimizer to
 * modify the AST. (Currently the RTO is integrated when the query plan is
 * generated in {@link AST2BOpUtility} rather than as an AST optimizer.)
 * 
 * @see http://arxiv.org/PS_cache/arxiv/pdf/0810/0810.4809v1.pdf, XQuery Join
 *      Graph Isolation.
 * 
 * @author Bryan Thompson
 * @version $Id$
 * 
 * @see JGraph
 */
public class JoinGraph extends PipelineOp {

//	private static final transient Logger log = Logger
//			.getLogger(JoinGraph.class);

	private static final long serialVersionUID = 1L;

//    private static final transient Logger log = Logger
//            .getLogger(JoinGraph.class);

    /**
	 * Known annotations.
	 */
	public interface Annotations extends PipelineOp.Annotations {

//        /**
//         * The variables to be projected out of the join graph (optional). When
//         * null, all variables will be projected out.
//         */
//        String SELECTED = JoinGraph.class.getName() + ".selected";
		
        /**
         * The vertices of the join graph, expressed an an {@link IPredicate}[]
         * (required).
         */
        String VERTICES = JoinGraph.class.getName() + ".vertices";

        /**
         * The constraints on the join graph, expressed an an
         * {@link IConstraint}[] (optional, defaults to no constraints).
         */
        String CONSTRAINTS = JoinGraph.class.getName() + ".constraints";

        /**
         * The initial limit for cutoff sampling (default
         * {@value #DEFAULT_LIMIT}).
         */
        String LIMIT = JoinGraph.class.getName() + ".limit";

		int DEFAULT_LIMIT = 100;

        /**
         * The nedges edges of the join graph having the lowest
         * cardinality will be used to generate the initial join paths (default
         * {@value #DEFAULT_NEDGES}). This must be a positive integer. The edges
         * in the join graph are sorted in order of increasing cardinality and
         * up to nedges of those edges having the lowest cardinality are
         * used to form the initial set of join paths. For each edge selected to
         * form a join path, the starting vertex will be the vertex of that edge
         * having the lower cardinality.
         */
		String NEDGES = JoinGraph.class.getName() + ".nedges";

		int DEFAULT_NEDGES = 2;
		
        /**
         * The type of sample to take (default {@value #DEFAULT_SAMPLE_TYPE)}.
         * 
         * @see SampleIndex.SampleType
         */
        String SAMPLE_TYPE = JoinGraph.class.getName() + ".sampleType";
        
        String DEFAULT_SAMPLE_TYPE = SampleType.RANDOM.name();

        /**
         * The set of variables that are known to have already been materialized
         * in the context in which the RTO was invoked.
         */
        String DONE_SET = JoinGraph.class.getName() + ".doneSet";

        /**
         * The AST {@link JoinGroupNode} for the joins and filters that we are
         * running through the RTO (required).
         */
        String JOIN_GROUP = JoinGraph.class.getName() + ".joinGroup";

        /**
         * An {@link NT} object specifying the namespace and timestamp of the KB
         * view against which the RTO is running. This is necessary in order to
         * reconstruct the {@link AST2BOpContext} when it comes time to evaluate
         * either a cutoff join involving filters that need materialization or
         * the selected join path.
         */
        String NT = JoinGraph.class.getName() + ".nt";
        
	}

    /**
     * {@link IQueryAttributes} names for the {@link JoinGraph}. The fully
     * qualified name of the attribute is formed by appending the attribute name
     * to the "bopId-", where bopId is the value returned by
     * {@link BOp#getId()}
     * 
     * @author Bryan
     *         Thompson
     */
    public interface Attributes {

        /**
         * The join path selected by the RTO (output).
         */
        String PATH = JoinGraph.class.getName() + ".path";

        /**
         * The samples associated with join path selected by the RTO (output).
         */
        String SAMPLES = JoinGraph.class.getName() + ".samples";

        /**
         * The physical query plan generated from the RTO determined best join
         * ordering (output). This is used to specify the query plan to be
         * executed by a downstream operator.
         */
        String QUERY_PLAN = JoinGraph.class.getName() + ".queryPlan";

	}
	
    /*
     * JoinGraph operator annotations.
     */
    
//	/**
//	 * @see Annotations#SELECTED
//	 */
//	public IVariable[] getSelected() {
//
//		return (IVariable[]) getRequiredProperty(Annotations.SELECTED);
//
//	}

	/**
	 * @see Annotations#VERTICES
	 */
	public IPredicate[] getVertices() {

		return (IPredicate[]) getRequiredProperty(Annotations.VERTICES);

	}

    /**
     * @see Annotations#CONSTRAINTS
     */
    public IConstraint[] getConstraints() {

        return (IConstraint[]) getProperty(Annotations.CONSTRAINTS, null/* none */);

    }

	/**
	 * @see Annotations#LIMIT
	 */
	public int getLimit() {

		return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT);

	}

	/**
	 * @see Annotations#NEDGES
	 */
	public int getNEdges() {

		return getProperty(Annotations.NEDGES, Annotations.DEFAULT_NEDGES);

	}

	/**
	 * @see Annotations#SAMPLE_TYPE
	 */
	public SampleType getSampleType() {

	    return SampleType.valueOf(getProperty(Annotations.SAMPLE_TYPE,
                Annotations.DEFAULT_SAMPLE_TYPE));
	    
	}

    /**
     * Return the set of variables that are known to have already been
     * materialized at the point in the overall query plan where the RTO is
     * being executed.
     * 
     * @see Annotations#DONE_SET
     */
	@SuppressWarnings("unchecked")
    public Set> getDoneSet() {
	    
	    return (Set>) getRequiredProperty(Annotations.DONE_SET);
	    
	}
	
	/*
	 * IQueryAttributes
	 */
	
	/**
	 * Return the computed join path.
	 * 
	 * @see Attributes#PATH
	 */
    public Path getPath(final IRunningQuery q) {

        return (Path) q.getAttributes().get(getId() + "-" + Attributes.PATH);

    }

    /**
     * Return the samples associated with the computed join path.
     * 
     * @see Annotations#SAMPLES
     */
    @SuppressWarnings("unchecked")
    public Map getSamples(final IRunningQuery q) {

        return (Map) q.getAttributes().get(
                getId() + "-" + Attributes.SAMPLES);

    }    

    private void setPath(final IRunningQuery q, final Path p) {

        q.getAttributes().put(getId() + "-" + Attributes.PATH, p);
        
    }

    private void setSamples(final IRunningQuery q,
            final Map samples) {
        
        q.getAttributes().put(getId() + "-" + Attributes.SAMPLES, samples);
        
    }

    /**
     * Return the query plan to be executed based on the RTO determined join
     * ordering.
     * 
     * @see Attributes#QUERY_PLAN
     */
    public PipelineOp getQueryPlan(final IRunningQuery q) {

        return (PipelineOp) q.getAttributes().get(
                getId() + "-" + Attributes.QUERY_PLAN);

    }

    private void setQueryPlan(final IRunningQuery q,
            final PipelineOp queryPlan) {
        
        q.getAttributes().put(getId() + "-" + Attributes.QUERY_PLAN, queryPlan);
        
    }

    /**
     * Deep copy constructor.
     * 
     * @param op
     */
    public JoinGraph(final JoinGraph op) {
    
        super(op);
        
    }

    public JoinGraph(final BOp[] args, final NV... anns) {

        this(args, NV.asMap(anns));

	}

    public JoinGraph(final BOp[] args, final Map anns) {

        super(args, anns);

        // optional property.
//        final IVariable[] selected = (IVariable[]) getProperty(Annotations.SELECTED);
//
//        if (selected == null)
//            throw new IllegalArgumentException(Annotations.SELECTED);
//
//        if (selected.length == 0)
//            throw new IllegalArgumentException(Annotations.SELECTED);

        // required property.
        final IPredicate[] vertices = (IPredicate[]) getProperty(Annotations.VERTICES);

        if (vertices == null)
            throw new IllegalArgumentException(Annotations.VERTICES);

        if (vertices.length == 0)
            throw new IllegalArgumentException(Annotations.VERTICES);

        if (getLimit() <= 0)
            throw new IllegalArgumentException(Annotations.LIMIT);

        if (getNEdges() <= 0)
            throw new IllegalArgumentException(Annotations.NEDGES);

        /*
         * TODO Check DONE_SET, NT, JOIN_NODES. These annotations are required
         * for the new code path. We should check for their presence. However,
         * the old code path is used by some unit tests which have not yet been
         * updated and do not supply these annotations.
         */
//        // Required.
//        getDoneSet();
//        
//        // Required.
//        getRequiredProperty(Annotations.NT);
        
        if (!isController())
            throw new IllegalArgumentException();

        switch (getEvaluationContext()) {
        case CONTROLLER:
            break;
        default:
            throw new IllegalArgumentException(Annotations.EVALUATION_CONTEXT
                    + "=" + getEvaluationContext());
        }

	}

    @Override
	public FutureTask eval(final BOpContext context) {

		return new FutureTask(new JoinGraphTask(context));

	}

	/**
	 * Evaluation of a {@link JoinGraph}.
	 * 
	 * @author Bryan
	 *         Thompson
	 */
	private class JoinGraphTask implements Callable {

	    private final BOpContext context;

	    JoinGraphTask(final BOpContext context) {

	        if (context == null)
	            throw new IllegalArgumentException();

	        this.context = context;

	    }

	    @Override
	    public Void call() throws Exception {
	        
            if (getQueryPlan(context.getRunningQuery()) == null) {
                
                /*
                 * Use the RTO to generate a query plan.
                 * 
                 * TODO Make sure that the JoinGraph can not be triggered
                 * concurrently, e.g., that the CONTROLLER attribute prevents
                 * concurrent evaluation, just like MAX_PARALLEL.
                 */
                
                // final long begin = System.nanoTime();

                // Create the join graph.
                final JGraph g = new JGraph(JoinGraph.this);

                /*
                 * This map is used to associate join path segments (expressed
                 * as an ordered array of bopIds) with edge sample to avoid
                 * redundant effort.
                 */
                final Map edgeSamples = new LinkedHashMap();

                // Find the best join path.
                final Path path = g.runtimeOptimizer(context.getRunningQuery()
                        .getQueryEngine(), edgeSamples);

                /*
                 * Release samples.
                 * 
                 * TODO If we have fully sampled some vertices or edges, then we
                 * could replace the JOIN with the sample. For this to work, we
                 * would need to access path that could read the sample and we
                 * would have to NOT release the samples until the RTO was done
                 * executing sub-queries against the generated query plan. Since
                 * we can flow multiple chunks into the sub-query, this amounts
                 * to having a LAST_PASS annotation.
                 */
                
                for (EdgeSample s : edgeSamples.values()) {

                    s.releaseSample();
                    
                }
 
                for (Vertex v : g.getVertices()) {

                    if (v.sample != null) {
                        v.sample.releaseSample();
                    
                    }
                    
                }
                
                // Set attribute for the join path result.
                setPath(context.getRunningQuery(), path);

                // Set attribute for the join path samples.
                setSamples(context.getRunningQuery(), edgeSamples);

                // final long mark = System.nanoTime();
                //
                // final long elapsed_queryOptimizer = mark - begin;

                /*
                 * Generate the query from the selected join path.
                 */
                final PipelineOp queryOp = AST2BOpRTO.compileJoinGraph(context
                        .getRunningQuery().getQueryEngine(), JoinGraph.this,
                        path);

                // Set attribute for the join path samples.
                setQueryPlan(context.getRunningQuery(), queryOp);

            }
	        
            // The query plan.
            final PipelineOp queryOp = getQueryPlan(context.getRunningQuery());
            
            // Run the query, blocking until it is done.
	        JoinGraph.runSubquery(context, queryOp);

//	        final long elapsed_queryExecution = System.nanoTime() - mark;
//	        
//            if (log.isInfoEnabled())
//                log.info("RTO: queryOptimizer="
//                        + TimeUnit.NANOSECONDS.toMillis(elapsed_queryOptimizer)
//                        + ", queryExecution="
//                        + TimeUnit.NANOSECONDS.toMillis(elapsed_queryExecution));

	        return null;

	    }

	} // class JoinGraphTask

    /**
     * Execute the selected join path.
     * 
     * Note: When executing the query, it is actually being executed as a
     * subquery. Therefore we have to take appropriate care to ensure that the
     * results are copied out of the subquery and into the parent query. See
     * {@link AbstractSubqueryOp} for how this is done.
     */
    static private void runSubquery(
            final BOpContext parentContext,
            final PipelineOp queryOp) throws Exception {

        if(parentContext==null)
            throw new IllegalArgumentException();
        
        if(queryOp==null)
            throw new IllegalArgumentException();
        
        final QueryEngine queryEngine = parentContext.getRunningQuery()
                .getQueryEngine();

        /*
         * Run the sub-query.
         */

        ICloseableIterator subquerySolutionItr = null;

        // Fully materialize the upstream solutions.
        final IBindingSet[] bindingSets = BOpUtility.toArray(
                parentContext.getSource(), parentContext.getStats());

        /*
         * Run on all available upstream solutions.
         * 
         * Note: The subquery will run for each chunk of upstream solutions, so
         * it could make sense to increase the vector size or to collect all
         * upstream solutions into a SolutionSet and then flood them into the
         * sub-query.
         * 
         * Note: We do not need to do a hash join with the output of the
         * sub-query. This amounts to pipelined evaluation. Solutions flow into
         * a subquery and then back out. The only reason for a hash join would
         * be if we project in only a subset of the variables that were in scope
         * in the parent context and then needed to pick up the correlated
         * variables after running the query plan generated by the RTO.
         */
        final IRunningQuery runningSubquery = queryEngine.eval(queryOp,
                bindingSets);

        try {

            // Declare the child query to the parent.
            ((AbstractRunningQuery) parentContext.getRunningQuery())
                    .addChild(runningSubquery);

            // Iterator visiting the subquery solutions.
            subquerySolutionItr = runningSubquery.iterator();

            // Copy solutions from the subquery to the query.
            final long nout = BOpUtility.copy(subquerySolutionItr,
                    parentContext.getSink(), null/* sink2 */,
                    null/* mergeSolution */, null/* selectVars */,
                    null/* constraints */, null/* stats */);

            // verify no problems.
            runningSubquery.get();

        } catch (Throwable t) {

            if (Haltable.isTerminationByInterrupt(t)) {

                // normal termination.
                return;

            }

            // log.error(t,t);

            /*
             * Propagate the error to the parent and rethrow the first cause
             * error out of the subquery.
             */
            throw new RuntimeException(parentContext.getRunningQuery().halt(t));

        } finally {

            runningSubquery.cancel(true/* mayInterruptIfRunning */);

            if (subquerySolutionItr != null)
                subquerySolutionItr.close();

        }

    }

}