// com.bigdata.bop.joinGraph.rto.JoinGraph (Maven / Gradle / Ivy)
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Aug 16, 2010
*/
package com.bigdata.bop.joinGraph.rto;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IQueryAttributes;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.ap.SampleIndex;
import com.bigdata.bop.ap.SampleIndex.SampleType;
import com.bigdata.bop.controller.AbstractSubqueryOp;
import com.bigdata.bop.engine.AbstractRunningQuery;
import com.bigdata.bop.engine.IRunningQuery;
import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.rdf.sparql.ast.JoinGroupNode;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpRTO;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpUtility;
import com.bigdata.util.NT;
import com.bigdata.util.concurrent.Haltable;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
* A join graph with annotations for estimated cardinality and other details in
* support of runtime query optimization. A join graph is a collection of access
* paths reading on relations (the vertices of the join graph) and joins which
* connect those relations (the edges of the join graph). This boils down to a
* collection of {@link IPredicate}s (access paths reading on relations),
* shared variables (which identify joins), and {@link IConstraint}s (which may
* reject some solutions for those joins). Operators other than standard joins
* (including optional joins, sort, order by, etc.) must be handled downstream
* from the join graph in a "tail plan".
*
* The {@link JoinGraph} operator works in two phases. On its first invocation,
* it constructs a {@link JGraph join graph} and identifies a join path having a
* low cost join ordering. This join path is converted into a query plan and set
* as the {@link Attributes#QUERY_PLAN} attribute on the {@link IRunningQuery}.
* The upstream solutions are then flooded into sub-query that executes the
* chosen query plan. The solutions from the sub-query are simply copied to the
* output sink of the {@link JoinGraph} operator. Once the query plan has been
* identified by the first invocation, subsequent invocations of this operator
* simply push more data into the sub-query using the pre-identified query plan.
*
* TODO This approach amounts to bottom-up evaluation of the {@link JGraph}.
* Thus, the RTO is not using information from the upstream query when it
* decides on a query plan. Therefore, we could lift-out the RTO sections of the
* query into named subqueries, run them first in parallel, and then INCLUDE
* their results into the main query. This would require an AST optimizer to
* modify the AST. (Currently the RTO is integrated when the query plan is
* generated in {@link AST2BOpUtility} rather than as an AST optimizer.)
*
* @see <a href="http://arxiv.org/PS_cache/arxiv/pdf/0810/0810.4809v1.pdf">XQuery
* Join Graph Isolation</a>
*
* @author Bryan Thompson
* @version $Id$
*
* @see JGraph
*/
public class JoinGraph extends PipelineOp {
// private static final transient Logger log = Logger
// .getLogger(JoinGraph.class);
/** Serialization version identifier for this operator. */
private static final long serialVersionUID = 1L;
/**
* Known annotations.
*/
public interface Annotations extends PipelineOp.Annotations {
// /**
// * The variables to be projected out of the join graph (optional). When
// * null
, all variables will be projected out.
// */
// String SELECTED = JoinGraph.class.getName() + ".selected";
/**
* The vertices of the join graph, expressed an an {@link IPredicate}[]
* (required).
*/
String VERTICES = JoinGraph.class.getName() + ".vertices";
/**
* The constraints on the join graph, expressed an an
* {@link IConstraint}[] (optional, defaults to no constraints).
*/
String CONSTRAINTS = JoinGraph.class.getName() + ".constraints";
/**
* The initial limit for cutoff sampling (default
* {@value #DEFAULT_LIMIT}).
*/
String LIMIT = JoinGraph.class.getName() + ".limit";
int DEFAULT_LIMIT = 100;
/**
* The nedges edges of the join graph having the lowest
* cardinality will be used to generate the initial join paths (default
* {@value #DEFAULT_NEDGES}). This must be a positive integer. The edges
* in the join graph are sorted in order of increasing cardinality and
* up to nedges of those edges having the lowest cardinality are
* used to form the initial set of join paths. For each edge selected to
* form a join path, the starting vertex will be the vertex of that edge
* having the lower cardinality.
*/
String NEDGES = JoinGraph.class.getName() + ".nedges";
int DEFAULT_NEDGES = 2;
/**
* The type of sample to take (default {@value #DEFAULT_SAMPLE_TYPE)}.
*
* @see SampleIndex.SampleType
*/
String SAMPLE_TYPE = JoinGraph.class.getName() + ".sampleType";
String DEFAULT_SAMPLE_TYPE = SampleType.RANDOM.name();
/**
* The set of variables that are known to have already been materialized
* in the context in which the RTO was invoked.
*/
String DONE_SET = JoinGraph.class.getName() + ".doneSet";
/**
* The AST {@link JoinGroupNode} for the joins and filters that we are
* running through the RTO (required).
*/
String JOIN_GROUP = JoinGraph.class.getName() + ".joinGroup";
/**
* An {@link NT} object specifying the namespace and timestamp of the KB
* view against which the RTO is running. This is necessary in order to
* reconstruct the {@link AST2BOpContext} when it comes time to evaluate
* either a cutoff join involving filters that need materialization or
* the selected join path.
*/
String NT = JoinGraph.class.getName() + ".nt";
}
/**
 * {@link IQueryAttributes} names for the {@link JoinGraph}. The fully
 * qualified name of the attribute is formed by appending the attribute name
 * to the "bopId-", where <code>bopId</code> is the value returned by
 * {@link BOp#getId()}.
 *
 * @author Bryan Thompson
 */
public interface Attributes {
/**
 * The join path selected by the RTO (output).
 */
String PATH = JoinGraph.class.getName() + ".path";
/**
 * The samples associated with the join path selected by the RTO (output).
 */
String SAMPLES = JoinGraph.class.getName() + ".samples";
/**
 * The physical query plan generated from the RTO determined best join
 * ordering (output). This is used to specify the query plan to be
 * executed by a downstream operator.
 */
String QUERY_PLAN = JoinGraph.class.getName() + ".queryPlan";
}
/*
* JoinGraph operator annotations.
*/
// /**
// * @see Annotations#SELECTED
// */
// public IVariable>[] getSelected() {
//
// return (IVariable[]) getRequiredProperty(Annotations.SELECTED);
//
// }
/**
* @see Annotations#VERTICES
*/
public IPredicate>[] getVertices() {
return (IPredicate[]) getRequiredProperty(Annotations.VERTICES);
}
/**
 * Return the constraints on the join graph.
 *
 * @return The value of the {@link Annotations#CONSTRAINTS} annotation, or
 *         <code>null</code> when that annotation was not specified.
 *
 * @see Annotations#CONSTRAINTS
 */
public IConstraint[] getConstraints() {

    // No default: the absence of constraints is reported as null.
    final IConstraint[] constraints = (IConstraint[]) getProperty(
            Annotations.CONSTRAINTS, null/* none */);

    return constraints;

}
/**
 * Return the initial limit for cutoff sampling.
 *
 * @return The value of the {@link Annotations#LIMIT} annotation, or
 *         {@link Annotations#DEFAULT_LIMIT} when it was not specified.
 *
 * @see Annotations#LIMIT
 */
public int getLimit() {

    final int limit = getProperty(Annotations.LIMIT,
            Annotations.DEFAULT_LIMIT);

    return limit;

}
/**
 * Return the number of low cardinality edges used to seed the initial join
 * paths.
 *
 * @return The value of the {@link Annotations#NEDGES} annotation, or
 *         {@link Annotations#DEFAULT_NEDGES} when it was not specified.
 *
 * @see Annotations#NEDGES
 */
public int getNEdges() {

    final int nedges = getProperty(Annotations.NEDGES,
            Annotations.DEFAULT_NEDGES);

    return nedges;

}
/**
 * Return the type of sample to be taken.
 *
 * @return The {@link SampleType} whose name is the value of the
 *         {@link Annotations#SAMPLE_TYPE} annotation, or of
 *         {@link Annotations#DEFAULT_SAMPLE_TYPE} when it was not
 *         specified.
 *
 * @throws IllegalArgumentException
 *             if the annotation value is not the name of a
 *             {@link SampleType}.
 *
 * @see Annotations#SAMPLE_TYPE
 */
public SampleType getSampleType() {

    // The annotation carries the enum name rather than the enum value.
    final String sampleTypeName = getProperty(Annotations.SAMPLE_TYPE,
            Annotations.DEFAULT_SAMPLE_TYPE);

    return SampleType.valueOf(sampleTypeName);

}
/**
* Return the set of variables that are known to have already been
* materialized at the point in the overall query plan where the RTO is
* being executed.
*
* @see Annotations#DONE_SET
*/
@SuppressWarnings("unchecked")
public Set> getDoneSet() {
return (Set>) getRequiredProperty(Annotations.DONE_SET);
}
/*
* IQueryAttributes
*/
/**
 * Return the join path that was computed for this operator, as recorded on
 * the given query.
 *
 * @param q
 *            The running query whose attributes are consulted.
 *
 * @return The value stored under the "bopId-" qualified
 *         {@link Attributes#PATH} key, or whatever the attribute map
 *         reports when nothing has been stored.
 *
 * @see Attributes#PATH
 */
public Path getPath(final IRunningQuery q) {

    final IQueryAttributes attributes = q.getAttributes();

    final String key = getId() + "-" + Attributes.PATH;

    return (Path) attributes.get(key);

}
/**
 * Return the samples associated with the computed join path, as recorded
 * on the given query.
 *
 * @param q
 *            The running query whose attributes are consulted.
 *
 * @return The value stored under the "bopId-" qualified
 *         {@link Attributes#SAMPLES} key.
 *
 * @see Attributes#SAMPLES
 */
@SuppressWarnings("unchecked")
public Map<PathIds, EdgeSample> getSamples(final IRunningQuery q) {

    // Unchecked: query attribute values are untyped.
    return (Map<PathIds, EdgeSample>) q.getAttributes().get(
            getId() + "-" + Attributes.SAMPLES);

}
/**
 * Record the selected join path on the given query under the "bopId-"
 * qualified {@link Attributes#PATH} key.
 *
 * @param q
 *            The running query whose attributes are updated.
 * @param p
 *            The join path.
 */
private void setPath(final IRunningQuery q, final Path p) {

    final IQueryAttributes attributes = q.getAttributes();

    final String key = getId() + "-" + Attributes.PATH;

    attributes.put(key, p);

}
/**
 * Record the samples for the selected join path on the given query under
 * the "bopId-" qualified {@link Attributes#SAMPLES} key.
 *
 * @param q
 *            The running query whose attributes are updated.
 * @param samples
 *            The edge samples collected while choosing the join path.
 */
private void setSamples(final IRunningQuery q,
        final Map<PathIds, EdgeSample> samples) {

    q.getAttributes().put(getId() + "-" + Attributes.SAMPLES, samples);

}
/**
 * Return the query plan to be executed based on the join ordering chosen
 * by the RTO, as recorded on the given query.
 *
 * @param q
 *            The running query whose attributes are consulted.
 *
 * @return The value stored under the "bopId-" qualified
 *         {@link Attributes#QUERY_PLAN} key. The evaluation task treats a
 *         <code>null</code> result as "no plan has been generated yet".
 *
 * @see Attributes#QUERY_PLAN
 */
public PipelineOp getQueryPlan(final IRunningQuery q) {

    final IQueryAttributes attributes = q.getAttributes();

    final String key = getId() + "-" + Attributes.QUERY_PLAN;

    return (PipelineOp) attributes.get(key);

}
/**
 * Record the generated query plan on the given query under the "bopId-"
 * qualified {@link Attributes#QUERY_PLAN} key.
 *
 * @param q
 *            The running query whose attributes are updated.
 * @param queryPlan
 *            The query plan generated from the selected join path.
 */
private void setQueryPlan(final IRunningQuery q,
        final PipelineOp queryPlan) {

    final IQueryAttributes attributes = q.getAttributes();

    final String key = getId() + "-" + Attributes.QUERY_PLAN;

    attributes.put(key, queryPlan);

}
/**
 * Deep copy constructor. All of the work is delegated to the superclass
 * copy constructor.
 *
 * @param op
 *            The operator to be copied.
 */
public JoinGraph(final JoinGraph op) {
super(op);
}
/**
 * Convenience constructor accepting the annotations as name/value pairs.
 * The pairs are converted with {@code NV.asMap(anns)} and handed to
 * {@link #JoinGraph(BOp[], Map)}, which validates them.
 *
 * @param args
 *            The operator arguments.
 * @param anns
 *            The operator annotations.
 */
public JoinGraph(final BOp[] args, final NV... anns) {
this(args, NV.asMap(anns));
}
public JoinGraph(final BOp[] args, final Map anns) {
super(args, anns);
// optional property.
// final IVariable>[] selected = (IVariable[]) getProperty(Annotations.SELECTED);
//
// if (selected == null)
// throw new IllegalArgumentException(Annotations.SELECTED);
//
// if (selected.length == 0)
// throw new IllegalArgumentException(Annotations.SELECTED);
// required property.
final IPredicate>[] vertices = (IPredicate[]) getProperty(Annotations.VERTICES);
if (vertices == null)
throw new IllegalArgumentException(Annotations.VERTICES);
if (vertices.length == 0)
throw new IllegalArgumentException(Annotations.VERTICES);
if (getLimit() <= 0)
throw new IllegalArgumentException(Annotations.LIMIT);
if (getNEdges() <= 0)
throw new IllegalArgumentException(Annotations.NEDGES);
/*
* TODO Check DONE_SET, NT, JOIN_NODES. These annotations are required
* for the new code path. We should check for their presence. However,
* the old code path is used by some unit tests which have not yet been
* updated and do not supply these annotations.
*/
// // Required.
// getDoneSet();
//
// // Required.
// getRequiredProperty(Annotations.NT);
if (!isController())
throw new IllegalArgumentException();
switch (getEvaluationContext()) {
case CONTROLLER:
break;
default:
throw new IllegalArgumentException(Annotations.EVALUATION_CONTEXT
+ "=" + getEvaluationContext());
}
}
/**
 * Wrap the evaluation of this operator as a {@link FutureTask}. The task
 * is created but not started by this method.
 *
 * @param context
 *            The evaluation context.
 *
 * @return A task which, when run, evaluates this operator against the
 *         given context.
 *
 * @throws IllegalArgumentException
 *             if the context is <code>null</code>.
 */
@Override
public FutureTask<Void> eval(final BOpContext<IBindingSet> context) {

    return new FutureTask<Void>(new JoinGraphTask(context));

}
/**
 * Evaluation of a {@link JoinGraph}.
 * <p>
 * When no query plan has been recorded on the running query for this
 * operator, the task first runs the runtime optimizer, records the selected
 * join path, its samples and the generated query plan as query attributes.
 * In all cases it then runs the recorded query plan as a sub-query over the
 * solutions available from the context.
 *
 * @author Bryan Thompson
 */
private class JoinGraphTask implements Callable<Void> {

    private final BOpContext<IBindingSet> context;

    /**
     * @param context
     *            The evaluation context (required).
     *
     * @throws IllegalArgumentException
     *             if the context is <code>null</code>.
     */
    JoinGraphTask(final BOpContext<IBindingSet> context) {

        if (context == null)
            throw new IllegalArgumentException();

        this.context = context;

    }

    /**
     * Generate the query plan if necessary and then run it as a sub-query,
     * blocking until the sub-query is done.
     *
     * @return Always <code>null</code>.
     */
    @Override
    public Void call() throws Exception {

        final IRunningQuery runningQuery = context.getRunningQuery();

        if (getQueryPlan(runningQuery) == null) {

            /*
             * Use the RTO to generate a query plan.
             *
             * TODO Make sure that the JoinGraph can not be triggered
             * concurrently, e.g., that the CONTROLLER attribute prevents
             * concurrent evaluation, just like MAX_PARALLEL.
             */

            // Create the join graph.
            final JGraph g = new JGraph(JoinGraph.this);

            /*
             * This map is used to associate join path segments (expressed
             * as an ordered array of bopIds) with edge sample to avoid
             * redundant effort.
             *
             * NOTE(review): the key type (PathIds, same package) was
             * restored from a stripped generic signature - confirm against
             * JGraph#runtimeOptimizer().
             */
            final Map<PathIds, EdgeSample> edgeSamples = new LinkedHashMap<PathIds, EdgeSample>();

            // Find the best join path.
            final Path path = g.runtimeOptimizer(
                    runningQuery.getQueryEngine(), edgeSamples);

            /*
             * Release samples.
             *
             * TODO If we have fully sampled some vertices or edges, then we
             * could replace the JOIN with the sample. For this to work, we
             * would need an access path that could read the sample and we
             * would have to NOT release the samples until the RTO was done
             * executing sub-queries against the generated query plan. Since
             * we can flow multiple chunks into the sub-query, this amounts
             * to having a LAST_PASS annotation.
             */
            for (EdgeSample s : edgeSamples.values()) {

                s.releaseSample();

            }

            for (Vertex v : g.getVertices()) {

                // Not every vertex necessarily has a sample.
                if (v.sample != null) {

                    v.sample.releaseSample();

                }

            }

            // Set attribute for the join path result.
            setPath(runningQuery, path);

            // Set attribute for the join path samples.
            setSamples(runningQuery, edgeSamples);

            /*
             * Generate the query from the selected join path.
             */
            final PipelineOp plan = AST2BOpRTO.compileJoinGraph(
                    runningQuery.getQueryEngine(), JoinGraph.this, path);

            // Set attribute for the generated query plan.
            setQueryPlan(runningQuery, plan);

        }

        // The query plan (either just generated or from a prior invocation).
        final PipelineOp queryOp = getQueryPlan(runningQuery);

        // Run the query, blocking until it is done.
        JoinGraph.runSubquery(context, queryOp);

        return null;

    }

} // class JoinGraphTask
/**
 * Execute the selected join path.
 * <p>
 * Note: When executing the query, it is actually being executed as a
 * subquery. Therefore we have to take appropriate care to ensure that the
 * results are copied out of the subquery and into the parent query. See
 * {@link AbstractSubqueryOp} for how this is done.
 * <p>
 * The sub-query is always cancelled and its solution iterator closed before
 * this method returns. Termination of the sub-query by interrupt is treated
 * as normal termination.
 *
 * @param parentContext
 *            The evaluation context of the parent operator. Its source
 *            supplies the input solutions and its sink receives the
 *            solutions produced by the sub-query.
 * @param queryOp
 *            The query plan to be run as a sub-query.
 *
 * @throws IllegalArgumentException
 *             if either argument is <code>null</code>.
 * @throws RuntimeException
 *             wrapping the cause returned when the parent query is halted
 *             because the sub-query failed.
 */
static private void runSubquery(
        final BOpContext<IBindingSet> parentContext,
        final PipelineOp queryOp) throws Exception {

    if (parentContext == null)
        throw new IllegalArgumentException();

    if (queryOp == null)
        throw new IllegalArgumentException();

    final QueryEngine queryEngine = parentContext.getRunningQuery()
            .getQueryEngine();

    /*
     * Run the sub-query.
     */

    ICloseableIterator<IBindingSet[]> subquerySolutionItr = null;

    // Fully materialize the upstream solutions.
    final IBindingSet[] bindingSets = BOpUtility.toArray(
            parentContext.getSource(), parentContext.getStats());

    /*
     * Run on all available upstream solutions.
     *
     * Note: The subquery will run for each chunk of upstream solutions, so
     * it could make sense to increase the vector size or to collect all
     * upstream solutions into a SolutionSet and then flood them into the
     * sub-query.
     *
     * Note: We do not need to do a hash join with the output of the
     * sub-query. This amounts to pipelined evaluation. Solutions flow into
     * a subquery and then back out. The only reason for a hash join would
     * be if we project in only a subset of the variables that were in scope
     * in the parent context and then needed to pick up the correlated
     * variables after running the query plan generated by the RTO.
     */
    final IRunningQuery runningSubquery = queryEngine.eval(queryOp,
            bindingSets);

    try {

        // Declare the child query to the parent.
        ((AbstractRunningQuery) parentContext.getRunningQuery())
                .addChild(runningSubquery);

        // Iterator visiting the subquery solutions.
        subquerySolutionItr = runningSubquery.iterator();

        // Copy solutions from the subquery to the query.
        BOpUtility.copy(subquerySolutionItr,
                parentContext.getSink(), null/* sink2 */,
                null/* mergeSolution */, null/* selectVars */,
                null/* constraints */, null/* stats */);

        // verify no problems.
        runningSubquery.get();

    } catch (Throwable t) {

        if (Haltable.isTerminationByInterrupt(t)) {

            // normal termination.
            return;

        }

        /*
         * Propagate the error to the parent and rethrow the first cause
         * error out of the subquery.
         */
        throw new RuntimeException(parentContext.getRunningQuery().halt(t));

    } finally {

        // Always cancel the sub-query and release its iterator.
        runningSubquery.cancel(true/* mayInterruptIfRunning */);

        if (subquerySolutionItr != null)
            subquerySolutionItr.close();

    }

}
}