// com.bigdata.bop.joinGraph.rto.Vertex Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.bop.joinGraph.rto;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.BOpContextBase;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IElement;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.NV;
import com.bigdata.bop.ap.SampleIndex;
import com.bigdata.bop.ap.SampleIndex.SampleType;
import com.bigdata.bop.bindingSet.ListBindingSet;
import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.htree.HTree;
import com.bigdata.relation.IRelation;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.relation.accesspath.IBindingSetAccessPath;
import com.bigdata.striterator.IChunkedIterator;
/**
* A vertex of the join graph is an annotated relation (this corresponds to an
* {@link IPredicate} with additional annotations to support the adaptive query
* optimization algorithm).
*
* The unique identifier for a {@link Vertex} (within a given join graph) is the
* {@link BOp.Annotations#BOP_ID} decorating its {@link IPredicate}.
* {@link #hashCode()} is defined in terms of this unique identifier so we can
* readily detect when a {@link Set} already contains a given {@link Vertex}.
*/
public class Vertex implements Serializable {
/**
 * Logger for this class. Static fields are never part of the serialized
 * form, so no <code>transient</code> modifier is needed here.
 */
private static final Logger log = Logger.getLogger(Vertex.class);

/**
 * Serialization version identifier for this {@link Serializable} class.
 */
private static final long serialVersionUID = 1L;
/**
* The {@link IPredicate} associated with the {@link Vertex}. This basically
* provides the information necessary to select an appropriate access path.
*/
public final IPredicate> pred;
/**
 * The latest {@link VertexSample} taken for this {@link Vertex}. It starts
 * out as <code>null</code> (no sample yet) and, being <code>transient</code>,
 * is not included in the default serialized form.
 */
transient VertexSample sample;
/**
 * Creates a vertex for the given predicate.
 *
 * @param pred
 *            The {@link IPredicate} that this vertex wraps.
 *
 * @throws IllegalArgumentException
 *             if the argument is <code>null</code>.
 */
Vertex(final IPredicate<?> pred) {
    if (pred == null) {
        throw new IllegalArgumentException();
    }
    this.pred = pred;
}
/**
 * Human readable representation showing the predicate and the most recent
 * sample (which may be <code>null</code>).
 */
@Override
public String toString() {
    final StringBuilder sb = new StringBuilder("Vertex{pred=");
    sb.append(pred);
    sb.append(",sample=").append(sample);
    sb.append("}");
    return sb.toString();
}
/**
 * Identity based equality: a {@link Vertex} is equal only to itself.
 *
 * @param o
 *            The object to compare against (may be <code>null</code>).
 *
 * @return <code>true</code> iff the argument is this very instance.
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    return false;
}
/**
 * Hashes on the identifier of the associated {@link IPredicate}, i.e. its
 * {@link BOp.Annotations#BOP_ID}.
 *
 * @return The value of <code>pred.getId()</code>.
 */
@Override
public int hashCode() {
    final int predicateId = pred.getId();
    return predicateId;
}
/**
* Take a sample of the vertex, updating {@link #sample} as a side-effect.
* If the sample is already exact, then this is a NOP. If the vertex was
* already sampled to that limit, then this is a NOP (you have to raise the
* limit to re-sample the vertex).
*
* @param limit
* The sample cutoff.
*/
@SuppressWarnings("unchecked")
public void sample(final QueryEngine queryEngine, final int limit,
final SampleType sampleType) {
if (queryEngine == null)
throw new IllegalArgumentException();
if (limit <= 0)
throw new IllegalArgumentException();
if (sampleType == null)
throw new IllegalArgumentException();
final VertexSample oldSample = this.sample;
if (oldSample != null && oldSample.estimateEnum == EstimateEnum.Exact) {
/*
* The old sample is already the full materialization of the vertex.
*/
return;
}
if (oldSample != null && oldSample.limit >= limit) {
/*
* The vertex was already sampled to this limit.
*/
return;
}
/*
* FIXME RTO: AST2BOpJoins is responsible for constructing the
* appropriate access path. Under some cases it can emit a DataSetJoin
* followed by a join against the access path. Under other cases, it
* will use a SCAN+FILTER pattern and attach a filter. The code below
* does not benefit from any of this because the vertex is created from the
* [pred] before we invoke AST2BOpJoins#join() and hence lacks all of
* these interesting and critical annotations. When generating the join
* graph, the RTO needs to emit a set of vertices and filters that is
* sufficient for joins for named graphs and default graphs. It also
* needs to emit a set of predicates and filters that is sufficient for
* triples mode joins.
*
* Some possible approaches:
*
* - For the RTO, always do a DataSetJoin + SP. We would need to support
* the DataSetJoin as a Predicate (it does not get modeled that way
* right now). The SP would need to have the DISTINCT SPO filter
* attached for a default graph join. This might even be a DISTINCT
* FILTER that gets into the plan and winds up attached to either the
* DataSetJoin or the SP, depending on which runs first. This would give
* us two APs plus a visible FILTER rather than ONE AP with some hidden
* filters. The DataSetJoin would need to be associated with an AP that
* binds the (hidden) graph variable. This could be an opportunity to
* generalize for storing those data on the native heap / htree / etc. /
* named solution set as well.
*
* Basically, this amounts to saying that we will sample both the set of
* graphs that are in the named graphs or default graphs data set and
* the unconstrained triple pattern AP.
*
* - If C is bound, then we should just wind up with a FILTER that is
* imposing the DISTINCT SPO (for default graph APs) and do not need to
* do anything (for named graph APs).
*/
final BOpContextBase context = new BOpContextBase(queryEngine);
final IRelation r = context.getRelation(pred);
final IAccessPath ap = context.getAccessPath(r, pred);
final long rangeCount = oldSample == null ? ap
.rangeCount(false/* exact */) : oldSample.estCard;
if (rangeCount <= limit) {
/*
* Materialize the access path.
*
* TODO This could be more efficient if we raised it onto the AP or
* if we overrode CHUNK_CAPACITY and the fully buffered iterator
* threshold such that everything was materialized as a single
* chunk.
*/
final List