All downloads are free. Search and download functionality uses the official Maven repository.

com.bigdata.bop.joinGraph.rto.Vertex Maven / Gradle / Ivy

/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
package com.bigdata.bop.joinGraph.rto;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import org.apache.log4j.Logger;

import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.BOpContextBase;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IElement;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.NV;
import com.bigdata.bop.ap.SampleIndex;
import com.bigdata.bop.ap.SampleIndex.SampleType;
import com.bigdata.bop.bindingSet.ListBindingSet;
import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.htree.HTree;
import com.bigdata.relation.IRelation;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.relation.accesspath.IBindingSetAccessPath;
import com.bigdata.striterator.IChunkedIterator;

/**
 * A vertex of the join graph is an annotated relation (this corresponds to an
 * {@link IPredicate} with additional annotations to support the adaptive query
 * optimization algorithm).
 * 

* The unique identifier for a {@link Vertex} (within a given join graph) is the * {@link BOp.Annotations#BOP_ID} decorating its {@link IPredicate}. * {@link #hashCode()} is defined in terms of this unique identifier so we can * readily detect when a {@link Set} already contains a given {@link Vertex}. */ public class Vertex implements Serializable { private static final transient Logger log = Logger.getLogger(Vertex.class); private static final long serialVersionUID = 1L; /** * The {@link IPredicate} associated with the {@link Vertex}. This basically * provides the information necessary to select an appropriate access path. */ public final IPredicate pred; /** * The most recently taken sample of the {@link Vertex}. */ transient VertexSample sample = null; Vertex(final IPredicate pred) { if (pred == null) throw new IllegalArgumentException(); this.pred = pred; } @Override public String toString() { return "Vertex{pred=" + pred + ",sample=" + sample + "}"; } /** * Equals is based on a reference test. */ @Override public boolean equals(Object o) { return this == o; } /** * The hash code is just the {@link BOp.Annotations#BOP_ID} of the * associated {@link IPredicate}. */ @Override public int hashCode() { return pred.getId(); } /** * Take a sample of the vertex, updating {@link #sample} as a side-effect. * If the sample is already exact, then this is a NOP. If the vertex was * already sampled to that limit, then this is a NOP (you have to raise the * limit to re-sample the vertex). * * @param limit * The sample cutoff. 
*/ @SuppressWarnings("unchecked") public void sample(final QueryEngine queryEngine, final int limit, final SampleType sampleType) { if (queryEngine == null) throw new IllegalArgumentException(); if (limit <= 0) throw new IllegalArgumentException(); if (sampleType == null) throw new IllegalArgumentException(); final VertexSample oldSample = this.sample; if (oldSample != null && oldSample.estimateEnum == EstimateEnum.Exact) { /* * The old sample is already the full materialization of the vertex. */ return; } if (oldSample != null && oldSample.limit >= limit) { /* * The vertex was already sampled to this limit. */ return; } /* * FIXME RTO: AST2BOpJoins is responsible for constructing the * appropriate access path. Under some cases it can emit a DataSetJoin * followed by a join against the access path. Under other cases, it * will use a SCAN+FILTER pattern and attach a filter. The code below * does not benefit from any of this because the vertex created from the * [pred] before we invoke AST2BOpJoin#join() and hence lacks all of * these interesting and critical annotations. When generating the join * graph, the RTO needs to emit a set of vertices and filters that is * sufficient for joins for named graphs and default graphs. It also * needs to emit a set of predicates and filters that is sufficient for * triples mode joins. * * Some possible approaches: * * - For the RTO, always do a DataSetJoin + SP. We would need to support * the DataSetJoin as a Predicate (it does not get modeled that way * right now). The SP would need to have the DISTINCT SPO filter * attached for a default graph join. This might even be a DISTINCT * FILTER that gets into the plan and winds up attached to either the * DataSetJoin or the SP, depending on which runs first. This would give * us two APs plus a visible FILTER rather than ONE AP with some hidden * filters. The DataSetJoin would need to be associated with an AP that * binds the (hidden) graph variable. 
This could be an opporunity to * generalize for storing those data on the native heap / htree / etc. / * named solution set as well. * * Basically, this amounts to saying that we will sample both the set of * graphs that are in the named graphs or default graphs data set and * the unconstrained triple pattern AP. * * - If C is bound, then we should just wind up with a FILTER that is * imposing the DISTINCT SPO (for default graph APs) and do not need to * do anything (for named graph AP)s. */ final BOpContextBase context = new BOpContextBase(queryEngine); final IRelation r = context.getRelation(pred); final IAccessPath ap = context.getAccessPath(r, pred); final long rangeCount = oldSample == null ? ap .rangeCount(false/* exact */) : oldSample.estCard; if (rangeCount <= limit) { /* * Materialize the access path. * * TODO This could be more efficient if we raised it onto the AP or * if we overrode CHUNK_CAPACITY and the fully buffered iterator * threshold such that everything was materialized as a single * chunk. */ final List tmp = new ArrayList((int) rangeCount); final IChunkedIterator itr = ap.iterator(); try { while (itr.hasNext()) { tmp.add(itr.next()); } } finally { itr.close(); } sample = new VertexSample(rangeCount, limit, EstimateEnum.Exact, elementsToBindingSets(pred, tmp.toArray(new Object[tmp .size()]))); } else { /* * Materialize a sample from the access path. */ final SampleIndex sampleOp = new SampleIndex(new BOp[] {}, // NV.asMap(// new NV(SampleIndex.Annotations.PREDICATE, pred),// new NV(SampleIndex.Annotations.LIMIT, limit),// new NV(SampleIndex.Annotations.SAMPLE_TYPE, sampleType.name())// )); sample = new VertexSample(rangeCount, limit, EstimateEnum.Normal, elementsToBindingSets(pred, sampleOp.eval(context))); } if (log.isTraceEnabled()) log.trace("Sampled: id=" + pred.getId() + ", sample=" + sample); return; } /** * Convert the source sample into an IBindingSet[]. 
* * @param pred * The {@link IPredicate}, which tells us the variables which * need to become bound. * @param elements * The sampled elements as materialized from the index. * * FIXME Replace with inline access path based on * {@link IBindingSetAccessPath}. The data can be stored on an * {@link HTree}. */ static private IBindingSet[] elementsToBindingSets( final IPredicate pred, final Object[] elements) { final IBindingSet[] sourceSample = new IBindingSet[elements.length]; for (int i = 0; i < sourceSample.length; i++) { final IBindingSet bset = new ListBindingSet(); /* * TODO Make this method package private once we convert to using an * inline access path. */ BOpContext.copyValues((IElement) elements[i], pred, bset); sourceSample[i] = bset; } return sourceSample; } }