com.bigdata.bop.fed.shards.MapBindingSetsOverShardsBuffer Maven / Gradle / Ivy
package com.bigdata.bop.fed.shards;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import com.bigdata.bop.BOp;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.journal.NoSuchIndexException;
import com.bigdata.journal.TimestampUtility;
import com.bigdata.mdi.IMetadataIndex;
import com.bigdata.mdi.PartitionLocator;
import com.bigdata.relation.IRelation;
import com.bigdata.relation.accesspath.AbstractUnsynchronizedArrayBuffer;
import com.bigdata.relation.accesspath.IBuffer;
import com.bigdata.service.AbstractScaleOutFederation;
import com.bigdata.service.IBigdataFederation;
import com.bigdata.striterator.IKeyOrder;
* A stream of {@link IBindingSet} are mapped across the shards which will have
* the data for the {@link IPredicate#asBound(IBindingSet)} {@link IPredicate}.
* Unsynchronized (non-thread safe) buffer maps the {@link IBindingSet}s across
* the index partition(s) associated with an {@link IPredicate} and
* {@link IKeyOrder}. For each source chunk, "as bound" versions of the target
* {@link IPredicate} are constructed and the {@link IBindingSet}s in the chunk
* are reordered based on {@link IKeyOrder#getFromKey(IKeyBuilder, IPredicate)}
* for each asBound predicate. The {@link PartitionLocator}s are discovered for
* each fromKey using an ordered locator scan and the binding sets are output
* onto a shard or node specific {@link IBuffer} created by a concrete subclass.
* The subclass is responsible for getting the binding sets from this node onto
* the node associated with each output buffer.
* @author Bryan Thompson
* @version $Id: 3448 2010-08-18 20:55:58Z
* thompsonbry $
* @param <E>
* The generic type of the elements in the buffer.
* @param
* The generic type of the elements in the relation associated with
* the {@link IPredicate}.
* @todo This could be refactored such that it no longer implemented
* {@link IBuffer} but instead was a {@link BOp} with binding sets
* streaming in from its source. However, unlike a normal {@link BOp} it
* would have a compound sink and it would have to be tightly integrated
* with the {@link QueryEngine} to be used.
* In fact, this is pretty much just doing a join against the metadata
* index. However, it presumes that there are far fewer index partitions
* than tuples flowing through the system and that it is better to read
* remotely from the {@link IMetadataIndex} and cache than to use the
* general purpose pipeline join, which would cause all binding sets to be
* routed through the centralized {@link IMetadataIndex}.
public abstract class MapBindingSetsOverShardsBuffer
extends AbstractUnsynchronizedArrayBuffer {
// static transient private final Logger log = Logger.getLogger(MapBindingSetsOverShardsBuffer.class);
protected final AbstractScaleOutFederation> fed;
* The predicate from which we generate the asBound binding sets. This
* predicate and the {@link IKeyOrder} together determine the required
* access path.
protected final IPredicate pred;
// /**
// * Identifies the index for the access path required by the {@link #pred
// * predicate}.
// */
// protected final IKeyOrder keyOrder;
* The timestamp associated with the operation on the target access path. If
* the binding sets will be used to read on the shards of the target access
* path, then this is the read timestamp. If they will be used to write on
* the target access path, then this is the write timestamp.
protected final long timestamp;
// /**
// * The name of the scale-out index associated with the {@link #pred
// * predicate}, including both the relation name and the {@link IKeyOrder}
// * components of the index name.
// */
// protected final String namespace;
// /**
// * The associated {@link IMetadataIndex}.
// *
// * @todo might be moved into the {@link IShardMapper} constructors for
// * efficiency so only materialized when necessary. Alternatively, we
// * might get the {@link IKeyBuilder} from the index metadata template
// * on the {@link IMetadataIndex} and thus avoid lookup of the
// * {@link IRelation}.
// */
// protected final IMetadataIndex mdi;
// /**
// * The {@link IKeyBuilder} for the index associated with the access path
// * required by the predicate.
// */
// protected final IKeyBuilder keyBuilder;
* A scale-out view of the target relation.
protected final IRelation relation;
* The implementation class for the algorithm which will be used to map the
* {@link IBindingSet}s over the shards.
private final IShardMapper algorithm;
* @param fed
* The federation.
* @param pred
* The predicate associated with the target operator. The
* predicate identifies which variables and/or constants form the
* key for the access path and hence selects the shards on which
* the target operator must read or write. For example, when the
* target operator is a JOIN, this is the {@link IPredicate}
* associated with the right hand operator of the join.
* @param timestamp
* The timestamp associated with the operation on the target
* access path. If the binding sets will be used to read on the
* shards of the target access path, then this is the read
* timestamp. If they will be used to write on the target access
* path, then this is the write timestamp.
* @param capacity
* The capacity of this buffer.
// * @param keyOrder
// * Identifies the access path for the target predicate.
public MapBindingSetsOverShardsBuffer(
final IBigdataFederation> fed,//
final IPredicate pred, //
// final IKeyOrder keyOrder,//
final long timestamp,//
final int capacity) {
super(capacity, (Class extends E>) IBindingSet.class);
if (fed == null)
throw new IllegalArgumentException();
if (pred == null)
throw new IllegalArgumentException();
// /*
// * This class was reworked to target the index which is
// * selected dynamically rather than based on a static given IKeyOrder.
// * Just use relation.getKeyOrder(predicate) for each asBound predicate
// * and then obtain the IKeyBuilder from the scale-out view of the
// * associated index.
// */
// if (keyOrder == null)
// throw new IllegalArgumentException();
this.fed = (AbstractScaleOutFederation>) fed;
this.pred = pred;
// this.namespace = pred.getOnlyRelationName() + "."
// + keyOrder.getIndexName();
// this.keyOrder = keyOrder;
this.timestamp = timestamp;
// /*
// * Note: we can use the read view of the relation to get the IKeyBuilder
// * even if we will be writing on the relation since the IKeyBuilder
// * semantics can not be readily changed once an index has been created.
// */
final String namespace = pred.getOnlyRelationName();
// @SuppressWarnings("unchecked")
this.relation = (IRelation) fed.getResourceLocator().locate(
namespace, timestamp);
if (relation == null)
throw new RuntimeException("Not found: relation=" + namespace
+ "@" + TimestampUtility.toString(timestamp));
// final IIndex index = relation.getIndex(keyOrder);
// this.keyBuilder = index.getIndexMetadata().getKeyBuilder();
// /*
// * Resolve a scale-out view of the metadata index for the target
// * predicate.
// */
// {
// mdi = fed.getMetadataIndex(namespace, timestamp);
// if (mdi == null) {
// throw new NoSuchIndexException("name=" + namespace
// + ", timestamp=" + TimestampUtility.toString(timestamp));
// }
// }
* @todo Conditionally choose the best algorithm. I am not sure if we
* might want to do via an annotation (of the target predicate), if this
* is something that we determine automatically, or if there is some
* combination of the two which will work best. (For example, the query
* optimizer will know if the target predicate will be fully bound for
* all inputs but we do not have that information available locally).
* @todo From the perspective of the unit tests, it is important to have
* this be declarative so we can test each of the different algorithms
* independently.
final boolean predicateWillBeFullyBound = false;
if (predicateWillBeFullyBound) {
* Uses ISplitter, but requires keys based on fully bound predicate.
* Note: This can not be used when there are optional joins or
* other conditionals which could result in the predicate not being
* fully bound for some evaluation paths.
* The key order which will be used for that relation for a fully
* bound predicate.
final IKeyOrder keyOrder = null;
algorithm = new Algorithm_FullyBoundPredicate(this, keyOrder);
} else {
// general purpose.
algorithm = new Algorithm_NestedLocatorScan(this);
* Resolve a scale-out view of the metadata index for the target predicate.
protected IMetadataIndex getMetadataIndex(final IKeyOrder keyOrder) {
final String namespace = relation.getFQN(keyOrder);
final IMetadataIndex mdi = fed.getMetadataIndex(namespace, timestamp);
if (mdi == null) {
throw new NoSuchIndexException("name=" + namespace + ", timestamp="
+ TimestampUtility.toString(timestamp));
return mdi;
* Maps the chunk of {@link IBindingSet}s across the index partition(s) for
* the sink join dimension.
* @param chunk
* A chunk of {@link IBindingSet}s.
protected void handleChunk(final E[] chunk) {
final Bundle[] bundles = new Bundle[chunk.length];
* Create the asBound version of the predicate and the associated
* fromKey for each bindingSet in the chunk.
for (int i = 0; i < chunk.length; i++) {
// an intermediate solution.
final IBindingSet bindingSet = chunk[i];
// the asBound version of the predicate.
final IPredicate asBound = pred.asBound(bindingSet);
// the index which will be used for that asBound predicate.
final IKeyOrder keyOrder = relation.getKeyOrder(asBound);
// the key builder associated with that index.
final IKeyBuilder keyBuilder = relation.getIndex(keyOrder)
// save the bundle for processing.
bundles[i] = new Bundle(keyBuilder, asBound, keyOrder,
// /*
// * Sort the binding sets in the chunk by the fromKey associated with
// * each asBound predicate. [Sort is moved into the implementation.]
// */
// Arrays.sort(bundles);
* Map the bundles over the shards.
* Locator scan for the index partitions for that predicate as bound.
protected Iterator locatorScan(
final IKeyOrder keyOrder, final byte[] fromKey,
final byte[] toKey) {
final String name = relation.getFQN(keyOrder);
return fed
.locatorScan(name, timestamp, fromKey, toKey, false/* reverse */);
* Extended to flush each buffer which targets a specific index partition as
* well.
* {@inheritDoc}
public long flush() {
final long n = super.flush();
for (IBuffer sink : sinks.values()) {
if (!sink.isEmpty())
return n;
* The allocated sinks.
* Note: Since the collection is not thread-safe, synchronization is
* required when adding to the collection and when visiting the elements of
* the collection. However, the {@link MapBindingSetsOverShardsBuffer} is not
* thread-safe either so this should be Ok.
private final LinkedHashMap/* sink */> sinks = new LinkedHashMap>();
* An immutable view of the sinks.
* @todo Rather than exposing all sinks and requiring that all sinks be
* fully buffered, it would be better to hook the production of the
* binding sets for a given {@link PartitionLocator} since many of the
* {@link IShardMapper}s can know when they will not see another
* binding set for a given {@link PartitionLocator} and hence the data
* can be immediately flushed to that target.
* A similar scaling concern with very large numbers of source binding
* sets is that we may be better off applying a {@link SortOp} to the
* binding sets, which allows us to use external merge sorts or even
* hash partitioned distributed merge sorts. This suggests that we
* should really unpack this class as a general purpose operator with
* special integration into the query engine.
public Map/* sink */> getSinks() {
return Collections.unmodifiableMap(sinks);
* Return the buffer used to absorb binding sets which target the specified
* index partition.
* @param locator
* The locator for the target index partition.
* @return The buffer.
protected IBuffer getBuffer(final PartitionLocator locator) {
IBuffer sink = sinks.get(locator);
if (sink == null) {
sinks.put(locator, sink = newBuffer(locator));
return sink;
* Return a buffer onto which binding sets will be written which are
* destined for the specified shard.
* Note: The concrete implementation may choose whether to associate buffers
* with the target shard or the target node.
* @param locator
* The locator for the target index partition.
* @return The buffer.
abstract protected IBuffer newBuffer(PartitionLocator locator);