// Source listing for com.bigdata.bop.fed.shards.MapBindingSetsOverShardsBuffer
package com.bigdata.bop.fed.shards;

import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;

import com.bigdata.bop.BOp;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.bop.solutions.SortOp;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.journal.NoSuchIndexException;
import com.bigdata.journal.TimestampUtility;
import com.bigdata.mdi.IMetadataIndex;
import com.bigdata.mdi.PartitionLocator;
import com.bigdata.relation.IRelation;
import com.bigdata.relation.accesspath.AbstractUnsynchronizedArrayBuffer;
import com.bigdata.relation.accesspath.IBuffer;
import com.bigdata.service.AbstractScaleOutFederation;
import com.bigdata.service.IBigdataFederation;
import com.bigdata.striterator.IKeyOrder;

/**
 * A stream of {@link IBindingSet} are mapped across the shards which will have
 * the data for the {@link IPredicate#asBound(IBindingSet)} {@link IPredicate}.
 * 
 * Unsynchronized (non-thread safe) buffer maps the {@link IBindingSet}s across
 * the index partition(s) associated with an {@link IPredicate} and
 * {@link IKeyOrder}. For each source chunk, "as bound" versions of the target
 * {@link IPredicate} are constructed and the {@link IBindingSet}s in the chunk
 * are reordered based on {@link IKeyOrder#getFromKey(IKeyBuilder, IPredicate)}
 * for each asBound predicate. The {@link PartitionLocator}s are discovered for
 * each fromKey using an ordered locator scan and the binding sets are output
 * onto a shard or node specific {@link IBuffer} created by a concrete subclass.
 * The subclass is responsible for getting the binding sets from this node onto
 * the node associated with each output buffer.
 * 
 * @author Bryan Thompson
 * @version $Id: UnsyncDistributedOutputBuffer.java 3448 2010-08-18 20:55:58Z
 *          thompsonbry $
 * @param <E>
 *            The generic type of the elements in the buffer.
 * @param <F>
 *            The generic type of the elements in the relation associated with
 *            the {@link IPredicate}.
 * 
 * @todo This could be refactored such that it no longer implemented
 *       {@link IBuffer} but instead was a {@link BOp} with binding sets
 *       streaming in from its source. However, unlike a normal {@link BOp} it
 *       would have a compound sink and it would have to be tightly integrated
 *       with the {@link QueryEngine} to be used.
 *       <p>
 *       In fact, this is pretty much just doing a join against the metadata
 *       index. However, it presumes that there are far fewer index partitions
 *       than tuples flowing through the system and that it is better to read
 *       remotely from the {@link IMetadataIndex} and cache than to use the
 *       general purpose pipeline join, which would cause all binding sets to be
 *       routed through the centralized {@link IMetadataIndex}.
 */
public abstract class MapBindingSetsOverShardsBuffer
        extends AbstractUnsynchronizedArrayBuffer {

//    static transient private final Logger log = Logger.getLogger(MapBindingSetsOverShardsBuffer.class);
    
    protected final AbstractScaleOutFederation fed;
    
    /**
     * The predicate from which we generate the asBound binding sets. This
     * predicate and the {@link IKeyOrder} together determine the required
     * access path. 
     */
    protected final IPredicate pred;

//    /**
//     * Identifies the index for the access path required by the {@link #pred
//     * predicate}.
//     */
//    protected final IKeyOrder keyOrder;

    /**
     * The timestamp associated with the operation on the target access path. If
     * the binding sets will be used to read on the shards of the target access
     * path, then this is the read timestamp. If they will be used to write on
     * the target access path, then this is the write timestamp.
     */
    protected final long timestamp;
    
//    /**
//     * The name of the scale-out index associated with the {@link #pred
//     * predicate}, including both the relation name and the {@link IKeyOrder}
//     * components of the index name.
//     */
//    protected final String namespace;
//
//    /**
//     * The associated {@link IMetadataIndex}.
//     * 
//     * @todo might be moved into the {@link IShardMapper} constructors for
//     *       efficiency so only materialized when necessary. Alternatively, we
//     *       might get the {@link IKeyBuilder} from the index metadata template
//     *       on the {@link IMetadataIndex} and thus avoid lookup of the
//     *       {@link IRelation}.
//     */
//    protected final IMetadataIndex mdi;
//    
//    /**
//     * The {@link IKeyBuilder} for the index associated with the access path
//     * required by the predicate. 
//     */
//    protected final IKeyBuilder keyBuilder;

    /**
     * A scale-out view of the target relation.
     */
    protected final IRelation relation;
    
    /**
     * The implementation class for the algorithm which will be used to map the
     * {@link IBindingSet}s over the shards.
     */
    private final IShardMapper algorithm;
    
   /**
     * @param fed
     *            The federation.
     * @param pred
     *            The predicate associated with the target operator. The
     *            predicate identifies which variables and/or constants form the
     *            key for the access path and hence selects the shards on which
     *            the target operator must read or write. For example, when the
     *            target operator is a JOIN, this is the {@link IPredicate}
     *            associated with the right hand operator of the join.
     * @param timestamp
     *            The timestamp associated with the operation on the target
     *            access path. If the binding sets will be used to read on the
     *            shards of the target access path, then this is the read
     *            timestamp. If they will be used to write on the target access
     *            path, then this is the write timestamp.
     * @param capacity
     *            The capacity of this buffer.
     */
//    * @param keyOrder
//    *            Identifies the access path for the target predicate.
    public MapBindingSetsOverShardsBuffer(
            final IBigdataFederation fed,//
            final IPredicate pred, //
//            final IKeyOrder keyOrder,//
            final long timestamp,//
            final int capacity) {

        super(capacity, (Class) IBindingSet.class);

        if (fed == null)
            throw new IllegalArgumentException();

        if (pred == null)
            throw new IllegalArgumentException();

//        /*
//         * This class was reworked to target the index which is
//         * selected dynamically rather than based on a static given IKeyOrder.
//         * Just use relation.getKeyOrder(predicate) for each asBound predicate
//         * and then obtain the IKeyBuilder from the scale-out view of the
//         * associated index.
//         */
//        if (keyOrder == null)
//            throw new IllegalArgumentException();

        this.fed = (AbstractScaleOutFederation) fed;
        
        this.pred = pred;

//        this.namespace = pred.getOnlyRelationName() + "."
//                + keyOrder.getIndexName();
//
//        this.keyOrder = keyOrder;

        this.timestamp = timestamp;

//        /*
//         * Note: we can use the read view of the relation to get the IKeyBuilder
//         * even if we will be writing on the relation since the IKeyBuilder
//         * semantics can not be readily changed once an index has been created.
//         */
        {

            final String namespace = pred.getOnlyRelationName();
            
//            @SuppressWarnings("unchecked")
            this.relation = (IRelation) fed.getResourceLocator().locate(
                    namespace, timestamp);

            if (relation == null)
                throw new RuntimeException("Not found: relation=" + namespace
                        + "@" + TimestampUtility.toString(timestamp));
            
//            final IIndex index = relation.getIndex(keyOrder);
//
//            this.keyBuilder = index.getIndexMetadata().getKeyBuilder();
            
        }
//
//        /*
//         * Resolve a scale-out view of the metadata index for the target
//         * predicate.
//         */
//        {
//
//            mdi = fed.getMetadataIndex(namespace, timestamp);
//
//            if (mdi == null) {
//
//                throw new NoSuchIndexException("name=" + namespace
//                        + ", timestamp=" + TimestampUtility.toString(timestamp));
//
//            }
//            
//        }

        /*
         * @todo Conditionally choose the best algorithm. I am not sure if we
         * might want to do via an annotation (of the target predicate), if this
         * is something that we determine automatically, or if there is some
         * combination of the two which will work best. (For example, the query
         * optimizer will know if the target predicate will be fully bound for
         * all inputs but we do not have that information available locally).
         * 
         * @todo From the perspective of the unit tests, it is important to have
         * this be declarative so we can test each of the different algorithms
         * independently.
         */
        final boolean predicateWillBeFullyBound = false;
        if (predicateWillBeFullyBound) {

            /*
             * Uses ISplitter, but requires keys based on fully bound predicate.
             * 
             * Note: This can not be used when there are optional joins other
             * other conditionals which could result in the predicate not being
             * fully bound for some evaluation paths.
             */
            
            /*
             * The key order which will be used for that relation for a fully
             * bound predicate.
             */
            final IKeyOrder keyOrder = null;
            
            algorithm = new Algorithm_FullyBoundPredicate(this, keyOrder);
            
        } else {

            // general purpose.
            algorithm = new Algorithm_NestedLocatorScan(this);

        }

    }

    /**
     * Resolve a scale-out view of the metadata index for the target predicate.
     */
    protected IMetadataIndex getMetadataIndex(final IKeyOrder keyOrder) {

        final String namespace = relation.getFQN(keyOrder);

        final IMetadataIndex mdi = fed.getMetadataIndex(namespace, timestamp);

        if (mdi == null) {

            throw new NoSuchIndexException("name=" + namespace + ", timestamp="
                    + TimestampUtility.toString(timestamp));

        }
        
        return mdi;

    }

    /**
     * Maps the chunk of {@link IBindingSet}s across the index partition(s) for
     * the sink join dimension.
     * 
     * @param a
     *            A chunk of {@link IBindingSet}s.
     */
    protected void handleChunk(final E[] chunk) {

        @SuppressWarnings("unchecked")
        final Bundle[] bundles = new Bundle[chunk.length];

		/*
		 * Create the asBound version of the predicate and the associated
		 * fromKey for each bindingSet in the chunk.
		 */
		for (int i = 0; i < chunk.length; i++) {

			// an intermediate solution.
			final IBindingSet bindingSet = chunk[i];

			// the asBound version of the predicate.
			final IPredicate asBound = pred.asBound(bindingSet);

			// the index which will be used for that asBound predicate.
			final IKeyOrder keyOrder = relation.getKeyOrder(asBound);

			// the key builder associated with that index.
			final IKeyBuilder keyBuilder = relation.getIndex(keyOrder)
					.getIndexMetadata().getKeyBuilder();

			// save the bundle for processing.
			bundles[i] = new Bundle(keyBuilder, asBound, keyOrder,
					bindingSet);

		}

//        /*
//         * Sort the binding sets in the chunk by the fromKey associated with
//         * each asBound predicate. [Sort is moved into the implementation.]
//         */
//        Arrays.sort(bundles);

        /*
         * Map the bundles over the shards.
         */
        algorithm.mapOverShards(bundles);

    }

    /**
     * Locator scan for the index partitions for that predicate as bound.
     */
    protected Iterator locatorScan(
            final IKeyOrder keyOrder, final byte[] fromKey,
            final byte[] toKey) {

        final String name = relation.getFQN(keyOrder);

        return fed
                .locatorScan(name, timestamp, fromKey, toKey, false/* reverse */);

    }

    /**
     * Extended to flush each buffer which targets a specific index partition as
     * well.
     * 

     * {@inheritDoc}
     */
    @Override
    public long flush() {

        final long n = super.flush();

        for (IBuffer sink : sinks.values()) {

            if (!sink.isEmpty())
                sink.flush();

        }

        return n;

    }

    /**
     * The allocated sinks.
     * 

     * Note: Since the collection is not thread-safe, synchronization is
     * required when adding to the collection and when visiting the elements of
     * the collection. However, the {@link MapBindingSetsOverShardsBuffer} is not
     * thread-safe either so this should be Ok.
     */
    private final LinkedHashMap/* sink */> sinks = new LinkedHashMap>();

    /**
     * An immutable view of the sinks.
     * 
     * @todo Rather than exposing all sinks and requiring that all sinks be
     *       fully buffered, it would be better to hook the production of the
     *       binding sets for a given {@link PartitionLocator} since many of the
     *       {@link IShardMapper}s can know when they will not see another
     *       binding set for a given {@link PartitionLocator} and hence the data
     *       can be immediately flushed to that target.
     *       

     *       A similar scaling concern with very large numbers of source binding
     *       sets is that we may be better off applying a {@link SortOp} to the
     *       binding sets, which allows us to use external merge sorts or even
     *       hash partitioned distributed merge sorts. This suggests that we
     *       should really unpack this class as a general purpose operator with
     *       special integration into the query engine.
     */
    public Map/* sink */> getSinks() {

        return Collections.unmodifiableMap(sinks);

    }

    /**
     * Return the buffer used to absorb binding sets which target the specified
     * index partition.
     * 
     * @param locator
     *            The locator for the target index partition.
     * 
     * @return The buffer.
     */
    protected IBuffer getBuffer(final PartitionLocator locator) {

        IBuffer sink = sinks.get(locator);

        if (sink == null) {

            sinks.put(locator, sink = newBuffer(locator));

        }

        return sink;

    }

    /**
     * Return a buffer onto which binding sets will be written which are
     * destined for the specified shard.
     * 
     * Note: The concrete implementation may chose whether to associate buffers
     * with the target shard or the target node.
     * 
     * @param locator
     *            The locator for the target index partition.
     * 
     * @return The buffer.
     */
    abstract protected IBuffer newBuffer(PartitionLocator locator);

}