// com.bigdata.btree.UnisolatedReadWriteIndex Maven / Gradle / Ivy
//
// Go to download
/*

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Jan 10, 2008
 */

package com.bigdata.btree;

import java.util.Iterator;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;

import com.bigdata.bop.cost.BTreeCostModel;
import com.bigdata.bop.cost.DiskCostModel;
import com.bigdata.bop.cost.ScanCostReport;
import com.bigdata.btree.proc.AbstractKeyArrayIndexProcedureConstructor;
import com.bigdata.btree.proc.IIndexProcedure;
import com.bigdata.btree.proc.IKeyRangeIndexProcedure;
import com.bigdata.btree.proc.IResultHandler;
import com.bigdata.btree.proc.ISimpleIndexProcedure;
import com.bigdata.btree.view.FusedView;
import com.bigdata.counters.CounterSet;
import com.bigdata.journal.ConcurrencyManager;
import com.bigdata.journal.IConcurrencyManager;
import com.bigdata.mdi.IResourceMetadata;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.service.Split;

import cutthecrap.utils.striterators.IFilter;

/**
 * 
 * A view onto an unisolated index partition which enforces the constraint that
 * either concurrent readers -or- a single writer may have access to the
 * unisolated index at any given time. This provides the maximum possible
 * concurrency for an unisolated index using an internal {@link ReadWriteLock}
 * to coordinate threads.
 * 
 * 
 * The possible concurrency with this approach is higher than that provided by
 * the {@link IConcurrencyManager} since the latter only allows a single process
 * access to the unisolated index while this class can allow multiple readers
 * concurrent access to the same unisolated index. The use of this class
 * is NOT compatible with the {@link IConcurrencyManager} (the
 * {@link IConcurrencyManager} does not respect the locks managed by this
 * class).
 * 
 * 
 * This class does NOT handle deadlock detection. However, it does not expose
 * the underlying lock and the scope of the acquired lock should always be
 * restricted to a single operation as defined by {@link IIndex}. If you
 * circumvent this by writing and submitting an {@link IIndexProcedure} that
 * attempts an operation on another {@link UnisolatedReadWriteIndex} then a
 * deadlock MAY occur.
 * 
 * 
 * The point test methods on this class (get, contains, lookup, remove) have
 * relatively high overhead since they need to acquire and release the lock per
 * point test. If you need to do a bunch of point tests, then submit an
 * {@link IIndexProcedure} that will run against the underlying index once it
 * has acquired the appropriate lock -- point tests from within the
 * {@link IIndexProcedure} will be very efficient.
 * 
 * 
 * Design notes
 * 
 * This class was developed to squeeze the maximum possible performance out of a
 * local database. The use of this class can provide correct interleaving of
 * readers and writers without the use of the {@link ConcurrencyManager} and the
 * group commit protocol which it imposes on writers. It also facilitates the
 * reuse of the buffers backing the unisolated index, which can reduce IO
 * associated with index operations when compared to reading on a read-committed
 * view of the index with concurrent writes and interleaved commits on the
 * corresponding unisolated index.
 * 
 * Reading on the read-committed index view has greater possible concurrency,
 * but requires that writes are committed before they become visible and must
 * read the data from the disk since it does not have access to the buffers for
 * the unisolated index. Requiring a commit in order for the writes to become
 * visible imposes significant latency, especially when computing the fix point
 * of a rule set which may take multiple rounds. Reading on the unisolated index
 * should do better in terms of buffer reuse and does NOT require commits or
 * checkpoints of the index for writes to become visible to readers but does
 * require a means to correctly interleave access to the unisolated index, which
 * is the purpose of this class.
 * 
 * While the lock manager could be modified to support Share vs Exclusive locks
 * and to use Share locks for readers and Exclusive locks for writers, writers
 * would still block until the next commit so the throughput (e.g., when
 * computing the fix point of a rule set) is significantly lower.
 * 
 * @author Bryan Thompson
 */
public class UnisolatedReadWriteIndex implements IIndex, ILinearList,
        IReadWriteLockManager
        // NOT ILocalBTreeView 
        {

    /**
     * The object that manages the locks for the associated index.
     */
    private final ReadWriteLockManager lockManager;
    
    @Override
    public Lock readLock() {
        return lockManager.readLock();
    }

    @Override
    public Lock writeLock() {
        return lockManager.writeLock();
    }

    @Override
    public int getReadLockCount() {
        return lockManager.getReadLockCount();
    }

    @Override
    public boolean isReadOnly() {
        return lockManager.isReadOnly();
    }

    /**
     * Return the appropriate lock depending on whether or not the procedure
     * asserts that it is read-only.
     * 
     * @param proc
     *            The procedure.
     *            
     * @return The lock.
     */
    private Lock lock(final IIndexProcedure proc) {

        if (proc == null)
            throw new IllegalArgumentException();

        if (proc.isReadOnly()) {

            return readLock();

        }
        
        return writeLock();
        
    }

    private void unlock(final Lock lock) {
        
        lock.unlock();
        
    }
    
    /**
     * The unisolated index partition. This is either a {@link BTree} or a
     * {@link FusedView}.
     */
    final private BTree ndx;
    
    /**
     * The default capacity for iterator reads against the underlying index. The
     * main purpose of the capacity is to reduce the contention for the
     * {@link ReadWriteLock}.
     */
    final private int defaultCapacity;
    
    /**
     * The default capacity for iterator reads against the underlying index. The
     * main purpose of the capacity is to reduce the contention for the
     * {@link ReadWriteLock}.
     */
    final static protected int DEFAULT_CAPACITY = 1000;// 10000;

    /**
     * Creates a view of an unisolated index that will enforce the concurrency
     * constraints of the {@link BTree} class, but only among other instances of
     * this class for the same underlying index.
     * 
     * @param ndx
     *            The underlying unisolated index.
     * 
     * @throws IllegalArgumentException
     *             if the index is null.
     */
    public UnisolatedReadWriteIndex(final BTree ndx) {
        
        this(ndx, DEFAULT_CAPACITY);
        
    }
    
    /**
     * Creates a view of an unisolated index that will enforce the concurrency
     * constraints of the {@link BTree} class, but only among other instances of
     * this class for the same underlying index.
     * 
     * @param ndx
     *            The underlying unisolated index.
     * @param defaultCapacity
     *            The capacity for iterator reads against the underlying index.
     *            The main purpose of the capacity is to reduce the contention
     *            for the {@link ReadWriteLock}. Relatively small values should
     *            therefore be fine. See {@link #DEFAULT_CAPACITY}.
     * 
     * @throws IllegalArgumentException
     *             if the index is null.
     */
    public UnisolatedReadWriteIndex(final BTree ndx, final int defaultCapacity) {

        if (ndx == null)
            throw new IllegalArgumentException();

        if (defaultCapacity <= 0)
            throw new IllegalArgumentException();

        this.ndx = ndx;

        this.defaultCapacity = defaultCapacity;

        this.lockManager = ReadWriteLockManager.getLockManager(ndx);
        
    }

    @Override
    public String toString() {
        
        return getClass().getSimpleName() + "{" + ndx.toString() + "}";
        
    }
    
    @Override
    public IndexMetadata getIndexMetadata() {

        return ndx.getIndexMetadata();
        
    }

    @Override
    public IResourceMetadata[] getResourceMetadata() {

        return getIndexMetadata().getPartitionMetadata().getResources();

    }

    @Override
    public CounterSet getCounters() {

        return ndx.getCounters();
        
    }

    /**
     * This throws an exception. If you need access to the {@link ICounter} for
     * the index partition, then write and submit an {@link IIndexProcedure}.
     * 
     * @throws UnsupportedOperationException
     */
    @Override
    public ICounter getCounter() {
        
        throw new UnsupportedOperationException();
        
    }

    @Override
    public boolean contains(final Object key) {

        final Lock lock = readLock();
        lock.lock();        
        try {
            
            return ndx.contains(key);
            
        } finally {
            
            unlock(lock);
            
        }
        
    }

    @Override
    public Object insert(final Object key, final Object value) {

        final Lock lock = writeLock();
        lock.lock();
        try {
            
            return ndx.insert(key,value);
            
        } finally {
            
            unlock(lock);
            
        }
    
    }

    @Override
    public Object lookup(final Object key) {
        
        final Lock lock = readLock();
        lock.lock();
        try {
            
            return ndx.lookup(key);
            
        } finally {
            
            unlock(lock);
            
        }
        
    }

    @Override
    public Object remove(final Object key) {

        final Lock lock = writeLock();
        lock.lock();
        try {
            
            return ndx.remove(key);
            
        } finally {
            
            unlock(lock);
            
        }
    
    }

    @Override
    public boolean contains(final byte[] key) {

        final Lock lock = readLock();
        lock.lock();        
        try {
            
            return ndx.contains(key);
            
        } finally {
            
            unlock(lock);
            
        }
        
    }
    
    @Override
    public byte[] lookup(final byte[] key) {

        final Lock lock = readLock();
        lock.lock();
        try {
            
            return ndx.lookup(key);
            
        } finally {
            
            unlock(lock);
            
        }
        
    }

    @Override
    public byte[] insert(final byte[] key, final byte[] value) {

        final Lock lock = writeLock();
        lock.lock();        
        try {
            
            return ndx.insert(key,value);
            
        } finally {
            
            unlock(lock);
            
        }
        
    }

    @Override
    public byte[] putIfAbsent(final byte[] key, final byte[] value) {

        final Lock lock = writeLock();
        lock.lock();        
        try {
            
            return ndx.putIfAbsent(key,value);
            
        } finally {
            
            unlock(lock);
            
        }
        
    }

    @Override
    public byte[] remove(final byte[] key) {

        final Lock lock = writeLock();
        lock.lock();
        try {
            
            return ndx.remove(key);
            
        } finally {
            
            unlock(lock);
            
        }

    }

    @Override
    public long rangeCount() {

        final Lock lock = readLock();
        lock.lock();
        try {
        
            return ndx.rangeCount();
            
        } finally {
            
            unlock(lock);
            
        }
        
    }
    
    @Override
    public long rangeCount(final byte[] fromKey, final byte[] toKey) {

        final Lock lock = readLock();
        lock.lock();
        try {
        
            return ndx.rangeCount(fromKey, toKey);
            
        } finally {
            
            unlock(lock);
            
        }

    }

    @Override
    public long rangeCountExact(final byte[] fromKey, final byte[] toKey) {

        final Lock lock = readLock();
        lock.lock();
        try {
        
            return ndx.rangeCountExact(fromKey, toKey);
            
        } finally {
            
            unlock(lock);
            
        }
        
    }

    @Override
    public long rangeCountExactWithDeleted(final byte[] fromKey, final byte[] toKey) {

        final Lock lock = readLock();
        lock.lock();
        try {
        
            return ndx.rangeCountExactWithDeleted(fromKey, toKey);
            
        } finally {
            
            unlock(lock);
            
        }
        
    }

    @Override
    @SuppressWarnings("rawtypes")
    final public ITupleIterator rangeIterator() {

        return rangeIterator(null, null);

    }
    
    @Override
    @SuppressWarnings("rawtypes")
    public ITupleIterator rangeIterator(final byte[] fromKey, final byte[] toKey) {

        return rangeIterator(fromKey, toKey, 0/* capacity */,
                IRangeQuery.DEFAULT, null/* filter */);

    }

    /**
     * The iterator will read on the underlying index in chunks, buffering
     * tuples as it goes. The buffer capacity is as specified by the caller and
     * will default to the capacity specified to the ctor. The iterator acquires
     * and releases the appropriate lock (either the shared read lock or the
     * exclusive write lock) before it fetches reads the next chunk of tuples
     * from the underlying index. Likewise, the mutation methods on the iterator
     * will acquire the exclusive write lock.
     */
    @Override
    @SuppressWarnings("rawtypes")
    public ITupleIterator rangeIterator(final byte[] fromKey, final byte[] toKey,
            int capacity, int flags, final IFilter filter) {

        if (capacity == 0) {
         
            /*
             * When the buffer capacity is not specified, use the default from
             * the constructor.
             */
            
            capacity = defaultCapacity;
            
        }

        if ((flags & IRangeQuery.REMOVEALL) != 0) {

            /*
             * AbstractChunkedIterator handles REMOVEALL by buffering the keys
             * for the tuples to be deleted and then deleting the keys in bulk
             * after each chunk. Therefore we need to ensure that the KEYS flag
             * is set here.
             */
            
            flags |= IRangeQuery.KEYS;
            
        }
        
        /*
         * Note: Accepts the delegate. The methods that access the delegate are
         * all overridden to acquire the appropriate lock.
         */
        
        return new ChunkedIterator(this.ndx, fromKey, toKey, capacity, flags,
                filter);

    }

    /**
     * Inner class provides a buffered iterator reading against the underlying
     * unisolated index. The class coordinates reads (and writes) with the outer
     * class using the appropriate {@link Lock}. Buffering means that the
     * iterator will read a chunk of tuples at a time, which reduces contention
     * for the {@link Lock}.
     * 
     * @author Bryan Thompson
     */
    private class ChunkedIterator extends ChunkedLocalRangeIterator {

        private ChunkedIterator(final IIndex ndx, final byte[] fromKey, final byte[] toKey,
                final int capacity, final int flags, final IFilter filter) {
            
            super(ndx, fromKey, toKey, capacity, flags, filter);
            
        }

        /**
         * Extended to acquire the exclusive write lock.
         */
        @Override
        protected void deleteBehind(final int n, final Iterator keys) {

            final Lock lock = writeLock();
            lock.lock();
            try {
            
                super.deleteBehind(n, keys);
                
            } finally {
                
                unlock(lock);
                
            }
            
        }

        /**
         * Extended to acquire the exclusive write lock.
         */
        @Override
        protected void deleteLast(final byte[] key) {

            final Lock lock = writeLock();
            lock.lock();
            try {
            
                super.deleteLast(key);
                
            } finally {
                
                unlock(lock);
                
            }
            
        }

        /**
         * Extended to acquire the shared read lock (or the exclusive write lock
         * if {@link IRangeQuery#REMOVEALL} was specified for the iterator).
         */
        @Override
        protected ResultSet getResultSet(final long timestamp, final byte[] fromKey,
                final byte[] toKey, final int capacity, final int flags, final IFilter filter) {

            final boolean mutation = (flags & IRangeQuery.REMOVEALL) != 0;

            final Lock lock = mutation ? writeLock() : readLock();
            lock.lock();
            try {

                return super.getResultSet(timestamp, fromKey, toKey, capacity,
                        flags, filter);
                
            } finally {
                
                unlock(lock);
                
            }
            
        }
        
    } // ChunkedIterator
    
    @Override
    public  T submit(final byte[] key, final ISimpleIndexProcedure proc) {

        final Lock lock = lock(proc);
        lock.lock();
        try {

            /*
             * Apply the procedure to the underlying index now that we are
             * holding the appropriate lock.
             */

            return ndx.submit(key, proc);

        } finally {

            unlock(lock);

        }

    }

    @Override
    @SuppressWarnings("rawtypes")
    public void submit(final byte[] fromKey, final byte[] toKey,
            final IKeyRangeIndexProcedure proc, final IResultHandler handler) {

        final Lock lock = lock(proc);
        lock.lock();
        try {

            /*
             * Apply the procedure to the underlying index now that we are
             * holding the appropriate lock.
             */

            ndx.submit(fromKey, toKey, proc, handler);

        } finally {

            unlock(lock);

        }

    }

    @Override
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public void submit(final int fromIndex, final int toIndex, final byte[][] keys,
            final byte[][] vals, final AbstractKeyArrayIndexProcedureConstructor ctor,
            final IResultHandler aggregator) {

        if (ctor == null)
            throw new IllegalArgumentException();

        final IIndexProcedure proc = ctor.newInstance(this, fromIndex, toIndex,
                keys, vals);

        final Lock lock = lock(proc);
        lock.lock();
        try {
            
            /*
			 * Apply the procedure to the underlying index now that we are
			 * holding the appropriate lock.
			 */
            
            final Object result = proc.apply(ndx);

            if (aggregator != null) {

                aggregator.aggregate(result, new Split(null, fromIndex, toIndex));

            }
            
        } finally {
            
            unlock(lock);
            
        }
        
    }

    /**
     * Estimate the cost of a range scan.
     * 
     * @param diskCostModel
     *            The disk cost model.
     * @param rangeCount
     *            The #of tuples to be visited.
     *            
     * @return The estimated cost.
     */
    public ScanCostReport estimateCost(final DiskCostModel diskCostModel,
            final long rangeCount) {

        // BTree is its own statistics view.
        final IBTreeStatistics stats = (BTree) ndx;

        // Estimate cost based on random seek per node/leaf.
        final double cost = new BTreeCostModel(diskCostModel).rangeScan(
                rangeCount, stats.getBranchingFactor(), stats.getHeight(),
                stats.getUtilization().getLeafUtilization());

        return new ScanCostReport(rangeCount, cost);

    }

    @Override
    public long indexOf(final byte[] key) {
        final Lock lock = readLock();
        lock.lock();
        try {
            return ndx.indexOf(key);
        } finally {
            lock.unlock();
        }
    }

    @Override
    public byte[] keyAt(final long index) {
        final Lock lock = readLock();
        lock.lock();
        try {
            return ndx.keyAt(index);
        } finally {
            lock.unlock();
        }
    }

    @Override
    public byte[] valueAt(final long index) {
        final Lock lock = readLock();
        lock.lock();
        try {
            return ndx.valueAt(index);
        } finally {
            lock.unlock();
        }
    }

//    /*
//	 * ILocalBTreeView
//	 * 
//	 * FIXME Perhaps it is NOT a good idea to implement this the ILocalBTreeView
//	 * interface since we can not safely expose the backing AbstractBTree
//	 * objects (especially the mutable BTree) without breaking the thread-safety
//	 * contract offered by the UnisolatedReadWriteIndex.
//	 */
//    
//	@Override
//	public int getSourceCount() {
//		return ndx.getSourceCount();
//	}
//
//	/**
//	 * @throws UnsupportedOperationException
//	 *             It is not possible to return the backing indices without
//	 *             breaking the thread-safety pattern imposed by the
//	 *             {@link UnisolatedReadWriteIndex}.
//	 */
//	@Override
//	public AbstractBTree[] getSources() {
////		return new AbstractBTree[] { ndx };
//		throw new UnsupportedOperationException();
//	}
//
//	/**
//	 * @throws UnsupportedOperationException
//	 *             It is not possible to return the backing index without
//	 *             breaking the thread-safety pattern imposed by the
//	 *             {@link UnisolatedReadWriteIndex}.
//	 * 
//	 *             TODO It might be possible to change the return type for this
//	 *             method to something that was a greatest common set of shared
//	 *             interfaces for a {@link BTree} a
//	 *             {@link UnisolatedReadWriteIndex} where the backing index is a
//	 *             simple {@link BTree} rather than a {@link FusedView}.
//	 */
//	@Override
//	public BTree getMutableBTree() {
////		return ndx;
//		throw new UnsupportedOperationException();
//	}
//
//	@Override
//	public IBloomFilter getBloomFilter() {
//		return ndx.getBloomFilter();
//	}

    /**
     * Return the backing store for the index.
     */
    public IRawStore getStore() {
    	
        final Lock lock = readLock();
        lock.lock();
        try {
        	return ndx.getStore();
        } finally {
            lock.unlock();
        }
    	
    }
    
}