// com.bigdata.btree.UnisolatedReadWriteIndex Maven / Gradle / Ivy
/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Jan 10, 2008
*/
package com.bigdata.btree;
import java.util.Iterator;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import com.bigdata.bop.cost.BTreeCostModel;
import com.bigdata.bop.cost.DiskCostModel;
import com.bigdata.bop.cost.ScanCostReport;
import com.bigdata.btree.proc.AbstractKeyArrayIndexProcedureConstructor;
import com.bigdata.btree.proc.IIndexProcedure;
import com.bigdata.btree.proc.IKeyRangeIndexProcedure;
import com.bigdata.btree.proc.IResultHandler;
import com.bigdata.btree.proc.ISimpleIndexProcedure;
import com.bigdata.btree.view.FusedView;
import com.bigdata.counters.CounterSet;
import com.bigdata.journal.ConcurrencyManager;
import com.bigdata.journal.IConcurrencyManager;
import com.bigdata.mdi.IResourceMetadata;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.service.Split;
import cutthecrap.utils.striterators.IFilter;
/**
*
* A view onto an unisolated index partition which enforces the constraint that
* either concurrent readers -or- a single writer may have access to the
* unisolated index at any given time. This provides the maximum possible
* concurrency for an unisolated index using an internal {@link ReadWriteLock}
* to coordinate threads.
*
*
* The possible concurrency with this approach is higher than that provided by
* the {@link IConcurrencyManager} since the latter only allows a single process
* access to the unisolated index while this class can allow multiple readers
* concurrent access to the same unisolated index. The use of this class
* is NOT compatible with the {@link IConcurrencyManager} (the
* {@link IConcurrencyManager} does not respect the locks managed by this
* class).
*
*
* This class does NOT handle deadlock detection. However, it does not expose
* the underlying lock and the scope of the acquired lock should always be
* restricted to a single operation as defined by {@link IIndex}. If you
* circumvent this by writing and submitting an {@link IIndexProcedure} that
* attempts an operation on another {@link UnisolatedReadWriteIndex} then a
* deadlock MAY occur.
*
*
* The point test methods on this class (get, contains, lookup, remove) have
* relatively high overhead since they need to acquire and release the lock per
* point test. If you need to do a bunch of point tests, then submit an
* {@link IIndexProcedure} that will run against the underlying index once it
* has acquired the appropriate lock -- point tests from within the
* {@link IIndexProcedure} will be very efficient.
*
*
* Design notes
*
* This class was developed to squeeze the maximum possible performance out of a
* local database. The use of this class can provide correct interleaving of
* readers and writers without the use of the {@link ConcurrencyManager} and the
* group commit protocol which it imposes on writers. It also facilitates the
* reuse of the buffers backing the unisolated index, which can reduce IO
* associated with index operations when compared to reading on a read-committed
* view of the index with concurrent writes and interleaved commits on the
* corresponding unisolated index.
*
* Reading on the read-committed index view has greater possible concurrency,
* but requires that writes are committed before they become visible and must
* read the data from the disk since it does not have access to the buffers for
* the unisolated index. Requiring a commit in order for the writes to become
* visible imposes significant latency, especially when computing the fix point
* of a rule set which may take multiple rounds. Reading on the unisolated index
* should do better in terms of buffer reuse and does NOT require commits or
* checkpoints of the index for writes to become visible to readers but does
* require a means to correctly interleave access to the unisolated index, which
* is the purpose of this class.
*
* While the lock manager could be modified to support Share vs Exclusive locks
* and to use Share locks for readers and Exclusive locks for writers, writers
* would still block until the next commit so the throughput (e.g., when
* computing the fix point of a rule set) is significantly lower.
*
* @author Bryan Thompson
*/
public class UnisolatedReadWriteIndex implements IIndex, ILinearList,
IReadWriteLockManager
// NOT ILocalBTreeView
{
/**
* The object that manages the locks for the associated index.
*/
private final ReadWriteLockManager lockManager;
@Override
public Lock readLock() {
return lockManager.readLock();
}
@Override
public Lock writeLock() {
return lockManager.writeLock();
}
@Override
public int getReadLockCount() {
return lockManager.getReadLockCount();
}
@Override
public boolean isReadOnly() {
return lockManager.isReadOnly();
}
/**
* Return the appropriate lock depending on whether or not the procedure
* asserts that it is read-only.
*
* @param proc
* The procedure.
*
* @return The lock.
*/
private Lock lock(final IIndexProcedure> proc) {
if (proc == null)
throw new IllegalArgumentException();
if (proc.isReadOnly()) {
return readLock();
}
return writeLock();
}
private void unlock(final Lock lock) {
lock.unlock();
}
/**
* The unisolated index partition. This is either a {@link BTree} or a
* {@link FusedView}.
*/
final private BTree ndx;
/**
* The default capacity for iterator reads against the underlying index. The
* main purpose of the capacity is to reduce the contention for the
* {@link ReadWriteLock}.
*/
final private int defaultCapacity;
/**
* The default capacity for iterator reads against the underlying index. The
* main purpose of the capacity is to reduce the contention for the
* {@link ReadWriteLock}.
*/
final static protected int DEFAULT_CAPACITY = 1000;// 10000;
/**
* Creates a view of an unisolated index that will enforce the concurrency
* constraints of the {@link BTree} class, but only among other instances of
* this class for the same underlying index.
*
* @param ndx
* The underlying unisolated index.
*
* @throws IllegalArgumentException
* if the index is null
.
*/
public UnisolatedReadWriteIndex(final BTree ndx) {
this(ndx, DEFAULT_CAPACITY);
}
/**
* Creates a view of an unisolated index that will enforce the concurrency
* constraints of the {@link BTree} class, but only among other instances of
* this class for the same underlying index.
*
* @param ndx
* The underlying unisolated index.
* @param defaultCapacity
* The capacity for iterator reads against the underlying index.
* The main purpose of the capacity is to reduce the contention
* for the {@link ReadWriteLock}. Relatively small values should
* therefore be fine. See {@link #DEFAULT_CAPACITY}.
*
* @throws IllegalArgumentException
* if the index is null
.
*/
public UnisolatedReadWriteIndex(final BTree ndx, final int defaultCapacity) {
if (ndx == null)
throw new IllegalArgumentException();
if (defaultCapacity <= 0)
throw new IllegalArgumentException();
this.ndx = ndx;
this.defaultCapacity = defaultCapacity;
this.lockManager = ReadWriteLockManager.getLockManager(ndx);
}
@Override
public String toString() {
return getClass().getSimpleName() + "{" + ndx.toString() + "}";
}
@Override
public IndexMetadata getIndexMetadata() {
return ndx.getIndexMetadata();
}
@Override
public IResourceMetadata[] getResourceMetadata() {
return getIndexMetadata().getPartitionMetadata().getResources();
}
@Override
public CounterSet getCounters() {
return ndx.getCounters();
}
/**
* This throws an exception. If you need access to the {@link ICounter} for
* the index partition, then write and submit an {@link IIndexProcedure}.
*
* @throws UnsupportedOperationException
*/
@Override
public ICounter getCounter() {
throw new UnsupportedOperationException();
}
@Override
public boolean contains(final Object key) {
final Lock lock = readLock();
lock.lock();
try {
return ndx.contains(key);
} finally {
unlock(lock);
}
}
@Override
public Object insert(final Object key, final Object value) {
final Lock lock = writeLock();
lock.lock();
try {
return ndx.insert(key,value);
} finally {
unlock(lock);
}
}
@Override
public Object lookup(final Object key) {
final Lock lock = readLock();
lock.lock();
try {
return ndx.lookup(key);
} finally {
unlock(lock);
}
}
@Override
public Object remove(final Object key) {
final Lock lock = writeLock();
lock.lock();
try {
return ndx.remove(key);
} finally {
unlock(lock);
}
}
@Override
public boolean contains(final byte[] key) {
final Lock lock = readLock();
lock.lock();
try {
return ndx.contains(key);
} finally {
unlock(lock);
}
}
@Override
public byte[] lookup(final byte[] key) {
final Lock lock = readLock();
lock.lock();
try {
return ndx.lookup(key);
} finally {
unlock(lock);
}
}
@Override
public byte[] insert(final byte[] key, final byte[] value) {
final Lock lock = writeLock();
lock.lock();
try {
return ndx.insert(key,value);
} finally {
unlock(lock);
}
}
@Override
public byte[] putIfAbsent(final byte[] key, final byte[] value) {
final Lock lock = writeLock();
lock.lock();
try {
return ndx.putIfAbsent(key,value);
} finally {
unlock(lock);
}
}
@Override
public byte[] remove(final byte[] key) {
final Lock lock = writeLock();
lock.lock();
try {
return ndx.remove(key);
} finally {
unlock(lock);
}
}
@Override
public long rangeCount() {
final Lock lock = readLock();
lock.lock();
try {
return ndx.rangeCount();
} finally {
unlock(lock);
}
}
@Override
public long rangeCount(final byte[] fromKey, final byte[] toKey) {
final Lock lock = readLock();
lock.lock();
try {
return ndx.rangeCount(fromKey, toKey);
} finally {
unlock(lock);
}
}
@Override
public long rangeCountExact(final byte[] fromKey, final byte[] toKey) {
final Lock lock = readLock();
lock.lock();
try {
return ndx.rangeCountExact(fromKey, toKey);
} finally {
unlock(lock);
}
}
@Override
public long rangeCountExactWithDeleted(final byte[] fromKey, final byte[] toKey) {
final Lock lock = readLock();
lock.lock();
try {
return ndx.rangeCountExactWithDeleted(fromKey, toKey);
} finally {
unlock(lock);
}
}
@Override
@SuppressWarnings("rawtypes")
final public ITupleIterator rangeIterator() {
return rangeIterator(null, null);
}
@Override
@SuppressWarnings("rawtypes")
public ITupleIterator rangeIterator(final byte[] fromKey, final byte[] toKey) {
return rangeIterator(fromKey, toKey, 0/* capacity */,
IRangeQuery.DEFAULT, null/* filter */);
}
/**
* The iterator will read on the underlying index in chunks, buffering
* tuples as it goes. The buffer capacity is as specified by the caller and
* will default to the capacity specified to the ctor. The iterator acquires
* and releases the appropriate lock (either the shared read lock or the
* exclusive write lock) before it fetches reads the next chunk of tuples
* from the underlying index. Likewise, the mutation methods on the iterator
* will acquire the exclusive write lock.
*/
@Override
@SuppressWarnings("rawtypes")
public ITupleIterator rangeIterator(final byte[] fromKey, final byte[] toKey,
int capacity, int flags, final IFilter filter) {
if (capacity == 0) {
/*
* When the buffer capacity is not specified, use the default from
* the constructor.
*/
capacity = defaultCapacity;
}
if ((flags & IRangeQuery.REMOVEALL) != 0) {
/*
* AbstractChunkedIterator handles REMOVEALL by buffering the keys
* for the tuples to be deleted and then deleting the keys in bulk
* after each chunk. Therefore we need to ensure that the KEYS flag
* is set here.
*/
flags |= IRangeQuery.KEYS;
}
/*
* Note: Accepts the delegate. The methods that access the delegate are
* all overridden to acquire the appropriate lock.
*/
return new ChunkedIterator(this.ndx, fromKey, toKey, capacity, flags,
filter);
}
/**
* Inner class provides a buffered iterator reading against the underlying
* unisolated index. The class coordinates reads (and writes) with the outer
* class using the appropriate {@link Lock}. Buffering means that the
* iterator will read a chunk of tuples at a time, which reduces contention
* for the {@link Lock}.
*
* @author Bryan Thompson
*/
private class ChunkedIterator extends ChunkedLocalRangeIterator {
private ChunkedIterator(final IIndex ndx, final byte[] fromKey, final byte[] toKey,
final int capacity, final int flags, final IFilter filter) {
super(ndx, fromKey, toKey, capacity, flags, filter);
}
/**
* Extended to acquire the exclusive write lock.
*/
@Override
protected void deleteBehind(final int n, final Iterator keys) {
final Lock lock = writeLock();
lock.lock();
try {
super.deleteBehind(n, keys);
} finally {
unlock(lock);
}
}
/**
* Extended to acquire the exclusive write lock.
*/
@Override
protected void deleteLast(final byte[] key) {
final Lock lock = writeLock();
lock.lock();
try {
super.deleteLast(key);
} finally {
unlock(lock);
}
}
/**
* Extended to acquire the shared read lock (or the exclusive write lock
* if {@link IRangeQuery#REMOVEALL} was specified for the iterator).
*/
@Override
protected ResultSet getResultSet(final long timestamp, final byte[] fromKey,
final byte[] toKey, final int capacity, final int flags, final IFilter filter) {
final boolean mutation = (flags & IRangeQuery.REMOVEALL) != 0;
final Lock lock = mutation ? writeLock() : readLock();
lock.lock();
try {
return super.getResultSet(timestamp, fromKey, toKey, capacity,
flags, filter);
} finally {
unlock(lock);
}
}
} // ChunkedIterator
@Override
public T submit(final byte[] key, final ISimpleIndexProcedure proc) {
final Lock lock = lock(proc);
lock.lock();
try {
/*
* Apply the procedure to the underlying index now that we are
* holding the appropriate lock.
*/
return ndx.submit(key, proc);
} finally {
unlock(lock);
}
}
@Override
@SuppressWarnings("rawtypes")
public void submit(final byte[] fromKey, final byte[] toKey,
final IKeyRangeIndexProcedure proc, final IResultHandler handler) {
final Lock lock = lock(proc);
lock.lock();
try {
/*
* Apply the procedure to the underlying index now that we are
* holding the appropriate lock.
*/
ndx.submit(fromKey, toKey, proc, handler);
} finally {
unlock(lock);
}
}
@Override
@SuppressWarnings({ "rawtypes", "unchecked" })
public void submit(final int fromIndex, final int toIndex, final byte[][] keys,
final byte[][] vals, final AbstractKeyArrayIndexProcedureConstructor ctor,
final IResultHandler aggregator) {
if (ctor == null)
throw new IllegalArgumentException();
final IIndexProcedure proc = ctor.newInstance(this, fromIndex, toIndex,
keys, vals);
final Lock lock = lock(proc);
lock.lock();
try {
/*
* Apply the procedure to the underlying index now that we are
* holding the appropriate lock.
*/
final Object result = proc.apply(ndx);
if (aggregator != null) {
aggregator.aggregate(result, new Split(null, fromIndex, toIndex));
}
} finally {
unlock(lock);
}
}
/**
* Estimate the cost of a range scan.
*
* @param diskCostModel
* The disk cost model.
* @param rangeCount
* The #of tuples to be visited.
*
* @return The estimated cost.
*/
public ScanCostReport estimateCost(final DiskCostModel diskCostModel,
final long rangeCount) {
// BTree is its own statistics view.
final IBTreeStatistics stats = (BTree) ndx;
// Estimate cost based on random seek per node/leaf.
final double cost = new BTreeCostModel(diskCostModel).rangeScan(
rangeCount, stats.getBranchingFactor(), stats.getHeight(),
stats.getUtilization().getLeafUtilization());
return new ScanCostReport(rangeCount, cost);
}
@Override
public long indexOf(final byte[] key) {
final Lock lock = readLock();
lock.lock();
try {
return ndx.indexOf(key);
} finally {
lock.unlock();
}
}
@Override
public byte[] keyAt(final long index) {
final Lock lock = readLock();
lock.lock();
try {
return ndx.keyAt(index);
} finally {
lock.unlock();
}
}
@Override
public byte[] valueAt(final long index) {
final Lock lock = readLock();
lock.lock();
try {
return ndx.valueAt(index);
} finally {
lock.unlock();
}
}
// /*
// * ILocalBTreeView
// *
// * FIXME Perhaps it is NOT a good idea to implement this the ILocalBTreeView
// * interface since we can not safely expose the backing AbstractBTree
// * objects (especially the mutable BTree) without breaking the thread-safety
// * contract offered by the UnisolatedReadWriteIndex.
// */
//
// @Override
// public int getSourceCount() {
// return ndx.getSourceCount();
// }
//
// /**
// * @throws UnsupportedOperationException
// * It is not possible to return the backing indices without
// * breaking the thread-safety pattern imposed by the
// * {@link UnisolatedReadWriteIndex}.
// */
// @Override
// public AbstractBTree[] getSources() {
//// return new AbstractBTree[] { ndx };
// throw new UnsupportedOperationException();
// }
//
// /**
// * @throws UnsupportedOperationException
// * It is not possible to return the backing index without
// * breaking the thread-safety pattern imposed by the
// * {@link UnisolatedReadWriteIndex}.
// *
// * TODO It might be possible to change the return type for this
// * method to something that was a greatest common set of shared
// * interfaces for a {@link BTree} a
// * {@link UnisolatedReadWriteIndex} where the backing index is a
// * simple {@link BTree} rather than a {@link FusedView}.
// */
// @Override
// public BTree getMutableBTree() {
//// return ndx;
// throw new UnsupportedOperationException();
// }
//
// @Override
// public IBloomFilter getBloomFilter() {
// return ndx.getBloomFilter();
// }
/**
* Return the backing store for the index.
*/
public IRawStore getStore() {
final Lock lock = readLock();
lock.lock();
try {
return ndx.getStore();
} finally {
lock.unlock();
}
}
}