// com.bigdata.btree.AbstractChunkedTupleIterator
/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Feb 1, 2008
*/
package com.bigdata.btree;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.log4j.Logger;
import com.bigdata.btree.keys.SuccessorUtil;
import com.bigdata.io.ByteArrayBuffer;
import com.bigdata.io.DataInputBuffer;
import com.bigdata.io.DataOutputBuffer;
import com.bigdata.journal.IIndexStore;
import com.bigdata.journal.ITx;
import com.bigdata.journal.TimestampUtility;
import com.bigdata.rawstore.IBlock;
import com.bigdata.util.BytesUtil;
import cutthecrap.utils.striterators.IFilter;
/**
* A chunked iterator that proceeds a {@link ResultSet} at a time. This
* introduces the concept of a {@link #continuationQuery()} so that the iterator
* can materialize the tuples using a sequence of queries that progresses
* through the index until all tuples in the key range have been visited.
*
* @author Bryan Thompson
* @version $Id$
*/
abstract public class AbstractChunkedTupleIterator implements ITupleIterator {
/**
 * Class-wide logger. The INFO/DEBUG level checks are cached in static
 * flags so hot paths do not repeatedly query the logger.
 */
protected static final transient Logger log = Logger
        .getLogger(AbstractChunkedTupleIterator.class);
protected static final transient boolean INFO = log.isInfoEnabled();
protected static final transient boolean DEBUG = log.isDebugEnabled();
/**
 * Error message used by {@link ResultSetTuple#getKey()} when the iterator
 * was not provisioned to request keys from the data service.
 */
static protected transient final String ERR_NO_KEYS = "Keys not requested";
/**
 * Error message used by {@link ResultSetTuple#getValue()} when the iterator
 * was not provisioned to request values from the data service.
 */
static protected transient final String ERR_NO_VALS = "Values not requested";
/**
 * The first key to visit -or- null iff no lower bound.
 */
protected final byte[] fromKey;
/**
 * The first key to NOT visit -or- null iff no upper bound.
 */
protected final byte[] toKey;
/**
 * This controls the #of results per data service query. (A capacity of 0
 * supplied to the ctor is replaced by {@link #getDefaultCapacity()}.)
 */
protected final int capacity;
/**
 * These flags control whether keys and/or values are requested. If neither
 * keys nor values are requested, then this is just a range count operation
 * and you might as well use rangeCount instead.
 */
protected final int flags;
/**
 * Optional filter -or- null iff none.
 */
protected final IFilter filter;
/**
 * The #of range query operations executed.
 */
protected int nqueries = 0;
/**
 * The current result set. For each index partition spanned by the overall
 * key range supplied by the client, we will issue at least one range query
 * against that index partition. Once all entries in a result set have been
 * consumed by the client, we test the result set to see whether or not it
 * exhausted the entries that could be matched for that index partition. If
 * not, then we will issue a "continuation" query against the same index
 * partition. If we are scanning forward, then the continuation query will
 * start (fromKey) from the successor of the last key scanned (if we are
 * scanning backwards, then toKey will be the last key scanned and fromKey
 * is unchanged).
 *
 * Note: A result set will be empty if there are no entries (after
 * filtering) that lie within the key range in a given index partition. It
 * is possible for any of the result sets to be empty. Consider a case of
 * static partitioning of an index into N partitions. When the index is
 * empty, a range query of the entire index will still query each of the N
 * partitions. However, since the index is empty none of the partitions will
 * have any matching entries and all result sets will be empty.
 *
 * @todo it would be useful if the {@link ResultSet} reported the maximum
 *       length for the keys and for the values. This could be used to right
 *       size the buffers which otherwise we have to let grow until they are
 *       of sufficient capacity.
 *
 * @see #rangeQuery()
 * @see #continuationQuery()
 */
protected ResultSet rset = null;
/**
 * The timestamp for the operation as specified by the ctor.
 * NOTE(review): the original comment read "this is used for remote index
 * queries but when running against a local index" — presumably a typo for
 * "not when running against a local index"; confirm against the original
 * source.
 */
protected abstract long getTimestamp();
/**
 * Note: value is 0L until the first {@link ResultSet} has been read
 * (and remains 0L when reading on a local index — see
 * {@link #rangeQuery()}).
 */
private long commitTime = 0L;
/**
 * The commit time reported by the initial {@link ResultSet} (0L until the
 * first result set has been read).
 */
protected long getCommitTime() {
    return commitTime;
}
/**
 * When true, the {@link #getCommitTime()} will be used to
 * ensure that {@link #continuationQuery()}s run against the same commit
 * point for the local index partition thereby producing a read-consistent
 * view even when the iterator is {@link ITx#READ_COMMITTED}.
 * When false, {@link #continuationQuery()}s will use
 * whatever value is returned by {@link #getTimestamp()}. Read-consistent
 * semantics for a partitioned index are achieved using the timestamp
 * returned by {@link IIndexStore#getLastCommitTime()} rather than
 * {@link ITx#READ_COMMITTED}.
 */
abstract protected boolean getReadConsistent();
/**
 * Return the timestamp used for {@link #continuationQuery()}s.
 *
 * When the iterator is {@link ITx#READ_COMMITTED} and
 * {@link #getReadConsistent()} is true, the commit time of the initial
 * {@link ResultSet} is converted into a historical read so that all
 * continuation queries observe the same commit point. Otherwise the value
 * of {@link #getTimestamp()} is used as given.
 *
 * @return The timestamp for the next continuation query.
 *
 * @throws IllegalStateException
 *             if a read-consistent view was requested but the initial
 *             {@link ResultSet} has not been read yet (the commit time to
 *             pin is not yet known).
 */
final protected long getReadTime() {
    final boolean readCommitted = getTimestamp() == ITx.READ_COMMITTED;
    if (readCommitted && getReadConsistent()) {
        if (commitTime == 0L) {
            // Nothing has been read, so there is no commit point to pin.
            throw new IllegalStateException();
        }
        return TimestampUtility.asHistoricalRead(commitTime);
    }
    return getTimestamp();
}
/**
 * The #of entries visited so far (across all result sets).
 */
protected long nvisited = 0;
/**
 * The index of the last entry visited in the current {@link ResultSet}.
 * This is reset to -1 each time we obtain a new {@link ResultSet}.
 */
protected int lastVisited = -1;
/**
 * When true, the entire key range specified by the client has been visited
 * and the iterator is exhausted (i.e., all done).
 */
protected boolean exhausted = false;
/**
 * The #of queries issued so far.
 */
public int getQueryCount() {
    return nqueries;
}
/**
 * The #of entries visited so far (not the #of entries scanned, which can be
 * much greater if a filter is in use).
 */
public long getVisitedCount() {
    return nvisited;
}
/**
 * The capacity used by default when the caller specified 0
 * as the capacity for the iterator.
 */
protected int getDefaultCapacity() {
    return 100;//1000;//100000;
}
/**
 * Core ctor.
 *
 * @param fromKey
 *            The first key to visit -or- null iff no lower bound.
 * @param toKey
 *            The first key to NOT visit -or- null iff no upper bound.
 * @param capacity
 *            The #of results to buffer per query -or- 0 to use
 *            {@link #getDefaultCapacity()}.
 * @param flags
 *            Bit flags controlling whether keys and/or values are
 *            requested.
 * @param filter
 *            Optional filter -or- null iff none.
 *
 * @throws IllegalArgumentException
 *             if capacity is negative.
 */
public AbstractChunkedTupleIterator(final byte[] fromKey,
        final byte[] toKey, final int capacity, final int flags,
        final IFilter filter) {
    if (capacity < 0) {
        // Was thrown without a message, which makes the failure hard to
        // diagnose from a stack trace alone.
        throw new IllegalArgumentException("capacity must be non-negative: "
                + capacity);
    }
    this.fromKey = fromKey;
    this.toKey = toKey;
    this.capacity = capacity == 0 ? getDefaultCapacity() : capacity;
    this.flags = flags;
    this.filter = filter;
}
/**
 * Abstract method must return the next {@link ResultSet} based on the
 * supplied parameter values.
 *
 * @param timestamp
 *            The timestamp for the read (possibly a historical read — see
 *            {@link #getReadTime()}).
 * @param fromKey
 *            The first key to visit -or- null iff no lower bound.
 * @param toKey
 *            The first key to NOT visit -or- null iff no upper bound.
 * @param capacity
 *            The maximum #of tuples to return in this chunk.
 * @param flags
 *            The flags specified to the ctor.
 * @param filter
 *            Optional filter -or- null iff none.
 * @return The next {@link ResultSet}.
 */
abstract protected ResultSet getResultSet(long timestamp,byte[] fromKey, byte[] toKey,
        int capacity, int flags, IFilter filter);
/**
 * Issues the original range query over [fromKey,toKey) and initializes the
 * per-chunk cursor state ({@link #rset}, {@link #lastVisited},
 * {@link #commitTime}, {@link #nqueries}).
 */
protected void rangeQuery() {
    assert !exhausted;
    if (INFO) {
        log.info("nqueries=" + nqueries + ", fromKey="
                + BytesUtil.toString(fromKey) + ", toKey="
                + BytesUtil.toString(toKey));
    }
    // Issue the initial query using the caller-supplied timestamp.
    final ResultSet initial = getResultSet(getTimestamp(), fromKey, toKey,
            capacity, flags, filter);
    rset = initial;
    // Note: will be 0L if reading on a local index.
    commitTime = initial.getCommitTime();
    // Reset the cursor position within the new chunk.
    lastVisited = -1;
    nqueries++;
    if (INFO) {
        log.info("Got chunk: ntuples=" + initial.getNumTuples()
                + ", exhausted=" + initial.isExhausted() + ", lastKey="
                + BytesUtil.toString(initial.getLastKey()));
    }
}
/**
 * Issues a "continuation" query against the same index. This is invoked iff
 * there are no entries left to visit in the current {@link ResultSet} but
 * {@link ResultSet#isExhausted()} is [false], indicating that there is more
 * data available.
 */
protected void continuationQuery() {
    assert !exhausted;
    assert rset != null;
    assert !rset.isExhausted();
    /*
     * Save the last visited key for #remove() (needed when remove() is
     * invoked after the continuation query but before next() is called on
     * the new result set).
     */
    lastVisitedKeyInPriorResultSet = tuple.getKeysRequested() ? tuple
            .getKey() : null;
    if ((flags & IRangeQuery.REVERSE) == 0) {
        /*
         * Forward scan.
         *
         * Start from the successor of the last key scanned by the previous
         * result set.
         *
         * BUGFIX: the flag test below used
         * (flags * IRangeQuery.FIXED_LENGTH_SUCCESSOR), which is non-zero
         * for nearly any flags value and therefore treated almost every
         * forward scan as a fixed-length-successor scan. Bit flags must be
         * tested with bitwise AND (as is done everywhere else in this
         * class, e.g. the REVERSE test above).
         */
        final boolean fixedLengthSuccessor = (flags & IRangeQuery.FIXED_LENGTH_SUCCESSOR) != 0;
        final byte[] _fromKey = fixedLengthSuccessor ? SuccessorUtil
                .successor(rset.getLastKey().clone()) : BytesUtil
                .successor(rset.getLastKey());
        if (INFO)
            log.info("forwardScan: fromKey=" + BytesUtil.toString(_fromKey)
                    + ", toKey=" + BytesUtil.toString(toKey));
        // continuation query (read-consistent timestamp when requested).
        rset = getResultSet(getReadTime(), _fromKey, toKey, capacity,
                flags, filter);
    } else {
        /*
         * Reverse scan.
         *
         * The new upper bound is the last key that we visited. The lower
         * bound is unchanged.
         */
        final byte[] _toKey = rset.getLastKey();
        if (INFO)
            log.info("reverseScan: fromKey=" + BytesUtil.toString(fromKey)
                    + ", toKey=" + BytesUtil.toString(_toKey));
        // continuation query.
        rset = getResultSet(getReadTime(), fromKey, _toKey, capacity,
                flags, filter);
    }
    // reset index into the ResultSet.
    lastVisited = -1;
    nqueries++;
    // apply any deletes buffered against the prior result set.
    deleteBehind();
}
/**
 * This gets set by {@link #continuationQuery()} to the value of the key for
 * the then current {@link #tuple}. This is used by {@link #remove()} in
 * the edge case where {@link #lastVisited} is -1 because a
 * continuation query has been issued but {@link #next()} has not yet been
 * invoked. It is cleared by {@link #next()} so that it does not hang
 * around.
 */
protected byte[] lastVisitedKeyInPriorResultSet;
/**
 * Tests whether more tuples are available, performing a layered check:
 * first whether the current {@link ResultSet} still has unvisited entries;
 * failing that, whether the source reports more data (in which case a
 * {@link #continuationQuery()} is issued and the test repeats, since a
 * continuation chunk can be empty); otherwise the iterator is marked
 * exhausted and any buffered deletes are flushed.
 *
 * @return True iff the iterator is not exhausted.
 */
public boolean hasNext() {
    if (nqueries == 0) {
        // Lazily issue the initial range query.
        rangeQuery();
    }
    assert rset != null;
    while (!exhausted) {
        final int ntuples = rset.getNumTuples();
        if (ntuples > 0 && lastVisited + 1 < ntuples) {
            // More tuples remain in the current chunk.
            return true;
        }
        if (rset.isExhausted()) {
            // Nothing left anywhere: mark done and flush buffered deletes.
            exhausted = true;
            flush();
            return false;
        }
        /*
         * The current chunk is consumed but the source reports more data.
         * Fetch the next chunk and re-test: the new chunk may be empty if
         * we are (a) unisolated; or (b) in a read-committed transaction; or
         * (c) a filter is being applied.
         */
        continuationQuery();
    }
    return false;
}
/**
 * Advances to and returns the next tuple.
 *
 * @throws NoSuchElementException
 *             if the iterator is exhausted.
 */
public ITuple next() {
    if (!hasNext())
        throw new NoSuchElementException();
    // A tuple is current again, so the key saved across the last
    // continuation query is stale.
    lastVisitedKeyInPriorResultSet = null;
    nvisited++;    // total #of tuples visited across all result sets.
    lastVisited++; // position within the current result set.
    tuple = new ResultSetTuple(rset, lastVisited);
    return tuple;
}
/** The last tuple returned by {@link #next()}. */
private ResultSetTuple tuple = null;
/**
 * An {@link ITuple} that draws its data from a {@link ResultSet}.
 *
 * Note: each instance snapshots the (result set, index) pair at
 * construction, so a tuple remains readable after the outer iterator moves
 * on to another chunk.
 *
 * NOTE(review): this class references a type parameter E (see
 * {@link #getObject()}) that is not declared in this copy of the source —
 * the generics of the enclosing class appear to have been stripped;
 * confirm against the original source tree.
 *
 * @author Bryan Thompson
 * @version $Id$
 */
public class ResultSetTuple implements ITuple {
    /** The result set from which this tuple reads its data. */
    private final ResultSet rset;
    /** Index of this tuple within {@link #rset} (-1 iff not positioned). */
    private final int lastVisited;
    protected ResultSetTuple(final ResultSet rset, final int lastVisited) {
        this.rset = rset;
        this.lastVisited = lastVisited;
    }
    public int getSourceIndex() {
        if (lastVisited == -1)
            throw new IllegalStateException();
        return rset.getSourceIndex(lastVisited);
    }
    public int flags() {
        // The flags specified to the outer iterator's ctor.
        return flags;
    }
    public boolean getKeysRequested() {
        return (flags & IRangeQuery.KEYS) != 0;
    }
    public boolean getValuesRequested() {
        return (flags & IRangeQuery.VALS) != 0;
    }
    public byte[] getKey() {
        // NOTE(review): unlike getValue(), this does not test
        // getKeysRequested() and throw with ERR_NO_KEYS — verify whether
        // that guard was intended here as well.
        if (lastVisited == -1)
            throw new IllegalStateException();
        return rset.getKeys().get(lastVisited);
    }
    public ByteArrayBuffer getKeyBuffer() {
        if (lastVisited == -1)
            throw new IllegalStateException();
        if (keyBuffer == null) {
            // Lazily allocate, sized to the current key.
            final int initialCapacity = rset.getKeys().length(lastVisited);
            keyBuffer = new DataOutputBuffer(initialCapacity);
        }
        // Reuse the buffer: reset, copy the key in, then flip for reading.
        keyBuffer.reset();
        rset.getKeys().copy(lastVisited, keyBuffer);
        keyBuffer.flip();
        return keyBuffer;
    }
    /** Lazily allocated; reused across calls to {@link #getKeyBuffer()}. */
    private DataOutputBuffer keyBuffer = null;
    public DataInputBuffer getKeyStream() {
        return new DataInputBuffer(getKeyBuffer());
    }
    public byte[] getValue() {
        if (lastVisited == -1)
            throw new IllegalStateException();
        if (!getValuesRequested())
            // NOTE(review): ERR_NO_VALS exists for this case but is not
            // passed to the exception — confirm whether that was intended.
            throw new UnsupportedOperationException();
        return rset.getValues().get(lastVisited);
    }
    public boolean isNull() {
        // Delegates to getValue(), so the same preconditions apply.
        return getValue() == null;
    }
    public ByteArrayBuffer getValueBuffer() {
        if (lastVisited == -1)
            throw new IllegalStateException();
        if (valueBuffer == null) {
            // Lazily allocate, sized to the current value.
            final int initialCapacity = rset.getValues().length(lastVisited);
            valueBuffer = new DataOutputBuffer(initialCapacity);
        }
        valueBuffer.reset();
        rset.getValues().copy(lastVisited, valueBuffer);
        valueBuffer.flip();
        return valueBuffer;
    }
    /** Lazily allocated; reused across calls to {@link #getValueBuffer()}. */
    private DataOutputBuffer valueBuffer = null;
    public DataInputBuffer getValueStream() {
        return new DataInputBuffer(getValueBuffer());
    }
    public E getObject() {
        if (lastVisited == -1)
            throw new IllegalStateException();
        // Deserialize via the result set's tuple serializer.
        return (E)rset.getTupleSerializer().deserialize(this);
    }
    public long getVersionTimestamp() {
        if (lastVisited == -1)
            throw new IllegalStateException();
        if (!rset.hasVersionTimestamps()) {
            // Version timestamps not maintained by the index.
            return 0L;
        }
        return rset.getVersionTimestamp(lastVisited);
    }
    public boolean isDeletedVersion() {
        if (lastVisited == -1)
            throw new IllegalStateException();
        if (!rset.hasDeleteMarkers()) {
            // Delete markers not maintained by the index.
            return false;
        }
        return rset.getDeleteMarker(lastVisited);
    }
    public long getVisitCount() {
        /*
         * The total #of tuples visited by the outer iterator across all
         * result sets (not the position of this tuple).
         */
        return nvisited;
    }
    public ITupleSerializer getTupleSerializer() {
        return rset.getTupleSerializer();
    }
    public IBlock readBlock(long addr) {
        // Resolve against the source index partition for this tuple.
        final int sourceIndex = getSourceIndex();
        return AbstractChunkedTupleIterator.this.readBlock(sourceIndex,
                addr);
    }
    public String toString() {
        return AbstractTuple.toString(this);
    }
}
/**
 * Queues a request to remove the entry under the most recently visited key.
 * If the iterator is exhausted then the entry will be deleted immediately.
 * Otherwise the requests will be queued until the current {@link ResultSet}
 * is exhausted and then a batch delete will be done for the queue.
 *
 * @throws IllegalStateException
 *             if nothing has been visited yet.
 * @throws UnsupportedOperationException
 *             if keys were not requested for the iterator.
 */
synchronized public void remove() {
    if (nvisited == 0)
        throw new IllegalStateException();
    if (!tuple.getKeysRequested())
        throw new UnsupportedOperationException(ERR_NO_KEYS);
    if (removeList == null)
        removeList = new ArrayList(capacity);
    /*
     * Edge case: a continuation query has just been issued but next() has
     * not been called on the new result set, so lastVisited is -1 and the
     * key must be recovered from the prior result set.
     */
    final byte[] key;
    if (lastVisited == -1) {
        key = lastVisitedKeyInPriorResultSet;
    } else {
        key = tuple.getKey();
    }
    assert key != null;
    /*
     * Buffer the delete while the iterator is still working within the
     * current result set; otherwise apply it at once. We deliberately do
     * NOT call hasNext() here since that would eagerly fetch the next
     * result set even though the consumer has not demanded it.
     */
    if (!exhausted && nvisited < capacity) {
        removeList.add(key);
    } else {
        deleteLast(key);
    }
}
/** Keys queued for batch delete (lazily allocated). */
private ArrayList removeList;
/**
 * Method flushes any queued deletes. You MUST do this if you are only
 * processing part of the buffered capacity of the iterator and you are
 * deleting some index entries. Failure to {@link #flush()} under these
 * circumstances will result in some buffered deletes never being applied.
 */
public void flush() {
    deleteBehind();
}
/**
 * Applies and then clears the queue of buffered deletes; a no-op when the
 * queue is absent or empty.
 */
protected void deleteBehind() {
    if (removeList != null && !removeList.isEmpty()) {
        deleteBehind(removeList.size(), removeList.iterator());
        removeList.clear();
    }
}
/**
 * Batch delete the index entries identified by keys and clear the
 * list.
 *
 * @param n
 *            The #of keys to be deleted.
 * @param keys
 *            The keys to be deleted (each element is a byte[] key — see
 *            {@link #remove()}).
 */
abstract protected void deleteBehind(int n,Iterator keys);
/**
 * Delete the index entry identified by key.
 *
 * @param key
 *            A key.
 */
abstract protected void deleteLast(byte[] key);
/**
 * Return an object that may be used to read the block from the backing
 * store per the contract for {@link ITuple#readBlock(long)}
 *
 * @param sourceIndex
 *            The value from {@link ITuple#getSourceIndex()}.
 *
 * @param addr
 *            The value supplied to {@link ITuple#readBlock(long)}.
 */
abstract protected IBlock readBlock(int sourceIndex, long addr);
}