// com.bigdata.btree.AbstractChunkedTupleIterator
/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Feb 1, 2008
*/
package com.bigdata.btree;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.log4j.Logger;
import com.bigdata.btree.keys.SuccessorUtil;
import com.bigdata.io.ByteArrayBuffer;
import com.bigdata.io.DataInputBuffer;
import com.bigdata.io.DataOutputBuffer;
import com.bigdata.journal.IIndexStore;
import com.bigdata.journal.ITx;
import com.bigdata.journal.TimestampUtility;
import com.bigdata.rawstore.IBlock;
import com.bigdata.util.BytesUtil;
import cutthecrap.utils.striterators.IFilter;
/**
* A chunked iterator that proceeds a {@link ResultSet} at a time. This
* introduces the concept of a {@link #continuationQuery()} so that the iterator
* can materialize the tuples using a sequence of queries that progresses
* through the index until all tuples in the key range have been visited.
*
* @author Bryan Thompson
* @version $Id$
*/
abstract public class AbstractChunkedTupleIterator implements ITupleIterator {
/**
 * Class-wide logger. The INFO/DEBUG level checks are cached in static
 * flags so hot paths do not repeatedly query the logger.
 */
protected static final transient Logger log = Logger
        .getLogger(AbstractChunkedTupleIterator.class);
protected static final transient boolean INFO = log.isInfoEnabled();
protected static final transient boolean DEBUG = log.isDebugEnabled();
/**
 * Error message used by {@link ResultSetTuple#getKey()} when the iterator
 * was not provisioned to request keys from the data service.
 */
static protected transient final String ERR_NO_KEYS = "Keys not requested";
/**
 * Error message used by {@link ResultSetTuple#getValue()} when the iterator
 * was not provisioned to request values from the data service.
 */
static protected transient final String ERR_NO_VALS = "Values not requested";
/**
 * The first key to visit -or- null iff no lower bound.
 */
protected final byte[] fromKey;
/**
 * The first key to NOT visit -or- null iff no upper bound.
 */
protected final byte[] toKey;
/**
 * This controls the #of results per data service query. (A capacity of 0
 * supplied to the ctor is replaced by {@link #getDefaultCapacity()}.)
 */
protected final int capacity;
/**
 * These flags control whether keys and/or values are requested. If neither
 * keys nor values are requested, then this is just a range count operation
 * and you might as well use rangeCount instead.
 */
protected final int flags;
/**
 * Optional filter -or- null iff none.
 */
protected final IFilter filter;
/**
 * The #of range query operations executed.
 */
protected int nqueries = 0;
/**
 * The current result set. For each index partition spanned by the overall
 * key range supplied by the client, we will issue at least one range query
 * against that index partition. Once all entries in a result set have been
 * consumed by the client, we test the result set to see whether or not it
 * exhausted the entries that could be matched for that index partition. If
 * not, then we will issue a "continuation" query against the same index
 * partition. If we are scanning forward, then the continuation query will
 * start (fromKey) from the successor of the last key scanned (if we are
 * scanning backwards, then toKey will be the last key scanned and fromKey
 * is unchanged).
 *
 * Note: A result set will be empty if there are no entries (after
 * filtering) that lie within the key range in a given index partition. It
 * is possible for any of the result sets to be empty. Consider a case of
 * static partitioning of an index into N partitions. When the index is
 * empty, a range query of the entire index will still query each of the N
 * partitions. However, since the index is empty none of the partitions will
 * have any matching entries and all result sets will be empty.
 *
 * @todo it would be useful if the {@link ResultSet} reported the maximum
 *       length for the keys and for the values. This could be used to right
 *       size the buffers which otherwise we have to let grow until they are
 *       of sufficient capacity.
 *
 * @see #rangeQuery()
 * @see #continuationQuery()
 */
protected ResultSet rset = null;
/**
 * The timestamp for the operation as specified by the ctor.
 * NOTE(review): the original comment read "this is used for remote index
 * queries but when running against a local index" — presumably a typo for
 * "not when running against a local index"; confirm against the original
 * source.
 */
protected abstract long getTimestamp();
/**
 * Note: value is 0L until the first {@link ResultSet} has been read
 * (and remains 0L when reading on a local index — see
 * {@link #rangeQuery()}).
 */
private long commitTime = 0L;
/**
 * The commit time reported by the initial {@link ResultSet} (0L until the
 * first result set has been read).
 */
protected long getCommitTime() {
    return commitTime;
}
/**
 * When true, the {@link #getCommitTime()} will be used to
 * ensure that {@link #continuationQuery()}s run against the same commit
 * point for the local index partition thereby producing a read-consistent
 * view even when the iterator is {@link ITx#READ_COMMITTED}.
 * When false, {@link #continuationQuery()}s will use
 * whatever value is returned by {@link #getTimestamp()}. Read-consistent
 * semantics for a partitioned index are achieved using the timestamp
 * returned by {@link IIndexStore#getLastCommitTime()} rather than
 * {@link ITx#READ_COMMITTED}.
 */
abstract protected boolean getReadConsistent();
/**
 * Return the timestamp used for {@link #continuationQuery()}s.
 *
 * When the iterator is {@link ITx#READ_COMMITTED} and
 * {@link #getReadConsistent()} is true, the commit time of the initial
 * {@link ResultSet} is converted into a historical read so that all
 * continuation queries observe the same commit point. Otherwise the value
 * of {@link #getTimestamp()} is used as given.
 *
 * @return The timestamp for the next continuation query.
 *
 * @throws IllegalStateException
 *             if a read-consistent view was requested but the initial
 *             {@link ResultSet} has not been read yet (the commit time to
 *             pin is not yet known).
 */
final protected long getReadTime() {
    final boolean readCommitted = getTimestamp() == ITx.READ_COMMITTED;
    if (readCommitted && getReadConsistent()) {
        if (commitTime == 0L) {
            // Nothing has been read, so there is no commit point to pin.
            throw new IllegalStateException();
        }
        return TimestampUtility.asHistoricalRead(commitTime);
    }
    return getTimestamp();
}
/**
 * The #of entries visited so far (across all result sets).
 */
protected long nvisited = 0;
/**
 * The index of the last entry visited in the current {@link ResultSet}.
 * This is reset to -1 each time we obtain a new {@link ResultSet}.
 */
protected int lastVisited = -1;
/**
 * When true, the entire key range specified by the client has been visited
 * and the iterator is exhausted (i.e., all done).
 */
protected boolean exhausted = false;
/**
 * The #of queries issued so far.
 */
public int getQueryCount() {
    return nqueries;
}
/**
 * The #of entries visited so far (not the #of entries scanned, which can be
 * much greater if a filter is in use).
 */
public long getVisitedCount() {
    return nvisited;
}
/**
 * The capacity used by default when the caller specified 0
 * as the capacity for the iterator.
 */
protected int getDefaultCapacity() {
    return 100;//1000;//100000;
}
/**
 * Core ctor.
 *
 * @param fromKey
 *            The first key to visit -or- null iff no lower bound.
 * @param toKey
 *            The first key to NOT visit -or- null iff no upper bound.
 * @param capacity
 *            The #of results to buffer per query -or- 0 to use
 *            {@link #getDefaultCapacity()}.
 * @param flags
 *            Bit flags controlling whether keys and/or values are
 *            requested.
 * @param filter
 *            Optional filter -or- null iff none.
 *
 * @throws IllegalArgumentException
 *             if capacity is negative.
 */
public AbstractChunkedTupleIterator(final byte[] fromKey,
        final byte[] toKey, final int capacity, final int flags,
        final IFilter filter) {
    if (capacity < 0) {
        // Was thrown without a message, which makes the failure hard to
        // diagnose from a stack trace alone.
        throw new IllegalArgumentException("capacity must be non-negative: "
                + capacity);
    }
    this.fromKey = fromKey;
    this.toKey = toKey;
    this.capacity = capacity == 0 ? getDefaultCapacity() : capacity;
    this.flags = flags;
    this.filter = filter;
}
/**
 * Abstract method must return the next {@link ResultSet} based on the
 * supplied parameter values.
 *
 * @param timestamp
 *            The timestamp for the read (possibly a historical read — see
 *            {@link #getReadTime()}).
 * @param fromKey
 *            The first key to visit -or- null iff no lower bound.
 * @param toKey
 *            The first key to NOT visit -or- null iff no upper bound.
 * @param capacity
 *            The maximum #of tuples to return in this chunk.
 * @param flags
 *            The flags specified to the ctor.
 * @param filter
 *            Optional filter -or- null iff none.
 * @return The next {@link ResultSet}.
 */
abstract protected ResultSet getResultSet(long timestamp,byte[] fromKey, byte[] toKey,
        int capacity, int flags, IFilter filter);
/**
 * Issues the original range query over [fromKey,toKey) and initializes the
 * per-chunk cursor state ({@link #rset}, {@link #lastVisited},
 * {@link #commitTime}, {@link #nqueries}).
 */
protected void rangeQuery() {
    assert !exhausted;
    if (INFO) {
        log.info("nqueries=" + nqueries + ", fromKey="
                + BytesUtil.toString(fromKey) + ", toKey="
                + BytesUtil.toString(toKey));
    }
    // Issue the initial query using the caller-supplied timestamp.
    final ResultSet initial = getResultSet(getTimestamp(), fromKey, toKey,
            capacity, flags, filter);
    rset = initial;
    // Note: will be 0L if reading on a local index.
    commitTime = initial.getCommitTime();
    // Reset the cursor position within the new chunk.
    lastVisited = -1;
    nqueries++;
    if (INFO) {
        log.info("Got chunk: ntuples=" + initial.getNumTuples()
                + ", exhausted=" + initial.isExhausted() + ", lastKey="
                + BytesUtil.toString(initial.getLastKey()));
    }
}
/**
 * Issues a "continuation" query against the same index. This is invoked iff
 * there are no entries left to visit in the current {@link ResultSet} but
 * {@link ResultSet#isExhausted()} is [false], indicating that there is more
 * data available.
 */
protected void continuationQuery() {
    assert !exhausted;
    assert rset != null;
    assert !rset.isExhausted();
    /*
     * Save the last visited key for #remove() (needed when remove() is
     * invoked after the continuation query but before next() is called on
     * the new result set).
     */
    lastVisitedKeyInPriorResultSet = tuple.getKeysRequested() ? tuple
            .getKey() : null;
    if ((flags & IRangeQuery.REVERSE) == 0) {
        /*
         * Forward scan.
         *
         * Start from the successor of the last key scanned by the previous
         * result set.
         *
         * BUGFIX: the flag test below used
         * (flags * IRangeQuery.FIXED_LENGTH_SUCCESSOR), which is non-zero
         * for nearly any flags value and therefore treated almost every
         * forward scan as a fixed-length-successor scan. Bit flags must be
         * tested with bitwise AND (as is done everywhere else in this
         * class, e.g. the REVERSE test above).
         */
        final boolean fixedLengthSuccessor = (flags & IRangeQuery.FIXED_LENGTH_SUCCESSOR) != 0;
        final byte[] _fromKey = fixedLengthSuccessor ? SuccessorUtil
                .successor(rset.getLastKey().clone()) : BytesUtil
                .successor(rset.getLastKey());
        if (INFO)
            log.info("forwardScan: fromKey=" + BytesUtil.toString(_fromKey)
                    + ", toKey=" + BytesUtil.toString(toKey));
        // continuation query (read-consistent timestamp when requested).
        rset = getResultSet(getReadTime(), _fromKey, toKey, capacity,
                flags, filter);
    } else {
        /*
         * Reverse scan.
         *
         * The new upper bound is the last key that we visited. The lower
         * bound is unchanged.
         */
        final byte[] _toKey = rset.getLastKey();
        if (INFO)
            log.info("reverseScan: fromKey=" + BytesUtil.toString(fromKey)
                    + ", toKey=" + BytesUtil.toString(_toKey));
        // continuation query.
        rset = getResultSet(getReadTime(), fromKey, _toKey, capacity,
                flags, filter);
    }
    // reset index into the ResultSet.
    lastVisited = -1;
    nqueries++;
    // apply any deletes buffered against the prior result set.
    deleteBehind();
}
/**
 * This gets set by {@link #continuationQuery()} to the value of the key for
 * the then current {@link #tuple}. This is used by {@link #remove()} in
 * the edge case where {@link #lastVisited} is -1 because a
 * continuation query has been issued but {@link #next()} has not yet been
 * invoked. It is cleared by {@link #next()} so that it does not hang
 * around.
 */
protected byte[] lastVisitedKeyInPriorResultSet;
/**
 * Tests whether more tuples are available, performing a layered check:
 * first whether the current {@link ResultSet} still has unvisited entries;
 * failing that, whether the source reports more data (in which case a
 * {@link #continuationQuery()} is issued and the test repeats, since a
 * continuation chunk can be empty); otherwise the iterator is marked
 * exhausted and any buffered deletes are flushed.
 *
 * @return True iff the iterator is not exhausted.
 */
public boolean hasNext() {
    if (nqueries == 0) {
        // Lazily issue the initial range query.
        rangeQuery();
    }
    assert rset != null;
    while (!exhausted) {
        final int ntuples = rset.getNumTuples();
        if (ntuples > 0 && lastVisited + 1 < ntuples) {
            // More tuples remain in the current chunk.
            return true;
        }
        if (rset.isExhausted()) {
            // Nothing left anywhere: mark done and flush buffered deletes.
            exhausted = true;
            flush();
            return false;
        }
        /*
         * The current chunk is consumed but the source reports more data.
         * Fetch the next chunk and re-test: the new chunk may be empty if
         * we are (a) unisolated; or (b) in a read-committed transaction; or
         * (c) a filter is being applied.
         */
        continuationQuery();
    }
    return false;
}
/**
 * Advances to and returns the next tuple.
 *
 * @throws NoSuchElementException
 *             if the iterator is exhausted.
 */
public ITuple next() {
    if (!hasNext())
        throw new NoSuchElementException();
    // A tuple is current again, so the key saved across the last
    // continuation query is stale.
    lastVisitedKeyInPriorResultSet = null;
    nvisited++;    // total #of tuples visited across all result sets.
    lastVisited++; // position within the current result set.
    tuple = new ResultSetTuple(rset, lastVisited);
    return tuple;
}
/** The last tuple returned by {@link #next()}. */
private ResultSetTuple tuple = null;
/**
 * An {@link ITuple} that draws its data from a {@link ResultSet}.
 *
 * Note: each instance snapshots the (result set, index) pair at
 * construction, so a tuple remains readable after the outer iterator moves
 * on to another chunk.
 *
 * NOTE(review): this class references a type parameter E (see
 * {@link #getObject()}) that is not declared in this copy of the source —
 * the generics of the enclosing class appear to have been stripped;
 * confirm against the original source tree.
 *
 * @author Bryan Thompson
 * @version $Id$
 */
public class ResultSetTuple implements ITuple {
    /** The result set from which this tuple reads its data. */
    private final ResultSet rset;
    /** Index of this tuple within {@link #rset} (-1 iff not positioned). */
    private final int lastVisited;
    protected ResultSetTuple(final ResultSet rset, final int lastVisited) {
        this.rset = rset;
        this.lastVisited = lastVisited;
    }
    public int getSourceIndex() {
        if (lastVisited == -1)
            throw new IllegalStateException();
        return rset.getSourceIndex(lastVisited);
    }
    public int flags() {
        // The flags specified to the outer iterator's ctor.
        return flags;
    }
    public boolean getKeysRequested() {
        return (flags & IRangeQuery.KEYS) != 0;
    }
    public boolean getValuesRequested() {
        return (flags & IRangeQuery.VALS) != 0;
    }
    public byte[] getKey() {
        // NOTE(review): unlike getValue(), this does not test
        // getKeysRequested() and throw with ERR_NO_KEYS — verify whether
        // that guard was intended here as well.
        if (lastVisited == -1)
            throw new IllegalStateException();
        return rset.getKeys().get(lastVisited);
    }
    public ByteArrayBuffer getKeyBuffer() {
        if (lastVisited == -1)
            throw new IllegalStateException();
        if (keyBuffer == null) {
            // Lazily allocate, sized to the current key.
            final int initialCapacity = rset.getKeys().length(lastVisited);
            keyBuffer = new DataOutputBuffer(initialCapacity);
        }
        // Reuse the buffer: reset, copy the key in, then flip for reading.
        keyBuffer.reset();
        rset.getKeys().copy(lastVisited, keyBuffer);
        keyBuffer.flip();
        return keyBuffer;
    }
    /** Lazily allocated; reused across calls to {@link #getKeyBuffer()}. */
    private DataOutputBuffer keyBuffer = null;
    public DataInputBuffer getKeyStream() {
        return new DataInputBuffer(getKeyBuffer());
    }
    public byte[] getValue() {
        if (lastVisited == -1)
            throw new IllegalStateException();
        if (!getValuesRequested())
            // NOTE(review): ERR_NO_VALS exists for this case but is not
            // passed to the exception — confirm whether that was intended.
            throw new UnsupportedOperationException();
        return rset.getValues().get(lastVisited);
    }
    public boolean isNull() {
        // Delegates to getValue(), so the same preconditions apply.
        return getValue() == null;
    }
    public ByteArrayBuffer getValueBuffer() {
        if (lastVisited == -1)
            throw new IllegalStateException();
        if (valueBuffer == null) {
            // Lazily allocate, sized to the current value.
            final int initialCapacity = rset.getValues().length(lastVisited);
            valueBuffer = new DataOutputBuffer(initialCapacity);
        }
        valueBuffer.reset();
        rset.getValues().copy(lastVisited, valueBuffer);
        valueBuffer.flip();
        return valueBuffer;
    }
    /** Lazily allocated; reused across calls to {@link #getValueBuffer()}. */
    private DataOutputBuffer valueBuffer = null;
    public DataInputBuffer getValueStream() {
        return new DataInputBuffer(getValueBuffer());
    }
    public E getObject() {
        if (lastVisited == -1)
            throw new IllegalStateException();
        // Deserialize via the result set's tuple serializer.
        return (E)rset.getTupleSerializer().deserialize(this);
    }
    public long getVersionTimestamp() {
        if (lastVisited == -1)
            throw new IllegalStateException();
        if (!rset.hasVersionTimestamps()) {
            // Version timestamps not maintained by the index.
            return 0L;
        }
        return rset.getVersionTimestamp(lastVisited);
    }
    public boolean isDeletedVersion() {
        if (lastVisited == -1)
            throw new IllegalStateException();
        if (!rset.hasDeleteMarkers()) {
            // Delete markers not maintained by the index.
            return false;
        }
        return rset.getDeleteMarker(lastVisited);
    }
    public long getVisitCount() {
        /*
         * The total #of tuples visited by the outer iterator across all
         * result sets (not the position of this tuple).
         */
        return nvisited;
    }
    public ITupleSerializer getTupleSerializer() {
        return rset.getTupleSerializer();
    }
    public IBlock readBlock(long addr) {
        // Resolve against the source index partition for this tuple.
        final int sourceIndex = getSourceIndex();
        return AbstractChunkedTupleIterator.this.readBlock(sourceIndex,
                addr);
    }
    public String toString() {
        return AbstractTuple.toString(this);
    }
}
/**
 * Queues a request to remove the entry under the most recently visited key.
 * If the iterator is exhausted then the entry will be deleted immediately.
 * Otherwise the requests will be queued until the current {@link ResultSet}
 * is exhausted and then a batch delete will be done for the queue.
 *
 * @throws IllegalStateException
 *             if nothing has been visited yet.
 * @throws UnsupportedOperationException
 *             if keys were not requested for the iterator.
 */
synchronized public void remove() {
    if (nvisited == 0)
        throw new IllegalStateException();
    if (!tuple.getKeysRequested())
        throw new UnsupportedOperationException(ERR_NO_KEYS);
    if (removeList == null)
        removeList = new ArrayList(capacity);
    /*
     * Edge case: a continuation query has just been issued but next() has
     * not been called on the new result set, so lastVisited is -1 and the
     * key must be recovered from the prior result set.
     */
    final byte[] key;
    if (lastVisited == -1) {
        key = lastVisitedKeyInPriorResultSet;
    } else {
        key = tuple.getKey();
    }
    assert key != null;
    /*
     * Buffer the delete while the iterator is still working within the
     * current result set; otherwise apply it at once. We deliberately do
     * NOT call hasNext() here since that would eagerly fetch the next
     * result set even though the consumer has not demanded it.
     */
    if (!exhausted && nvisited < capacity) {
        removeList.add(key);
    } else {
        deleteLast(key);
    }
}
/** Keys queued for batch delete (lazily allocated). */
private ArrayList removeList;
/**
 * Method flushes any queued deletes. You MUST do this if you are only
 * processing part of the buffered capacity of the iterator and you are
 * deleting some index entries. Failure to {@link #flush()} under these
 * circumstances will result in some buffered deletes never being applied.
 */
public void flush() {
    deleteBehind();
}
/**
 * Applies and then clears the queue of buffered deletes; a no-op when the
 * queue is absent or empty.
 */
protected void deleteBehind() {
    if (removeList != null && !removeList.isEmpty()) {
        deleteBehind(removeList.size(), removeList.iterator());
        removeList.clear();
    }
}
/**
 * Batch delete the index entries identified by keys and clear the
 * list.
 *
 * @param n
 *            The #of keys to be deleted.
 * @param keys
 *            The keys to be deleted (each element is a byte[] key — see
 *            {@link #remove()}).
 */
abstract protected void deleteBehind(int n,Iterator keys);
/**
 * Delete the index entry identified by key.
 *
 * @param key
 *            A key.
 */
abstract protected void deleteLast(byte[] key);
/**
 * Return an object that may be used to read the block from the backing
 * store per the contract for {@link ITuple#readBlock(long)}
 *
 * @param sourceIndex
 *            The value from {@link ITuple#getSourceIndex()}.
 *
 * @param addr
 *            The value supplied to {@link ITuple#readBlock(long)}.
 */
abstract protected IBlock readBlock(int sourceIndex, long addr);
}