// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
//
// com.bigdata.rdf.inf.JustificationIterator Maven / Gradle / Ivy

/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Nov 14, 2007
 */

package com.bigdata.rdf.inf;

import java.util.NoSuchElementException;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.log4j.Logger;

import com.bigdata.btree.IIndex;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.journal.IIndexManager;
import com.bigdata.relation.accesspath.IElementFilter;
import com.bigdata.util.Bytes;

/**
 * Iterator visits {@link Justification}s reading from the justification index.
 * The iterator optionally supports asynchronous read ahead.
 * 
 * @author Bryan Thompson
 * @version $Id$
 * 
 * @deprecated replace with appropriate access path reading on the
 *             justifications index for SPORelation (or treat as a secondary
 *             relation).
 */
public class JustificationIterator implements IJustificationIterator {

    private static final transient Logger log = Logger
            .getLogger(JustificationIterator.class);

    /**
     * The maximum #of statements that will be buffered by the iterator.
     */
    private static final transient int MAXIMUM_CAPACITY = 10 * Bytes.kilobyte32; // was 100k

    private boolean open = true;

    /**
     * The actual capacity of the buffer (never zero).
     */
    private final int capacity;

    /**
     * The #of statements that have been read from the source
     * and placed into the buffer. All such statements will also have passed the
     * optional {@link IElementFilter}.
     */
    private int numBuffered;

    /**
     * The #of statements that have been read by the caller using
     * {@link #next()}.
     */
    private int numReadByCaller;

    /**
     * The #of chunks that have been read by the caller.
     */
    private int nchunks = 0;

    /**
     * A buffer holding {@link Justification}s that have not been visited.
     * {@link Justification}s that have been visited are taken from the buffer,
     * making room for new statements which can be filled in asynchronously by
     * the {@link Reader}.
     */
    private ArrayBlockingQueue buffer;

    /**
     * The source iterator reading on the selected justification index.
     */
    private ITupleIterator src;

//    /**
//     * The executor service for the {@link Reader} (iff the {@link Reader} runs
//     * asynchronously).
//     */
//    private final ExecutorService readService;

    /**
     * The future for the {@link Reader} and null if a synchronous
     * read was performed (fully buffered read in the caller's thread).
     */
    private final FutureTask ft;
    
    /**
     * Set to true iff an asynchronous {@link Reader} is used AND there is
     * nothing more to be read.
     */
    private final AtomicBoolean readerDone = new AtomicBoolean(false);

    /**
     * The minimum desirable chunk size for {@link #nextChunk()}.
     */
    static private final int MIN_CHUNK_SIZE = 100;

//    /**
//     * If NO results show up within this timeout then {@link #nextChunk()} will
//     * throw a {@link RuntimeException} to abort the reader - the probably cause
//     * is a network outage.
//     */
//    static private final long TIMEOUT = Long.MAX_VALUE;

    /**
     * Create an iterator reading from the justifications index.
     * 
     * @param capacity
     *            The maximum #of statements that will be buffered. When ZERO
     *            (0) the iterator will range count the access path fully buffer
     *            if there are less than {@link #MAXIMUM_CAPACITY} statements
     *            selected by the triple pattern. When non-zero, the caller's
     *            value is used - this gives you control when you really, really
     *            want to have something fully buffered, e.g., for an in-memory
     *            self-join.
     */
    public JustificationIterator(final IIndexManager indexManager,
            final IIndex ndx, int capacity) {

        if (indexManager == null)
            throw new IllegalArgumentException();

        if (ndx == null)
            throw new IllegalArgumentException();

        if (capacity < 0)
            throw new IllegalArgumentException();

        /*
         * When true, asynchronous read-ahead will be used to refill the buffer
         * as it becomes depleted. When false, read-ahead will be synchronous
         * (this is useful when you want to read at most N statements from the
         * index).
         */
        boolean async = true;

        // The fast range count (upper bound)
        final long rangeCount = ndx.rangeCount();

        if (capacity == 0) {

            /*
             * Attempt to fully buffer the justifications.
             */

            if (capacity > MAXIMUM_CAPACITY || rangeCount > MAXIMUM_CAPACITY) {

                /*
                 * If the capacity would exceed the maximum then we limit
                 * the capacity to the maximum.
                 */

                capacity = MAXIMUM_CAPACITY;

            } else {
                
                // Otherwise use the range count (upper bound).
                capacity = (int) rangeCount;
                
            }

        } else {

            if (capacity > rangeCount) {

                /*
                 * If the caller has over-estimated the actual range count for
                 * the index then reduce the capacity to the real range count.
                 * This makes it safe for the caller to request a capacity of 1M
                 * SPOs and only a "right-sized" buffer will be allocated.
                 * 
                 * Note: The range count is generally an upper bound rather than
                 * an exact value.
                 */

                capacity = (int) rangeCount;

                /*
                 * Note: If the caller is making a best effort attempt to read
                 * everything into memory AND the data will fit within the
                 * caller's specified capacity, then we disable asynchronous
                 * reads so that they will get everything in one chunk.
                 */

                async = false;

            }

        }

        if (rangeCount < 100) {

            // Disable async reads if we are not reading much data.

            async = false;

        }

        if (capacity == 0) {

            /*
             * Note: The ArrayBlockingQueue has a minimum capacity of ONE (1).
             */

            capacity = 1;

        }

        this.capacity = capacity;

        this.src = ndx.rangeIterator(null/* fromKey */, null/* toKey */,
                0/* capacity */, IRangeQuery.KEYS, null/* filter */);

        this.buffer = new ArrayBlockingQueue(capacity);

        if (async) {

            // wrap reader as Future
            ft = new FutureTask(new Reader());
            
            // submit for asynchronous read ahead
            indexManager.getExecutorService().submit(ft);

        } else {

            // Fill the buffer (synchronous).

            ft = null;
            
            fillBuffer();

        }

    }

    /**
     * Reads from the statement index, filling the {@link #buffer}.
     * 
     * @author Bryan Thompson
     * @version $Id$
     */
    private class Reader implements Callable {

        /**
         * Runs the {@link Reader}.
         * 
         * @return null.
         */
        public Object call() throws Exception {

            while (src.hasNext()) {

                final Justification t = (Justification)src.next().getObject();

                try {

                    /*
                     * Note: This will block if the buffer is at capacity.
                     */

                    buffer.put(t);

                    numBuffered++;

                } catch (InterruptedException ex) {

                    throw new RuntimeException(ex);

                }

            }

            // Nothing left to read.

            readerDone.set(true);

            return null;

        }

    }

    /**
     * (Re-)fills the buffer up to its capacity or the exhaustion of the source
     * iterator.
     * 
     * @return false if the buffer is still empty.
     */
    private boolean fillBuffer() {

        assertOpen();

        if (ft != null) {

            // This method MUST NOT be invoked when using the async reader.
            throw new AssertionError();

        }

        try {
            // if(log.isDebugEnabled()) log.debug("(Re-)filling buffer: remainingCapacity="
            // + buffer.remainingCapacity());

            while (src.hasNext() && buffer.remainingCapacity() > 0) {

                final Justification jst = (Justification) src.next()
                        .getObject();

                try {

                    buffer.put(jst);

                    numBuffered++;

                } catch (InterruptedException ex) {

                    throw new RuntimeException(ex);

                }

            }

            // false if the buffer is still empty.

            return !buffer.isEmpty();

        } finally {

            if (log.isDebugEnabled())
                log.debug("(Re-)filled buffer: size=" + buffer.size()
                        + ", remainingCapacity=" + buffer.remainingCapacity()
                        + ", done=" + !src.hasNext());

        }

    }

    public boolean hasNext() {

        if (!open)
            return false;

        if (buffer.isEmpty()) {

            /*
             * The buffer is empty, but there may be more data available from
             * the underlying iterator.
             */

            if (ft != null) {

                // async reader - so wait on it.
                awaitReader();

            } else {

                // sync reader - so fill the buffer in this thread.
                fillBuffer();

            }

            if (buffer.isEmpty()) {

                // the buffer is still empty, so the iterator is exhausted.
                return false;

            }

        }

        // at least one Justification in the buffer.
        return true;

    }

    public Justification next() {

        if (!hasNext()) {

            throw new NoSuchElementException();

        }

        final Justification t;

        try {

            t = buffer.take();

        } catch (InterruptedException ex) {

            throw new RuntimeException(ex);

        }

        numReadByCaller++;

        return t;

    }

    /**
     * Returns a chunk whose size is the #of statements currently in the buffer.
     * 

* Note: When asynchronous reads are used, the buffer will be transparently * refilled and should be ready for a next chunk by the time you are done * with this one. */ public Justification[] nextChunk() { if (!hasNext()) { throw new NoSuchElementException(); } if (ft != null) { // make sure that we fill the buffer before we deliver a chunk. awaitReader(); } // there are at least this many in the buffer. final int n = buffer.size(); // allocate the array. final Justification[] stmts = new Justification[n]; for (int i = 0; i < n; i++) { stmts[i] = next(); } if (log.isDebugEnabled()) log.debug("chunkSize=" + n + ", nchunks=" + nchunks + ", #read(caller)=" + numReadByCaller + ", #read(src)=" + numBuffered); return stmts; } /** * Await some data from the {@link Reader}. *

* Note: If there is some data available this will continue to wait until at * least {@link #MIN_CHUNK_SIZE} statements are available from the * {@link Reader} -or- until the reader signals that it is * {@link #readerDone done}. This helps to keep up the chunk size and hence * the efficiency of batch operations when we might otherwise get into a * race with the {@link Reader}. */ private void awaitReader() { if (ft == null) { /* * This method MUST NOT be invoked unless you are using the async * reader. */ throw new AssertionError(); } // final long begin = System.currentTimeMillis(); /* * Wait for at least N records to show up. */ final int N = capacity < MIN_CHUNK_SIZE ? capacity : MIN_CHUNK_SIZE; while (buffer.size() < N && !readerDone.get()) { try { /* * TODO This uses a Thread.sleep() to avoid a lock ordering * problem because we did not have access to the lock used * internally by the blocking queue when this code was written. * However, we now have incorporated at least one JSR166 * blocking queue class which can use the caller's lock. That * makes it possible to setup conditions which can be awaited * for full/not-full, etc., but you have to be careful not to * violate the manner in which the lock signal/notify semantics * are used internally by the blocking queue implementation. */ Thread.sleep(10/*ms*/); } catch (InterruptedException ex) { throw new RuntimeException(ex); } // final long elapsed = System.currentTimeMillis() - begin; // // if (elapsed > TIMEOUT && buffer.isEmpty()) { // // throw new RuntimeException("Timeout after " + elapsed + "ms"); // // } } } /** * @throws UnsupportedOperationException always */ public void remove() { assertOpen(); throw new UnsupportedOperationException(); } public void close() { if (!open) { // Already closed. return; } log.debug("Closing iterator"); open = false; if (ft != null) { // terminate the Reader. 
ft.cancel(true/* mayInterruptIfRunning */); // try { // // readService.awaitTermination(500, TimeUnit.MILLISECONDS); // // } catch (InterruptedException e) { // // log.warn("Read service did not terminate: " + e); // // } } // discard buffer. buffer.clear(); buffer = null; // discard the source iterator. src = null; } private final void assertOpen() { if (!open) throw new IllegalStateException(); } }