![JAR search and dependency download from the Maven repository](/logo.png)
src.it.unimi.dsi.big.mg4j.search.AndDocumentIterator Maven / Gradle / Ivy
Show all versions of mg4j-big Show documentation
package it.unimi.dsi.big.mg4j.search;
/*
* MG4J: Managing Gigabytes for Java (big)
*
* Copyright (C) 2003-2011 Paolo Boldi and Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.big.mg4j.index.Index;
import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;
import it.unimi.dsi.fastutil.longs.LongSet;
import it.unimi.dsi.fastutil.objects.ObjectArrays;
import it.unimi.dsi.fastutil.objects.ObjectHeapSemiIndirectPriorityQueue;
import it.unimi.dsi.util.Interval;
import it.unimi.dsi.util.Intervals;
import java.io.IOException;
import java.util.NoSuchElementException;
/** A document iterator that returns the AND of a number of document iterators.
*
* This class adds to {@link it.unimi.dsi.big.mg4j.search.AbstractIntersectionDocumentIterator}
* an interval iterator generating the AND of the intervals returned for each of the documents involved.
*/
public class AndDocumentIterator extends AbstractIntersectionDocumentIterator {
private static final boolean ASSERTS = false;
private final static boolean DEBUG = false;
/** Returns a document iterator that computes the AND of the given array of iterators.
 *
 * <p>The empty and the singleton arrays are special-cased so that no composite
 * iterator is built for them: an empty array yields the result of
 * {@link TrueDocumentIterator}'s factory method for <code>index</code>, and a
 * singleton array yields its only element, unwrapped.
 *
 * @param index the default index; relevant only if <code>documentIterator</code> has zero length.
 * @param documentIterator the iterators to be joined.
 * @return a document iterator that computes the AND of <code>documentIterator</code>.
 * @throws IOException if creating the resulting iterator fails with an I/O error.
 */
public static DocumentIterator getInstance( final Index index, final DocumentIterator... documentIterator ) throws IOException {
	switch ( documentIterator.length ) {
		case 0:
			// Nothing to intersect: the AND of no iterators is the "true" iterator on the default index.
			return TrueDocumentIterator.getInstance( index );
		case 1:
			// A single component is its own AND.
			return documentIterator[ 0 ];
		default:
			return new AndDocumentIterator( documentIterator );
	}
}
/** Returns a document iterator that computes the AND of the given nonzero-length array of iterators.
 *
 * <p>This is a convenience variant of {@link #getInstance(Index, DocumentIterator...)} that needs
 * no default index, because the empty array is rejected up front. A singleton array
 * is still handled without building a composite iterator.
 *
 * @param documentIterator the iterators to be joined (at least one).
 * @return a document iterator that computes the AND of <code>documentIterator</code>.
 * @throws IllegalArgumentException if <code>documentIterator</code> has zero length.
 * @throws IOException if creating the resulting iterator fails with an I/O error.
 */
public static DocumentIterator getInstance( final DocumentIterator... documentIterator ) throws IOException {
	final int components = documentIterator.length;
	if ( components == 0 ) {
		throw new IllegalArgumentException( "The provided array of document iterators is empty." );
	}
	// The default index is used only for the empty array, which has just been excluded.
	return getInstance( null, documentIterator );
}
/** Creates a new AND document iterator over the given component iterators.
 *
 * <p>The constructor only hands the array over to the superclass. The static
 * <code>getInstance()</code> factory methods of this class are the ones that
 * special-case empty and singleton arrays before calling it.
 *
 * @param documentIterator the iterators to be joined.
 */
protected AndDocumentIterator( final DocumentIterator[] documentIterator ) {
super( documentIterator );
}
/** Creates the interval iterator that composes the intervals of the component iterators for the given index.
 *
 * <p>When <code>indexIterator</code> is available (non-<code>null</code>) the
 * position-based {@link AndIndexIntervalIterator} is used; otherwise the generic
 * {@link AndIntervalIterator} is used.
 * NOTE(review): <code>indexIterator</code> is declared outside this class; presumably it is
 * non-<code>null</code> only when every component is an index iterator. Confirm in the superclass.
 *
 * @param index the index the returned interval iterator will refer to.
 * @return a new, not yet reset, AND interval iterator for <code>index</code>.
 */
protected IntervalIterator getComposedIntervalIterator( final Index index ) {
	if ( indexIterator != null ) return new AndIndexIntervalIterator( index );
	return new AndIntervalIterator( index );
}
/** An interval iterator returning the AND (in the Clarke&minus;Cormack&minus;Burkowski lattice) of the component interval iterators.
 *
 * <p>The current interval of every participating component is kept in <code>curr</code> and
 * tracked by an indirect priority queue ordered by {@link Intervals#STARTS_BEFORE_OR_PROLONGS}.
 * A candidate result is the span going from the left extreme of the interval at the top of the queue
 * to the largest right extreme among the current intervals ({@link #maxRight}).
 *
 * <p>Components whose interval iterator is {@link IntervalIterators#TRUE} or
 * {@link IntervalIterators#FALSE} do not take part in the computation.
 */
private class AndIntervalIterator extends AbstractCompositeIntervalIterator implements IntervalIterator {
	/** The index of this iterator. */
	final private Index index;
	/** A heap-based indirect priority queue used to keep track of the currently scanned intervals. */
	final private ObjectHeapSemiIndirectPriorityQueue<Interval> queue;
	/** Whether the scan is over. */
	private boolean endOfProcess;
	/** The left extreme of the last returned interval, or {@link Integer#MIN_VALUE} after a {@link #reset()}. */
	private int lastLeft;
	/** The maximum right extreme currently in the queue. */
	private int maxRight;

	/** Creates a new AND interval iterator.
	 *
	 * <p>No initialisation is performed here: {@link #reset()} must load the
	 * component intervals before the iterator is used.
	 *
	 * @param index the index of the iterator.
	 */
	public AndIntervalIterator( final Index index ) {
		super( n );
		// We just set up some internal data, but we perform no initialisation.
		this.index = index;
		// The queue is typed on Interval: the reference array holds the current interval of each component.
		queue = new ObjectHeapSemiIndirectPriorityQueue<Interval>( curr, Intervals.STARTS_BEFORE_OR_PROLONGS );
	}

	/** Prepares this iterator for the intervals of the component iterators on {@link #index}.
	 *
	 * <p>All state is cleared, then the first interval of every component that is neither
	 * {@link IntervalIterators#TRUE} nor {@link IntervalIterators#FALSE} is loaded and enqueued.
	 *
	 * @throws IOException if a component iterator fails while providing its intervals.
	 */
	public void reset() throws IOException {
		ObjectArrays.fill( curr, null );
		queue.clear();
		next = null;
		maxRight = Integer.MIN_VALUE;
		lastLeft = Integer.MIN_VALUE;
		endOfProcess = false;

		for ( int i = 0; i < n; i++ ) {
			intervalIterator[ i ] = documentIterator[ i ].intervalIterator( index );
			// TRUE and FALSE iterators are simply skipped.
			if ( intervalIterator[ i ] != IntervalIterators.TRUE && intervalIterator[ i ] != IntervalIterators.FALSE ) {
				curr[ i ] = intervalIterator[ i ].nextInterval();
				queue.enqueue( i );
				maxRight = Math.max( maxRight, curr[ i ].right );
			}
		}

		if ( ASSERTS ) assert ! queue.isEmpty();
	}

	/** Adds to the given set the terms reported by every component interval iterator.
	 *
	 * @param terms the set that will receive the terms.
	 */
	public void intervalTerms( final LongSet terms ) {
		for( int i = n; i-- != 0; ) intervalIterator[ i ].intervalTerms( terms );
	}

	/** Returns whether another interval is available, wrapping any {@link IOException} into a {@link RuntimeException}.
	 *
	 * @return true if a call to {@link #nextInterval()} would return an interval.
	 */
	public boolean hasNext() {
		try {
			return hasNextInternal();
		}
		catch ( IOException e ) {
			throw new RuntimeException( e );
		}
	}

	/** Advances the queue until its top interval no longer starts at {@link #lastLeft}.
	 *
	 * <p>A top interval whose left extreme equals the left extreme of the last returned
	 * interval cannot start a new result, so its component is advanced. If that
	 * component is exhausted, the scan is over.
	 *
	 * @return true if a further interval can be generated, false if the scan is over.
	 * @throws IOException if a component iterator fails while providing its next interval.
	 */
	private boolean hasNextInternal() throws IOException {
		if ( endOfProcess ) return false;
		int first;
		while ( curr[ first = queue.first() ].left == lastLeft ) {
			if ( ( curr[ first ] = intervalIterator[ first ].nextInterval() ) == null ) {
				endOfProcess = true;
				return false;
			}
			maxRight = Math.max( maxRight, curr[ first ].right );
			queue.changed();
		}
		return true;
	}

	/** Returns the next interval of the AND, or <code>null</code> if there are no more intervals.
	 *
	 * @return the next interval, or <code>null</code> if the scan is over.
	 * @throws IOException if a component iterator fails while providing its next interval.
	 */
	public Interval nextInterval() throws IOException {
		if ( ! hasNextInternal() ) return null;

		int nextLeft, nextRight;

		do {
			final int first = queue.first();
			// The candidate goes from the left extreme of the top interval to the current maximum right extreme.
			nextLeft = curr[ first ].left;
			nextRight = maxRight;
			if ( DEBUG ) System.err.println( this + " is saving interval " + Interval.valueOf( nextLeft, nextRight ) );

			/* We check whether the current top is equal to the span of the queue, and
			 * whether the top interval iterator is exhausted. In both cases, the
			 * current span is guaranteed to be a minimal interval. */
			if ( curr[ first ].right == maxRight || ( endOfProcess = ( curr[ first ] = intervalIterator[ first ].nextInterval() ) == null ) ) break;

			maxRight = Math.max( maxRight, curr[ first ].right );
			queue.changed();
			// If advancing the top did not move the right extreme, a candidate with the same
			// right extreme and a new top is still possible, so we try again.
		} while ( maxRight == nextRight );

		return Interval.valueOf( lastLeft = nextLeft, nextRight );
	}

	/** Returns the sum of the extents of all component interval iterators.
	 *
	 * @return the sum of the extents of the components.
	 */
	public int extent() {
		int s = 0;
		for ( int i = n; i-- != 0; ) s += intervalIterator[ i ].extent();
		return s;
	}
}
/** An interval iterator returning the AND (in the Clarke&minus;Cormack&minus;Burkowski lattice) of the component
 * index iterators, working directly on their position arrays.
 *
 * <p>Each position is used both as left and as right extreme, that is, as a singleton interval.
 * Only the components whose index is the index of this iterator, and whose index has positions,
 * take part in the computation (see {@link #reset()}).
 */
private class AndIndexIntervalIterator extends AbstractCompositeIndexIntervalIterator implements IntervalIterator {
/** The index of this iterator. */
final private Index index;
/** A heap-based indirect priority queue used to keep track of the currently scanned positions. */
final private IntHeapSemiIndirectPriorityQueue queue;
/** Whether the scan is over. */
private boolean endOfProcess;
/** The left extreme of the last returned interval, or {@link Integer#MIN_VALUE} after a {@link #reset()}. */
private int lastLeft;
/** The maximum right extreme currently in the queue. */
private int maxRight;
/** Creates a new AND interval iterator.
*
* <p>No initialisation is performed here: {@link #reset()} must load the
* positions of the components before the iterator is used.
*
* @param index the index of the iterator.
*/
public AndIndexIntervalIterator( final Index index ) {
super( n );
// We just set up some internal data, but we perform no initialisation.
this.index = index;
// The queue orders component indices by the values in curr (the current position of each component).
queue = new IntHeapSemiIndirectPriorityQueue( curr );
}
/** Prepares this iterator for the positions of the component index iterators.
*
* <p>All state is cleared, then the position array and the count of every
* participating component are loaded, and its first position is enqueued.
*
* @throws IOException if a component iterator fails while providing its positions.
*/
public void reset() throws IOException {
queue.clear();
maxRight = Integer.MIN_VALUE;
lastLeft = Integer.MIN_VALUE;
// NOTE(review): next is not declared in this class; presumably it is inherited state. Confirm in the superclass.
next = null;
endOfProcess = false;
for ( int i = 0; i < n; i++ ) {
// The case != index is identical to the TRUE/FALSE case in AndIntervalIterator:
// a component on another index, or on an index without positions, is simply skipped.
final Index indexIteratorIndex = indexIterator[ i ].index();
if ( indexIteratorIndex == index && indexIteratorIndex.hasPositions ) {
position[ i ] = indexIterator[ i ].positionArray();
count[ i ] = indexIterator[ i ].count();
// Start from the first position of this component.
curr[ i ] = position[ i ][ currPos[ i ] = 0 ];
queue.enqueue( i );
maxRight = Math.max( maxRight, curr[ i ] );
}
}
if ( ASSERTS ) assert ! queue.isEmpty();
}
/** Adds to the given set the term number of every component index iterator.
*
* <p>Note that all components contribute their term, including those
* that {@link #reset()} skips.
*
* @param terms the set that will receive the term numbers.
*/
public void intervalTerms( final LongSet terms ) {
for( int i = n; i-- != 0; ) terms.add( indexIterator[ i ].termNumber() );
}
/** Returns whether another interval is available.
*
* <p>A top position equal to the left extreme of the last returned interval cannot
* start a new result, so its component is advanced. If that component has no
* more positions, the scan is over.
*
* @return true if a call to {@link #nextInterval()} would return an interval.
*/
public boolean hasNext() {
if ( endOfProcess ) return false;
int first;
while ( curr[ first = queue.first() ] == lastLeft ) {
// Move the top component to its next position, if any.
if ( ++currPos[ first ] == count[ first ] ) {
endOfProcess = true;
return false;
}
curr[ first ] = position[ first ][ currPos[ first ] ];
maxRight = Math.max( maxRight, curr[ first ] );
queue.changed();
}
return true;
}
/** Returns the next interval, throwing an exception if there is none.
*
* @return the next interval.
* @throws NoSuchElementException if {@link #nextInterval()} returns <code>null</code>.
*/
public Interval next() {
final Interval nextInterval = nextInterval();
if ( nextInterval == null ) throw new NoSuchElementException();
return nextInterval;
}
/** Returns the next interval of the AND, or <code>null</code> if there are no more intervals.
*
* @return the next interval, or <code>null</code> if the scan is over.
*/
public Interval nextInterval() {
if ( ! hasNext() ) return null;
int nextLeft, nextRight;
do {
final int first = queue.first();
// The candidate goes from the smallest current position to the largest one.
nextLeft = curr[ first ];
nextRight = maxRight;
if ( DEBUG ) System.err.println( this + " is saving interval " + Interval.valueOf( nextLeft, nextRight ) );
/* We check whether all iterators are on the same position, and
* whether the top interval iterator is exhausted. In both cases, the
* current span is guaranteed to be a minimal interval. */
if ( curr[ first ] == maxRight || ( endOfProcess = ++currPos[ first ] == count[ first ] ) ) break;
curr[ first ] = position[ first ][ currPos[ first ] ];
if ( maxRight < curr[ first ] ) maxRight = curr[ first ];
queue.changed();
// If advancing the top did not move the right extreme, a candidate with the same
// right extreme and a new top is still possible, so we try again.
} while ( maxRight == nextRight );
return Interval.valueOf( lastLeft = nextLeft, nextRight );
}
/** Returns the number of component iterators as the extent.
*
* @return <code>n</code>, the number of components.
*/
public int extent() {
return n;
}
}
}