// src.it.unimi.dsi.mg4j.search.OrderedAndDocumentIterator (Maven / Gradle / Ivy)
package it.unimi.dsi.mg4j.search;
/*
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2003-2011 Paolo Boldi and Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.fastutil.ints.IntArrays;
import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.util.Interval;
import it.unimi.dsi.util.Intervals;
import java.io.IOException;
/** An iterator returning documents containing nonoverlapping intervals in query order
 * satisfying the underlying queries.
 *
 * <p>In practice, this iterator implements <em>strictly ordered AND</em>, which is
 * satisfied when the subqueries are satisfied by nonoverlapping intervals in query order.
 *
 * <p>Instances are obtained through the {@code getInstance} factory methods, which
 * handle the empty and singleton cases without creating an instance of this class.
 */
public class OrderedAndDocumentIterator extends AbstractOrderedIntervalDocumentIterator {
	/** Compile-time switch guarding the internal sanity checks of this class. */
	@SuppressWarnings("hiding")
	private final static boolean ASSERTS = false;

	/** Returns a document iterator that computes the ordered AND of the given array of iterators.
	 *
	 * <p>Note that the special cases of the empty and of the singleton arrays
	 * are handled efficiently: for an empty array a {@link TrueDocumentIterator} for
	 * <code>index</code> is returned, and for a singleton array its only element is returned as is.
	 *
	 * @param index the default index; relevant only if <code>documentIterator</code> has zero length.
	 * @param documentIterator the iterators to be joined.
	 * @return a document iterator that computes the ordered AND of <code>documentIterator</code>.
	 * @throws IOException if it is thrown while building the resulting iterator.
	 */
	public static DocumentIterator getInstance( final Index index, final DocumentIterator... documentIterator ) throws IOException {
		if ( documentIterator.length == 0 ) return TrueDocumentIterator.getInstance( index );
		if ( documentIterator.length == 1 ) return documentIterator[ 0 ];
		return new OrderedAndDocumentIterator( documentIterator );
	}

	/** Returns a document iterator that computes the ordered AND of the given nonzero-length array of iterators.
	 *
	 * <p>Note that the special case of the singleton array is handled efficiently.
	 *
	 * @param documentIterator the iterators to be joined (at least one).
	 * @return a document iterator that computes the ordered AND of <code>documentIterator</code>.
	 * @throws IllegalArgumentException if <code>documentIterator</code> is empty.
	 * @throws IOException if it is thrown while building the resulting iterator.
	 */
	public static DocumentIterator getInstance( final DocumentIterator... documentIterator ) throws IOException {
		if ( documentIterator.length == 0 ) throw new IllegalArgumentException( "The provided array of document iterators is empty." );
		// The index is irrelevant here, as it is used only for empty arrays.
		return getInstance( null, documentIterator );
	}

	/** Creates a new ordered-AND iterator over the given component iterators.
	 *
	 * <p>If the superclass constructor leaves this iterator positioned on a document
	 * (i.e., <code>ahead</code> is true), that document is accepted only if it actually has
	 * at least one interval; otherwise, the iterator is moved forward using
	 * <code>nextDocumentInternal()</code>.
	 *
	 * @param documentIterator the iterators to be joined.
	 * @throws IOException if it is thrown by the superclass constructor or while looking for the first valid document.
	 */
	protected OrderedAndDocumentIterator( final DocumentIterator[] documentIterator ) throws IOException {
		super( documentIterator );
		if ( ahead ) {
			// The document over which we are positioned might not be valid.
			ahead = false;
			// NOTE(review): -1 is presumably the "no more documents" marker of nextDocumentInternal() -- confirm in the superclass.
			if ( intervalIterator().hasNext() || nextDocumentInternal() != -1 ) ahead = true;
		}
	}

	/** Returns the interval iterator implementing the ordered AND for this document iterator.
	 *
	 * @param unused ignored (when {@link #ASSERTS} is enabled it is just checked to be <code>soleIndex</code>).
	 * @return an {@link OrderedAndIntervalIterator} if <code>indexIterator</code> is <code>null</code>, and
	 * an {@link OrderedAndIndexIntervalIterator} otherwise.
	 */
	protected IntervalIterator getComposedIntervalIterator( final Index unused ) {
		if ( ASSERTS ) assert unused == soleIndex;
		return indexIterator == null ? new OrderedAndIntervalIterator() : new OrderedAndIndexIntervalIterator();
	}

	/** An interval iterator returning the ordered AND of the component iterators
	 * (i.e., intervals made of sequences of intervals
	 * of the component iterator, in the given order).
	 *
	 * <p>In this implementation, a candidate can still be returned by {@link #nextInterval()}
	 * in the very call that sets {@link #endOfProcess}: when a component iterator is exhausted,
	 * the flag is raised and the candidate computed so far (if any) is returned; only
	 * subsequent calls return <code>null</code>.
	 */
	private class OrderedAndIntervalIterator extends AbstractOrderedIntervalIterator {
		@SuppressWarnings("hiding")
		private final static boolean DEBUG = false;
		/** Whether the scan is over. */
		private boolean endOfProcess;
		/** The index of the next list to be aligned (from 0 to {@link #m}). */
		private int toBeAligned;
		/** The number of non-{@link IntervalIterators#TRUE} interval iterator. Only
		 * elements with index smaller than this value are valid in {@link AbstractCompositeIntervalIterator#intervalIterator}. */
		private int m;

		/** Prepares this iterator for the current document.
		 *
		 * <p>The interval iterators of the component document iterators are compacted at the
		 * beginning of <code>intervalIterator</code>, skipping those equal to {@link IntervalIterators#TRUE} and
		 * leaving their number in {@link #m}. All current intervals are initialized to
		 * {@link Intervals#MINUS_INFINITY}, and then the first interval of the first list is fetched.
		 *
		 * @throws IllegalStateException if all component interval iterators are {@link IntervalIterators#TRUE}.
		 * @throws IOException if it is thrown by a component iterator.
		 */
		public void reset() throws IOException {
			m = 0;
			next = null;
			toBeAligned = 1;
			endOfProcess = false;
			for( int i = 0; i < n; i++ ) {
				// The slot of index m is overwritten until a non-TRUE iterator is found.
				intervalIterator[ m ] = documentIterator[ i ].intervalIterator();
				if ( intervalIterator[ m ] != IntervalIterators.TRUE ) {
					if ( ASSERTS ) assert intervalIterator[ m ].hasNext();
					curr[ m++ ] = Intervals.MINUS_INFINITY;
				}
			}
			if ( m == 0 ) throw new IllegalStateException();
			endOfProcess = ( curr[ 0 ] = intervalIterator[ 0 ].nextInterval() ) == null;
		}

		/** Adds to the given set the terms reported by the valid component interval iterators.
		 *
		 * @param terms the set that will be passed to each valid component iterator.
		 */
		public void intervalTerms( final IntSet terms ) {
			// Only the first m slots are valid: reset() may leave TRUE or leftovers of a previous
			// call in the slots from m on, so (as in extent()) we must not go up to n.
			for( int i = m; i-- != 0; ) intervalIterator[ i ].intervalTerms( terms );
		}

		/** Returns the next interval of the ordered AND.
		 *
		 * @return the next interval, or <code>null</code> if there are no more intervals.
		 * @throws IOException if it is thrown by a component iterator.
		 */
		public Interval nextInterval() throws IOException {
			// An interval stored in next has precedence, and it is consumed by this call.
			if ( next != null ) {
				final Interval result = next;
				next = null;
				return result;
			}

			if ( endOfProcess ) return null;

			// Local copies of the fields used in the loops.
			final Interval[] curr = this.curr;
			final IntervalIterator[] intervalIterator = this.intervalIterator;
			final int m = this.m;
			// We have to decrease leftOfLast to avoid overflows. Do not test it against Integer.MAX_VALUE.
			// [nextLeft..nextRight] is the current candidate (none if nextLeft is Integer.MAX_VALUE);
			// leftOfLast is the left extreme of the last component interval of the candidate.
			int nextLeft = Integer.MAX_VALUE, nextRight = Integer.MAX_VALUE, leftOfLast = Integer.MAX_VALUE - 1;
			// We restart from the list at which the previous call stopped.
			int i = toBeAligned;

			for(;;) {
				if ( DEBUG ) System.err.println( "Current candidate: " + Interval.valueOf( nextLeft, nextRight ) );
				for(;;) {
					if ( curr[ i - 1 ].right >= leftOfLast - ( m - i - 1 ) ) {
						// If we're here the last interval we obtained is aligned, but it cannot be completed to an alignment smaller than [nextLeft..nextRight]
						toBeAligned = i;
						if ( ASSERTS ) assert nextLeft != Integer.MAX_VALUE;
						return Interval.valueOf( nextLeft, nextRight );
					}

					if ( i == m || curr[ i ].left > curr[ i - 1 ].right ) break;
					// Advance list i until its current interval lies strictly after that of list i - 1.
					do {
						if ( curr[ i ].right >= leftOfLast - ( m - i - 2 ) || ( curr[ i ] = intervalIterator[ i ].nextInterval() ) == null ) {
							toBeAligned = i;
							// The scan is over only if list i is exhausted.
							endOfProcess = curr[ i ] == null;
							return nextLeft == Integer.MAX_VALUE ? null : Interval.valueOf( nextLeft, nextRight );
						}
					} while ( curr[ i ].left <= curr[ i - 1 ].right );
					i++;
				}

				// Record a new candidate spanning from the first to the last current interval.
				nextLeft = curr[ 0 ].left;
				nextRight = curr[ m - 1 ].right;
				leftOfLast = curr[ m - 1 ].left;
				i = 1;
				// Advance the first list; if it is exhausted, the candidate is the last interval.
				if ( ( curr[ 0 ] = intervalIterator[ 0 ].nextInterval() ) == null ) {
					endOfProcess = true;
					toBeAligned = 1;
					return Interval.valueOf( nextLeft, nextRight );
				}
			}
		}

		/** Returns the sum of the extents of the valid (i.e., of index smaller than {@link #m}) component interval iterators.
		 *
		 * @return the sum of the extents of the valid component interval iterators.
		 */
		public int extent() {
			int s = 0;
			for ( int i = m; i-- != 0; ) s += intervalIterator[ i ].extent();
			return s;
		}
	}

	/** An interval iterator returning the ordered AND of the component index iterators
	 * (i.e., intervals spanning from a position of the first component to a position of the last
	 * component, through strictly increasing positions of the intermediate components, in the given order).
	 *
	 * <p>This implementation works directly on the position arrays of the component index iterators.
	 * As soon as the positions of a component are exhausted, {@link #endOfProcess} is set, and
	 * the candidate computed so far (if any) is returned by the same call of {@link #nextInterval()}.
	 */
	private class OrderedAndIndexIntervalIterator extends AbstractOrderedIndexIntervalIterator {
		/** Whether the scan is over. */
		private boolean endOfProcess;
		/** The index of the next list to be aligned. */
		private int toBeAligned;

		/** Prepares this iterator for the current document.
		 *
		 * <p>Counts and position arrays are read from all component index iterators, all current
		 * positions are set to {@link Integer#MIN_VALUE}, and then the first position of the first component is loaded.
		 *
		 * @throws IOException if it is thrown by a component index iterator.
		 */
		public void reset() throws IOException {
			final int[][] position = this.position;
			final int[] curr = this.curr;
			final int[] count = this.count;

			IntArrays.fill( currPos, -1 );

			for( int i = n; i-- != 0; ) {
				count[ i ] = indexIterator[ i ].count();
				position[ i ] = indexIterator[ i ].positionArray();
				curr[ i ] = Integer.MIN_VALUE;
			}

			next = null;
			toBeAligned = 1;
			endOfProcess = false;
			// NOTE(review): this assumes that the first component has at least one position -- confirm.
			curr[ 0 ] = position[ 0 ][ currPos[ 0 ] = 0 ];
		}

		/** Adds to the given set the term numbers of all component index iterators.
		 *
		 * @param terms the set to which the term numbers will be added.
		 */
		public void intervalTerms( final IntSet terms ) {
			for( int i = n; i-- != 0; ) terms.add( indexIterator[ i ].termNumber() );
		}

		/** Returns the next interval of the ordered AND.
		 *
		 * @return the next interval, or <code>null</code> if there are no more intervals.
		 */
		public Interval nextInterval() {
			// An interval stored in next has precedence, and it is consumed by this call.
			if ( next != null ) {
				final Interval result = next;
				next = null;
				return result;
			}

			if ( endOfProcess ) return null;

			// We have to decrease nextRight to avoid overflows. Do not test it against Integer.MAX_VALUE.
			// [nextLeft..nextRight] is the current candidate (none if nextLeft is Integer.MAX_VALUE).
			int nextLeft = Integer.MAX_VALUE, nextRight = Integer.MAX_VALUE - 1;
			// Local copies of the fields used in the loops.
			final int[][] position = this.position;
			final int[] currPos = this.currPos;
			final int[] count = this.count;
			final int[] curr = this.curr;
			final int n = OrderedAndDocumentIterator.this.n;
			// We restart from the list at which the previous call stopped.
			int i = toBeAligned;

			for(;;) {
				if ( DEBUG ) System.err.println( "Current candidate: " + Interval.valueOf( nextLeft, nextRight ) );
				for(;;) {
					if ( curr[ i - 1 ] >= nextRight - ( n - i - 1 ) ) {
						// If we're here the last position we obtained is aligned, but it cannot be completed to an alignment smaller than [nextLeft..nextRight]
						toBeAligned = i;
						if ( ASSERTS ) assert nextLeft != Integer.MAX_VALUE;
						return Interval.valueOf( nextLeft, nextRight );
					}

					// Note that in this particular case we must check that this is not the first iteration of the external loop
					if ( i == n || curr[ i ] > curr[ i - 1 ] ) break;
					// Advance list i until its current position lies strictly after that of list i - 1.
					do {
						// For singletons, curr[ i ] >= nextRight - ( n - i - 2 ) is always false here.
						if ( ASSERTS ) assert curr[ i ] < nextRight - ( n - i - 2 );
						if ( ++currPos[ i ] == count[ i ] ) {
							// List i is exhausted: return the candidate, if any, and stop.
							endOfProcess = true;
							return nextLeft == Integer.MAX_VALUE ? null : Interval.valueOf( nextLeft, nextRight );
						}
						else curr[ i ] = position[ i ][ currPos[ i ] ];
					} while ( curr[ i ] <= curr[ i - 1 ] );
					i++;
				}

				// Record a new candidate spanning from the first to the last current position.
				nextLeft = curr[ 0 ];
				nextRight = curr[ n - 1 ];
				i = 1;
				// Advance the first list; if it is exhausted, the candidate is the last interval.
				if ( ++currPos[ 0 ] == count[ 0 ] ) {
					endOfProcess = true;
					return Interval.valueOf( nextLeft, nextRight );
				}
				curr[ 0 ] = position[ 0 ][ currPos[ 0 ] ];
			}
		}

		/** Returns the number of component iterators.
		 *
		 * @return the value of <code>n</code> of the enclosing document iterator.
		 */
		public int extent() {
			return n;
		}
	}
}