![JAR search and dependency download from the Maven repository](/logo.png)
src.it.unimi.dsi.big.mg4j.search.AbstractUnionDocumentIterator Maven / Gradle / Ivy
Show all versions of mg4j-big Show documentation
package it.unimi.dsi.big.mg4j.search;
/*
* MG4J: Managing Gigabytes for Java (big)
*
* Copyright (C) 2003-2011 Paolo Boldi and Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.big.mg4j.index.Index;
import it.unimi.dsi.big.mg4j.search.visitor.DocumentIteratorVisitor;
import it.unimi.dsi.fastutil.IndirectPriorityQueue;
import it.unimi.dsi.fastutil.longs.LongHeapSemiIndirectPriorityQueue;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
import java.io.IOException;
import java.util.Iterator;
/** A document iterator on documents, generating the union of the documents returned
 * by a number of document iterators.
 *
 * <p>The pattern of this class is the same as that of {@link AbstractIntersectionDocumentIterator}.
 * Additionally, this class provides a mechanism that makes accessible the set of component
 * document iterators that are {@linkplain #computeFront() positioned on the current document}.
 *
 * <p>Implementation outline: the current document pointer of each component iterator is kept
 * in {@link #refArray}, and the indices of the components that are not exhausted are kept in
 * {@link #queue}, ordered by those pointers. The current document of the union is always the
 * pointer of the first (least) element of the queue.
 */
public abstract class AbstractUnionDocumentIterator extends AbstractCompositeDocumentIterator {
    private static final boolean DEBUG = false;

    /** A heap-based semi-indirect priority queue used to keep track of the currently scanned integers. */
    protected final LongHeapSemiIndirectPriorityQueue queue;
    /** The {@link IndirectPriorityQueue#front(int[])} of {@link #queue}, if {@link #frontSize} is not -1. */
    protected final int[] front;
    /** The reference array used for the queue. */
    protected final long[] refArray;
    /** A map from indices to interval iterators. */
    private final Reference2ReferenceArrayMap<Index, IntervalIterator> intervalIterators;
    /** A map from indices to the iterators returned for the current document. The key set may
     * not contain an index because the related iterator has never been requested. Moreover,
     * the iterator in this map for a given index may differ from the one in {@link #intervalIterators}
     * because it could be {@link IntervalIterators#TRUE} (in fact, in that case it may even
     * happen that {@link #intervalIterators} does not contain the index). */
    private final Reference2ReferenceArrayMap<Index, IntervalIterator> currentIterators;
    /** An unmodifiable wrapper around {@link #currentIterators}. */
    private final Reference2ReferenceMap<Index, IntervalIterator> unmodifiableCurrentIterators;
    /** The number of valid entries in {@link #front}, or -1 if the front has not been computed for the current document. */
    protected int frontSize = -1;

    /** Creates a new document iterator that computes the OR of the given array of iterators.
     *
     * <p>Every component iterator is advanced once with {@code nextDocument()}; only the
     * components that return a document (i.e., a value different from -1) are enqueued.
     *
     * @param documentIterator the iterators to be joined.
     * @throws IOException if a component iterator throws it while being advanced to its first document.
     */
    protected AbstractUnionDocumentIterator( final DocumentIterator... documentIterator ) throws IOException {
        super( documentIterator );
        this.refArray = new long[ n ];
        queue = new LongHeapSemiIndirectPriorityQueue( refArray );
        intervalIterators = new Reference2ReferenceArrayMap<Index, IntervalIterator>( indices.size() );
        currentIterators = new Reference2ReferenceArrayMap<Index, IntervalIterator>( indices.size() );
        unmodifiableCurrentIterators = Reference2ReferenceMaps.unmodifiable( currentIterators );

        // Only add to the queue nonempty iterators...
        for ( int i = 0; i < n; i++ ) {
            if ( ( refArray[ i ] = documentIterator[ i ].nextDocument() ) != -1 ) queue.enqueue( i );
        }

        // If queue is empty, the process is over
        if ( queue.isEmpty() ) curr = END_OF_LIST;

        // The front can never contain more indices than the components that were enqueued.
        front = new int[ queue.size() ];
    }

    /** Advances this iterator to the first document of the union that is greater than or equal
     * to the given one.
     *
     * <p>If the current document is already greater than or equal to {@code n}, nothing happens.
     * Otherwise the least component is repeatedly skipped to {@code n} until the least pointer
     * in the queue is at least {@code n}; components that return {@code END_OF_LIST} are removed
     * from the queue.
     *
     * @param n the document to skip to (note that this parameter shadows the field of the same name
     * used in the constructor).
     * @return the current document if it is already at least {@code n}; otherwise the new current
     * document, or {@code END_OF_LIST} if all components have been exhausted.
     * @throws IOException if a component iterator throws it while skipping.
     */
    public long skipTo( final long n ) throws IOException {
        if ( curr >= n ) return curr;

        currentIterators.clear();
        frontSize = -1; // Invalidate front

        int first;
        long res;
        while ( refArray[ first = queue.first() ] < n ) {
            // Try to move the minimum to n or beyond
            if ( ( res = documentIterator[ first ].skipTo( n ) ) == END_OF_LIST ) {
                // The component is exhausted: remove it
                queue.dequeue();
                // If nothing else remains, we are done
                if ( queue.isEmpty() ) return curr = END_OF_LIST;
            }
            else {
                // Advance the top element, and signal this fact to the queue
                refArray[ first ] = res;
                queue.changed();
            }
        }

        return curr = refArray[ first ];
    }

    /** Returns the next document of the union.
     *
     * <p>On the first call (recognized by the current document being -1) the least pointer in
     * the queue is returned without advancing any component, as all components were already
     * advanced by the constructor. On subsequent calls, all components positioned on the
     * current document are advanced, and exhausted components are removed from the queue.
     *
     * @return the next document, or -1 if no more documents are available.
     * @throws IOException if a component iterator throws it while being advanced.
     */
    public long nextDocument() throws IOException {
        if ( curr == END_OF_LIST ) return -1;

        final long c = refArray[ queue.first() ];
        // On the first call, the queue should not be advanced.
        if ( curr == -1 ) return curr = c;

        currentIterators.clear();
        frontSize = -1; // Invalidate front

        // The least element
        int first;
        // Advance all elements equal to the least one
        while ( refArray[ first = queue.first() ] == c ) {
            if ( ( refArray[ first ] = documentIterator[ first ].nextDocument() ) != -1 ) queue.changed();
            else {
                // The component is exhausted: remove it
                queue.dequeue();
                // If nothing else remains, we are done
                if ( queue.isEmpty() ) {
                    curr = END_OF_LIST;
                    return -1;
                }
            }
        }

        return curr = refArray[ first ];
    }

    /** Forces computation of the current front, returning the number of indices it contains.
     *
     * <p>After a call to this method,
     * the first elements of {@link #front} contain
     * the indices of the {@linkplain AbstractCompositeDocumentIterator#documentIterator component document iterators}
     * that are positioned on the current document. If the front has already been
     * computed for the current document, this method has no side effects.
     *
     * @return the size of the current front (the number of valid entries in {@link #front}).
     */
    protected int computeFront() {
        if ( frontSize == -1 ) frontSize = queue.front( front );
        return frontSize;
    }

    /** Returns the interval iterators of this document iterator for the current document,
     * one for each index in {@code indices}.
     *
     * <p>Each iterator is obtained by calling {@link #intervalIterator(Index)}, which stores
     * it in the map of current iterators.
     *
     * @return an unmodifiable view of the map from indices to the interval iterators for the current document.
     * @throws IOException if {@link #intervalIterator(Index)} throws it.
     */
    public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators() throws IOException {
        final Iterator<Index> i = indices.iterator();
        while ( i.hasNext() ) intervalIterator( i.next() );
        return unmodifiableCurrentIterators;
    }

    /** Returns the interval iterator of this document iterator for the given index and the current document.
     *
     * <p>The result is cached until the current document changes, so repeated calls with the
     * same index return the same iterator. The result is computed from the components in the
     * {@linkplain #computeFront() front} as follows:
     * <ul>
     * <li>if the index is not among {@code indices}, or all components return
     * {@link IntervalIterators#FALSE}, the result is {@link IntervalIterators#FALSE};
     * <li>otherwise, if all components return either {@link IntervalIterators#TRUE} or
     * {@link IntervalIterators#FALSE}, the result is {@link IntervalIterators#TRUE};
     * <li>otherwise, if exactly one component returns a nontrivial iterator, that iterator is returned;
     * <li>otherwise, the (reset) iterator provided by {@link #getComposedIntervalIterator(Index)} is returned.
     * </ul>
     *
     * @param index an index.
     * @return the interval iterator for {@code index} on the current document.
     * @throws IOException if a component iterator, or the reset of the composed iterator, throws it.
     */
    public IntervalIterator intervalIterator( final Index index ) throws IOException {
        ensureOnADocument();
        if ( DEBUG ) System.err.println( this + ".intervalIterator(" + index + ")" );
        if ( ! indices.contains( index ) ) return IntervalIterators.FALSE;

        // If the iterator has been created and it's ready, we just return it.
        IntervalIterator result = currentIterators.get( index );
        if ( result != null ) return result;

        int t = 0, f = 0;
        final int c = computeFront();

        /* We count the number of TRUE and FALSE iterators. In the case of index iterators, we can avoid
         * the check and just rely on the index internals.
         *
         * If all iterators are FALSE, we return FALSE. Else if all remaining iterators are TRUE
         * we return TRUE.
         */
        IntervalIterator soleIterator = null;

        if ( indexIterator == null ) {
            for ( int i = c; i-- != 0; ) {
                final IntervalIterator component = documentIterator[ front[ i ] ].intervalIterator( index );
                if ( component == IntervalIterators.TRUE ) t++;
                else if ( component == IntervalIterators.FALSE ) f++;
                else if ( soleIterator == null ) soleIterator = component;
            }
        }
        else {
            for ( int i = c; i-- != 0; ) {
                final Index indexIteratorIndex = indexIterator[ front[ i ] ].index();
                if ( indexIteratorIndex != index ) f++;
                else if ( ! indexIteratorIndex.hasPositions ) t++;
                else if ( soleIterator == null ) soleIterator = indexIterator[ front[ i ] ].intervalIterator( index );
            }
        }

        if ( f == c ) result = IntervalIterators.FALSE;
        else if ( f + t == c ) result = IntervalIterators.TRUE;
        else if ( f + t < c - 1 ) {
            // At least two nontrivial components: use the (lazily created) composed iterator.
            result = intervalIterators.get( index );
            if ( result == null ) intervalIterators.put( index, result = getComposedIntervalIterator( index ) );
            result.reset();
        }
        else result = soleIterator; // Exactly one nontrivial component.

        currentIterators.put( index, result );
        return result;
    }

    /** Returns the interval iterator that composes the interval iterators of the component
     * iterators for the given index.
     *
     * <p>This method is invoked by {@link #intervalIterator(Index)} at most once per index:
     * the returned iterator is stored and {@linkplain IntervalIterator#reset() reset} each time it is needed.
     *
     * @param index an index.
     * @return the composed interval iterator for {@code index}.
     */
    protected abstract IntervalIterator getComposedIntervalIterator( Index index );

    /** Invokes {@link #acceptOnTruePaths(DocumentIteratorVisitor)} only on component
     * iterators positioned on the current document.
     *
     * <p>If {@code visitor.newArray()} returns {@code null}, the components are still visited,
     * but their results are not collected and {@code visitPost()} receives a {@code null} array.
     *
     * @param visitor a visitor.
     * @return the result of {@code visitor.visitPost()}, or {@code null} if
     * {@code visitor.visitPre()} returns false or the visit of a component returns {@code null}.
     * @throws IOException if the visit of a component throws it.
     */
    @Override
    public <T> T acceptOnTruePaths( DocumentIteratorVisitor<T> visitor ) throws IOException {
        if ( ! visitor.visitPre( this ) ) return null;

        final int s = computeFront();
        final T[] a = visitor.newArray( s );

        if ( a == null ) {
            for ( int i = 0; i < s; i++ ) {
                if ( documentIterator[ front[ i ] ].acceptOnTruePaths( visitor ) == null ) return null;
            }
        }
        else {
            for ( int i = 0; i < s; i++ ) {
                if ( ( a[ i ] = documentIterator[ front[ i ] ].acceptOnTruePaths( visitor ) ) == null ) return null;
            }
        }

        return visitor.visitPost( this, a );
    }
}