// Source listing of it.unimi.dsi.big.mg4j.search.AlignDocumentIterator, from the mg4j-big artifact (Maven / Gradle / Ivy).
package it.unimi.dsi.big.mg4j.search;
/*
* MG4J: Managing Gigabytes for Java (big)
*
* Copyright (C) 2008-2011 Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.big.mg4j.index.Index;
import it.unimi.dsi.big.mg4j.index.IndexIterator;
import it.unimi.dsi.big.mg4j.search.visitor.DocumentIteratorVisitor;
import it.unimi.dsi.fastutil.longs.LongSet;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.util.Interval;
import java.io.IOException;
/** A document iterator that aligns the results of two iterators over
 * different indices.
 *
 * <p>This class is an example of cross-index computation. As in the case of an
 * {@link AndDocumentIterator}, we intersect the posting lists. However, once
 * we get to the index level, we actually return just intervals that appear in
 * <em>all</em> component iterators. Of course, this is meaningful only if all
 * indices represent different views on the same data, a typical example being
 * semantic tagging.
 *
 * <p>An instance of this class exposes a single interval iterator associated with
 * the index of the <em>first</em> component iterator, as all interval iterators
 * are exhausted during the computation of their intersection.
 * Correspondingly, a call to {@link IntervalIterator#intervalTerms(LongSet)} just
 * returns the terms related to the <em>first</em> component iterator.
 */
public class AlignDocumentIterator extends AbstractDocumentIterator {
    private final static boolean DEBUG = false;

    /** The first operand, to be aligned. */
    final private DocumentIterator firstIterator;
    /** The second operand, to be used to align the first operand. */
    final private DocumentIterator secondIterator;
    /** {@link #firstIterator} cast to an {@link IndexIterator}, if <em>both</em> operands are index iterators; <code>null</code> otherwise. */
    final private IndexIterator firstIndexIterator;
    /** {@link #secondIterator} cast to an {@link IndexIterator}, if <em>both</em> operands are index iterators; <code>null</code> otherwise. */
    final private IndexIterator secondIndexIterator;
    /** The sole index involved in this iterator (the one of the first operand). */
    final private Index index;
    /** A singleton map associating {@link #index} with the interval iterator for the current document. */
    final private Reference2ReferenceMap<Index,IntervalIterator> currentIterators;
    /** An unmodifiable wrapper around {@link #currentIterators}. */
    final private Reference2ReferenceMap<Index,IntervalIterator> unmodifiableCurrentIterators;
    /** The (lazily created, reusable) interval iterator associated with this document iterator, or <code>null</code>. */
    private IntervalIterator intervalIterator;
    /** The iterator returned for the current document, if any, or <code>null</code>. */
    private IntervalIterator currentIterator;

    /** Returns a document iterator that aligns the first iterator to the second.
     *
     * @param firstIterator the iterator to be aligned.
     * @param secondIterator the iterator used to align <code>firstIterator</code>.
     *
     * @return a document iterator that computes the alignment of <code>firstIterator</code>
     * on <code>secondIterator</code>.
     * @throws IllegalArgumentException if either iterator does not involve exactly one index.
     */
    public static DocumentIterator getInstance( final DocumentIterator firstIterator, final DocumentIterator secondIterator ) {
        return new AlignDocumentIterator( firstIterator, secondIterator );
    }

    /** Creates a new aligning iterator.
     *
     * @param firstIterator the iterator to be aligned.
     * @param secondIterator the iterator used to align <code>firstIterator</code>.
     * @throws IllegalArgumentException if either iterator does not involve exactly one index.
     */
    protected AlignDocumentIterator( final DocumentIterator firstIterator, final DocumentIterator secondIterator ) {
        this.firstIterator = firstIterator;
        this.secondIterator = secondIterator;
        // The position-array fast path is usable only when both operands are index iterators.
        if ( firstIterator instanceof IndexIterator && secondIterator instanceof IndexIterator ) {
            firstIndexIterator = (IndexIterator)firstIterator;
            secondIndexIterator = (IndexIterator)secondIterator;
        }
        else {
            firstIndexIterator = null;
            secondIndexIterator = null;
        }
        if ( firstIterator.indices().size() != 1 || secondIterator.indices().size() != 1 ) throw new IllegalArgumentException( "You can align single-index iterators only" );
        index = firstIterator.indices().iterator().next();
        currentIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( 1 );
        unmodifiableCurrentIterators = Reference2ReferenceMaps.unmodifiable( currentIterators );
    }

    /** Returns the indices of the first operand.
     *
     * @return the set of indices of the first operand.
     */
    public ReferenceSet<Index> indices() {
        return firstIterator.indices();
    }

    /** Advances both operands, starting from a candidate document of the first one, until
     * they are positioned on the same document <em>and</em> the aligned interval iterator
     * for that document is nonempty.
     *
     * <p>On success, <code>curr</code> is set to the document found. On failure <code>curr</code> is left
     * untouched (possibly on a discarded candidate): it is up to the caller to set it to <code>END_OF_LIST</code>.
     *
     * @param first the document on which the first operand is currently positioned.
     * @return the first suitable document, or <code>END_OF_LIST</code> if either operand is exhausted.
     */
    private long alignFrom( long first ) throws IOException {
        long second = -1; // This forces a call to secondIterator.skipTo( first ).
        for( ;; ) {
            if ( first < second ) {
                if ( ( first = firstIterator.skipTo( second ) ) == END_OF_LIST ) return END_OF_LIST;
            }
            else if ( second < first ) {
                if ( ( second = secondIterator.skipTo( first ) ) == END_OF_LIST ) return END_OF_LIST;
            }
            else {
                // Both operands are on the same document: it is a result only if some interval is aligned.
                curr = first;
                if ( intervalIterator().hasNext() ) return first;
                // No aligned interval: drop the cached iterator and try the next candidate.
                currentIterator = null;
                if ( ( first = firstIterator.nextDocument() ) == -1 ) return END_OF_LIST;
            }
        }
    }

    /** Moves to the next document on which both operands have at least one interval in common.
     *
     * @return the next document, or -1 if there are no more documents (in which case
     * <code>curr</code> is set to <code>END_OF_LIST</code>).
     */
    public long nextDocument() throws IOException {
        currentIterator = null;
        final long first = firstIterator.nextDocument();
        if ( first != -1 ) {
            final long aligned = alignFrom( first );
            if ( aligned != END_OF_LIST ) return aligned;
        }
        curr = END_OF_LIST;
        return -1;
    }

    /** Returns whether there may be further documents; this is the case only if both operands may have one.
     *
     * @return true if both operands report that they may have a next document.
     */
    public boolean mayHaveNext() {
        return firstIterator.mayHaveNext() && secondIterator.mayHaveNext();
    }

    /** Skips to the first document greater than or equal to <code>n</code> on which both operands
     * have at least one interval in common.
     *
     * @param n a document pointer.
     * @return the current document if it is already greater than or equal to <code>n</code>;
     * otherwise, the first suitable document, or <code>END_OF_LIST</code> if there is none.
     */
    public long skipTo( final long n ) throws IOException {
        if ( curr >= n ) return curr;
        currentIterator = null;
        final long first = firstIterator.skipTo( n );
        if ( first == END_OF_LIST ) return curr = END_OF_LIST;
        final long aligned = alignFrom( first );
        // On success alignFrom() has already set curr; on failure we must mark the end of list.
        if ( aligned == END_OF_LIST ) curr = END_OF_LIST;
        return aligned;
    }

    /** Returns the interval iterator for the current document and the sole index of this iterator.
     *
     * @return the interval iterator for the current document.
     */
    public IntervalIterator intervalIterator() throws IOException {
        return intervalIterator( index );
    }

    /** Returns an unmodifiable singleton map from the sole index of this iterator to the
     * interval iterator for the current document.
     *
     * @return an unmodifiable map containing the current interval iterator.
     */
    public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException {
        currentIterators.put( index, intervalIterator() );
        return unmodifiableCurrentIterators;
    }

    /** Returns the interval iterator for the current document and the given index.
     *
     * <p>The result is cached until the current document changes, so repeated calls
     * return the same (possibly partially consumed) iterator.
     *
     * @param index an index.
     * @return {@link IntervalIterators#FALSE} if <code>index</code> is not the index of this iterator;
     * otherwise, an iterator over the intervals common to both operands, with the
     * {@link IntervalIterators#TRUE}/{@link IntervalIterators#FALSE} placeholders handled specially.
     */
    public IntervalIterator intervalIterator( final Index index ) throws IOException {
        if ( DEBUG ) System.err.println( this + ".intervalIterator(" + index + ")" );
        ensureOnADocument();
        if ( index != this.index ) return IntervalIterators.FALSE;
        // If the iterator has been created and it's ready, we just return it.
        if ( currentIterator != null ) return currentIterator;

        final IntervalIterator firstIntervalIterator = firstIterator.intervalIterator(), secondIntervalIterator = secondIterator.intervalIterator();
        // Placeholder iterators: TRUE aligns only with TRUE, FALSE aligns with nothing.
        if ( secondIntervalIterator == IntervalIterators.FALSE ) return currentIterator = IntervalIterators.FALSE;
        if ( secondIntervalIterator == IntervalIterators.TRUE ) return currentIterator = firstIntervalIterator == IntervalIterators.TRUE ? IntervalIterators.TRUE : IntervalIterators.FALSE;
        if ( firstIntervalIterator == IntervalIterators.TRUE ) return currentIterator = IntervalIterators.FALSE;

        // Lazily create the reusable aligner, choosing the position-array version when possible.
        if ( intervalIterator == null ) intervalIterator = firstIndexIterator == null ? new AlignIntervalIterator() : new AlignIndexIntervalIterator();
        intervalIterator.reset();
        return currentIterator = intervalIterator;
    }

    /** Disposes both operands.
     *
     * <p>The second operand is disposed even if disposing the first one throws an exception.
     */
    public void dispose() throws IOException {
        try {
            firstIterator.dispose();
        }
        finally {
            secondIterator.dispose();
        }
    }

    /** Accepts a visitor, visiting both operands.
     *
     * @param visitor the visitor.
     * @return the result of {@link DocumentIteratorVisitor#visitPost(DocumentIterator, Object[])}, or
     * <code>null</code> if the visit was interrupted.
     */
    public <T> T accept( final DocumentIteratorVisitor<T> visitor ) throws IOException {
        if ( ! visitor.visitPre( this ) ) return null;
        final T[] a = visitor.newArray( 2 );
        if ( a == null ) {
            // The visitor does not collect results from subnodes.
            if ( firstIterator.accept( visitor ) == null ) return null;
            if ( secondIterator.accept( visitor ) == null ) return null;
        }
        else {
            if ( ( a[ 0 ] = firstIterator.accept( visitor ) ) == null ) return null;
            if ( ( a[ 1 ] = secondIterator.accept( visitor ) ) == null ) return null;
        }
        return visitor.visitPost( this, a );
    }

    /** Accepts a visitor on true paths; only the first operand is visited.
     *
     * @param visitor the visitor.
     * @return the result of {@link DocumentIteratorVisitor#visitPost(DocumentIterator, Object[])}, or
     * <code>null</code> if the visit was interrupted.
     */
    public <T> T acceptOnTruePaths( final DocumentIteratorVisitor<T> visitor ) throws IOException {
        if ( ! visitor.visitPre( this ) ) return null;
        final T[] a = visitor.newArray( 1 );
        if ( a == null ) {
            if ( firstIterator.acceptOnTruePaths( visitor ) == null ) return null;
        }
        else {
            if ( ( a[ 0 ] = firstIterator.acceptOnTruePaths( visitor ) ) == null ) return null;
        }
        return visitor.visitPost( this, a );
    }

    /** An interval iterator returning the intervals that are returned by both component interval iterators. */
    private class AlignIntervalIterator extends AbstractIntervalIterator implements IntervalIterator {
        /** The interval iterator of the first iterator. */
        private IntervalIterator firstIntervalIterator;
        /** The interval iterator of the second iterator. */
        private IntervalIterator secondIntervalIterator;
        /** Whether the scan is over. */
        private boolean endOfProcess;

        /** Prepares this iterator for the current document, fetching the interval iterators of both operands. */
        public void reset() throws IOException {
            next = null;
            endOfProcess = false;
            firstIntervalIterator = firstIterator.intervalIterator();
            secondIntervalIterator = secondIterator.intervalIterator();
        }

        /** Delegates to the interval iterator of the first operand. */
        public void intervalTerms( final LongSet terms ) {
            firstIntervalIterator.intervalTerms( terms );
        }

        /** Returns the next interval returned by both component iterators.
         *
         * @return the next common interval, or <code>null</code> if there are no more.
         */
        public Interval nextInterval() throws IOException {
            // An interval may have been precomputed and parked in next (a field not declared in this class).
            if ( next != null ) {
                final Interval result = next;
                next = null;
                return result;
            }
            if ( endOfProcess ) return null;

            // Both iterators are always advanced, so that a previously returned match is left behind.
            Interval firstInterval = firstIntervalIterator.nextInterval();
            Interval secondInterval = secondIntervalIterator.nextInterval();
            if ( firstInterval == null || secondInterval == null ) {
                endOfProcess = true;
                return null;
            }

            // Advance the iterator that lags behind (by left extreme) until the two intervals coincide.
            while ( ! firstInterval.equals( secondInterval ) ) {
                if ( firstInterval.left <= secondInterval.left ) {
                    if ( ( firstInterval = firstIntervalIterator.nextInterval() ) == null ) {
                        endOfProcess = true;
                        return null;
                    }
                }
                else {
                    if ( ( secondInterval = secondIntervalIterator.nextInterval() ) == null ) {
                        endOfProcess = true;
                        return null;
                    }
                }
            }
            return firstInterval;
        }

        /** Delegates to the interval iterator of the first operand. */
        public int extent() {
            return firstIntervalIterator.extent();
        }

        public String toString() {
            return getClass().getSimpleName() + "(" + firstIterator + ", " + secondIterator + ")";
        }
    }

    /** An interval iterator returning, as singleton intervals, the positions common to the two
     * component index iterators; it works directly on their position arrays. */
    private class AlignIndexIntervalIterator extends AbstractIntervalIterator implements IntervalIterator {
        /** Whether the scan is over. */
        private boolean endOfProcess;
        /** The positions of the first iterator. */
        private int[] firstPosition;
        /** The positions of the second iterator. */
        private int[] secondPosition;
        /** The count of the first iterator. */
        private int firstCount;
        /** The count of the second iterator. */
        private int secondCount;
        /** The index into {@link #firstPosition} of the current position of the first iterator. */
        private int firstCurr;
        /** The index into {@link #secondPosition} of the current position of the second iterator. */
        private int secondCurr;

        /** Prepares this iterator for the current document, fetching position arrays and counts of both operands. */
        public void reset() throws IOException {
            next = null;
            endOfProcess = false;
            firstPosition = firstIndexIterator.positionArray();
            secondPosition = secondIndexIterator.positionArray();
            firstCount = firstIndexIterator.count();
            secondCount = secondIndexIterator.count();
            // Both cursors start before the first position, as nextInterval() preincrements them.
            firstCurr = secondCurr = -1;
        }

        /** Adds the term number of the first operand to the given set. */
        public void intervalTerms( final LongSet terms ) {
            terms.add( firstIndexIterator.termNumber() );
        }

        /** Returns the next position common to both operands, as a singleton interval.
         *
         * @return the next common position, or <code>null</code> if there are no more.
         */
        public Interval nextInterval() {
            // An interval may have been precomputed and parked in next (a field not declared in this class).
            if ( next != null ) {
                final Interval result = next;
                next = null;
                return result;
            }
            if ( endOfProcess ) return null;

            final int[] firstPosition = this.firstPosition, secondPosition = this.secondPosition;
            // Move both cursors past the previously returned match (or onto the first position).
            if ( ++firstCurr == firstCount || ++secondCurr == secondCount ) {
                endOfProcess = true;
                return null;
            }

            // Advance the cursor pointing at the smaller position until the two positions coincide.
            while ( firstPosition[ firstCurr ] != secondPosition[ secondCurr ] ) {
                if ( firstPosition[ firstCurr ] < secondPosition[ secondCurr ] ) {
                    if ( ++firstCurr == firstCount ) {
                        endOfProcess = true;
                        return null;
                    }
                }
                else {
                    if ( ++secondCurr == secondCount ) {
                        endOfProcess = true;
                        return null;
                    }
                }
            }
            return Interval.valueOf( secondPosition[ secondCurr ] );
        }

        /** Returns 1, as only singleton intervals are returned. */
        public int extent() {
            return 1;
        }

        public String toString() {
            return getClass().getSimpleName() + "(" + firstIterator + ", " + secondIterator + ")";
        }
    }

    public String toString() {
        return getClass().getSimpleName() + "(" + firstIterator + ", " + secondIterator + ")";
    }
}