src.it.unimi.dsi.big.mg4j.search.PayloadPredicateDocumentIterator Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of mg4j-big Show documentation
MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.
The newest version!
package it.unimi.dsi.big.mg4j.search;

/*		 
 * MG4J: Managing Gigabytes for Java (big)
 *
 * Copyright (C) 2007-2011 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.big.mg4j.index.Index;
import it.unimi.dsi.big.mg4j.index.IndexIterator;
import it.unimi.dsi.big.mg4j.search.visitor.DocumentIteratorVisitor;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ReferenceSet;

import java.io.IOException;

import org.apache.commons.collections.Predicate;


/** A document iterator that filters an {@link IndexIterator}, returning just
 * documents whose payload satisfies a given predicate.
 * The interval iterators are computed by delegation to the underlying {@link IndexIterator}.
 *
 * Besides the classic {@link #skipTo(long)} method, this class provides a {@link #skipUnconditionallyTo(long)}
 * method that skips to a given document even if the document does not match the predicate. This
 * feature is fundamental to implement an efficient list intersection algorithm, as {@link #skipTo(long)} is
 * very expensive when the argument does not satisfy the predicate (as the next valid document must be searched
 * for exhaustively).
 * 
 * @author Sebastiano Vigna
 * @since 0.9
 */

public class PayloadPredicateDocumentIterator extends AbstractDocumentIterator implements DocumentIterator {
	/** The underlying iterator. */
	private final IndexIterator indexIterator;
	/** The predicate to filter payloads. */
	private final Predicate payloadPredicate;

	/** Creates a new payload-predicate document iterator over a given index iterator.
	 * @param indexIterator an index iterator.
	 * @param payloadPredicate a predicate on payloads that will be used to filter the documents returned by <code>indexIterator</code>.
	 */
	protected PayloadPredicateDocumentIterator( final IndexIterator indexIterator, final Predicate payloadPredicate ) {
		this.indexIterator = indexIterator;
		this.payloadPredicate = payloadPredicate;
	}

	/** Returns a new payload-predicate document iterator over a given index iterator.
	 * @param indexIterator an index iterator.
	 * @param payloadPredicate a predicate on payloads that will be used to filter the documents returned by <code>indexIterator</code>.
	 * @return a new iterator wrapping <code>indexIterator</code> and filtering with <code>payloadPredicate</code>.
	 */
	public static PayloadPredicateDocumentIterator getInstance( final IndexIterator indexIterator, final Predicate payloadPredicate ) {
		return new PayloadPredicateDocumentIterator( indexIterator, payloadPredicate );
	}

	/** Returns the indices of the underlying index iterator (pure delegation).
	 * 
	 * @return the result of <code>indices()</code> on the underlying index iterator.
	 */
	public ReferenceSet<Index> indices() {
		return indexIterator.indices();
	}

	/** Skips to the first document pointer larger than or equal to <code>n</code> whose payload satisfies the predicate.
	 * 
	 * <p>If the current document is already at or beyond <code>n</code>, nothing happens.
	 * Otherwise the underlying iterator is skipped to <code>n</code>; if the document it lands on
	 * does not satisfy the predicate, the search continues with {@link #nextDocument()}.
	 * 
	 * @param n a document pointer.
	 * @return the current document after the skip, or {@link DocumentIterator#END_OF_LIST} if the underlying
	 * iterator is exhausted.
	 * @throws IOException propagated from the underlying index iterator.
	 */
	public long skipTo( final long n ) throws IOException {
		if ( curr >= n ) return curr;
		// nextDocument() updates curr itself, so its return value is not needed here.
		if ( ( curr = indexIterator.skipTo( n ) ) != END_OF_LIST && ! payloadPredicate.evaluate( indexIterator.payload() ) ) nextDocument();
		return curr;
	}

	/** Skips to the given document, even if the document does not satisfy the predicate of this document iterator.
	 * 
	 * @param candidate a document pointer.
	 * @return assuming that <code>p</code> is the first document pointer larger than or equal to <code>candidate</code>,
	 * <code>p</code> if document <code>p</code> satisfies the predicate, <code>-p-1</code> otherwise; 
	 * if there is no document
	 * pointer larger than or equal to <code>candidate</code>, {@link DocumentIterator#END_OF_LIST}.
	 * @throws IOException propagated from the underlying index iterator.
	 * @see #skipTo(long)
	 */
	
	public long skipUnconditionallyTo( final long candidate ) throws IOException {
		if ( curr < candidate ) curr = indexIterator.skipTo( candidate );
		if ( curr == END_OF_LIST ) return END_OF_LIST;
		// A negative result (-curr - 1) tells the caller that the document exists but fails the predicate.
		return payloadPredicate.evaluate( indexIterator.payload() ) ? curr : -curr - 1;
	}
	
	/** Advances to the next document whose payload satisfies the predicate.
	 * 
	 * <p>Documents of the underlying iterator are scanned one by one until either the underlying
	 * iterator returns -1 or a payload satisfying the predicate is found.
	 * 
	 * @return the next matching document pointer, or -1 if the underlying iterator returned -1.
	 * @throws IOException propagated from the underlying index iterator.
	 */
	public long nextDocument() throws IOException {
		long d;
		// Empty loop body: all the work (advance + filter) happens in the condition.
		while( ( d = indexIterator.nextDocument() ) != -1 && ! payloadPredicate.evaluate( indexIterator.payload() ) );
		// fromNextDocument() is inherited; presumably it maps -1 to END_OF_LIST (confirm in AbstractDocumentIterator).
		curr = fromNextDocument( d );
		return d;
	}
	
	/** Returns whether the underlying index iterator may have a next document (pure delegation).
	 * 
	 * <p>Note that the predicate is not consulted, so a true result does not imply that
	 * a further document satisfying the predicate exists.
	 * 
	 * @return the result of <code>mayHaveNext()</code> on the underlying index iterator.
	 */
	public boolean mayHaveNext() {
		return indexIterator.mayHaveNext();
	}

	/** Disposes the underlying index iterator.
	 * 
	 * @throws IOException propagated from the underlying index iterator.
	 */
	public void dispose() throws IOException {
		indexIterator.dispose();
	}
	
	/** Accepts a visitor, propagating the visit to the underlying index iterator.
	 * 
	 * @param visitor the visitor.
	 * @return <code>null</code> if <code>visitPre()</code> returns false or if the underlying iterator
	 * returns <code>null</code>; otherwise, the result of <code>visitPost()</code> on this iterator.
	 * @throws IOException propagated from the visitor or the underlying index iterator.
	 */
	public <T> T accept( final DocumentIteratorVisitor<T> visitor ) throws IOException {
		if ( ! visitor.visitPre( this ) ) return null;
		final T[] a = visitor.newArray( 1 );
		// The visitor may decline to provide a result array; the subtree is visited anyway.
		if ( a == null ) {
			if ( indexIterator.accept( visitor ) == null ) return null;
		}
		else {			
			if ( ( a[ 0 ] = indexIterator.accept( visitor ) ) == null ) return null;
		}
		return visitor.visitPost( this, a );
	}

	/** Accepts a visitor on true paths, propagating the visit to the underlying index iterator
	 * using its <code>acceptOnTruePaths()</code> method.
	 * 
	 * @param visitor the visitor.
	 * @return <code>null</code> if <code>visitPre()</code> returns false or if the underlying iterator
	 * returns <code>null</code>; otherwise, the result of <code>visitPost()</code> on this iterator.
	 * @throws IOException propagated from the visitor or the underlying index iterator.
	 */
	public <T> T acceptOnTruePaths( final DocumentIteratorVisitor<T> visitor ) throws IOException {
		if ( ! visitor.visitPre( this ) ) return null;
		final T[] a = visitor.newArray( 1 );
		// The visitor may decline to provide a result array; the subtree is visited anyway.
		if ( a == null ) {
			if ( indexIterator.acceptOnTruePaths( visitor ) == null ) return null;
		}
		else {			
			if ( ( a[ 0 ] = indexIterator.acceptOnTruePaths( visitor ) ) == null ) return null;
		}
		return visitor.visitPost( this, a );
	}

	/** Returns the simple class name, followed by the underlying iterator in parentheses and by the predicate. */
	public String toString() {
	   return getClass().getSimpleName() + "(" + indexIterator + ")" + payloadPredicate;
	}

	/** Returns the interval iterators of the underlying index iterator (pure delegation).
	 * 
	 * @return the result of <code>intervalIterators()</code> on the underlying index iterator.
	 * @throws IOException propagated from the underlying index iterator.
	 */
	public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException {
		return indexIterator.intervalIterators();
	}

	/** Returns the interval iterator of the underlying index iterator (pure delegation).
	 * 
	 * @return the result of <code>intervalIterator()</code> on the underlying index iterator.
	 * @throws IOException propagated from the underlying index iterator.
	 */
	public IntervalIterator intervalIterator() throws IOException {
		return indexIterator.intervalIterator();
	}

	/** Returns the interval iterator of the underlying index iterator for the given index (pure delegation).
	 * 
	 * @param index an index.
	 * @return the result of <code>intervalIterator(index)</code> on the underlying index iterator.
	 * @throws IOException propagated from the underlying index iterator.
	 */
	public IntervalIterator intervalIterator( final Index index ) throws IOException {
		return indexIterator.intervalIterator( index );
	}
}