All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.it.unimi.dsi.big.mg4j.search.PayloadPredicateDocumentIterator Maven / Gradle / Ivy

Go to download

MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.

The newest version!
package it.unimi.dsi.big.mg4j.search;

/*		 
 * MG4J: Managing Gigabytes for Java (big)
 *
 * Copyright (C) 2007-2011 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.big.mg4j.index.Index;
import it.unimi.dsi.big.mg4j.index.IndexIterator;
import it.unimi.dsi.big.mg4j.search.visitor.DocumentIteratorVisitor;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ReferenceSet;

import java.io.IOException;

import org.apache.commons.collections.Predicate;


/** A document iterator that filters an {@link IndexIterator}, returning just
 * documents whose payload satisfies a given predicate.
 * The interval iterators are computed by delegation to the underlying {@link IndexIterator}.
 *
 * <p>Besides the classic {@link #skipTo(long)} method, this class provides a {@link #skipUnconditionallyTo(long)}
 * method that skips to a given document even if the document does not match the predicate. This
 * feature is fundamental to implement an efficient list intersection algorithm, as {@link #skipTo(long)} is
 * very expensive when the argument does not satisfy the predicate (as the next valid document must be searched
 * for exhaustively).
 *
 * @author Sebastiano Vigna
 * @since 0.9
 */
public class PayloadPredicateDocumentIterator extends AbstractDocumentIterator implements DocumentIterator {
	/** The underlying iterator. */
	private final IndexIterator indexIterator;
	/** The predicate to filter payloads. */
	private final Predicate payloadPredicate;

	/** Creates a new payload-predicate document iterator over a given index iterator.
	 *
	 * @param indexIterator an index iterator.
	 * @param payloadPredicate a predicate on payloads that will be used to filter the documents returned by <code>indexIterator</code>.
	 */
	protected PayloadPredicateDocumentIterator( final IndexIterator indexIterator, final Predicate payloadPredicate ) {
		this.indexIterator = indexIterator;
		this.payloadPredicate = payloadPredicate;
	}

	/** Returns a new payload-predicate document iterator over a given index iterator.
	 *
	 * @param indexIterator an index iterator.
	 * @param payloadPredicate a predicate on payloads that will be used to filter the documents returned by <code>indexIterator</code>.
	 * @return a new iterator wrapping <code>indexIterator</code> and filtering by <code>payloadPredicate</code>.
	 */
	public static PayloadPredicateDocumentIterator getInstance( final IndexIterator indexIterator, final Predicate payloadPredicate ) {
		return new PayloadPredicateDocumentIterator( indexIterator, payloadPredicate );
	}

	/** Returns the indices of the underlying index iterator (pure delegation). */
	public ReferenceSet<Index> indices() {
		return indexIterator.indices();
	}

	/** Skips to the first document pointer larger than or equal to <code>n</code> whose payload satisfies the predicate.
	 *
	 * <p>If the current document is already at or beyond <code>n</code>, it is returned without
	 * re-evaluating the predicate.
	 *
	 * @param n a document pointer.
	 * @return the current document after the skip, or {@link DocumentIterator#END_OF_LIST} if the
	 * underlying iterator is exhausted.
	 * @throws IOException if the underlying iterator throws it.
	 */
	public long skipTo( final long n ) throws IOException {
		if ( curr >= n ) return curr;
		// Skip on the underlying iterator first; if the landing document fails the predicate,
		// fall back to a linear scan (nextDocument() updates curr as a side effect).
		if ( ( curr = indexIterator.skipTo( n ) ) != END_OF_LIST && ! payloadPredicate.evaluate( indexIterator.payload() ) ) nextDocument();
		return curr;
	}

	/** Skips to the given document, even if the document does not satisfy the predicate of this document iterator.
	 *
	 * <p>Note that after this call the current document may be one that does not satisfy the predicate.
	 *
	 * @param candidate a document pointer.
	 * @return assuming that <var>p</var> is the first document pointer larger than or equal to <code>candidate</code>,
	 * <var>p</var> if document <var>p</var> satisfies the predicate, <code>-<var>p</var>-1</code> otherwise;
	 * if there is no document pointer larger than or equal to <code>candidate</code>, {@link DocumentIterator#END_OF_LIST}.
	 * @throws IOException if the underlying iterator throws it.
	 * @see #skipTo(long)
	 */
	public long skipUnconditionallyTo( final long candidate ) throws IOException {
		// Only move the underlying iterator when we are strictly before the candidate.
		if ( curr < candidate ) curr = indexIterator.skipTo( candidate );
		if ( curr == END_OF_LIST ) return END_OF_LIST;
		// A failed predicate is encoded as -p-1, so the caller can still recover the pointer.
		return payloadPredicate.evaluate( indexIterator.payload() ) ? curr : -curr - 1;
	}

	/** Advances to the next document whose payload satisfies the predicate.
	 *
	 * @return the next matching document pointer, or -1 if the underlying iterator has no more documents.
	 * @throws IOException if the underlying iterator throws it.
	 */
	public long nextDocument() throws IOException {
		long d;
		// Linear scan: discard documents until one matches or the underlying list ends (-1).
		while( ( d = indexIterator.nextDocument() ) != -1 && ! payloadPredicate.evaluate( indexIterator.payload() ) );
		// fromNextDocument() is inherited; presumably it maps the -1 end marker to END_OF_LIST.
		curr = fromNextDocument( d );
		return d;
	}

	/** Delegates to the underlying index iterator; the predicate is not consulted. */
	public boolean mayHaveNext() {
		return indexIterator.mayHaveNext();
	}

	/** Disposes the underlying index iterator.
	 *
	 * @throws IOException if the underlying iterator throws it.
	 */
	public void dispose() throws IOException {
		indexIterator.dispose();
	}

	/** Accepts a visitor, visiting this iterator and then the underlying index iterator.
	 *
	 * @param visitor the visitor.
	 * @return the result of {@link DocumentIteratorVisitor#visitPost}, or <code>null</code> if
	 * <code>visitPre</code> returns false or the visit of the underlying iterator returns <code>null</code>.
	 * @throws IOException if the visitor or the underlying iterator throws it.
	 */
	public <T> T accept( final DocumentIteratorVisitor<T> visitor ) throws IOException {
		if ( ! visitor.visitPre( this ) ) return null;
		final T[] a = visitor.newArray( 1 );
		if ( a == null ) {
			// The visitor does not collect sub-results: just propagate an interrupted visit.
			if ( indexIterator.accept( visitor ) == null ) return null;
		}
		else {
			if ( ( a[ 0 ] = indexIterator.accept( visitor ) ) == null ) return null;
		}
		return visitor.visitPost( this, a );
	}

	/** Accepts a visitor as {@link #accept(DocumentIteratorVisitor)} does, but delegating to
	 * <code>acceptOnTruePaths</code> on the underlying index iterator.
	 *
	 * @param visitor the visitor.
	 * @return the result of {@link DocumentIteratorVisitor#visitPost}, or <code>null</code> if
	 * <code>visitPre</code> returns false or the visit of the underlying iterator returns <code>null</code>.
	 * @throws IOException if the visitor or the underlying iterator throws it.
	 */
	public <T> T acceptOnTruePaths( final DocumentIteratorVisitor<T> visitor ) throws IOException {
		if ( ! visitor.visitPre( this ) ) return null;
		final T[] a = visitor.newArray( 1 );
		if ( a == null ) {
			// The visitor does not collect sub-results: just propagate an interrupted visit.
			if ( indexIterator.acceptOnTruePaths( visitor ) == null ) return null;
		}
		else {
			if ( ( a[ 0 ] = indexIterator.acceptOnTruePaths( visitor ) ) == null ) return null;
		}
		return visitor.visitPost( this, a );
	}

	/** Returns the simple class name, the underlying iterator in parentheses, and then the predicate. */
	@Override
	public String toString() {
		return getClass().getSimpleName() + "(" + indexIterator + ")" + payloadPredicate;
	}

	/** Returns the interval iterators of the underlying index iterator (pure delegation). */
	public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException {
		return indexIterator.intervalIterators();
	}

	/** Returns the interval iterator of the underlying index iterator (pure delegation). */
	public IntervalIterator intervalIterator() throws IOException {
		return indexIterator.intervalIterator();
	}

	/** Returns the interval iterator of the underlying index iterator for the given index (pure delegation). */
	public IntervalIterator intervalIterator( final Index index ) throws IOException {
		return indexIterator.intervalIterator( index );
	}
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy