All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.it.unimi.dsi.big.mg4j.search.NotDocumentIterator Maven / Gradle / Ivy

Go to download

MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.

The newest version!
package it.unimi.dsi.big.mg4j.search;

/*		 
 * MG4J: Managing Gigabytes for Java (big)
 *
 * Copyright (C) 2003-2011 Paolo Boldi and Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.big.mg4j.index.Index;
import it.unimi.dsi.big.mg4j.search.visitor.DocumentIteratorVisitor;

import java.io.IOException;


/** A document iterator that returns documents not returned by its underlying iterator,
 * and returns just {@link it.unimi.dsi.big.mg4j.search.IntervalIterators#TRUE} on all interval iterators.
 *
 * <p>Candidate documents are enumerated in increasing order starting from zero and
 * stopping before the number of documents provided at construction time; a candidate is
 * returned only if the underlying iterator does not return it.
 *
 * <p>Note that the constructor already advances the underlying iterator by one document.
 *
 * @author Paolo Boldi
 * @author Sebastiano Vigna
 * @since 0.9
 */

public class NotDocumentIterator extends AbstractDocumentIterator {
	@SuppressWarnings("unused")
	private final static boolean DEBUG = false;
	private final static boolean ASSERTS = false;

	/** The underlying iterator. */
	private final DocumentIterator documentIterator;
	/** If not null, the sole index involved in this iterator. */
	private final Index soleIndex;
	/** The number of documents. */
	private final long numberOfDocuments;
	/** A map mapping all indices in {@link #indices()} to {@link IntervalIterators#TRUE}, and the others to {@link IntervalIterators#FALSE}. */
	private final Reference2ReferenceMap<Index,IntervalIterator> intervalIterators;
	/** An unmodifiable view of {@link #intervalIterators}. */
	private final Reference2ReferenceMap<Index,IntervalIterator> unmodifiableIntervalIterators;
	/** The next document that must not be returned, or {@link #numberOfDocuments}
	 * if the underlying iterator is exhausted. {@link #nextCandidate} is always less than or equal to
	 * this field. */
	private long nextToSkip;
	/** The next document that will be considered; it might be returned or not depending on whether it is returned
	 * by {@link #documentIterator}. */
	private long nextCandidate;

	/** Creates a new NOT document iterator over a given iterator.
	 *
	 * <p>The underlying iterator is advanced once to discover the first document to skip.
	 *
	 * @param documentIterator an iterator.
	 * @param numberOfDocuments the number of documents.
	 * @throws IOException if the underlying iterator fails while being advanced.
	 */
	protected NotDocumentIterator( final DocumentIterator documentIterator, final long numberOfDocuments ) throws IOException {
		this.documentIterator = documentIterator;
		this.numberOfDocuments = numberOfDocuments;
		
		// An exhausted underlying iterator is represented by a sentinel just past the last valid document.
		if ( ( nextToSkip = documentIterator.nextDocument() ) == -1 ) nextToSkip = numberOfDocuments;

		// Read the index set straight from the underlying iterator: calling the overridable
		// indices() from a constructor would run subclass code on a partially built object.
		final ReferenceSet<Index> indices = documentIterator.indices();
		final int n = indices.size();
		
		soleIndex = n == 1 ? indices.iterator().next() : null;
		intervalIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( n );
		for( Index i: indices ) intervalIterators.put( i, IntervalIterators.TRUE );
		// Any index that is not involved in the underlying iterator maps to FALSE.
		intervalIterators.defaultReturnValue( IntervalIterators.FALSE );
		unmodifiableIntervalIterators = Reference2ReferenceMaps.unmodifiable( intervalIterators );	
	}

	/** Returns a document iterator computing the NOT of the given iterator.
	 *
	 * @param it an iterator.
	 * @param numberOfDocuments2 the number of documents.
	 * @return a new iterator returning the documents not returned by <code>it</code>.
	 * @throws IOException if the underlying iterator fails while being advanced.
	 */
	public static NotDocumentIterator getInstance( final DocumentIterator it, final long numberOfDocuments2 ) throws IOException {
		return new NotDocumentIterator( it, numberOfDocuments2 );
	}
	
	/** Returns the set of indices of the underlying iterator.
	 *
	 * @return the indices involved in this iterator.
	 */
	public ReferenceSet<Index> indices() {
		return documentIterator.indices();
	}

	/** Skips to the first document greater than or equal to the given one that is
	 * not returned by the underlying iterator.
	 *
	 * @param n a document pointer.
	 * @return the current document if it is already greater than or equal to <code>n</code>;
	 * otherwise, the document this iterator is positioned on after the skip, or
	 * <code>END_OF_LIST</code> if no such document exists.
	 * @throws IOException if the underlying iterator fails.
	 */
	public long skipTo( final long n ) throws IOException {
		if ( curr >= n ) return curr;
		
		nextCandidate = n;
		nextToSkip = documentIterator.skipTo( n );
		// Normalise the end-of-list marker to the sentinel used by nextDocument().
		if ( nextToSkip == END_OF_LIST ) nextToSkip = numberOfDocuments;
		nextDocument();
		return curr;
	}

	/** Returns the next document not returned by the underlying iterator.
	 *
	 * @return the next document, or -1 if there are no more documents (in which case
	 * the current document is set to <code>END_OF_LIST</code>).
	 * @throws IOException if the underlying iterator fails.
	 */
	public long nextDocument() throws IOException {
		for(;;) {
			if ( nextCandidate >= numberOfDocuments ) {
				curr = END_OF_LIST;
				return -1;
			}
			// Every candidate strictly before the next document to skip belongs to the complement.
			if ( nextCandidate < nextToSkip ) return curr = nextCandidate++;
			if ( ASSERTS ) assert nextCandidate == nextToSkip;
			// The candidate is returned by the underlying iterator: discard it and advance both cursors.
			nextCandidate++;
			nextToSkip = documentIterator.nextDocument();
			if ( nextToSkip == -1 ) nextToSkip = numberOfDocuments;
		}
	}
	
	/** Returns whether there are still candidate documents to examine.
	 *
	 * <p>A true result does not guarantee that {@link #nextDocument()} will return a document,
	 * as all remaining candidates might be returned by the underlying iterator.
	 *
	 * @return true if the next candidate is smaller than the number of documents.
	 */
	public boolean mayHaveNext() {
		return nextCandidate < numberOfDocuments;
	}

	/** Disposes the underlying iterator.
	 *
	 * @throws IOException if the underlying iterator fails while being disposed.
	 */
	public void dispose() throws IOException {
		documentIterator.dispose();
	}
	
	/** Accepts a visitor, visiting this iterator in preorder, then the underlying
	 * iterator, and finally this iterator in postorder.
	 *
	 * @param visitor the visitor.
	 * @return the result of the postorder visit, or <code>null</code> if the preorder visit
	 * returns false or the visit of the underlying iterator returns <code>null</code>.
	 * @throws IOException if the visit of the underlying iterator fails.
	 */
	public <T> T accept( final DocumentIteratorVisitor<T> visitor ) throws IOException {
		if ( ! visitor.visitPre( this ) ) return null;
		final T[] a = visitor.newArray( 1 );
		if ( a == null ) {
			// The visitor supplied no array: visit the child without storing its result.
			if ( documentIterator.accept( visitor ) == null ) return null;
		}
		else {
			if ( ( a[ 0 ] = documentIterator.accept( visitor ) ) == null ) return null;
		}
		return visitor.visitPost( this, a );
	}

	/** Accepts a visitor on true paths; the underlying iterator is not visited.
	 *
	 * @param visitor the visitor.
	 * @return the result of the postorder visit (which receives a <code>null</code> array),
	 * or <code>null</code> if the preorder visit returns false.
	 */
	public <T> T acceptOnTruePaths( final DocumentIteratorVisitor<T> visitor ) {
		if ( ! visitor.visitPre( this ) ) return null;
		return visitor.visitPost( this, null );
	}

	/** Returns the simple name of this class followed by the underlying iterator in parentheses. */
	@Override
	public String toString() {
	   return getClass().getSimpleName() + "(" + documentIterator + ")";
	}

	/** Returns an unmodifiable map from the indices of this iterator to
	 * {@link IntervalIterators#TRUE}; any other index maps to {@link IntervalIterators#FALSE}.
	 *
	 * @return an unmodifiable view of the map of interval iterators.
	 */
	public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() {
		return unmodifiableIntervalIterators;
	}

	/** Returns the interval iterator for the sole index of this iterator.
	 *
	 * @return {@link IntervalIterators#TRUE}.
	 * @throws IllegalStateException if the underlying iterator does not involve exactly one index.
	 */
	public IntervalIterator intervalIterator() {
		if ( soleIndex == null ) throw new IllegalStateException();
		return IntervalIterators.TRUE;
	}

	/** Returns the interval iterator for the given index.
	 *
	 * @param index an index.
	 * @return {@link IntervalIterators#TRUE} if <code>index</code> is one of the indices of
	 * this iterator, {@link IntervalIterators#FALSE} otherwise.
	 */
	public IntervalIterator intervalIterator( final Index index ) {
		return intervalIterators.get( index );
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy