src.it.unimi.dsi.big.mg4j.search.AbstractDocumentIterator Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of mg4j-big Show documentation
MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.
The newest version!
package it.unimi.dsi.big.mg4j.search;

/*		 
 * MG4J: Managing Gigabytes for Java (big)
 *
 * Copyright (C) 2007-2011 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.fastutil.objects.AbstractObjectIterator;
import it.unimi.dsi.util.Interval;

import java.io.IOException;
import java.util.NoSuchElementException;

/** An abstract iterator on documents that
 * provides support for the {@link DocumentIterator#weight()}/{@link DocumentIterator#weight(double)} methods
 * and an internal class that eases the construction of {@linkplain IntervalIterator interval iterators}.
 *
 * <p>Instances of this class expect implementations to keep track of the {@linkplain #curr current document}
 * of the iterator. The special value -1 denotes an iterator that has not yet been accessed,
 * and the special value {@link DocumentIterator#END_OF_LIST} denotes an iterator that has been exhausted. 
 * 
 * 
 * <p>Methods performing actions depending on the last document returned should throw an {@link IllegalStateException}
 * if called when {@link #curr} is -1 or {@link DocumentIterator#END_OF_LIST}. 
 * To obtain this behaviour, they just need to call {@link #ensureOnADocument()} before doing anything else.
 * 
 * <p>Finally, {@link #toNextDocument(long)} will turn the value of {@link #curr} into a suitable return value
 * for {@link #nextDocument()} (as {@link DocumentIterator#END_OF_LIST} needs to be massaged).
 */

public abstract class AbstractDocumentIterator implements DocumentIterator {
	/** The current document of the iterator. The special value -1 denotes an iterator that has not yet been accessed,
	 * and the special value {@link DocumentIterator#END_OF_LIST} denotes an iterator that has been exhausted. */
	protected long curr = -1;
	/** The weight of this iterator; it is 1 until changed by {@link #weight(double)}. */
	protected double weight = 1;
	
	/** Returns whether this iterator has not been exhausted yet.
	 * 
	 * @return true if {@link #curr} is different from {@link DocumentIterator#END_OF_LIST}.
	 */
	public boolean mayHaveNext() {
		return curr != END_OF_LIST;
	}

	/** Turns the value of the argument into a valid return value of {@link #nextDocument()}.
	 * 
	 * <p>The argument is returned unchanged, with a single exception: {@link Long#MAX_VALUE}
	 * (the only value whose increment overflows into the sign bit without the sign bit
	 * being already set) is turned into -1. The checks in this class
	 * (see {@link #ensureOnADocument()}) rely on {@link DocumentIterator#END_OF_LIST} being
	 * exactly that value.
	 * 
	 * @param curr a value for {@link #curr}, including possibly {@link DocumentIterator#END_OF_LIST}.
	 * @return the correct return value for {@link #nextDocument()}.
	 */
	protected static long toNextDocument( final long curr ) {
		// If curr + 1 overflows, the sign bit survives the mask and or'ing it into curr gives -1;
		// otherwise the mask yields zero (or a sign bit curr already has) and curr passes through.
		return ( ( curr + 1 ) & 0x8000000000000000L ) | curr;
	}
	
	/** Turns a value returned by {@link #nextDocument()} into a valid value for {@link #curr}.
	 * 
	 * <p>The sign bit of the argument is cleared: nonnegative values are thus returned unchanged,
	 * whereas -1 becomes {@link Long#MAX_VALUE}. This is the inverse of {@link #toNextDocument(long)}
	 * on the values that method can return for a nonnegative argument.
	 * 
	 * @param d a value returned by {@link #nextDocument()}.
	 * @return the correct return value for {@link #curr}.
	 */
	protected static long fromNextDocument( final long d ) {
		return d & 0x7FFFFFFFFFFFFFFFL;
	}
	
	/** Returns the weight of this iterator.
	 * 
	 * @return {@link #weight}.
	 */
	public double weight() {
		return weight;
	}
	
	/** Sets the weight of this iterator.
	 * 
	 * @param weight the new value for {@link #weight}.
	 * @return this iterator, so that calls can be chained.
	 */
	public DocumentIterator weight( final double weight ) {
		this.weight = weight;
		return this;
	}
	
	/** Invokes {@link DocumentIterator#intervalIterator()}, rethrowing
	 * any {@link IOException} wrapped in a {@link RuntimeException}.
	 * 
	 * @return {@link DocumentIterator#intervalIterator()}.
	 * @throws RuntimeException if {@link DocumentIterator#intervalIterator()} throws an {@link IOException}
	 * (which is set as the cause).
	 */
	public IntervalIterator iterator() {
		try {
			return intervalIterator();
		}
		catch ( IOException e ) {
			throw new RuntimeException( e );
		}
	}

	/** Checks that this iterator is positioned on a document.
	 * 
	 * @throws IllegalStateException if {@link #curr} is -1 or {@link DocumentIterator#END_OF_LIST}.
	 */
	protected final void ensureOnADocument() {
		// This catches curr == END_OF_LIST || curr == -1 with a single test: setting the sign bit
		// gives all ones (i.e., -1) only if all the other bits of curr are already set.
		if ( ( curr | 0x8000000000000000L ) == -1 ) throw new IllegalStateException();
	}
	
	/** Returns the current document.
	 * 
	 * @return {@link #curr}.
	 */
	public long document() {
		return curr; 
	}
	
	/** An abstract interval iterator that implements {@link #hasNext()} and {@link #next()}
	 * on top of {@link IntervalIterator#nextInterval() nextInterval()} by caching one interval.
	 * 
	 * <p>A null value returned by {@link IntervalIterator#nextInterval() nextInterval()} is interpreted
	 * as the absence of further intervals.
	 */
	protected abstract static class AbstractIntervalIterator extends AbstractObjectIterator<Interval> implements IntervalIterator {
		/** The next interval to be returned, if it has already been peeked at by {@link #hasNext()},
		 * or null. */
		protected Interval next;
		
		/** Checks whether {@link #next} has been already set; if not, peeks at the interval returned by {@link IntervalIterator#nextInterval() nextInterval()}.
		 *
		 * @return true if {@link #next} is not null or if {@link IntervalIterator#nextInterval() nextInterval()} has returned a valid interval.
		 * @throws RuntimeException if {@link IntervalIterator#nextInterval() nextInterval()} throws an {@link IOException}
		 * (which is set as the cause).
		 */
		public boolean hasNext() {
			if ( next == null ) try {
				next = nextInterval();
			}
			catch ( IOException e ) {
				throw new RuntimeException( e );
			}
			return next != null;
		}
		
		/** Checks whether there is an interval to be returned, sets
		 * {@link #next} to null, and returns its previous value.
		 *
		 * @return the next interval, as cached by {@link #hasNext()}.
		 * @throws NoSuchElementException if {@link #hasNext()} returns false.
		 */
		@Deprecated
		public Interval next() {
			if ( ! hasNext() ) throw new NoSuchElementException();
			final Interval result = next;
			// Clear the cache so that the following hasNext() peeks at a new interval.
			next = null;
			return result;
		}
	}
}