All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.it.unimi.dsi.big.mg4j.search.AbstractDocumentIterator Maven / Gradle / Ivy

Go to download

MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.

The newest version!
package it.unimi.dsi.big.mg4j.search;

/*		 
 * MG4J: Managing Gigabytes for Java (big)
 *
 * Copyright (C) 2007-2011 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.fastutil.objects.AbstractObjectIterator;
import it.unimi.dsi.util.Interval;

import java.io.IOException;
import java.util.NoSuchElementException;

/** An abstract iterator on documents that and 
 * provides support for the {@link DocumentIterator#weight()}/{@link DocumentIterator#weight(double)} methods
 * and an internal class that eases the construction of {@linkplain IntervalIterator interval iterators}.
 *
 * 

Instances of this class expect implementation to keep track of the {@linkplain #curr current document} * of the iterator. The special value -1 denotes an iterator that has not still been accessed, * and the special value {@link DocumentIterator#END_OF_LIST} denotes an iterator that has been exhausted. * *

Methods performing actions depending on the last document returned should throw an {@link IllegalStateException} * if called when {@link #curr} is -1 or {@link DocumentIterator#END_OF_LIST}. * You just need to call {@link #ensureOnADocument()}. * *

Finally, {@link #toNextDocument(long)} will turn the value of {@link #curr} into a suitable return value * for {@link #nextDocument()} (as {@link DocumentIterator#END_OF_LIST} needs to be massaged). */ public abstract class AbstractDocumentIterator implements DocumentIterator { /** The current document of the iterator. The special value -1 denotes an iterator that has not still been accessed, * and the special value {@link DocumentIterator#END_OF_LIST} denotes an iterator that has been exhausted. */ protected long curr = -1; /** The weight of this iterator. */ protected double weight = 1; public boolean mayHaveNext() { return curr != END_OF_LIST; } /** Turns the value of the argument into a valid return value of {@link #nextDocument()} * * @param curr a value for {@link #curr}, including possibly {@link DocumentIterator#END_OF_LIST}. * @return the correct return value for {@link #nextDocument()}. */ protected static long toNextDocument( final long curr ) { return ( curr + 1 ) & 0x8000000000000000L | curr; } /** Turns a value returned by {@link #nextDocument()} into a valid value for {@link #curr}. * * @param d a value returned by {@link #nextDocument()}. * @return the correct return value for {@link #curr}. */ protected static long fromNextDocument( final long d ) { return d & 0x7FFFFFFFFFFFFFFFL; } public double weight() { return weight; } public DocumentIterator weight( final double weight ) { this.weight = weight; return this; } /** Invokes {@link DocumentIterator#intervalIterator()} * * @return {@link DocumentIterator#intervalIterator()}. */ public IntervalIterator iterator() { try { return intervalIterator(); } catch ( IOException e ) { throw new RuntimeException( e ); } } protected final void ensureOnADocument() { // This catches curr == END_OF_LIST || curr == -1. if ( ( curr | 0x8000000000000000L ) == -1 ) throw new IllegalStateException(); } /** Returns the current document. * * @return {@link #curr}. 
*/ public long document() { return curr; } protected abstract static class AbstractIntervalIterator extends AbstractObjectIterator implements IntervalIterator { /** The next document to be returned, if it has already been peeked at by {@link #hasNext()}, * or null. */ protected Interval next; /** Checks whether {@link #next} has been already set; if not, peeks at the interval returned by {@link IntervalIterator#nextInterval() nextInterval()}. * * @return true if {@link #next} is not null or if {@link IntervalIterator#nextInterval() nextInterval()} has returned a valid interval. */ public boolean hasNext() { if ( next == null ) try { next = nextInterval(); } catch ( IOException e ) { throw new RuntimeException( e ); } return next != null; } /** Checks whether there is an interval to be returned, sets * {@link #next} to null, and returns its previous value. * * @return the next interval, as cached by {@link #hasNext()}. */ @Deprecated public Interval next() { if ( ! hasNext() ) throw new NoSuchElementException(); final Interval result = next; next = null; return result; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy