![JAR search and dependency download from the Maven repository](/logo.png)
src.it.unimi.dsi.big.mg4j.index.IndexIterator Maven / Gradle / Ivy
Show all versions of mg4j-big Show documentation
package it.unimi.dsi.big.mg4j.index;
/*
* MG4J: Managing Gigabytes for Java (big)
*
* Copyright (C) 2004-2011 Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.big.mg4j.index.payload.Payload;
import it.unimi.dsi.big.mg4j.search.DocumentIterator;
import java.io.IOException;
/**
 * An iterator over an inverted list.
 *
 * <p>An index iterator scans the inverted list of an indexed term. Every
 * integer returned by {@link DocumentIterator#nextDocument() nextDocument()}
 * is the index of a document containing the term. When the index contains
 * counts, {@link #count()} provides them after each call to
 * {@link #nextDocument()}. When the index also contains positions, they are
 * available as an array through {@link #positionArray()}, as an iterator
 * through {@link #positions()}, or copied into a caller-supplied array
 * through {@link #positions(int[])}.
 *
 * <p>Note that this interface extends
 * {@link it.unimi.dsi.big.mg4j.search.DocumentIterator}. The intervals
 * returned for a document are exactly the length-one intervals corresponding
 * to the positions returned by {@link #positions()}. If the index to which an
 * instance of this class refers does not contain positions, an
 * {@link UnsupportedOperationException} will be thrown.
 *
 * <p>Additionally, this interface strengthens
 * {@link DocumentIterator#weight(double)} so that it
 * {@linkplain #weight(double) returns an index iterator}.
 */
public interface IndexIterator extends DocumentIterator {

    /**
     * Returns the index over which this iterator is built.
     *
     * @return the index over which this iterator is built.
     */
    Index index();

    /**
     * Returns the number of the term whose inverted list is returned by this
     * index iterator.
     *
     * <p>Usually, the term number is automatically set by
     * {@link IndexReader#documents(CharSequence)} or
     * {@link IndexReader#documents(long)}.
     *
     * @return the number of the term over which this iterator is built.
     * @throws IllegalStateException if no term was set when the iterator was created.
     * @see #term()
     */
    long termNumber();

    /**
     * Returns the term whose inverted list is returned by this index iterator.
     *
     * <p>Usually, the term is automatically set by
     * {@link IndexReader#documents(CharSequence)} or
     * {@link IndexReader#documents(long)}, but you can supply your own term
     * with {@link #term(CharSequence)}.
     *
     * @return the term over which this iterator is built.
     * @throws IllegalStateException if no term was set when the iterator was created.
     * @see #termNumber()
     */
    String term();

    /**
     * Sets the term whose inverted list is returned by this index iterator.
     *
     * <p>Usually, the term is automatically set by
     * {@link Index#documents(CharSequence)} or by
     * {@link IndexReader#documents(CharSequence)}, but you can use this method
     * to ensure that {@link #term()} doesn't throw an exception.
     *
     * @param term a character sequence (that will be defensively copied) that
     *        will be assumed to be the term whose inverted list is returned by
     *        this index iterator.
     * @return this index iterator.
     */
    IndexIterator term(CharSequence term);

    /**
     * Returns the frequency, that is, the number of documents that will be
     * returned by this iterator.
     *
     * @return the number of documents that will be returned by this iterator.
     * @throws IOException if an I/O error occurs.
     */
    long frequency() throws IOException;

    /**
     * Returns the payload, if any, associated with the current document.
     *
     * @return the payload associated with the current document.
     * @throws IOException if an I/O error occurs.
     */
    Payload payload() throws IOException;

    /**
     * Returns the count, that is, the number of occurrences of the term in
     * the current document.
     *
     * @return the count (number of occurrences) of the term in the current document.
     * @throws UnsupportedOperationException if the index of this iterator does
     *         not contain counts.
     * @throws IOException if an I/O error occurs.
     */
    int count() throws IOException;

    /**
     * Returns the positions at which the term appears in the current document.
     *
     * @return the positions of the current document in which the current term appears.
     * @throws UnsupportedOperationException if the index of this iterator does
     *         not contain positions.
     * @throws IOException if an I/O error occurs.
     */
    IntIterator positions() throws IOException;

    /**
     * Stores the positions at which the term appears in the current document
     * in a given array.
     *
     * <p>If the array is not large enough (i.e., it does not contain
     * {@link #count()} elements), this method will return a negative number
     * (the opposite of the count).
     *
     * @param positions an array that will be used to store positions.
     * @return the {@linkplain #count() count}; it will have the sign changed
     *         if {@code positions} cannot hold all positions.
     * @throws UnsupportedOperationException if the index of this iterator does
     *         not contain positions.
     * @throws IOException if an I/O error occurs.
     */
    int positions(int[] positions) throws IOException;

    /**
     * Returns the positions at which the term appears in the current document
     * in an array.
     *
     * <p>Implementations are allowed to return the same array across
     * different calls to this method.
     *
     * @return an array whose first {@link #count()} elements contain the
     *         document positions.
     * @throws UnsupportedOperationException if the index of this iterator does
     *         not contain positions.
     * @throws IOException if an I/O error occurs.
     */
    int[] positionArray() throws IOException;

    /**
     * Sets the id of this index iterator.
     *
     * <p>The id is an integer associated with each index iterator. It has no
     * specific semantics, and can be used differently in different contexts.
     * A typical usage pattern, for instance, is using it to assign a unique
     * number to the index iterators contained in a composite document
     * iterator (say, numbering consecutively the leaves of the composite).
     *
     * @param id the new id for this index iterator.
     * @return this index iterator.
     */
    IndexIterator id(int id);

    /**
     * Returns the id of this index iterator.
     *
     * @return the id of this index iterator.
     * @see #id(int)
     */
    int id();

    /**
     * Strengthens {@link DocumentIterator#weight(double)} so that an index
     * iterator is returned; see that method for the meaning of the weight.
     *
     * @param weight the weight, as specified by {@link DocumentIterator#weight(double)}.
     * @return an index iterator.
     * @see DocumentIterator#weight(double)
     */
    IndexIterator weight(double weight);
}