![JAR search and dependency download from the Maven repository](/logo.png)
test.it.unimi.dsi.big.mg4j.search.IntArrayIndexIterator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mg4j-big Show documentation
Show all versions of mg4j-big Show documentation
MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.
The newest version!
package it.unimi.dsi.big.mg4j.search;
import it.unimi.dsi.fastutil.ints.AbstractIntIterator;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntIterators;
import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.fastutil.longs.AbstractLongIterator;
import it.unimi.dsi.fastutil.longs.LongSet;
import it.unimi.dsi.fastutil.objects.AbstractObjectIterator;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.fastutil.objects.ReferenceSets;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.big.mg4j.index.Index;
import it.unimi.dsi.big.mg4j.index.IndexIterator;
import it.unimi.dsi.big.mg4j.index.IndexReader;
import it.unimi.dsi.big.mg4j.index.NullTermProcessor;
import it.unimi.dsi.big.mg4j.index.TooManyTermsException;
import it.unimi.dsi.big.mg4j.index.payload.Payload;
import it.unimi.dsi.big.mg4j.search.IntervalIterator;
import it.unimi.dsi.big.mg4j.search.IntervalIterators;
import it.unimi.dsi.big.mg4j.search.visitor.DocumentIteratorVisitor;
import it.unimi.dsi.util.Interval;
import java.io.IOException;
import java.util.Arrays;
import java.util.NoSuchElementException;
/** A partially implemented {@link IndexIterator index iterator} that returns
 * a given list of documents and associated positions.
 *
 * <p>Documents and positions are supplied as two parallel arrays. The arrays are
 * stored by reference (they are not copied). Before the first call to
 * {@link #nextDocument()} or {@link #skipTo(long)} the iterator is positioned
 * before the first document, and {@link #document()} returns -1.
 */
@SuppressWarnings("unused")
public class IntArrayIndexIterator implements IndexIterator {
    /** The fake index returned by {@link #index()} and {@link #indices()}; shared by all instances. */
    private final static Index index = new TestIndex();

    /** A stub index whose factory methods all throw {@link UnsupportedOperationException}. */
    private static class TestIndex extends Index {
        private static final long serialVersionUID = 1L;

        /** Creates the stub; the arguments are placeholders whose meaning is defined by the
         * superclass constructor (not visible in this file). */
        public TestIndex() {
            super( Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE,
                    null, true, true, NullTermProcessor.getInstance(), "text", null, null );
        }

        /** Unsupported.
         *
         * @throws UnsupportedOperationException always.
         */
        public IndexIterator documents( CharSequence prefix, int limit ) throws IOException, TooManyTermsException {
            throw new UnsupportedOperationException();
        }

        /** Unsupported.
         *
         * @throws UnsupportedOperationException always.
         */
        public IndexReader getReader() throws IOException {
            throw new UnsupportedOperationException();
        }

        /** Unsupported.
         *
         * @throws UnsupportedOperationException always.
         */
        public IndexReader getReader( int bufferSize ) throws IOException {
            throw new UnsupportedOperationException();
        }
    }

    /** The (strictly increasing) documents returned by this iterator. */
    private final long[] document;
    /** For each element of {@link #document}, the (strictly increasing, possibly empty) list of positions. */
    private final int[][] position;
    /** The index into {@link #document} of the current document, or -1 if no document has been returned yet. */
    private int curr = -1;
    /** The interval iterator created for the current document, or <code>null</code> if none has been created. */
    private IntervalIterator currentIterator;
    /** The term set by {@link #term(CharSequence)}, if any. */
    private String term;
    /** The id set by {@link #id(int)}. */
    private int id;
    /** The term number returned by {@link #termNumber()}. */
    private final int termNumber;
    /** The weight set by {@link #weight(double)}. */
    private double weight;

    /** Returns the weight of this iterator.
     *
     * @return the value last passed to {@link #weight(double)} (0 if never set).
     */
    public double weight() {
        return weight;
    }

    /** Sets the weight of this iterator.
     *
     * @param weight the new weight.
     * @return this iterator.
     */
    public IntArrayIndexIterator weight( final double weight ) {
        this.weight = weight;
        return this;
    }

    /** Creates a new array-based index iterator with term number 0.
     *
     * @param document an (increasing) array of documents that will be returned.
     * @param position a parallel array of arrays of positions.
     * @throws IllegalArgumentException if the arrays have different lengths, or if the
     * documents or any position list are not strictly increasing.
     */
    public IntArrayIndexIterator( long[] document, int[][] position ) {
        this( 0, document, position );
    }

    /** Creates a new array-based index iterator.
     *
     * @param termNumber the term number of this iterator.
     * @param document an (increasing) array of documents that will be returned.
     * @param position a parallel array of arrays of positions; each position list
     * must be strictly increasing and may be empty.
     * @throws IllegalArgumentException if the arrays have different lengths, or if the
     * documents or any position list are not strictly increasing.
     */
    public IntArrayIndexIterator( final int termNumber, long[] document, int[][] position ) {
        if ( document.length != position.length )
            throw new IllegalArgumentException( "Document and position arrays have different lengths: " + document.length + " != " + position.length );

        for ( int i = 0; i < document.length - 1; i++ )
            if ( document[ i ] >= document[ i + 1 ] ) throw new IllegalArgumentException( "Document array is not increasing" );

        for ( int i = 0; i < document.length; i++ ) {
            final int[] p = position[ i ];
            // Compare adjacent pairs only; the bound makes empty and singleton lists
            // trivially valid (an empty list must not cause an out-of-bounds access).
            for ( int j = 0; j < p.length - 1; j++ )
                if ( p[ j ] >= p[ j + 1 ] )
                    throw new IllegalArgumentException( "Non-increasing position list for document " + i + ": " + Arrays.toString( p ) );
        }

        this.termNumber = termNumber;
        this.document = document;
        this.position = position;
    }

    /** Returns the term number provided at construction time.
     *
     * @return the term number.
     */
    public long termNumber() {
        return termNumber;
    }

    /** Returns whether there are documents after the current one.
     *
     * @return true if the current document is not the last one of the array.
     */
    public boolean mayHaveNext() {
        return curr < document.length - 1;
    }

    /** Advances to the next document, discarding the interval iterator of the previous one.
     *
     * @return the next document, or -1 if there are no more documents.
     */
    public long nextDocument() {
        if ( ! mayHaveNext() ) return -1;
        curr++;
        currentIterator = null;
        return document[ curr ];
    }

    /** Skips to the first document greater than or equal to the given one.
     *
     * <p>If the current document already satisfies the bound, the iterator does not move.
     *
     * @param n a lower bound for the document to be returned.
     * @return the first document greater than or equal to <code>n</code> at or after the current position,
     * or <code>END_OF_LIST</code> if the list is exhausted before finding one.
     */
    public long skipTo( long n ) {
        if ( curr != -1 && document[ curr ] >= n ) return document[ curr ];
        long result;
        while ( mayHaveNext() ) if ( ( result = nextDocument() ) >= n ) return result;
        return END_OF_LIST;
    }

    /** Passes this iterator to the given visitor.
     *
     * @param visitor the visitor.
     * @return the value returned by <code>visitor.visit( this )</code>.
     */
    public <T> T accept( DocumentIteratorVisitor<T> visitor ) throws IOException {
        return visitor.visit( this );
    }

    /** Passes this iterator to the given visitor; behaves exactly as {@link #accept(DocumentIteratorVisitor)}.
     *
     * @param visitor the visitor.
     * @return the value returned by <code>visitor.visit( this )</code>.
     */
    public <T> T acceptOnTruePaths( DocumentIteratorVisitor<T> visitor ) throws IOException {
        return visitor.visit( this );
    }

    /** Does nothing. */
    public void dispose() {}

    /** Returns the current document.
     *
     * @return the current document, or -1 if no document has been returned yet.
     */
    public long document() {
        if ( curr == -1 ) return -1;
        return document[ curr ];
    }

    /** Returns a singleton set containing the fake index of this iterator.
     *
     * @return a singleton set containing the value returned by {@link #index()}.
     */
    public ReferenceSet<Index> indices() {
        return ReferenceSets.singleton( index );
    }

    /** An interval iterator returning, for each element of an array of positions,
     * the interval built by <code>Interval.valueOf( position )</code>. */
    public static class ArraySingletonIntervalIterator extends AbstractObjectIterator<Interval> implements IntervalIterator {
        /** The index into {@link #position} of the last returned position, or -1. */
        private int curr = -1;
        /** The positions to be returned (stored by reference). */
        private final int[] position;

        /** Creates a new iterator over the given positions.
         *
         * @param position the array of positions.
         */
        public ArraySingletonIntervalIterator( int[] position ) {
            this.position = position;
        }

        /** Returns the extent of this iterator.
         *
         * @return always 1.
         */
        public int extent() {
            return 1;
        }

        /** Repositions this iterator before the first position. */
        public void reset() {
            curr = -1;
        }

        /** Unsupported.
         *
         * @throws UnsupportedOperationException always.
         */
        public void intervalTerms( final LongSet terms ) {
            throw new UnsupportedOperationException();
        }

        public boolean hasNext() {
            return curr < position.length - 1;
        }

        /** Returns the interval for the next position.
         *
         * @return the interval built from the next position.
         * @throws NoSuchElementException if there are no more positions.
         */
        public Interval next() {
            if ( ! hasNext() ) throw new NoSuchElementException();
            curr++;
            return Interval.valueOf( position[ curr ] );
        }

        /** Returns the interval for the next position, if any.
         *
         * @return the interval built from the next position, or <code>null</code> if there are no more positions.
         */
        public Interval nextInterval() {
            if ( ! hasNext() ) return null;
            return next();
        }

        public String toString() {
            return Arrays.toString( position );
        }
    }

    /** Returns the interval iterator for the current document.
     *
     * <p>For a document with at least one position, the same iterator instance is
     * returned by repeated calls until the iterator moves to another document. For a
     * document with no positions, {@link IntervalIterators#FALSE} is returned.
     *
     * @return an interval iterator over the positions of the current document.
     * @throws IllegalStateException if no document has been returned yet.
     */
    public IntervalIterator intervalIterator() {
        if ( curr == -1 ) throw new IllegalStateException();
        if ( currentIterator != null ) return currentIterator;
        if ( position[ curr ].length == 0 ) return IntervalIterators.FALSE;
        return currentIterator = new ArraySingletonIntervalIterator( position[ curr ] );
    }

    /** Returns the interval iterator for the current document, ignoring the argument.
     *
     * @param index ignored.
     * @return the result of {@link #intervalIterator()}.
     */
    public IntervalIterator intervalIterator( Index index ) {
        return intervalIterator();
    }

    /** Unsupported.
     *
     * @throws UnsupportedOperationException always.
     */
    public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() {
        throw new UnsupportedOperationException();
    }

    /** Returns the interval iterator for the current document.
     *
     * @return the result of {@link #intervalIterator()}.
     */
    public IntervalIterator iterator() {
        return intervalIterator();
    }

    /** Repositions this iterator before the first document. */
    public void reset() {
        curr = -1;
        // Drop the cached iterator so that it cannot outlive the document it was built for.
        currentIterator = null;
    }

    /** Returns a string of the form <code>[&lt;document:[positions]&gt;, &hellip;]</code> listing all documents.
     *
     * @return a string representation of the whole content of this iterator.
     */
    public String toString() {
        MutableString result = new MutableString();
        result.append( '[' );
        for ( int i = 0; i < document.length; i++ ) {
            if ( i != 0 ) result.append( ", " );
            result.append( '<' ).append( document[ i ] ).append( ':' ).append( Arrays.toString( position[ i ] ) ).append( '>' );
        }
        return result.append( ']' ).toString();
    }

    /** Returns the number of positions of the current document.
     *
     * <p>Must be called only after a document has been returned.
     *
     * @return the length of the position list of the current document.
     */
    public int count() {
        return position[ curr ].length;
    }

    /** Returns the number of documents of this iterator.
     *
     * @return the length of the document array.
     */
    public long frequency() {
        return document.length;
    }

    /** Sets the id of this iterator.
     *
     * @param id the new id.
     * @return this iterator.
     */
    public IntArrayIndexIterator id( int id ) {
        this.id = id;
        return this;
    }

    /** Returns the id of this iterator.
     *
     * @return the value last passed to {@link #id(int)} (0 if never set).
     */
    public int id() {
        return id;
    }

    /** Returns the fake index shared by all instances of this class.
     *
     * @return the fake index.
     */
    public Index index() {
        return index;
    }

    /** Returns no payload.
     *
     * @return always <code>null</code>.
     */
    public Payload payload() {
        return null;
    }

    /** Returns the positions of the current document.
     *
     * <p>Must be called only after a document has been returned.
     *
     * @return the internal position array of the current document (not a copy).
     */
    public int[] positionArray() {
        return position[ curr ];
    }

    /** Returns an iterator over the positions of the current document.
     *
     * <p>Must be called only after a document has been returned.
     *
     * @return an iterator wrapping the position array of the current document.
     */
    public IntIterator positions() {
        return IntIterators.wrap( position[ curr ] );
    }

    /** Copies the positions of the current document into the given array.
     *
     * <p>Must be called only after a document has been returned.
     *
     * @param position an array that will be filled with the positions, starting at index 0.
     * @return the number of positions copied; if <code>position</code> is too small nothing is copied
     * and the result is <code>-c - 1</code>, where <code>c</code> is the number of positions.
     */
    public int positions( int[] position ) {
        final int[] source = this.position[ curr ];
        if ( source.length > position.length ) return -source.length - 1;
        System.arraycopy( source, 0, position, 0, source.length );
        return source.length;
    }

    /** Returns the term of this iterator.
     *
     * @return the string form of the value last passed to {@link #term(CharSequence)}, or <code>null</code> if never set.
     */
    public String term() {
        return term;
    }

    /** Sets the term of this iterator.
     *
     * @param term the new term; it is converted to a string immediately.
     * @return this iterator.
     */
    public IntArrayIndexIterator term( CharSequence term ) {
        this.term = term.toString();
        return this;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy