All downloads are free. The search and download features use the official Maven repository.

test.it.unimi.dsi.big.mg4j.search.IntArrayIndexIterator Maven / Gradle / Ivy

Go to download

MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.

The newest version!
package it.unimi.dsi.big.mg4j.search;

import it.unimi.dsi.fastutil.ints.AbstractIntIterator;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntIterators;
import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.fastutil.longs.AbstractLongIterator;
import it.unimi.dsi.fastutil.longs.LongSet;
import it.unimi.dsi.fastutil.objects.AbstractObjectIterator;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.fastutil.objects.ReferenceSets;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.big.mg4j.index.Index;
import it.unimi.dsi.big.mg4j.index.IndexIterator;
import it.unimi.dsi.big.mg4j.index.IndexReader;
import it.unimi.dsi.big.mg4j.index.NullTermProcessor;
import it.unimi.dsi.big.mg4j.index.TooManyTermsException;
import it.unimi.dsi.big.mg4j.index.payload.Payload;
import it.unimi.dsi.big.mg4j.search.IntervalIterator;
import it.unimi.dsi.big.mg4j.search.IntervalIterators;
import it.unimi.dsi.big.mg4j.search.visitor.DocumentIteratorVisitor;
import it.unimi.dsi.util.Interval;

import java.io.IOException;
import java.util.Arrays;
import java.util.NoSuchElementException;

/** A partially implemented {@link IndexIterator index iterator} that returns
 * a given list of documents and associated positions.
 * 
 */

@SuppressWarnings("unused")
public class IntArrayIndexIterator implements IndexIterator {
	private final static Index index = new TestIndex();
		
	private static class TestIndex extends Index { 
		private static final long serialVersionUID = 1L;

		public TestIndex() {
			super( Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE,
			null, true, true, NullTermProcessor.getInstance(), "text", null, null );
		}

		public IndexIterator documents( CharSequence prefix, int limit ) throws IOException, TooManyTermsException {
			throw new UnsupportedOperationException();
		}

		public IndexReader getReader() throws IOException {
			throw new UnsupportedOperationException();
		}

		public IndexReader getReader( int bufferSize ) throws IOException {
			throw new UnsupportedOperationException();
		}
	}
	
	
	private final long[] document;
	private final int[][] position;
	
	private int curr = -1;
	private IntervalIterator currentIterator;
	private String term;
	private int id;
	private final int termNumber;
	private double weight;	

	public double weight() {
		return weight;
	}
	
	public IntArrayIndexIterator weight( final double weight ) {
		this.weight = weight;
		return this;
	}
	
	/** Creates a new array-based index iterator with term number 0.
	 * 
	 * @param document an (increasing) array of documents that will be returned.
	 * @param position a parallel array of arrays of positions.
	 */
	
	public IntArrayIndexIterator( long[] document, int[][] position ) {
		this( 0, document, position );
	}
	
	/** Creates a new array-based index iterator.
	 * 
	 * @param termNumber the term number of this iterator.
	 * @param document an (increasing) array of documents that will be returned.
	 * @param position a parallel array of arrays of positions.
	 */
	
	public IntArrayIndexIterator( final int termNumber, long[] document, int[][] position ) {
		this.termNumber = termNumber;
		this.document = document;
		this.position = position;
		if( document.length != position.length ) throw new IllegalArgumentException();
		for( int i = 0; i < document.length - 1; i++ ) if ( document[ i ] >= document[ i + 1 ] ) throw new IllegalArgumentException( "Document array is not increasing" );
		for( int i = 0; i < document.length; i++ )
			for( int j = position[ i ].length - 1; j-- != 0; ) if ( position[ i ][ j ] >= position[ i ][ j +1 ] ) 
				throw new IllegalArgumentException( "Non-increasing position list for document " + i + ": " + Arrays.toString( position[ i ] ) );
	}
	
	public long termNumber() {
		return termNumber;
	}

	public boolean mayHaveNext() {
		return curr < document.length - 1; 
	}

	public long nextDocument() {
		if ( ! mayHaveNext() ) return -1;
		curr++;
		currentIterator = null;
		return document[ curr ];
	}

	public long skipTo( long n ) {
		if ( curr != -1 && document[ curr ] >= n ) return document[ curr ];
		long result;
		while ( mayHaveNext() ) if ( ( result = nextDocument() ) >= n ) return result;
		return END_OF_LIST;
	}

	public  T accept(DocumentIteratorVisitor visitor) throws IOException {
		return visitor.visit( this );
	}

	public  T acceptOnTruePaths(DocumentIteratorVisitor visitor) throws IOException {
		return visitor.visit( this );
	}

	public void dispose() {}

	public long document() {
		if ( curr == -1 ) return -1;
		return document[ curr ];
	}
	
	public ReferenceSet indices() {
		return ReferenceSets.singleton(  index );
	}

	public static class ArraySingletonIntervalIterator extends AbstractObjectIterator implements IntervalIterator {
		private int curr = -1;
		private final int[] position;

		public ArraySingletonIntervalIterator( int[] position ) {
			this.position = position;
		}
		
		public int extent() {
			return 1;
		}

		public void reset() {
			curr = -1;
		}

		public void intervalTerms( final LongSet terms ) {
			throw new UnsupportedOperationException();
		}

		public boolean hasNext() {
			return curr < position.length - 1;
		}

		public Interval next() {
			if ( ! hasNext() ) throw new NoSuchElementException();
			curr++;
			return Interval.valueOf( position[ curr ] );
		}

		public Interval nextInterval() {
			if ( ! hasNext() ) return null;
			return next();
		}
		
		public String toString() {
			return Arrays.toString( position );
		}
	}
	
	public IntervalIterator intervalIterator() {
		if ( curr == -1 ) throw new IllegalStateException();
		if ( currentIterator != null ) return currentIterator;
		if ( position[ curr ].length == 0 ) return IntervalIterators.FALSE;
		return currentIterator = new ArraySingletonIntervalIterator( position[ curr ] );
	}

	public IntervalIterator intervalIterator(Index index) {
		return intervalIterator();
	}

	public Reference2ReferenceMap intervalIterators() {
		throw new UnsupportedOperationException();
	}

	public IntervalIterator iterator() {
		return intervalIterator();
	}
	
	public void reset() {
		curr = -1;
	}
	
	public String toString() {
		MutableString result = new MutableString();
		result.append( '[' );
		for( int i = 0; i < document.length; i++ ) {
			if ( i != 0 ) result.append( ", " );
			result.append( '<' ).append( document[ i ] ).append( ':' ).append( Arrays.toString(  position[ i ] ) ).append( '>' );
		}
		return result.append( ']' ).toString();
	}

	public int count() {
		return position[ curr ].length;
	}

	public long frequency() {
		return document.length;
	}

	public IntArrayIndexIterator id( int id ) {
		this.id = id;
		return this;
	}

	public int id() {
		return id;
	}

	public Index index() {
		return index;
	}

	public Payload payload() {
		return null;
	}

	public int[] positionArray() {
		return position[ curr ];
	}

	public IntIterator positions() {
		return IntIterators.wrap( position[ curr] );
	}

	public int positions( int[] position ) {
		if ( this.position[ curr ].length > position.length ) return -this.position[ curr ].length - 1;
		System.arraycopy( this.position[ curr ], 0, position, 0, this.position[ curr ].length );
		return this.position[ curr ].length;
	}

	public String term() {
		return term;
	}

	public IntArrayIndexIterator term( CharSequence term ) {
		this.term = term.toString();
		return this;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy