All downloads are FREE. Search and download functionality uses the official Maven repository.

test.it.unimi.dsi.big.mg4j.index.MultiTermIndexIteratorTest Maven / Gradle / Ivy

Go to download

MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.

The newest version!
package it.unimi.dsi.big.mg4j.index;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import it.unimi.dsi.big.mg4j.document.StringArrayDocumentCollection;
import it.unimi.dsi.big.mg4j.query.nodes.Query;
import it.unimi.dsi.big.mg4j.query.nodes.QueryBuilderVisitorException;
import it.unimi.dsi.big.mg4j.query.parser.QueryParserException;
import it.unimi.dsi.big.mg4j.query.parser.SimpleParser;
import it.unimi.dsi.big.mg4j.search.DocumentIterator;
import it.unimi.dsi.big.mg4j.search.DocumentIteratorBuilderVisitor;
import it.unimi.dsi.big.mg4j.search.IntArrayIndexIterator;
import it.unimi.dsi.big.mg4j.search.OrDocumentIterator;
import it.unimi.dsi.big.mg4j.search.visitor.AbstractDocumentIteratorVisitor;
import it.unimi.dsi.big.mg4j.tool.IndexBuilder;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.util.Interval;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.URISyntaxException;

import org.apache.commons.configuration.ConfigurationException;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Tests for {@link MultiTermIndexIterator}, both on hand-built {@link IntArrayIndexIterator}
 * instances and on small indices built on disk in the temporary directory.
 *
 * <p>Every file created under a temporary basename is registered for deletion at JVM exit,
 * so repeated test runs do not accumulate index files in the temporary directory.
 */
public class MultiTermIndexIteratorTest {
	/** The index built by {@link #setUp()} over the documents "a", "b" and "c". */
	private static BitStreamIndex index;
	/** A parser created in {@link #setUp()} from the term processor of {@link #index}. */
	private static SimpleParser simpleParser;

	/**
	 * Creates a temporary placeholder file named after this test class and returns its canonical path,
	 * to be used as an index basename.
	 *
	 * @return the canonical path of the placeholder file.
	 * @throws IOException if the placeholder cannot be created or its path cannot be resolved.
	 */
	private static String createTempBasename() throws IOException {
		final File placeholder = File.createTempFile( MultiTermIndexIteratorTest.class.getSimpleName(), "test" );
		placeholder.deleteOnExit();
		return placeholder.getCanonicalPath();
	}

	/**
	 * Registers for deletion at JVM exit every file lying in the same directory as the given basename
	 * whose name starts with the name of the basename (this includes the placeholder itself).
	 *
	 * @param basename a basename returned by {@link #createTempBasename()}.
	 */
	private static void deleteIndexFilesOnExit( final String basename ) {
		final File placeholder = new File( basename );
		final File directory = placeholder.getParentFile();
		if ( directory == null ) return;
		final File[] sibling = directory.listFiles();
		// listFiles() returns null if the directory cannot be read.
		if ( sibling == null ) return;
		final String prefix = placeholder.getName();
		for ( File file : sibling ) {
			if ( file.getName().startsWith( prefix ) ) file.deleteOnExit();
		}
	}

	/**
	 * Builds an index over the three documents "a", "b" and "c", loads the index stored under the
	 * basename with suffix <code>-text</code>, and creates a parser using its term processor.
	 */
	@BeforeClass
	public static void setUp() throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		final String basename = createTempBasename();
		try {
			new IndexBuilder( basename, new StringArrayDocumentCollection( "a", "b", "c" ) ).run();
		}
		finally {
			// Register whatever the builder wrote, even if it failed halfway.
			deleteIndexFilesOnExit( basename );
		}
		index = DiskBasedIndex.getInstance( basename + "-text", true, true );
		simpleParser = new SimpleParser( index.termProcessor );
	}

	/**
	 * Parses the query <code>a + b + c</code>, builds a document iterator for it on {@link #index}
	 * and checks that skipping to document 2 returns 2.
	 */
	@Test
	public void testSkipBug() throws QueryParserException, QueryBuilderVisitorException, IOException {
		final Query query = simpleParser.parse( "a + b + c" );
		final DocumentIteratorBuilderVisitor documentIteratorBuilderVisitor = new DocumentIteratorBuilderVisitor( null, index, Integer.MAX_VALUE );
		final DocumentIterator documentIterator = query.accept( documentIteratorBuilderVisitor );
		try {
			assertEquals( 2, documentIterator.skipTo( 2 ) );
		}
		finally {
			// Dispose even when the assertion fails.
			documentIterator.dispose();
		}
	}

	/**
	 * Merges three hand-built index iterators and walks through the result, checking for each
	 * document the intervals, the count and the positions (via <code>positionArray()</code>,
	 * <code>positions(int[])</code> and <code>positions()</code>).
	 *
	 * <p>The order of the statements matters: position accessors are deliberately interleaved with
	 * calls on the interval iterator, and the interval iterator is expected to continue from where it was left.
	 */
	@Test
	public void test() throws IOException {
		// Documents 0, 1, 2 with positions { 0, 3 }, { 0 }, { 0 }.
		IndexIterator i0 = new IntArrayIndexIterator( new long[] { 0, 1, 2 }, 
				new int[][] { 
				{ 0, 3 }, 
				{ 0 }, 
				{ 0 }, 
				} );
		// Documents 0, 2 with positions { 1 }, { 1 }.
		IndexIterator i1 = new IntArrayIndexIterator( new long[] { 0, 2 }, 
				new int[][] { 
				{ 1 },
				{ 1 },
				} );
		// Documents 0, 1, 3 with positions { 2 }, { 2 }, { 0 }.
		IndexIterator i2 = new IntArrayIndexIterator( new long[] { 0, 1, 3 }, 
				new int[][] { 
				{ 2 },
				{ 2 },
				{ 0 },
				} );
		MultiTermIndexIterator multiTermIndexIterator = (MultiTermIndexIterator)MultiTermIndexIterator.getInstance( i0, i1, i2 );
		// NOTE(review): the merged list spans four documents but 3 is expected here; presumably
		// frequency() reports the largest component frequency -- confirm against MultiTermIndexIterator.
		assertEquals( 3, multiTermIndexIterator.frequency() );
		
		// Document 0: positions 0 and 3 from i0, 1 from i1, 2 from i2.
		assertEquals( 0, multiTermIndexIterator.nextDocument() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage
		assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertEquals( Interval.valueOf( 1 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );

		assertEquals( 4, multiTermIndexIterator.count() );
		int[] position = multiTermIndexIterator.positionArray();
		assertEquals( 0, position[ 0 ] );
		assertEquals( 1, position[ 1 ] );
		assertEquals( 2, position[ 2 ] );
		assertEquals( 3, position[ 3 ] );

		// Reading the positions must not disturb the interval iterator.
		assertEquals( Interval.valueOf( 2 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		
		position = new int[ 4 ];
		multiTermIndexIterator.positions( position );
		assertEquals( 0, position[ 0 ] );
		assertEquals( 1, position[ 1 ] );
		assertEquals( 2, position[ 2 ] );
		assertEquals( 3, position[ 3 ] );

		
		assertEquals( Interval.valueOf( 3 ), multiTermIndexIterator.intervalIterator().nextInterval() );

		IntIterator positions = multiTermIndexIterator.positions();
		assertEquals( 0, positions.nextInt() );
		assertEquals( 1, positions.nextInt() );
		assertEquals( 2, positions.nextInt() );
		assertEquals( 3, positions.nextInt() );
		assertFalse( positions.hasNext() );

		
		assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );
		assertFalse( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage

		// Document 1: position 0 from i0, 2 from i2.
		assertEquals( 1, multiTermIndexIterator.nextDocument() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage
		assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertEquals( Interval.valueOf( 2 ), multiTermIndexIterator.intervalIterator().nextInterval() );

		assertEquals( 2, multiTermIndexIterator.count() );
		position = multiTermIndexIterator.positionArray();
		assertEquals( 0, position[ 0 ] );
		assertEquals( 2, position[ 1 ] );
		positions = multiTermIndexIterator.positions();
		assertEquals( 0, positions.nextInt() );
		assertEquals( 2, positions.nextInt() );
		assertFalse( positions.hasNext() );
		
		assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );

		// Document 2: position 0 from i0, 1 from i1.
		assertEquals( 2, multiTermIndexIterator.nextDocument() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() ); // To increase coverage
		assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );
		assertEquals( Interval.valueOf( 1 ), multiTermIndexIterator.intervalIterator().nextInterval() );

		assertEquals( 2, multiTermIndexIterator.count() );
		position = multiTermIndexIterator.positionArray();
		assertEquals( 0, position[ 0 ] );
		assertEquals( 1, position[ 1 ] );
		positions = multiTermIndexIterator.positions();
		assertEquals( 0, positions.nextInt() );
		assertEquals( 1, positions.nextInt() );
		assertFalse( positions.hasNext() );
		
		assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );
		
		// Document 3 appears in i2 only.
		// Here we get the iterator of the underlying IndexIterator
		assertEquals( 3, multiTermIndexIterator.nextDocument() );
		assertTrue( multiTermIndexIterator.intervalIterator().hasNext() );
		assertEquals( Interval.valueOf( 0 ), multiTermIndexIterator.intervalIterator().nextInterval() );

		assertEquals( 1, multiTermIndexIterator.count() );
		position = multiTermIndexIterator.positionArray();
		assertEquals( 0, position[ 0 ] );
		positions = multiTermIndexIterator.positions();
		assertEquals( 0, positions.nextInt() );
		assertFalse( positions.hasNext() );
		
		assertFalse( multiTermIndexIterator.intervalIterator().hasNext() );
		// Exhaustion is reported as -1, and keeps being reported on further calls.
		assertEquals( -1, multiTermIndexIterator.nextDocument() );
		assertEquals( -1, multiTermIndexIterator.nextDocument() );

	}
	
	/**
	 * Builds an index over four documents, combines single-term and multi-term iterators in a
	 * disjunction, and invokes <code>positionArray()</code> on every visited index iterator
	 * positioned on the current document, first while skipping to each document in turn and
	 * then while exhausting the iterator. The test passes if no exception is thrown.
	 *
	 * <p>Contributed by Fabien Campagne.
	 */
	@Test
	public void testMG4JMultiTermPositionIssue() throws IllegalAccessException, NoSuchMethodException, ConfigurationException, IOException, InvocationTargetException, InstantiationException, ClassNotFoundException, URISyntaxException {
		final String basename = createTempBasename();
		try {
			new IndexBuilder( basename, new StringArrayDocumentCollection(
					"A B C D E F F G G",
					"G A T H S K L J W L",
					"E S K D L J F K L S J D L S J D",
					"E B"
			) ).run();
		}
		finally {
			// Register whatever the builder wrote, even if it failed halfway.
			deleteIndexFilesOnExit( basename );
		}
		// Named differently from the static field so that the latter is not shadowed.
		Index textIndex = DiskBasedIndex.getInstance( basename + "-text", true, true );

		/// String query = "A| B+C+G|W|S+J";
		DocumentIterator iterator = OrDocumentIterator.getInstance(
				textIndex.documents( "A" ),
				MultiTermIndexIterator.getInstance(
						textIndex.documents( "B" ),
						textIndex.documents( "C" ),
						textIndex.documents( "G" )
				),
				textIndex.documents( "W" ),
				MultiTermIndexIterator.getInstance(
						textIndex.documents( "S" ),
						textIndex.documents( "J" )
				) );

		try {
			// A one-element array so that the anonymous visitor can read the current document.
			final long[] currDoc = new long[ 1 ];
			// A visitor invoking positionArray() on IndexIterators positioned on the current document.
			AbstractDocumentIteratorVisitor visitor = new AbstractDocumentIteratorVisitor() {
				public Boolean visit( IndexIterator indexIterator ) throws IOException {
					if ( indexIterator.count() > 0 && indexIterator.document() == currDoc[ 0 ] ) indexIterator.positionArray();
					return Boolean.TRUE;
				}
			};

			// Document pointers are longs in this fork, so the loop variable is a long, too.
			for ( long document = 0; document < textIndex.numberOfDocuments; document++ ) {
				currDoc[ 0 ] = iterator.skipTo( document );

				if ( document == currDoc[ 0 ] ) {
					iterator.accept( visitor ); // see the visitor defined above.
				}
			}

			while ( ( currDoc[ 0 ] = iterator.nextDocument() ) != -1 ) iterator.accept( visitor );
		}
		finally {
			// As in testSkipBug(), dispose the iterator once done.
			iterator.dispose();
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy