All Downloads are FREE. The search and download functionality uses the official Maven repository.

test.it.unimi.dsi.big.mg4j.search.OrDocumentIteratorTest Maven / Gradle / Ivy

Go to download

MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.

The newest version!
package it.unimi.dsi.big.mg4j.search;

import static org.junit.Assert.*;
import it.unimi.dsi.big.mg4j.document.StringArrayDocumentCollection;
import it.unimi.dsi.big.mg4j.index.BitStreamIndex;
import it.unimi.dsi.big.mg4j.index.DiskBasedIndex;
import it.unimi.dsi.big.mg4j.query.nodes.Query;
import it.unimi.dsi.big.mg4j.query.nodes.QueryBuilderVisitorException;
import it.unimi.dsi.big.mg4j.query.parser.QueryParserException;
import it.unimi.dsi.big.mg4j.query.parser.SimpleParser;
import it.unimi.dsi.big.mg4j.tool.IndexBuilder;
import it.unimi.dsi.util.Interval;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.URISyntaxException;

import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.io.filefilter.PrefixFileFilter;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Unit tests for {@link OrDocumentIterator}.
 *
 * <p>One test runs a parsed disjunctive query against a small on-disk index
 * built once per class; the other two combine hand-written
 * {@link IntArrayDocumentIterator} fixtures and check the documents,
 * intervals and extents reported by the resulting iterator.
 */
public class OrDocumentIteratorTest {
	/** Index opened in {@link #setUp()} from the files written by the index builder. */
	private static BitStreamIndex index;
	/** Query parser created in {@link #setUp()} with the term processor of {@link #index}. */
	private static SimpleParser simpleParser;
	/** Canonical path of the temporary file whose name prefixes every file created by {@link #setUp()}. */
	private static String basename;

	/**
	 * Builds an index over the three documents "a", "b" and "c" under a
	 * temporary basename, opens its <code>-text</code> index and creates the parser.
	 */
	@BeforeClass
	public static void setUp() throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		basename = File.createTempFile( OrDocumentIteratorTest.class.getSimpleName(), "test" ).getCanonicalPath();
		new IndexBuilder( basename, new StringArrayDocumentCollection( "a", "b", "c" ) ).run();
		index = DiskBasedIndex.getInstance( basename + "-text", true, true );
		simpleParser = new SimpleParser( index.termProcessor );
	}

	/**
	 * Removes, on a best-effort basis, every file in the temporary directory
	 * whose name starts with the name of {@link #basename}.
	 */
	@AfterClass
	public static void tearDown() {
		// JUnit runs @AfterClass methods even when @BeforeClass threw, so
		// basename may never have been assigned.
		if ( basename == null ) return;
		final File base = new File( basename );
		final File directory = base.getParentFile();
		if ( directory == null ) return;
		// listFiles() returns null (not an empty array) on I/O errors or when
		// the path is not a directory; iterating it blindly would throw.
		final File[] leftovers = directory.listFiles( (FileFilter)new PrefixFileFilter( base.getName() ) );
		if ( leftovers == null ) return;
		// Cleanup stays best-effort: a file that cannot be removed now is
		// scheduled for deletion at JVM exit instead of being silently kept.
		for( File f: leftovers ) if ( ! f.delete() ) f.deleteOnExit();
	}

	/**
	 * Checks that skipping to document 2 on the iterator built for
	 * <code>a | b | c</code> returns 2.
	 */
	@Test
	public void testSkipBug() throws QueryParserException, QueryBuilderVisitorException, IOException {
		Query query = simpleParser.parse( "a | b | c" );
		DocumentIteratorBuilderVisitor documentIteratorBuilderVisitor = new DocumentIteratorBuilderVisitor( null, index, Integer.MAX_VALUE );
		DocumentIterator documentIterator = query.accept( documentIteratorBuilderVisitor );
		try {
			assertEquals( 2, documentIterator.skipTo( 2 ) );
		}
		finally {
			// Dispose even when the assertion fails, so that a failing test
			// does not leave the index-backed iterator undisposed.
			documentIterator.dispose();
		}
	}
	
	/**
	 * Walks the disjunction of two array-based iterators document by document
	 * and checks the returned document pointers and intervals.
	 */
	@Test
	public void testOr() throws IOException {
		// NOTE(review): judging from the assertions below, { {} } appears to
		// stand for IntervalIterators.TRUE and {} for IntervalIterators.FALSE
		// in these fixtures -- confirm against IntArrayDocumentIterator.
		DocumentIterator i0 = new IntArrayDocumentIterator( new long[] { 0, 2, 3, 4, 5, 6, 7 }, 
				new int[][][] { 
				{ { 0, 1 } }, 
				{ { 0, 1 }, { 1, 2 } },
				{ { 0, 1 }, { 1, 2 } },
				{ {} },
				{ {} },
				{},
				{},
				} );
		DocumentIterator i1 = new IntArrayDocumentIterator( new long[] { 1, 2, 3, 4, 5, 6, 7 }, 
				new int[][][] { 
				{ { 1 } },
				{ { 1, 3 }, { 3, 4 } },
				{ {} },
				{ {} },
				{},
				{ {} },
				{},
				} );
		OrDocumentIterator orDocumentIterator = (OrDocumentIterator)OrDocumentIterator.getInstance( i0, i1 );
		assertTrue( orDocumentIterator.mayHaveNext() );
		
		// Document 0: present in i0 only.
		assertEquals( 0, orDocumentIterator.nextDocument() );
		assertTrue( orDocumentIterator.intervalIterator().hasNext() );
		assertTrue( orDocumentIterator.intervalIterator().hasNext() ); // To increase coverage
		assertEquals( Interval.valueOf( 0, 1 ), orDocumentIterator.intervalIterator().nextInterval() );
		assertFalse( orDocumentIterator.intervalIterator().hasNext() );
		assertFalse( orDocumentIterator.intervalIterator().hasNext() ); // To increase coverage
		
		// Document 1: present in i1 only.
		assertEquals( 1, orDocumentIterator.nextDocument() );
		assertEquals( Interval.valueOf( 1, 1 ), orDocumentIterator.intervalIterator().nextInterval() );
		assertFalse( orDocumentIterator.intervalIterator().hasNext() );

		// Document 2: present in both; [1..3] from i1 is not expected in the output.
		assertEquals( 2, orDocumentIterator.nextDocument() );
		assertEquals( Interval.valueOf( 0, 1 ), orDocumentIterator.intervalIterator().nextInterval() );
		assertEquals( Interval.valueOf( 1, 2 ), orDocumentIterator.intervalIterator().nextInterval() );
		assertEquals( Interval.valueOf( 3, 4 ), orDocumentIterator.intervalIterator().nextInterval() );
		assertFalse( orDocumentIterator.intervalIterator().hasNext() );

		// Document 3: only the intervals of i0 are expected.
		assertEquals( 3, orDocumentIterator.nextDocument() );
		assertEquals( Interval.valueOf( 0, 1 ), orDocumentIterator.intervalIterator().nextInterval() );
		assertEquals( Interval.valueOf( 1, 2 ), orDocumentIterator.intervalIterator().nextInterval() );
		assertFalse( orDocumentIterator.intervalIterator().hasNext() );

		// Documents 4-6: at least one component was built from { {} }.
		assertEquals( 4, orDocumentIterator.nextDocument() );
		assertEquals( IntervalIterators.TRUE, orDocumentIterator.intervalIterator() );
		
		assertEquals( 5, orDocumentIterator.nextDocument() );
		assertEquals( IntervalIterators.TRUE, orDocumentIterator.intervalIterator() );

		assertEquals( 6, orDocumentIterator.nextDocument() );
		assertEquals( IntervalIterators.TRUE, orDocumentIterator.intervalIterator() );
		
		// Document 7: both components were built from {}.
		assertEquals( 7, orDocumentIterator.nextDocument() );
		assertEquals( IntervalIterators.FALSE, orDocumentIterator.intervalIterator() );
		// Once exhausted, nextDocument() is expected to keep returning -1.
		assertEquals( -1, orDocumentIterator.nextDocument() );
		assertEquals( -1, orDocumentIterator.nextDocument() );
	}
	
	/**
	 * Checks the value reported by <code>extent()</code> on the interval
	 * iterator of each of the first four documents of a disjunction, then
	 * checks the last document and exhaustion.
	 */
	@Test
	public void testExtentDocumentIterator() throws IOException {
		IntArrayDocumentIterator i0 = new IntArrayDocumentIterator( new long[] { 0, 2, 3, 4 }, 
				new int[][][] { 
				{ { 0, 1 }, { 1, 2 } }, 
				{ { 1, 5 } }, 
				{ {} },
				{ {} }
				} );
		IntArrayDocumentIterator i1 = new IntArrayDocumentIterator( new long[] { 1, 2, 3, 4 }, 
				new int[][][] { 
				{ { 5, 7 } }, 
				{ { 2, 4 } }, 
				{ { 2, 4 } }, 
				{ {} }
				} );
		
		DocumentIterator orDocumentIterator = OrDocumentIterator.getInstance( i0, i1 );
		assertEquals( 0, orDocumentIterator.nextDocument() );
		assertEquals( 2, orDocumentIterator.intervalIterator().extent() );
		assertEquals( 1, orDocumentIterator.nextDocument() );
		assertEquals( 3, orDocumentIterator.intervalIterator().extent() );
		assertEquals( 2, orDocumentIterator.nextDocument() );
		assertEquals( 3, orDocumentIterator.intervalIterator().extent() );
		assertEquals( 3, orDocumentIterator.nextDocument() );
		assertEquals( 3, orDocumentIterator.intervalIterator().extent() );
		// No extent is checked for document 4.
		assertEquals( 4, orDocumentIterator.nextDocument() );
		assertEquals( -1, orDocumentIterator.nextDocument() );
		assertEquals( -1, orDocumentIterator.nextDocument() );
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy