![JAR search and dependency download from the Maven repository](/logo.png)
test.it.unimi.dsi.big.mg4j.mock.search.MockNonmockTest Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mg4j-big Show documentation
Show all versions of mg4j-big Show documentation
MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.
The newest version!
package it.unimi.dsi.big.mg4j.mock.search;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import it.unimi.dsi.big.mg4j.document.CompositeDocumentSequence;
import it.unimi.dsi.big.mg4j.document.DateArrayDocumentCollection;
import it.unimi.dsi.big.mg4j.document.DocumentSequence;
import it.unimi.dsi.big.mg4j.document.StringArrayDocumentCollection;
import it.unimi.dsi.big.mg4j.index.BitStreamIndex;
import it.unimi.dsi.big.mg4j.index.CompressionFlags;
import it.unimi.dsi.big.mg4j.index.CompressionFlags.Coding;
import it.unimi.dsi.big.mg4j.index.CompressionFlags.Component;
import it.unimi.dsi.big.mg4j.index.Index;
import it.unimi.dsi.big.mg4j.query.nodes.Align;
import it.unimi.dsi.big.mg4j.query.nodes.And;
import it.unimi.dsi.big.mg4j.query.nodes.Consecutive;
import it.unimi.dsi.big.mg4j.query.nodes.Difference;
import it.unimi.dsi.big.mg4j.query.nodes.LowPass;
import it.unimi.dsi.big.mg4j.query.nodes.MultiTerm;
import it.unimi.dsi.big.mg4j.query.nodes.Not;
import it.unimi.dsi.big.mg4j.query.nodes.Or;
import it.unimi.dsi.big.mg4j.query.nodes.OrderedAnd;
import it.unimi.dsi.big.mg4j.query.nodes.Query;
import it.unimi.dsi.big.mg4j.query.nodes.QueryBuilderVisitorException;
import it.unimi.dsi.big.mg4j.query.nodes.Range;
import it.unimi.dsi.big.mg4j.query.nodes.Select;
import it.unimi.dsi.big.mg4j.query.nodes.Term;
import it.unimi.dsi.big.mg4j.query.nodes.True;
import it.unimi.dsi.big.mg4j.query.parser.SimpleParser;
import it.unimi.dsi.big.mg4j.search.DocumentIterator;
import it.unimi.dsi.big.mg4j.search.DocumentIteratorBuilderVisitor;
import it.unimi.dsi.big.mg4j.search.IntervalIterator;
import it.unimi.dsi.big.mg4j.search.IntervalIterators;
import it.unimi.dsi.big.mg4j.search.score.ScorerTest;
import it.unimi.dsi.big.mg4j.tool.IndexBuilder;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;
import it.unimi.dsi.fastutil.longs.LongRBTreeSet;
import it.unimi.dsi.fastutil.longs.LongSortedSet;
import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.Object2ReferenceArrayMap;
import it.unimi.dsi.fastutil.objects.Object2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ObjectIterators;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.util.Interval;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.URISyntaxException;
import java.text.DateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.io.FileUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
public class MockNonmockTest {
/** The temporary directory where all tests are run. */
private static File tempDir;
/** The number of documents in every generated collection. */
private static final int nDocuments = 100;
/** The number of text indices with positions (registered as <code>index<var>i</var></code>). */
private static final int nIndices = 3;
/** The number of date indices (registered as <code>special<var>i</var></code>). */
private static final int nSpecialIndices = 1;
/** The number of text indices built without counts and positions (registered as <code>nonposindex<var>i</var></code>). */
private static final int nNonposIndices = 1;
/** The exclusive upper bound on the number of words of a generated document. */
private static final int maxDocLength = 100;
/** The words used to build both documents and queries. */
private static final String[] dictionary = new String[] { "a", "b", "c" };
/** The maximum number of subqueries of a generated composite query. */
private static final int maxSubqueries = 5;
/** The exclusive upper bound on the margins of a generated {@link Difference}. */
private static final int maxMargin = 3;
/** The minimum threshold of a generated {@link LowPass}. */
private static final int minLow = 3;
/** The exclusive upper bound on the threshold of a generated {@link LowPass}. */
private static final int maxLow = 10;
/** The exclusive upper bound on the gaps of a generated {@link Consecutive}. */
private static final int maxGap = 10;
/** The collections behind the text indices with positions. */
private static StringArrayDocumentCollection[] documentCollection = new StringArrayDocumentCollection[ nIndices ];
/** The collections behind the date indices. */
private static DateArrayDocumentCollection[] specialCollection = new DateArrayDocumentCollection[ nSpecialIndices ];
/** The collections behind the indices without positions. */
private static StringArrayDocumentCollection[] nonposCollection = new StringArrayDocumentCollection[ nNonposIndices ];
/** The basenames of the text indices with positions. */
private static String[] basename = new String[ nIndices ];
/** The basenames of the date indices. */
private static String[] specialBasename = new String[ nSpecialIndices ];
/** The basenames of the indices without positions. */
private static String[] nonposBasename = new String[ nNonposIndices ];
/** A map from index aliases (as used in queries) to indices. */
private static Object2ReferenceMap<String, Index> indexMap = new Object2ReferenceArrayMap<String, Index>();
/** The inverse of {@link #indexMap}, used to print readable failure messages. */
private static Object2ReferenceMap<Index, String> indexName = new Object2ReferenceArrayMap<Index, String>();
/** All indices: first the text indices with positions, then the date indices, then the indices without positions. */
private static Index[] index = new BitStreamIndex[ nIndices + nSpecialIndices + nNonposIndices ];
/** The source of randomness; the fixed seed makes every run generate the same documents and queries. */
private static Random random = new Random( 0 );
/**
 * Creates the temporary directory and builds all indices used by the tests: {@link #nIndices}
 * text indices with positions, {@link #nSpecialIndices} date indices and {@link #nNonposIndices}
 * text indices without counts and positions. Every index is registered under its alias in
 * {@link #indexMap} and {@link #indexName}.
 *
 * @throws IOException if the temporary directory or one of the index files cannot be created.
 */
@BeforeClass
public static void setUp() throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
	// createTempFile() provides a unique name; the file is then replaced by a directory with the same name.
	tempDir = File.createTempFile( "mg4jtest", null );
	if ( ! tempDir.delete() || ! tempDir.mkdir() ) throw new IOException( "Cannot create temporary directory " + tempDir );

	// Text indices with positions.
	for ( int i = 0; i < nIndices; i++ ) {
		basename[ i ] = File.createTempFile( ScorerTest.class.getSimpleName(), "test" + i, tempDir ).toString();
		final String[] docs = randomDocuments();
		documentCollection[ i ] = new StringArrayDocumentCollection( docs );
		new IndexBuilder( basename[ i ], new CompositeDocumentSequence( new DocumentSequence[] { new StringArrayDocumentCollection( docs ) }, new String[] { "text" + i } ) ).run();
		index[ i ] = Index.getInstance( basename[ i ] + "-text" + i + "?mapped=1", true, true );
		indexMap.put( "index" + i, index[ i ] );
		indexName.put( index[ i ], "index" + i );
	}

	// Date indices.
	for ( int i = 0; i < nSpecialIndices; i++ ) {
		specialBasename[ i ] = File.createTempFile( ScorerTest.class.getSimpleName(), "special-test" + i, tempDir ).toString();
		final Date[] docs = new Date[ nDocuments ];
		for ( int d = 0; d < nDocuments; d++ ) docs[ d ] = new Date( random.nextLong() % 1000000000L );
		specialCollection[ i ] = new DateArrayDocumentCollection( docs );
		new IndexBuilder( specialBasename[ i ], specialCollection[ i ] ).run();
		final int slot = nIndices + i;
		index[ slot ] = Index.getInstance( specialBasename[ i ] + "-date", true, true );
		indexMap.put( "special" + i, index[ slot ] );
		indexName.put( index[ slot ], "special" + i );
	}

	// Text indices written without counts and positions.
	for ( int i = 0; i < nNonposIndices; i++ ) {
		final Map<Component, Coding> writerFlags = new Object2ObjectOpenHashMap<Component, Coding>( CompressionFlags.DEFAULT_STANDARD_INDEX );
		writerFlags.remove( Component.POSITIONS );
		writerFlags.remove( Component.COUNTS );
		nonposBasename[ i ] = File.createTempFile( ScorerTest.class.getSimpleName(), "nonpos-test" + i, tempDir ).toString();
		nonposCollection[ i ] = new StringArrayDocumentCollection( randomDocuments() );
		new IndexBuilder( nonposBasename[ i ], nonposCollection[ i ] ).standardWriterFlags( writerFlags ).run();
		final int slot = nIndices + nSpecialIndices + i;
		index[ slot ] = Index.getInstance( nonposBasename[ i ] + "-text", true, true );
		indexMap.put( "nonposindex" + i, index[ slot ] );
		indexName.put( index[ slot ], "nonposindex" + i );
	}
}

/**
 * Generates {@link #nDocuments} random documents, each made of less than {@link #maxDocLength}
 * words drawn from {@link #dictionary}; every word is followed by a space.
 *
 * @return the generated documents.
 */
private static String[] randomDocuments() {
	final String[] docs = new String[ nDocuments ];
	for ( int d = 0; d < nDocuments; d++ ) {
		final int docLength = random.nextInt( maxDocLength );
		final StringBuilder sb = new StringBuilder();
		for ( int j = 0; j < docLength; j++ ) sb.append( dictionary[ random.nextInt( dictionary.length ) ] ).append( ' ' );
		docs[ d ] = sb.toString();
	}
	return docs;
}
/**
 * Deletes the temporary directory and everything in it.
 *
 * @throws IOException if the directory cannot be deleted.
 */
@AfterClass
public static void tearDown() throws IOException {
	// If setUp() failed before creating the directory there is nothing to delete; without this
	// guard forceDelete() would throw a NullPointerException on top of the original failure.
	if ( tempDir != null ) FileUtils.forceDelete( tempDir );
}
/**
 * Checks that two document iterators return exactly the same documents and intervals until
 * exhaustion, and then disposes both of them.
 *
 * @param it0 the first iterator.
 * @param it1 the second iterator.
 * @return the set of documents returned by the iterators.
 */
public LongSortedSet assertSame( DocumentIterator it0, DocumentIterator it1 ) throws IOException {
	final LongSortedSet documents = assertSame( it0, it1, Integer.MAX_VALUE );
	it0.dispose();
	it1.dispose();
	return documents;
}

/**
 * Checks that two document iterators return the same documents, the same index sets and, for
 * every index with positions, the same intervals, for at most <code>maxIter</code> documents.
 * The iterators are <em>not</em> disposed.
 *
 * @param it0 the first iterator.
 * @param it1 the second iterator.
 * @param maxIter the maximum number of documents to compare.
 * @return the set of documents returned by the iterators during the comparison.
 */
public LongSortedSet assertSame( DocumentIterator it0, DocumentIterator it1, int maxIter ) throws IOException {
	final LongRBTreeSet documents = new LongRBTreeSet();
	// Note that we *always* advance *both* iterators.
	while ( maxIter-- != 0 ) {
		final long d0 = it0.nextDocument();
		final long d1 = it1.nextDocument();
		assertEquals( d0, d1 );
		if ( d0 == -1 || d1 == -1 ) break;
		documents.add( d0 );

		final ReferenceSet<Index> indices0 = it0.indices();
		final ReferenceSet<Index> indices1 = it1.indices();
		assertEquals( indices0, indices1 );

		for ( Index currIndex: indices0 ) {
			if ( ! currIndex.hasPositions ) continue;
			final IntervalIterator intervalIterator0 = it0.intervalIterator( currIndex );
			final IntervalIterator intervalIterator1 = it1.intervalIterator( currIndex );
			final String message = indexName.get( currIndex ) + " " + intervalIterator0 + " != " + intervalIterator1;
			// The two singleton iterators must appear on both sides or on neither.
			assertFalse( message, intervalIterator0 == IntervalIterators.FALSE && intervalIterator1 != IntervalIterators.FALSE );
			assertFalse( message, intervalIterator0 != IntervalIterators.FALSE && intervalIterator1 == IntervalIterators.FALSE );
			assertFalse( message, intervalIterator0 == IntervalIterators.TRUE && intervalIterator1 != IntervalIterators.TRUE );
			assertFalse( message, intervalIterator0 != IntervalIterators.TRUE && intervalIterator1 == IntervalIterators.TRUE );
			// After the checks above, this skips the case in which both are TRUE or both are FALSE.
			if ( intervalIterator0 == IntervalIterators.TRUE || intervalIterator1 == IntervalIterators.FALSE ) continue;
			while ( intervalIterator0.hasNext() && intervalIterator1.hasNext() ) {
				final Interval interval0 = intervalIterator0.nextInterval();
				final Interval interval1 = intervalIterator1.nextInterval();
				assertEquals( interval0, interval1 );
			}
			// Both interval iterators must be exhausted at the same time.
			assertEquals( Boolean.valueOf( intervalIterator0.hasNext() ), Boolean.valueOf( intervalIterator1.hasNext() ) );
		}
	}
	// maxIter is -1 only if the loop used up all its iterations; otherwise we got out because the
	// iterators were exhausted, and they must keep on returning -1.
	if ( maxIter != -1 ) {
		assertEquals( -1, it0.nextDocument() );
		assertEquals( -1, it1.nextDocument() );
	}
	return documents;
}
/**
 * Skips both iterators to the first known document, checks that they land on it and that they
 * agree on (at most) the four following documents, then disposes them.
 *
 * @param documents the nonempty set of documents returned by the query.
 * @param it0 the first iterator.
 * @param it1 the second iterator.
 */
private void assertFirstSkip( LongSortedSet documents, DocumentIterator it0, DocumentIterator it1 ) throws IOException {
	final long first = documents.firstLong();
	final long reached0 = it0.skipTo( first );
	assertEquals( first, reached0 );
	final long reached1 = it1.skipTo( first );
	assertEquals( first, reached1 );
	assertSame( it0, it1, 4 );
	it0.dispose();
	it1.dispose();
}
/**
 * Skips both iterators to the last known document, checks that they land on it and that they
 * behave identically from there until exhaustion, then disposes them.
 *
 * @param documents the nonempty set of documents returned by the query.
 * @param it0 the first iterator.
 * @param it1 the second iterator.
 */
private void assertLastSkip( LongSortedSet documents, DocumentIterator it0, DocumentIterator it1 ) throws IOException {
	assertEquals( documents.lastLong(), it0.skipTo( documents.lastLong() ) );
	assertEquals( documents.lastLong(), it1.skipTo( documents.lastLong() ) );
	// We use the three-argument version, which does not dispose the iterators: the two-argument
	// version would dispose them, and each iterator would end up being disposed twice.
	assertSame( it0, it1, Integer.MAX_VALUE );
	it0.dispose();
	it1.dispose();
}
/**
 * Skips both iterators to two randomly chosen existing documents (the first one taken from the
 * lower part of the result set, the second one from the upper part), checking after each skip
 * that the iterators agree on (at most) the two following documents; then disposes them.
 *
 * @param documents the nonempty set of documents returned by the query.
 * @param it0 the first iterator.
 * @param it1 the second iterator.
 */
private void assertSkipExisting( LongSortedSet documents, DocumentIterator it0, DocumentIterator it1 ) throws IOException {
	final long[] known = documents.toLongArray();
	final int span = 1 + known.length / 2;
	final long lower = known[ random.nextInt( span ) ];
	final long upper = known[ ( known.length - 1 ) / 2 + random.nextInt( span ) ];

	final long lowerReached0 = it0.skipTo( lower );
	assertEquals( lower, lowerReached0 );
	final long lowerReached1 = it1.skipTo( lower );
	assertEquals( lower, lowerReached1 );
	assertSame( it0, it1, 2 );

	// The iterators might be already beyond the second target, so we only ask them to agree.
	final long upperReached0 = it0.skipTo( upper );
	final long upperReached1 = it1.skipTo( upper );
	assertEquals( upperReached0, upperReached1 );
	assertSame( it0, it1, 2 );

	it0.dispose();
	it1.dispose();
}
/**
 * Skips both iterators to a random document, not larger than the last known one, that is
 * <em>not</em> in the result set, and checks that they end up on the same document and agree
 * on (at most) the two following ones. Nothing is checked if no such document exists. The
 * iterators are disposed in any case.
 *
 * @param documents the nonempty set of documents returned by the query.
 * @param it0 the first iterator.
 * @param it1 the second iterator.
 */
private void assertSkipNonExisting( LongSortedSet documents, DocumentIterator it0, DocumentIterator it1 ) throws IOException {
	final long bound = documents.lastLong() + 1;
	// There is a hole only if the result set does not contain all documents in [0..bound).
	if ( documents.size() < bound ) {
		long target;
		do target = ( random.nextLong() & Long.MAX_VALUE ) % bound;
		while ( documents.contains( target ) );

		final long reached0 = it0.skipTo( target );
		final long reached1 = it1.skipTo( target );
		assertEquals( reached0, reached1 );
		assertSame( it0, it1, 2 );
	}
	it0.dispose();
	it1.dispose();
}
/**
 * Skips both iterators just beyond the last known document, checks that they return the same
 * value and agree on (at most) one more document, then disposes them.
 *
 * @param documents the nonempty set of documents returned by the query.
 * @param it0 the first iterator.
 * @param it1 the second iterator.
 */
private void assertSkipBeyondLast( LongSortedSet documents, DocumentIterator it0, DocumentIterator it1 ) throws IOException {
	final long beyond = documents.lastLong() + 1;
	final long reached0 = it0.skipTo( beyond );
	final long reached1 = it1.skipTo( beyond );
	assertEquals( reached0, reached1 );
	assertSame( it0, it1, 1 );
	it0.dispose();
	it1.dispose();
}
/**
 * Runs a query through both the mock and the real document-iterator builder and checks that
 * the resulting iterators behave identically, both when scanned sequentially and (if the query
 * has results) under a number of skipping patterns. Each check uses a fresh pair of iterators.
 *
 * @param query the query to be tested.
 */
public void testQuery( Query query ) throws QueryBuilderVisitorException, IOException {
	final DocumentIteratorBuilderVisitor realVisitor = new DocumentIteratorBuilderVisitor( indexMap, index[ 0 ], nDocuments * 2 );
	final it.unimi.dsi.big.mg4j.mock.search.DocumentIteratorBuilderVisitor mockVisitor = new it.unimi.dsi.big.mg4j.mock.search.DocumentIteratorBuilderVisitor( indexMap, index[ 0 ], nDocuments * 2 );

	final LongSortedSet results = assertSame( query.accept( mockVisitor ), query.accept( realVisitor ) );
	// Skipping tests need at least one document to aim at.
	if ( results.isEmpty() ) return;

	assertFirstSkip( results, query.accept( mockVisitor ), query.accept( realVisitor ) );
	assertLastSkip( results, query.accept( mockVisitor ), query.accept( realVisitor ) );
	// Targets are chosen at random, so we try twice.
	assertSkipExisting( results, query.accept( mockVisitor ), query.accept( realVisitor ) );
	assertSkipExisting( results, query.accept( mockVisitor ), query.accept( realVisitor ) );
	assertSkipNonExisting( results, query.accept( mockVisitor ), query.accept( realVisitor ) );
	assertSkipBeyondLast( results, query.accept( mockVisitor ), query.accept( realVisitor ) );
}
/**
 * Prints on standard error all results of a query (debugging aid).
 *
 * @param query the query to be run.
 * @param mock whether the mock builder should be used instead of the real one.
 * @param indx the index whose intervals will be printed.
 */
public void printResult( Query query, boolean mock, Index indx ) throws QueryBuilderVisitorException, IOException {
	printResult( query, mock, indx, Integer.MAX_VALUE );
}

/**
 * Prints on standard error the results of a query (debugging aid), stopping at the first
 * document that is not smaller than a given bound. For each document, the intervals for the
 * given index are printed (or <code>TRUE</code>/<code>FALSE</code> for the singleton iterators).
 *
 * @param query the query to be run.
 * @param mock whether the mock builder should be used instead of the real one.
 * @param indx the index whose intervals will be printed.
 * @param maxDocument the (exclusive) upper bound on the documents to be printed.
 */
public void printResult( Query query, boolean mock, Index indx, int maxDocument ) throws QueryBuilderVisitorException, IOException {
	final DocumentIterator documentIterator;
	if ( mock )
		documentIterator = query.accept( new it.unimi.dsi.big.mg4j.mock.search.DocumentIteratorBuilderVisitor( indexMap, index[ 0 ], nDocuments * 2 ) );
	else
		documentIterator = query.accept( new DocumentIteratorBuilderVisitor( indexMap, index[ 0 ], nDocuments * 2 ) );

	try {
		long d;
		while ( ( d = documentIterator.nextDocument() ) != -1 && d < maxDocument ) {
			System.err.println( "Document: " + d );
			final IntervalIterator intervals = documentIterator.intervalIterator( indx );
			if ( intervals == IntervalIterators.TRUE )
				System.err.println( indexName.get( indx ) + " --> TRUE" );
			else if ( intervals == IntervalIterators.FALSE )
				System.err.println( indexName.get( indx ) + " --> FALSE" );
			else
				System.err.println( indexName.get( indx ) + " --> " + ObjectIterators.pour( intervals ) );
		}
	}
	finally {
		// Like all other helpers of this class, release the resources held by the iterator.
		documentIterator.dispose();
	}
}
/**
 * Generates a random query by recursive descent, drawing all random choices from the
 * shared, seeded random number generator of this class.
 *
 * @param level the maximum depth of the query to be generated.
 * @param canSelect <code>true</code> if {@link Select} is allowed.
 * @param needsPositions <code>true</code> if all subqueries must be on indices with positions.
 * @param noPositions <code>true</code> if the current index does not contain positions (hence, no operator
 * requiring positions is allowed).
 * @return an artificially generated query.
 */
public Query generateQuery( int level, boolean canSelect, boolean needsPositions, boolean noPositions ) {
// No depth left: generate a leaf, that is, a random term from the dictionary.
if ( level == 0 ) return new Term( dictionary[ random.nextInt( dictionary.length ) ] );
int queryType = random.nextInt( 14 );
switch( queryType ) {
// Composite queries: 0 = Or, 1 = And, 2 = Consecutive, 3 = OrderedAnd, 12 = Consecutive with gaps.
case 0:
case 1:
case 2:
case 3:
case 12:
int c = 1 + random.nextInt( maxSubqueries );
Query q[] = new Query[ c ];
// Selection remains possible only below Or and And; the three positional operators
// force positions on all their subqueries.
for ( int i = 0; i < c; i++ )
q[ i ] = generateQuery( level - 1,
canSelect && queryType < 2,
needsPositions || queryType ==2 || queryType == 3 || queryType == 12, noPositions );
// Without positions, types 2 and 12 turn into Or and type 3 into And (the subqueries have already been generated).
if ( noPositions && queryType > 1 ) queryType %= 2; //Do not generate Consecutive or OrderedAnds if no positions are available
// Every value queryType can assume here has a returning case, so control never falls into case 4 from here.
switch ( queryType ) {
case 0: return new Or( q );
case 1: return new And( q );
case 2: return new Consecutive( q );
case 3: return new OrderedAnd( q );
case 12:
int[] gap = new int[ c ];
for ( int i = 0; i < c; i++ ) gap[ i ] = random.nextInt( maxGap );
return new Consecutive( q, gap );
}
case 4:
return new Not( generateQuery( level - 1, canSelect, needsPositions, noPositions ) );
case 5:
// Align needs positions: without them we generate a Not instead.
if ( noPositions )
return new Not( generateQuery( level - 1, canSelect, needsPositions, noPositions ) );
else
return new Align( generateQuery( level - 1, false, true, noPositions ), generateQuery( level - 1, false, true, noPositions ) );
case 6:
// The else below is paired with the inner if: if selection is allowed we always return,
// choosing an index without positions once out of five times (provided that positions
// are not needed). If selection is not allowed, we fall through to case 7.
if ( canSelect )
if ( random.nextInt( 5 ) == 4 && !needsPositions )
return new Select( "nonposindex" + random.nextInt( nNonposIndices ), generateQuery( level - 1, canSelect, needsPositions, true ) );
else
return new Select( "index" + random.nextInt( nIndices ), generateQuery( level - 1, canSelect, needsPositions, false ) );
case 7:
// Difference needs positions: without them we fall through (case 8 falls through, too, ending up in True).
if ( !noPositions )
return new Difference( generateQuery( level - 1, canSelect, true, noPositions ), generateQuery( level - 1, canSelect, true, noPositions ), random.nextInt( maxMargin ), random.nextInt( maxMargin ) );
case 8:
// LowPass needs positions: without them we fall through to True.
if ( !noPositions )
return new LowPass( generateQuery( level - 1, canSelect, true, noPositions ), minLow + random.nextInt( maxLow - minLow ) );
case 9:
return new True();
case 10:
return new Term( dictionary[ random.nextInt( dictionary.length ) ] );
case 11:
// A MultiTerm on t distinct random terms, where t is between 1 and dictionary.length - 1.
int t = 1 + random.nextInt( dictionary.length - 1 );
IntSet queryTerms = new IntOpenHashSet();
while ( queryTerms.size() < t ) queryTerms.add( random.nextInt( dictionary.length ) );
Term tt[] = new Term[ t ];
int ss[] = new int[ t ];
queryTerms.toArray( ss );
for ( int i = 0; i < t; i++ ) tt[ i ] = new Term( dictionary[ ss[ i ] ] );
return new MultiTerm( tt );
case 13:
// A date range on a date index, if we can select and positions are not needed; a random
// term otherwise. The extremes are generated in both cases.
DateFormat dateFormat = DateFormat.getDateInstance( DateFormat.SHORT, Locale.UK );
String dateFrom = dateFormat.format( new Date( random.nextLong() % 1000000000L ) );
String dateTo = dateFormat.format( new Date( 500000000L + random.nextLong() % 500000000L ) );
if ( canSelect && !needsPositions )
return new Select( "special" + random.nextInt( nSpecialIndices ),
new Range( dateFrom, dateTo ) );
else
return new Term( dictionary[ random.nextInt( dictionary.length ) ] );
}
// Required by the compiler: every case of the switch above ends up returning.
return null;
}
/**
 * Tests a single, fixed query: a date range on the first date index. The query, the content
 * of the first date document and the first result of the real iterator are printed.
 */
@Test
public void testOne() throws Exception {
	final ObjectOpenHashSet<String> aliases = new ObjectOpenHashSet<String>( new String[] { "index0", "index1", "index2", "special0" } );
	final Query q = new SimpleParser( aliases, "index0" ).parse( "special0:[24/12/69 .. 12/01/70]" );
	System.out.println( q );
	System.out.println( specialCollection[ 0 ].document( 0 ).content( 0 ) );
	testQuery( q );
	// The date indices follow the nIndices text indices in the index array.
	printResult( q, false, index[ nIndices ], 1 );
}
/**
 * Generates 2000 random queries of depth at most five, printing and testing each of them.
 * NOTE(review): this method carries no test annotation, so presumably it is not run
 * automatically; verify whether this is intentional.
 */
public void testSimple() throws QueryBuilderVisitorException, IOException {
	int remaining = 2000;
	while ( remaining-- > 0 ) {
		final int depth = random.nextInt( 6 );
		final Query generated = generateQuery( depth, true, false, false );
		System.out.println( generated );
		testQuery( generated );
	}
}
/**
 * Generates 10000 random queries of depth at most five, printing each of them and checking
 * that mock and real iterators agree on it.
 */
@Test
public void testRandom() throws QueryBuilderVisitorException, IOException {
	int remaining = 10000;
	while ( remaining-- > 0 ) {
		final int depth = random.nextInt( 6 );
		final Query generated = generateQuery( depth, true, false, false );
		System.out.println( generated );
		testQuery( generated );
	}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy