All downloads are free. The search and download functionality uses the official Maven repository.

test.it.unimi.dsi.big.mg4j.tool.IndexTest Maven / Gradle / Ivy

Go to download

MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.

The newest version!
package it.unimi.dsi.big.mg4j.tool;

import static it.unimi.dsi.big.mg4j.index.CompressionFlags.DEFAULT_PAYLOAD_INDEX;
import static it.unimi.dsi.big.mg4j.index.DiskBasedIndex.FREQUENCIES_EXTENSION;
import static it.unimi.dsi.big.mg4j.index.DiskBasedIndex.GLOBCOUNTS_EXTENSION;
import static it.unimi.dsi.big.mg4j.index.DiskBasedIndex.INDEX_EXTENSION;
import static it.unimi.dsi.big.mg4j.index.DiskBasedIndex.OFFSETS_EXTENSION;
import static it.unimi.dsi.big.mg4j.index.DiskBasedIndex.PROPERTIES_EXTENSION;
import static it.unimi.dsi.big.mg4j.index.DiskBasedIndex.SIZES_EXTENSION;
import static it.unimi.dsi.big.mg4j.index.DiskBasedIndex.TERMMAP_EXTENSION;
import static it.unimi.dsi.big.mg4j.index.DiskBasedIndex.TERMS_EXTENSION;
import static it.unimi.dsi.logging.ProgressLogger.DEFAULT_LOG_INTERVAL;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import it.unimi.dsi.Util;
import it.unimi.dsi.big.io.FileLinesCollection;
import it.unimi.dsi.big.io.FileLinesCollection.FileLinesIterator;
import it.unimi.dsi.big.mg4j.document.AbstractDocumentSequence;
import it.unimi.dsi.big.mg4j.document.CompositeDocumentSequence;
import it.unimi.dsi.big.mg4j.document.DateArrayDocumentCollection;
import it.unimi.dsi.big.mg4j.document.Document;
import it.unimi.dsi.big.mg4j.document.DocumentCollection;
import it.unimi.dsi.big.mg4j.document.DocumentFactory;
import it.unimi.dsi.big.mg4j.document.DocumentFactory.FieldType;
import it.unimi.dsi.big.mg4j.document.DocumentIterator;
import it.unimi.dsi.big.mg4j.document.DocumentSequence;
import it.unimi.dsi.big.mg4j.document.IdentityDocumentFactory;
import it.unimi.dsi.big.mg4j.document.InputStreamDocumentSequence;
import it.unimi.dsi.big.mg4j.document.IntArrayDocumentCollection;
import it.unimi.dsi.big.mg4j.document.MapVirtualDocumentCollection;
import it.unimi.dsi.big.mg4j.document.SimpleCompressedDocumentCollectionBuilder;
import it.unimi.dsi.big.mg4j.document.StringArrayDocumentCollection;
import it.unimi.dsi.big.mg4j.document.ZipDocumentCollectionBuilder;
import it.unimi.dsi.big.mg4j.index.CompressionFlags;
import it.unimi.dsi.big.mg4j.index.CompressionFlags.Coding;
import it.unimi.dsi.big.mg4j.index.CompressionFlags.Component;
import it.unimi.dsi.big.mg4j.index.DiskBasedIndex;
import it.unimi.dsi.big.mg4j.index.DowncaseTermProcessor;
import it.unimi.dsi.big.mg4j.index.Index;
import it.unimi.dsi.big.mg4j.index.IndexIterator;
import it.unimi.dsi.big.mg4j.index.IndexReader;
import it.unimi.dsi.big.mg4j.index.TermProcessor;
import it.unimi.dsi.big.mg4j.index.cluster.DocumentalPartitioningStrategy;
import it.unimi.dsi.big.mg4j.index.cluster.DocumentalStrategies;
import it.unimi.dsi.big.mg4j.index.cluster.IndexCluster;
import it.unimi.dsi.big.mg4j.index.cluster.LexicalPartitioningStrategy;
import it.unimi.dsi.big.mg4j.index.cluster.LexicalStrategies;
import it.unimi.dsi.big.mg4j.index.remote.IndexServer;
import it.unimi.dsi.big.mg4j.tool.Scan.VirtualDocumentFragment;
import it.unimi.dsi.big.util.ShiftAddXorSignedStringMap;
import it.unimi.dsi.big.util.StringMap;
import it.unimi.dsi.bits.TransformationStrategies;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.fastutil.Swapper;
import it.unimi.dsi.fastutil.ints.AbstractIntComparator;
import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntArrays;
import it.unimi.dsi.fastutil.ints.IntIterators;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectRBTreeSet;
import it.unimi.dsi.fastutil.objects.Reference2ObjectOpenHashMap;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.WordReader;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.big.mg4j.index.BitStreamIndex;
import it.unimi.dsi.sux4j.mph.MWHCFunction;
import it.unimi.dsi.util.Properties;

import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.Reader;
import java.lang.reflect.InvocationTargetException;
import java.net.ConnectException;
import java.net.InetAddress;
import java.net.URISyntaxException;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.Map;

import junit.framework.Assert;

import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Level;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class IndexTest {
	// Configure Log4J at INFO level (if not already configured) when this class is loaded.
	static {
		Util.ensureLog4JIsConfigured( Level.INFO );
	}

	/**
	 * Builds a signed string map whose keys are the lines of a UTF-8 file.
	 * 
	 * @param basename the name of the file containing one key per line.
	 * @return a {@link ShiftAddXorSignedStringMap} built from an iterator over the lines and an
	 * {@link MWHCFunction} constructed on the same lines.
	 */
	private static StringMap createMap( String basename ) throws IOException {
		final FileLinesCollection lines = new FileLinesCollection( basename, "UTF-8" );
		final FileLinesIterator keys = lines.iterator();
		final MWHCFunction function = new MWHCFunction( lines, TransformationStrategies.utf16() );
		return new ShiftAddXorSignedStringMap( keys, function );
	}
	
	
	// Basename used for all files created by a test; assigned in setUp() from a temporary file.
	private String basename;

	// Size of each of the synthetic document arrays below.
	private final int NUMBER_OF_DOCUMENTS = 100;

	// Integer payloads: document i holds the value i.
	private final int[] INTEGER_DOCUMENT = new int[ NUMBER_OF_DOCUMENTS ];

	// Date payloads: document i holds the date i * 86400000 ms (i.e., i days) after the epoch.
	private final Date[] DATE_DOCUMENT = new Date[ NUMBER_OF_DOCUMENTS ];

	// Virtual fields: document i maps the keys i - 1, i and i + 1 to short anchor-like texts.
	@SuppressWarnings("unchecked")
	private final Int2ObjectMap[] VIRTUAL_DOCUMENT = new Int2ObjectMap[ NUMBER_OF_DOCUMENTS ];
	// Instance initializer filling the three arrays above.
	{
		for ( int i = INTEGER_DOCUMENT.length; i-- != 0; )
			INTEGER_DOCUMENT[ i ] = i;
		for ( int i = DATE_DOCUMENT.length; i-- != 0; )
			DATE_DOCUMENT[ i ] = new Date( i * 86400000L );
		for ( int i = VIRTUAL_DOCUMENT.length; i-- != 0; ) {
			VIRTUAL_DOCUMENT[ i ] = new Int2ObjectArrayMap();
			// Note that for i == 0 the first key is -1, and for the last document the third key
			// is NUMBER_OF_DOCUMENTS.
			VIRTUAL_DOCUMENT[ i ].put( i - 1, "link _ to previous document link" );
			VIRTUAL_DOCUMENT[ i ].put( i, "link to this document link" );
			VIRTUAL_DOCUMENT[ i ].put( i + 1, "link to next document link" );
		}
	}

	// Resolver passed to the index builders and to checkAgainstContent() for the virtual field.
	private final VirtualDocumentResolver RESOLVER = new MapVirtualDocumentCollection.TrivialVirtualDocumentResolver( NUMBER_OF_DOCUMENTS );

	/**
	 * Returns a new map initialized from {@link CompressionFlags#DEFAULT_STANDARD_INDEX}.
	 * 
	 * @return a newly created map of flags.
	 */
	public static Reference2ObjectOpenHashMap defaultStandardIndex() {
		final Reference2ObjectOpenHashMap flags = new Reference2ObjectOpenHashMap( CompressionFlags.DEFAULT_STANDARD_INDEX );
		return flags;
	}
	
	// Shared instance of the term processor that rejects every term containing the letter 'a'.
	public final static TermProcessor KILL_A_PROCESSOR = KillATermProcessor.getInstance();

	/**
	 * A singleton term processor that accepts every prefix and rejects every term containing the
	 * character <code>a</code>.
	 */
	public final static class KillATermProcessor implements TermProcessor {
		private static final long serialVersionUID = 1L;

		private static final KillATermProcessor INSTANCE = new KillATermProcessor();

		/** Returns the shared instance of this processor. */
		public static TermProcessor getInstance() {
			return INSTANCE;
		}

		/** Accepts a term if and only if it contains no <code>a</code>. */
		public boolean processTerm( MutableString term ) {
			final boolean containsA = term.indexOf( 'a' ) >= 0;
			return !containsA;
		}

		/** Accepts every prefix. */
		public boolean processPrefix( MutableString prefix ) {
			return true;
		}

		/** Returns this very instance. */
		public TermProcessor copy() {
			return this;
		}
	}

	// Field indices 0, 1 and 2; not referenced in the visible part of this file.
	final static int[] INDEXED_FIELD = { 0, 1, 2 };

	/**
	 * Checks that the two provided indices are byte-by-byte the same, and that property files
	 * coincide except for the provided property keys.
	 * 
	 * <p>For each index component, the file must exist either in both indices or in neither.
	 * The content of every existing component is compared, with the exception of sizes, for which
	 * only existence is checked.
	 * 
	 * @param basename0 the basename of an index.
	 * @param basename1 the basename of an index.
	 * @param excludedProperty a list of property keys that will not be considered when evaluating
	 * the equality of property fields.
	 */
	private void sameIndex( final String basename0, final String basename1, final String... excludedProperty ) throws IOException, ConfigurationException {
		// The two indices must be byte-by-byte identical in all components
		for ( String ext : new String[] { INDEX_EXTENSION, OFFSETS_EXTENSION, TERMS_EXTENSION, SIZES_EXTENSION, FREQUENCIES_EXTENSION, GLOBCOUNTS_EXTENSION } ) {
			final File f0 = new File( basename0 + ext );
			final File f1 = new File( basename1 + ext );
			assertEquals( ext, Boolean.valueOf( f0.exists() ), Boolean.valueOf( f1.exists() ) );
			if ( SIZES_EXTENSION.equals( ext ) || !f0.exists() ) continue;

			// Streams are closed even if the comparison fails, so that no file handle is leaked.
			final FileInputStream in0 = new FileInputStream( f0 );
			try {
				final FileInputStream in1 = new FileInputStream( f1 );
				try {
					assertTrue( ext, IOUtils.contentEquals( in0, in1 ) );
				}
				finally {
					IOUtils.closeQuietly( in1 );
				}
			}
			finally {
				IOUtils.closeQuietly( in0 );
			}
		}

		Properties properties0 = new Properties( basename0 + PROPERTIES_EXTENSION );
		Properties properties1 = new Properties( basename1 + PROPERTIES_EXTENSION );
		// Excluded keys are nulled on both sides so that they do not affect the comparison.
		for ( String p : excludedProperty ) {
			properties0.setProperty( p, null );
			properties1.setProperty( p, null );
		}

		assertEquals( properties0, properties1 );
	}

	/**
	 * Loads the two indices with the given basenames and checks that they have the same content.
	 * 
	 * @param basename0 the basename of an index.
	 * @param basename1 the basename of an index.
	 * @param fileLinesIterator an iterator on the index terms, or <code>null</code> to access
	 * terms by number.
	 */
	public static void sameContent( CharSequence basename0, CharSequence basename1, it.unimi.dsi.big.io.FileLinesCollection.FileLinesIterator fileLinesIterator ) throws ConfigurationException, SecurityException, IOException, URISyntaxException,
			ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		final Index first = Index.getInstance( basename0 );
		final Index second = Index.getInstance( basename1 );
		sameContent( first, second, fileLinesIterator );
	}

	/**
	 * Loads the two indices with the given basenames and checks that they have the same content,
	 * accessing terms by number.
	 * 
	 * @param basename0 the basename of an index.
	 * @param basename1 the basename of an index.
	 */
	public static void sameContent( CharSequence basename0, CharSequence basename1 ) throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException,
			InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		final FileLinesIterator noTerms = null;
		sameContent( basename0, basename1, noTerms );
	}

	/**
	 * Checks that the two given indices have the same content, accessing terms by number.
	 * 
	 * @param index0 an index.
	 * @param index1 an index.
	 */
	public static void sameContent( it.unimi.dsi.big.mg4j.index.Index index0, it.unimi.dsi.big.mg4j.index.Index index1 ) throws IOException {
		final FileLinesIterator noTerms = null;
		sameContent( index0, index1, noTerms );
	}

	/**
	 * Checks that the two given indices have the same content.
	 * 
	 * <p>The indices must agree on the presence of counts, positions and payloads, and on the
	 * number of terms and documents. Then, for each term, the posting lists are scanned in
	 * parallel, comparing document pointers and, if available, counts and positions.
	 * 
	 * @param index0 an index.
	 * @param index1 an index.
	 * @param terms an iterator on the index terms used to access posting lists by term, or
	 * <code>null</code> to access them by term number.
	 */
	public static void sameContent( it.unimi.dsi.big.mg4j.index.Index index0, it.unimi.dsi.big.mg4j.index.Index index1, final FileLinesIterator terms ) throws IOException {
		assertEquals( Boolean.valueOf( index0.hasCounts ), Boolean.valueOf( index1.hasCounts ) );
		assertEquals( Boolean.valueOf( index0.hasPositions ), Boolean.valueOf( index1.hasPositions ) );
		assertEquals( Boolean.valueOf( index0.hasPayloads ), Boolean.valueOf( index1.hasPayloads ) );
		assertEquals( index0.numberOfTerms, index1.numberOfTerms );
		assertEquals( index0.numberOfDocuments, index1.numberOfDocuments );

		final long numTerms = index0.numberOfTerms;
		final boolean hasCounts = index0.hasCounts, hasPositions = index0.hasPositions;
		long document;
		// Position buffers are enlarged on demand and reused across postings.
		int[] p0 = IntArrays.EMPTY_ARRAY, p1 = IntArrays.EMPTY_ARRAY;
		IndexIterator i0, i1;

		// Readers are closed in finally blocks so that they are released also when an assertion fails.
		final IndexReader reader0 = index0.getReader();
		try {
			final IndexReader reader1 = index1.getReader();
			try {
				// NOTE(review): the term index is an int while numTerms is a long; presumably
				// fine for test-sized indices, but confirm before using this on very large ones.
				for ( int i = 0; i < numTerms; i++ ) {
					if ( terms != null ) {
						final CharSequence term = terms.next();
						i0 = reader0.documents( term );
						i1 = reader1.documents( term );
					}
					else {
						i0 = reader0.documents( i );
						i1 = reader1.documents( i );
					}

					while ( i0.mayHaveNext() && i1.mayHaveNext() ) {
						assertEquals( "term " + i, document = i0.nextDocument(), i1.nextDocument() );
						if ( hasCounts ) {
							assertEquals( "term " + i + ", document " + document, i0.count(), i1.count() );
							if ( hasPositions ) {
								if ( i0.count() > p0.length ) p0 = new int[ i0.count() ];
								if ( i1.count() > p1.length ) p1 = new int[ i1.count() ];
								i0.positions( p0 );
								i1.positions( p1 );
								for ( int p = 0; p < i0.count(); p++ ) assertEquals( "term " + i + ", document " + document + ", position " + p, p0[ p ], p1[ p ] );
							}
						}
					}
				}
			}
			finally {
				reader1.close();
			}
		}
		finally {
			reader0.close();
		}
	}


	public int processDocument( WordReader wordReader, int documentIndex, int startPos, Object2ObjectOpenHashMap> termMap, TermProcessor termProcessor )
			throws IOException {
		assertTrue( documentIndex >= 0 );
		Object2ObjectOpenHashMap terms = new Object2ObjectOpenHashMap();
		MutableString word = new MutableString(), nonWord = new MutableString();

		int pos = startPos;
		while ( wordReader.next( word, nonWord ) ) {
			if ( word.length() == 0 ) continue;
			if ( !termProcessor.processTerm( word ) ) {
				pos++;
				continue;
			}
			IntArrayList positions = terms.get( word );
			if ( positions == null ) terms.put( word.copy(), positions = new IntArrayList() );
			positions.add( pos++ );
		}

		for ( MutableString term : terms.keySet() ) {
			ObjectArrayList list = termMap.get( term );
			IntArrayList positions = terms.get( term );
			if ( list == null ) termMap.put( term, list = new ObjectArrayList() );

			int[] t = new int[ positions.size() + 1 ];
			t[ 0 ] = documentIndex;
			System.arraycopy( positions.elements(), 0, t, 1, positions.size() );
			list.add( t );
		}

		return pos;
	}

	/**
	 * Checks that the fields indexed by the given indices have been indexed correctly by performing
	 * a mock index construction over the given sequence.
	 * 
	 * @param sequence a document sequence.
	 * @param resolver the virtual document resolver used to index the collection (we assume the
	 * same for all virtual fields), or null.
	 * @param gap the virtual document gap (we assume the same for all virtual fields; it is
	 * immaterial if no field is virtual).
	 * @param index a list of indices that have indexed one or more fields of sequence.
	 */
	@SuppressWarnings("unchecked")
	public void checkAgainstContent( DocumentSequence sequence, int[] map, VirtualDocumentResolver resolver, int gap, Index... index ) throws IOException {
		DocumentIterator iterator = sequence.iterator();
		DocumentFactory factory = sequence.factory();
		Document document;
		final int n = index.length;
		final int[] field = new int[ n ];
		final int[][] currMaxPos = new int[ n ][];
		final int[] maxDoc = new int[ n ];
		IntArrays.fill( maxDoc, -1 );
		final Object2ObjectOpenHashMap>[] termMap = new Object2ObjectOpenHashMap[ n ];
		final IntArrayList[] payloadPointers = new IntArrayList[ n ];
		final ObjectArrayList[] payloadContent = new ObjectArrayList[ n ];

		for ( int i = 0; i < n; i++ ) {
			field[ i ] = factory.fieldIndex( index[ i ].field );
			switch ( factory.fieldType( field[ i ] ) ) {
			case VIRTUAL:
				currMaxPos[ i ] = new int[ (int)resolver.numberOfDocuments() ];
			case TEXT:
				termMap[ i ] = new Object2ObjectOpenHashMap>();
				break;
			case DATE:
			case INT:
				payloadPointers[ i ] = new IntArrayList();
				payloadContent[ i ] = new ObjectArrayList();
			}
		}

		int documentIndex = 0;

		while ( ( document = iterator.nextDocument() ) != null ) {
			for ( int i = 0; i < field.length; i++ ) {
				switch ( factory.fieldType( field[ i ] ) ) {
				case TEXT:
					processDocument( document.wordReader( field[ i ] ).setReader( (Reader)document.content( field[ i ] ) ), map == null ? documentIndex : map[ documentIndex ], 0, termMap[ i ],
							index[ i ].termProcessor );
					break;
				case VIRTUAL:
					ObjectArrayList fragments = (ObjectArrayList)document.content( field[ i ] );
					resolver.context( document );
					for ( VirtualDocumentFragment fragment : fragments ) {
						int d = (int)resolver.resolve( fragment.documentSpecifier() );

						if ( d != -1 ) {
							if ( map != null ) d = map[ d ];
							if ( maxDoc[ i ] < d ) maxDoc[ i ] = d;
							currMaxPos[ i ][ d ] = processDocument( document.wordReader( field[ i ] ).setReader( new FastBufferedReader( fragment.text() ) ), d, currMaxPos[ i ][ d ], termMap[ i ],
									index[ i ].termProcessor )
									+ gap;
						}
					}
					break;
				case INT:
				case DATE:
					Object x = document.content( field[ i ] );
					if ( x != null ) {
						payloadPointers[ i ].add( map == null ? documentIndex : map[ documentIndex ] );
						payloadContent[ i ].add( x );
					}
				default:
				}
			}
			document.close();
			documentIndex++;
		}

		iterator.close();

		for ( int i = 0; i < n; i++ ) {
			if ( termMap[ i ] != null ) for ( ObjectArrayList list : termMap[ i ].values() ) {
				// We sort in all cases, just to reduce the possible execution paths
				Collections.sort( list, new Comparator() {
					public int compare( int[] p0, int[] p1 ) {
						return p0[ 0 ] - p1[ 0 ];
					}
				} );

				switch ( factory.fieldType( field[ i ] ) ) {
				case VIRTUAL:
					// We coalesce the list
					ObjectArrayList newList = new ObjectArrayList();
					for ( int k = 0; k < list.size(); ) {
						int s;
						for ( s = k + 1; s < list.size(); s++ )
							if ( list.get( k )[ 0 ] != list.get( s )[ 0 ] ) break;
						int count = 0;
						for ( int t = k; t < s; t++ )
							count += list.get( t ).length - 1;
						int[] posting = new int[ count + 1 ];
						posting[ 0 ] = list.get( k )[ 0 ];
						count = 1;
						for ( int t = k; t < s; t++ ) {
							System.arraycopy( list.get( t ), 1, posting, count, list.get( t ).length - 1 );
							count += list.get( t ).length - 1;
						}
						k = s;
						newList.add( posting );
					}
					list.clear();
					list.addAll( newList );
					break;
				default:
				}
			}
			if ( payloadPointers[ i ] != null ) {
				final int p[] = payloadPointers[ i ].elements();
				final Object[] b = payloadContent[ i ].elements();
				Arrays.quickSort( 0, payloadPointers[ i ].size(), new AbstractIntComparator() {
					public int compare( int i0, int i1 ) {
						return p[ i0 ] - p[ i1 ];
					}
				}, new Swapper() {
					public void swap( int i0, int i1 ) {
						final int t = p[ i0 ];
						p[ i0 ] = p[ i1 ];
						p[ i1 ] = t;
						final Object o = b[ i0 ];
						b[ i0 ] = b[ i1 ];
						b[ i1 ] = o;
					}
				} );
			}
		}


		for ( int i = 0; i < n; i++ ) {
			assertEquals( index[ i ].toString(), factory.fieldType( field[ i ] ) == FieldType.VIRTUAL ? maxDoc[ i ] + 1 : documentIndex, index[ i ].numberOfDocuments );
			switch ( factory.fieldType( field[ i ] ) ) {
			case TEXT:
			case VIRTUAL:
				assertEquals( termMap[ i ].size(), index[ i ].numberOfTerms );
				int postings = 0,
				occurrences = 0;
				for ( ObjectArrayList l : termMap[ i ].values() ) {
					postings += l.size();
					for ( int[] p : l )
						occurrences += p.length - 1;
				}
				assertEquals( index[ i ].toString(), postings, index[ i ].numberOfPostings );
				assertEquals( occurrences, index[ i ].numberOfOccurrences );
				IndexReader indexReader = index[ i ].getReader();
				for ( MutableString term : new ObjectRBTreeSet( termMap[ i ].keySet() ).toArray( new MutableString[ termMap[ i ].size() ] ) ) {
					String msg = index[ i ] + ":" + term;
					IndexIterator indexIterator = indexReader.documents( term );
					ObjectArrayList list = termMap[ i ].get( term );
					int k = 0;
					while ( indexIterator.mayHaveNext() ) {
						assertEquals( msg, list.get( k )[ 0 ], indexIterator.nextDocument() ); // Document
																								// pointer
						if ( index[ i ].hasCounts ) assertEquals( msg, list.get( k ).length - 1, indexIterator.count() ); // Count
						if ( index[ i ].hasPositions ) {
							final int[] position = indexIterator.positionArray();
							for ( int p = 0; p < indexIterator.count(); p++ )
								assertEquals( msg, list.get( k )[ p + 1 ], position[ p ] ); // Positions
						}
						k++;
					}
					assertEquals( k, list.size() ); // This implicitly checks the frequency
				}
				indexReader.close();
				break;
			case INT:
			case DATE:
				assertEquals( index[ i ].toString(), payloadPointers[ i ].size(), index[ i ].numberOfPostings );
				assertEquals( index[ i ].toString(), documentIndex != 0 ? 1 : 0, index[ i ].numberOfTerms );
				assertEquals( index[ i ].toString(), -1, index[ i ].numberOfOccurrences );
				if ( documentIndex != 0 ) {
					IndexIterator indexIterator = index[ i ].documents( 0 );
					int k = 0;
					while ( indexIterator.mayHaveNext() ) {
						assertEquals( payloadPointers[ i ].getInt( k ), indexIterator.nextDocument() );
						if ( factory.fieldType( field[ i ] ) == FieldType.INT ) assertEquals( ( (Number)payloadContent[ i ].get( k ) ).longValue(), ( (Number)indexIterator.payload().get() )
								.longValue() );
						else assertEquals( payloadContent[ i ].get( k ), indexIterator.payload().get() );
						k++;
					}
					indexIterator.dispose();
					assertEquals( k, payloadContent[ i ].size() );
				}
			}
		}
	}

	/**
	 * Checks skips in the index with the given basename, reading the list of terms from the
	 * same basename.
	 * 
	 * @param basename an index basename.
	 */
	public void checkSkips( final CharSequence basename ) throws IOException, ConfigurationException, SecurityException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		final CharSequence termsBasename = basename;
		checkSkips( basename, termsBasename );
	}

	/**
	 * Loads the index with the given basename and checks its skips, reading the list of terms
	 * from a possibly different basename.
	 * 
	 * @param basename an index basename.
	 * @param termsBasename an alternative basename to locate the list of terms.
	 */
	public void checkSkips( final CharSequence basename, final CharSequence termsBasename ) throws IOException, ConfigurationException, SecurityException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		final Index index = Index.getInstance( basename );
		final FileLinesCollection termFile = new FileLinesCollection( termsBasename + TERMS_EXTENSION, "UTF-8" );
		checkSkips( index, termFile.iterator() );
	}

	/**
	 * Checks skips in the given index, reading the list of terms from the given basename.
	 * 
	 * @param index an index.
	 * @param termsBasename the basename used to locate the list of terms.
	 */
	public void checkSkips( final Index index, final CharSequence termsBasename ) throws IOException, SecurityException {
		final FileLinesCollection termFile = new FileLinesCollection( termsBasename + TERMS_EXTENSION, "UTF-8" );
		checkSkips( index, termFile.iterator() );
	}

	/**
	 * Checks skips in the given index using readers with the default buffer size of
	 * {@link BitStreamIndex}.
	 * 
	 * @param index an index.
	 * @param terms an iterator on the index terms.
	 */
	public void checkSkips( final Index index, final FileLinesCollection.FileLinesIterator terms ) throws IOException {
		final int defaultBufferSize = BitStreamIndex.DEFAULT_BUFFER_SIZE;
		checkSkips( index, defaultBufferSize, terms );
	}
	
	/**
	 * Checks skips in the given index using readers with the given buffer size, reading the list
	 * of terms from the given basename.
	 * 
	 * @param index an index.
	 * @param bufferSize a buffer size of index readers.
	 * @param termsBasename the basename used to locate the list of terms.
	 */
	public void checkSkips( final Index index, final int bufferSize, final CharSequence termsBasename ) throws IOException, SecurityException {
		final FileLinesCollection termFile = new FileLinesCollection( termsBasename + TERMS_EXTENSION, "UTF-8" );
		checkSkips( index, bufferSize, termFile.iterator() );
	}

	/**
	 * Checks skips in the given index.
	 * 
	 * <p>For each term, the posting list is first read sequentially. Then, for every pair of
	 * postings <code>start</code> &lt; <code>end</code>, a fresh iterator skips to the first and
	 * then to the second document, checking the returned value and the current document (and,
	 * if the index has positions, counts and positions). Finally, for every posting a skip to
	 * the end of list is checked.
	 * 
	 * @param index an index.
	 * @param bufferSize a buffer size of index readers.
	 * @param terms an alternative iterator on the index terms.
	 */
	public void checkSkips( final Index index, final int bufferSize, final FileLinesCollection.FileLinesIterator terms ) throws IOException {
		long result;
		// Document pointers and (if available) positions of the current term, in list order
		final LongArrayList l = new LongArrayList();
		final ObjectArrayList<int[]> positions = new ObjectArrayList<int[]>();

		final IndexReader indexReader = index.getReader( bufferSize );
		// The reader is closed also when an assertion fails.
		try {
			for ( int t = 0; t < index.numberOfTerms; t++ ) {
				final MutableString term = terms.next();
				l.clear();
				positions.clear();
				final IndexIterator documents = indexReader.documents( term );
				long d;
				while ( ( d = documents.nextDocument() ) != -1 ) {
					l.add( d );
					if ( index.hasPositions ) positions.add( ArrayUtils.subarray( documents.positionArray(), 0, documents.count() ) );
				}

				for ( int start = 0; start < l.size(); start++ ) {
					for ( int end = start + 1; end < l.size(); end++ ) {
						IndexIterator indexIterator = indexReader.documents( term );

						result = indexIterator.skipTo( l.getLong( start ) );
						assertEquals( l.getLong( start ), indexIterator.document() );
						assertEquals( l.getLong( start ), result );
						result = indexIterator.skipTo( l.getLong( end ) );
						assertEquals( l.getLong( end ), indexIterator.document() );
						assertEquals( l.getLong( end ), result );

						if ( index.hasPositions ) {
							// This catches wrong state reconstruction after skips.
							indexIterator = indexReader.documents( term );
							indexIterator.skipTo( l.getLong( start ) );
							assertEquals( l.getLong( start ), indexIterator.document() );
							assertEquals( positions.get( start ).length, indexIterator.count() );
							assertArrayEquals( positions.get( start ), ArrayUtils.subarray( indexIterator.positionArray(), 0, indexIterator.count() ) );
							indexIterator.skipTo( l.getLong( end ) );
							assertEquals( l.getLong( end ), indexIterator.document() );
							assertEquals( positions.get( end ).length, indexIterator.count() );
							assertArrayEquals( positions.get( end ), ArrayUtils.subarray( indexIterator.positionArray(), 0, indexIterator.count() ) );
						}
					}

					IndexIterator indexIterator = indexReader.documents( term );

					result = indexIterator.skipTo( l.getLong( start ) );
					assertEquals( l.getLong( start ), indexIterator.document() );
					assertEquals( l.getLong( start ), result );
					// Skipping past the last document must report the end of list
					result = indexIterator.skipTo( it.unimi.dsi.big.mg4j.search.DocumentIterator.END_OF_LIST );
					assertEquals( it.unimi.dsi.big.mg4j.search.DocumentIterator.END_OF_LIST, result );
					assertEquals( -1, indexIterator.nextDocument() );
				}
			}
		}
		finally {
			indexReader.close();
		}
	}
	
	
	/** Creates a temporary file and uses its canonical path as basename for the test. */
	@Before
	public void setUp() throws IOException {
		final File tempFile = File.createTempFile( IndexTest.class.getSimpleName(), "test" );
		basename = tempFile.getCanonicalPath();
	}

	/**
	 * Deletes all files in the directory of the basename whose name starts with the simple name
	 * of this class, and closes the last sequence returned, if any.
	 */
	@After
	public void tearDown() throws IOException {
		final File directory = new File( basename ).getParentFile();
		final String prefix = IndexTest.class.getSimpleName();
		for ( Object candidate : FileUtils.listFiles( directory, FileFilterUtils.prefixFileFilter( prefix ), null ) ) {
			final File file = (File)candidate;
			file.delete();
		}
		if ( lastSequence != null ) lastSequence.close();
	}

	// We keep track of the last returned sequence to close it without cluttering the test code:
	// it is closed when a new sequence is requested and in tearDown().
	private DocumentSequence lastSequence;

	/**
	 * Returns a new composite sequence made of the documents in the <code>documents.data</code>
	 * resource and of the integer, date and virtual synthetic collections, closing the
	 * previously returned sequence, if any.
	 * 
	 * @return the new sequence, which is also recorded as the last returned sequence.
	 */
	public DocumentSequence getSequence() throws ConfigurationException, IOException {
		if ( lastSequence != null ) lastSequence.close();
		final InputStreamDocumentSequence text = new InputStreamDocumentSequence( this.getClass().getResourceAsStream( "documents.data" ), 10, new IdentityDocumentFactory(
				new String[] { "encoding=UTF-8" } ), NUMBER_OF_DOCUMENTS );
		final IntArrayDocumentCollection ints = new IntArrayDocumentCollection( INTEGER_DOCUMENT );
		final DateArrayDocumentCollection dates = new DateArrayDocumentCollection( DATE_DOCUMENT );
		final MapVirtualDocumentCollection virtual = new MapVirtualDocumentCollection( VIRTUAL_DOCUMENT );
		lastSequence = new CompositeDocumentSequence( text, ints, dates, virtual );
		return lastSequence;
	}

	/**
	 * Returns a new composite sequence made of empty string, integer, date and virtual
	 * collections, closing the previously returned sequence, if any.
	 * 
	 * @return the new sequence, which is also recorded as the last returned sequence.
	 */
	@SuppressWarnings("unchecked")
	public DocumentSequence getEmptySequence() throws ConfigurationException, IOException {
		if ( lastSequence != null ) lastSequence.close();
		final StringArrayDocumentCollection text = new StringArrayDocumentCollection();
		final IntArrayDocumentCollection ints = new IntArrayDocumentCollection();
		final DateArrayDocumentCollection dates = new DateArrayDocumentCollection();
		final MapVirtualDocumentCollection virtual = new MapVirtualDocumentCollection();
		lastSequence = new CompositeDocumentSequence( text, ints, dates, virtual );
		return lastSequence;
	}

	/**
	 * Builds indices over the test sequence in several ways and checks that they agree.
	 * 
	 * <p>The sequence is indexed directly (and checked against its content), then indexed again
	 * while building a zipped collection and a simple compressed collection, each of which is
	 * indexed in turn; finally, it is indexed in a single batch. The resulting indices are
	 * compared with {@link #sameIndex(String, String, String...)} or, for the one-batch index
	 * with a negative quantum, with {@link #sameContent(CharSequence, CharSequence)}.
	 * 
	 * @param interleaved passed to {@link IndexBuilder#interleaved(boolean)}.
	 * @param flags the writer flags; note that this map is modified (positions are switched to
	 * Golomb coding) if <code>interleaved</code> is true and the map has a coding for positions.
	 * @param quantum the quantum; skips are enabled if and only if it is nonzero.
	 * @param height the height passed to the index builders.
	 * @param termProcessor the term processor passed to the index builders.
	 */
	public void testIndex( boolean interleaved, Map flags, int quantum, int height, TermProcessor termProcessor ) throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException,
			InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {

		// Vanilla indexing
		new IndexBuilder( basename, getSequence() ).standardWriterFlags( flags ).termProcessor( termProcessor ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum )
				.height( height ).virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( 20 ).run();
		checkAgainstContent( getSequence(), null, RESOLVER, Scan.DEFAULT_VIRTUAL_DOCUMENT_GAP, Index.getInstance( basename + "-text" ), Index.getInstance( basename + "-int" ), Index
				.getInstance( basename + "-date" ), Index.getInstance( basename + "-virtual" ) );

		final String basenameZipped = basename + "-zipped";
		// From here on, all indices (including the rebuilt ones at basename) use the modified flags.
		if ( interleaved && flags.get( Component.POSITIONS ) != null ) flags.put( Component.POSITIONS, Coding.GOLOMB );
		// Vanilla indexing generating a zipped collection (we also use Golomb coding to test the usage of sizes in combinations).
		ZipDocumentCollectionBuilder zipBuilder = new ZipDocumentCollectionBuilder( basenameZipped, getSequence().factory(), true );
		new IndexBuilder( basename, getSequence() ).standardWriterFlags( flags ).termProcessor( termProcessor ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum )
				.height( height ).virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( 20 ).builder( zipBuilder ).run();
		// Vanilla indexing using the zipped collection
		new IndexBuilder( basenameZipped, AbstractDocumentSequence.load( basenameZipped + DocumentCollection.DEFAULT_EXTENSION ) ).standardWriterFlags( flags ).termProcessor( termProcessor ).indexedFields( 0, 1, 2, 3 ).skipBufferSize( 1024 )
				.pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum ).height( height ).virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( 20 ).run();

		// The two indices must be byte-by-byte identical (and we keep the zipped index for future
		// reference)
		sameIndex( basename + "-text", basenameZipped + "-text" );
		sameIndex( basename + "-int", basenameZipped + "-int", "batches" );
		sameIndex( basename + "-date", basenameZipped + "-date", "batches" );
		sameIndex( basename + "-virtual", basenameZipped + "-virtual", "batches" );

		checkSkips( basename + "-text" );
		checkSkips( basename + "-int" );
		checkSkips( basename + "-date" );
		checkSkips( basename + "-virtual" );

		final String basenameSimple = basename + "-simple";

		// Vanilla indexing generating a simple compressed collection
		SimpleCompressedDocumentCollectionBuilder simpleBuilder = new SimpleCompressedDocumentCollectionBuilder( basenameSimple, getSequence().factory(), true );
		new IndexBuilder( basename, getSequence() ).standardWriterFlags( flags ).termProcessor( termProcessor ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum )
				.height( height ).virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( 20 ).builder( simpleBuilder ).run();
		// Vanilla indexing using the simple compressed collection
		new IndexBuilder( basenameSimple, AbstractDocumentSequence.load( basenameSimple + DocumentCollection.DEFAULT_EXTENSION ) ).standardWriterFlags( flags ).termProcessor( termProcessor ).indexedFields( 0, 1, 2, 3 ).skipBufferSize( 1024 )
				.pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum ).height( height ).virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( 20 ).run();

		// The two indices must be byte-by-byte identical (and we keep the index built from the
		// simple compressed collection for future reference)
		sameIndex( basename + "-text", basenameSimple + "-text" );
		sameIndex( basename + "-int", basenameSimple + "-int", "batches" );
		sameIndex( basename + "-date", basenameSimple + "-date", "batches" );
		sameIndex( basename + "-virtual", basenameSimple + "-virtual", "batches" );


		// Indexing with just one batch
		new IndexBuilder( basename + "-onebatch", getSequence() ).standardWriterFlags( flags ).termProcessor( termProcessor ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 )
				.quantum( quantum ).height( height ).virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( NUMBER_OF_DOCUMENTS ).run();

		if ( quantum >= 0 ) {
			// The two indices must be byte-by-byte identical
			sameIndex( basename + "-text", basename + "-onebatch-text", "batches" );
			sameIndex( basename + "-int", basename + "-onebatch-int", "batches" );
			sameIndex( basename + "-date", basename + "-onebatch-date", "batches" );
			sameIndex( basename + "-virtual", basename + "-onebatch-virtual", "batches" );
		}
		else {
			// The two indices must have the same content, as a different division
			// in batches can lead to a different quantum estimate.
			sameContent( basename + "-text", basename + "-onebatch-text" );
			sameContent( basename + "-int", basename + "-onebatch-int" );
			sameContent( basename + "-date", basename + "-onebatch-date" );
			sameContent( basename + "-virtual", basename + "-onebatch-virtual" );
		}
	}

	/**
	 * Convenience overload: runs the five-argument {@code testIndex} with the flags returned by
	 * {@code defaultStandardIndex()} and the term processor returned by
	 * {@code DowncaseTermProcessor.getInstance()}.
	 *
	 * @param interleaved forwarded unchanged.
	 * @param quantum forwarded unchanged.
	 * @param height forwarded unchanged.
	 */
	public void testIndex( boolean interleaved, int quantum, int height ) throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException,
			IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		final Map standardFlags = defaultStandardIndex();
		final TermProcessor downcaser = DowncaseTermProcessor.getInstance();
		testIndex( interleaved, standardFlags, quantum, height, downcaser );
	}

	/**
	 * Convenience overload: runs the five-argument {@code testIndex} with the given flags and the
	 * term processor returned by {@code DowncaseTermProcessor.getInstance()}.
	 *
	 * @param interleaved forwarded unchanged.
	 * @param flags forwarded unchanged.
	 * @param quantum forwarded unchanged.
	 * @param height forwarded unchanged.
	 */
	public void testIndex( boolean interleaved, Map flags, int quantum, int height ) throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		final TermProcessor downcaser = DowncaseTermProcessor.getInstance();
		testIndex( interleaved, flags, quantum, height, downcaser );
	}
	
	/**
	 * Drives the parameterised {@code testIndex} overloads over a fixed list of
	 * (interleaved, flags, quantum, height, term processor) combinations.
	 */
	@Test
	public void testIndex() throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException,
			InvocationTargetException, NoSuchMethodException {

		// Reduced flag sets: first without positions, then additionally without counts.
		final Reference2ObjectOpenHashMap reducedFlags = new Reference2ObjectOpenHashMap( defaultStandardIndex() );
		for ( Component dropped : new Component[] { Component.POSITIONS, Component.COUNTS } ) {
			reducedFlags.remove( dropped );
			testIndex( true, reducedFlags, 4, 4 );
			testIndex( true, reducedFlags, -4, 4 );
		}

		// (quantum, height) pairs run, in this order, for both values of the interleaved flag.
		final int[][] quantumAndHeight = {
				{ 1, 1 }, { 1, 2 }, { 4, 1 }, { 4, 4 }, { 8, 1 }, { 8, 4 },
				{ -1, 1 }, { -1, 2 }, { -4, 1 }, { -4, 4 }, { -8, 1 }, { -8, 4 }
		};

		testIndex( true, 0, 0 );
		testIndex( true, defaultStandardIndex(), 0, 0, KILL_A_PROCESSOR );
		for ( int[] pair : quantumAndHeight ) testIndex( true, pair[ 0 ], pair[ 1 ] );

		testIndex( false, 1, 0 );
		testIndex( false, defaultStandardIndex(), 1, 0, KILL_A_PROCESSOR );
		for ( int[] pair : quantumAndHeight ) testIndex( false, pair[ 0 ], pair[ 1 ] );
	}

	/**
	 * Builds an index whose documents are renumbered through the permutation stored in the
	 * {@code documents.permutation.data} test resource, checks it against the document sequence,
	 * then rebuilds it in a single batch and compares the two results.
	 *
	 * <p>The resource stream is closed after reading, and the temporary map file is scheduled for
	 * deletion at JVM exit, so repeated invocations do not accumulate open streams or stray files.
	 *
	 * @param interleaved passed to {@code IndexBuilder.interleaved()}.
	 * @param flags passed to {@code IndexBuilder.standardWriterFlags()}.
	 * @param quantum passed to {@code IndexBuilder.quantum()}; skips are enabled iff it is nonzero,
	 * and a negative value switches the final comparison from byte-level to content-level.
	 * @param height passed to {@code IndexBuilder.height()}.
	 * @param termProcessor passed to {@code IndexBuilder.termProcessor()}.
	 * @throws IOException if the permutation resource is missing or cannot be read.
	 */
	public void testRemappedIndex( boolean interleaved, Map flags, int quantum, int height, TermProcessor termProcessor ) throws IOException, ConfigurationException, SecurityException, URISyntaxException, ClassNotFoundException,
			InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {

		final String basenameMapped = basename + "-map";

		// Load the permutation, failing with a readable message if the resource is absent.
		final InputStream resource = this.getClass().getResourceAsStream( "documents.permutation.data" );
		if ( resource == null ) throw new IOException( "Cannot find resource documents.permutation.data" );
		int[] map;
		final DataInputStream permutationStream = new DataInputStream( resource );
		try {
			map = IntIterators.unwrap( BinIO.asIntIterator( permutationStream ) );
		}
		finally {
			permutationStream.close();
		}

		// Store the permutation in a temporary file that is removed when the JVM exits.
		final File mapTemp = File.createTempFile( this.getClass().getSimpleName(), "map" );
		mapTemp.deleteOnExit();
		final String mapFile = mapTemp.toString();
		BinIO.storeInts( map, mapFile );

		// Remapped index
		new IndexBuilder( basenameMapped, getSequence() ).standardWriterFlags( flags ).termProcessor( termProcessor ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum(
				quantum ).height( height ).virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( 20 ).mapFile( mapFile ).run();
		checkAgainstContent( getSequence(), map, RESOLVER, Scan.DEFAULT_VIRTUAL_DOCUMENT_GAP, Index.getInstance( basenameMapped + "-text" ), Index.getInstance( basenameMapped + "-int" ), Index
				.getInstance( basenameMapped + "-date" ), Index.getInstance( basenameMapped + "-virtual" ) );

		// Remapped index, one batch
		new IndexBuilder( basenameMapped + "-onebatch", getSequence() ).standardWriterFlags( flags ).termProcessor( termProcessor ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 )
				.quantum( quantum ).height( height ).virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( NUMBER_OF_DOCUMENTS ).mapFile( mapFile ).run();

		for ( String field : new String[] { "-text", "-int", "-date", "-virtual" } ) {
			final String multiBatch = basenameMapped + field;
			final String oneBatch = basenameMapped + "-onebatch" + field;
			if ( quantum >= 0 ) {
				// The two indices must be byte-by-byte identical
				sameIndex( multiBatch, oneBatch, "batches" );
			}
			else {
				// The two indices must have the same content, as a different division
				// in batches can lead to a different quantum estimate.
				sameContent( multiBatch, oneBatch );
			}
		}
	}

	/**
	 * Convenience overload: runs the five-argument {@code testRemappedIndex} with the flags returned
	 * by {@code defaultStandardIndex()} and the term processor returned by
	 * {@code DowncaseTermProcessor.getInstance()}.
	 *
	 * @param interleaved forwarded unchanged.
	 * @param quantum forwarded unchanged.
	 * @param height forwarded unchanged.
	 */
	public void testRemappedIndex( boolean interleaved, int quantum, int height ) throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException,
			IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		final Map standardFlags = defaultStandardIndex();
		final TermProcessor downcaser = DowncaseTermProcessor.getInstance();
		testRemappedIndex( interleaved, standardFlags, quantum, height, downcaser );
	}
	/**
	 * Convenience overload: runs the five-argument {@code testRemappedIndex} with the given flags and
	 * the term processor returned by {@code DowncaseTermProcessor.getInstance()}.
	 *
	 * @param interleaved forwarded unchanged.
	 * @param flags forwarded unchanged.
	 * @param quantum forwarded unchanged.
	 * @param height forwarded unchanged.
	 */
	public void testRemappedIndex( boolean interleaved, Map flags, int quantum, int height ) throws IOException, ConfigurationException, SecurityException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		final TermProcessor downcaser = DowncaseTermProcessor.getInstance();
		testRemappedIndex( interleaved, flags, quantum, height, downcaser );
	}
	
	/**
	 * Drives the parameterised {@code testRemappedIndex} overloads over a fixed list of
	 * (interleaved, flags, quantum, height, term processor) combinations.
	 */
	@Test
	public void testRemappedIndex() throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException,
			InvocationTargetException, NoSuchMethodException {

		// Reduced flag sets: first without positions, then additionally without counts.
		final Reference2ObjectOpenHashMap reducedFlags = new Reference2ObjectOpenHashMap( defaultStandardIndex() );
		for ( Component dropped : new Component[] { Component.POSITIONS, Component.COUNTS } ) {
			reducedFlags.remove( dropped );
			testRemappedIndex( true, reducedFlags, 4, 4 );
			testRemappedIndex( true, reducedFlags, -4, 4 );
		}

		// (quantum, height) pairs run, in this order, for both values of the interleaved flag.
		final int[][] quantumAndHeight = {
				{ 1, 1 }, { 1, 2 }, { 4, 1 }, { 4, 4 }, { 8, 1 }, { 8, 4 },
				{ -1, 1 }, { -1, 2 }, { -4, 1 }, { -4, 4 }, { -8, 1 }, { -8, 4 }
		};

		testRemappedIndex( true, 0, 0 );
		testRemappedIndex( true, defaultStandardIndex(), 0, 0, KILL_A_PROCESSOR );
		for ( int[] pair : quantumAndHeight ) testRemappedIndex( true, pair[ 0 ], pair[ 1 ] );

		testRemappedIndex( false, 1, 0 );
		testRemappedIndex( false, defaultStandardIndex(), 1, 0, KILL_A_PROCESSOR );
		for ( int[] pair : quantumAndHeight ) testRemappedIndex( false, pair[ 0 ], pair[ 1 ] );
	}

	/**
	 * Builds an index, partitions each of its four sub-indices ({@code -text}, {@code -int},
	 * {@code -date}, {@code -virtual}) documentally with a uniform three-way strategy, checks the
	 * resulting clusters against the originals, and finally concatenates the parts back and checks
	 * every concatenated index against the sub-index it was derived from.
	 *
	 * <p>Previously the checks that followed the {@code -int}, {@code -date} and {@code -virtual}
	 * concatenations compared {@code -text} with {@code -text-merged} again, so those three
	 * concatenated indices were never verified. Each concatenated index is now content-checked
	 * against its own original. The byte-level {@code sameIndex} check is still applied to the text
	 * index only, which is the only one it was ever exercised on.
	 *
	 * @param interleaved passed to the index builder, the partitioner and the concatenator.
	 * @param flags writer flags for the text and virtual indices; note that when
	 * {@code interleaved} is true and a positions entry is present, this map is modified so that
	 * positions use {@code Coding.GOLOMB}.
	 * @param quantum the quantum; skips are enabled iff it is nonzero. The partitioner receives its
	 * absolute value, the builder and the concatenator the value as given.
	 * @param height the skip-tower height passed to builder, partitioner and concatenator.
	 */
	public void testPartitionConcatenate( boolean interleaved, Map flags, int quantum, int height ) throws Exception {
		// Vanilla indexing
		if ( interleaved && flags.get( Component.POSITIONS ) != null ) flags.put( Component.POSITIONS, Coding.GOLOMB );
		new IndexBuilder( basename, getSequence() ).standardWriterFlags( flags ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum ).height( height )
				.virtualDocumentResolver( 3, RESOLVER ).run();

		// The four sub-indices, and whether each one is written with the payload flags.
		final String[] fields = { "-text", "-int", "-date", "-virtual" };
		final boolean[] payload = { false, true, true, false };

		// We partition
		BinIO.storeObject( DocumentalStrategies.uniform( 3, NUMBER_OF_DOCUMENTS ), basename + "-strategy" );

		for ( int i = 0; i < fields.length; i++ ) {
			Map writerFlags = flags;
			if ( payload[ i ] ) writerFlags = DEFAULT_PAYLOAD_INDEX;
			new PartitionDocumentally( basename + fields[ i ], basename + fields[ i ] + "-part", DocumentalStrategies.uniform( 3, NUMBER_OF_DOCUMENTS ), basename + "-strategy", 0, 1024, writerFlags,
					interleaved, quantum != 0, Math.abs( quantum ), height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run();
		}

		// Each local index needs a term map before the cluster can be compared with the original.
		for ( int i = 0; i < fields.length; i++ ) {
			final String original = basename + fields[ i ];
			final String[] localIndex = new Properties( original + "-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX );
			for ( String index : localIndex ) BinIO.storeObject( createMap( index + TERMS_EXTENSION ), index + TERMMAP_EXTENSION );

			// Payload indices are compared directly; text and virtual ones are driven by the original term list.
			if ( payload[ i ] ) sameContent( original, original + "-part" );
			else sameContent( original, original + "-part", new FileLinesCollection( original + TERMS_EXTENSION, "UTF-8" ).iterator() );
		}

		for ( String field : fields ) checkSkips( basename + field + "-part", basename + field );

		// We concatenate the parts back and compare each result with its own original.
		for ( int i = 0; i < fields.length; i++ ) {
			final String original = basename + fields[ i ];
			final String merged = original + "-merged";
			Map writerFlags = flags;
			if ( payload[ i ] ) writerFlags = DEFAULT_PAYLOAD_INDEX;

			final String[] localIndex = new Properties( original + "-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX );
			new Concatenate( merged, localIndex, false, 1024, writerFlags, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run();

			// Byte-level identity is asserted for the text index only (as it always was).
			if ( i == 0 && quantum >= 0 ) sameIndex( original, merged, "batches", flags.containsKey( Component.COUNTS ) ? "" : "occurrences" );
			sameContent( original, merged );
		}
	}

	/**
	 * Drives the parameterised {@code testPartitionConcatenate} over a fixed list of
	 * (interleaved, flags, quantum, height) combinations.
	 */
	@Test
	public void testPartitionConcatenate() throws Exception {

		// Reduced flag sets: first without positions, then additionally without counts.
		final Reference2ObjectOpenHashMap reducedFlags = new Reference2ObjectOpenHashMap( defaultStandardIndex() );
		for ( Component dropped : new Component[] { Component.POSITIONS, Component.COUNTS } ) {
			reducedFlags.remove( dropped );
			testPartitionConcatenate( true, reducedFlags, 4, 4 );
			testPartitionConcatenate( true, reducedFlags, -4, 4 );
		}

		// (quantum, height) pairs run, in this order, for both values of the interleaved flag.
		final int[][] quantumAndHeight = {
				{ 1, 1 }, { 1, 2 }, { 4, 1 }, { 4, 4 }, { 8, 1 }, { 8, 4 },
				{ -1, 1 }, { -1, 2 }, { -4, 1 }, { -4, 4 }, { -8, 1 }, { -8, 4 }
		};

		// A fresh flag map is requested for every run, since the callee may modify it.
		testPartitionConcatenate( true, defaultStandardIndex(), 0, 0 );
		for ( int[] pair : quantumAndHeight ) testPartitionConcatenate( true, defaultStandardIndex(), pair[ 0 ], pair[ 1 ] );

		testPartitionConcatenate( false, defaultStandardIndex(), 1, 0 );
		for ( int[] pair : quantumAndHeight ) testPartitionConcatenate( false, defaultStandardIndex(), pair[ 0 ], pair[ 1 ] );
	}


	/**
	 * Builds an index, partitions each of its four sub-indices ({@code -text}, {@code -int},
	 * {@code -date}, {@code -virtual}) with a {@code Modulo3DocumentalClusteringStrategy}, checks
	 * the resulting clusters against the originals, then merges the parts back and compares every
	 * merged index with the sub-index it came from.
	 *
	 * @param interleaved passed to the index builder, the partitioner and the merger; when true the
	 * merged indices are only compared by content.
	 * @param flags writer flags for the text and virtual indices; when {@code interleaved} is true
	 * and a positions entry is present, this map is modified so that positions use
	 * {@code Coding.GOLOMB}.
	 * @param quantum the quantum; skips are enabled iff it is nonzero. The partitioner receives its
	 * absolute value; a negative value forces content-only comparison of the merged indices.
	 * @param height the height passed to builder, partitioner and merger.
	 */
	public void testPartitionMerge( boolean interleaved, Map flags, int quantum, int height ) throws ConfigurationException, SecurityException, IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
			Exception {

		if ( interleaved && flags.get( Component.POSITIONS ) != null ) flags.put( Component.POSITIONS, Coding.GOLOMB );

		// Reference index
		new IndexBuilder( basename, getSequence() )
				.standardWriterFlags( flags )
				.skipBufferSize( 1024 )
				.pasteBufferSize( 1024 )
				.interleaved( interleaved )
				.skips( quantum != 0 )
				.quantum( quantum )
				.height( height )
				.virtualDocumentResolver( 3, RESOLVER )
				.run();

		// The four sub-indices, and whether each one is written with the payload flags.
		final String[] fields = { "-text", "-int", "-date", "-virtual" };
		final boolean[] payload = { false, true, true, false };
		final String strategyFile = basename + "-strategy";

		// The partitioning strategy is also serialised, as the partitioner is given its filename.
		final DocumentalPartitioningStrategy modulo3 = new Modulo3DocumentalClusteringStrategy( NUMBER_OF_DOCUMENTS );
		BinIO.storeObject( modulo3, strategyFile );

		for ( int i = 0; i < fields.length; i++ ) {
			Map writerFlags = flags;
			if ( payload[ i ] ) writerFlags = DEFAULT_PAYLOAD_INDEX;
			new PartitionDocumentally( basename + fields[ i ], basename + fields[ i ] + "-part", modulo3, strategyFile, 0, 1024, writerFlags, interleaved, quantum != 0, Math.abs( quantum ), height,
					1024 * 1024, DEFAULT_LOG_INTERVAL ).run();
		}

		// Store a term map next to every local index, then compare each cluster with its original.
		for ( int i = 0; i < fields.length; i++ ) {
			final String original = basename + fields[ i ];
			final String[] parts = new Properties( original + "-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX );
			for ( int p = 0; p < parts.length; p++ ) BinIO.storeObject( createMap( parts[ p ] + TERMS_EXTENSION ), parts[ p ] + TERMMAP_EXTENSION );

			if ( payload[ i ] ) sameContent( original, original + "-part" );
			else sameContent( original, original + "-part", new FileLinesCollection( original + TERMS_EXTENSION, "UTF-8" ).iterator() );
		}

		for ( int i = 0; i < fields.length; i++ ) checkSkips( basename + fields[ i ] + "-part", basename + fields[ i ] );

		// Merge the parts back; byte-level identity is only checked without interleaving and with a nonnegative quantum.
		final boolean byteIdentical = ! interleaved && quantum >= 0;
		for ( int i = 0; i < fields.length; i++ ) {
			final String original = basename + fields[ i ];
			final String merged = original + "-merged";
			Map writerFlags = flags;
			if ( payload[ i ] ) writerFlags = DEFAULT_PAYLOAD_INDEX;

			final String[] parts = new Properties( original + "-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX );
			new Merge( merged, parts, false, 1024, writerFlags, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run();

			if ( byteIdentical ) sameIndex( original, merged, "batches" );
			else sameContent( original, merged );
		}
	}

	/**
	 * Drives the parameterised {@code testPartitionMerge} over a fixed list of
	 * (interleaved, flags, quantum, height) combinations.
	 */
	@Test
	public void testPartitionMerge() throws Exception {
		// Reduced flag sets: first without positions, then additionally without counts.
		final Reference2ObjectOpenHashMap reducedFlags = new Reference2ObjectOpenHashMap( defaultStandardIndex() );
		for ( Component dropped : new Component[] { Component.POSITIONS, Component.COUNTS } ) {
			reducedFlags.remove( dropped );
			testPartitionMerge( true, reducedFlags, 4, 4 );
			testPartitionMerge( true, reducedFlags, -4, 4 );
		}

		// (quantum, height) pairs run, in this order, for both values of the interleaved flag.
		final int[][] quantumAndHeight = {
				{ 1, 1 }, { 1, 2 }, { 4, 1 }, { 4, 4 }, { 8, 1 }, { 8, 4 },
				{ -1, 1 }, { -1, 2 }, { -4, 1 }, { -4, 4 }, { -8, 1 }, { -8, 4 }
		};

		// A fresh flag map is requested for every run, since the callee may modify it.
		testPartitionMerge( true, defaultStandardIndex(), 0, 0 );
		for ( int[] pair : quantumAndHeight ) testPartitionMerge( true, defaultStandardIndex(), pair[ 0 ], pair[ 1 ] );

		testPartitionMerge( false, defaultStandardIndex(), 1, 0 );
		for ( int[] pair : quantumAndHeight ) testPartitionMerge( false, defaultStandardIndex(), pair[ 0 ], pair[ 1 ] );
	}

	/**
	 * Builds an index, partitions its text and virtual sub-indices lexically, and checks that the
	 * resulting clusters have the same content as the originals.
	 *
	 * @param interleaved passed to {@code IndexBuilder.interleaved()}.
	 * @param flags passed to {@code IndexBuilder.standardWriterFlags()}.
	 */
	public void testLexicalPartitioning( boolean interleaved, Map flags ) throws ConfigurationException, SecurityException, IOException, ClassNotFoundException, InstantiationException, IllegalAccessException,
			Exception {
		// Reference index
		new IndexBuilder( basename, getSequence() )
				.standardWriterFlags( flags )
				.interleaved( interleaved )
				.skipBufferSize( 1024 )
				.pasteBufferSize( 1024 )
				.virtualDocumentResolver( 3, RESOLVER )
				.run();

		final String textBasename = basename + "-text";
		final String virtualBasename = basename + "-virtual";
		final String strategyFile = basename + "-strategy";

		// A three-way uniform lexical strategy derived from the text index; the same strategy is applied to both sub-indices.
		final LexicalPartitioningStrategy strategy = LexicalStrategies.uniform( 3, DiskBasedIndex.getInstance( textBasename ) );
		BinIO.storeObject( strategy, strategyFile );

		new PartitionLexically( textBasename, textBasename + "-part", strategy, strategyFile, 1024, DEFAULT_LOG_INTERVAL ).run();
		new PartitionLexically( virtualBasename, virtualBasename + "-part", strategy, strategyFile, 1024, DEFAULT_LOG_INTERVAL ).run();

		// Term maps are stored for the local indices of the text cluster only.
		final String[] textParts = new Properties( textBasename + "-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX );
		for ( int p = 0; p < textParts.length; p++ ) {
			BinIO.storeObject( createMap( textParts[ p ] + TERMS_EXTENSION ), textParts[ p ] + TERMMAP_EXTENSION );
		}

		sameContent( textBasename, textBasename + "-part", new FileLinesCollection( textBasename + TERMS_EXTENSION, "UTF-8" ).iterator() );
		sameContent( virtualBasename, virtualBasename + "-part" );
	}

	/**
	 * Runs the parameterised {@code testLexicalPartitioning} with the default flags (interleaved and
	 * not), then interleaved with positions removed, then interleaved with counts removed as well.
	 */
	@Test
	public void testLexicalPartitioning() throws ConfigurationException, SecurityException, IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, Exception {
		for ( boolean interleaved : new boolean[] { true, false } ) testLexicalPartitioning( interleaved, defaultStandardIndex() );

		final Reference2ObjectOpenHashMap reducedFlags = new Reference2ObjectOpenHashMap( defaultStandardIndex() );
		for ( Component dropped : new Component[] { Component.POSITIONS, Component.COUNTS } ) {
			reducedFlags.remove( dropped );
			testLexicalPartitioning( true, reducedFlags );
		}
	}

	/**
	 * Builds an index, serves its text sub-index through {@code IndexServer} on port 9999 of the
	 * local host, and checks that the index obtained through the {@code mg4j://} URI has the same
	 * content and skips as the local one. The round is performed twice, passing {@code false} and
	 * then {@code true} as last argument of {@code IndexServer.start()}.
	 *
	 * @param interleaved passed to {@code IndexBuilder.interleaved()}.
	 * @param flags passed to {@code IndexBuilder.standardWriterFlags()}.
	 * @param quantum passed to {@code IndexBuilder.quantum()}; skips are enabled iff it is nonzero.
	 * @param height passed to {@code IndexBuilder.height()}.
	 */
	public void testRemote( boolean interleaved, Map flags, int quantum, int height ) throws ConfigurationException, SecurityException, IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, URISyntaxException, InvocationTargetException, NoSuchMethodException, InterruptedException {
		// Build the index that will be served
		new IndexBuilder( basename, getSequence() )
				.standardWriterFlags( flags )
				.skipBufferSize( 1024 )
				.pasteBufferSize( 1024 )
				.interleaved( interleaved )
				.skips( quantum != 0 )
				.quantum( quantum )
				.height( height )
				.virtualDocumentResolver( 3, RESOLVER )
				.documentsPerBatch( 20 )
				.run();

		final InetAddress localHost = InetAddress.getLocalHost();

		// Shut down any server left over from a previous run; no server listening is fine.
		try {
			IndexServer.quit( localHost.getHostAddress(), 9999 );
		}
		catch( ConnectException dontCare ) {}

		for ( final boolean lastStartArgument : new boolean[] { false, true } ) {
			final Thread server = new Thread() {
				@Override
				public void run() {
					try {
						IndexServer.start( Index.getInstance( basename + "-text" ), localHost, 9999, lastStartArgument );
					}
					catch ( Exception e ) {
						// NOTE(review): this runs on the server thread, so it presumably does not fail the calling test.
						Assert.fail( e.getMessage() );
					}
				}
			};
			server.start();
			Thread.sleep( 1000 ); // Give the server time to come up.

			final Index remoteIndex = Index.getInstance( "mg4j://" + localHost.getHostAddress() + ":9999" );

			sameContent( Index.getInstance( basename + "-text" ), remoteIndex );
			checkSkips( remoteIndex, 128, basename + "-text" );
			IndexServer.quit( localHost.getHostAddress(), 9999 );
		}
	}
	
	/**
	 * Drives the parameterised {@code testRemote} over a fixed list of
	 * (interleaved, flags, quantum, height) combinations.
	 */
	@Test
	public void testRemote() throws Exception {
		// Reduced flag sets: first without positions, then additionally without counts.
		final Reference2ObjectOpenHashMap reducedFlags = new Reference2ObjectOpenHashMap( defaultStandardIndex() );
		for ( Component dropped : new Component[] { Component.POSITIONS, Component.COUNTS } ) {
			reducedFlags.remove( dropped );
			testRemote( true, reducedFlags, 4, 4 );
			testRemote( true, reducedFlags, -4, 4 );
		}

		// (quantum, height) pairs shared by the interleaved and non-interleaved runs.
		final int[][] quantumAndHeight = { { 8, 4 }, { -1, 1 }, { -8, 1 } };

		testRemote( true, defaultStandardIndex(), 0, 0 );
		for ( int[] pair : quantumAndHeight ) testRemote( true, defaultStandardIndex(), pair[ 0 ], pair[ 1 ] );

		testRemote( false, defaultStandardIndex(), 1, 0 );
		for ( int[] pair : quantumAndHeight ) testRemote( false, defaultStandardIndex(), pair[ 0 ], pair[ 1 ] );
	}

	
	/**
	 * Indexes the empty document sequence, checks the result against that sequence, then indexes it
	 * again with a map file and checks that the two sets of indices are identical.
	 *
	 * <p>The map file is a freshly created (hence empty) temporary file. It is scheduled for
	 * deletion at JVM exit, so the many invocations of this method do not leave files behind.
	 *
	 * @param interleaved passed to {@code IndexBuilder.interleaved()}.
	 * @param flags passed to {@code IndexBuilder.standardWriterFlags()}.
	 * @param quantum passed to {@code IndexBuilder.quantum()}; skips are enabled iff it is nonzero.
	 * @param height passed to {@code IndexBuilder.height()}.
	 */
	public void testEmpty( boolean interleaved, Map flags, int quantum, int height ) throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException,
			IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		// Vanilla indexing
		new IndexBuilder( basename, getEmptySequence() ).standardWriterFlags( flags ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum ).height( height )
				.virtualDocumentResolver( 3, RESOLVER ).documentsPerBatch( 20 ).run();
		checkAgainstContent( getEmptySequence(), null, RESOLVER, Scan.DEFAULT_VIRTUAL_DOCUMENT_GAP, Index.getInstance( basename + "-text" ), Index.getInstance( basename + "-int" ), Index
				.getInstance( basename + "-date" ), Index.getInstance( basename + "-virtual" ) );

		// Permuted indexing, using an empty temporary file as map
		final File mapTemp = File.createTempFile( this.getClass().getSimpleName(), "permutation" );
		mapTemp.deleteOnExit();
		final String mapFile = mapTemp.toString();
		new IndexBuilder( basename + "-mapped", getEmptySequence() ).standardWriterFlags( flags ).skipBufferSize( 1024 ).pasteBufferSize( 1024 ).interleaved( interleaved ).skips( quantum != 0 ).quantum( quantum ).height( height )
				.virtualDocumentResolver( 3, RESOLVER ).mapFile( mapFile ).documentsPerBatch( 20 ).run();

		for ( String field : new String[] { "-text", "-int", "-date", "-virtual" } ) sameIndex( basename + field, basename + "-mapped" + field );
	}

	/**
	 * Drives the parameterised {@code testEmpty} over a fixed list of
	 * (interleaved, flags, quantum, height) combinations.
	 *
	 * <p>The list previously repeated the {@code (-8, 1)} and {@code (-8, 4)} pairs where the other
	 * tests of this class use {@code (-4, 1)} and {@code (-4, 4)}. The first repetition has been
	 * replaced by the {@code -4} pairs so that the same grid is covered here.
	 */
	@Test
	public void testEmpty() throws Exception {
		// Reduced flag sets: first without positions, then additionally without counts.
		final Reference2ObjectOpenHashMap flags = new Reference2ObjectOpenHashMap( defaultStandardIndex() );
		for ( Component dropped : new Component[] { Component.POSITIONS, Component.COUNTS } ) {
			flags.remove( dropped );
			testEmpty( true, flags, 4, 4 );
			testEmpty( true, flags, -4, 4 );
		}

		// (quantum, height) pairs run, in this order, for both values of the interleaved flag.
		final int[][] quantumAndHeight = {
				{ 1, 1 }, { 1, 2 }, { 4, 1 }, { 4, 4 }, { 8, 1 }, { 8, 4 },
				{ -1, 1 }, { -1, 2 }, { -4, 1 }, { -4, 4 }, { -8, 1 }, { -8, 4 }
		};

		testEmpty( true, defaultStandardIndex(), 0, 0 );
		for ( int[] pair : quantumAndHeight ) testEmpty( true, defaultStandardIndex(), pair[ 0 ], pair[ 1 ] );

		testEmpty( false, defaultStandardIndex(), 1, 0 );
		for ( int[] pair : quantumAndHeight ) testEmpty( false, defaultStandardIndex(), pair[ 0 ], pair[ 1 ] );
	}

	/**
	 * Builds an index with the builder's default flags, then reloads its four sub-indices once for
	 * each of a fixed list of URI options and checks them against the document sequence.
	 *
	 * @param interleaved passed to {@code IndexBuilder.interleaved()}.
	 * @param quantum passed to {@code IndexBuilder.quantum()}; skips are enabled iff it is nonzero.
	 * @param height passed to {@code IndexBuilder.height()}.
	 */
	public void testLoadOptions( boolean interleaved, int quantum, int height ) throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException,
			IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		// Reference index
		new IndexBuilder( basename, getSequence() )
				.skipBufferSize( 1024 )
				.pasteBufferSize( 1024 )
				.interleaved( interleaved )
				.skips( quantum != 0 )
				.quantum( quantum )
				.height( height )
				.virtualDocumentResolver( 3, RESOLVER )
				.documentsPerBatch( 20 )
				.run();

		// Each option is appended as a query string to the basename of every sub-index.
		final String[] loadOptions = { "inmemory=1", "mapped=1", "offsetstep=0", "offsetstep=-2" };
		for ( int i = 0; i < loadOptions.length; i++ ) {
			final String query = "?" + loadOptions[ i ];
			checkAgainstContent( getSequence(), null, RESOLVER, Scan.DEFAULT_VIRTUAL_DOCUMENT_GAP,
					Index.getInstance( basename + "-text" + query ),
					Index.getInstance( basename + "-int" + query ),
					Index.getInstance( basename + "-date" + query ),
					Index.getInstance( basename + "-virtual" + query ) );
		}
	}

	/**
	 * Runs the parameterised {@code testLoadOptions} on three (quantum, height) pairs with
	 * interleaving, then on three pairs without.
	 */
	@Test
	public void testLoadOptions() throws Exception {
		final int[][] interleavedCases = { { 0, 0 }, { 1, 1 }, { -1, 1 } };
		final int[][] nonInterleavedCases = { { 1, 0 }, { 1, 1 }, { -1, 1 } };

		for ( int[] pair : interleavedCases ) testLoadOptions( true, pair[ 0 ], pair[ 1 ] );

		for ( int[] pair : nonInterleavedCases ) testLoadOptions( false, pair[ 0 ], pair[ 1 ] );
	}

}