package it.unimi.dsi.big.mg4j.index;

/*		 
 * MG4J: Managing Gigabytes for Java (big)
 *
 * Copyright (C) 2004-2011 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.Util;
import it.unimi.dsi.fastutil.ints.AbstractIntBigList;
import it.unimi.dsi.fastutil.ints.AbstractIntIterator;
import it.unimi.dsi.fastutil.ints.IntBigArrayBigList;
import it.unimi.dsi.fastutil.ints.IntBigArrays;
import it.unimi.dsi.fastutil.ints.IntBigList;
import it.unimi.dsi.fastutil.ints.IntIterable;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigArrayBigList;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.fastutil.longs.LongBigList;
import it.unimi.dsi.fastutil.longs.LongBigLists;
import it.unimi.dsi.io.ByteBufferInputStream;
import it.unimi.dsi.io.InputBitStream;
import it.unimi.dsi.big.mg4j.index.BitStreamIndexWriter;
import it.unimi.dsi.big.mg4j.index.CompressionFlags.Coding;
import it.unimi.dsi.big.mg4j.index.CompressionFlags.Component;
import it.unimi.dsi.big.mg4j.index.Index.UriKeys;
import it.unimi.dsi.big.mg4j.index.payload.Payload;
import it.unimi.dsi.big.mg4j.util.SemiExternalOffsetBigList;
import it.unimi.dsi.sux4j.util.EliasFanoLongBigList;
import it.unimi.dsi.big.util.PrefixMap;
import it.unimi.dsi.util.Properties;
import it.unimi.dsi.big.util.StringMap;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.channels.FileChannel.MapMode;
import java.util.EnumMap;
import java.util.Map;
import java.util.NoSuchElementException;

import org.apache.commons.configuration.ConfigurationException;
import org.apache.log4j.Logger;


/** A static container providing facilities to load an index based on data stored on disk.
 *
 * <p>This class contains several useful static methods
 * such as {@link #readOffsets(InputBitStream, long)} and {@link #readSizes(CharSequence, long)},
 * and static factory methods such as {@link #getInstance(CharSequence, boolean, boolean, boolean, EnumMap)}
 * that take care of reading the properties associated with the index, identify
 * the correct {@link it.unimi.dsi.big.mg4j.index.Index} implementation that
 * should be used to load the index, and load the necessary data into memory.
 *
 * <p>As an option, a disk-based index can be loaded into main memory (key: {@link Index.UriKeys#INMEMORY}),
 * returning an {@link it.unimi.dsi.big.mg4j.index.InMemoryIndex}/{@link InMemoryHPIndex}, or mapped into
 * main memory (key: {@link Index.UriKeys#MAPPED}), returning a {@link MemoryMappedIndex}/{@link MemoryMappedHPIndex}
 * (note that the value assigned to the keys is irrelevant). In both cases some insurmountable Java problems
 * prevent using indices whose size exceeds two gigabytes (but see {@link MemoryMappedIndex} for
 * some elaboration on this topic).
 *
 * <p>Moreover, by default the term-offset list is accessed using a
 * {@link it.unimi.dsi.big.mg4j.util.SemiExternalOffsetBigList}
 * with a step of {@link #DEFAULT_OFFSET_STEP}. This behaviour can be changed using
 * the URI key {@link UriKeys#OFFSETSTEP}.
 *
 * <p>Disk-based indices are the workhorse of MG4J. All other indices (clustered,
 * remote, etc.) ultimately rely on disk-based indices to provide results.
 *
 * <p>Note that not all data produced by {@link it.unimi.dsi.big.mg4j.tool.Scan} and
 * by the other indexing utilities are actually necessary to run a disk-based
 * index. Usually the property file and the index file (plus the positions file,
 * for {@linkplain BitStreamHPIndex high-performance indices}) are sufficient: if one
 * needs random access, also the offsets file must be present, and if the
 * compression method requires document sizes or if sizes are requested explicitly,
 * also the sizes file must be present. A {@link StringMap}
 * and possibly a {@link PrefixMap} will be fetched
 * automatically by {@link #getInstance(CharSequence, boolean, boolean)}
 * using standard extensions.
 *
 * <h2>Thread safety</h2>
 *
 * <p>A disk-based index is thread safe as long as the offset list, the size list and
 * the term/prefix map are. The static factory methods provided by this class load
 * offsets and sizes using data structures that are thread safe. If you use directly
 * a constructor, instead, it is your responsibility to pass thread-safe data structures.
 *
 * @author Sebastiano Vigna
 * @since 1.1
 */

public class DiskBasedIndex {
    private static final Logger LOGGER = Util.getLogger( DiskBasedIndex.class );
    private static final long serialVersionUID = 0;

    /** The default value for the query parameter {@link Index.UriKeys#OFFSETSTEP}. */
    public final static int DEFAULT_OFFSET_STEP = 256;

    /** Standard extension for the index bitstream. */
    public static final String INDEX_EXTENSION = ".index";
    /** Standard extension for the positions bitstream of a {@linkplain BitStreamHPIndexWriter high-performance index}. */
    public static final String POSITIONS_EXTENSION = ".positions";
    /** Standard extension for the index properties. */
    public static final String PROPERTIES_EXTENSION = ".properties";
    /** Standard extension for the file of sizes. */
    public static final String SIZES_EXTENSION = ".sizes";
    /** Standard extension for the file of offsets. */
    public static final String OFFSETS_EXTENSION = ".offsets";
    /** Standard extension for the file of lengths of positions. */
    public static final String POSITIONS_NUMBER_OF_BITS_EXTENSION = ".posnumbits";
    /** Standard extension for the file of global counts. */
    public static final String GLOBCOUNTS_EXTENSION = ".globcounts";
    /** Standard extension for the file of frequencies. */
    public static final String FREQUENCIES_EXTENSION = ".frequencies";
    /** Standard extension for the file of terms. */
    public static final String TERMS_EXTENSION = ".terms";
    /** Standard extension for the file of terms, unsorted. */
    public static final String UNSORTED_TERMS_EXTENSION = ".terms.unsorted";
    /** Standard extension for the term map. */
    public static final String TERMMAP_EXTENSION = ".termmap";
    /** Standard extension for the prefix map. */
    public static final String PREFIXMAP_EXTENSION = ".prefixmap";
    /** Standard extension for the stats file. */
    public static final String STATS_EXTENSION = ".stats";

    private DiskBasedIndex() {}

    /** Utility method to load a compressed offset file into a list.
     *
     * @param in the input bit stream providing the offsets (see {@link BitStreamIndexWriter}).
     * @param T the number of terms indexed.
     * @return a list of longs backed by a big array; the list has
     * an additional final element of index <code>T</code> that gives the number
     * of bytes of the index file.
     */
    public static LongBigList readOffsets( final InputBitStream in, final long T ) throws IOException {
        final long[][] offset = LongBigArrays.newBigArray( T + 1 );
        LOGGER.debug( "Loading offsets..." );
        long prev;
        LongBigArrays.set( offset, 0, prev = in.readLongGamma() );
        // Offsets are stored as γ-coded gaps; note the long loop index, as T may exceed 2^31.
        for( long i = 0; i < T; i++ ) LongBigArrays.set( offset, i + 1, prev = in.readLongGamma() + prev );
        LOGGER.debug( "Completed." );
        return LongBigArrayBigList.wrap( offset );
    }

    /** Utility method to load a compressed offset file into a list.
     *
     * @param filename the file containing the offsets (see {@link BitStreamIndexWriter}).
     * @param T the number of terms indexed.
     * @return a list of longs backed by a big array; the list has
     * an additional final element of index <code>T</code> that gives the number
     * of bytes of the index file.
     */
    public static LongBigList readOffsets( final CharSequence filename, final long T ) throws IOException {
        final InputBitStream in = new InputBitStream( filename.toString() );
        final LongBigList offsets = readOffsets( in, T );
        in.close();
        return offsets;
    }
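    /* For example (a sketch, not part of the class itself; the basename "index/text" and the
     * term count 1000 are hypothetical), the offset list written during index construction can
     * be read back and used to locate a single inverted list:
     *
     *   LongBigList offsets = DiskBasedIndex.readOffsets( "index/text" + DiskBasedIndex.OFFSETS_EXTENSION, 1000 );
     *   long start = offsets.getLong( 42 );           // offset of the inverted list of term 42
     *   long extent = offsets.getLong( 43 ) - start;  // its extent, thanks to the additional final element
     */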
    /** Utility method to load a compressed size file into a list.
     *
     * @param filename the filename containing the γ-coded sizes (see {@link BitStreamIndexWriter}).
     * @param N the number of documents.
     * @return a list of integers backed by a big array.
     */
    public static IntBigList readSizes( final CharSequence filename, final long N ) throws IOException {
        final int[][] size = IntBigArrays.newBigArray( N );
        final InputBitStream in = new InputBitStream( filename.toString() );
        LOGGER.debug( "Loading sizes..." );
        for( int segment = 0; segment < size.length; segment++ ) in.readGammas( size[ segment ], size[ segment ].length );
        LOGGER.debug( "Completed." );
        in.close();
        return IntBigArrayBigList.wrap( size );
    }

    /** Utility method to load a compressed size file into an {@linkplain EliasFanoLongBigList Elias–Fano compressed list}.
     *
     * @param filename the filename containing the γ-coded sizes (see {@link BitStreamIndexWriter}).
     * @param N the number of documents indexed.
     * @return a list of integers backed by an {@linkplain EliasFanoLongBigList Elias–Fano compressed list}.
     */
    public static IntBigList readSizesSuccinct( final CharSequence filename, final long N ) throws IOException {
        LOGGER.debug( "Loading sizes..." );
        final IntBigList sizes = new AbstractIntBigList() {
            final EliasFanoLongBigList list = new EliasFanoLongBigList( new GammaCodedIterableList( BinIO.loadBytes( filename ), N ) );
            public int getInt( long index ) { return (int)list.getLong( index ); }
            public long size64() { return list.size64(); }
        };
        LOGGER.debug( "Completed." );
        return sizes;
    }
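    /* A sketch contrasting the two size loaders on a hypothetical index with 500000 documents:
     * readSizes() keeps sizes in an int big array (fast access), whereas readSizesSuccinct()
     * wraps them in an Elias–Fano compressed list (much smaller, somewhat slower access):
     *
     *   IntBigList sizes = DiskBasedIndex.readSizes( "index/text" + DiskBasedIndex.SIZES_EXTENSION, 500000 );
     *   IntBigList compact = DiskBasedIndex.readSizesSuccinct( "index/text" + DiskBasedIndex.SIZES_EXTENSION, 500000 );
     *   int size3 = sizes.getInt( 3 );  // size (in words) of document 3
     */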
    // TODO: replace this with a general-purpose class
    private static class GammaCodedIterableList implements IntIterable {
        private final long n;
        private final byte[] array;

        public GammaCodedIterableList( final byte[] array, final long n ) {
            this.array = array;
            this.n = n;
        }

        public IntIterator iterator() {
            return new AbstractIntIterator() {
                final InputBitStream ibs = new InputBitStream( array );
                long pos; // a long, as the number of sizes may exceed 2^31

                public boolean hasNext() { return pos < n; }

                public int nextInt() {
                    if ( ! hasNext() ) throw new NoSuchElementException();
                    pos++;
                    try {
                        return ibs.readGamma();
                    }
                    catch ( IOException e ) {
                        throw new RuntimeException( e );
                    }
                }
            };
        }
    }

    /** Utility static method that loads a term map.
     *
     * @param filename the name of the file containing the term map.
     * @return the map, or <code>null</code> if the file did not exist.
     * @throws IOException if some IOException (other than {@link FileNotFoundException}) occurred.
     */
    @SuppressWarnings("unchecked")
    public static StringMap<? extends CharSequence> loadStringMap( final String filename ) throws IOException {
        try {
            return (StringMap<? extends CharSequence>)BinIO.loadObject( filename );
        }
        catch ( FileNotFoundException e ) {
            return null;
        }
        catch ( ClassNotFoundException e ) {
            throw new RuntimeException( e );
        }
    }

    /** Utility static method that loads a prefix map.
     *
     * @param filename the name of the file containing the prefix map.
     * @return the map, or <code>null</code> if the file did not exist.
     * @throws IOException if some IOException (other than {@link FileNotFoundException}) occurred.
     */
    @SuppressWarnings("unchecked")
    public static PrefixMap<? extends CharSequence> loadPrefixMap( final String filename ) throws IOException {
        try {
            return (PrefixMap<? extends CharSequence>)BinIO.loadObject( filename );
        }
        catch ( FileNotFoundException e ) {
            return null;
        }
        catch ( ClassNotFoundException e ) {
            throw new RuntimeException( e );
        }
    }
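    /* Both loaders return null when the file is missing, so callers can simply probe for the
     * optional maps (a sketch; the basename is hypothetical, and the value returned for an
     * absent term, typically -1, depends on how the map was built):
     *
     *   StringMap<? extends CharSequence> termMap = DiskBasedIndex.loadStringMap( "index/text" + DiskBasedIndex.TERMMAP_EXTENSION );
     *   if ( termMap != null ) {
     *       long term = termMap.getLong( "house" );  // position of "house" in the term list
     *   }
     */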
    /** Returns a new disk-based index, loading exactly the specified parts and using preloaded {@link Properties}.
     *
     * @param basename the basename of the index.
     * @param properties the properties obtained from the given basename.
     * @param termMap the term map for this index, or <code>null</code> for no term map.
     * @param prefixMap the prefix map for this index, or <code>null</code> for no prefix map.
     * @param randomAccess whether the index should be accessible randomly (e.g., if it will
     * be possible to call {@link IndexReader#documents(long)} on the index readers returned by the index).
     * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
     * might be loaded anyway because the compression method for positions requires it).
     * @param queryProperties a map containing associations between {@link Index.UriKeys} and values, or <code>null</code>.
     */
    public static BitStreamIndex getInstance( final CharSequence basename, Properties properties,
            final StringMap<? extends CharSequence> termMap, final PrefixMap<? extends CharSequence> prefixMap,
            final boolean randomAccess, final boolean documentSizes, final EnumMap<UriKeys, String> queryProperties )
            throws ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
        // This could be null if old indices contain SkipIndex
        Class<?> indexClass = null;
        try {
            indexClass = Class.forName( properties.getString( Index.PropertyKeys.INDEXCLASS, "(missing index class)" ) );
        }
        catch( Exception ignore ) {}

        File indexFile = new File( basename + INDEX_EXTENSION );
        if ( ! indexFile.exists() ) throw new FileNotFoundException( "Cannot find index file " + indexFile.getName() );

        final Map<Component, Coding> flags = CompressionFlags.valueOf( properties.getStringArray( Index.PropertyKeys.CODING ), null );

        final long numberOfDocuments = properties.getLong( Index.PropertyKeys.DOCUMENTS );
        final long numberOfTerms = properties.getLong( Index.PropertyKeys.TERMS );
        final long numberOfPostings = properties.getLong( Index.PropertyKeys.POSTINGS );
        final long numberOfOccurrences = properties.getLong( Index.PropertyKeys.OCCURRENCES, -1 );
        final int maxCount = properties.getInt( Index.PropertyKeys.MAXCOUNT, -1 );
        final String field = properties.getString( Index.PropertyKeys.FIELD, new File( basename.toString() ).getName() );

        if ( termMap != null && termMap.size64() != numberOfTerms ) throw new IllegalArgumentException( "The size of the term map (" + termMap.size64() + ") is not equal to the number of terms (" + numberOfTerms + ")" );
        if ( prefixMap != null && prefixMap.size64() != numberOfTerms ) throw new IllegalArgumentException( "The size of the prefix map (" + prefixMap.size64() + ") is not equal to the number of terms (" + numberOfTerms + ")" );

        final Payload payload = (Payload)( properties.containsKey( Index.PropertyKeys.PAYLOADCLASS ) ? Class.forName( properties.getString( Index.PropertyKeys.PAYLOADCLASS ) ).newInstance() : null );
        final Coding frequencyCoding = flags.get( Component.FREQUENCIES );
        final Coding pointerCoding = flags.get( Component.POINTERS );
        final Coding countCoding = flags.get( Component.COUNTS );
        final Coding positionCoding = flags.get( Component.POSITIONS );
        if ( countCoding == null && positionCoding != null ) throw new IllegalArgumentException( "Index " + basename + " has positions but no counts (this can't happen)" );

        // Load document sizes if forced to do so, or if the pointer/position compression methods make it necessary.
        IntBigList sizes = null;
        // TODO: quick patch to avoid loading sizes in case of payloads.
        if ( payload == null && ( documentSizes || positionCoding == Coding.GOLOMB || positionCoding == Coding.INTERPOLATIVE ) ) {
            sizes = queryProperties != null && queryProperties.containsKey( UriKeys.SUCCINCTSIZES )
                ? readSizesSuccinct( basename + DiskBasedIndex.SIZES_EXTENSION, numberOfDocuments )
                : readSizes( basename + DiskBasedIndex.SIZES_EXTENSION, numberOfDocuments );
            if ( sizes.size64() != numberOfDocuments ) throw new IllegalStateException( "The length of the size list (" + sizes.size64() + ") is not equal to the number of documents (" + numberOfDocuments + ")" );
        }

        // Load offsets if forced to do so. Depending on a property, we use the core-memory or the semi-external version.
        final LongBigList offsets;
        // TODO: quick patch to avoid loading sizes in case of payloads.
        if ( payload == null && randomAccess ) {
            int offsetStep = queryProperties != null && queryProperties.get( UriKeys.OFFSETSTEP ) != null ? Integer.parseInt( queryProperties.get( UriKeys.OFFSETSTEP ) ) : DEFAULT_OFFSET_STEP;
            if ( offsetStep < 0 ) { // Memory-mapped
                offsetStep = -offsetStep;
                offsets = LongBigLists.synchronize( new SemiExternalOffsetBigList( new InputBitStream( ByteBufferInputStream.map( new FileInputStream( basename + DiskBasedIndex.OFFSETS_EXTENSION ).getChannel(), MapMode.READ_ONLY ) ), offsetStep, numberOfTerms + 1 ) );
            }
            else {
                offsets = offsetStep == 0
                    ? DiskBasedIndex.readOffsets( basename + DiskBasedIndex.OFFSETS_EXTENSION, numberOfTerms )
                    : LongBigLists.synchronize( new SemiExternalOffsetBigList( new InputBitStream( basename + DiskBasedIndex.OFFSETS_EXTENSION, 1024 ), offsetStep, numberOfTerms + 1 ) );
            }
            if ( offsets.size64() != numberOfTerms + 1 ) throw new IllegalStateException( "The length of the offset list (" + offsets.size64() + ") is not equal to the number of terms plus one (" + numberOfTerms + " + 1)" );
        }
        else offsets = null;

        final int quantum = properties.getInt( BitStreamIndex.PropertyKeys.SKIPQUANTUM, -1 );
        final int height = properties.getInt( BitStreamIndex.PropertyKeys.SKIPHEIGHT, -1 );
        final int bufferSize = properties.getInt( BitStreamIndex.PropertyKeys.BUFFERSIZE, BitStreamIndex.DEFAULT_BUFFER_SIZE );
        final TermProcessor termProcessor = Index.getTermProcessor( properties );

        final boolean highPerformance = indexClass != null && FileHPIndex.class.isAssignableFrom( indexClass );

        if ( queryProperties != null && queryProperties.containsKey( UriKeys.INMEMORY ) ) {
            /*if ( SqrtSkipIndex.class.isAssignableFrom( indexClass ) )
                return new SqrtSkipInMemoryIndex( BinIO.loadBytes( indexFile.toString() ),
                        numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
                        frequencyCoding, pointerCoding, countCoding, positionCoding,
                        termProcessor, field, properties, termMap, prefixMap, sizes, offsets );*/
            return highPerformance
                ? new InMemoryHPIndex( BinIO.loadBytes( indexFile.toString() ), BinIO.loadBytes( basename + POSITIONS_EXTENSION ),
                        numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount, payload,
                        frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height,
                        termProcessor, field, properties, termMap, prefixMap, sizes, offsets )
                : new InMemoryIndex( BinIO.loadBytes( indexFile.toString() ),
                        numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount, payload,
                        frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height,
                        termProcessor, field, properties, termMap, prefixMap, sizes, offsets );
        }
        else if ( queryProperties != null && queryProperties.containsKey( UriKeys.MAPPED ) ) {
            final File positionsFile = new File( basename + POSITIONS_EXTENSION );
            final ByteBufferInputStream index = ByteBufferInputStream.map( new FileInputStream( indexFile ).getChannel(), MapMode.READ_ONLY );
            return highPerformance
                ? new MemoryMappedHPIndex( index, ByteBufferInputStream.map( new FileInputStream( positionsFile ).getChannel(), MapMode.READ_ONLY ),
                        numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount, payload,
                        frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height,
                        termProcessor, field, properties, termMap, prefixMap, sizes, offsets )
                : new MemoryMappedIndex( index,
                        numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount, payload,
                        frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height,
                        termProcessor, field, properties, termMap, prefixMap, sizes, offsets );
        }

        /*if ( SqrtSkipIndex.class.isAssignableFrom( indexClass ) )
            return new SqrtSkipFileIndex( basename.toString(),
                    numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
                    frequencyCoding, pointerCoding, countCoding, positionCoding,
                    termProcessor, field, properties, termMap, prefixMap, sizes, offsets, indexFile );*/
        return highPerformance
            ? new FileHPIndex( basename.toString(),
                    numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount, payload,
                    frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height, bufferSize,
                    termProcessor, field, properties, termMap, prefixMap, sizes, offsets )
            : new FileIndex( basename.toString(),
                    numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount, payload,
                    frequencyCoding, pointerCoding, countCoding, positionCoding, quantum, height, bufferSize,
                    termProcessor, field, properties, termMap, prefixMap, sizes, offsets );
    }
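    /* The queryProperties map above is keyed on Index.UriKeys; a sketch of requesting a
     * memory-mapped index with a coarser offset sampling (recall that the value associated
     * with presence-only keys such as MAPPED is irrelevant):
     *
     *   EnumMap<UriKeys, String> queryProperties = new EnumMap<UriKeys, String>( UriKeys.class );
     *   queryProperties.put( UriKeys.MAPPED, "" );         // map the index bitstream into memory
     *   queryProperties.put( UriKeys.OFFSETSTEP, "512" );  // keep one offset in 512 in core memory
     */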
    /** Returns a new disk-based index, using preloaded {@link Properties} and possibly guessing reasonable term and prefix maps from the basename.
     *
     * @param basename the basename of the index.
     * @param properties the properties obtained by stemming basename.
     * @param randomAccess whether the index should be accessible randomly.
     * @param documentSizes if true, document sizes will be loaded.
     * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded.
     * @param queryProperties a map containing associations between {@link Index.UriKeys} and values, or <code>null</code>.
     * @throws InstantiationException
     * @throws IllegalAccessException
     * @see #getInstance(CharSequence, Properties, StringMap, PrefixMap, boolean, boolean, EnumMap)
     */
    public static BitStreamIndex getInstance( final CharSequence basename, final Properties properties, final boolean randomAccess, final boolean documentSizes, final boolean maps, final EnumMap<UriKeys, String> queryProperties ) throws ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
        StringMap<? extends CharSequence> termMap = null;
        PrefixMap<? extends CharSequence> prefixMap = null;
        if ( maps ) {
            // TODO: check this logic
            termMap = DiskBasedIndex.loadStringMap( basename + DiskBasedIndex.TERMMAP_EXTENSION );
            if ( termMap != null && termMap instanceof PrefixMap ) return getInstance( basename, properties, termMap, (PrefixMap<? extends CharSequence>)termMap, randomAccess, documentSizes, queryProperties );
            prefixMap = DiskBasedIndex.loadPrefixMap( basename + DiskBasedIndex.PREFIXMAP_EXTENSION );
            if ( termMap != null ) return getInstance( basename, properties, termMap, prefixMap, randomAccess, documentSizes, queryProperties );
            if ( prefixMap != null ) return getInstance( basename, properties, prefixMap, prefixMap, randomAccess, documentSizes, queryProperties );
        }
        return getInstance( basename, properties, null, prefixMap, randomAccess, documentSizes, queryProperties );
    }
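    /* A sketch of this preloaded-properties variant (hypothetical basename), useful when the
     * property file has already been parsed, e.g., to inspect it before loading the index:
     *
     *   Properties p = new Properties( "index/text" + DiskBasedIndex.PROPERTIES_EXTENSION );
     *   BitStreamIndex index = DiskBasedIndex.getInstance( "index/text", p, true, false, true, null );
     */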

    /** Returns a new disk-based index, possibly guessing reasonable term and prefix maps from the basename.
     *
     * <p>If there is a term map file (basename stemmed with <code>.termmap</code>), it is used as term map and,
     * in case it implements {@link PrefixMap}, as prefix map. Otherwise, we search for a prefix map (basename
     * stemmed with <code>.prefixmap</code>) and, if no term map has been found and it implements {@link StringMap},
     * we use it as term map, too.
     *
     * @param basename the basename of the index.
     * @param randomAccess whether the index should be accessible randomly (e.g., if it will
     * be possible to call {@link IndexReader#documents(long)} on the index readers returned by the index).
     * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
     * might be loaded anyway because the compression method for positions requires it).
     * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded (this
     * feature might not be available with some kind of index).
     * @param queryProperties a map containing associations between {@link Index.UriKeys} and values, or <code>null</code>.
     */
    public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess, final boolean documentSizes, final boolean maps, final EnumMap<UriKeys, String> queryProperties ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
        return getInstance( basename, new Properties( basename + DiskBasedIndex.PROPERTIES_EXTENSION ), randomAccess, documentSizes, maps, queryProperties );
    }
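    /* This overload is the usual one-stop entry point; a sketch (hypothetical basename) that
     * loads an index with random access, no sizes, and automatically guessed maps:
     *
     *   BitStreamIndex index = DiskBasedIndex.getInstance( "index/text", true, false, true, null );
     *   IndexReader reader = index.getReader();  // readers provide access to the inverted lists
     */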

    /** Returns a new disk-based index, possibly guessing reasonable term and prefix maps from the basename.
     *
     * <p>If there is a term map file (basename stemmed with <code>.termmap</code>), it is used as term map and,
     * in case it implements {@link PrefixMap}, as prefix map. Otherwise, we search for a prefix map (basename
     * stemmed with <code>.prefixmap</code>) and, if no term map has been found and it implements {@link StringMap},
     * we use it as term map, too.
     *
     * @param basename the basename of the index.
     * @param randomAccess whether the index should be accessible randomly (e.g., if it will
     * be possible to call {@link IndexReader#documents(long)} on the index readers returned by the index).
     * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
     * might be loaded anyway because the compression method for positions requires it).
     * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded (this
     * feature might not be available with some kind of index).
     * @see #getInstance(CharSequence, boolean, boolean, boolean, EnumMap)
     */
    public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess, final boolean documentSizes, final boolean maps ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
        return getInstance( basename, new Properties( basename + DiskBasedIndex.PROPERTIES_EXTENSION ), randomAccess, documentSizes, maps, null );
    }

    /** Returns a new disk-based index, guessing reasonable term and prefix maps from the basename.
     *
     * @param basename the basename of the index.
     * @param randomAccess whether the index should be accessible randomly (e.g., if it will
     * be possible to call {@link IndexReader#documents(long)} on the index readers returned by the index).
     * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
     * might be loaded anyway because the compression method for positions requires it).
     */
    public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess, final boolean documentSizes ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
        return getInstance( basename, randomAccess, documentSizes, true );
    }

    /** Returns a new local index, trying to guess reasonable term and prefix maps from the basename,
     * and loading document sizes only if it is necessary.
     *
     * @param basename the basename of the index.
     * @param randomAccess whether the index should be accessible randomly (e.g., if it will
     * be possible to call {@link IndexReader#documents(long)} on the index readers returned by the index).
     */
    public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
        return getInstance( basename, randomAccess, false );
    }

    /** Returns a new local index, trying to guess reasonable term and prefix maps from the basename,
     * loading offsets but loading document sizes only if it is necessary.
     *
     * @param basename the basename of the index.
     */
    public static BitStreamIndex getInstance( final CharSequence basename ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
        return getInstance( basename, true );
    }
}




