All downloads are free. The search and download functionality uses the official Maven repository.

test.it.unimi.dsi.big.mg4j.tool.MetadataOnlyTest Maven / Gradle / Ivy

Go to download

MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.

The newest version!
package it.unimi.dsi.big.mg4j.tool;

import static org.junit.Assert.*;
import it.unimi.dsi.big.mg4j.document.StringArrayDocumentCollection;
import it.unimi.dsi.big.mg4j.index.CompressionFlags;
import it.unimi.dsi.big.mg4j.index.DiskBasedIndex;
import it.unimi.dsi.big.mg4j.index.Index;
import it.unimi.dsi.big.util.SemiExternalGammaBigList;
import it.unimi.dsi.io.InputBitStream;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.URISyntaxException;

import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.io.filefilter.PrefixFileFilter;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Builds two small text indices, pastes them twice with {@link Paste} (the two runs differ
 * only in the output basename and in the third constructor argument), and checks that the
 * term frequencies of the first pasted index match the frequencies file written by the
 * second ("-mo") run.
 *
 * <p>NOTE(review): the third {@link Paste} argument is presumably the "metadata only" flag
 * this test is named after; confirm against the {@link Paste} constructor.
 */
public class MetadataOnlyTest {

	/** Common filename prefix of every file produced by this test; assigned in {@link #setUp()}. */
	private static String basename;

	/**
	 * Creates the two source indices (suffixes "0" and "1") and the two pasted indices
	 * (no suffix and "-mo") under a freshly created temporary-file basename.
	 */
	@BeforeClass
	public static void setUp() throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		basename = File.createTempFile( MetadataOnlyTest.class.getSimpleName(), "tmp" ).getCanonicalPath();
		new IndexBuilder( basename + "0", new StringArrayDocumentCollection( "a", "c", "a", "d" ) ).run();
		new IndexBuilder( basename + "1", new StringArrayDocumentCollection( "a", "c b", "a b" ) ).run();

		final String[] inputBasenames = { basename + "0-text", basename + "1-text" };
		new Paste( basename, inputBasenames, false, true, 1024, null, 1024, CompressionFlags.DEFAULT_STANDARD_INDEX, false, false, 64, 10, 1024, 1000 ).run();
		new Paste( basename + "-mo", inputBasenames, true, true, 1024, null, 1024, CompressionFlags.DEFAULT_STANDARD_INDEX, false, false, 64, 10, 1024, 1000 ).run();
	}

	/**
	 * Deletes every file in the temporary directory whose name starts with the test basename.
	 * Cleanup is best effort: it never throws, so a failure in {@link #setUp()} is not masked.
	 */
	@AfterClass
	public static void tearDown() {
		// setUp() may have failed before assigning basename; there is nothing to clean up then.
		if ( basename == null ) return;

		final File basenameFile = new File( basename );
		final File directory = basenameFile.getParentFile();
		if ( directory == null ) return;

		// listFiles() returns null (not an empty array) on I/O error or if the path is not a directory.
		final File[] leftovers = directory.listFiles( (FileFilter)new PrefixFileFilter( basenameFile.getName() ) );
		if ( leftovers == null ) return;

		for( File f: leftovers ) {
			// If the file cannot be removed now, ask the JVM to retry at exit.
			if ( ! f.delete() ) f.deleteOnExit();
		}
	}

	/**
	 * Verifies that terms 0..3 have frequencies 2, 2, 1, 1 both in the pasted index and in
	 * the four-element frequencies file of the "-mo" run.
	 */
	@Test
	public void testPaste() throws IOException, ConfigurationException, SecurityException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
		final Index index = Index.getInstance( basename );
		assertEquals( 2, index.documents( 0 ).frequency() );
		assertEquals( 2, index.documents( 1 ).frequency() );
		assertEquals( 1, index.documents( 2 ).frequency() );
		assertEquals( 1, index.documents( 3 ).frequency() );

		final InputBitStream frequencyStream = new InputBitStream( basename + "-mo" + DiskBasedIndex.FREQUENCIES_EXTENSION );
		try {
			// The list may keep reading from the stream on access, so the stream is closed
			// only after the last assertion.
			final SemiExternalGammaBigList frequencies = new SemiExternalGammaBigList( frequencyStream, 1, 4 );

			assertEquals( 2, frequencies.getLong( 0 ) );
			assertEquals( 2, frequencies.getLong( 1 ) );
			assertEquals( 1, frequencies.getLong( 2 ) );
			assertEquals( 1, frequencies.getLong( 3 ) );
			assertEquals( 4, frequencies.size64() );
		}
		finally {
			// Release the file handle so tearDown() can delete the frequencies file.
			frequencyStream.close();
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy