All downloads are free. Search and download functionality uses the official Maven repository.

test.it.unimi.di.archive4j.tool.MergeTest Maven / Gradle / Ivy

Go to download

Archive4J is a suite of tools to store compactly term/count information of a document collection.

There is a newer version: 1.3.3
Show newest version
package it.unimi.di.archive4j.tool;

/*		 
 * Copyright (C) 2008-2013 Alessio Orlandi and Sebastiano Vigna 
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */


import it.unimi.di.big.mg4j.document.DocumentSequence;
import it.unimi.di.big.mg4j.index.NullTermProcessor;
import it.unimi.di.archive4j.ArchiveLoader;
import it.unimi.di.archive4j.tool.MergePreprocessedData;
import it.unimi.di.archive4j.tool.Preprocess;
import it.unimi.di.archive4j.tool.Scan;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.util.Properties;
import junit.framework.TestCase;
import it.unimi.di.archive4j.TestCollection;




/**
 * Exercises the preprocess / merge / scan pipeline on the two document
 * sources exposed by {@code TestCollection.testBiParseDocSources}, then
 * compares the resulting archives with the expected data in
 * {@code TestCollection.testBiParse}.
 */
public class MergeTest extends TestCase {

	/**
	 * Value returned by {@code TestCollection.buildDocumentSequence} for source 0.
	 * Used both as the file name the document sequence is loaded from and as
	 * the prefix of every derived output name.
	 */
	String basename0;

	/** Same as {@link #basename0}, for source 1. */
	String basename1;

	/** Builds the two document sequences used by the test. */
	@Override
	protected void setUp() throws Exception {
		super.setUp();

		basename0 = TestCollection.buildDocumentSequence( TestCollection.testBiParseDocSources[ 0 ] );
		basename1 = TestCollection.buildDocumentSequence( TestCollection.testBiParseDocSources[ 1 ] );
	}

	/** Performs no cleanup beyond what the superclass does. */
	@Override
	protected void tearDown() throws Exception {
		super.tearDown();
	}

	/**
	 * Preprocesses both sequences, merges the preprocessed data, scans each
	 * sequence against the merged data and checks the resulting summaries.
	 *
	 * @throws Exception if any step of the pipeline fails.
	 */
	public void testOne() throws Exception {
		final String out0 = basename0 + "-out";
		final String out1 = basename1 + "-out";
		// The merged data is named after the first basename and shared by both scans.
		final String merged = basename0 + "-merged";

		Preprocess.run( out0, (DocumentSequence)BinIO.loadObject( basename0 ), TestCollection.getNullTermProcessor(), "text" );
		Preprocess.run( out1, (DocumentSequence)BinIO.loadObject( basename1 ), TestCollection.getNullTermProcessor(), "text" );

		// NOTE(review): this Properties object is configured but never saved or
		// passed to anything in this method; confirm whether it is still needed.
		Properties properties = new Properties();
		properties.setProperty( Preprocess.PropertyKeys.TERMPROCESSOR, NullTermProcessor.class.getName() );
		properties.setFileName( merged + Preprocess.PROPERTIES_EXTENSION );

		// Presumably the filter restricts merged terms by length (2 .. Integer.MAX_VALUE);
		// confirm against MergePreprocessedData.LengthFilter.
		MergePreprocessedData.run(
				new String[] { out0, out1 },
				merged,
				new MergePreprocessedData.TermFilter[] { new MergePreprocessedData.LengthFilter( 2, Integer.MAX_VALUE ) } );

		Scan.main( scanArguments( basename0, merged ) );
		Scan.main( scanArguments( basename1, merged ) );

		TestCollection.compareSummaries( TestCollection.testBiParse[ 0 ], ArchiveLoader.getInstance( basename0 ) );
		TestCollection.compareSummaries( TestCollection.testBiParse[ 1 ], ArchiveLoader.getInstance( basename1 ) );
	}

	/**
	 * Builds the command-line arguments handed to {@code Scan.main}.
	 *
	 * <p>The array is assembled directly instead of splitting a space-joined
	 * string, so a basename that contains spaces remains a single argument.
	 *
	 * @param basename the basename passed twice after the {@code -S} option, as in the original command line.
	 * @param mergedBasename the basename of the merged preprocessed data.
	 * @return the argument array, in the same order as the original command line.
	 */
	private static String[] scanArguments( final String basename, final String mergedBasename ) {
		return new String[] { "-I", "text", "-S", basename, basename, mergedBasename };
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy