All Downloads are FREE. Search and download functionalities are using the official Maven repository.

it.unipi.di.acube.batframework.examples.DumpDataset Maven / Gradle / Ivy

/**
 * (C) Copyright 2012-2013 A-cube lab - Università di Pisa - Dipartimento di Informatica. 
 * BAT-Framework is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
 * BAT-Framework is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License along with BAT-Framework.  If not, see <http://www.gnu.org/licenses/>.
 */

package it.unipi.di.acube.batframework.examples;

import it.unipi.di.acube.batframework.datasetPlugins.*;
import it.unipi.di.acube.batframework.problems.*;
import it.unipi.di.acube.batframework.utils.*;

import java.io.*;

/**
 * Example program that loads the IITB dataset, prints information about it,
 * dumps its content and exports it to the file {@code iitb.xml}.
 *
 * All input and output paths are hard-coded and relative to the working
 * directory from which the program is started.
 */
public class DumpDataset {

	/**
	 * Runs the example: builds the Wikipedia API wrapper, loads the IITB
	 * dataset, prints and dumps it, then exports it as XML.
	 *
	 * @param args command-line arguments (not used).
	 * @throws Exception if any of the framework calls or the file I/O fails.
	 */
	public static void main(String[] args) throws Exception {
		//Creating the API to Wikipedia, backed by the two cache files below.
		String widCachePath = "benchmark/cache/wid.cache";
		String redirectCachePath = "benchmark/cache/redirect.cache";
		WikipediaApiInterface api = new WikipediaApiInterface(widCachePath, redirectCachePath);

		//Creating the IITB dataset.
		String documentsPath = "benchmark/datasets/iitb/crawledDocs";
		String annotationsPath = "benchmark/datasets/iitb/CSAW_Annotations.xml";
		A2WDataset ds = new IITBDataset(documentsPath, annotationsPath, api);

		System.out.println("Printing basic information about dataset " + ds.getName());
		//Basic check & Dump basic information
		TestDataset.dumpInfo(ds, api);

		//Dump of the whole dataset
		DumpData.dumpDataset(ds.getTextInstanceList(), ds.getA2WGoldStandardList(), api);

		//Export to XML. try-with-resources guarantees that the output file is
		//closed even when the export throws; the stream was previously leaked.
		try (FileOutputStream xmlOut = new FileOutputStream("iitb.xml")) {
			Exporter.exportA2WDataset(ds, xmlOut, api);
		}

		//NOTE(review): presumably writes the API caches back to disk -- confirm
		//against WikipediaApiInterface.
		api.flush();
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy