All Downloads are FREE. Search and download functionalities are using the official Maven repository.

it.unipi.di.acube.batframework.examples.BatchMain Maven / Gradle / Ivy

There is a newer version: 1.3.6
Show newest version
/**
 * (C) Copyright 2012-2013 A-cube lab - Università di Pisa - Dipartimento di Informatica. 
 * BAT-Framework is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
 * BAT-Framework is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License along with BAT-Framework.  If not, see <http://www.gnu.org/licenses/>.
 */

package it.unipi.di.acube.batframework.examples;

import it.unipi.di.acube.batframework.cache.BenchmarkCache;
import it.unipi.di.acube.batframework.data.*;
import it.unipi.di.acube.batframework.datasetPlugins.*;
import it.unipi.di.acube.batframework.metrics.*;
import it.unipi.di.acube.batframework.problems.*;
import it.unipi.di.acube.batframework.systemPlugins.*;
import it.unipi.di.acube.batframework.utils.*;

import java.util.*;

public class BatchMain {

	public static void main(String[] args) throws Exception {
		//Use system DNS
		java.security.Security.setProperty("networkaddress.cache.negative.ttl", "0");
		java.security.Security.setProperty("networkaddress.cache.ttl", "0");
		
		//Cache retrieved annotations here
		BenchmarkCache.useCache("benchmark/cache/results.cache");
		
		System.out.println(BenchmarkCache.getCacheInfo());

		System.out.println("Creating the API to wikipedia...");
		WikipediaApiInterface wikiApi = new WikipediaApiInterface("benchmark/cache/wid.cache", "benchmark/cache/redirect.cache");
		DBPediaApi dbpApi = new DBPediaApi();

		System.out.println("Creating the taggers...");
		Sa2WSystem tagme = new TagmeAnnotator("benchmark/configs/tagme/config.xml");
		//Sa2WSystem illinois = new IllinoisAnnotator_Server();
		Sa2WSystem miner = new WikipediaMinerAnnotator("benchmark/configs/wikipediaminer/config.xml");
		Sa2WSystem spotLight = new SpotlightAnnotator(dbpApi, wikiApi);


		System.out.println("Loading the datasets...");
		C2WDataset meijDs = new MeijDataset("benchmark/datasets/meij/original_tweets.list", "benchmark/datasets/meij/wsdm2012_annotations.txt", "benchmark/datasets/meij/wsdm2012_qrels.txt");
		A2WDataset aquaintDs = new AQUAINTDataset("benchmark/datasets/AQUAINT/RawTexts", "benchmark/datasets/AQUAINT/Problems", wikiApi);
		A2WDataset aidaDs = new ConllAidaDataset("benchmark/datasets/aida/AIDA-YAGO2-dataset.tsv", wikiApi);
		A2WDataset msnbcDs = new MSNBCDataset("benchmark/datasets/MSNBC/RawTextsSimpleChars_utf8", "benchmark/datasets/MSNBC/Problems", wikiApi);
		A2WDataset iitbDs = new IITBDataset("benchmark/datasets/iitb/crawledDocs", "benchmark/datasets/iitb/CSAW_Annotations.xml", wikiApi);
		
		/** Create a vector containing all the A2W datasets */
		Vector dssA2W = new Vector();
		dssA2W.add(iitbDs);
		dssA2W.add(msnbcDs);
		dssA2W.add(aquaintDs);
		dssA2W.add(aidaDs);
		
		/** Create a vector containing all the C2W datasets */
		Vector dssC2W = new Vector();
		dssC2W.add(meijDs);
		dssC2W.add(iitbDs); //Yes, you can put a A2W dataset here, since it also provides a C2W gold standard.
		
		/** Create a vector containing all the Sa2W annotators */
		Vector sa2wAnnotators = new Vector();
		sa2wAnnotators.add(tagme);
		//sa2wAnnotators.add(illinois);
		sa2wAnnotators.add(spotLight);
		sa2wAnnotators.add(miner);

		/** Create the match relations */
		MatchRelation wam = new WeakAnnotationMatch(wikiApi);
		MatchRelation sam = new StrongAnnotationMatch(wikiApi);
		MatchRelation cam = new ConceptAnnotationMatch(wikiApi);
		MatchRelation mam = new MentionAnnotationMatch();
		
		/*********** A2W experiments ************/
		
		/** Create a vector containing the match relations we want to base our measurements for the A2W Experiment on. */
		Vector> matchRelationsA2W = new Vector>();
		matchRelationsA2W.add(wam);
		matchRelationsA2W.add(sam);
		matchRelationsA2W.add(cam);
		matchRelationsA2W.add(mam);
		
		/** Hashmap for saving the measurements results.
		 * The mapping will be: metric name -> tagger name -> dataset name -> threshold -> results set */
		HashMap>>> resA2W;
		
		/** Run the experiments for varying thresholds, store the resulting measures to resA2W */
		resA2W = RunExperiments.performA2WExpVarThreshold(matchRelationsA2W, null, sa2wAnnotators, dssA2W, wikiApi);
		
		Pair p =  RunExperiments.getBestRecord(resA2W, wam.getName(), tagme.getName(), iitbDs.getName());
		System.out.printf("The best micro-F1 for %s on %s is achieved with a threshold of $.3f. Its value is %.3f.%n",tagme.getName(), iitbDs.getName(), p.first, p.second.getMicroF1());
		
		/** Print the results about correctness (F1, precision, recall) to the screen */
		DumpResults.printCorrectnessPerformance(matchRelationsA2W, null, sa2wAnnotators, null, null, dssA2W, resA2W);
		
		/** Print the results about correctness to the screen as a Latex table */
		DumpResults.latexCorrectnessPerformance(matchRelationsA2W, null, null, sa2wAnnotators, null, null, dssA2W, false, true, false, resA2W);

		/** Output the results in a gnuplot data .dat file that can then be given to Gnuplot*/
		DumpResults.gnuplotCorrectnessPerformance(matchRelationsA2W, sa2wAnnotators, dssA2W, wikiApi, resA2W);

		/*********** C2W experiments ************/
		
		/** Hashmap for saving the measurements results. */
		HashMap>>> resC2W;

		/** Create a vector containing the match relations we want to base our measurements for the C2W Experiment on. */
		Vector> matchRelationsC2W = new Vector>();
		matchRelationsC2W.add(new StrongTagMatch(wikiApi));
		
		/** Run the experiments for varying thresholds, store the resulting measures to resC2W */
		resC2W = RunExperiments.performC2WExpVarThreshold(matchRelationsC2W, null, sa2wAnnotators, null, null, dssC2W, wikiApi);

		/** Print the results about correctness (F1, precision, recall) to the screen */
		DumpResults.printCorrectnessPerformance(matchRelationsC2W, null, sa2wAnnotators, null, null, dssC2W, resC2W);
		
		/** Print the results about correctness to the screen as a Latex table */
		DumpResults.latexCorrectnessPerformance(matchRelationsC2W, null, null, sa2wAnnotators, null, null, dssC2W, false, true ,false, resC2W);

		/** Output the results in a gnuplot data .dat file that can then be given to Gnuplot*/
		DumpResults.gnuplotCorrectnessPerformance(matchRelationsC2W, sa2wAnnotators, dssC2W, wikiApi, resC2W);

		/** Timing tables in two forms */
		Vector dss = new Vector();
		dss.addAll(dssC2W);
		dss.addAll(dssA2W);
		DumpResults.latexTimingPerformance(null, sa2wAnnotators, null, dss);
		DumpResults.latexTimingPerformance2(sa2wAnnotators, null, dss);
		
		/** F1-runtime plot, just for the IITB dataset. */
		DumpResults.gnuplotRuntimeF1(wam.getName(), null, sa2wAnnotators, iitbDs.getName(), wikiApi, resA2W);
		
		/** Output annotations similarity in a latex table. */
		DumpResults.latexSimilarityA2W(dssA2W, sa2wAnnotators, resA2W, wikiApi);

		wikiApi.flush();
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy