it.unipi.di.acube.batframework.utils.DumpResults (bat-framework) - Maven / Gradle / Ivy
A framework to compare entity annotation systems.
/**
* (C) Copyright 2012-2013 A-cube lab - Università di Pisa - Dipartimento di Informatica.
* BAT-Framework is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
* BAT-Framework is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with BAT-Framework. If not, see <http://www.gnu.org/licenses/>.
*/
package it.unipi.di.acube.batframework.utils;
import it.unipi.di.acube.batframework.cache.BenchmarkCache;
import it.unipi.di.acube.batframework.data.*;
import it.unipi.di.acube.batframework.metrics.*;
import it.unipi.di.acube.batframework.problems.*;
import java.io.*;
import java.util.*;
/**
* This class contains the methods to dump the results of an experiment:
* to the screen (print* methods), in latex form (latex* methods), or as
* gnuplot data used to generate charts (gnuplot* methods).
*
*/
public class DumpResults {
private static final Locale LOCALE = Locale.US;
private static final String CHARTS_DIR = "charts/";
private static class StringLengthComparator implements Comparator<String> {
public int compare(String o1, String o2) {
return o1.length() - o2.length();
}
}
/**
* Writes (as {@code .dat} gnuplot data) the micro-F1, micro-precision and
* micro-recall achieved for each combination of Match relation - Annotator
* - Dataset, varying the score threshold in [0,1], to files of the form
* {@code matchrelation_measure_threshold_annotatorname_datasetname.dat},
* where measure is one of precision, recall, f1. Note that this will generate
* {@code 3 * matchRels.size() * annotators.size() * dss.size()} files. Also
* note that the experiments must have already been performed and their
* results stored in {@code threshRecords}.
*
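* <p>
* A minimal usage sketch (illustrative only, not part of the original
* documentation): the {@code annotators}, {@code datasets} and
* {@code wikiApi} objects are assumed to have been created elsewhere, and
* {@code records} to have been filled beforehand via {@link RunExperiments}.
*
* <pre>{@code
* Vector<MatchRelation<Annotation>> matchRels = new Vector<MatchRelation<Annotation>>();
* matchRels.add(new WeakAnnotationMatch(wikiApi));
* // records maps: metric name -> annotator name -> dataset name -> (threshold -> results)
* DumpResults.gnuplotCorrectnessPerformance(matchRels, annotators, datasets,
*         wikiApi, records);
* }</pre>
*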
* @param matchRels
* the set of Match relations.
* @param annotators
* the set of annotators.
* @param dss
* the set of datasets.
* @param api
* the API to Wikipedia (needed to print information about
* annotations/tags).
* @param threshRecords
* the hashmap in the form metric -> annotator -> dataset ->
* (threshold, results) where the results are stored.
* @param <T1>
* the type of system.
* @param <T2>
* the type of system's output.
* @param <T3>
* the type of dataset.
* @throws IOException
* if something went wrong while querying the Wikipedia API.
*/
public static <T1 extends TopicSystem, T2 extends Tag, T3 extends TopicDataset> void gnuplotCorrectnessPerformance(
Vector<MatchRelation<T2>> matchRels,
List<T1> annotators,
Vector<T3> dss,
WikipediaInterface api,
HashMap<String, HashMap<String, HashMap<String, HashMap<Float, MetricsResultSet>>>> threshRecords)
throws IOException {
for (MatchRelation m : matchRels)
for (T3 ds : dss) {
for (T1 t : annotators) {
System.out.println("Writing to gnuplot-file "
+ ds.getName() + "/" + t.getName()
+ " with varying score threshold...");
String prefix = CHARTS_DIR
+ m.getName().replaceAll("[^a-zA-Z0-9]", "")
.toLowerCase();
String suffix = t.getName().replaceAll("[^a-zA-Z0-9]", "")
.toLowerCase()
+ "_"
+ ds.getName().replaceAll("[^a-zA-Z0-9]", "")
.toLowerCase() + ".dat";
OutputStreamWriter precOs = new OutputStreamWriter(
new FileOutputStream(prefix
+ "_precision_threshold_" + suffix));
OutputStreamWriter recOs = new OutputStreamWriter(
new FileOutputStream(prefix + "_recall_threshold_"
+ suffix));
OutputStreamWriter f1Os = new OutputStreamWriter(
new FileOutputStream(prefix + "_f1_threshold_"
+ suffix));
HashMap<Float, MetricsResultSet> records = RunExperiments
.getRecords(threshRecords, m.getName(),
t.getName(), ds.getName());
List<Float> thresholds = new Vector<Float>(records.keySet());
Collections.sort(thresholds);
for (Float thr : thresholds) {
MetricsResultSet rs = records.get(thr);
System.out.printf(
LOCALE,
t.getName() + " - " + ds.getName() + " " + thr
+ " tp:" + rs.getGlobalTp() + " fp:"
+ rs.getGlobalFp() + " fn:"
+ rs.getGlobalFn() + " prec:"
+ rs.getMicroPrecision() + " rec:"
+ rs.getMicroRecall() + " f1:"
+ rs.getMicroF1() + "\n");
precOs.write(String.format(LOCALE, "%f\t%f%n", thr,
rs.getMicroPrecision()));
recOs.write(String.format(LOCALE, "%f\t%f%n", thr,
rs.getMicroRecall()));
f1Os.write(String.format(LOCALE, "%f\t%f%n", thr,
rs.getMicroF1()));
}
precOs.close();
recOs.close();
f1Os.close();
}
}
System.out.println("Flushing Wikipedia API cache...");
api.flush();
}
/**
* Writes a file {@code runtime_f1.dat} storing (best achieved micro-F1,
* avg. runtime, annotator name) rows for a single dataset and a single match relation. Note
* that the experiments must have already been performed and their results
* stored in {@code threshRecords}.
*
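* <p>
* A minimal usage sketch (illustrative only; the annotator vector, the
* dataset and the {@code records} map are assumptions, built and filled
* elsewhere):
*
* <pre>{@code
* // Plot best micro-F1 vs. average runtime for the Sa2W annotators only:
* DumpResults.gnuplotRuntimeF1(new WeakAnnotationMatch(wikiApi).getName(),
*         null, sa2wAnnotators, dataset.getName(), wikiApi, records);
* }</pre>
*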
* @param matchRelName
* The match relation used to compute the micro-F1.
* @param a2wAnnotators
* The set of A2W Annotators for which a pair must be included in
* the output.
* @param sa2wAnnotators
* The set of Sa2W Annotators for which a pair must be included.
* @param datasetName
* The name of the dataset for which the pairs must be included.
* @param api
* the API to Wikipedia (needed to print information about
* annotations/tags).
* @param threshRecords
* the hashmap in the form metric -> annotator -> dataset ->
* (threshold, results) where the results are stored.
* @throws IOException
* if something went wrong while querying the Wikipedia API.
* @throws Exception
* if the cache does not contain records about the given
* dataset/annotator avg. time.
*/
public static void gnuplotRuntimeF1(
String matchRelName,
Vector<A2WSystem> a2wAnnotators,
Vector<Sa2WSystem> sa2wAnnotators,
String datasetName,
WikipediaInterface api,
HashMap<String, HashMap<String, HashMap<String, HashMap<Float, MetricsResultSet>>>> threshRecords)
throws IOException, Exception {
OutputStreamWriter runTimeF1Stream = new OutputStreamWriter(
new FileOutputStream(CHARTS_DIR + "runtime_f1.dat"));
if (a2wAnnotators != null)
for (A2WSystem t : a2wAnnotators)
runTimeF1Stream.write(String.format(LOCALE, "%f\t%d\t\"%s\"\n",
RunExperiments.getBestRecord(threshRecords,
matchRelName, t.getName(), datasetName).second
.getMicroF1(), (int) BenchmarkCache
.getAvgA2WTimingsForDataset(t.getName(),
datasetName), t.getName()));
if (sa2wAnnotators != null)
for (Sa2WSystem t : sa2wAnnotators)
runTimeF1Stream.write(String.format(LOCALE, "%f\t%d\t\"%s\"\n",
RunExperiments.getBestRecord(threshRecords,
matchRelName, t.getName(), datasetName).second
.getMicroF1(), (int) BenchmarkCache
.getAvgSa2WTimingsForDataset(t.getName(),
datasetName), t.getName()));
runTimeF1Stream.close();
}
/**
* Prints a latex table reporting the best micro-F1 achieved by each
* (annotator, dataset) pair. A table is printed for the metrics based
* on each match relation passed in {@code matchRels}.
*
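* <p>
* A minimal usage sketch (illustrative only; {@code matchRels},
* {@code sa2wAnnotators}, {@code datasets} and {@code records} are assumed
* to exist already, and unused annotator vectors are passed as null):
*
* <pre>{@code
* // One latex table per match relation, with micro- and macro-measures
* // but without the tp/fp/fn counts:
* DumpResults.latexCorrectnessPerformance(matchRels, null, null, sa2wAnnotators,
*         null, null, datasets, false, true, true, records);
* }</pre>
*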
* @param matchRels
* the set of Match relations for which a table will be printed.
* @param a2wAnnotators
* The set of A2W Annotators for which the best result will be
* included in the output.
* @param d2wAnnotators
* The set of D2W Annotators for which the best result will be
* included in the output.
* @param sa2wAnnotators
* The set of Sa2W Annotators for which the best result will be
* included.
* @param sc2wAnnotators
* The set of Sc2W Annotators for which the best result will be
* included.
* @param c2wAnnotators
* The set of C2W Annotators for which the best result will be
* included in the output.
* @param dss
* The datasets for which the best result will be included.
* @param includeTpFpFn
* whether or not to include the total number of TP, FP and FN in
* the table.
* @param includeMicro
* whether or not to include micro-measures in the table.
* @param includeMacro
* whether or not to include macro-measures in the table.
* @param threshRecords
* the hashmap in the form metric -> annotator -> dataset ->
* (threshold, results) where the results are stored.
* @param <T2>
* the type of data on which the match relation operates.
* @param <T3>
* the type of dataset.
*/
public static <T2 extends Tag, T3 extends TopicDataset> void latexCorrectnessPerformance(
Vector<MatchRelation<T2>> matchRels,
Vector<A2WSystem> a2wAnnotators,
Vector<D2WSystem> d2wAnnotators,
Vector<Sa2WSystem> sa2wAnnotators,
Vector<Sc2WSystem> sc2wAnnotators,
Vector<C2WSystem> c2wAnnotators,
Vector<T3> dss,
boolean includeTpFpFn,
boolean includeMicro,
boolean includeMacro,
HashMap<String, HashMap<String, HashMap<String, HashMap<Float, MetricsResultSet>>>> threshRecords) {
System.out.println("Correctness performance - latex output");
Vector<TopicSystem> allAnns = new Vector<TopicSystem>();
if (sa2wAnnotators != null)
allAnns.addAll(sa2wAnnotators);
if (sc2wAnnotators != null)
allAnns.addAll(sc2wAnnotators);
if (a2wAnnotators != null)
allAnns.addAll(a2wAnnotators);
if (d2wAnnotators != null)
allAnns.addAll(d2wAnnotators);
if (c2wAnnotators != null)
allAnns.addAll(c2wAnnotators);
for (MatchRelation m : matchRels) {
System.out.println("+++ Match Relation: " + m.getName());
System.out
.printf(LOCALE,
"\\hline \n Dataset & Annotator & Best Threshold"
+ (includeMicro ? " & $F1_{micro}$ & $P_{micro}$ & $R_{micro}$ "
: "")
+ (includeMacro ? " & $F1_{macro}$ & $P_{macro}$ & $R_{macro}$ "
: "")
+ (includeTpFpFn ? "& tp & fp & fn" : "")
+ "\\\\ \n \\hline%n");
for (TopicDataset d : dss) {
long len = 0;
for (String s : d.getTextInstanceList())
len += s.length();
System.out
.printf(LOCALE,
"\\multirow{%d}{*}{\\parbox{.20\\textwidth}{%s \\newline(avg-len\\newline %d chars)}} %n",
allAnns.size(), d.getName(),
(int) ((float) len / (float) d.getSize()));
for (TopicSystem t : allAnns) {
Pair<Float, MetricsResultSet> values = RunExperiments
.getBestRecord(threshRecords, m.getName(),
t.getName(), d.getName());
System.out.printf(LOCALE, "& %s & $%.3f$ ", t.getName(),
values.first);
if (includeMicro)
System.out.printf(LOCALE,
"& $%.1f$ & $%.1f$ & $%.1f$ ",
values.second.getMicroF1() * 100f,
values.second.getMicroPrecision() * 100f,
values.second.getMicroRecall() * 100f);
if (includeMacro)
System.out.printf(LOCALE,
"& $%.1f$ & $%.1f$ & $%.1f$ ",
values.second.getMacroF1() * 100f,
values.second.getMacroPrecision() * 100f,
values.second.getMacroRecall() * 100f);
if (includeTpFpFn)
System.out.printf(LOCALE, "& $%d$ & $%d$ & $%d$",
values.second.getGlobalTp(),
values.second.getGlobalFp(),
values.second.getGlobalFn());
int nColumns = 3 + (includeTpFpFn?3:0)+(includeMacro?3:0)+(includeMicro?3:0);
System.out.printf(LOCALE, " \\\\ \\cline{2-"
+ nColumns + "}%n");
}
System.out.printf(LOCALE, "\\hline%n");
}
}
}
/**
* Print a set of latex tables reporting the similarity between annotators
* in solving the Sa2W problem, for each dataset given in {@code dssA2W}. A
* table will be printed for each of these focuses:
* 1- similarity of the whole output ("wholeOutput")
* 2- similarity of the output, restricted to the true positives ("TPonly")
* 3- similarity of the mentions ("mention")
* 4- similarity of the concepts ("concept")
* 5- similarity of mentions and concepts, side by side ("mention/concept")
*
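* <p>
* A minimal usage sketch (illustrative only; the dataset and annotator
* vectors, the {@code records} map and {@code wikiApi} are assumed to
* exist already):
*
* <pre>{@code
* DumpResults.latexSimilarityA2W(a2wDatasets, sa2wAnnotators, records, wikiApi);
* }</pre>
*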
* @param dssA2W
* the datasets for which the set of tables will be printed.
* @param sa2wAnnotators
* the Sa2W annotators whose similarity will be printed.
* @param threshRecords
* the hashmap in the form metric -> annotator -> dataset ->
* (threshold, results) where the results are stored.
* @param api
* the API to Wikipedia (needed to print information about
* annotations/tags).
* @param <T3>
* the type of dataset.
* @throws Exception
* if anything went wrong while retrieving the results.
*/
public static <T3 extends A2WDataset> void latexSimilarityA2W(
Vector<T3> dssA2W,
Vector<Sa2WSystem> sa2wAnnotators,
HashMap<String, HashMap<String, HashMap<String, HashMap<Float, MetricsResultSet>>>> threshRecords,
WikipediaInterface api) throws Exception {
/** Difference between systems & "Jaccard" measure */
for (String focus : new String[] { "wholeOutput", "TPonly", "mention",
"concept", "mention/concept" }) {
System.out.println("Similarity measures - latex output - " + focus);
for (A2WDataset ds : dssA2W) {
System.out.println("Dataset: " + ds.getName());
for (Sa2WSystem t : sa2wAnnotators)
System.out.printf(LOCALE, "&%s", t.getName());
System.out.printf(LOCALE, "\\\\%n");
System.out.printf(LOCALE, "\\hline%n");
for (int i = 0; i < sa2wAnnotators.size(); i++) {
Sa2WSystem t1 = sa2wAnnotators.get(i);
System.out.printf(LOCALE, t1.getName());
for (int j = 0; j < sa2wAnnotators.size(); j++) {
if (j < i) {
System.out.printf(LOCALE, "&");
continue;
}
Sa2WSystem t2 = sa2wAnnotators.get(j);
List<HashSet<ScoredAnnotation>> t1Annotations = BenchmarkCache
.doSa2WAnnotations(t1, ds, null, 0);
List<HashSet<ScoredAnnotation>> t2Annotations = BenchmarkCache
.doSa2WAnnotations(t2, ds, null, 0);
List<HashSet<Annotation>> out1 = null;
List<HashSet<Annotation>> out2 = null;
MatchRelation<Annotation> m = null;
Metrics<Annotation> metrics = new Metrics<Annotation>();
if (focus.equals("wholeOutput")) {
m = new WeakAnnotationMatch(api);
out1 = ProblemReduction.Sa2WToA2WList(
t1Annotations, RunExperiments
.getBestRecord(threshRecords,
m.getName(), t1.getName(),
ds.getName()).first);
out2 = ProblemReduction.Sa2WToA2WList(
t2Annotations, RunExperiments
.getBestRecord(threshRecords,
m.getName(), t2.getName(),
ds.getName()).first);
System.out
.printf(LOCALE, "&$%.1f$",
metrics.macroSimilarity(out1, out2,
m) * 100);
} else if (focus.equals("TPonly")) {
m = new WeakAnnotationMatch(api);
List<HashSet<Annotation>> reducedT1Tags = ProblemReduction
.Sa2WToA2WList(
t1Annotations,
RunExperiments.getBestRecord(
threshRecords, m.getName(),
t1.getName(), ds.getName()).first);
List<HashSet<Annotation>> reducedT2Tags = ProblemReduction
.Sa2WToA2WList(
t2Annotations,
RunExperiments.getBestRecord(
threshRecords, m.getName(),
t2.getName(), ds.getName()).first);
out1 = metrics.getTp(ds.getA2WGoldStandardList(),
reducedT1Tags, m);
out2 = metrics.getTp(ds.getA2WGoldStandardList(),
reducedT2Tags, m);
System.out
.printf(LOCALE, "&$%.1f$",
metrics.macroSimilarity(out1, out2,
m) * 100);
} else if (focus.equals("mention")) {
m = new MentionAnnotationMatch();
List<HashSet<Annotation>> reducedT1Tags = ProblemReduction
.Sa2WToA2WList(
t1Annotations,
RunExperiments.getBestRecord(
threshRecords, m.getName(),
t1.getName(), ds.getName()).first);
List<HashSet<Annotation>> reducedT2Tags = ProblemReduction
.Sa2WToA2WList(
t2Annotations,
RunExperiments.getBestRecord(
threshRecords, m.getName(),
t2.getName(), ds.getName()).first);
out1 = metrics.getTp(ds.getA2WGoldStandardList(),
reducedT1Tags, m);
out2 = metrics.getTp(ds.getA2WGoldStandardList(),
reducedT2Tags, m);
System.out
.printf(LOCALE, "&$%.1f$",
metrics.macroSimilarity(out1, out2,
m) * 100);
} else if (focus.equals("concept")) {
m = new ConceptAnnotationMatch(api);
List<HashSet<Annotation>> reducedT1Tags = ProblemReduction
.Sa2WToA2WList(
t1Annotations,
RunExperiments.getBestRecord(
threshRecords, m.getName(),
t1.getName(), ds.getName()).first);
List<HashSet<Annotation>> reducedT2Tags = ProblemReduction
.Sa2WToA2WList(
t2Annotations,
RunExperiments.getBestRecord(
threshRecords, m.getName(),
t2.getName(), ds.getName()).first);
out1 = metrics.getTp(ds.getA2WGoldStandardList(),
reducedT1Tags, m);
out2 = metrics.getTp(ds.getA2WGoldStandardList(),
reducedT2Tags, m);
System.out
.printf(LOCALE, "&$%.1f$",
metrics.macroSimilarity(out1, out2,
m) * 100);
} else if (focus.equals("mention/concept")) {
m = new MentionAnnotationMatch();
List<HashSet<Annotation>> reducedT1Tags = ProblemReduction
.Sa2WToA2WList(
t1Annotations,
RunExperiments.getBestRecord(
threshRecords, m.getName(),
t1.getName(), ds.getName()).first);
List<HashSet<Annotation>> reducedT2Tags = ProblemReduction
.Sa2WToA2WList(
t2Annotations,
RunExperiments.getBestRecord(
threshRecords, m.getName(),
t2.getName(), ds.getName()).first);
out1 = metrics.getTp(ds.getA2WGoldStandardList(),
reducedT1Tags, m);
out2 = metrics.getTp(ds.getA2WGoldStandardList(),
reducedT2Tags, m);
m = new ConceptAnnotationMatch(api);
System.out
.printf(LOCALE, "&$%.0f$/",
metrics.macroSimilarity(out1, out2,
m) * 100);
reducedT1Tags = ProblemReduction.Sa2WToA2WList(
t1Annotations, RunExperiments
.getBestRecord(threshRecords,
m.getName(), t1.getName(),
ds.getName()).first);
reducedT2Tags = ProblemReduction.Sa2WToA2WList(
t2Annotations, RunExperiments
.getBestRecord(threshRecords,
m.getName(), t2.getName(),
ds.getName()).first);
out1 = metrics.getTp(ds.getA2WGoldStandardList(),
reducedT1Tags, m);
out2 = metrics.getTp(ds.getA2WGoldStandardList(),
reducedT2Tags, m);
System.out
.printf(LOCALE, "$%.0f$",
metrics.macroSimilarity(out1, out2,
m) * 100);
}
}
System.out.printf(LOCALE, "\\\\%n");
System.out.printf(LOCALE, "\\hline%n");
}
}
}
}
/**
* Print a set of latex tables reporting the similarity between annotators
* in solving the C2W problem, for each dataset given in {@code dssC2W}.
*
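* <p>
* A minimal usage sketch (illustrative only; all arguments are assumed to
* have been prepared elsewhere, as for the A2W variant above):
*
* <pre>{@code
* DumpResults.latexSimilarityC2W(c2wDatasets, sa2wAnnotators, records, wikiApi);
* }</pre>
*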
* @param dssC2W
* the datasets for which the set of tables will be printed.
* @param sa2wAnnotators
* the Sa2W annotators whose similarity will be printed.
* @param threshRecords
* the hashmap in the form metric -> annotator -> dataset ->
* (threshold, results) where the results are stored.
* @param api
* the API to Wikipedia (needed to print information about
* annotations/tags).
* @throws Exception
* if anything went wrong while retrieving the results.
*/
public static void latexSimilarityC2W(
Vector<C2WDataset> dssC2W,
Vector<Sa2WSystem> sa2wAnnotators,
HashMap<String, HashMap<String, HashMap<String, HashMap<Float, MetricsResultSet>>>> threshRecords,
WikipediaInterface api) throws Exception {
System.out.println("Similarity measures - latex output - tp C2W");
for (C2WDataset ds : dssC2W) {
System.out.println("Dataset: " + ds.getName());
for (Sa2WSystem t : sa2wAnnotators)
System.out.printf(LOCALE, "&" + t.getName());
System.out.printf(LOCALE, "\\\\%n");
System.out.printf(LOCALE, "\\hline%n");
for (int i = 0; i < sa2wAnnotators.size(); i++) {
Sa2WSystem t1 = sa2wAnnotators.get(i);
System.out.printf(LOCALE, t1.getName());
for (int j = 0; j < sa2wAnnotators.size(); j++) {
if (j < i) {
System.out.printf(LOCALE, "&");
continue;
}
Sa2WSystem t2 = sa2wAnnotators.get(j);
MatchRelation<Tag> m = new StrongTagMatch(api);
Metrics<Tag> metrics = new Metrics<Tag>();
List<HashSet<ScoredAnnotation>> t1Annotations = BenchmarkCache
.doSa2WAnnotations(t1, ds, null, 0);
List<HashSet<ScoredAnnotation>> t2Annotations = BenchmarkCache
.doSa2WAnnotations(t2, ds, null, 0);
List<HashSet<Annotation>> reducedT1Anns = ProblemReduction
.Sa2WToA2WList(t1Annotations, RunExperiments
.getBestRecord(threshRecords, m.getName(),
t1.getName(), ds.getName()).first);
List<HashSet<Annotation>> reducedT2Anns = ProblemReduction
.Sa2WToA2WList(t2Annotations, RunExperiments
.getBestRecord(threshRecords, m.getName(),
t2.getName(), ds.getName()).first);
List<HashSet<Tag>> reducedT1Tags = ProblemReduction
.A2WToC2WList(reducedT1Anns);
List<HashSet<Tag>> reducedT2Tags = ProblemReduction
.A2WToC2WList(reducedT2Anns);
List<HashSet<Tag>> tponlyTagsT1 = metrics.getTp(
ds.getC2WGoldStandardList(), reducedT1Tags, m);
List<HashSet<Tag>> tponlyTagsT2 = metrics.getTp(
ds.getC2WGoldStandardList(), reducedT2Tags, m);
System.out.printf(LOCALE, "&$%.3f$", metrics
.macroSimilarity(tponlyTagsT1, tponlyTagsT2, m));
}
System.out.printf(LOCALE, "\\\\%n");
System.out.printf(LOCALE, "\\hline%n");
}
}
}
/**
* Prints (in latex form) a table representing the average time needed by
* all systems to solve the instances of a dataset. Rows are grouped by
* dataset and are in the form (dataset, annotator, avg. time).
*
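* <p>
* A minimal usage sketch (illustrative only; the annotator and dataset
* vectors are assumptions, and unused annotator vectors may be null):
*
* <pre>{@code
* DumpResults.latexTimingPerformance(null, sa2wAnnotators, null, datasets);
* }</pre>
*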
* @param a2wAnnotators
* The A2W annotators that will be included in the output.
* @param sa2wAnnotators
* The Sa2W annotators that will be included in the output.
* @param sc2wAnnotators
* The Sc2W annotators that will be included in the output.
* @param dss
* The datasets that will be included in the output.
* @param <T3>
* the type of dataset.
* @throws Exception
* if the cache does not contain records about the given
* dataset/annotator avg. time.
*/
public static <T3 extends TopicDataset> void latexTimingPerformance(
Vector<A2WSystem> a2wAnnotators, Vector<Sa2WSystem> sa2wAnnotators,
Vector<Sc2WSystem> sc2wAnnotators, Vector<T3> dss) throws Exception {
System.out.println("Timing performance - latex output");
int allAnnsSize = 0;
if (sa2wAnnotators != null)
allAnnsSize += sa2wAnnotators.size();
if (a2wAnnotators != null)
allAnnsSize += a2wAnnotators.size();
if (sc2wAnnotators != null)
allAnnsSize += sc2wAnnotators.size();
System.out.printf(LOCALE,
"\\hline \nDataset & Tagger & Average Time\\\\ \n \\hline%n");
for (TopicDataset d : dss) {
long len = 0;
for (String s : d.getTextInstanceList())
len += s.length();
System.out
.printf(LOCALE,
"\\multirow{%d}{*}{\\parbox{.40\\textwidth}{%s \\newline(len %d)}} %n",
allAnnsSize, d.getName(),
(int) ((float) len / (float) d.getSize()));
if (a2wAnnotators != null)
for (A2WSystem t : a2wAnnotators)
System.out.printf(
LOCALE,
"& %s & $%d$ms \\\\ \\cline{2-3}%n",
t.getName(),
(int) BenchmarkCache.getAvgA2WTimingsForDataset(
t.getName(), d.getName()));
if (sa2wAnnotators != null)
for (Sa2WSystem t : sa2wAnnotators)
System.out.printf(
LOCALE,
"& %s & $%d$ms \\\\ \\cline{2-3}%n",
t.getName(),
(int) BenchmarkCache.getAvgSa2WTimingsForDataset(
t.getName(), d.getName()));
System.out.printf(LOCALE, "\\hline%n");
}
}
/**
* Prints (in latex form) a table representing the average time needed by a
* system to solve the instances of a dataset. Rows are in the form
* (annotator, avg. time on dataset 1, avg. time on dataset 2, ...).
*
* @param sa2wAnnotators
* The Sa2W annotators that will be included in the output.
* @param sc2wAnnotators
* The Sc2W annotators that will be included in the output.
* @param dss
* The datasets that will be included in the output.
* @param <T3>
* the type of dataset.
* @throws Exception
* if the cache does not contain records about the given
* dataset/annotator avg. time.
*/
public static <T3 extends TopicDataset> void latexTimingPerformance2(
Vector<Sa2WSystem> sa2wAnnotators, Vector<Sc2WSystem> sc2wAnnotators,
Vector<T3> dss) throws Exception {
System.out.println("Timing performance - latex output 2");
System.out.printf(LOCALE, "\\hline \nSystem ");
for (TopicDataset ds : dss)
System.out.printf(LOCALE, " & " + ds.getName());
System.out.printf(LOCALE, " \n \\hline%n");
if (sa2wAnnotators != null)
for (Sa2WSystem t : sa2wAnnotators) {
System.out.printf(LOCALE, t.getName());
for (TopicDataset d : dss)
System.out.printf(
LOCALE,
" & $%d$ ",
(int) BenchmarkCache.getAvgSa2WTimingsForDataset(
t.getName(), d.getName()));
System.out.printf(LOCALE, "\\\\%n");
System.out.printf(LOCALE, "\\hline%n");
}
if (sc2wAnnotators != null)
for (Sc2WSystem t : sc2wAnnotators) {
System.out.printf(LOCALE, t.getName());
for (TopicDataset d : dss)
System.out.printf(
LOCALE,
" & $%d$ ",
(int) BenchmarkCache.getAvgSa2WTimingsForDataset(
t.getName(), d.getName()));
System.out.printf(LOCALE, "\\\\%n");
System.out.printf(LOCALE, "\\hline%n");
}
}
public static <T2 extends Tag, T3 extends TopicDataset> void printCorrectnessPerformance(
Vector<MatchRelation<T2>> matchRels,
Vector<A2WSystem> a2wAnnotators,
Vector<Sa2WSystem> sa2wAnnotators,
Vector<C2WSystem> c2wAnnotators,
Vector<D2WSystem> d2wAnnotators,
Vector<T3> dss,
HashMap<String, HashMap<String, HashMap<String, HashMap<Float, MetricsResultSet>>>> threshRecords,
boolean printMicro, boolean printMacro, boolean printTpFpFn,
float threshold) {
Vector<TopicSystem> allAnns = new Vector<TopicSystem>();
if (sa2wAnnotators != null)
allAnns.addAll(sa2wAnnotators);
if (c2wAnnotators != null)
allAnns.addAll(c2wAnnotators);
if (a2wAnnotators != null)
allAnns.addAll(a2wAnnotators);
if (d2wAnnotators != null)
allAnns.addAll(d2wAnnotators);
System.out.println("Correctness performance [F1/prec/rec]");
for (MatchRelation metric : matchRels) {
System.out.printf(LOCALE, "Best results (metrics: %s):%n",
metric.getName());
for (TopicDataset d : dss)
for (TopicSystem t : allAnns) {
Pair<Float, MetricsResultSet> result = null;
if (threshold >= 0)
result = new Pair<Float, MetricsResultSet>(threshold,
RunExperiments.getRecords(threshRecords,
metric.getName(), t.getName(),
d.getName()).get(threshold));
else
result = RunExperiments.getBestRecord(threshRecords,
metric.getName(), t.getName(), d.getName());
System.out.printf(LOCALE, "%s\t%s\t%.3f\t", d.getName(),
t.getName(), result.first);
if (printMicro)
System.out.printf(LOCALE, "[mic: %.3f\t%.3f\t%.3f] ",
result.second.getMicroF1(),
result.second.getMicroPrecision(),
result.second.getMicroRecall());
if (printMacro)
System.out.printf(LOCALE, "[mac: %.3f\t%.3f\t%.3f] ",
result.second.getMacroF1(),
result.second.getMacroPrecision(),
result.second.getMacroRecall());
if (printTpFpFn)
System.out.printf(LOCALE, "TP/FP/FN: %d/%d/%d",
result.second.getGlobalTp(),
result.second.getGlobalFp(),
result.second.getGlobalFn());
System.out.println();
}
}
}
/**
* Print the best micro- and macro- measures achieved by each (annotator,
* dataset) pair along with other data. Data is printed for the metrics
* based on each match relation passed in {@code matchRels}.
*
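* <p>
* A minimal usage sketch (illustrative only; the arguments are assumed to
* exist already). This overload prints micro- and macro-measures at the
* best threshold and omits the tp/fp/fn counts:
*
* <pre>{@code
* DumpResults.printCorrectnessPerformance(matchRels, null, sa2wAnnotators,
*         null, null, datasets, records);
* }</pre>
*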
* @param matchRels
* the set of Match relations for which a table will be printed.
* @param a2wAnnotators
* The set of A2W Annotators for which the best result will be
* included in the output.
* @param d2wAnnotators
* The set of D2W Annotators for which the best result will be
* included in the output.
* @param sa2wAnnotators
* The set of Sa2W Annotators for which the best result will be
* included.
* @param c2wAnnotators
* The set of C2W Annotators for which the best result will be
* included.
* @param dss
* The datasets for which the best result will be included.
* @param threshRecords
* the hashmap in the form metric -> annotator -> dataset ->
* (threshold, results) where the results are stored.
* @param <T2>
* the type of data on which the match relation operates.
* @param <T3>
* the type of dataset.
*/
public static <T2 extends Tag, T3 extends TopicDataset> void printCorrectnessPerformance(
Vector<MatchRelation<T2>> matchRels,
Vector<A2WSystem> a2wAnnotators,
Vector<Sa2WSystem> sa2wAnnotators,
Vector<C2WSystem> c2wAnnotators,
Vector<D2WSystem> d2wAnnotators,
Vector<T3> dss,
HashMap<String, HashMap<String, HashMap<String, HashMap<Float, MetricsResultSet>>>> threshRecords) {
printCorrectnessPerformance(matchRels, a2wAnnotators, sa2wAnnotators,
c2wAnnotators, d2wAnnotators, dss, threshRecords, true, true,
false, -1);
}
/**
* Prints the similarity, dissimilarity, and union of the results for each
* pair of annotators.
*
* @param dssA2W
* The datasets for which the data will be printed.
* @param sa2wAnnotators
* The annotators for which the data will be printed.
* @param api
* the API to Wikipedia (needed to print information about
* annotations/tags).
* @param threshRecords
* the hashmap in the form metric -> annotator -> dataset ->
* (threshold, results) where the results are stored.
* @throws Exception
* if something went wrong while retrieving the results.
*/
public static void printDissimilarityA2W(
Vector<A2WDataset> dssA2W,
Vector<Sa2WSystem> sa2wAnnotators,
HashMap<String, HashMap<String, HashMap<String, HashMap<Float, MetricsResultSet>>>> threshRecords,
WikipediaInterface api) throws Exception {
Metrics<Annotation> metrics = new Metrics<Annotation>();
WeakAnnotationMatch m = new WeakAnnotationMatch(api);
for (A2WDataset ds : dssA2W) {
System.out.println("Dataset: " + ds.getName());
for (int i = 0; i < sa2wAnnotators.size(); i++) {
Sa2WSystem t1 = sa2wAnnotators.get(i);
for (int j = 0; j < sa2wAnnotators.size(); j++) {
if (j <= i)
continue;
Sa2WSystem t2 = sa2wAnnotators.get(j);
System.out.println("Annotator1: " + t1.getName());
System.out.println("Annotator2: " + t2.getName());
List<HashSet<ScoredAnnotation>> t1Annotations = BenchmarkCache
.doSa2WAnnotations(t1, ds, null, 0);
List<HashSet<ScoredAnnotation>> t2Annotations = BenchmarkCache
.doSa2WAnnotations(t2, ds, null, 0);
List<HashSet<Annotation>> reducedT1Tags = ProblemReduction
.Sa2WToA2WList(t1Annotations, RunExperiments
.getBestRecord(threshRecords, m.getName(),
t1.getName(), ds.getName()).first);
List<HashSet<Annotation>> reducedT2Tags = ProblemReduction
.Sa2WToA2WList(t2Annotations, RunExperiments
.getBestRecord(threshRecords, m.getName(),
t2.getName(), ds.getName()).first);
List<HashSet<Annotation>> out1Tp = metrics.getTp(
ds.getA2WGoldStandardList(), reducedT1Tags, m);
List<HashSet<Annotation>> out2Tp = metrics.getTp(
ds.getA2WGoldStandardList(), reducedT2Tags, m);
List<HashSet<Annotation>> out1Fp = metrics.getFp(
ds.getA2WGoldStandardList(), reducedT1Tags, m);
List<HashSet<Annotation>> out2Fp = metrics.getFp(
ds.getA2WGoldStandardList(), reducedT2Tags, m);
long tpUnion = metrics.listUnion(out1Tp, out2Tp, m);
int tpdiss1 = metrics.dissimilarityListCount(out1Tp,
out2Tp, m);
int tpdiss2 = metrics.dissimilarityListCount(out2Tp,
out1Tp, m);
int tpsim = metrics.similarityListCount(out1Tp, out2Tp, m);
long fpUnion = metrics.listUnion(out1Fp, out2Fp, m);
int fpdiss1 = metrics.dissimilarityListCount(out1Fp,
out2Fp, m);
int fpdiss2 = metrics.dissimilarityListCount(out2Fp,
out1Fp, m);
int fpsim = metrics.similarityListCount(out1Fp, out2Fp, m);
System.out.printf(LOCALE,
"Ann1 dissimilarity tp/fp: %d(%.2f)/%d(%.2f)%n",
tpdiss1, (float) tpdiss1
/ (float) (tpdiss1 + fpdiss1), fpdiss1,
(float) fpdiss1 / (float) (tpdiss1 + fpdiss1));
System.out.printf(LOCALE,
"Ann2 dissimilarity tp/fp: %d(%.2f)/%d(%.2f)%n",
tpdiss2, (float) tpdiss2
/ (float) (tpdiss2 + fpdiss2), fpdiss2,
(float) fpdiss2 / (float) (tpdiss2 + fpdiss2));
System.out.printf(LOCALE,
"Anns similarity tp/fp: %d(%.2f)/%d(%.2f)%n",
tpsim, (float) tpsim / (float) (tpsim + fpsim),
fpsim, (float) fpsim / (float) (tpsim + fpsim));
System.out.printf(LOCALE,
"Anns union tp/fp: %d(%.2f)/%d(%.2f)%n", tpUnion,
(float) tpUnion / (float) (tpUnion + fpUnion),
fpUnion, (float) fpUnion
/ (float) (tpUnion + fpUnion));
}
}
}
}
/**
* For each (dataset, annotator) pair, finds the document with the most
* distinct redirect annotations and prints overall redirect statistics.
* (Mainly for debug purposes.)
*
* @param dss
* The datasets.
* @param sa2wAnnotators
* The annotators.
* @param api
* the API to Wikipedia (needed to find out whether an
* annotation/tag is a redirect).
* @throws Exception
* if something went wrong while retrieving the results.
*/
public static void printMostRedirectDocument(Vector<A2WDataset> dss,
Vector<Sa2WSystem> sa2wAnnotators, WikipediaInterface api)
throws Exception {
System.out.println("Returned Redirect");
for (A2WDataset ds : dss) {
for (Sa2WSystem t : sa2wAnnotators) {
int totalTags = 0;
int totalRedirects = 0;
System.out
.printf(LOCALE,
"Finding document with most redirect tags for dataset:%s tagger:%s%n",
ds.getName(), t.getName());
List<HashSet<ScoredAnnotation>> compRes = BenchmarkCache
.doSa2WAnnotations(t, ds, null, 0);
int bestCount = -1;
for (int i = 0; i < compRes.size(); i++) {
HashSet<ScoredAnnotation> comp = compRes.get(i);
int count = 0;
HashSet<Integer> distinctRedirects = new HashSet<Integer>();
for (ScoredAnnotation tag : comp)
if (api.isRedirect(tag.getConcept())) {
count++;
distinctRedirects.add(tag.getConcept());
}
if (bestCount < distinctRedirects.size())
bestCount = distinctRedirects.size();
totalTags += comp.size();
totalRedirects += count;
}
System.out.printf(LOCALE,
"Tags found=%d Redirects=%d redirect/total=%.3f",
totalTags, totalRedirects, (float) totalRedirects
/ (float) totalTags);
}
}
}
/**
* Print data about correctness performance (f1, precision, recall, ...) for
* a given (annotator, dataset) pair.
*
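* <p>
* A minimal usage sketch (illustrative only; {@code tagger}, its output
* {@code taggerOutput} as a {@code List<HashSet<Tag>>}, the dataset and
* {@code wikiApi} are assumed to exist already):
*
* <pre>{@code
* // Compare a tagger's C2W output against a dataset's gold standard
* // under strong tag match:
* DumpResults.printCorrectnessPerformance(tagger, new StrongTagMatch(wikiApi),
*         dataset.getC2WGoldStandardList(), taggerOutput, wikiApi);
* }</pre>
*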
* @param ann
* the annotator.
* @param m
* the match relation which the measurements are based upon.
* @param goldStandard
* the gold standard for the dataset.
* @param output
* the output found by the tagger.
* @param api
* the API to Wikipedia (needed to print information about
* annotations/tags).
* @param <E>
* the type of system output and dataset.
* @throws IOException
* if something went wrong while querying the Wikipedia API.
*/
public static <E extends Tag> void printCorrectnessPerformance(
TopicSystem ann, MatchRelation<E> m, List<HashSet<E>> goldStandard,
List<HashSet<E>> output, WikipediaInterface api)
throws IOException {
Metrics<E> metrics = new Metrics<E>();
MetricsResultSet rs = metrics.getResult(output, goldStandard, m);
System.out.format(LOCALE,
"%s tp:%d fp:%d fn:%d precision: %.3f recall:%.3f F1:%.3f%n",
ann.getName(), rs.getGlobalTp(), rs.getGlobalFp(),
rs.getGlobalFn(), rs.getMacroPrecision(), rs.getMacroRecall(),
rs.getMacroF1());
System.out.format(LOCALE,
"%s micro-precision:%.3f micro-recall:%.3f micro-F1:%.3f%n",
ann.getName(), rs.getMicroPrecision(), rs.getMicroRecall(),
rs.getMicroF1());
}
/**
* Prints a table representing the average time needed by all systems to
* solve the instances of a dataset. Rows are grouped by dataset and are in
* the form (dataset, annotator, avg. time).
*
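* <p>
* A minimal usage sketch (illustrative only; the annotator and dataset
* vectors are assumptions, and unused annotator vectors may be null):
*
* <pre>{@code
* DumpResults.printTimingPerformance(null, sa2wAnnotators, null, a2wDatasets);
* }</pre>
*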
* @param a2wAnnotators
* The A2W annotators that will be included in the output.
* @param sa2wAnnotators
* The Sa2W annotators that will be included in the output.
* @param sc2wAnnotators
* The Sc2W annotators that will be included in the output.
* @param dss
* The datasets that will be included in the output.
* @throws Exception
* if the cache does not contain records about the given
* dataset/annotator avg. time.
*/
public static void printTimingPerformance(Vector<A2WSystem> a2wAnnotators,
Vector<Sa2WSystem> sa2wAnnotators,
Vector<Sc2WSystem> sc2wAnnotators, Vector<A2WDataset> dss)
throws Exception {
if (sa2wAnnotators == null)
sa2wAnnotators = new Vector<Sa2WSystem>();
if (a2wAnnotators == null)
a2wAnnotators = new Vector<A2WSystem>();
if (sc2wAnnotators == null)
sc2wAnnotators = new Vector<Sc2WSystem>();
System.out.println("Timing performance:");
for (A2WDataset d : dss) {
for (A2WSystem t : a2wAnnotators) {
System.out.printf(LOCALE,
"Average time for T2W tagger=%s dataset=%s: %.3f%n", t
.getName(), d.getName(), BenchmarkCache
.getAvgA2WTimingsForDataset(t.getName(),
d.getName()));
if (d.getSize() != BenchmarkCache.getA2WTimingsForDataset(
t.getName(), d.getName()).size())
System.out
.printf(LOCALE,
"ERROR: size of dataset %s and computed results by %s mismatch! %d != %d",
d.getName(),
t.getName(),
d.getSize(),
BenchmarkCache.getA2WTimingsForDataset(
t.getName(), d.getName()).size());
}
for (Sa2WSystem t : sa2wAnnotators) {
System.out.printf(LOCALE,
"Average time for St2W tagger=%s dataset=%s: %.3f%n", t
.getName(), d.getName(), BenchmarkCache
.getAvgSa2WTimingsForDataset(t.getName(),
d.getName()));
if (d.getSize() != BenchmarkCache.getSa2WTimingsForDataset(
t.getName(), d.getName()).size())
System.out
.printf(LOCALE,
"ERROR: size of dataset %s and computed results by %s mismatch! %d != %d",
d.getName(),
t.getName(),
d.getSize(),
BenchmarkCache.getSa2WTimingsForDataset(
t.getName(), d.getName()).size());
}
}
}
/**
* For each annotator passed in {@code annotators}, writes one gnuplot
* {@code timing_annotator_dataset.dat} file containing the time needed by
* the annotator to annotate each document of the dataset, with documents
* ordered by their length.
* Note that the experiments must have already been performed and their
* results stored in the cache when this method is called.
*
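* <p>
* A minimal usage sketch (illustrative only; the annotator vector and the
* dataset are assumed to exist and the timings to be cached already):
*
* <pre>{@code
* // Writes one charts/timing_<annotator>_<dataset>.dat file per annotator,
* // with a (document length, annotation time) pair per line:
* DumpResults.gnuplotTraceTiming(sa2wAnnotators, a2wDataset);
* }</pre>
*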
* @param annotators
* the set of annotators for which a file will be created.
* @param ds
* the dataset.
* @throws Exception
* if there were errors in writing the file or in retrieving the
* timing.
*/
public static void gnuplotTraceTiming(Vector<Sa2WSystem> annotators,
A2WDataset ds) throws Exception {
for (Sa2WSystem annotator : annotators) {
String suffix = annotator.getName().replaceAll("[^a-zA-Z0-9]", "")
.toLowerCase()
+ "_"
+ ds.getName().replaceAll("[^a-zA-Z0-9]", "").toLowerCase()
+ ".dat";
OutputStreamWriter relOs = new OutputStreamWriter(
new FileOutputStream(CHARTS_DIR + "timing_" + suffix));
Vector<String> texts = new Vector<String>(ds.getTextInstanceList());
Collections.sort(texts, new StringLengthComparator());
for (String text : texts) {
long time = BenchmarkCache.getSa2WTiming(annotator.getName(),
ds.getName(), text);
relOs.write(String.format(LOCALE, "%d\t%d%n", text.length(),
time));
}
relOs.close();
}
}
}