All Downloads are FREE. Search and download functionalities are using the official Maven repository.

it.unipi.di.acube.batframework.utils.DumpResults Maven / Gradle / Ivy

/**
 * (C) Copyright 2012-2013 A-cube lab - Università di Pisa - Dipartimento di Informatica. 
 * BAT-Framework is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
 * BAT-Framework is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License along with BAT-Framework.  If not, see <http://www.gnu.org/licenses/>.
 */

package it.unipi.di.acube.batframework.utils;

import it.unipi.di.acube.batframework.cache.BenchmarkCache;
import it.unipi.di.acube.batframework.data.*;
import it.unipi.di.acube.batframework.metrics.*;
import it.unipi.di.acube.batframework.problems.*;

import java.io.*;
import java.util.*;

/**
 * This class contains all the methods to dump the result of an experiment, both
 * to the screen (methods print*), in latex form (methods latex*), or as gnuplot
 * data used to generate charts (methods gnuplot*).
 * 
 */
public class DumpResults {
	private static final Locale LOCALE = Locale.US;
	private static final String CHARTS_DIR = "charts/";

	/**
	 * Orders strings by ascending length. Strings of the same length compare
	 * as equal, whatever their content.
	 */
	private static class StringLengthComparator implements Comparator<String> {
		@Override
		public int compare(String o1, String o2) {
			// String lengths are never negative, so the difference cannot
			// overflow.
			return o1.length() - o2.length();
		}
	}

	/**
	 * Writes (as {@code .dat} gnuplot data) the micro-F1, micro-precision and
	 * micro-recall achieved for each combination of Match relation - Annotator
	 * - Dataset, varying the score threshold in [0,1], to a file in the form
	 * matchrelation_datasetname_f1_threshold_annotatorname.dat. Note that this
	 * will generate
	 * {@code 3 * matchRels.size() * annotators.size() * dss.size()} files. Also
	 * note that the experiments must have already been performed and their
	 * results stored in {@code threshRecords}.
	 * 
	 * @param matchRels
	 *            the set of Match relations.
	 * @param annotators
	 *            the set of annotators.
	 * @param dss
	 *            the set of datasets.
	 * @param api
	 *            the API to Wikipedia (needed to print information about
	 *            annotations/tags).
	 * @param threshRecords
	 *            the hashmap in the form metric -> annotator -> dataset ->
	 *            (threshold, results) where the results are stored.
	 * @param 
	 *            the type of system.
	 * @param 
	 *            the type of system's output.
	 * @param 
	 *            the type of dataset.
	 * @throws IOException
	 *             if something went wrong while querying the Wikipedia API.
	 */
	public static  void gnuplotCorrectnessPerformance(
			Vector> matchRels,
			List annotators,
			Vector dss,
			WikipediaApiInterface api,
			HashMap>>> threshRecords)
			throws IOException {

		for (MatchRelation m : matchRels)
			for (T3 ds : dss) {
				for (T1 t : annotators) {
					System.out.println("Writing to gnuplot-file "
							+ ds.getName() + "/" + t.getName()
							+ " with varying score threshold...");
					String prefix = CHARTS_DIR
							+ m.getName().replaceAll("[^a-zA-Z0-9]", "")
									.toLowerCase();
					String suffix = t.getName().replaceAll("[^a-zA-Z0-9]", "")
							.toLowerCase()
							+ "_"
							+ ds.getName().replaceAll("[^a-zA-Z0-9]", "")
									.toLowerCase() + ".dat";

					OutputStreamWriter precOs = new OutputStreamWriter(
							new FileOutputStream(prefix
									+ "_precision_threshold_" + suffix));
					OutputStreamWriter recOs = new OutputStreamWriter(
							new FileOutputStream(prefix + "_recall_threshold_"
									+ suffix));
					OutputStreamWriter f1Os = new OutputStreamWriter(
							new FileOutputStream(prefix + "_f1_threshold_"
									+ suffix));

					HashMap records = RunExperiments
							.getRecords(threshRecords, m.getName(),
									t.getName(), ds.getName());
					List thresholds = new Vector(records.keySet());
					Collections.sort(thresholds);

					for (Float thr : thresholds) {
						MetricsResultSet rs = records.get(thr);
						System.out.printf(
								LOCALE,
								t.getName() + " - " + ds.getName() + " " + thr
										+ " tp:" + rs.getGlobalTp() + " fp:"
										+ rs.getGlobalFp() + " fn:"
										+ rs.getGlobalFn() + " prec:"
										+ rs.getMicroPrecision() + " rec:"
										+ rs.getMicroRecall() + " f1:"
										+ rs.getMicroF1() + "\n");
						precOs.write(String.format(LOCALE, "%f\t%f%n", thr,
								rs.getMicroPrecision()));
						recOs.write(String.format(LOCALE, "%f\t%f%n", thr,
								rs.getMicroRecall()));
						f1Os.write(String.format(LOCALE, "%f\t%f%n", thr,
								rs.getMicroF1()));
					}
					precOs.close();
					recOs.close();
					f1Os.close();
				}
			}
		System.out.println("Flushing Wikipedia API cache...");
		api.flush();
	}

	/**
	 * Writes a file {@code runtime_f1.dat} storing (avg. runtime, achieved best
	 * micro-F1) pairs for a single dataset and a single match relation. Note
	 * that the experiments must have already been performed and their results
	 * stored in {@code threshRecords}.
	 * 
	 * @param matchRelName
	 *            The match relation used to compute the micro-F1.
	 * @param a2wAnnotators
	 *            The set of A2W Annotators for which a pair must be included in
	 *            the output.
	 * @param sa2wAnnotators
	 *            The set of Sa2W Annotators for which a pair must be included.
	 * @param datasetName
	 *            The name of the dataset for which the pairs must be included.
	 * @param api
	 *            the API to Wikipedia (needed to print information about
	 *            annotations/tags).
	 * @param threshRecords
	 *            the hashmap in the form metric -> annotator -> dataset ->
	 *            (threshold, results) where the results are stored.
	 * @throws IOException
	 *             if something went wrong while querying the Wikipedia API.
	 * @throws Exception
	 *             if the cache does not contain records about the given
	 *             dataset/annotator avg. time.
	 */
	public static void gnuplotRuntimeF1(
			String matchRelName,
			Vector a2wAnnotators,
			Vector sa2wAnnotators,
			String datasetName,
			WikipediaApiInterface api,
			HashMap>>> threshRecords)
			throws IOException, Exception {
		OutputStreamWriter runTimeF1Stream = new OutputStreamWriter(
				new FileOutputStream(CHARTS_DIR + "runtime_f1.dat"));
		if (a2wAnnotators != null)
			for (A2WSystem t : a2wAnnotators)
				runTimeF1Stream.write(String.format(LOCALE, "%f\t%d\t\"%s\"\n",
						RunExperiments.getBestRecord(threshRecords,
								matchRelName, t.getName(), datasetName).second
								.getMicroF1(), (int) BenchmarkCache
								.getAvgA2WTimingsForDataset(t.getName(),
										datasetName), t.getName()));
		if (sa2wAnnotators != null)
			for (Sa2WSystem t : sa2wAnnotators)
				runTimeF1Stream.write(String.format(LOCALE, "%f\t%d\t\"%s\"\n",
						RunExperiments.getBestRecord(threshRecords,
								matchRelName, t.getName(), datasetName).second
								.getMicroF1(), (int) BenchmarkCache
								.getAvgSa2WTimingsForDataset(t.getName(),
										datasetName), t.getName()));
		runTimeF1Stream.close();
	}

	/**
	 * Print a latex tables reporting the best micro-F1 achieved by each
	 * (annotator, dataset) pair. A Latex table is printed for the metrics based
	 * on each match relation passed in {@code matchRels}.
	 * 
	 * @param matchRels
	 *            the set of Match relations for which a table will be printed.
	 * @param a2wAnnotators
	 *            The set of A2W Annotators for which the best result will be
	 *            included in the output.
	 * @param d2wAnnotators
	 *            The set of D2W Annotators for which the best result will be
	 *            included in the output.
	 * @param sa2wAnnotators
	 *            The set of Sa2W Annotators for which the best result will be
	 *            included.
	 * @param sc2wAnnotators
	 *            The set of Sc2W Annotators for which the best result will be
	 *            included.
	 * @param c2wAnnotators
	 *            The set of C2W Annotators for which the best result will be
	 *            included in the output.
	 * @param dss
	 *            The datasets for which the best result will be included.
	 * @param includeTpFpFn
	 *            true if the output table has to include the total number of
	 *            tp/fp/fn
	 * @param threshRecords
	 *            the hashmap in the form metric -> annotator -> dataset ->
	 *            (threshold, results) where the results are stored.
	 * @param includeMicro
	 *            whether or not to include micro-measures in the table
	 * @param includeMacro
	 *            whether or not to include macro-measures in the table
	 * @param includeTpFpFn
	 *            whether or not to include TP, FP and FN count in the table
	 * @param 
	 *            the type of data on which the match relation operates.
	 * @param 
	 *            the type of dataset.
	 */
	public static  void latexCorrectnessPerformance(
			Vector> matchRels,
			Vector a2wAnnotators,
			Vector d2wAnnotators,
			Vector sa2wAnnotators,
			Vector sc2wAnnotators,
			Vector c2wAnnotators,
			Vector dss,
			boolean includeTpFpFn,
			boolean includeMicro,
			boolean includeMacro,
			HashMap>>> threshRecords) {
		System.out.println("Correctness performance - latex output");
		Vector allAnns = new Vector();
		if (sa2wAnnotators != null)
			allAnns.addAll(sa2wAnnotators);
		if (sc2wAnnotators != null)
			allAnns.addAll(sc2wAnnotators);
		if (a2wAnnotators != null)
			allAnns.addAll(a2wAnnotators);
		if (d2wAnnotators != null)
			allAnns.addAll(d2wAnnotators);
		if (c2wAnnotators != null)
			allAnns.addAll(c2wAnnotators);
		for (MatchRelation m : matchRels) {
			System.out.println("+++ Match Relation: " + m.getName());
			System.out
					.printf(LOCALE,
							"\\hline \n Dataset & Annotator & Best Threshold"
									+ (includeMicro ? " & $F1_{micro}$ & $P_{micro}$ & $R_{micro}$ "
											: "")
									+ (includeMacro ? " & $F1_{macro}$ & $P_{macro}$ & $R_{macro}$ "
											: "")
									+ (includeTpFpFn ? "& tp & fp & fn" : "")
									+ "\\\\ \n \\hline%n");
			for (TopicDataset d : dss) {
				long len = 0;
				for (String s : d.getTextInstanceList())
					len += s.length();
				System.out
						.printf(LOCALE,
								"\\multirow{%d}{*}{\\parbox{.20\\textwidth}{%s \\newline(avg-len\\newline %d chars)}} %n",
								allAnns.size(), d.getName(),
								(int) ((float) len / (float) d.getSize()));
				for (TopicSystem t : allAnns) {
					Pair values = RunExperiments
							.getBestRecord(threshRecords, m.getName(),
									t.getName(), d.getName());
					System.out.printf(LOCALE, "& %s & $%.3f$ ", t.getName(),
							values.first);
					if (includeMicro)
						System.out.printf(LOCALE,
								"& $%.1f$ & $%.1f$ & $%.1f$ ",
								values.second.getMicroF1() * 100f,
								values.second.getMicroPrecision() * 100f,
								values.second.getMicroRecall() * 100f);
					if (includeMacro)
						System.out.printf(LOCALE,
								"& $%.1f$ & $%.1f$ & $%.1f$ ",
								values.second.getMacroF1() * 100f,
								values.second.getMacroPrecision() * 100f,
								values.second.getMacroRecall() * 100f);
					if (includeTpFpFn)
						System.out.printf(LOCALE, "& $%d$ & $%d$ & $%d$",
								values.second.getGlobalTp(),
								values.second.getGlobalFp(),
								values.second.getGlobalFn());
					int nColumns = 3 + (includeTpFpFn?3:0)+(includeMacro?3:0)+(includeMicro?3:0); 
					System.out.printf(LOCALE, " \\\\ \\cline{2-"
							+ nColumns + "}%n");
				}
				System.out.printf(LOCALE, "\\hline%n");
			}
		}
	}

	/**
	 * Print a set of latex tables reporting the similarity between annotators
	 * in solving the Sa2W problem, for each dataset given in {@code dssA2W}. A
	 * table will be printed for each of these focuses:
* 1- similarity of the whole output ("wholeOutput")
* 2- similarity of the output, restricted to the true positives ("TPonly")
* 3- similarity of the mentions ("mention")
* 4- similarity of the concepts ("concept") * * @param dssA2W * the datasets for which the set of tables will be printed. * @param sa2wAnnotators * the Sa2W annotators whose similarity will be printed. * @param threshRecords * the hashmap in the form metric -> annotator -> dataset -> * (threshold, results) where the results are stored. * @param api * the API to Wikipedia (needed to print information about * annotations/tags). * @param * the type of dataset. * @throws Exception * if anything went wrong while retrieving the results. */ public static void latexSimilarityA2W( Vector dssA2W, Vector sa2wAnnotators, HashMap>>> threshRecords, WikipediaApiInterface api) throws Exception { /** Difference between systems & "Jaccard" measure */ for (String focus : new String[] { "wholeOutput", "TPonly", "mention", "concept", "mention/concept" }) { System.out.println("Similarity measures - latex output - " + focus); for (A2WDataset ds : dssA2W) { System.out.println("Dataset: " + ds.getName()); for (Sa2WSystem t : sa2wAnnotators) System.out.printf(LOCALE, "&%s", t.getName()); System.out.printf(LOCALE, "\\\\%n"); System.out.printf(LOCALE, "\\hline%n"); for (int i = 0; i < sa2wAnnotators.size(); i++) { Sa2WSystem t1 = sa2wAnnotators.get(i); System.out.printf(LOCALE, t1.getName()); for (int j = 0; j < sa2wAnnotators.size(); j++) { if (j < i) { System.out.printf(LOCALE, "&"); continue; } Sa2WSystem t2 = sa2wAnnotators.get(j); List> t1Annotations = BenchmarkCache .doSa2WAnnotations(t1, ds, null, 0); List> t2Annotations = BenchmarkCache .doSa2WAnnotations(t2, ds, null, 0); List> out1 = null; List> out2 = null; MatchRelation m = null; Metrics metrics = new Metrics(); if (focus.equals("wholeOutput")) { m = new WeakAnnotationMatch(api); out1 = ProblemReduction.Sa2WToA2WList( t1Annotations, RunExperiments .getBestRecord(threshRecords, m.getName(), t1.getName(), ds.getName()).first); out2 = ProblemReduction.Sa2WToA2WList( t2Annotations, RunExperiments .getBestRecord(threshRecords, 
m.getName(), t2.getName(), ds.getName()).first); System.out .printf(LOCALE, "&$%.1f$", metrics.macroSimilarity(out1, out2, m) * 100); } else if (focus.equals("TPonly")) { m = new WeakAnnotationMatch(api); List> reducedT1Tags = ProblemReduction .Sa2WToA2WList( t1Annotations, RunExperiments.getBestRecord( threshRecords, m.getName(), t1.getName(), ds.getName()).first); List> reducedT2Tags = ProblemReduction .Sa2WToA2WList( t2Annotations, RunExperiments.getBestRecord( threshRecords, m.getName(), t2.getName(), ds.getName()).first); out1 = metrics.getTp(ds.getA2WGoldStandardList(), reducedT1Tags, m); out2 = metrics.getTp(ds.getA2WGoldStandardList(), reducedT2Tags, m); System.out .printf(LOCALE, "&$%.1f$", metrics.macroSimilarity(out1, out2, m) * 100); } else if (focus.equals("mention")) { m = new MentionAnnotationMatch(); List> reducedT1Tags = ProblemReduction .Sa2WToA2WList( t1Annotations, RunExperiments.getBestRecord( threshRecords, m.getName(), t1.getName(), ds.getName()).first); List> reducedT2Tags = ProblemReduction .Sa2WToA2WList( t2Annotations, RunExperiments.getBestRecord( threshRecords, m.getName(), t2.getName(), ds.getName()).first); out1 = metrics.getTp(ds.getA2WGoldStandardList(), reducedT1Tags, m); out2 = metrics.getTp(ds.getA2WGoldStandardList(), reducedT2Tags, m); System.out .printf(LOCALE, "&$%.1f$", metrics.macroSimilarity(out1, out2, m) * 100); } else if (focus.equals("concept")) { m = new ConceptAnnotationMatch(api); List> reducedT1Tags = ProblemReduction .Sa2WToA2WList( t1Annotations, RunExperiments.getBestRecord( threshRecords, m.getName(), t1.getName(), ds.getName()).first); List> reducedT2Tags = ProblemReduction .Sa2WToA2WList( t2Annotations, RunExperiments.getBestRecord( threshRecords, m.getName(), t2.getName(), ds.getName()).first); out1 = metrics.getTp(ds.getA2WGoldStandardList(), reducedT1Tags, m); out2 = metrics.getTp(ds.getA2WGoldStandardList(), reducedT2Tags, m); System.out .printf(LOCALE, "&$%.1f$", metrics.macroSimilarity(out1, out2, m) * 
100); } else if (focus.equals("mention/concept")) { m = new MentionAnnotationMatch(); List> reducedT1Tags = ProblemReduction .Sa2WToA2WList( t1Annotations, RunExperiments.getBestRecord( threshRecords, m.getName(), t1.getName(), ds.getName()).first); List> reducedT2Tags = ProblemReduction .Sa2WToA2WList( t2Annotations, RunExperiments.getBestRecord( threshRecords, m.getName(), t2.getName(), ds.getName()).first); out1 = metrics.getTp(ds.getA2WGoldStandardList(), reducedT1Tags, m); out2 = metrics.getTp(ds.getA2WGoldStandardList(), reducedT2Tags, m); m = new ConceptAnnotationMatch(api); System.out .printf(LOCALE, "&$%.0f$/", metrics.macroSimilarity(out1, out2, m) * 100); reducedT1Tags = ProblemReduction.Sa2WToA2WList( t1Annotations, RunExperiments .getBestRecord(threshRecords, m.getName(), t1.getName(), ds.getName()).first); reducedT2Tags = ProblemReduction.Sa2WToA2WList( t2Annotations, RunExperiments .getBestRecord(threshRecords, m.getName(), t2.getName(), ds.getName()).first); out1 = metrics.getTp(ds.getA2WGoldStandardList(), reducedT1Tags, m); out2 = metrics.getTp(ds.getA2WGoldStandardList(), reducedT2Tags, m); System.out .printf(LOCALE, "$%.0f$", metrics.macroSimilarity(out1, out2, m) * 100); } } System.out.printf(LOCALE, "\\\\%n"); System.out.printf(LOCALE, "\\hline%n"); } } } } /** * Print a set of latex tables reporting the similarity between annotators * in solving the C2W problem, for each dataset given in {@code dssC2W}.
* * @param dssC2W * the datasets for which the set of tables will be printed. * @param sa2wAnnotators * the Sa2W annotators whose similarity will be printed. * @param threshRecords * the hashmap in the form metric -> annotator -> dataset -> * (threshold, results) where the results are stored. * @param api * the API to Wikipedia (needed to print information about * annotations/tags). * @throws Exception * if anything went wrong while retrieving the results. */ public static void latexSimilarityC2W( Vector dssC2W, Vector sa2wAnnotators, HashMap>>> threshRecords, WikipediaApiInterface api) throws Exception { System.out.println("Similarity measures - latex output - tp C2W"); for (C2WDataset ds : dssC2W) { System.out.println("Dataset: " + ds.getName()); for (Sa2WSystem t : sa2wAnnotators) System.out.printf(LOCALE, "&" + t.getName()); System.out.printf(LOCALE, "\\\\%n"); System.out.printf(LOCALE, "\\hline%n"); for (int i = 0; i < sa2wAnnotators.size(); i++) { Sa2WSystem t1 = sa2wAnnotators.get(i); System.out.printf(LOCALE, t1.getName()); for (int j = 0; j < sa2wAnnotators.size(); j++) { if (j < i) { System.out.printf(LOCALE, "&"); continue; } Sa2WSystem t2 = sa2wAnnotators.get(j); MatchRelation m = new StrongTagMatch(api); Metrics metrics = new Metrics(); List> t1Annotations = BenchmarkCache .doSa2WAnnotations(t1, ds, null, 0); List> t2Annotations = BenchmarkCache .doSa2WAnnotations(t2, ds, null, 0); List> reducedT1Anns = ProblemReduction .Sa2WToA2WList(t1Annotations, RunExperiments .getBestRecord(threshRecords, m.getName(), t1.getName(), ds.getName()).first); List> reducedT2Anns = ProblemReduction .Sa2WToA2WList(t2Annotations, RunExperiments .getBestRecord(threshRecords, m.getName(), t2.getName(), ds.getName()).first); List> reducedT1Tags = ProblemReduction .A2WToC2WList(reducedT1Anns); List> reducedT2Tags = ProblemReduction .A2WToC2WList(reducedT2Anns); List> tponlyTagsT1 = metrics.getTp( ds.getC2WGoldStandardList(), reducedT1Tags, m); List> tponlyTagsT2 = 
metrics.getTp( ds.getC2WGoldStandardList(), reducedT2Tags, m); System.out.printf(LOCALE, "&$%.3f$", metrics .macroSimilarity(tponlyTagsT1, tponlyTagsT2, m)); } System.out.printf(LOCALE, "\\\\%n"); System.out.printf(LOCALE, "\\hline%n"); } } } /** * Prints (in latex form) a table representing the average time needed by * all systems to solve the instance of a dataset. Rows are grouped by * dataset and are in the form (dataset, annotator, avg. time). * * @param a2wAnnotators * The A2W annotators that will be included in the output. * @param sa2wAnnotators * The Sa2W annotators that will be included in the output. * @param sc2wAnnotators * The Sc2W annotators that will be included in the output. * @param dss * The datasets that will be included in the output. * @param * the type of dataset. * @throws Exception * if the cache does not contain records about the given * dataset/annotator avg. time. */ public static void latexTimingPerformance( Vector a2wAnnotators, Vector sa2wAnnotators, Vector sc2wAnnotators, Vector dss) throws Exception { System.out.println("Timing performance - latex output"); int allAnnsSize = 0; if (sa2wAnnotators != null) allAnnsSize += sa2wAnnotators.size(); if (a2wAnnotators != null) allAnnsSize += a2wAnnotators.size(); if (sc2wAnnotators != null) allAnnsSize += sc2wAnnotators.size(); System.out.printf(LOCALE, "\\hline \nDataset & Tagger & Average Time\\\\ \n \\hline%n"); for (TopicDataset d : dss) { long len = 0; for (String s : d.getTextInstanceList()) len += s.length(); System.out .printf(LOCALE, "\\multirow{%d}{*}{\\parbox{.40\\textwidth}{%s \\newline(len %d)}} %n", allAnnsSize, d.getName(), (int) ((float) len / (float) d.getSize())); if (a2wAnnotators != null) for (A2WSystem t : a2wAnnotators) System.out.printf( LOCALE, "& %s & $%d$ms \\\\ \\cline{2-3}%n", t.getName(), (int) BenchmarkCache.getAvgA2WTimingsForDataset( t.getName(), d.getName())); if (sa2wAnnotators != null) for (Sa2WSystem t : sa2wAnnotators) System.out.printf( LOCALE, "& %s & 
$%d$ms \\\\ \\cline{2-3}%n", t.getName(), (int) BenchmarkCache.getAvgSa2WTimingsForDataset( t.getName(), d.getName())); System.out.printf(LOCALE, "\\hline%n"); } } /** * Prints (in latex form) a table representing the average time needed by a * system to solve the instance of a dataset. Rows are in the form * (annotator, avg. time for system 1, avg. time for system 2, ...). * * @param sa2wAnnotators * The Sa2W annotators that will be included in the output. * @param sc2wAnnotators * The Sc2W annotators that will be included in the output. * @param dss * The datasets that will be included in the output. * @param * the type of dataset. * @throws Exception * if the cache does not contain records about the given * dataset/annotator avg. time. */ public static void latexTimingPerformance2( Vector sa2wAnnotators, Vector sc2wAnnotators, Vector dss) throws Exception { System.out.println("Timing performance - latex output 2"); System.out.printf(LOCALE, "\\hline \nSystem "); for (TopicDataset ds : dss) System.out.printf(LOCALE, " & " + ds.getName()); System.out.printf(LOCALE, " \n \\hline%n"); if (sa2wAnnotators != null) for (Sa2WSystem t : sa2wAnnotators) { System.out.printf(LOCALE, t.getName()); for (TopicDataset d : dss) System.out.printf( LOCALE, " & $%d$ ", (int) BenchmarkCache.getAvgSa2WTimingsForDataset( t.getName(), d.getName())); System.out.printf(LOCALE, "\\\\%n"); System.out.printf(LOCALE, "\\hline%n"); } if (sc2wAnnotators != null) for (Sc2WSystem t : sc2wAnnotators) { System.out.printf(LOCALE, t.getName()); for (TopicDataset d : dss) System.out.printf( LOCALE, " & $%d$ ", (int) BenchmarkCache.getAvgSa2WTimingsForDataset( t.getName(), d.getName())); System.out.printf(LOCALE, "\\\\%n"); System.out.printf(LOCALE, "\\hline%n"); } } public static void printCorrectnessPerformance( Vector> matchRels, Vector a2wAnnotators, Vector sa2wAnnotators, Vector c2wAnnotators, Vector d2wAnnotators, Vector dss, HashMap>>> threshRecords, boolean printMicro, boolean printMacro, 
boolean printTpFpFn, float threshold) { Vector allAnns = new Vector(); if (sa2wAnnotators != null) allAnns.addAll(sa2wAnnotators); if (c2wAnnotators != null) allAnns.addAll(c2wAnnotators); if (a2wAnnotators != null) allAnns.addAll(a2wAnnotators); if (d2wAnnotators != null) allAnns.addAll(d2wAnnotators); System.out.println("Correctness performance [F1/prec/rec]"); for (MatchRelation metric : matchRels) { System.out.printf(LOCALE, "Best results (metrics: %s):%n", metric.getName()); for (TopicDataset d : dss) for (TopicSystem t : allAnns) { Pair result = null; if (threshold >= 0) result = new Pair(threshold, RunExperiments.getRecords(threshRecords, metric.getName(), t.getName(), d.getName()).get(threshold)); else result = RunExperiments.getBestRecord(threshRecords, metric.getName(), t.getName(), d.getName()); System.out.printf(LOCALE, "%s\t%s\t%.3f\t", d.getName(), t.getName(), result.first); if (printMicro) System.out.printf(LOCALE, "[mic: %.3f\t%.3f\t%.3f] ", result.second.getMicroF1(), result.second.getMicroPrecision(), result.second.getMicroRecall()); if (printMacro) System.out.printf(LOCALE, "[mac: %.3f\t%.3f\t%.3f] ", result.second.getMacroF1(), result.second.getMacroPrecision(), result.second.getMacroRecall()); if (printTpFpFn) System.out.printf(LOCALE, "TP/FP/FN: %d/%d/%d", result.second.getGlobalTp(), result.second.getGlobalFp(), result.second.getGlobalFn()); System.out.println(); } } } /** * Print the best micro- and macro- measures achieved by each (annotator, * dataset) pair along with other data. Data is printed for the metrics * based on each match relation passed in {@code matchRels}. * * @param matchRels * the set of Match relations for which a table will be printed. * @param a2wAnnotators * The set of A2W Annotators for which the best result will be * included in the output. * @param d2wAnnotators * The set of D2W Annotators for which the best result will be * included in the output. 
* @param sa2wAnnotators * The set of Sa2W Annotators for which the best result will be * included. * @param c2wAnnotators * The set of Sc2W Annotators for which the best result will be * included. * @param dss * The datasets for which the best result will be included. * @param threshRecords * the hashmap in the form metric -> annotator -> dataset -> * (threshold, results) where the results are stored. * @param * the type of data on which the match relation operates. * @param * the type of dataset. */ public static void printCorrectnessPerformance( Vector> matchRels, Vector a2wAnnotators, Vector sa2wAnnotators, Vector c2wAnnotators, Vector d2wAnnotators, Vector dss, HashMap>>> threshRecords) { printCorrectnessPerformance(matchRels, a2wAnnotators, sa2wAnnotators, c2wAnnotators, d2wAnnotators, dss, threshRecords, true, true, false, -1); } /** * Prints the similarity, dissimilarity, and union of the results for each * pair of annotators. * * @param dssA2W * The datasets for which the data will be printed. * @param sa2wAnnotators * The annotators for which the data will be printed. * @param api * the API to Wikipedia (needed to print information about * annotations/tags). * @param threshRecords * the hashmap in the form metric -> annotator -> dataset -> * (threshold, results) where the results are stored. * @throws Exception * if something went wrong while retrieving the results. 
*/ public static void printDissimilarityA2W( Vector dssA2W, Vector sa2wAnnotators, HashMap>>> threshRecords, WikipediaApiInterface api) throws Exception { Metrics metrics = new Metrics(); WeakAnnotationMatch m = new WeakAnnotationMatch(api); for (A2WDataset ds : dssA2W) { System.out.println("Dataset: " + ds.getName()); for (int i = 0; i < sa2wAnnotators.size(); i++) { Sa2WSystem t1 = sa2wAnnotators.get(i); for (int j = 0; j < sa2wAnnotators.size(); j++) { if (j <= i) continue; Sa2WSystem t2 = sa2wAnnotators.get(j); System.out.println("Annotator1: " + t1.getName()); System.out.println("Annotator2: " + t2.getName()); List> t1Annotations = BenchmarkCache .doSa2WAnnotations(t1, ds, null, 0); List> t2Annotations = BenchmarkCache .doSa2WAnnotations(t2, ds, null, 0); List> reducedT1Tags = ProblemReduction .Sa2WToA2WList(t1Annotations, RunExperiments .getBestRecord(threshRecords, m.getName(), t1.getName(), ds.getName()).first); List> reducedT2Tags = ProblemReduction .Sa2WToA2WList(t2Annotations, RunExperiments .getBestRecord(threshRecords, m.getName(), t2.getName(), ds.getName()).first); List> out1Tp = metrics.getTp( ds.getA2WGoldStandardList(), reducedT1Tags, m); List> out2Tp = metrics.getTp( ds.getA2WGoldStandardList(), reducedT2Tags, m); List> out1Fp = metrics.getFp( ds.getA2WGoldStandardList(), reducedT1Tags, m); List> out2Fp = metrics.getFp( ds.getA2WGoldStandardList(), reducedT2Tags, m); long tpUnion = metrics.listUnion(out1Tp, out2Tp, m); int tpdiss1 = metrics.dissimilarityListCount(out1Tp, out2Tp, m); int tpdiss2 = metrics.dissimilarityListCount(out2Tp, out1Tp, m); int tpsim = metrics.similarityListCount(out1Tp, out2Tp, m); long fpUnion = metrics.listUnion(out1Fp, out2Fp, m); int fpdiss1 = metrics.dissimilarityListCount(out1Fp, out2Fp, m); int fpdiss2 = metrics.dissimilarityListCount(out2Fp, out1Fp, m); int fpsim = metrics.similarityListCount(out1Fp, out2Fp, m); System.out.printf(LOCALE, "Ann1 dissimilarity tp/fp: %d(%.2f)/%d(%.2f)%n", tpdiss1, (float) tpdiss1 / 
(float) (tpdiss1 + fpdiss1), fpdiss1, (float) fpdiss1 / (float) (tpdiss1 + fpdiss1)); System.out.printf(LOCALE, "Ann2 dissimilarity tp/fp: %d(%.2f)/%d(%.2f)%n", tpdiss2, (float) tpdiss2 / (float) (tpdiss2 + fpdiss2), fpdiss2, (float) fpdiss2 / (float) (tpdiss2 + fpdiss2)); System.out.printf(LOCALE, "Anns similarity tp/fp: %d(%.2f)/%d(%.2f)%n", tpsim, (float) tpsim / (float) (tpsim + fpsim), fpsim, (float) fpsim / (float) (tpsim + fpsim)); System.out.printf(LOCALE, "Anns union tp/fp: %d(%.2f)/%d(%.2f)%n", tpUnion, (float) tpUnion / (float) (tpUnion + fpUnion), fpUnion, (float) fpUnion / (float) (tpUnion + fpUnion)); } } } } /** * Prints the document in a dataset with the most redirect documents for * each (dataset, annotator) pair. (Mainly for debug purposes.) * * @param dss * The datasets. * @param sa2wAnnotators * The annotators. * @param api * the API to Wikipedia (needed to find out if an * annotations/tags is a redirect). * @throws Exception * if something went wrong while retrieving the results. 
*/ public static void printMostRedirectDocument(Vector dss, Vector sa2wAnnotators, WikipediaApiInterface api) throws Exception { System.out.println("Returned Redirect"); for (A2WDataset ds : dss) { for (Sa2WSystem t : sa2wAnnotators) { int totalTags = 0; int totalRedirects = 0; System.out .printf(LOCALE, "Finding document with most redirect tags for dataset:%s tagger:%s%n", ds.getName(), t.getName()); List> compRes = BenchmarkCache .doSa2WAnnotations(t, ds, null, 0); int bestCount = -1; for (int i = 0; i < compRes.size(); i++) { HashSet comp = compRes.get(i); int count = 0; HashSet distinctRedirects = new HashSet(); for (ScoredAnnotation tag : comp) if (api.isRedirect(tag.getConcept())) { count++; distinctRedirects.add(tag.getConcept()); } if (bestCount < distinctRedirects.size()) bestCount = distinctRedirects.size(); totalTags += compRes.size(); totalRedirects += count; } System.out.printf(LOCALE, "Tags found=%d Redirects=%d redirect/total=%.3f", totalTags, totalRedirects, (float) totalRedirects / (float) totalTags); } } } /** * Print data about correctness performance (f1, precision, recall, ...) for * a given (annotator, dataset) pair. * * @param ann * the annotator. * @param m * the match relation which the measurements are based upon. * @param goldStandard * the gold standard for the dataset. * @param output * the output found by the tagger. * @param api * the API to Wikipedia (needed to print information about * annotations/tags). * @param * the type of system output and dataset. * @throws IOException * if something went wrong while querying the Wikipedia API. 
*/ public static void printCorrectnessPerformance( TopicSystem ann, MatchRelation m, List> goldStandard, List> output, WikipediaApiInterface api) throws IOException { Metrics metrics = new Metrics(); MetricsResultSet rs = metrics.getResult(output, goldStandard, m); System.out.format(LOCALE, "%s tp:%d fp:%d fn:%d precision: %.3f recall:%.3f F1:%.3f%n", ann.getName(), rs.getGlobalTp(), rs.getGlobalFp(), rs.getGlobalFn(), rs.getMacroPrecision(), rs.getMacroRecall(), rs.getMacroF1()); System.out.format(LOCALE, "%s micro-precision:%.3f micro-recall:%.3f micro-F1:%.3f%n", ann.getName(), rs.getMicroPrecision(), rs.getMicroRecall(), rs.getMicroF1()); } /** * Prints a table representing the average time needed by all systems to * solve the instance of a dataset. Rows are grouped by dataset and are in * the form (dataset, annotator, avg. time). * * @param a2wAnnotators * The A2W annotators that will be included in the output. * @param sa2wAnnotators * The Sa2W annotators that will be included in the output. * @param sc2wAnnotators * The Sa2W annotators that will be included in the output. * @param dss * The datasets that will be included in the output. * @throws Exception * if the cache does not contain records about the given * dataset/annotator avg. time. 
*/ public static void printTimingPerformance(Vector a2wAnnotators, Vector sa2wAnnotators, Vector sc2wAnnotators, Vector dss) throws Exception { if (sa2wAnnotators == null) sa2wAnnotators = new Vector(); if (a2wAnnotators == null) a2wAnnotators = new Vector(); if (sc2wAnnotators == null) sc2wAnnotators = new Vector(); System.out.println("Timing performance:"); for (A2WDataset d : dss) { for (A2WSystem t : a2wAnnotators) { System.out.printf(LOCALE, "Average time for T2W tagger=%s dataset=%s: %.3f%n", t .getName(), d.getName(), BenchmarkCache .getAvgA2WTimingsForDataset(t.getName(), d.getName())); if (d.getSize() != BenchmarkCache.getA2WTimingsForDataset( t.getName(), d.getName()).size()) System.out .printf(LOCALE, "ERROR: size of dataset %s and computed results by %s mismatch! %d != %d", d.getName(), t.getName(), d.getSize(), BenchmarkCache.getA2WTimingsForDataset( t.getName(), d.getName()).size()); } for (Sa2WSystem t : sa2wAnnotators) { System.out.printf(LOCALE, "Average time for St2W tagger=%s dataset=%s: %.3f%n", t .getName(), d.getName(), BenchmarkCache .getAvgSa2WTimingsForDataset(t.getName(), d.getName())); if (d.getSize() != BenchmarkCache.getSa2WTimingsForDataset( t.getName(), d.getName()).size()) System.out .printf(LOCALE, "ERROR: size of dataset %s and computed results by %s mismatch! %d != %d", d.getName(), t.getName(), d.getSize(), BenchmarkCache.getSa2WTimingsForDataset( t.getName(), d.getName()).size()); } } } /** * For each dataset passed in {@code annotators}, writes one gnuplot * {@code timing_annotator_dataset.dat} file containing the time needed by * the annotator to annotate each document of a dataset, ordered by the * amount of time.
* Note that the experiments must have already been performed and their * results stored in the cache when this method is called. * * @param annotators * the set of annotators for which a file will be created. * @param ds * the dataset. * @throws Exception * if there were errors in writing the file or in retrieving the * timing. */ public static void gnuplotTraceTiming(Vector annotators, A2WDataset ds) throws Exception { for (Sa2WSystem annotator : annotators) { String suffix = annotator.getName().replaceAll("[^a-zA-Z0-9]", "") .toLowerCase() + "_" + ds.getName().replaceAll("[^a-zA-Z0-9]", "").toLowerCase() + ".dat"; OutputStreamWriter relOs = new OutputStreamWriter( new FileOutputStream(CHARTS_DIR + "timing_" + suffix)); Vector texts = new Vector(ds.getTextInstanceList()); Collections.sort(texts, new StringLengthComparator()); for (String text : texts) { long time = BenchmarkCache.getSa2WTiming(annotator.getName(), ds.getName(), text); relOs.write(String.format(LOCALE, "%d\t%d%n", text.length(), time)); } relOs.close(); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy