All downloads are free. Search and download functionality uses the official Maven repository.

edu.stanford.nlp.ie.machinereading.ResultsPrinter Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher-level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.ie.machinereading;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.util.CoreMap;

/**
 * Class for comparing the output of information extraction to a gold standard, and printing the results.
 * Subclasses customize the formatting and content of the printout by implementing
 * {@link #printResults(PrintWriter, List, List)} and
 * {@link #printResultsUsingLabels(PrintWriter, List, List)}.
 *
 * @author mrsmith
 *
 */
public abstract class ResultsPrinter {

  /** Single shared instance of the sentence ordering used by {@link #align(List, List)}. */
  private static final Comparator<CoreMap> SENTENCE_ORDER = new SentenceComparator();

  /**
   * Given a set of sentences with annotations from an information extractor class, and the same sentences
   * with gold-standard annotations, print results on how the information extraction performed.
   *
   * @param goldStandard    annotation whose {@code SentencesAnnotation} holds the gold-standard sentences
   * @param extractorOutput annotation whose {@code SentencesAnnotation} holds the extractor's sentences
   * @return everything the subclass wrote through
   *         {@link #printResults(PrintWriter, List, List)}, as a single string
   */
  public String printResults(CoreMap goldStandard, CoreMap extractorOutput) {
    StringWriter sw = new StringWriter();
    PrintWriter pw = new PrintWriter(sw, true);
    // Hand the subclass fresh ArrayList copies rather than the lists stored in the annotations.
    List<CoreMap> mutableGold =
        new ArrayList<>(goldStandard.get(CoreAnnotations.SentencesAnnotation.class));
    List<CoreMap> mutableOutput =
        new ArrayList<>(extractorOutput.get(CoreAnnotations.SentencesAnnotation.class));
    printResults(pw, mutableGold, mutableOutput);
    return sw.getBuffer().toString();
  }

  /**
   * Label-based variant: delegates to {@link #printResultsUsingLabels(PrintWriter, List, List)}
   * and returns what it wrote as a string.
   *
   * <p>NOTE(review): the element type of these lists is not visible in this file (presumably
   * label strings), so the parameters are deliberately left as raw {@code List} - confirm
   * against the subclasses before parameterizing.
   *
   * @param goldStandard    gold-standard labels
   * @param extractorOutput labels produced by the extractor
   * @return the text written by the subclass
   */
  public String printResults(List goldStandard, List extractorOutput) {
    StringWriter sw = new StringWriter();
    PrintWriter pw = new PrintWriter(sw, true);
    printResultsUsingLabels(pw, goldStandard, extractorOutput);
    return sw.getBuffer().toString();
  }

  /**
   * Writes a comparison of the extractor's sentences against the gold-standard sentences.
   *
   * @param pw              destination for the report
   * @param goldStandard    gold-standard sentences
   * @param extractorOutput sentences annotated by the extractor
   */
  public abstract void printResults(PrintWriter pw, List<CoreMap> goldStandard, List<CoreMap> extractorOutput);

  /**
   * Writes a comparison of the extractor's labels against the gold-standard labels.
   * The lists are left raw because their element type is not visible in this file
   * (see the note on {@link #printResults(List, List)}).
   *
   * @param pw              destination for the report
   * @param goldStandard    gold-standard labels
   * @param extractorOutput labels produced by the extractor
   */
  public abstract void printResultsUsingLabels(PrintWriter pw,
                                               List goldStandard,
                                               List extractorOutput);

  /**
   * Sorts both lists in place with the same ordering (document ID, then sentence text) and
   * verifies that they line up: equal sizes, and equal sentence text at every index.
   * Only the text is compared in the verification step, not the document ID.
   *
   * <p>Both lists are sorted before any check is made, so they are reordered even when the
   * method ends up throwing.
   *
   * @param list1 first list of sentences; sorted in place
   * @param list2 second list of sentences; sorted in place
   * @throws RuntimeException if the lists differ in size or if, after sorting, the sentences
   *         at some index have different text
   */
  public static void align(List<CoreMap> list1, List<CoreMap> list2) {
    Collections.sort(list1, SENTENCE_ORDER);
    Collections.sort(list2, SENTENCE_ORDER);

    // Check sizes first so the index loop can never run past the end of the shorter list.
    boolean alignable = list1.size() == list2.size();
    for (int i = 0; alignable && i < list1.size(); i++) {
      String text1 = list1.get(i).get(CoreAnnotations.TextAnnotation.class);
      String text2 = list2.get(i).get(CoreAnnotations.TextAnnotation.class);
      alignable = text1.equals(text2);
    }

    if (!alignable) {
      throw new RuntimeException("ResultsPrinter.align: gold standard sentences don't match extractor output sentences!");
    }
  }

  /**
   * Orders sentences by document ID when both sentences have one and the IDs differ;
   * otherwise falls back to comparing the sentence text.
   */
  private static class SentenceComparator implements Comparator<CoreMap> {
    @Override
    public int compare(CoreMap sent1, CoreMap sent2) {
      String d1 = sent1.get(CoreAnnotations.DocIDAnnotation.class);
      String d2 = sent2.get(CoreAnnotations.DocIDAnnotation.class);
      if (d1 != null && d2 != null && !d1.equals(d2)) {
        return d1.compareTo(d2);
      }

      // Same document, or at least one ID missing: the text decides.
      String t1 = sent1.get(CoreAnnotations.TextAnnotation.class);
      String t2 = sent2.get(CoreAnnotations.TextAnnotation.class);
      return t1.compareTo(t2);
    }
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy