All downloads are free. The search and download functionality uses the official Maven repository.

com.aliasi.classify.JointClassifierEvaluator Maven / Gradle / Ivy

Go to download

This is the original LingPipe: http://alias-i.com/lingpipe/web/download.html No changes were made to the source code.

There is a newer version: 4.1.2-JL1.0
Show newest version
/*
 * LingPipe v. 4.1.0
 * Copyright (C) 2003-2011 Alias-i
 *
 * This program is licensed under the Alias-i Royalty Free License
 * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i
 * Royalty Free License Version 1 for more details.
 *
 * You should have received a copy of the Alias-i Royalty Free License
 * Version 1 along with this program; if not, visit
 * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
 * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
 * +1 (718) 290-9170.
 */
package com.aliasi.classify;

import com.aliasi.util.Scored;

import java.util.ArrayList;
import java.util.List;

/**
 * A {@code JointClassifierEvaluator} provides an evaluation harness
 * for joint probability-based n-best classifiers.  It extends the
 * conditional classifier evaluator with joint probability specific
 * evaluation metrics.
 *
 * 

Thread Safety

* * This class must be read-write synchronized externally for use in * multiple threads. * * @author Bob Carpenter * @version 3.9.1 * @since LingPipe3.9.1 * @param The type of objects being classified by the evaluated classifier. */ public class JointClassifierEvaluator extends ConditionalClassifierEvaluator { /** * Construct a scored classifier evaluator with the specified * classifier, categories and flag indicating whether or not to * store inputs. * * @param classifier Classifier to evaluate. * @param categories Complete list of categories. * @param storeInputs Set to {@code true} to store input objects. */ public JointClassifierEvaluator(JointClassifier classifier, String[] categories, boolean storeInputs) { super(classifier,categories,storeInputs); } /** * Set the classifier being evaluated to the specified value. This * method is useful to evaluate multiple classifiers with the same * evaluator, for instance for use in cross-validation. * * @param classifier New classifier for this evaluation. * @throws IllegalArgumentException If called from an evaluator with * a runtime type other than {@code JointClassifierEvaluator}. */ public void setClassifier(JointClassifier classifier) { setClassifier(classifier,JointClassifierEvaluator.class); } /** * Return the classifier being evaluated. * * @return The classifier for this evaluator. */ @Override public JointClassifier classifier() { @SuppressWarnings("unchecked") JointClassifier result = (JointClassifier) super.classifier(); return result; } /** * Returns the average log (base 2) joint probability of the * response category for cases of the specified reference * category. If there are no cases matching the reference * category, the result is Double.NaN. * *

Better classifiers return high values when the reference * and response categories are the same and lower values * when they are different. Unlike the conditional probability * values, joint probability averages are not particularly * useful because they are not normalized by input length. For * the language model classifiers, the scores are normalized * by length, and provide a better cross-case view. * * @param refCategory Reference category. * @param responseCategory Response category. * @return Average log (base 2) conditional probability of * response category in cases for specified reference category. * @throws IllegalArgumentException If the either category is unknown. * @throws ClassCastException if the classifications are not joint * classifications. */ public double averageLog2JointProbability(String refCategory, String responseCategory) { validateCategory(refCategory); validateCategory(responseCategory); double sum = 0.0; int count = 0; for (int i = 0; i < mReferenceCategories.size(); ++i) { if (mReferenceCategories.get(i).equals(refCategory)) { JointClassification c = (JointClassification) mClassifications.get(i); for (int rank = 0; rank < c.size(); ++rank) { if (c.category(rank).equals(responseCategory)) { sum += c.jointLog2Probability(rank); ++count; break; } } } } return sum / (double) count; } /** * Returns the average over all test cases of the joint log (base * 2) probability of the response that matches the reference * category. Better classifiers return higher values for this * average. * *

Whether average scores make sense across training instances * depends on the classifier. For the language-model based * classifiers, the normalized score values are more reasonable * averages. * * @return The average joint log probability of the reference * category in the response. */ public double averageLog2JointProbabilityReference() { double sum = 0.0; for (int i = 0; i < mReferenceCategories.size(); ++i) { String refCategory = mReferenceCategories.get(i).toString(); JointClassification c = (JointClassification) mClassifications.get(i); for (int rank = 0; rank < c.size(); ++rank) { if (c.category(rank).equals(refCategory)) { sum += c.jointLog2Probability(rank); break; } } } return sum / (double) mReferenceCategories.size(); } /** * Returns the joint log (base 2) probability of the entire * evaluation corpus. This is defined independently of the * reference categories by summing over inputs x: * *

* log2 p(corpus) * = Σx in corpus log2 p(x) *
* * where the probability p(x) for a single case with * input x is defined in the usual way by summing * over categories: * *
* p(x) = Σc in cats p(c,x) *
* * @return The log probability of the set of inputs. * @throws ClassCastException if the classifications are not joint * classifications. */ public double corpusLog2JointProbability() { double total = 0.0; for (int i = 0; i < mClassifications.size(); ++i) { JointClassification c = (JointClassification) mClassifications.get(i); double maxJointLog2P = Double.NEGATIVE_INFINITY; for (int rank = 0; rank < c.size(); ++rank) { double jointLog2P = c.jointLog2Probability(rank); if (jointLog2P > maxJointLog2P) maxJointLog2P = jointLog2P; } double sum = 0.0; for (int rank = 0; rank < c.size(); ++rank) sum += Math.pow(2.0,c.jointLog2Probability(rank) - maxJointLog2P); total += maxJointLog2P + com.aliasi.util.Math.log2(sum); } return total; } @Override void baseToString(StringBuilder sb) { super.baseToString(sb); sb.append("Average Log2 Joint Probability Reference=" + averageLog2JointProbabilityReference() + "\n"); } @Override void oneVsAllToString(StringBuilder sb, String category, int i) { super.oneVsAllToString(sb,category,i); sb.append("Average Joint Probability Histogram=\n"); appendCategoryLine(sb); for (int j = 0; j < numCategories(); ++j) { if (j > 0) sb.append(','); sb.append(averageLog2JointProbability(category, categories()[j])); } sb.append("\n"); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy