com.aliasi.classify.JointClassifierEvaluator Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of aliasi-lingpipe Show documentation
This is the original LingPipe: http://alias-i.com/lingpipe/web/download.html No changes were made to the source code.
There is a newer version: 4.1.2-JL1.0
Show newest version
/*
 * LingPipe v. 4.1.0
 * Copyright (C) 2003-2011 Alias-i
 *
 * This program is licensed under the Alias-i Royalty Free License
 * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i
 * Royalty Free License Version 1 for more details.
 *
 * You should have received a copy of the Alias-i Royalty Free License
 * Version 1 along with this program; if not, visit
 * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
 * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
 * +1 (718) 290-9170.
 */
package com.aliasi.classify;

import com.aliasi.util.Scored;

import java.util.ArrayList;
import java.util.List;

/**
 * A {@code JointClassifierEvaluator} provides an evaluation harness
 * for joint probability-based n-best classifiers.  It extends the
 * conditional classifier evaluator with joint probability specific
 * evaluation metrics.
 *
 * <p><b>Thread Safety:</b> This class must be read-write synchronized
 * externally for use in multiple threads.
 *
 * @author  Bob Carpenter
 * @version 3.9.1
 * @since   LingPipe3.9.1
 * @param  The type of objects being classified by the evaluated classifier.
 */
public class JointClassifierEvaluator extends ConditionalClassifierEvaluator {

    /**
     * Construct a joint classifier evaluator with the specified
     * classifier, categories and flag indicating whether or not to
     * store inputs.
     *
     * @param classifier Classifier to evaluate.
     * @param categories Complete list of categories.
     * @param storeInputs Set to {@code true} to store input objects.
     */
    public JointClassifierEvaluator(JointClassifier classifier,
                                    String[] categories,
                                    boolean storeInputs) {
        super(classifier,categories,storeInputs);
    }

    /**
     * Set the classifier being evaluated to the specified value.  This
     * method is useful to evaluate multiple classifiers with the same
     * evaluator, for instance for use in cross-validation.
     *
     * @param classifier New classifier for this evaluation.
     * @throws IllegalArgumentException If called from an evaluator with
     * a runtime type other than {@code JointClassifierEvaluator}.
     */
    public void setClassifier(JointClassifier classifier) {
        setClassifier(classifier,JointClassifierEvaluator.class);
    }

    /**
     * Return the classifier being evaluated.
     *
     * @return The classifier for this evaluator.
     */
    @Override
    public JointClassifier classifier() {
        @SuppressWarnings("unchecked")
        JointClassifier result
            = (JointClassifier) super.classifier();
        return result;
    }

    /**
     * Returns the average log (base 2) joint probability of the
     * response category for cases of the specified reference
     * category.
     *
     * <p>Only cases whose reference category equals
     * {@code refCategory} and whose classification contains
     * {@code responseCategory} at some rank are averaged; a case
     * whose classification does not list the response category is
     * not counted.  If no case is counted, the result is
     * {@code Double.NaN}.
     *
     * <p>Better classifiers return high values when the reference
     * and response categories are the same and lower values
     * when they are different.  Unlike the conditional probability
     * values, joint probability averages are not particularly
     * useful because they are not normalized by input length.  For
     * the language model classifiers, the scores are normalized
     * by length, and provide a better cross-case view.
     *
     * @param refCategory Reference category.
     * @param responseCategory Response category.
     * @return Average log (base 2) joint probability of the
     * response category in cases for the specified reference category.
     * @throws IllegalArgumentException If either category is unknown.
     * @throws ClassCastException if the classifications are not joint
     * classifications.
     */
    public double averageLog2JointProbability(String refCategory,
                                              String responseCategory) {
        validateCategory(refCategory);
        validateCategory(responseCategory);
        double sum = 0.0;
        int count = 0;
        for (int i = 0; i < mReferenceCategories.size(); ++i) {
            if (mReferenceCategories.get(i).equals(refCategory)) {
                JointClassification c
                    = (JointClassification) mClassifications.get(i);
                // Find the rank at which the response category appears, if any.
                for (int rank = 0; rank < c.size(); ++rank) {
                    if (c.category(rank).equals(responseCategory)) {
                        sum += c.jointLog2Probability(rank);
                        ++count;
                        break;
                    }
                }
            }
        }
        // With count == 0 this is 0.0/0.0, which is the documented NaN result.
        return sum / (double) count;
    }


    /**
     * Returns the average over all test cases of the joint log (base
     * 2) probability of the response that matches the reference
     * category.  Better classifiers return higher values for this
     * average.
     *
     * <p>The sum is divided by the total number of cases.  A case
     * whose classification does not list its reference category
     * adds nothing to the sum but is still counted in the
     * denominator.  If there are no cases, the result is
     * {@code Double.NaN}.
     *
     * <p>Whether average scores make sense across training instances
     * depends on the classifier.  For the language-model based
     * classifiers, the normalized score values are more reasonable
     * averages.
     *
     * @return The average joint log probability of the reference
     * category in the response.
     * @throws ClassCastException if the classifications are not joint
     * classifications.
     */
    public double averageLog2JointProbabilityReference() {
        double sum = 0.0;
        for (int i = 0; i < mReferenceCategories.size(); ++i) {
            String refCategory = mReferenceCategories.get(i).toString();
            JointClassification c
                = (JointClassification) mClassifications.get(i);
            for (int rank = 0; rank < c.size(); ++rank) {
                if (c.category(rank).equals(refCategory)) {
                    sum += c.jointLog2Probability(rank);
                    break;
                }
            }
        }
        return sum / (double) mReferenceCategories.size();
    }

    /**
     * Returns the joint log (base 2) probability of the entire
     * evaluation corpus.  This is defined independently of the
     * reference categories by summing over inputs {@code x}:
     *
     * <pre>
     * log2 p(corpus) = SUM_{x in corpus} log2 p(x)
     * </pre>
     *
     * where the probability {@code p(x)} for a single case with
     * input {@code x} is defined in the usual way by summing
     * over categories:
     *
     * <pre>
     * p(x) = SUM_{c in cats} p(c,x)
     * </pre>
     *
     * <p>A case in which every category has zero joint probability
     * (or which lists no categories) has {@code log2 p(x)} equal to
     * negative infinity, which makes the corpus total negative
     * infinity.
     *
     * @return The log probability of the set of inputs.
     * @throws ClassCastException if the classifications are not joint
     * classifications.
     */
    public double corpusLog2JointProbability() {
        double total = 0.0;
        for (int i = 0; i < mClassifications.size(); ++i) {
            JointClassification c
                = (JointClassification) mClassifications.get(i);
            total += log2SumOfJointProbabilities(c);
        }
        return total;
    }

    /**
     * Returns the log (base 2) of the sum over all ranks of the joint
     * probabilities in the specified classification.
     *
     * <p>The largest log probability is factored out before
     * exponentiating, so the largest term in the sum is
     * {@code 2^0 = 1}.
     *
     * @param c Classification whose joint probabilities are summed.
     * @return Log (base 2) of the summed joint probabilities, or
     * negative infinity if no rank contributes a non-zero probability.
     */
    private static double log2SumOfJointProbabilities(JointClassification c) {
        double maxJointLog2P = Double.NEGATIVE_INFINITY;
        for (int rank = 0; rank < c.size(); ++rank) {
            double jointLog2P = c.jointLog2Probability(rank);
            if (jointLog2P > maxJointLog2P) {
                maxJointLog2P = jointLog2P;
            }
        }
        double sum = 0.0;
        for (int rank = 0; rank < c.size(); ++rank) {
            double jointLog2P = c.jointLog2Probability(rank);
            // A zero-probability category adds nothing to the sum.  Skipping
            // it avoids (-infinity) - (-infinity) = NaN when every category
            // has zero probability and the maximum is itself -infinity.
            if (jointLog2P == Double.NEGATIVE_INFINITY) {
                continue;
            }
            sum += Math.pow(2.0,jointLog2P - maxJointLog2P);
        }
        // With a finite maximum the sum is at least 1, so a zero sum means
        // no category contributed any probability mass.
        if (sum == 0.0) {
            return Double.NEGATIVE_INFINITY;
        }
        return maxJointLog2P + com.aliasi.util.Math.log2(sum);
    }

    /**
     * Appends the superclass's base report to the specified buffer,
     * followed by one line reporting
     * {@link #averageLog2JointProbabilityReference()}.
     *
     * @param sb Buffer to which the report is appended.
     */
    @Override
    void baseToString(StringBuilder sb) {
        super.baseToString(sb);
        sb.append("Average Log2 Joint Probability Reference=")
            .append(averageLog2JointProbabilityReference())
            .append("\n");
    }

    /**
     * Appends the superclass's one-versus-all report for the specified
     * category, followed by a header, the category line, and one
     * comma-separated line holding
     * {@link #averageLog2JointProbability(String,String)} of the
     * specified category against each category in turn.
     *
     * @param sb Buffer to which the report is appended.
     * @param category Reference category being reported.
     * @param i Index passed through unchanged to the superclass.
     */
    @Override
    void oneVsAllToString(StringBuilder sb, String category, int i) {
        super.oneVsAllToString(sb,category,i);
        sb.append("Average Joint Probability Histogram=\n");
        appendCategoryLine(sb);
        for (int j = 0; j < numCategories(); ++j) {
            if (j > 0) sb.append(',');
            sb.append(averageLog2JointProbability(category,
                                                  categories()[j]));
        }
        sb.append("\n");
    }


}