All downloads are free. Search and download functionality uses the official Maven repository.

com.aliasi.classify.BaseClassifierEvaluator Maven / Gradle / Ivy

Go to download

This is the original LingPipe: http://alias-i.com/lingpipe/web/download.html No changes were made to the source code.

There is a newer version: 4.1.2-JL1.0
Show newest version
/*
 * LingPipe v. 4.1.0
 * Copyright (C) 2003-2011 Alias-i
 *
 * This program is licensed under the Alias-i Royalty Free License
 * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i
 * Royalty Free License Version 1 for more details.
 *
 * You should have received a copy of the Alias-i Royalty Free License
 * Version 1 along with this program; if not, visit
 * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
 * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
 * +1 (718) 290-9170.
 */
package com.aliasi.classify;

import com.aliasi.corpus.ObjectHandler;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * A BaseClassifierEvaluator provides an evaluation harness
 * for first-best classifiers.  An evaluator is constructed from a classifier and
 * a complete list of the categories returned by the classifier.  
 *
 * 

Test cases are then added using the {@link #handle(Classified)} * which accepts a string-based category and object to classify. The * evaluator will run the classifier over the input object and collect * results over multiple cases. * *

There are subtypes of this classifier that evaluate richer * classifiers. * *

An exhaustive set of evaluation metrics for first-best * classification results is accessbile as a confusion matrix through * the {@link #confusionMatrix()} method. Confusion matrices provide * dozens of statistics on classification which can be computed from * first-best results; see {@link ConfusionMatrix} for more * information. * *

Thread Safety

* *

This class requires concurrent read and synchronous write * synchronization. Reads are any of the statistics gathering methods * and write is just adding new test cases. * *

Storing Cases

* * This class always stores the classification results and true * category of an cases. There is a flag in the constructor that * additionally allows the inputs for cases to be stored as part of an * evaluation. If the flag is set to {@code true}, all input cases * are stored. This enables the output of true positives, false * positives, false negatives, and true negatives through the methods * of the same names. * * @author Bob Carpenter * @version 3.9.1 * @since LingPipe3.9.1 * @param The type of objects being classified by the evaluated classifier. */ public class BaseClassifierEvaluator implements ObjectHandler> { private final ConfusionMatrix mConfusionMatrix; private int mNumCases = 0; final String[] mCategories; final Set mCategorySet; final boolean mStoreInputs; BaseClassifier mClassifier; final List mClassifications = new ArrayList(); final List mCases = new ArrayList(); final List mReferenceCategories = new ArrayList(); /** * Construct a classifier evaluator for the specified classifier * that records results for the specified set of categories, * storing cases or not based on the specified flag. * *

If the classifier evaluator is only going to be populated * using the {@link #addClassification(String,Classification,Object)} * method, then the classifier may be null. * * @param classifier Classifier to evaluate. * @param categories Categories of the classifier. * @param storeInputs Flag indicating whether input cases should be * stored. */ public BaseClassifierEvaluator(BaseClassifier classifier, String[] categories, boolean storeInputs) { mClassifier = classifier; mStoreInputs = storeInputs; mCategories = categories; mCategorySet = new HashSet(); Collections.addAll(mCategorySet,categories); mConfusionMatrix = new ConfusionMatrix(categories); } /** * Returns the number of categories for the classifier being * evaluated. * * @return Number of categories being evaluated. */ public int numCategories() { return mCategories.length; } /** * Returns a copy of the the categories for which this evaluator * stores results. * * @return The categories for which this evaluator stores * results. */ public String[] categories() { return mCategories.clone(); } /** * Returns the classifier for this evaluator. * * @return The classifier for this evaluator. */ public BaseClassifier classifier() { return mClassifier; } /** * Set the classfier for this evaluator to the specified value. * This method allows the results of evaluating several different * classifiers to be aggregated into a single evaluation. The * primary use case is cross-validation, where a single evaluator * may be used to evaluate classifiers trained on different folds. * *

This method will throw an exception if called from an * evaluator with a more specific runtime type. * * @param classifier New classifier for this evaluator. * @throws IllegalArgumentException If called from a class with * a runtime type other than {@code BaseClassifierEvaluator}. */ public void setClassifier(BaseClassifier classifier) { setClassifier(classifier,BaseClassifierEvaluator.class); } /** * Returns the list of true positive cases along with their * classifications for items of the specified category. * *

The cases will be returned in decreasing order of * conditional probability if applicable, decreasing order of * score otherwise, and if not scored, in the order in which they * were processed. * *

A true positive case for the specified category has * reference category equal to the specified category and * first-best classification result equal to the specified * category. * * @param category Category whose cases are returned. * @return True positives for specified category. * @throws UnsupportedOperationException If this class does not * store its cases. */ public List> truePositives(String category) { return caseTypes(category,true,true); } /** * Returns the list of false positive cases along with their * classifications for items of the specified category. * *

A false positive case for the specified category has * reference category unequal to the specified category and * first-best classification result equal to the specified * category. * * @param category Category whose cases are returned. * @return False positives for specified category. * @throws UnsupportedOperationException If this class does not * store its cases. */ public List> falsePositives(String category) { return caseTypes(category,false,true); } /** * Returns the list of false negative cases along with their * classifications for items of the specified category. * *

A false negative case for the specified category has * reference category equal to the specified category and * first-best classification result unequal to the specified * category. * * @param category Category whose cases are returned. * @return False negatives for specified category. * @throws UnsupportedOperationException If this class does not * store its cases. */ public List> falseNegatives(String category) { return caseTypes(category,true,false); } /** * Returns the list of true negative cases along with their * classifications for items of the specified category. * *

A true negative case for the specified category has * reference category unequal to the specified category and * first-best classification result unequal to the specified * category. * * @param category Category whose cases are returned. * @return True positives for specified category. * @throws UnsupportedOperationException If this class does not * store its cases. */ public List> trueNegatives(String category) { return caseTypes(category,false,false); } /** * Add the specified classified object to this evaluator. * * @param classified Classified object to add to evaluation. */ public void handle(Classified classified) { // CUT AND PASTE INTO SUBCLASS E input = classified.getObject(); Classification refClassification = classified.getClassification(); String refCategory = refClassification.bestCategory(); validateCategory(refCategory); Classification classification = mClassifier.classify(input); addClassification(refCategory,classification,input); } /** * Adds the specified classification as a response for the specified * reference category. If this evaluator stores cases, the input * will be stored. * * @param referenceCategory Reference category for case. * @param classification Response classification for case. * @param input Input for the specified classification. */ public void addClassification(String referenceCategory, Classification classification, E input) { addClassificationOld(referenceCategory,classification); if (mStoreInputs) mCases.add(input); } /** * Returns the number of test cases which have been provided * to this evaluator. * * @return The number of test cases which have been provided * to this evaluator. */ public int numCases() { return mNumCases; } /** * Returns the confusion matrix of first-best classification * result statistics for this evaluator. See {@link * ConfusionMatrix} for details of the numerous available * evaluation metrics provided by confusion matrices. * * @return The confusion matrix for the test cases evaluated so far. 
*/ public ConfusionMatrix confusionMatrix() { return mConfusionMatrix; } /** * Returns the first-best one-versus-all precision-recall * evaluation of the classification of the specified reference * category versus all other categories. This method may be * called for any evaluation. * * @param refCategory Reference category. * @return The first-best one-versus-all precision-recall * evaluatuion. * @throws IllegalArgumentException If the specified category * is unknown. */ public PrecisionRecallEvaluation oneVersusAll(String refCategory) { validateCategory(refCategory); PrecisionRecallEvaluation prEval = new PrecisionRecallEvaluation(); int numCases = mReferenceCategories.size(); for (int i = 0; i < numCases; ++i) { Object caseRefCategory = mReferenceCategories.get(i); Classification response = mClassifications.get(i); Object caseResponseCategory = response.bestCategory(); boolean inRef = caseRefCategory.equals(refCategory); boolean inResp = caseResponseCategory.equals(refCategory); prEval.addCase(inRef,inResp); } return prEval; } /** * Return a string-based representation of the results of the * evaluation. * * @return String-based representation of the results. */ @Override public String toString() { StringBuilder sb = new StringBuilder(); baseToString(sb); sb.append("\nONE VERSUS ALL EVALUATIONS BY CATEGORY\n"); String[] cats = categories(); for (int i = 0; i < cats.length; ++i) { sb.append("\n\nCATEGORY[" + i + "]=" + cats[i] + " VERSUS ALL\n"); oneVsAllToString(sb,cats[i],i); } return sb.toString(); } void baseToString(StringBuilder sb) { sb.append("BASE CLASSIFIER EVALUATION\n"); mConfusionMatrix.toStringGlobal(sb); } void oneVsAllToString(StringBuilder sb, String category, int i) { sb.append("\nFirst-Best Precision/Recall Evaluation\n"); sb.append(oneVersusAll(category)); sb.append('\n'); } void setClassifier(BaseClassifier classifier, Class clazz) { if (!this.getClass().equals(clazz)) { String msg = "Require appropriate classifier type." 
+ " Evaluator class=" + this.getClass() + " Found classifier.class=" + classifier.getClass(); throw new IllegalArgumentException(msg); } mClassifier = classifier; } private List> caseTypes(String category, boolean refMatch, boolean respMatch) { if (!mStoreInputs) { String msg = "Class must store items to return true positives." + " Use appropriate constructor flag to store."; throw new UnsupportedOperationException(msg); } List> result = new ArrayList>(); for (int i = 0; i < mReferenceCategories.size(); ++i) { String refCat = mReferenceCategories.get(i); Classification c = mClassifications.get(i); String respCat = c.bestCategory(); if (category.equals(refCat) != refMatch) continue; if (category.equals(respCat) != respMatch) continue; Classified classified = new Classified(mCases.get(i),c); result.add(classified); } return result; } private void addClassificationOld(String referenceCategory, Classification classification) { mConfusionMatrix.increment(referenceCategory, classification.bestCategory()); mReferenceCategories.add(referenceCategory); mClassifications.add(classification); ++mNumCases; } void validateCategory(String category) { if (mCategorySet.contains(category)) return; String msg = "Unknown category=" + category; throw new IllegalArgumentException(msg); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy