// com.aliasi.cluster.ClusterScore Maven / Gradle / Ivy
// Show all versions of aliasi-lingpipe Show documentation
/*
* LingPipe v. 4.1.0
* Copyright (C) 2003-2011 Alias-i
*
* This program is licensed under the Alias-i Royalty Free License
* Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Alias-i
* Royalty Free License Version 1 for more details.
*
* You should have received a copy of the Alias-i Royalty Free License
* Version 1 along with this program; if not, visit
* http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
* Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
* +1 (718) 290-9170.
*/
package com.aliasi.cluster;
import com.aliasi.classify.PrecisionRecallEvaluation;
import com.aliasi.util.Distance;
import com.aliasi.util.Tuple;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
/**
* A <code>ClusterScore</code> provides a range of cluster scoring
* metrics for reference partitions versus response partitions.
*
* <p>Traditional evaluation measures for pairs of partitions involve
* comparing the equivalence relations generated by the partitions
* pointwise. A partition defines an equivalence relation in the
* usual way: a pair <code>(A,B)</code> is in the equivalence if there
* is a cluster that contains both <code>A</code> and <code>B</code>.
* Each element is assumed to be equal to itself. A pair
* <code>(A,B)</code> is a true positive if it is an equivalence in
* the reference and response clusters, a false positive if it is in
* the response but not the reference, and so on. The resulting
* precision-recall statistics over the relations are reported through
* {@link #equivalenceEvaluation()}.
*
*
* <p>The scoring used for the Message Understanding Conferences is:
*
* <blockquote><code>
* mucRecall(referencePartition,responsePartition)
* <br />
* = &Sigma;<sub>c in referencePartition</sub>
* (size(c) - overlap(c,responsePartition))
* <br />
* / &Sigma;<sub>c in referencePartition</sub>
* ( size(c) - 1 )
* </code></blockquote>
*
* where <code>size(c)</code> is the number of elements in the
* cluster <code>c</code>, and <code>overlap(c,responsePartition)</code>
* is the number of clusters in the response partition that intersect
* the cluster <code>c</code>. Precision is defined dually by
* reversing the roles of reference and response, and f-measure is defined
* as usual. Further details and examples can be found in:
*
*
* Marc Vilain, John Burger, John Aberdeen, Dennis Connolly, and
* Lynette Hirschman.
* 1995.
* A model-theoretic coreference scoring scheme.
* In Proceedings of the 6th Message Understanding Conference (MUC6).
* 45--52. Morgan Kaufmann.
*
*
* <p>B-cubed cluster scoring was defined as an alternative to the MUC
* scoring metric. There are two variants of B-cubed cluster precision, both
* of which are weighted averages of a per-element precision score:
*
* <blockquote><code>
* b3Precision(A,referencePartition,responsePartition)
* <br />
* = |cluster(responsePartition,A) INTERSECT cluster(referencePartition,A)|
* <br />
* / |cluster(responsePartition,A)|
* </code></blockquote>
*
* where <code>cluster(partition,a)</code> is the cluster in the
* partition <code>partition</code> containing the element <code>a</code>;
* in other words, this is <code>A</code>'s equivalence class and contains
* the set of all elements equivalent to <code>A</code> in the partition.
*
* <p>For the uniform cluster method, each cluster in the response partition is
* weighted equally, and each element is weighted equally within a cluster:
*
* <blockquote><code>
* b3ClusterPrecision(referencePartition,responsePartition)
* <br />
* = &Sigma;<sub>a</sub>
* b3Precision(a,referencePartition,responsePartition)
* <br />
* / (|responsePartition| * |cluster(responsePartition,a)|)
* </code></blockquote>
*
* <p>For the uniform element method, each element <code>a</code>
* is weighted uniformly:
*
* <blockquote><code>
* b3ElementPrecision(referencePartition,responsePartition)
* <br />
* = &Sigma;<sub>a</sub>
* b3Precision(a,referencePartition,responsePartition) / numElements
* </code></blockquote>
*
* where <code>numElements</code> is the total number of elements in
* the partitions. For both B-cubed approaches, recall is defined
* dually by switching the roles of reference and response, and the
* F<sub>1</sub>-measure is defined in the usual way.
*
* The B-cubed method is described in detail in:
*
*
* Bagga, Amit and Breck Baldwin.
* 1998.
* Algorithms
* for scoring coreference chains.
* In Proceedings of the First International Conference
* on Language Resources and Evaluation Workshop on Linguistic
* Coreference.
*
*
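* <p>As an example, consider the following two partitions:
*
* <blockquote><code>
* reference = { {1, 2, 3, 4, 5}, {6, 7}, {8, 9, A, B, C} }
* <br />
* response = { { 1, 2, 3, 4, 5, 8, 9, A, B, C }, { 6, 7} }
* </code></blockquote>
*
* which produce the following values for method calls:
*
* <blockquote>
* <table>
* <caption>Scores for the example partitions</caption>
* <tr><th>Method</th><th>Result</th></tr>
* <tr><td>{@link #equivalenceEvaluation()}</td>
*     <td><code>PrecisionRecallEvaluation(54,0,50,40)</code>
*         <br />TP=54; FN=0; FP=50; TN=40</td></tr>
* <tr><td>{@link #mucPrecision()}</td><td>0.9</td></tr>
* <tr><td>{@link #mucRecall()}</td><td>1.0</td></tr>
* <tr><td>{@link #mucF()}</td><td>0.947</td></tr>
* <tr><td>{@link #b3ElementPrecision()}</td><td>0.583</td></tr>
* <tr><td>{@link #b3ElementRecall()}</td><td>1.0</td></tr>
* <tr><td>{@link #b3ElementF()}</td><td>0.737</td></tr>
* <tr><td>{@link #b3ClusterPrecision()}</td><td>0.75</td></tr>
* <tr><td>{@link #b3ClusterRecall()}</td><td>1.0</td></tr>
* <tr><td>{@link #b3ClusterF()}</td><td>0.857</td></tr>
* </table>
* </blockquote>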
*
*
* Note that there are additional scoring metrics within the {@link
* Dendrogram} class for cophenetic correlation and dendrogram-specific
* within-cluster scatter.
*
* @author Bob Carpenter
* @version 3.8
* @since LingPipe2.0
* @param <E> the type of objects being clustered
*/
public class ClusterScore {
private final PrecisionRecallEvaluation mPrEval;
private final Set extends Set extends E>> mReferencePartition;
private final Set extends Set extends E>> mResponsePartition;
/**
* Construct a cluster score object from the specified reference and
* response partitions. A partition is a set of disjoint sets of
* elements. A partition defines an equivalence relation where the
* disjoint sets represent the equivalence classes.
*
* The reference partition is taken to represent the "truth"
* or the "correct" answer, also known as the "gold standard".
* The response partition is the partition to evaluate against the
* reference partition.
*
*
If the specified partitions are not over the same sets
* or if the equivalence classes are not disjoint, an illegal
* argument exception is raised.
*
* @param referencePartition Partition of reference elements.
* @param responsePartition Partition of response elements.
* @throws IllegalArgumentException If the partitions are not
* valid partitions over the same set of elements.
*/
public ClusterScore(Set extends Set extends E>> referencePartition,
Set extends Set extends E>> responsePartition) {
assertPartitionSameSets(referencePartition,responsePartition);
mReferencePartition = referencePartition;
mResponsePartition = responsePartition;
mPrEval = calculateConfusionMatrix();
}
/**
* Returns the precision-recall evaluation corresponding to
* equalities in the reference and response clusterings.
*
* @return The precision-recall evaluation.
*/
public PrecisionRecallEvaluation equivalenceEvaluation() {
return mPrEval;
}
/**
* Returns the precision as defined by MUC. See the class
* documentation above for definitions.
*
* @return The precision as defined by MUC.
*/
public double mucPrecision() {
return mucRecall(mResponsePartition,mReferencePartition);
}
/**
* Returns the recall as defined by MUC. See the class
* documentation above for definitions.
*
* @return The recall as defined by MUC.
*/
public double mucRecall() {
return mucRecall(mReferencePartition,mResponsePartition);
}
/**
* Returns the F1 measure of the MUC recall
* and precision. See the class
* documentation above for definitions.
*
* @return The F measure as defined by MUC.
*/
public double mucF() {
return f(mucPrecision(),mucRecall());
}
/**
* Returns the precision as defined by B3 metric with
* equal cluster weighting. See the class documentation above for
* definitions.
*
* @return The B-cubed equal cluster precision.
*/
public double b3ClusterPrecision() {
return b3ClusterRecall(mResponsePartition,mReferencePartition);
}
/**
* Returns the recall as defined by B3 metric with
* equal cluster weighting. See the class documentation above for
* definitions.
*
* @return The B-cubed equal cluster recall.
*/
public double b3ClusterRecall() {
return b3ClusterRecall(mReferencePartition,mResponsePartition);
}
/**
* Returns the F1 measure of the
* B3 precision and recall metrics with equal cluster
* weighting. See the class documentation above for definitions.
*
* @return The B-cubed equal cluster F measure.
*/
public double b3ClusterF() {
return f(b3ClusterPrecision(),
b3ClusterRecall());
}
/**
* Returns the precision as defined by B3 metric with
* equal element weighting. See the class documentation above for
* definitions.
*
* @return The B-cubed equal element precision.
*/
public double b3ElementPrecision() {
return b3ElementRecall(mResponsePartition,mReferencePartition);
}
/**
* Returns the recall as defined by B3 metric with
* equal element weighting. See the class documentation above for
* definitions.
*
* @return The B-cubed equal element recall.
*/
public double b3ElementRecall() {
return b3ElementRecall(mReferencePartition,mResponsePartition);
}
/**
* Returns the F1 measure of the
* B3 precision and recall metrics with equal element
* weighting. See the class documentation above for definitions.
*
* @return The B-cubed equal element F measure.
*/
public double b3ElementF() {
return f(b3ElementPrecision(),
b3ElementRecall());
}
/**
* Returns the set of true positive relations for this scoring.
* Each relation is an instance of {@link Tuple}. These true
* positives will include both (x,y)
and
* (y,x)
for a true positive relation between
* x
and y
.
*
* @return The set of true positives.
*/
public Set> truePositives() {
Set> referenceEquivalences = toEquivalences(mReferencePartition);
Set> responseEquivalences = toEquivalences(mResponsePartition);
referenceEquivalences.retainAll(responseEquivalences);
return referenceEquivalences;
}
/**
* Returns the set of false positive relations for this scoring.
* Each relation is an instance of {@link Tuple}. The false
* positives will include both (x,y)
and
* (y,x)
for a false positive relation between
* x
and y
.
*
* @return The set of false positives.
*/
public Set> falsePositives() {
Set> referenceEquivalences = toEquivalences(mReferencePartition);
Set> responseEquivalences = toEquivalences(mResponsePartition);
responseEquivalences.removeAll(referenceEquivalences);
return responseEquivalences;
}
/**
* Returns the set of false negative relations for this scoring.
* Each relation is an instance of {@link Tuple}. The false
* negative set will include both (x,y)
and
* (y,x)
for a false negative relation between
* x
and y
.
*
* @return The set of false negatives.
*/
public Set> falseNegatives() {
Set> referenceEquivalences = toEquivalences(mReferencePartition);
Set> responseEquivalences = toEquivalences(mResponsePartition);
referenceEquivalences.removeAll(responseEquivalences);
return referenceEquivalences;
}
private PrecisionRecallEvaluation calculateConfusionMatrix() {
Set> referenceEquivalences = toEquivalences(mReferencePartition);
Set> responseEquivalences = toEquivalences(mResponsePartition);
long tp = 0;
long fn = 0;
for (Tuple tuple : referenceEquivalences) {
if (responseEquivalences.remove(tuple))
++tp;
else
++fn;
}
long numElements = ClusterScore.elementsOf(mReferencePartition).size();
long totalCount = numElements * numElements;
long fp = responseEquivalences.size();
long tn = totalCount - tp - fn - fp;
return new PrecisionRecallEvaluation(tp,fn,fp,tn);
}
/**
* Returns a string representation of the statistics for this
* score. The string includes the information in all of the
* methods of this class: b3 scores by cluster and by element,
* muc scores, and the precision-recall evaluation based on
* equivalence.
*
* @return String-based representation of this score.
*/
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("CLUSTER SCORE");
sb.append("\nEquivalence Evaluation\n");
sb.append(mPrEval.toString());
sb.append("\nMUC Evaluation");
sb.append("\n MUC Precision = " + mucPrecision());
sb.append("\n MUC Recall = " + mucRecall());
sb.append("\n MUC F(1) = " + mucF());
sb.append("\nB-Cubed Evaluation");
sb.append("\n B3 Cluster Averaged Precision = "
+ b3ClusterPrecision());
sb.append("\n B3 Cluster Averaged Recall = " + b3ClusterRecall());
sb.append("\n B3 Cluster Averaged F(1) = " + b3ClusterF());
sb.append("\n B3 Element Averaged Precision = "
+ b3ElementPrecision());
sb.append("\n B3 Element Averaged Recall = " + b3ElementRecall());
sb.append("\n B3 Element Averaged F(1) = " + b3ElementF());
return sb.toString();
}
/**
* Returns the within-cluster scatter measure for the specified
* clustering with respect to the specified distance. The
* within-cluster scatter is simply the sum of the scatters for
* each set in the clustering; see {@link #scatter(Set,Distance)}
* for a definition of scatter.
*
*
* withinClusterScatter(clusters,distance)
* = Σcluster in clusters scatter(cluster,distance)
*
* As the number of clusters increases, the within-cluster
* scatter decreases monotonically. Typically, this is used
* to determine how many clusters to return, by inspecting
* a plot of within-cluster scatter against number of clusters
* and looking for a "knee" in the graph.
*
* @param clustering Clustering to evaluate.
* @param distance Distance against which to evaluate.
* @return The within-cluster scatter score.
* @param the type of objects being clustered
*/
static public double
withinClusterScatter(Set extends Set extends E>> clustering,
Distance super E> distance) {
double scatter = 0.0;
for (Set extends E> s : clustering)
scatter += scatter(s,distance);
return scatter;
}
/**
* Returns the scatter for the specified cluster based on the
* specified distance. The scatter is the sum of all of the
* pairwise distances between elements, with each pair of elements
* counted once. Abusing notation to use xs[i]
for
* the i
th element returned by the set's iterator,
** scatter is defined by:
*
*
* scatter(xs,distance)
* = Σi Σj < i distance(xs[i],xs[j])
*
* Note that elements are not compared to themselves. This
* presupposes a distance for which the distance of an element to
* itself is zero and which is symmetric.
*
* @param cluster Cluster to evaluate.
* @param distance Distance against which to evaluate.
* @return The total scatter for the specified set.
* @param the type of objects being clustered
*/
static public double scatter(Set extends E> cluster,
Distance super E> distance) {
// required for array; want array for indexing
@SuppressWarnings("unchecked")
E[] elements = (E[]) cluster.toArray();
double scatter = 0.0;
for (int i = 0; i < elements.length; ++i)
for (int j = i+1; j < elements.length; ++j)
scatter += distance.distance(elements[i],elements[j]);
return scatter;
}
// includes self-equivalences for completeness of counts
Set> toEquivalences(Set extends Set extends E>> partition) {
Set> equivalences = new HashSet>();
for (Set extends E> equivalenceClass : partition) {
// required for array
@SuppressWarnings("unchecked")
E[] xs = (E[]) new Object[equivalenceClass.size()];
equivalenceClass.toArray(xs);
for (int i = 0; i < xs.length; ++i)
for (int j = 0; j < xs.length; ++j)
equivalences.add(Tuple.create(xs[i],xs[j]));
}
return equivalences;
}
private static double b3ElementRecall(Set extends Set extends F>> referencePartition,
Set extends Set extends F>> responsePartition) {
double score = 0.0;
Set elementsOfReference = ClusterScore.elementsOf(referencePartition);
for (Set extends F> referenceEqClass : referencePartition)
for (F referenceEqClassElt : referenceEqClass)
score += uniformElementWeight(elementsOfReference)
* b3Recall(referenceEqClassElt,
referenceEqClass,responsePartition);
return score;
}
private static double uniformElementWeight(Set extends F> elements) {
return 1.0 / (double) elements.size();
}
private static double uniformClusterWeight(Set extends F> eqClass,
Set extends Set extends F>> partition) {
return 1.0 / ((double) (eqClass.size() * partition.size()));
}
private static double b3ClusterRecall(Set extends Set extends F>> referencePartition,
Set extends Set extends F>> responsePartition) {
double score = 0.0;
for (Set extends F> referenceEqClass : referencePartition)
for (F referenceEqClassElt : referenceEqClass)
score += uniformClusterWeight(referenceEqClass,referencePartition)
* b3Recall(referenceEqClassElt,
referenceEqClass,responsePartition);
return score;
}
private static double b3Recall(F element,
Set extends F> referenceEqClass,
Set extends Set extends F>> responsePartition) {
Set extends F> responseClass = getEquivalenceClass(element,responsePartition);
return ClusterScore.recallSets(referenceEqClass,responseClass);
}
private static double recallSets(Set extends F> referenceSet, Set extends F> responseSet) {
if (referenceSet.size() == 0) return 1.0;
return ((double) intersectionSize(referenceSet,responseSet))
/ (double) referenceSet.size();
}
private static long intersectionSize(Set extends F> set1, Set extends F> set2) {
long count = 0;
for (F f : set1)
if (set2.contains(f))
++count;
return count;
}
private static void assertPartitionSameSets(Set extends Set extends F>> set1,
Set extends Set extends F>> set2) {
ClusterScore.assertValidPartition(set1);
ClusterScore.assertValidPartition(set2);
if (!elementsOf(set1).equals(elementsOf(set2))) {
String msg = "Partitions must be of same sets.";
throw new IllegalArgumentException(msg);
}
}
private static void assertValidPartition(Set extends Set extends F>> partition) {
Set eltsSoFar = new HashSet();
for (Set extends F> eqClass : partition) {
for (F member : eqClass) {
if (!eltsSoFar.add(member)) {
String msg = "Partitions must not contain overlapping members."
+ " Found overlapping element=" + member;
throw new IllegalArgumentException(msg);
}
}
}
}
private static Set extends F> getEquivalenceClass(F element,
Set extends Set extends F>> partition) {
for (Set extends F> equivalenceClass : partition)
if (equivalenceClass.contains(element))
return equivalenceClass;
throw new IllegalArgumentException("Element must be in an equivalence class in partition.");
}
private static Set elementsOf(Set extends Set extends F>> partition) {
Set elementSet = new HashSet();
for (Set extends F> eqClass : partition)
elementSet.addAll(eqClass);
return elementSet;
}
private static double f(double precision,
double recall) {
return 2.0 * precision * recall
/ (precision + recall);
}
private static double mucRecall(Set extends Set extends F>> referencePartition,
Set extends Set extends F>> responsePartition) {
long numerator = 0;
long denominator = 0;
for (Set extends F> referenceEqClass : referencePartition) {
long numPartitions = 0;
for (Set extends F> responseEqClass : responsePartition) {
if (!Collections.disjoint(referenceEqClass,responseEqClass))
++numPartitions;
}
numerator += referenceEqClass.size() - numPartitions;
denominator += referenceEqClass.size() - 1;
}
if (denominator == 0) return 1.0;
return ((double) numerator) / (double) denominator;
}
}