
cc.mallet.cluster.neighbor_evaluator.MedoidEvaluator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mallet Show documentation
Show all versions of mallet Show documentation
MALLET is a Java-based package for statistical natural language processing,
document classification, clustering, topic modeling, information extraction,
and other machine learning applications to text.
The newest version!
package cc.mallet.cluster.neighbor_evaluator;
//import weka.core.Instances;
import cc.mallet.classify.Classifier;
import cc.mallet.cluster.Clustering;
import cc.mallet.cluster.util.PairwiseMatrix;
import cc.mallet.types.MatrixOps;
/**
* Uses a {@link Classifier} over pairs of instances to score
* {@link Neighbor}s. Currently only supports {@link
* AgglomerativeNeighbor}s.
*
* @author "Michael Wick"
* @version 1.0
* @since 1.0
* @see ClassifyingNeighborEvaluator
*/
public class MedoidEvaluator extends ClassifyingNeighborEvaluator {
private static final long serialVersionUID = 1L;
/**
* If single link is true, then the score of clusters A and B is the score of the link between the two medoids.
*/
boolean singleLink=false;
/**
* How to combine a set of pairwise scores (e.g. mean, max, ...)... [currently not supported in this class]
*/
CombiningStrategy combiningStrategy;
/**
* If true, score all edges involved in a merge. If false, only
* score the edges that cross the boundaries of the clusters being
* merged.
*/
boolean mergeFirst=true;
/**
* Cache for calls to getScore. In some experiments, reduced running
* time by nearly half.
*/
PairwiseMatrix scoreCache;
/**
 * Creates a medoid evaluator that keeps the field defaults
 * ({@code singleLink = false}, {@code mergeFirst = true}) and announces
 * itself on standard output.
 *
 * @param classifier Classifier to assign scores to {@link
 * Neighbor}s for which a pair of Instances has been merged.
 * @param scoringLabel The predicted label that corresponds to a
 * positive example (e.g. "YES").
 */
public MedoidEvaluator(Classifier classifier, String scoringLabel) {
    super(classifier, scoringLabel);
    System.out.println("Using Medoid Evaluator");
}
public MedoidEvaluator(Classifier classifier, String scoringLabel,boolean singleLink,boolean mergeFirst)
{
super(classifier,scoringLabel);
this.singleLink=singleLink;
this.mergeFirst=mergeFirst;
System.out.println("Using Medoid Evaluator. Single link="+singleLink+".");
}
/*
public MedoidEvaluator (Classifier classifier,
String scoringLabel,
CombiningStrategy combiningStrategy,
boolean mergeFirst) {
super(classifier, scoringLabel);
this.combiningStrategy = combiningStrategy;
this.mergeFirst = mergeFirst;
System.out.println("Using Centroid Evaluator (2)");
}
*/
/**
 * Scores every neighbor in the array by calling
 * {@link #evaluate(Neighbor)} on each element in order.
 *
 * @param neighbors the neighbors to score
 * @return an array of the same length as {@code neighbors}, where element
 * {@code i} is the score of {@code neighbors[i]}
 */
public double[] evaluate (Neighbor[] neighbors) {
    final int count = neighbors.length;
    final double[] results = new double[count];
    int idx = 0;
    while (idx < count) {
        results[idx] = evaluate(neighbors[idx]);
        idx++;
    }
    return results;
}
/**
 * Scores a single {@link AgglomerativeNeighbor} using the medoids of the two
 * clusters it merges.
 * <p>
 * NOTE(review): this copy of the method is damaged. The loop header further
 * down reads {@code for(int i=0;icentDist)}; the text between the loop
 * index and the {@code centDist} comparison appears to have been lost
 * (probably everything between two angle brackets was stripped during
 * extraction). The statements after that point use {@code scores},
 * {@code centDist} and {@code centIdx}, none of which are declared in the
 * visible text, so they presumably belong to the tail of
 * {@code getCentroid}. Restore the original source before relying on
 * anything past that line.
 *
 * @param neighbor the proposed merge; must be an {@link AgglomerativeNeighbor}
 * @return in single-link mode, the score of the pair formed by the two
 * medoids; the return value of the other mode is not visible in this copy
 * @throws IllegalArgumentException if {@code neighbor} is not an
 * {@link AgglomerativeNeighbor}
 */
public double evaluate(Neighbor neighbor)
{
// result[k] holds what getCentroid returns for old cluster k; below it is
// used as a position inside oldIndices[k], not as an instance index.
int result[] = new int[2];
if (!(neighbor instanceof AgglomerativeNeighbor))
throw new IllegalArgumentException("Expect AgglomerativeNeighbor not " + neighbor.getClass().getName());
int[][] oldIndices = ((AgglomerativeNeighbor)neighbor).getOldClusters();
// NOTE(review): mergedIndices is not referenced in the visible part of this method.
int[] mergedIndices=((AgglomerativeNeighbor)neighbor).getNewCluster();
Clustering original = neighbor.getOriginal();
result[0]=getCentroid(oldIndices[0],original);
result[1]=getCentroid(oldIndices[1],original);
if(singleLink) //scores a cluster based on link between medoid of each cluster
{
// Build a pairwise neighbor from the two medoid instances and score only that pair.
AgglomerativeNeighbor pwn = new AgglomerativeNeighbor(original,original,oldIndices[0][result[0]],oldIndices[1][result[1]]);
double score = getScore(pwn);
return score;
}
//
// Otherwise: a weighted average, with weights proportional to similarity to
// the medoid (per the original comment; the computation itself is cut off below).
double[] medsA=getMedWeights(result[0],oldIndices[0],original);
double[] medsB=getMedWeights(result[1],oldIndices[1],original);
double numerator=0;
double denominator=0;
// NOTE(review): the next line is garbled -- source text is missing between
// "i" and "centDist". What follows looks like the end of getCentroid.
for(int i=0;icentDist)
{
centDist=scores[i];
centIdx=i;
//centIdx=indices[i];
}
}
return centIdx;
}
/*
public double evaluate (Neighbor neighbor) {
if (!(neighbor instanceof AgglomerativeNeighbor))
throw new IllegalArgumentException("Expect AgglomerativeNeighbor not " + neighbor.getClass().getName());
Clustering original = neighbor.getOriginal();
int[] mergedIndices = ((AgglomerativeNeighbor)neighbor).getNewCluster();
ArrayList scores = new ArrayList();
for (int i = 0; i < mergedIndices.length; i++) {
for (int j = i + 1; j < mergedIndices.length; j++) {
if ((original.getLabel(mergedIndices[i]) != original.getLabel(mergedIndices[j])) || mergeFirst) {
AgglomerativeNeighbor pwneighbor =
new AgglomerativeNeighbor(original, original,
mergedIndices[i], mergedIndices[j]);
scores.add(new Double(getScore(pwneighbor)));
}
}
}
if (scores.size() < 1)
throw new IllegalStateException("No pairs of Instances were scored.");
double[] vals = new double[scores.size()];
for (int i = 0; i < vals.length; i++)
vals[i] = ((Double)scores.get(i)).doubleValue();
return combiningStrategy.combine(vals);
}
*/
/**
 * Discards the score cache. The next call to {@code getScore} allocates a
 * fresh one.
 */
public void reset () {
    this.scoreCache = null;
}
/**
 * Describes this evaluator by the runtime class names of the evaluator
 * itself and of its classifier.
 *
 * @return a string of the form {@code class=<name> classifier=<name>}
 */
@Override
public String toString () {
    final StringBuilder text = new StringBuilder();
    text.append("class=").append(this.getClass().getName());
    text.append(" classifier=").append(classifier.getClass().getName());
    return text.toString();
}
/**
 * Returns the classifier's score for a pairwise neighbor, consulting the
 * score cache first.
 * <p>
 * The cache is created lazily, sized from the number of instances in the
 * first neighbor's original clustering. A cached value equal to
 * {@code 0.0} is treated as "not computed yet", so a pair whose score is
 * exactly {@code 0.0} is re-classified on every call.
 *
 * @param pwneighbor neighbor whose new cluster supplies the two instance
 * indices used as the cache key
 * @return the value of {@code scoringLabel} in the classifier's label
 * vector for this pair, as stored in the cache
 */
private double getScore (AgglomerativeNeighbor pwneighbor) {
    if (scoreCache == null) {
        final int instanceCount = pwneighbor.getOriginal().getNumInstances();
        scoreCache = new PairwiseMatrix(instanceCount);
    }
    final int[] pair = pwneighbor.getNewCluster();
    final int first = pair[0];
    final int second = pair[1];
    final boolean uncached = scoreCache.get(first, second) == 0.0;
    if (uncached) {
        final double fresh =
            classifier.classify(pwneighbor).getLabelVector().value(scoringLabel);
        scoreCache.set(first, second, fresh);
    }
    // Read back through the cache, as the original does.
    return scoreCache.get(first, second);
}
/**
 * Strategy for reducing a set of pairwise scores to a single
 * cluster-wise score.
 *
 * @author "Aron Culotta"
 * @version 1.0
 * @since 1.0
 */
public interface CombiningStrategy {
    /**
     * Combines the given pairwise scores into one value.
     *
     * @param scores the pairwise scores to combine
     * @return the combined score
     */
    double combine (double[] scores);
}
/**
 * Combines pairwise scores by delegating to {@code MatrixOps.mean}.
 */
public static class Average implements CombiningStrategy {
    public double combine (double[] scores) {
        final double combined = MatrixOps.mean(scores);
        return combined;
    }
}
/**
 * Combines pairwise scores by delegating to {@code MatrixOps.min}.
 */
public static class Minimum implements CombiningStrategy {
    public double combine (double[] scores) {
        final double combined = MatrixOps.min(scores);
        return combined;
    }
}
/**
 * Combines pairwise scores by delegating to {@code MatrixOps.max}.
 */
public static class Maximum implements CombiningStrategy {
    public double combine (double[] scores) {
        final double combined = MatrixOps.max(scores);
        return combined;
    }
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy