All Downloads are FREE. Search and download functionalities are using the official Maven repository.

cc.mallet.cluster.neighbor_evaluator.MedoidEvaluator Maven / Gradle / Ivy

Go to download

MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.

The newest version!
package cc.mallet.cluster.neighbor_evaluator;


//import weka.core.Instances;
import cc.mallet.classify.Classifier;
import cc.mallet.cluster.Clustering;
import cc.mallet.cluster.util.PairwiseMatrix;
import cc.mallet.types.MatrixOps;

/**
 * Uses a {@link Classifier} over pairs of instances to score a
 * {@link Neighbor} based on the medoids of the clusters being merged.
 * Currently only supports {@link AgglomerativeNeighbor}s.
 *
 * @author "Michael Wick" 
 * @version 1.0
 * @since 1.0
 * @see ClassifyingNeighborEvaluator
 */
public class MedoidEvaluator extends ClassifyingNeighborEvaluator {

	private static final long serialVersionUID = 1L;

	/**
     * If single link is true, then the score of clusters A and B is the score of the link between the two medoids.
     */
    boolean singleLink=false;

	/**
	 * How to combine a set of pairwise scores (e.g. mean, max, ...). Currently not supported in this class.
	 */
	CombiningStrategy combiningStrategy;

	/**
	 * If true, score all edges involved in a merge. If false, only
	 * score the edges that cross the boundaries of the clusters being
	 * merged.
	 */
	boolean mergeFirst=true;

	/**
	 * Cache for calls to getScore. In some experiments, reduced running
	 * time by nearly half.
	 */
	PairwiseMatrix scoreCache;
	
	/**
	 *
	 * @param classifier Classifier to assign scores to {@link
	 * Neighbor}s for which a pair of Instances has been merged.
	 * @param scoringLabel The predicted label that corresponds to a
	 * positive example (e.g. "YES").
	 * @param combiningStrategy How to combine the pairwise scores
	 * (e.g. max, mean, ...).
	 * @param mergeFirst If true, score all edges involved in a
	 * merge. If false, only score the edges that cross the boundaries
	 * of the clusters being merged.
	 * @return
	 */
    public MedoidEvaluator(Classifier classifier, String scoringLabel)
    {
	super(classifier,scoringLabel);
	System.out.println("Using Medoid Evaluator");
    }
    public MedoidEvaluator(Classifier classifier, String scoringLabel,boolean singleLink,boolean mergeFirst)
    {
	super(classifier,scoringLabel);
	this.singleLink=singleLink;
	this.mergeFirst=mergeFirst;
	System.out.println("Using Medoid Evaluator. Single link="+singleLink+".");
    }

    /*
    public MedoidEvaluator (Classifier classifier,
			      String scoringLabel,
			      CombiningStrategy combiningStrategy,
			      boolean mergeFirst) {
		super(classifier, scoringLabel);
		this.combiningStrategy = combiningStrategy;
		this.mergeFirst = mergeFirst;
	System.out.println("Using Centroid Evaluator (2)");

	}
    */

	/**
	 * Scores every neighbor with {@link #evaluate(Neighbor)}.
	 *
	 * @param neighbors the neighbors to score
	 * @return one score per neighbor, in the same order as the input
	 */
	public double[] evaluate (Neighbor[] neighbors) {
		final int count = neighbors.length;
		final double[] results = new double[count];
		int slot = 0;
		for (Neighbor candidate : neighbors) {
			results[slot++] = evaluate(candidate);
		}
		return results;
	}
	

    
    /**
     * Scores a single {@link Neighbor} using the medoids of the two clusters
     * being merged.
     *
     * NOTE(review): this method is damaged in this copy of the file. On the
     * {@code for} line near the end, everything between {@code i} and
     * {@code centDist)} is missing (it looks as if text between a less-than
     * sign and a later greater-than sign was stripped). The remainder of this
     * method and the definitions of {@code getMedWeights} and
     * {@code getCentroid} -- both called here but not defined anywhere in the
     * visible file -- appear to have been swallowed. The trailing lines that
     * return {@code centIdx} presumably belong to {@code getCentroid}. Restore
     * the missing text from the upstream source before compiling.
     *
     * @param neighbor the proposed merge; must be an {@link AgglomerativeNeighbor}
     * @return when {@code singleLink} is true, the {@code getScore} value for
     * the pair of instances that {@code getCentroid} selected in each old
     * cluster; the non-single-link result cannot be determined from the
     * visible text
     * @throws IllegalArgumentException if {@code neighbor} is not an
     * {@link AgglomerativeNeighbor}
     */
    public double evaluate(Neighbor neighbor)
    {
	// result[k] is the position within oldIndices[k] returned by getCentroid for cluster k
	int result[] = new int[2];
	if (!(neighbor instanceof AgglomerativeNeighbor))
	    throw new IllegalArgumentException("Expect AgglomerativeNeighbor not " + neighbor.getClass().getName());
	int[][] oldIndices = ((AgglomerativeNeighbor)neighbor).getOldClusters();
	// NOTE(review): mergedIndices is not read anywhere in the visible part of this method
	int[] mergedIndices=((AgglomerativeNeighbor)neighbor).getNewCluster();

	Clustering original = neighbor.getOriginal();

	result[0]=getCentroid(oldIndices[0],original);
	result[1]=getCentroid(oldIndices[1],original);
	if(singleLink) //scores a cluster based on link between medoid of each cluster
	    {
		// result[k] indexes into oldIndices[k], so this pairs up the two selected instances
		AgglomerativeNeighbor pwn = new AgglomerativeNeighbor(original,original,oldIndices[0][result[0]],oldIndices[1][result[1]]);
		double score = getScore(pwn);
		return score;
	    }

	//
	//Returns a weighted average where the weights are proportional to similarity to the medoid
	double[] medsA=getMedWeights(result[0],oldIndices[0],original);
	double[] medsB=getMedWeights(result[1],oldIndices[1],original);

	double numerator=0;
	double denominator=0;
	// NOTE(review): the next line is truncated in this copy -- the loop condition and an
	// unknown amount of following code are missing; everything below it presumably
	// belongs to a different method (it returns centIdx, an int index).
	for(int i=0;icentDist)
		    {
			centDist=scores[i];
			centIdx=i;
			//centIdx=indices[i];
		    }
	    }
	return centIdx;
    }
    
    /*
	public double evaluate (Neighbor neighbor) {
 		if (!(neighbor instanceof AgglomerativeNeighbor))
 			throw new IllegalArgumentException("Expect AgglomerativeNeighbor not " + neighbor.getClass().getName());

		Clustering original = neighbor.getOriginal();
		int[] mergedIndices = ((AgglomerativeNeighbor)neighbor).getNewCluster();
		ArrayList scores = new ArrayList();
 		for (int i = 0; i < mergedIndices.length; i++) {
			for (int j = i + 1; j < mergedIndices.length; j++) {
				if ((original.getLabel(mergedIndices[i]) != original.getLabel(mergedIndices[j])) || mergeFirst) {
					AgglomerativeNeighbor pwneighbor =
						new AgglomerativeNeighbor(original,	original,
																			mergedIndices[i], mergedIndices[j]);
					scores.add(new Double(getScore(pwneighbor)));
				}
			}
		}

		if (scores.size() < 1)
			throw new IllegalStateException("No pairs of Instances were scored.");
		
 		double[] vals = new double[scores.size()];
		for (int i = 0; i < vals.length; i++)
			vals[i] = ((Double)scores.get(i)).doubleValue();
 		return combiningStrategy.combine(vals);
	}
    */

	/**
	 * Drops the score cache; {@code getScore} allocates a new one the next
	 * time it finds the cache missing.
	 */
	public void reset () {
		this.scoreCache = null;
	}
	
	/**
	 * Describes this evaluator by the runtime class names of the evaluator
	 * itself and of its classifier.
	 *
	 * @return a string of the form {@code class=<name> classifier=<name>}
	 */
	@Override
	public String toString () {
		final String ownName = getClass().getName();
		final String classifierName = classifier.getClass().getName();
		return "class=" + ownName + " classifier=" + classifierName;
	}

	/**
	 * Returns the classifier's score for a pairwise neighbor, consulting
	 * {@code scoreCache} first and creating the cache on first use.
	 *
	 * NOTE(review): a cache cell equal to 0.0 is treated as "not yet
	 * computed", so a pair whose stored score is exactly 0.0 is classified
	 * again on every call.
	 *
	 * @param pwneighbor neighbor whose new cluster's first two indices
	 * address the cache cell
	 * @return the value held in the cache for that pair after any update
	 */
	private double getScore (AgglomerativeNeighbor pwneighbor) {
		if (scoreCache == null) {
			int numInstances = pwneighbor.getOriginal().getNumInstances();
			scoreCache = new PairwiseMatrix(numInstances);
		}
		final int[] pair = pwneighbor.getNewCluster();
		final int row = pair[0];
		final int col = pair[1];
		boolean missing = scoreCache.get(row, col) == 0.0;
		if (missing) {
			scoreCache.set(row, col,
					classifier.classify(pwneighbor).getLabelVector().value(scoringLabel));
		}
		// Read back from the cache rather than returning the freshly computed value.
		return scoreCache.get(row, col);
	}

	/**
	 * Strategy for reducing a set of pairwise scores to a single
	 * cluster-wise score.
	 *
	 * @author "Aron Culotta" 
	 * @version 1.0
	 * @since 1.0
	 */
	public static interface CombiningStrategy {
		/**
		 * Combines the given pairwise scores.
		 *
		 * @param scores the pairwise scores to combine
		 * @return the combined score
		 */
		double combine (double[] scores);
	}

	/** {@link CombiningStrategy} that delegates to {@code MatrixOps.mean}. */
	public static class Average implements CombiningStrategy {
		public double combine (double[] scores) {
			final double combined = MatrixOps.mean(scores);
			return combined;
		}
	}

	/** {@link CombiningStrategy} that delegates to {@code MatrixOps.min}. */
	public static class Minimum implements CombiningStrategy {
		public double combine (double[] scores) {
			final double combined = MatrixOps.min(scores);
			return combined;
		}
	}

	/** {@link CombiningStrategy} that delegates to {@code MatrixOps.max}. */
	public static class Maximum implements CombiningStrategy {
		public double combine (double[] scores) {
			final double combined = MatrixOps.max(scores);
			return combined;
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy