![JAR search and dependency download from the Maven repository](/logo.png)
moa.evaluation.General Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of moa Show documentation
Show all versions of moa Show documentation
Massive On-line Analysis is an environment for massive data mining. MOA
provides a framework for data stream mining and includes tools for evaluation
and a collection of machine learning algorithms. Related to the WEKA project,
also written in Java, while scaling to more demanding problems.
The newest version!
/*
* General.java
* Copyright (C) 2010 RWTH Aachen University, Germany
* @author Jansen ([email protected])
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package moa.evaluation;
import java.util.ArrayList;
import moa.cluster.Clustering;
import moa.cluster.SphereCluster;
import moa.gui.visualization.DataPoint;
import com.yahoo.labs.samoa.instances.Instance;
/**
 * Measure collection reporting general coverage statistics of a found
 * clustering: "GPrecision", "GRecall", "Redundancy", "numCluster" and
 * "numClasses".
 *
 * A point counts as covered by a cluster when that cluster's inclusion
 * probability for the point is at least {@code pointInclusionProbThreshold}.
 * Points whose class value is -1 are counted as noise.
 */
public class General extends MeasureCollection{
    /** Number of points in the most recently evaluated window. */
    private int numPoints;
    /** Number of clusters in the most recently evaluated (found) clustering. */
    private int numFClusters;
    /** Number of attributes of a point, not counting one (see evaluateClustering). */
    private int numDims;
    /** Minimum inclusion probability for a point to count as contained in a cluster. */
    private double pointInclusionProbThreshold = 0.8;
    /** Clustering passed to the most recent evaluateClustering call. */
    private Clustering clustering;
    /** Points passed to the most recent evaluateClustering call. */
    private ArrayList<DataPoint> points;

    public General() {
        super();
    }

    /**
     * @return the names of the measures this collection reports
     */
    @Override
    protected String[] getNames() {
        String[] names = {"GPrecision","GRecall","Redundancy","numCluster","numClasses"};
        //String[] names = {"GPrecision","GRecall","Redundancy","Overlap","numCluster","numClasses","Compactness"};
        return names;
    }

//  @Override
//  protected boolean[] getDefaultEnabled() {
//      boolean [] defaults = {false, false, false, false, false ,false};
//      return defaults;
//  }

    /**
     * Computes the coverage measures for one window of points and records
     * them via {@code addValue}.
     *
     * <ul>
     * <li>Redundancy: fraction of all points covered by more than one cluster.</li>
     * <li>GPrecision: non-noise covered points divided by all covered points
     *     (0 when nothing is covered).</li>
     * <li>GRecall: non-noise covered points divided by all non-noise points
     *     (0 when there are no non-noise points).</li>
     * </ul>
     *
     * @param clustering the clustering under evaluation
     * @param trueClustering the reference clustering; only its size is used
     * @param points the points of the current window; may be empty
     * @throws Exception declared by the overridden method
     */
    @Override
    public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) throws Exception{
        this.points = points;
        this.clustering = clustering;
        numPoints = points.size();
        numFClusters = clustering.size();
        // An empty window has no first point to read the attribute count from.
        numDims = points.isEmpty() ? 0 : points.get(0).numAttributes()-1;

        int totalRedundancy = 0;
        int trueCoverage = 0;
        int totalCoverage = 0;
        int numNoise = 0;

        for (int p = 0; p < numPoints; p++) {
            DataPoint point = points.get(p);

            // number of clusters that contain this point
            int coverage = 0;
            for (int c = 0; c < numFClusters; c++) {
                if (covers(c, point)) {
                    coverage++;
                }
            }

            if (point.classValue() == -1) {
                numNoise++;
            } else if (coverage > 0) {
                trueCoverage++;
            }

            if (coverage > 0) totalCoverage++;   //points covered by clustering (incl. noise)
            if (coverage > 1) totalRedundancy++; //include noise
        }

        int numLabeled = numPoints - numNoise;

        addValue("numCluster", clustering.size());
        addValue("numClasses", trueClustering.size());
        // Each ratio falls back to 0 instead of producing NaN from 0/0.
        addValue("Redundancy", (numPoints==0?0:((double)totalRedundancy/(double)numPoints)));
        addValue("GPrecision", (totalCoverage==0?0:((double)trueCoverage/(double)(totalCoverage))));
        addValue("GRecall", (numLabeled==0?0:((double)trueCoverage/(double)numLabeled)));

//      if(isEnabled(3)){
//          addValue("Compactness", computeCompactness());
//      }
//      if(isEnabled(3)){
//          addValue("Overlap", computeOverlap());
//      }
    }

    /**
     * Tells whether cluster {@code c} of the current clustering contains the
     * given point, i.e. its inclusion probability reaches the threshold.
     */
    private boolean covers(int c, Instance point) {
        return clustering.get(c).getInclusionProbability(point) >= pointInclusionProbThreshold;
    }

    /**
     * Fraction of clusters for which {@code overlapRadiusDegree} with at
     * least one other cluster is positive.
     *
     * @return the fraction (0 for an empty clustering), or NaN if any cluster
     *         is not a {@link SphereCluster}
     */
    private double computeOverlap(){
        for (int c = 0; c < numFClusters; c++) {
            if(!(clustering.get(c) instanceof SphereCluster)){
                System.out.println("Overlap only supports Sphere Cluster. Found: "+clustering.get(c).getClass());
                return Double.NaN;
            }
        }

        boolean[] overlap = new boolean[numFClusters];

        // Check every unordered pair exactly once. A cluster that is already
        // flagged must still be compared with later clusters, otherwise a
        // later cluster overlapping only with it would never be flagged.
        for (int c0 = 0; c0 < numFClusters; c0++) {
            SphereCluster s0 = (SphereCluster)clustering.get(c0);
            for (int c1 = c0 + 1; c1 < numFClusters; c1++) {
                SphereCluster s1 = (SphereCluster)clustering.get(c1);
                if(s0.overlapRadiusDegree(s1) > 0){
                    overlap[c0] = overlap[c1] = true;
                }
            }
        }

        double totalOverlap = 0;
        for (int c0 = 0; c0 < numFClusters; c0++) {
            if(overlap[c0])
                totalOverlap++;
        }

//      if(totalOverlap/(double)numFClusters > .8) RunVisualizer.pause();
        if(numFClusters>0) totalOverlap/=(double)numFClusters;
        return totalOverlap;
    }

    /**
     * Average compactness over all clusters. For a cluster containing more
     * than one point, compactness is the radius of a {@link SphereCluster}
     * built from the contained points divided by the cluster's own radius,
     * capped at 1. Each cluster's value is also stored on the cluster as the
     * measure value "Compactness".
     *
     * @return the mean compactness, 0 for an empty clustering, or NaN if any
     *         cluster is not a {@link SphereCluster}
     */
    private double computeCompactness(){
        if(numFClusters == 0) return 0;
        for (int c = 0; c < numFClusters; c++) {
            if(!(clustering.get(c) instanceof SphereCluster)){
                System.out.println("Compactness only supports Sphere Cluster. Found: "+clustering.get(c).getClass());
                return Double.NaN;
            }
        }

        //TODO weight radius by number of dimensions
        double totalCompactness = 0;
        for (int c = 0; c < numFClusters; c++) {
            // collect the points contained in cluster c
            ArrayList<Instance> containedPoints = new ArrayList<Instance>();
            for (int p = 0; p < numPoints; p++) {
                if (covers(c, points.get(p))) {
                    containedPoints.add(points.get(p));
                }
            }

            double cfRadius = ((SphereCluster)clustering.get(c)).getRadius();
            double compactness = 0;
            if(containedPoints.size()>1){
                //cluster not empty
                SphereCluster minEnclosingCluster = new SphereCluster(containedPoints, numDims);
                double minRadius = minEnclosingCluster.getRadius();
                if(Math.abs(minRadius-cfRadius) < 0.1e-10){
                    // radii equal up to numerical noise
                    compactness = 1;
                }
                else if(minRadius < cfRadius){
                    compactness = minRadius/cfRadius;
                }
                else{
                    System.out.println("Optimal radius bigger then real one ("+(cfRadius-minRadius)+"), this is really wrong");
                    compactness = 1;
                }
            }
            else{
                // at most one contained point: only a zero-radius cluster is compact
                if(cfRadius==0) compactness = 1;
            }

            //weight by weight of cluster???
            totalCompactness+=compactness;
            clustering.get(c).setMeasureValue("Compactness", Double.toString(compactness));
        }
        return (totalCompactness/numFClusters);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy