weka.clusterers.Cobweb Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-dev Show documentation
Show all versions of weka-dev Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This version represents the developer version, the
"bleeding edge" of development, you could say. New functionality gets added
to this version.
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Cobweb.java
* Copyright (C) 2001-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.clusterers;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.core.AttributeStats;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.experiment.Stats;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Add;
/**
* Class implementing the Cobweb and Classit
* clustering algorithms.
*
* Note: the application of node operators (merging, splitting etc.) in terms of
* ordering and priority differs (and is somewhat ambiguous) between the
* original Cobweb and Classit papers. This algorithm always compares the best
* host, adding a new leaf, merging the two best hosts, and splitting the best
* host when considering where to place a new instance.
*
* For more information see:
*
* D. Fisher (1987). Knowledge acquisition via incremental conceptual
* clustering. Machine Learning. 2(2):139-172.
*
* J. H. Gennari, P. Langley, D. Fisher (1990). Models of incremental concept
* formation. Artificial Intelligence. 40:11-61.
*
*
*
* BibTeX:
*
*
* @article{Fisher1987,
* author = {D. Fisher},
* journal = {Machine Learning},
* number = {2},
* pages = {139-172},
* title = {Knowledge acquisition via incremental conceptual clustering},
* volume = {2},
* year = {1987}
* }
*
* @article{Gennari1990,
* author = {J. H. Gennari and P. Langley and D. Fisher},
* journal = {Artificial Intelligence},
* pages = {11-61},
* title = {Models of incremental concept formation},
* volume = {40},
* year = {1990}
* }
*
*
*
*
* Valid options are:
*
*
*
* -A <acuity>
* Acuity.
* (default=1.0)
*
*
*
* -C <cutoff>
* Cutoff.
* (default=0.002)
*
*
*
* -S <num>
* Random number seed.
* (default 42)
*
*
*
*
* @author Mark Hall
* @version $Revision: 10203 $
* @see RandomizableClusterer
* @see Drawable
*/
public class Cobweb extends RandomizableClusterer implements Drawable,
TechnicalInformationHandler, UpdateableClusterer {
/** for serialization */
static final long serialVersionUID = 928406656495092318L;
/**
* Inner class handling node operations for Cobweb.
*
* @see Serializable
*/
public class CNode implements Serializable, RevisionHandler {
/** for serialization */
static final long serialVersionUID = 3452097436933325631L;
/**
 * Within cluster attribute statistics: per-value counts for nominal
 * attributes, running numeric stats otherwise. Lazily created on the
 * first call to updateStats().
 */
private AttributeStats[] m_attStats;
/**
 * Number of attributes in the data
 */
private final int m_numAttributes;
/**
 * Instances at this node (also contains the instances held by all
 * descendant nodes, since adds are committed on the way down the tree)
 */
protected Instances m_clusterInstances = null;
/**
 * Children of this node; null while this node is a leaf
 */
private ArrayList m_children = null;
/**
 * Total (weighted) number of instances at this node
 */
private double m_totalInstances = 0.0;
/**
 * Cluster number of this node; -1 until assignClusterNums() has run
 */
private int m_clusterNum = -1;
/**
 * Creates an empty CNode instance.
 *
 * @param numAttributes the number of attributes in the data
 */
public CNode(int numAttributes) {
  m_numAttributes = numAttributes;
}
/**
 * Creates a new leaf CNode instance holding a single instance.
 *
 * @param numAttributes the number of attributes in the data
 * @param leafInstance the instance to store at this leaf
 */
public CNode(int numAttributes, Instance leafInstance) {
  this(numAttributes);
  if (m_clusterInstances == null) {
    // copy the header of the instance's dataset, with initial capacity 1
    m_clusterInstances = new Instances(leafInstance.dataset(), 1);
  }
  m_clusterInstances.add(leafInstance);
  updateStats(leafInstance, false);
}
/**
* Adds an instance to this cluster.
*
* @param newInstance the instance to add
* @throws Exception if an error occurs
*/
/**
 * Adds an instance to this cluster. An empty node simply stores the
 * instance; a leaf receiving a second instance is converted into an
 * internal node with two leaf children (reverted if the resulting
 * category utility falls below the cutoff); otherwise the best host
 * among the children is found and the instance is added recursively.
 *
 * @param newInstance the instance to add
 * @throws Exception if an error occurs
 */
protected void addInstance(Instance newInstance) throws Exception {
  // Add the instance to this cluster
  if (m_clusterInstances == null) {
    m_clusterInstances = new Instances(newInstance.dataset(), 1);
    m_clusterInstances.add(newInstance);
    updateStats(newInstance, false);
    return;
  } else if (m_children == null) {
    /*
     * we are a leaf, so make our existing instance(s) into a child and then
     * add the new instance as a child
     */
    CNode tempSubCluster = new CNode(m_numAttributes,
      m_clusterInstances.instance(0));
    for (int i = 1; i < m_clusterInstances.numInstances(); i++) {
      tempSubCluster.m_clusterInstances.add(m_clusterInstances.instance(i));
      tempSubCluster.updateStats(m_clusterInstances.instance(i), false);
    }
    // create the child list once (the original allocated it twice; the
    // first allocation was dead and has been removed)
    m_children = new ArrayList();
    m_children.add(tempSubCluster);
    m_children.add(new CNode(m_numAttributes, newInstance));
    m_clusterInstances.add(newInstance);
    updateStats(newInstance, false);
    // here is where we check against cutoff (also check cutoff
    // in findHost)
    if (categoryUtility() < m_cutoff) {
      // not enough utility gained: collapse back into a leaf
      m_children = null;
    }
    return;
  }
  // otherwise, find the best host for this instance
  CNode bestHost = findHost(newInstance, false);
  if (bestHost != null) {
    // now add to the best host
    bestHost.addInstance(newInstance);
  }
}
/**
* Temporarily adds a new instance to each of this nodes children in turn
* and computes the category utility.
*
* @param newInstance the new instance to evaluate
* @return an array of category utility values---the result of considering
* each child in turn as a host for the new instance
* @throws Exception if an error occurs
*/
private double[] cuScoresForChildren(Instance newInstance) throws Exception {
  // one category-utility score per existing child
  double[] categoryUtils = new double[m_children.size()];
  // look for a home for this instance in the existing children
  for (int i = 0; i < m_children.size(); i++) {
    CNode temp = m_children.get(i);
    // tentatively add the new instance to this child's stats
    temp.updateStats(newInstance, false);
    // CU of this node's whole partition with child i hosting the instance
    categoryUtils[i] = categoryUtility();
    // undo the tentative add, restoring the child's stats
    temp.updateStats(newInstance, true);
  }
  return categoryUtils;
}
/**
 * Computes the category utility of this node's partition when the two
 * given children are merged into a single node that also hosts the new
 * instance. This node's child list and the new instance's contribution
 * are restored before returning; note that merged keeps a and b
 * as its children so that findHost can commit the merge if it wins.
 *
 * @param merged an empty node that receives a and b as children
 * @param a the best host child
 * @param b the second best host child
 * @param newInstance the instance being placed
 * @return the category utility of the partition with a and b merged
 * @throws Exception if an error occurs
 */
private double cuScoreForBestTwoMerged(CNode merged, CNode a, CNode b,
  Instance newInstance) throws Exception {
  double mergedCU = -Double.MAX_VALUE;
  // consider merging the best and second
  // best.
  merged.m_clusterInstances = new Instances(m_clusterInstances, 1);
  merged.addChildNode(a);
  merged.addChildNode(b);
  merged.updateStats(newInstance, false); // add new instance to stats
  // remove the best and second best nodes
  m_children.remove(m_children.indexOf(a));
  m_children.remove(m_children.indexOf(b));
  m_children.add(merged);
  mergedCU = categoryUtility();
  // restore the status quo
  merged.updateStats(newInstance, true);
  m_children.remove(m_children.indexOf(merged));
  m_children.add(a);
  m_children.add(b);
  return mergedCU;
}
/**
* Finds a host for the new instance in this nodes children. Also considers
* merging the two best hosts and splitting the best host.
*
* @param newInstance the instance to find a host for
* @param structureFrozen true if the instance is not to be added to the
* tree and instead the best potential host is to be returned
* @return the best host
* @throws Exception if an error occurs
*/
private CNode findHost(Instance newInstance, boolean structureFrozen)
  throws Exception {
  if (!structureFrozen) {
    // commit the instance to this node's stats up front; undone at the
    // end if splitting wins (addInstance will then recurse on this node)
    updateStats(newInstance, false);
  }
  // look for a host in existing children and also consider as a new leaf
  double[] categoryUtils = cuScoresForChildren(newInstance);
  // make a temporary new leaf for this instance and get CU
  CNode newLeaf = new CNode(m_numAttributes, newInstance);
  m_children.add(newLeaf);
  double bestHostCU = categoryUtility();
  CNode finalBestHost = newLeaf;
  // remove new leaf when searching for best and second best nodes to
  // consider for merging and splitting
  m_children.remove(m_children.size() - 1);
  // now determine the best host (and the second best)
  int best = 0;
  int secondBest = 0;
  for (int i = 0; i < categoryUtils.length; i++) {
    if (categoryUtils[i] > categoryUtils[secondBest]) {
      if (categoryUtils[i] > categoryUtils[best]) {
        secondBest = best;
        best = i;
      } else {
        secondBest = i;
      }
    }
  }
  CNode a = m_children.get(best);
  CNode b = m_children.get(secondBest);
  if (categoryUtils[best] > bestHostCU) {
    bestHostCU = categoryUtils[best];
    finalBestHost = a; // an existing child beats making a new leaf
  }
  if (structureFrozen) {
    if (finalBestHost == newLeaf) {
      return null; // *this* node is the best host
    } else {
      return finalBestHost;
    }
  }
  double mergedCU = -Double.MAX_VALUE;
  CNode merged = new CNode(m_numAttributes);
  if (a != b) {
    // evaluate merging the two best children; merged keeps a and b as
    // its children so it can be committed below if merging wins
    mergedCU = cuScoreForBestTwoMerged(merged, a, b, newInstance);
    if (mergedCU > bestHostCU) {
      bestHostCU = mergedCU;
      finalBestHost = merged;
    }
  }
  // Consider splitting the best
  double splitCU = -Double.MAX_VALUE;
  double splitBestChildCU = -Double.MAX_VALUE;
  double splitPlusNewLeafCU = -Double.MAX_VALUE;
  double splitPlusMergeBestTwoCU = -Double.MAX_VALUE;
  if (a.m_children != null) {
    // build the split partition: every child except a, plus a's children
    ArrayList tempChildren = new ArrayList();
    for (int i = 0; i < m_children.size(); i++) {
      CNode existingChild = m_children.get(i);
      if (existingChild != a) {
        tempChildren.add(existingChild);
      }
    }
    for (int i = 0; i < a.m_children.size(); i++) {
      CNode promotedChild = a.m_children.get(i);
      tempChildren.add(promotedChild);
    }
    // also add the new leaf
    tempChildren.add(newLeaf);
    ArrayList saveStatusQuo = m_children;
    m_children = tempChildren;
    splitPlusNewLeafCU = categoryUtility(); // split + new leaf
    // remove the new leaf
    tempChildren.remove(tempChildren.size() - 1);
    // now look for best and second best within the split partition
    categoryUtils = cuScoresForChildren(newInstance);
    // now determine the best host (and the second best)
    best = 0;
    secondBest = 0;
    for (int i = 0; i < categoryUtils.length; i++) {
      if (categoryUtils[i] > categoryUtils[secondBest]) {
        if (categoryUtils[i] > categoryUtils[best]) {
          secondBest = best;
          best = i;
        } else {
          secondBest = i;
        }
      }
    }
    CNode sa = m_children.get(best);
    CNode sb = m_children.get(secondBest);
    splitBestChildCU = categoryUtils[best];
    // now merge best and second best of the split partition
    CNode mergedSplitChildren = new CNode(m_numAttributes);
    if (sa != sb) {
      splitPlusMergeBestTwoCU = cuScoreForBestTwoMerged(
        mergedSplitChildren, sa, sb, newInstance);
    }
    // best CU obtainable via splitting a is the max of the three options
    splitCU = (splitBestChildCU > splitPlusNewLeafCU) ? splitBestChildCU
      : splitPlusNewLeafCU;
    splitCU = (splitCU > splitPlusMergeBestTwoCU) ? splitCU
      : splitPlusMergeBestTwoCU;
    if (splitCU > bestHostCU) {
      bestHostCU = splitCU;
      finalBestHost = this; // keep the split partition in place
    } else {
      // restore the status quo
      m_children = saveStatusQuo;
    }
  }
  if (finalBestHost != this) {
    // can commit the instance to the set of instances at this node
    m_clusterInstances.add(newInstance);
  } else {
    m_numberSplits++;
  }
  if (finalBestHost == merged) {
    // commit the merge: replace a and b by the merged node
    m_numberMerges++;
    m_children.remove(m_children.indexOf(a));
    m_children.remove(m_children.indexOf(b));
    m_children.add(merged);
  }
  if (finalBestHost == newLeaf) {
    // return an empty node instead of newLeaf: the caller (addInstance)
    // will add the instance and its stats to the returned node, which
    // would be double-counted if newLeaf itself were returned
    finalBestHost = new CNode(m_numAttributes);
    m_children.add(finalBestHost);
  }
  if (bestHostCU < m_cutoff) {
    if (finalBestHost == this) {
      // splitting was the best, but since we are cutting all children
      // recursion is aborted and we still need to add the instance
      // to the set of instances at this node
      m_clusterInstances.add(newInstance);
    }
    m_children = null;
    finalBestHost = null;
  }
  if (finalBestHost == this) {
    // splitting is still the best, so downdate the stats as
    // we'll be recursively calling on this node
    updateStats(newInstance, true);
  }
  return finalBestHost;
}
/**
* Adds the supplied node as a child of this node. All of the child's
* instances are added to this nodes instances
*
* @param child the child to add
*/
/**
 * Adds the supplied node as a child of this node, folding all of the
 * child's instances into this node's instance list and statistics.
 *
 * @param child the child to add
 */
protected void addChildNode(CNode child) {
  Instances childInsts = child.m_clusterInstances;
  for (int idx = 0; idx < childInsts.numInstances(); idx++) {
    Instance inst = childInsts.instance(idx);
    m_clusterInstances.add(inst);
    updateStats(inst, false);
  }
  if (m_children == null) {
    m_children = new ArrayList();
  }
  m_children.add(child);
}
/**
* Computes the utility of all children with respect to this node
*
* @return the category utility of the children with respect to this node.
* @throws Exception if there are no children
*/
/**
 * Computes the category utility of the current partition: the mean of
 * the per-child utilities with respect to this node.
 *
 * @return the category utility of the children with respect to this node.
 * @throws Exception if there are no children
 */
protected double categoryUtility() throws Exception {
  if (m_children == null) {
    throw new Exception("categoryUtility: No children!");
  }
  int numChildren = m_children.size();
  double sum = 0;
  for (int idx = 0; idx < numChildren; idx++) {
    sum += categoryUtilityChild((CNode) m_children.get(idx));
  }
  return sum / numChildren;
}
/**
* Computes the utility of a single child with respect to this node
*
* @param child the child for which to compute the utility
* @return the utility of the child with respect to this node
* @throws Exception if something goes wrong
*/
protected double categoryUtilityChild(CNode child) throws Exception {
  double sum = 0;
  for (int i = 0; i < m_numAttributes; i++) {
    if (m_clusterInstances.attribute(i).isNominal()) {
      // nominal: sum over values of P(v|child)^2 - P(v|this)^2
      for (int j = 0; j < m_clusterInstances.attribute(i).numValues(); j++) {
        double x = child.getProbability(i, j);
        double y = getProbability(i, j);
        sum += (x * x) - (y * y);
      }
    } else {
      // numeric attribute: m_normal/sigma, i.e. 1/(2*sqrt(PI)*sigma),
      // in the child minus the same term in this node (Classit-style)
      sum += ((m_normal / child.getStandardDev(i)) - (m_normal / getStandardDev(i)));
    }
  }
  // weight by the child's share of this node's (weighted) instances
  return (child.m_totalInstances / m_totalInstances) * sum;
}
/**
* Returns the probability of a value of a nominal attribute in this node
*
* @param attIndex the index of the attribute
* @param valueIndex the index of the value of the attribute
* @return the probability
* @throws Exception if the requested attribute is not nominal
*/
/**
 * Returns the probability of a value of a nominal attribute in this node
 * (relative frequency of the value among the non-missing counts here).
 *
 * @param attIndex the index of the attribute
 * @param valueIndex the index of the value of the attribute
 * @return the probability
 * @throws Exception if the requested attribute is not nominal
 */
protected double getProbability(int attIndex, int valueIndex)
  throws Exception {
  if (!m_clusterInstances.attribute(attIndex).isNominal()) {
    throw new Exception("getProbability: attribute is not nominal");
  }
  AttributeStats stats = m_attStats[attIndex];
  if (stats.totalCount <= 0) {
    // no counts recorded yet for this attribute
    return 0;
  }
  return (double) stats.nominalCounts[valueIndex] / (double) stats.totalCount;
}
/**
* Returns the standard deviation of a numeric attribute
*
* @param attIndex the index of the attribute
* @return the standard deviation
* @throws Exception if an error occurs
*/
/**
 * Returns the standard deviation of a numeric attribute, bounded below
 * by the acuity (also used when the deviation is undefined).
 *
 * @param attIndex the index of the attribute
 * @return the standard deviation
 * @throws Exception if an error occurs
 */
protected double getStandardDev(int attIndex) throws Exception {
  if (!m_clusterInstances.attribute(attIndex).isNumeric()) {
    throw new Exception("getStandardDev: attribute is not numeric");
  }
  Stats numericStats = m_attStats[attIndex].numericStats;
  numericStats.calculateDerived();
  double stdDev = numericStats.stdDev;
  // NaN/infinite deviation (e.g. too few values) falls back to acuity
  return (Double.isNaN(stdDev) || Double.isInfinite(stdDev))
    ? m_acuity : Math.max(m_acuity, stdDev);
}
/**
* Update attribute stats using the supplied instance.
*
* @param updateInstance the instance for updating
* @param delete true if the values of the supplied instance are to be
* removed from the statistics
*/
protected void updateStats(Instance updateInstance, boolean delete) {
  // lazily allocate per-attribute statistics on first use
  if (m_attStats == null) {
    m_attStats = new AttributeStats[m_numAttributes];
    for (int i = 0; i < m_numAttributes; i++) {
      m_attStats[i] = new AttributeStats();
      if (m_clusterInstances.attribute(i).isNominal()) {
        m_attStats[i].nominalCounts = new int[m_clusterInstances.attribute(
          i).numValues()];
      } else {
        m_attStats[i].numericStats = new Stats();
      }
    }
  }
  for (int i = 0; i < m_numAttributes; i++) {
    // missing values contribute nothing to the attribute stats
    if (!updateInstance.isMissing(i)) {
      double value = updateInstance.value(i);
      if (m_clusterInstances.attribute(i).isNominal()) {
        // NOTE(review): nominalCounts is int[], so the compound
        // assignment silently truncates fractional instance weights
        m_attStats[i].nominalCounts[(int) value] += (delete) ? (-1.0 * updateInstance
          .weight()) : updateInstance.weight();
        m_attStats[i].totalCount += (delete) ? (-1.0 * updateInstance
          .weight()) : updateInstance.weight();
      } else {
        if (delete) {
          m_attStats[i].numericStats.subtract(value,
            updateInstance.weight());
        } else {
          m_attStats[i].numericStats.add(value, updateInstance.weight());
        }
      }
    }
  }
  // total is weighted, and is updated even when all values are missing
  m_totalInstances += (delete) ? (-1.0 * updateInstance.weight())
    : (updateInstance.weight());
}
/**
* Recursively assigns numbers to the nodes in the tree.
*
* @param cl_num an int[]
value
* @throws Exception if an error occurs
*/
/**
 * Recursively assigns cluster numbers to the nodes of the tree in
 * pre-order, using a one-element array as a shared counter.
 *
 * @param cl_num single-element counter array (next number to assign)
 * @throws Exception if an internal node has fewer than two children
 */
private void assignClusterNums(int[] cl_num) throws Exception {
  // a well-formed internal node always has at least two children
  if (m_children != null && m_children.size() < 2) {
    throw new Exception("assignClusterNums: tree not built correctly!");
  }
  m_clusterNum = cl_num[0]++;
  if (m_children == null) {
    return;
  }
  for (int idx = 0; idx < m_children.size(); idx++) {
    ((CNode) m_children.get(idx)).assignClusterNums(cl_num);
  }
}
/**
* Recursively build a string representation of the Cobweb tree
*
* @param depth depth of this node in the tree
* @param text holds the string representation
*/
/**
 * Recursively builds a string representation of the Cobweb tree, one
 * line per node, indented with "| " per tree level.
 *
 * @param depth depth of this node in the tree
 * @param text holds the string representation
 */
protected void dumpTree(int depth, StringBuffer text) {
  if (depth == 0) {
    // cluster numbers must be assigned before they can be printed
    determineNumberOfClusters();
  }
  if (m_children == null) {
    text.append("\n");
    for (int level = 0; level < depth; level++) {
      text.append("| ");
    }
    text.append("leaf " + m_clusterNum + " ["
      + m_clusterInstances.numInstances() + "]");
  } else {
    // an internal node prints its own line once per child, then recurses
    for (int c = 0; c < m_children.size(); c++) {
      text.append("\n");
      for (int level = 0; level < depth; level++) {
        text.append("| ");
      }
      text.append("node " + m_clusterNum + " ["
        + m_clusterInstances.numInstances() + "]");
      ((CNode) m_children.get(c)).dumpTree(depth + 1, text);
    }
  }
}
/**
* Returns the instances at this node as a string. Appends the cluster
* number of the child that each instance belongs to.
*
* @return a String
value
* @throws Exception if an error occurs
*/
protected String dumpData() throws Exception {
  if (m_children == null) {
    // leaf: just print the instances stored here
    return m_clusterInstances.toString();
  }
  // construct instances string with cluster numbers attached
  CNode tempNode = new CNode(m_numAttributes);
  tempNode.m_clusterInstances = new Instances(m_clusterInstances, 1);
  // collect the children's instances in child order (this ordering is
  // relied upon when assigning cluster labels below)
  for (int i = 0; i < m_children.size(); i++) {
    tempNode.addChildNode(m_children.get(i));
  }
  Instances tempInst = tempNode.m_clusterInstances;
  tempNode = null;
  // append a nominal "Cluster" attribute whose labels are the children's
  // cluster numbers, in the same order as the children
  Add af = new Add();
  af.setAttributeName("Cluster");
  String labels = "";
  for (int i = 0; i < m_children.size(); i++) {
    CNode temp = m_children.get(i);
    labels += ("C" + temp.m_clusterNum);
    if (i < m_children.size() - 1) {
      labels += ",";
    }
  }
  af.setNominalLabels(labels);
  af.setInputFormat(tempInst);
  tempInst = Filter.useFilter(tempInst, af);
  tempInst.setRelationName("Cluster " + m_clusterNum);
  // label each instance with its child's index (matches label order)
  int z = 0;
  for (int i = 0; i < m_children.size(); i++) {
    CNode temp = m_children.get(i);
    for (int j = 0; j < temp.m_clusterInstances.numInstances(); j++) {
      tempInst.instance(z).setValue(m_numAttributes, i);
      z++;
    }
  }
  return tempInst.toString();
}
/**
* Recursively generate the graph string for the Cobweb tree.
*
* @param text holds the graph string
* @throws Exception if generation fails
*/
protected void graphTree(StringBuffer text) throws Exception {
  // emit this node's declaration (leaves are drawn as filled boxes);
  // optionally embed the node's instances so they can be visualized
  text.append("N" + m_clusterNum + " [label=\""
    + ((m_children == null) ? "leaf " : "node ") + m_clusterNum + " "
    + " (" + m_clusterInstances.numInstances() + ")\" "
    + ((m_children == null) ? "shape=box style=filled " : "")
    + (m_saveInstances ? "data =\n" + dumpData() + "\n,\n" : "") + "]\n");
  if (m_children != null) {
    // first emit all edges to children, then recurse into each child
    for (int i = 0; i < m_children.size(); i++) {
      CNode temp = m_children.get(i);
      text.append("N" + m_clusterNum + "->" + "N" + temp.m_clusterNum
        + "\n");
    }
    for (int i = 0; i < m_children.size(); i++) {
      CNode temp = m_children.get(i);
      temp.graphTree(text);
    }
  }
}
/**
* Returns the revision string.
*
* @return the revision
*/
@Override
public String getRevision() {
  // extracts the numeric revision from the SVN keyword string
  return RevisionUtils.extract("$Revision: 10203 $");
}
}
/**
 * Normal constant: 1 / (2 * sqrt(PI)); used as m_normal / stdDev in the
 * numeric-attribute term of categoryUtilityChild().
 */
protected static final double m_normal = 1.0 / (2 * Math.sqrt(Math.PI));
/**
 * Acuity (minimum standard deviation for numeric attributes).
 */
protected double m_acuity = 1.0;
/**
 * Cutoff (minimum category utility; partitions scoring below this are
 * pruned back to a leaf).
 */
protected double m_cutoff = 0.01 * Cobweb.m_normal;
/**
 * Holds the root of the Cobweb tree; null until the first instance has
 * been processed.
 */
protected CNode m_cobwebTree = null;
/**
 * Number of clusters (nodes in the tree). Must never be queried directly,
 * only via the method numberOfClusters(). Otherwise it's not guaranteed that
 * it contains the correct value.
 *
 * @see #numberOfClusters()
 * @see #m_numberOfClustersDetermined
 */
protected int m_numberOfClusters = -1;
/** whether the number of clusters was already determined */
protected boolean m_numberOfClustersDetermined = false;
/** the number of splits that happened while building the tree */
protected int m_numberSplits;
/** the number of merges that happened while building the tree */
protected int m_numberMerges;
/**
 * Output instances in graph representation of Cobweb tree (Allows instances
 * at nodes in the tree to be visualized in the Explorer).
 */
protected boolean m_saveInstances = false;
/**
* default constructor
*/
public Cobweb() {
  super();
  // default seed of 42, as documented for the -S option
  m_SeedDefault = 42;
  setSeed(m_SeedDefault);
}
/**
* Returns a string describing this clusterer
*
* @return a description of the evaluator suitable for displaying in the
* explorer/experimenter gui
*/
public String globalInfo() {
  // keep this text in sync with the class-level Javadoc
  return "Class implementing the Cobweb and Classit clustering algorithms.\n\n"
    + "Note: the application of node operators (merging, splitting etc.) in "
    + "terms of ordering and priority differs (and is somewhat ambiguous) "
    + "between the original Cobweb and Classit papers. This algorithm always "
    + "compares the best host, adding a new leaf, merging the two best hosts, "
    + "and splitting the best host when considering where to place a new "
    + "instance.\n\n"
    + "For more information see:\n\n"
    + getTechnicalInformation().toString();
}
/**
* Returns an instance of a TechnicalInformation object, containing detailed
* information about the technical background of this class, e.g., paper
* reference or book this class is based on.
*
* @return the technical information about this class
*/
/**
 * Returns an instance of a TechnicalInformation object, containing detailed
 * information about the technical background of this class: the original
 * Cobweb article plus the Classit article as additional reference.
 *
 * @return the technical information about this class
 */
@Override
public TechnicalInformation getTechnicalInformation() {
  // primary reference: Fisher's original Cobweb paper
  TechnicalInformation fisher = new TechnicalInformation(Type.ARTICLE);
  fisher.setValue(Field.AUTHOR, "D. Fisher");
  fisher.setValue(Field.YEAR, "1987");
  fisher.setValue(Field.TITLE,
    "Knowledge acquisition via incremental conceptual clustering");
  fisher.setValue(Field.JOURNAL, "Machine Learning");
  fisher.setValue(Field.VOLUME, "2");
  fisher.setValue(Field.NUMBER, "2");
  fisher.setValue(Field.PAGES, "139-172");
  // additional reference: the Classit paper
  TechnicalInformation gennari = fisher.add(Type.ARTICLE);
  gennari.setValue(Field.AUTHOR,
    "J. H. Gennari and P. Langley and D. Fisher");
  gennari.setValue(Field.YEAR, "1990");
  gennari.setValue(Field.TITLE, "Models of incremental concept formation");
  gennari.setValue(Field.JOURNAL, "Artificial Intelligence");
  gennari.setValue(Field.VOLUME, "40");
  gennari.setValue(Field.PAGES, "11-61");
  return fisher;
}
/**
* Returns default capabilities of the clusterer.
*
* @return the capabilities of this clusterer
*/
/**
 * Returns default capabilities of the clusterer: no class attribute;
 * nominal, numeric and date attributes; missing values allowed.
 *
 * @return the capabilities of this clusterer
 */
@Override
public Capabilities getCapabilities() {
  Capabilities caps = super.getCapabilities();
  caps.disableAll();
  caps.enable(Capability.NO_CLASS);
  // attributes
  caps.enable(Capability.NOMINAL_ATTRIBUTES);
  caps.enable(Capability.NUMERIC_ATTRIBUTES);
  caps.enable(Capability.DATE_ATTRIBUTES);
  caps.enable(Capability.MISSING_VALUES);
  // other: incremental, so no minimum dataset size
  caps.setMinimumNumberInstances(0);
  return caps;
}
/**
* Builds the clusterer.
*
* @param data the training instances.
* @throws Exception if something goes wrong.
*/
/**
 * Builds the clusterer by feeding the (optionally shuffled) training
 * instances through the incremental update methods.
 *
 * @param data the training instances.
 * @throws Exception if something goes wrong.
 */
@Override
public void buildClusterer(Instances data) throws Exception {
  // discard any state left over from a previous run
  m_numberOfClusters = -1;
  m_cobwebTree = null;
  m_numberSplits = 0;
  m_numberMerges = 0;
  // can clusterer handle the data?
  getCapabilities().testWithFail(data);
  // work on a copy; a negative seed disables shuffling
  data = new Instances(data);
  if (getSeed() >= 0) {
    data.randomize(new Random(getSeed()));
  }
  int numInsts = data.numInstances();
  for (int idx = 0; idx < numInsts; idx++) {
    updateClusterer(data.instance(idx));
  }
  updateFinished();
}
/**
 * Signals the end of the updating.
 */
@Override
public void updateFinished() {
  // refresh the cached cluster count/numbering after incremental updates
  determineNumberOfClusters();
}
/**
* Classifies a given instance.
*
* @param instance the instance to be assigned to a cluster
* @return the number of the assigned cluster as an interger if the class is
* enumerated, otherwise the predicted value
* @throws Exception if instance could not be classified successfully
*/
@Override
public int clusterInstance(Instance instance) throws Exception {
  CNode host = m_cobwebTree;
  CNode temp = null;
  // make sure cluster numbers are assigned before one is reported
  determineNumberOfClusters();
  do {
    if (host.m_children == null) {
      // reached a leaf: it is the assigned cluster
      temp = null;
      break;
    }
    // host.updateStats(instance, false);
    // structureFrozen=true: evaluate hosts without modifying the tree;
    // findHost returns null when the current node itself is the best host
    temp = host.findHost(instance, true);
    // host.updateStats(instance, true);
    if (temp != null) {
      host = temp;
    }
  } while (temp != null);
  return host.m_clusterNum;
}
/**
* determines the number of clusters if necessary
*
* @see #m_numberOfClusters
* @see #m_numberOfClustersDetermined
*/
/**
 * Determines the number of clusters if necessary, by (re)numbering all
 * tree nodes. Does nothing before the tree exists or when the cached
 * value is still valid.
 *
 * @see #m_numberOfClusters
 * @see #m_numberOfClustersDetermined
 */
protected void determineNumberOfClusters() {
  if (m_numberOfClustersDetermined || (m_cobwebTree == null)) {
    return;
  }
  // single-element array acts as a counter shared across the recursion
  int[] counter = new int[1];
  counter[0] = 0;
  try {
    m_cobwebTree.assignClusterNums(counter);
  } catch (Exception ex) {
    ex.printStackTrace();
    counter[0] = 0;
  }
  m_numberOfClusters = counter[0];
  m_numberOfClustersDetermined = true;
}
/**
* Returns the number of clusters.
*
* @return the number of clusters
*/
@Override
public int numberOfClusters() {
  // lazily (re)computed after incremental updates
  determineNumberOfClusters();
  return m_numberOfClusters;
}
/**
* Get the root of the tree.
*
* @return the root of the tree.
*/
public CNode getTreeRoot() {
  // may be null before any instance has been processed
  return m_cobwebTree;
}
/**
* Adds an instance to the clusterer.
*
* @param newInstance the instance to be added
* @throws Exception if something goes wrong
*/
/**
 * Adds an instance to the clusterer incrementally. The first instance
 * becomes the root of the tree; later ones are routed through it.
 *
 * @param newInstance the instance to be added
 * @throws Exception if something goes wrong
 */
@Override
public void updateClusterer(Instance newInstance) throws Exception {
  // any cached cluster count is stale once a new instance arrives
  m_numberOfClustersDetermined = false;
  if (m_cobwebTree != null) {
    m_cobwebTree.addInstance(newInstance);
  } else {
    m_cobwebTree = new CNode(newInstance.numAttributes(), newInstance);
  }
}
/**
* Adds an instance to the Cobweb tree.
*
* @param newInstance the instance to be added
* @throws Exception if something goes wrong
* @deprecated updateClusterer(Instance) should be used instead
* @see #updateClusterer(Instance)
*/
@Deprecated
public void addInstance(Instance newInstance) throws Exception {
  // delegates to the UpdateableClusterer API method
  updateClusterer(newInstance);
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
**/
@Override
public Enumeration
© 2015 - 2024 Weber Informatics LLC | Privacy Policy