weka.classifiers.mi.miti.Split Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of multiInstanceLearning Show documentation
Show all versions of multiInstanceLearning Show documentation
A collection of multi-instance learning classifiers. Includes the Citation KNN method, several variants of the diverse density method, support vector machines for multi-instance learning, simple wrappers for applying standard propositional learners to multi-instance data, decision tree and rule learners, and some other methods.
The newest version!
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
/*
* Split.java
* Copyright (C) 2011 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.mi.miti;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import weka.core.Attribute;
import weka.core.Instance;
/**
* Represents a split in the decision tree.
*
* @author Luke Bjerring
* @version $Revision: 10369 $
*/
public class Split implements Serializable {
/** ID added to avoid warning */
private static final long serialVersionUID = 147371323803237346L;
// The attribute used for the split
public Attribute attribute;
// The split point, in case the attribute is numeric
public double splitPoint;
// The score associated with the split
public double score;
// Whether the split is on a nominal attribute
public boolean isNominal = false;
/**
* Finds the best split based on the given arguments.
*/
public static Split getBestSplitPoint(final Attribute a,
ArrayList enabled, HashMap instanceBags,
AlgorithmConfiguration settings) {
// Get the split method
IBestSplitMeasure bsm;
if (settings.method == weka.classifiers.mi.MITI.SPLITMETHOD_GINI) {
bsm = new Gini();
} else if (settings.method == weka.classifiers.mi.MITI.SPLITMETHOD_SSBEPP) {
bsm = new SSBEPP();
} else {
bsm = new MaxBEPP();
}
// Nominal values get a different method
if (a.isNominal()) {
return getBestNominalSplitPoint(a, enabled, instanceBags, settings, bsm);
}
// Order the data by the attribute we're looking at
Collections.sort(enabled, new Comparator() {
@Override
public int compare(Instance arg0, Instance arg1) {
return Double.compare(arg0.value(a), arg1.value(a));
}
});
Split split = null;
SufficientStatistics ss;
if (!settings.useBagStatistics) {
ss = new SufficientInstanceStatistics(enabled, instanceBags);
} else {
ss = new SufficientBagStatistics(enabled, instanceBags,
settings.bagCountMultiplier);
}
// Iterate through all splits, and score them, keeping the best one
for (int i = 0; i < enabled.size() - 1; i++) {
ss.updateStats(enabled.get(i), instanceBags);
if (enabled.get(i).value(a) == enabled.get(i + 1).value(a)) {
continue;
}
double splitPoint = (enabled.get(i).value(a) + enabled.get(i + 1)
.value(a)) / 2;
double score = bsm.getScore(ss, settings.kBEPPConstant,
settings.unbiasedEstimate);
if (split == null) {
split = new Split();
split.attribute = a;
split.score = score;
split.splitPoint = splitPoint;
continue;
}
if (score > split.score) {
split.score = score;
split.splitPoint = splitPoint;
}
}
return split;
}
/**
* Computes split for a nominal attribute based on given arguments.
*/
private static Split getBestNominalSplitPoint(Attribute a,
ArrayList enabled, HashMap instanceBags,
AlgorithmConfiguration settings, IBestSplitMeasure bsm) {
Split s = new Split();
s.isNominal = true;
s.attribute = a;
SufficientStatistics[] ss = new SufficientStatistics[a.numValues()];
if (!settings.useBagStatistics) {
for (int i = 0; i < a.numValues(); i++) {
ss[i] = new SufficientInstanceStatistics(enabled, instanceBags);
}
} else {
for (int i = 0; i < a.numValues(); i++) {
ss[i] = new SufficientBagStatistics(enabled, instanceBags,
settings.bagCountMultiplier);
}
}
for (Instance i : enabled) {
ss[(int) i.value(a)].updateStats(i, instanceBags);
}
double[] totals = new double[a.numValues()];
double[] positiveCounts = new double[a.numValues()];
for (int i = 0; i < a.numValues(); i++) {
totals[i] = ss[i].totalCountLeft();
positiveCounts[i] = ss[i].positiveCountLeft();
}
s.score = bsm.getScore(totals, positiveCounts, settings.kBEPPConstant,
settings.unbiasedEstimate);
return s;
}
}