// weka.classifiers.trees.lmt.ResidualModelSelection Maven / Gradle / Ivy
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* ResidualModelSelection.java
* Copyright (C) 2003-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.trees.lmt;
import weka.classifiers.trees.j48.ClassifierSplitModel;
import weka.classifiers.trees.j48.Distribution;
import weka.classifiers.trees.j48.ModelSelection;
import weka.classifiers.trees.j48.NoSplit;
import weka.core.Instances;
import weka.core.RevisionUtils;
/**
 * Helper class for logistic model trees (weka.classifiers.trees.lmt.LMT) that implements the
 * residual-based splitting criterion: every non-class attribute is tried as a
 * {@link ResidualSplit} and the admissible split with the largest entropy gain is chosen.
 *
 * @author Niels Landwehr
 * @version $Revision: 8034 $
 */
public class ResidualModelSelection
  extends ModelSelection {

  /** for serialization */
  private static final long serialVersionUID = -293098783159385148L;

  /** Minimum number of instances for leaves. */
  protected int m_minNumInstances;

  /** Minimum information gain a split must reach to be accepted. */
  protected double m_minInfoGain;

  /**
   * Constructor to create ResidualModelSelection object. The minimum information gain is
   * initialised to 1.0E-4.
   *
   * @param minNumInstances minimum number of instances for leaves
   */
  public ResidualModelSelection(int minNumInstances) {
    m_minNumInstances = minNumInstances;
    m_minInfoGain = 1.0E-4;
  }

  /** Method not in use; does nothing. */
  public void cleanup() {
    // intentionally empty: method not in use
  }

  /**
   * Selects split based on residuals for the given dataset.
   * <p>
   * One {@link ResidualSplit} is built per non-class attribute. Candidates rejected by
   * {@code checkModel(m_minNumInstances)} are ignored; among the rest, the one with the
   * strictly largest {@code entropyGain()} wins (the first attribute wins ties). The winning
   * split object itself is returned, so it is not constructed and built a second time.
   *
   * @param data the dataset to split
   * @param dataZs passed unchanged to {@code ResidualSplit.buildClassifier}
   *          (presumably the per-instance, per-class response values of the LMT fit -- confirm
   *          against the caller)
   * @param dataWs passed unchanged to {@code ResidualSplit.buildClassifier}
   *          (presumably the matching per-instance, per-class weights -- confirm against the
   *          caller)
   * @return the best split, or a {@link NoSplit} over the distribution of {@code data} when
   *         the dataset has fewer than {@code m_minNumInstances} instances or no admissible
   *         split reaches {@code m_minInfoGain}
   * @throws Exception if the data has fewer than two attributes, or if building a split fails
   */
  public final ClassifierSplitModel selectModel(Instances data,
    double[][] dataZs, double[][] dataWs) throws Exception {

    int numAttributes = data.numAttributes();

    if (numAttributes < 2) {
      throw new Exception("Can't select Model without non-class attribute");
    }
    if (data.numInstances() < m_minNumInstances) {
      return new NoSplit(new Distribution(data));
    }

    double bestGain = -Double.MAX_VALUE;
    ResidualSplit bestSplit = null;

    // try split on every attribute except the class attribute
    for (int i = 0; i < numAttributes; i++) {
      if (i == data.classIndex()) {
        continue;
      }

      // build split
      ResidualSplit candidate = new ResidualSplit(i);
      candidate.buildClassifier(data, dataZs, dataWs);
      if (!candidate.checkModel(m_minNumInstances)) {
        continue;
      }

      // evaluate split; keep the built object so the winner need not be rebuilt
      double gain = candidate.entropyGain();
      if (gain > bestGain) {
        bestGain = gain;
        bestSplit = candidate;
      }
    }

    // The null check keeps the method from returning a split when no candidate was
    // admissible, even if m_minInfoGain has been lowered to an extreme value.
    if (bestSplit != null && bestGain >= m_minInfoGain) {
      return bestSplit;
    }

    // could not find any split with enough information gain
    return new NoSplit(new Distribution(data));
  }

  /**
   * Method not in use.
   *
   * @param train ignored
   * @return always {@code null}
   */
  public final ClassifierSplitModel selectModel(Instances train) {
    // method not in use
    return null;
  }

  /**
   * Method not in use.
   *
   * @param train ignored
   * @param test ignored
   * @return always {@code null}
   */
  public final ClassifierSplitModel selectModel(Instances train, Instances test) {
    // method not in use
    return null;
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 8034 $");
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy