weka.classifiers.trees.ft.FTNode Maven / Gradle / Ivy
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* FTNode.java
* Copyright (C) 2007 University of Porto, Porto, Portugal
*
*/
package weka.classifiers.trees.ft;
import weka.classifiers.functions.SimpleLinearRegression;
import weka.classifiers.trees.j48.C45ModelSelection;
import weka.classifiers.trees.j48.C45Split;
import weka.classifiers.trees.j48.NoSplit;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionUtils;
import weka.core.Utils;
/**
* Class for Functional tree structure.
*
* @author Jo\~{a}o Gama
* @author Carlos Ferreira
*
* @version $Revision: 1.4 $
*/
public class FTNode
extends FTtree {
/** for serialization. */
private static final long serialVersionUID = 2317688685139295063L;
/**
* Constructor for Functional tree node.
*
* @param errorOnProbabilities Use error on probabilities for stopping criterion of LogitBoost?
* @param numBoostingIterations sets the numBoostingIterations parameter
* @param minNumInstances minimum number of instances at which a node is considered for splitting
*
*/
public FTNode( boolean errorOnProbabilities, int numBoostingIterations,
int minNumInstances, double weightTrimBeta, boolean useAIC) {
m_errorOnProbabilities = errorOnProbabilities;
m_fixedNumIterations = numBoostingIterations;
m_minNumInstances = minNumInstances;
m_maxIterations = 200;
setWeightTrimBeta(weightTrimBeta);
setUseAIC(useAIC);
}
/**
* Method for building a Functional tree (only called for the root node).
* Grows an initial Functional Tree.
*
* @param data the data to train with
* @throws Exception if something goes wrong
*/
public void buildClassifier(Instances data) throws Exception{
// Insert new attributes
data= insertNewAttr(data);
//build tree using all the data
buildTree(data, null, data.numInstances(), 0);
}
/**
* Method for building the tree structure.
* Builds a logistic model, splits the node and recursively builds tree for child nodes.
* @param data the training data passed on to this node
* @param higherRegressions An array of regression functions produced by LogitBoost at higher
* levels in the tree. They represent a logistic regression model that is refined locally
* at this node.
* @param totalInstanceWeight the total number of training examples
* @param higherNumParameters effective number of parameters in the logistic regression model built
* in parent nodes
* @throws Exception if something goes wrong
*/
public void buildTree(Instances data, SimpleLinearRegression[][] higherRegressions,
double totalInstanceWeight, double higherNumParameters) throws Exception{
//save some stuff
m_totalInstanceWeight = totalInstanceWeight;
m_train = new Instances(data);
m_train= removeExtAttributes( m_train);
m_isLeaf = true;
m_sons = null;
m_numInstances = m_train.numInstances();
m_numClasses = m_train.numClasses();
//init
m_numericData = getNumericData(m_train);
m_numericDataHeader = new Instances(m_numericData, 0);
m_regressions = initRegressions();
m_numRegressions = 0;
if (higherRegressions != null) m_higherRegressions = higherRegressions;
else m_higherRegressions = new SimpleLinearRegression[m_numClasses][0];
m_numHigherRegressions = m_higherRegressions[0].length;
m_numParameters = higherNumParameters;
//build logistic model
if (m_numInstances >= m_numFoldsBoosting) {
if (m_fixedNumIterations > 0){
performBoosting(m_fixedNumIterations);
} else if (getUseAIC()) {
performBoostingInfCriterion();
} else {
performBoostingCV();
}
}
m_numParameters += m_numRegressions;
//only keep the simple regression functions that correspond to the selected number of LogitBoost iterations
m_regressions = selectRegressions(m_regressions);
boolean grow;
//Compute logistic probs
double[][] FsConst;
double[] probsConst;
int j;
FsConst = getFs(m_numericData);
for (j = 0; j < data.numInstances(); j++)
{
probsConst=probs(FsConst[j]);
// auxiliary to compute constructor error
if (data.instance(j).classValue()!=getConstError(probsConst)) m_constError=m_constError +1;
for (int i = 0; i m_minNumInstances) {
grow = (m_localModel.numSubsets() > 1);
} else {
grow = false;
}
// logitboost uses distribution for instance
m_hasConstr=false;
m_train=data;
if (grow) {
//create and build children of node
m_isLeaf = false;
Instances[] localInstances = m_localModel.split(data);
// deletes extended attributes
if (((C45Split)m_localModel).attIndex() >=0 && ((C45Split)m_localModel).attIndex()< data.classAttribute().numValues())
m_hasConstr=true;
m_sons = new FTNode[m_localModel.numSubsets()];
for (int i = 0; i < m_sons.length; i++) {
m_sons[i] = new FTNode(m_errorOnProbabilities,m_fixedNumIterations,
m_minNumInstances,getWeightTrimBeta(), getUseAIC());
m_sons[i].buildTree(localInstances[i],
mergeArrays(m_regressions, m_higherRegressions), m_totalInstanceWeight, m_numParameters);
localInstances[i] = null;
}
}
else{
m_leafclass=m_localModel.distribution().maxClass();
}
}
/**
* Method for prunning a tree using C4.5 pruning procedure.
*
* @exception Exception if something goes wrong
*/
public double prune() throws Exception {
double errorsLeaf;
double errorsTree;
double errorsConstModel;
double treeError=0;
int i;
double probBranch;
// Compute error if this Tree would be leaf without contructor
errorsLeaf = getEstimatedErrorsForDistribution(m_localModel.distribution());
if (m_isLeaf ) {
return errorsLeaf;
} else {
//Computes da error of the constructor model
errorsConstModel = getEtimateConstModel(m_localModel.distribution());
errorsTree=0;
for (i = 0; i < m_sons.length; i++) {
probBranch = m_localModel.distribution().perBag(i) /
m_localModel.distribution().total();
errorsTree += probBranch* m_sons[i].prune();
}
// Decide if leaf is best choice.
if (Utils.smOrEq(errorsLeaf, errorsTree) && Utils.smOrEq(errorsLeaf, errorsConstModel)) {
// Free son Trees
m_sons = null;
m_isLeaf = true;
m_hasConstr=false;
m_leafclass=m_localModel.distribution().maxClass();
// Get NoSplit Model for node.
m_localModel = new NoSplit(m_localModel.distribution());
treeError=errorsLeaf;
}else{
// Decide if Constructor is best choice.
if (Utils.smOrEq(errorsConstModel, errorsTree)) {
// Free son Trees
m_sons = null;
m_isLeaf = true;
m_hasConstr =true;
// Get NoSplit Model for node.
m_localModel = new NoSplit(m_localModel.distribution());
treeError=errorsConstModel;
} else
treeError=errorsTree;
}
}
return treeError;
}
/**
* Returns the class probabilities for an instance given by the Functional Tree.
* @param instance the instance
* @return the array of probabilities
*/
public double[] distributionForInstance(Instance instance) throws Exception {
double[] probs;
if (m_isLeaf && m_hasConstr) { //leaf
//leaf: use majoraty class or constructor model
probs = modelDistributionForInstance(instance);
} else {
if (m_isLeaf && !m_hasConstr)
{
probs=new double[instance.numClasses()];
probs[m_leafclass]=(double)1;
}else{
probs = modelDistributionForInstance(instance);
//Built auxiliary split instance
Instance instanceSplit=new Instance(instance.numAttributes()+instance.numClasses());
instanceSplit.setDataset(instance.dataset());
// Inserts attribute and their value
for(int i=0; i< instance.numClasses();i++)
{
instanceSplit.dataset().insertAttributeAt( new Attribute("N"+ (instance.numClasses()-i)), 0);
instanceSplit.setValue(i,probs[i]);
}
for(int i=0; i< instance.numAttributes();i++)
instanceSplit.setValue(i+instance.numClasses(),instance.value(i));
//chooses best branch
int branch = m_localModel.whichSubset(instanceSplit); //split
//delete added attributes
for(int i=0; i< instance.numClasses();i++)
instanceSplit.dataset().deleteAttributeAt(0);
probs = m_sons[branch].distributionForInstance(instance);
}
}
return probs;
}
/**
* Returns the revision string.
*
* @return the revision
*/
public String getRevision() {
return RevisionUtils.extract("$Revision: 1.4 $");
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy