All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.classifiers.trees.ft.FTNode Maven / Gradle / Ivy

/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    FTNode.java
 *    Copyright (C) 2007 University of Porto, Porto, Portugal
 *
 */

package weka.classifiers.trees.ft;

import weka.classifiers.functions.SimpleLinearRegression;
import weka.classifiers.trees.j48.C45ModelSelection;
import weka.classifiers.trees.j48.C45Split;
import weka.classifiers.trees.j48.NoSplit;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionUtils;
import weka.core.Utils;

/**
 * Class for Functional tree structure. 
 *
 * @author Jo\~{a}o Gama
 * @author Carlos Ferreira
 *
 * @version $Revision: 1.4 $
 */
public class FTNode 
  extends FTtree {   
  
  /** for serialization. */
  private static final long serialVersionUID = 2317688685139295063L;

  /**
   * Creates a Functional Tree node and stores the boosting and splitting
   * settings it is given. The maximum number of iterations is set to 200.
   *
   * @param errorOnProbabilities Use error on probabilities for stopping criterion of LogitBoost?
   * @param numBoostingIterations stored as the fixed number of boosting iterations
   * @param minNumInstances minimum number of instances at which a node is considered for splitting
   * @param weightTrimBeta value handed to {@code setWeightTrimBeta}
   * @param useAIC value handed to {@code setUseAIC}
   */
  public FTNode(boolean errorOnProbabilities, int numBoostingIterations,
                int minNumInstances, double weightTrimBeta, boolean useAIC) {
    // Iteration limits.
    this.m_maxIterations = 200;
    this.m_fixedNumIterations = numBoostingIterations;

    // Stopping criterion and split threshold.
    this.m_errorOnProbabilities = errorOnProbabilities;
    this.m_minNumInstances = minNumInstances;

    // Settings that go through their setters.
    setWeightTrimBeta(weightTrimBeta);
    setUseAIC(useAIC);
  }
    
  /**
   * Builds a Functional tree (only called for the root node).
   * The training data is first run through {@code insertNewAttr}; the tree is
   * then grown on the result, starting with no inherited regression functions
   * and a parameter count of zero.
   *
   * @param data the data to train with
   * @throws Exception if something goes wrong
   */
  public void buildClassifier(Instances data) throws Exception {
    // Version of the training data that carries the inserted attributes.
    final Instances extended = insertNewAttr(data);
    final int instanceCount = extended.numInstances();

    // Grow the tree on all of the data.
    buildTree(extended, null, instanceCount, 0);
  }

  /**
   * Method for building the tree structure.
   * Builds a logistic model, splits the node and recursively builds tree for child nodes.
   * @param data the training data passed on to this node
   * @param higherRegressions An array of regression functions produced by LogitBoost at higher 
   * levels in the tree. They represent a logistic regression model that is refined locally 
   * at this node.
   * @param totalInstanceWeight the total number of training examples
   * @param higherNumParameters effective number of parameters in the logistic regression model built
   * in parent nodes
   * @throws Exception if something goes wrong
   */
  public void buildTree(Instances data, SimpleLinearRegression[][] higherRegressions, 
                        double totalInstanceWeight, double higherNumParameters) throws Exception{

    //save some stuff
    m_totalInstanceWeight = totalInstanceWeight;
    m_train = new Instances(data);
    m_train= removeExtAttributes( m_train);
        
    m_isLeaf = true;
    m_sons = null;
	
    m_numInstances = m_train.numInstances();
    m_numClasses = m_train.numClasses();				
	
    //init 
    m_numericData = getNumericData(m_train);		  
    m_numericDataHeader = new Instances(m_numericData, 0);
	
    m_regressions = initRegressions();
    m_numRegressions = 0;
	
    if (higherRegressions != null) m_higherRegressions = higherRegressions;
    else m_higherRegressions = new SimpleLinearRegression[m_numClasses][0];	

    m_numHigherRegressions = m_higherRegressions[0].length;	
        
    m_numParameters = higherNumParameters;
        
    //build logistic model
    if (m_numInstances >= m_numFoldsBoosting) {
      if (m_fixedNumIterations > 0){
        performBoosting(m_fixedNumIterations);
      } else if (getUseAIC()) {
        performBoostingInfCriterion();
      } else {
        performBoostingCV();
      }
    }
        
    m_numParameters += m_numRegressions;
	
    //only keep the simple regression functions that correspond to the selected number of LogitBoost iterations
    m_regressions = selectRegressions(m_regressions);
         
    boolean grow;
       
    //Compute logistic probs
    double[][] FsConst;
    double[] probsConst;
    int j;
    FsConst = getFs(m_numericData);
        
    for (j = 0; j < data.numInstances(); j++)
      {
        probsConst=probs(FsConst[j]);
        // auxiliary to compute constructor error
        if (data.instance(j).classValue()!=getConstError(probsConst)) m_constError=m_constError +1;
        for (int i = 0; i m_minNumInstances) {
      grow = (m_localModel.numSubsets() > 1);
    } else {
      grow = false;
    }
        
    // logitboost uses distribution for instance
    m_hasConstr=false;
    m_train=data;
    if (grow) {	
      //create and build children of node
      m_isLeaf = false;
      Instances[] localInstances = m_localModel.split(data);
      // deletes extended attributes
      if (((C45Split)m_localModel).attIndex() >=0 && ((C45Split)m_localModel).attIndex()< data.classAttribute().numValues()) 
        m_hasConstr=true;                         
               
      m_sons = new FTNode[m_localModel.numSubsets()];
      for (int i = 0; i < m_sons.length; i++) {
        m_sons[i] = new FTNode(m_errorOnProbabilities,m_fixedNumIterations, 
                               m_minNumInstances,getWeightTrimBeta(), getUseAIC());
        m_sons[i].buildTree(localInstances[i],
                            mergeArrays(m_regressions, m_higherRegressions), m_totalInstanceWeight, m_numParameters);		
        localInstances[i] = null;
      }	    
    } 
    else{
      m_leafclass=m_localModel.distribution().maxClass();
    }
  }

  /**
   * Prunes the tree using the C4.5 pruning procedure.
   * <p>
   * For an inner node three error estimates are compared: the node as a plain
   * leaf, the node as a leaf that keeps its constructor model, and the
   * (recursively pruned) subtree. The cheapest alternative is kept; ties are
   * resolved in favour of the plain leaf, then the constructor leaf.
   *
   * @return the error estimate of the alternative that was kept
   * @throws Exception if something goes wrong
   */
  public double prune() throws Exception {

    // Estimate for this node as a leaf without constructor.
    final double leafError = getEstimatedErrorsForDistribution(m_localModel.distribution());
    if (m_isLeaf) {
      return leafError;
    }

    // Estimate for the constructor model.
    final double constructorError = getEtimateConstModel(m_localModel.distribution());

    // Estimate for the subtree: each son's estimate weighted by the share of
    // this node's distribution that ends up in the son's bag.
    double subtreeError = 0;
    for (int son = 0; son < m_sons.length; son++) {
      final double branchShare =
        m_localModel.distribution().perBag(son) / m_localModel.distribution().total();
      subtreeError += branchShare * m_sons[son].prune();
    }

    // Plain leaf is the best choice.
    final boolean leafIsBest =
      Utils.smOrEq(leafError, subtreeError) && Utils.smOrEq(leafError, constructorError);
    if (leafIsBest) {
      m_sons = null;
      m_isLeaf = true;
      m_hasConstr = false;
      m_leafclass = m_localModel.distribution().maxClass();
      // the node no longer splits
      m_localModel = new NoSplit(m_localModel.distribution());
      return leafError;
    }

    // Leaf with constructor is the best choice.
    if (Utils.smOrEq(constructorError, subtreeError)) {
      m_sons = null;
      m_isLeaf = true;
      m_hasConstr = true;
      // the node no longer splits
      m_localModel = new NoSplit(m_localModel.distribution());
      return constructorError;
    }

    // Otherwise the subtree stays as it is.
    return subtreeError;
  }
 
  /**
   * Returns the class probabilities for an instance given by the Functional Tree.
   * <ul>
   * <li>Leaf with constructor: the output of {@code modelDistributionForInstance}.</li>
   * <li>Leaf without constructor: probability 1 for the stored leaf class.</li>
   * <li>Inner node: the instance is extended with the model's class probabilities,
   * the split model picks a branch for the extended instance, and the answer of
   * that son (for the original instance) is returned.</li>
   * </ul>
   * To route an instance, the header returned by {@code instance.dataset()} is
   * temporarily given one extra attribute per class. These attributes are removed
   * again before the method returns, including when routing fails with an
   * exception. Because the header is modified in the meantime, the method must not
   * run concurrently on instances sharing a header.
   *
   * @param instance the instance
   * @return the array of probabilities
   * @throws Exception if the distribution cannot be computed
   */
  public double[] distributionForInstance(Instance instance) throws Exception {

    if (m_isLeaf) {
      if (m_hasConstr) {
        // leaf that predicts with its constructor model
        return modelDistributionForInstance(instance);
      }
      // leaf that predicts the majority class
      double[] leafProbs = new double[instance.numClasses()];
      leafProbs[m_leafclass] = 1;
      return leafProbs;
    }

    // inner node: the model's probabilities become the leading attributes of the
    // instance handed to the split
    double[] probs = modelDistributionForInstance(instance);
    final int numClasses = instance.numClasses();

    //Built auxiliary split instance
    Instance instanceSplit = new Instance(instance.numAttributes() + numClasses);
    instanceSplit.setDataset(instance.dataset());

    int branch;
    int inserted = 0; // number of attributes added to the header so far
    try {
      // Inserts attribute and their value. Every attribute goes in at position 0,
      // so the header ends up starting with N1, N2, ...
      for (int i = 0; i < numClasses; i++) {
        instanceSplit.dataset().insertAttributeAt(new Attribute("N" + (numClasses - i)), 0);
        inserted++;
        instanceSplit.setValue(i, probs[i]);
      }
      // original values follow, shifted past the inserted attributes
      for (int i = 0; i < instance.numAttributes(); i++) {
        instanceSplit.setValue(i + numClasses, instance.value(i));
      }

      //chooses best branch
      branch = m_localModel.whichSubset(instanceSplit); //split
    } finally {
      //delete added attributes, also on failure, so the header is never left extended
      for (int i = 0; i < inserted; i++) {
        instanceSplit.dataset().deleteAttributeAt(0);
      }
    }

    return m_sons[branch].distributionForInstance(instance);
  }
  
  /**
   * Returns the revision string, obtained by passing this class's revision
   * keyword to {@code RevisionUtils.extract}.
   * 
   * @return		the revision
   */
  public String getRevision() {
    final String revisionKeyword = "$Revision: 1.4 $";
    return RevisionUtils.extract(revisionKeyword);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy