All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.classifiers.trees.lmt.ResidualModelSelection Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    ResidualModelSelection.java
 *    Copyright (C) 2003-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.trees.lmt;

import weka.classifiers.trees.j48.ClassifierSplitModel;
import weka.classifiers.trees.j48.Distribution;
import weka.classifiers.trees.j48.ModelSelection;
import weka.classifiers.trees.j48.NoSplit;
import weka.core.Instances;
import weka.core.RevisionUtils;

/**
 * Helper class for logistic model trees (weka.classifiers.trees.lmt.LMT) to implement the 
 * splitting criterion based on residuals.
 * 
 * @author Niels Landwehr
 * @version $Revision: 8034 $
 */
public class ResidualModelSelection
  extends ModelSelection {

  /** for serialization */
  private static final long serialVersionUID = -293098783159385148L;

  /** Lower bound on the number of instances; passed to each candidate split's checkModel. */
  protected int m_minNumInstances;

  /** Lower bound on the gain a split must reach to be returned. */
  protected double m_minInfoGain;

  /**
   * Creates a model selection object. The minimum information gain is
   * initialised to 1.0E-4.
   *
   * @param minNumInstances minimum number of instances for leaves
   */
  public ResidualModelSelection(int minNumInstances) {
    m_minInfoGain = 1.0E-4;
    m_minNumInstances = minNumInstances;
  }

  /** Intentionally empty: this method is not in use. */
  public void cleanup() {
    // nothing to clean up
  }

  /**
   * Picks the split for the given dataset using the residual-based criterion.
   * A candidate {@link ResidualSplit} is built for every non-class attribute;
   * among the candidates accepted by {@code checkModel}, the one with the
   * largest {@code entropyGain()} wins (the first one encountered on ties).
   *
   * @param data the dataset to split
   * @param dataZs handed unchanged to {@code ResidualSplit.buildClassifier}
   * @param dataWs handed unchanged to {@code ResidualSplit.buildClassifier}
   * @return a freshly built split on the best attribute, or a {@link NoSplit}
   *         when the dataset holds fewer than the minimum number of instances
   *         or no candidate reaches the minimum information gain
   * @throws Exception if the dataset has fewer than two attributes, or if
   *         building a split or a distribution fails
   */
  public final ClassifierSplitModel selectModel(Instances data,
      double[][] dataZs, double[][] dataWs) throws Exception {

    final int attributeCount = data.numAttributes();

    if (attributeCount < 2) {
      throw new Exception("Can't select Model without non-class attribute");
    }
    if (data.numInstances() < m_minNumInstances) {
      return new NoSplit(new Distribution(data));
    }

    int winner = -1;
    double winnerGain = -Double.MAX_VALUE;

    // evaluate one candidate split per non-class attribute
    for (int attr = 0; attr < attributeCount; attr++) {
      if (attr == data.classIndex()) {
        continue;
      }

      ResidualSplit candidate = new ResidualSplit(attr);
      candidate.buildClassifier(data, dataZs, dataWs);

      if (!candidate.checkModel(m_minNumInstances)) {
        continue;
      }

      // strict comparison: an earlier attribute keeps the lead on equal gain
      double candidateGain = candidate.entropyGain();
      if (candidateGain > winnerGain) {
        winnerGain = candidateGain;
        winner = attr;
      }
    }

    if (winnerGain >= m_minInfoGain) {
      // rebuild the split on the winning attribute and hand it back
      ResidualSplit chosen = new ResidualSplit(winner);
      chosen.buildClassifier(data, dataZs, dataWs);
      return chosen;
    }

    // no candidate reached the required information gain
    return new NoSplit(new Distribution(data));
  }

  /**
   * Not in use.
   *
   * @param train ignored
   * @return always {@code null}
   */
  public final ClassifierSplitModel selectModel(Instances train) {
    return null;
  }

  /**
   * Not in use.
   *
   * @param train ignored
   * @param test ignored
   * @return always {@code null}
   */
  public final ClassifierSplitModel selectModel(Instances train, Instances test) {
    return null;
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  public String getRevision() {
    final String revision = RevisionUtils.extract("$Revision: 8034 $");
    return revision;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy