All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.classifiers.trees.RandomTree Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see .
 */

/*
 *    RandomTree.java
 *    Copyright (C) 2001-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.trees;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.ContingencyTables;
import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.PartitionGenerator;
import weka.core.Randomizable;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.gui.ProgrammaticProperty;

import java.io.Serializable;
import java.util.Collections;
import java.util.Enumeration;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Random;
import java.util.Vector;

/**
 *  Class for constructing a tree that considers K
 * randomly chosen attributes at each node. Performs no pruning. Also has an
 * option to allow estimation of class probabilities (or target mean in the
 * regression case) based on a hold-out set (backfitting). 
*
* * * Valid options are: *

* *

 * -K <number of attributes>
 *  Number of attributes to randomly investigate. (default 0)
 *  (<1 = int(log_2(#predictors)+1)).
 * 
* *
 * -M <minimum number of instances>
 *  Set minimum number of instances per leaf.
 *  (default 1)
 * 
* *
 * -V <minimum variance for split>
 *  Set minimum numeric class variance proportion
 *  of train variance for split (default 1e-3).
 * 
* *
 * -S <num>
 *  Seed for random number generator.
 *  (default 1)
 * 
* *
 * -depth <num>
 *  The maximum depth of the tree, 0 for unlimited.
 *  (default 0)
 * 
* *
 * -N <num>
 *  Number of folds for backfitting (default 0, no backfitting).
 * 
* *
 * -U
 *  Allow unclassified instances.
 * 
* *
 * -B
 *  Break ties randomly when several attributes look equally good.
 * 
* *
 * -output-debug-info
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
 * 
* *
 * -do-not-check-capabilities
 *  If set, classifier capabilities are not checked before classifier is built
 *  (use with caution).
 * 
* *
 * -num-decimal-places
 *  The number of decimal places for the output of numbers in the model (default 2).
 * 
* * * * @author Eibe Frank ([email protected]) * @author Richard Kirkby ([email protected]) * @version $Revision: 13865 $ */ public class RandomTree extends AbstractClassifier implements OptionHandler, WeightedInstancesHandler, Randomizable, Drawable, PartitionGenerator { /** for serialization */ private static final long serialVersionUID = -9051119597407396024L; /** The Tree object */ protected Tree m_Tree = null; /** The header information. */ protected Instances m_Info = null; /** Minimum number of instances for leaf. */ protected double m_MinNum = 1.0; /** The number of attributes considered for a split. */ protected int m_KValue = 0; /** The random seed to use. */ protected int m_randomSeed = 1; /** The maximum depth of the tree (0 = unlimited) */ protected int m_MaxDepth = 0; /** Determines how much data is used for backfitting */ protected int m_NumFolds = 0; /** Whether unclassified instances are allowed */ protected boolean m_AllowUnclassifiedInstances = false; /** Whether to break ties randomly. */ protected boolean m_BreakTiesRandomly = false; /** a ZeroR model in case no model can be built from the data */ protected Classifier m_zeroR; /** * The minimum proportion of the total variance (over all the data) required * for split. */ protected double m_MinVarianceProp = 1e-3; /** Whether to store the impurity decrease/gain sum */ protected boolean m_computeImpurityDecreases; /** * Indexed by attribute, each two element array contains impurity * decrease/gain sum in first element and count in the second */ protected double[][] m_impurityDecreasees; /** * Returns a string describing classifier * * @return a description suitable for displaying in the explorer/experimenter * gui */ public String globalInfo() { return "Class for constructing a tree that considers K randomly " + " chosen attributes at each node. Performs no pruning. Also has" + " an option to allow estimation of class probabilities (or target mean " + "in the regression case) based on a hold-out set (backfitting)."; } /** * Get the array of impurity decrease/gain sums * * @return the array of impurity decrease/gain sums */ public double[][] getImpurityDecreases() { return m_impurityDecreasees; } /** * Set whether to compute/store impurity decreases for variable importance * in RandomForest * * @param computeImpurityDecreases true to compute and store impurity decrease * values for splitting attributes */ @ProgrammaticProperty public void setComputeImpurityDecreases(boolean computeImpurityDecreases) { m_computeImpurityDecreases = computeImpurityDecreases; } /** * Get whether to compute/store impurity decreases for variable importance * in RandomForest * * @return true to compute and store impurity decrease * values for splitting attributes */ public boolean getComputeImpurityDecreases() { return m_computeImpurityDecreases; } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String minNumTipText() { return "The minimum total weight of the instances in a leaf."; } /** * Get the value of MinNum. * * @return Value of MinNum. */ public double getMinNum() { return m_MinNum; } /** * Set the value of MinNum. * * @param newMinNum Value to assign to MinNum. */ public void setMinNum(double newMinNum) { m_MinNum = newMinNum; } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String minVariancePropTipText() { return "The minimum proportion of the variance on all the data " + "that needs to be present at a node in order for splitting to " + "be performed in regression trees."; } /** * Get the value of MinVarianceProp. * * @return Value of MinVarianceProp. */ public double getMinVarianceProp() { return m_MinVarianceProp; } /** * Set the value of MinVarianceProp. * * @param newMinVarianceProp Value to assign to MinVarianceProp. */ public void setMinVarianceProp(double newMinVarianceProp) { m_MinVarianceProp = newMinVarianceProp; } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String KValueTipText() { return "Sets the number of randomly chosen attributes. If 0, int(log_2(#predictors) + 1) is used."; } /** * Get the value of K. * * @return Value of K. */ public int getKValue() { return m_KValue; } /** * Set the value of K. * * @param k Value to assign to K. */ public void setKValue(int k) { m_KValue = k; } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String seedTipText() { return "The random number seed used for selecting attributes."; } /** * Set the seed for random number generation. * * @param seed the seed */ @Override public void setSeed(int seed) { m_randomSeed = seed; } /** * Gets the seed for the random number generations * * @return the seed for the random number generation */ @Override public int getSeed() { return m_randomSeed; } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String maxDepthTipText() { return "The maximum depth of the tree, 0 for unlimited."; } /** * Get the maximum depth of trh tree, 0 for unlimited. * * @return the maximum depth. */ public int getMaxDepth() { return m_MaxDepth; } /** * Set the maximum depth of the tree, 0 for unlimited. * * @param value the maximum depth. */ public void setMaxDepth(int value) { m_MaxDepth = value; } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String numFoldsTipText() { return "Determines the amount of data used for backfitting. One fold is used for " + "backfitting, the rest for growing the tree. (Default: 0, no backfitting)"; } /** * Get the value of NumFolds. * * @return Value of NumFolds. */ public int getNumFolds() { return m_NumFolds; } /** * Set the value of NumFolds. * * @param newNumFolds Value to assign to NumFolds. */ public void setNumFolds(int newNumFolds) { m_NumFolds = newNumFolds; } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String allowUnclassifiedInstancesTipText() { return "Whether to allow unclassified instances."; } /** * Gets whether tree is allowed to abstain from making a prediction. * * @return true if tree is allowed to abstain from making a prediction. */ public boolean getAllowUnclassifiedInstances() { return m_AllowUnclassifiedInstances; } /** * Set the value of AllowUnclassifiedInstances. * * @param newAllowUnclassifiedInstances true if tree is allowed to abstain * from making a prediction */ public void setAllowUnclassifiedInstances( boolean newAllowUnclassifiedInstances) { m_AllowUnclassifiedInstances = newAllowUnclassifiedInstances; } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String breakTiesRandomlyTipText() { return "Break ties randomly when several attributes look equally good."; } /** * Get whether to break ties randomly. * * @return true if ties are to be broken randomly. */ public boolean getBreakTiesRandomly() { return m_BreakTiesRandomly; } /** * Set whether to break ties randomly. * * @param newBreakTiesRandomly true if ties are to be broken randomly */ public void setBreakTiesRandomly(boolean newBreakTiesRandomly) { m_BreakTiesRandomly = newBreakTiesRandomly; } /** * Lists the command-line options for this classifier. * * @return an enumeration over all possible options */ @Override public Enumeration




© 2015 - 2025 Weber Informatics LLC | Privacy Policy