/*
* Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. The project price is only $1.
* You can buy this project and download/modify it as often as you want.
* The Waikato Environment for Knowledge Analysis (WEKA), a machine
* learning workbench. This is the stable version. Apart from bugfixes, this version
* does not receive any other updates.
*/
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* HoeffdingTree.java
* Copyright (C) 2013 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.trees;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.Vector;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.UpdateableClassifier;
import weka.classifiers.trees.ht.ActiveHNode;
import weka.classifiers.trees.ht.GiniSplitMetric;
import weka.classifiers.trees.ht.HNode;
import weka.classifiers.trees.ht.InactiveHNode;
import weka.classifiers.trees.ht.InfoGainSplitMetric;
import weka.classifiers.trees.ht.LeafNode;
import weka.classifiers.trees.ht.LearningNode;
import weka.classifiers.trees.ht.NBNode;
import weka.classifiers.trees.ht.NBNodeAdaptive;
import weka.classifiers.trees.ht.SplitCandidate;
import weka.classifiers.trees.ht.SplitMetric;
import weka.classifiers.trees.ht.SplitNode;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
/**
* A Hoeffding tree (VFDT) is an incremental, anytime
* decision tree induction algorithm that is capable of learning from massive
* data streams, assuming that the distribution generating examples does not
* change over time. Hoeffding trees exploit the fact that a small sample can
* often be enough to choose an optimal splitting attribute. This idea is
* supported mathematically by the Hoeffding bound, which quantifies the number
* of observations (in our case, examples) needed to estimate some statistics
* within a prescribed precision (in our case, the goodness of an attribute).
*
* A theoretically appealing feature of Hoeffding Trees not shared by
* other incremental decision tree learners is that it has sound guarantees of
* performance. Using the Hoeffding bound one can show that its output is
* asymptotically nearly identical to that of a non-incremental learner using
* infinitely many examples. For more information see:
*
* Geoff Hulten, Laurie Spencer, Pedro Domingos: Mining time-changing data
* streams. In: ACM SIGKDD Intl. Conf. on Knowledge Discovery and Data Mining,
* 97-106, 2001.
*
*
* BibTeX:
*
* <pre>
* &#64;inproceedings{Hulten2001,
*    author = {Geoff Hulten and Laurie Spencer and Pedro Domingos},
*    booktitle = {ACM SIGKDD Intl. Conf. on Knowledge Discovery and Data Mining},
*    pages = {97-106},
*    publisher = {ACM Press},
*    title = {Mining time-changing data streams},
*    year = {2001}
* }
* </pre>
*
* -S
* The splitting criterion to use. 0 = Gini, 1 = Info gain
* (default = 0)
*
*
*
* -E
* The allowable error in a split decision - values closer to zero will take longer to decide
* (default = 1e-7)
*
*
*
* -H
* Threshold below which a split will be forced to break ties
* (default = 0.05)
*
*
*
* -M
* Minimum fraction of weight required down at least two branches for info gain splitting
* (default = 0.01)
*
*
*
* -G
* Grace period - the number of instances a leaf should observe between split attempts
* (default = 200)
*
*
*
* -N
* The number of instances (weight) a leaf should observe before allowing naive Bayes to make predictions (NB or NB adaptive only)
* (default = 0)
*
*
*
* -P
* Print leaf models when using naive Bayes at the leaves.
*
*
*
* @author Richard Kirkby ([email protected])
* @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
* @version $Revision: 11006 $
*/
public class HoeffdingTree extends AbstractClassifier implements
UpdateableClassifier, WeightedInstancesHandler, OptionHandler,
RevisionHandler, TechnicalInformationHandler, Drawable, Serializable {
/**
* For serialization
*/
private static final long serialVersionUID = 7117521775722396251L;
/**
* Dataset header. Presumably the structure (attributes/class) of the training
* data - TODO confirm against the code that assigns it.
*/
protected Instances m_header;
/** Root node of the tree; reset() sets this back to null */
protected HNode m_root;
/** The number of instances a leaf should observe between split attempts */
protected double m_gracePeriod = 200;
/**
* The allowable error in a split decision. Values closer to zero will take
* longer to decide
*/
protected double m_splitConfidence = 0.0000001;
/** Threshold below which a split will be forced to break ties */
protected double m_hoeffdingTieThreshold = 0.05;
/**
* The minimum fraction of weight required down at least two branches for info
* gain splitting
*/
protected double m_minFracWeightForTwoBranchesGain = 0.01;
/**
* The splitting metric to use: one of GINI_SPLIT or INFO_GAIN_SPLIT. The
* field is initialised to INFO_GAIN_SPLIT (1).
*
* NOTE(review): the class-level Javadoc lists the -S option default as 0
* (Gini), which does not match this initial value - confirm which is
* intended.
*/
protected int m_selectedSplitMetric = INFO_GAIN_SPLIT;
/**
* The split metric object. Initialised to an info gain metric constructed
* with the initial value of m_minFracWeightForTwoBranchesGain; presumably
* replaced when the metric selection or that fraction changes - TODO confirm
* in the corresponding setters.
*/
protected SplitMetric m_splitMetric = new InfoGainSplitMetric(
m_minFracWeightForTwoBranchesGain);
/**
* The leaf prediction strategy to use: one of LEAF_MAJ_CLASS, LEAF_NB or
* LEAF_NB_ADAPTIVE. The field is initialised to LEAF_NB_ADAPTIVE.
*/
protected int m_leafStrategy = LEAF_NB_ADAPTIVE;
/**
* The number of instances (total weight) a leaf should observe before
* allowing naive Bayes to make predictions
*/
protected double m_nbThreshold = 0;
/**
* Counter zeroed by reset(). Presumably the number of active leaves in the
* tree - TODO confirm where it is maintained.
*/
protected int m_activeLeafCount;
/**
* Counter zeroed by reset(). Presumably the number of inactive leaves in the
* tree - TODO confirm where it is maintained.
*/
protected int m_inactiveLeafCount;
/**
* Counter zeroed by reset(). Presumably the number of internal (split) nodes
* in the tree - TODO confirm where it is maintained.
*/
protected int m_decisionNodeCount;
/** Split metric identifier: Gini */
public static final int GINI_SPLIT = 0;
/** Split metric identifier: info gain */
public static final int INFO_GAIN_SPLIT = 1;
/** Tags pairing each split metric identifier with its display label */
public static final Tag[] TAGS_SELECTION = {
new Tag(GINI_SPLIT, "Gini split"),
new Tag(INFO_GAIN_SPLIT, "Info gain split") };
/** Leaf strategy identifier: majority class */
public static final int LEAF_MAJ_CLASS = 0;
/** Leaf strategy identifier: naive Bayes */
public static final int LEAF_NB = 1;
/** Leaf strategy identifier: naive Bayes adaptive */
public static final int LEAF_NB_ADAPTIVE = 2;
/** Tags pairing each leaf strategy identifier with its display label */
public static final Tag[] TAGS_SELECTION2 = {
new Tag(LEAF_MAJ_CLASS, "Majority class"),
new Tag(LEAF_NB, "Naive Bayes"),
new Tag(LEAF_NB_ADAPTIVE, "Naive Bayes adaptive") };
/**
* Print out leaf models in the case of naive Bayes or naive Bayes adaptive
* leaves
*/
protected boolean m_printLeafModels;
/**
* Returns a string describing this classifier.
*
* The text is a fixed two-paragraph description of the Hoeffding tree
* algorithm, followed by the string form of the reference returned by
* {@link #getTechnicalInformation()}.
*
* @return a description suitable for displaying in the explorer/experimenter
* gui
*/
public String globalInfo() {
  // Every continuation fragment starts with its own separating space, so
  // adjacent fragments never run words together or double up spaces.
  return "A Hoeffding tree (VFDT) is an incremental, anytime decision tree induction algorithm"
    + " that is capable of learning from massive data streams, assuming that the"
    + " distribution generating examples does not change over time. Hoeffding trees"
    + " exploit the fact that a small sample can often be enough to choose an optimal"
    + " splitting attribute. This idea is supported mathematically by the Hoeffding"
    + " bound, which quantifies the number of observations (in our case, examples)"
    + " needed to estimate some statistics within a prescribed precision (in our"
    + " case, the goodness of an attribute).\n\nA theoretically appealing feature"
    + " of Hoeffding Trees not shared by other incremental decision tree learners is that"
    + " it has sound guarantees of performance. Using the Hoeffding bound one can show that"
    + " its output is asymptotically nearly identical to that of a non-incremental learner"
    + " using infinitely many examples. For more information see: \n\n"
    + getTechnicalInformation().toString();
}
/**
* Builds the bibliographic record for the paper this classifier is based on:
* an INPROCEEDINGS entry carrying author, title, book title, year, pages and
* publisher.
*
* @return the technical information about this class
*/
@Override
public TechnicalInformation getTechnicalInformation() {
  final TechnicalInformation paper = new TechnicalInformation(
    Type.INPROCEEDINGS);

  paper.setValue(Field.AUTHOR,
    "Geoff Hulten and Laurie Spencer and Pedro Domingos");
  paper.setValue(Field.TITLE, "Mining time-changing data streams");
  paper.setValue(Field.BOOKTITLE,
    "ACM SIGKDD Intl. Conf. on Knowledge Discovery and Data Mining");
  paper.setValue(Field.YEAR, "2001");
  paper.setValue(Field.PAGES, "97-106");
  paper.setValue(Field.PUBLISHER, "ACM Press");

  return paper;
}
/**
* Clears the tree state: zeroes the active-leaf, inactive-leaf and
* decision-node counters and drops the reference to the root node.
*/
protected void reset() {
  m_activeLeafCount = m_inactiveLeafCount = m_decisionNodeCount = 0;
  m_root = null;
}
/**
* Returns the capabilities of this classifier. Starting from the superclass
* capabilities with everything disabled, it enables nominal, date and numeric
* attributes, missing attribute values, a nominal class and missing class
* values, and sets the minimum number of instances to 0.
*
* @return the capabilities of this classifier
*/
@Override
public Capabilities getCapabilities() {
  final Capabilities caps = super.getCapabilities();
  caps.disableAll();

  // Enabled one at a time, in the listed order.
  final Capability[] supported = {
    // attributes
    Capability.NOMINAL_ATTRIBUTES,
    Capability.DATE_ATTRIBUTES,
    Capability.NUMERIC_ATTRIBUTES,
    Capability.MISSING_VALUES,
    // class
    Capability.NOMINAL_CLASS,
    Capability.MISSING_CLASS_VALUES };
  for (Capability capability : supported) {
    caps.enable(capability);
  }

  caps.setMinimumNumberInstances(0);
  return caps;
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
@Override
public Enumeration