/*
 * Serving project downloads consumes considerable resources, and we have to cover our
 * server costs; thank you in advance for your understanding. Project price: only $1.
 * You can buy this project and download/modify it as often as you want.
 *
 * The Waikato Environment for Knowledge Analysis (WEKA), a machine
 * learning workbench. This is the stable version. Apart from bug fixes, this version
 * does not receive any other updates.
 */
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* J48.java
* Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.trees;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.Sourcable;
import weka.classifiers.trees.j48.BinC45ModelSelection;
import weka.classifiers.trees.j48.C45ModelSelection;
import weka.classifiers.trees.j48.C45PruneableClassifierTree;
import weka.classifiers.trees.j48.ClassifierTree;
import weka.classifiers.trees.j48.ModelSelection;
import weka.classifiers.trees.j48.PruneableClassifierTree;
import weka.core.AdditionalMeasureProducer;
import weka.core.Capabilities;
import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Matchable;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.PartitionGenerator;
import weka.core.RevisionUtils;
import weka.core.Summarizable;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
/**
* Class for generating a pruned or unpruned C4.5
* decision tree. For more information, see
*
* Ross Quinlan (1993). C4.5: Programs for Machine Learning. Morgan Kaufmann
* Publishers, San Mateo, CA.
*
*
*
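* BibTeX:
*
* <pre>
* &#64;book{Quinlan1993,
*    address = {San Mateo, CA},
*    author = {Ross Quinlan},
*    publisher = {Morgan Kaufmann Publishers},
*    title = {C4.5: Programs for Machine Learning},
*    year = {1993}
* }
* </pre>
*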
* Valid options are:
*
* <pre>
* -U
*  Use unpruned tree.
* </pre>
*
* <pre>
* -C &lt;pruning confidence&gt;
*  Set confidence threshold for pruning.
*  (default 0.25)
* </pre>
*
* <pre>
* -M &lt;minimum number of instances&gt;
*  Set minimum number of instances per leaf.
*  (default 2)
* </pre>
*
* <pre>
* -R
*  Use reduced error pruning.
* </pre>
*
* <pre>
* -N &lt;number of folds&gt;
*  Set number of folds for reduced error
*  pruning. One fold is used as pruning set.
*  (default 3)
* </pre>
*
* <pre>
* -B
*  Use binary splits only.
* </pre>
*
* <pre>
* -S
*  Don't perform subtree raising.
* </pre>
*
* <pre>
* -L
*  Do not clean up after the tree has been built.
* </pre>
*
* <pre>
* -A
*  Laplace smoothing for predicted probabilities.
* </pre>
*
* <pre>
* -J
*  Do not use MDL correction for info gain on numeric attributes.
* </pre>
*
* <pre>
* -Q &lt;seed&gt;
*  Seed for random data shuffling (default 1).
* </pre>
*
* <pre>
* -doNotMakeSplitPointActualValue
*  Do not make split point actual value.
* </pre>
*
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @version $Revision: 15520 $
*/
public class J48 extends AbstractClassifier implements OptionHandler, Drawable,
Matchable, Sourcable, WeightedInstancesHandler, Summarizable,
AdditionalMeasureProducer, TechnicalInformationHandler, PartitionGenerator {
/** Version identifier used for serialization. */
static final long serialVersionUID = -217733168393644444L;
/** Root of the decision tree; assigned by {@link #buildClassifier(Instances)}. */
protected ClassifierTree m_root;
/**
 * If true, an unpruned tree is requested. The negated value is handed to the tree
 * constructors in {@link #buildClassifier(Instances)}.
 */
protected boolean m_unpruned = false;
/**
 * Collapse tree? Only forwarded to the C4.5 pruneable tree, i.e. when reduced error
 * pruning is not selected.
 */
protected boolean m_collapseTree = true;
/**
 * Confidence level used for pruning. Must lie strictly between 0 and 1, and may only
 * differ from 0.25 for a pruned tree without reduced error pruning.
 */
protected float m_CF = 0.25f;
/** Minimum number of instances per leaf; forwarded to the model selection strategy. */
protected int m_minNumObj = 2;
/** Use MDL correction (for info gain on numeric attributes)? */
protected boolean m_useMDLcorrection = true;
/**
 * Determines whether probabilities are smoothed using Laplace correction when
 * predictions are generated
 */
protected boolean m_useLaplace = false;
/**
 * Use reduced error pruning? Selects which tree implementation is built in
 * {@link #buildClassifier(Instances)}.
 */
protected boolean m_reducedErrorPruning = false;
/**
 * Number of folds for reduced error pruning. May only differ from 3 when reduced
 * error pruning is selected.
 */
protected int m_numFolds = 3;
/** Binary splits on nominal attributes? Selects the binary model selection strategy. */
protected boolean m_binarySplits = false;
/** Subtree raising to be performed? Must stay true for an unpruned tree. */
protected boolean m_subtreeRaising = true;
/**
 * If true, do NOT clean up after the tree has been built. The negated value is handed
 * to the tree constructors as the clean-up flag.
 */
protected boolean m_noCleanup = false;
/**
 * Random number seed for reduced-error pruning. May only differ from 1 when reduced
 * error pruning is selected.
 */
protected int m_Seed = 1;
/**
 * Do not relocate split point to actual data value. Has no initializer, so it starts
 * out as false.
 */
protected boolean m_doNotMakeSplitPointActualValue;
/**
 * Builds a short description of this classifier, followed by the literature
 * reference obtained from {@link #getTechnicalInformation()}.
 *
 * @return a description suitable for displaying in the explorer/experimenter
 *         gui
 */
public String globalInfo() {
  final StringBuilder description = new StringBuilder();
  description.append("Class for generating a pruned or unpruned C4.5 decision tree. For more ");
  description.append("information, see\n\n");
  description.append(getTechnicalInformation().toString());
  return description.toString();
}
/**
 * Describes the publication this classifier is based on: a book entry for
 * Quinlan's "C4.5: Programs for Machine Learning" (1993).
 *
 * @return the technical information about this class
 */
@Override
public TechnicalInformation getTechnicalInformation() {
  final TechnicalInformation book = new TechnicalInformation(Type.BOOK);

  book.setValue(Field.AUTHOR, "Ross Quinlan");
  book.setValue(Field.YEAR, "1993");
  book.setValue(Field.TITLE, "C4.5: Programs for Machine Learning");
  book.setValue(Field.PUBLISHER, "Morgan Kaufmann Publishers");
  book.setValue(Field.ADDRESS, "San Mateo, CA");

  return book;
}
/**
 * Declares what kind of data this classifier accepts. Starting from an
 * all-disabled set, it enables nominal, numeric and date attributes (with
 * missing values), a nominal class (with missing class values), and sets the
 * minimum number of instances to zero.
 *
 * @return the capabilities of this classifier
 */
@Override
public Capabilities getCapabilities() {
  final Capabilities caps = new Capabilities(this);
  caps.disableAll();

  // supported attribute types
  caps.enable(Capability.NOMINAL_ATTRIBUTES);
  caps.enable(Capability.NUMERIC_ATTRIBUTES);
  caps.enable(Capability.DATE_ATTRIBUTES);
  caps.enable(Capability.MISSING_VALUES);

  // supported class types
  caps.enable(Capability.NOMINAL_CLASS);
  caps.enable(Capability.MISSING_CLASS_VALUES);

  // no lower bound on the number of instances
  caps.setMinimumNumberInstances(0);

  return caps;
}
/**
 * Trains the decision tree on the supplied data.
 * <p>
 * The current option combination is validated first, before the data is looked
 * at; contradictory settings are rejected with an exception. The data is then
 * tested against {@link #getCapabilities()}, a model selection strategy and a
 * tree implementation are chosen according to the options, the tree is built,
 * and finally the strategy is asked to clean up.
 *
 * @param instances the data to train the classifier with
 * @throws Exception if the options are inconsistent, the data fails the
 *           capability test, or the classifier can't be built successfully
 */
@Override
public void buildClassifier(Instances instances) throws Exception {
  // Options that are meaningless for an unpruned tree.
  if (m_unpruned) {
    if (!m_subtreeRaising) {
      throw new Exception("Subtree raising does not need to be unset for unpruned trees!");
    }
    if (m_reducedErrorPruning) {
      throw new Exception("Unpruned tree and reduced error pruning cannot be selected simultaneously!");
    }
    if (m_CF != 0.25f) {
      throw new Exception("It does not make sense to change the confidence for an unpruned tree!");
    }
  }

  // Options tied to the presence or absence of reduced error pruning.
  if (m_reducedErrorPruning) {
    if (m_CF != 0.25f) {
      throw new Exception("Changing the confidence does not make sense for reduced error pruning.");
    }
  } else {
    if (m_numFolds != 3) {
      throw new Exception(
        "Changing the number of folds does not make sense if reduced error pruning is not selected.");
    }
    if (m_Seed != 1) {
      throw new Exception(
        "Changing the seed does not make sense if reduced error pruning is not selected.");
    }
  }

  // The confidence has to lie in the open interval (0, 1).
  if ((m_CF <= 0) || (m_CF >= 1)) {
    throw new Exception("Confidence has to be greater than zero and smaller than one!");
  }

  getCapabilities().testWithFail(instances);

  // Strategy deciding how nodes are split: binary or multi-way.
  final ModelSelection splitStrategy = m_binarySplits
    ? new BinC45ModelSelection(m_minNumObj, instances, m_useMDLcorrection,
      m_doNotMakeSplitPointActualValue)
    : new C45ModelSelection(m_minNumObj, instances, m_useMDLcorrection,
      m_doNotMakeSplitPointActualValue);

  // Tree implementation: reduced error pruning or C4.5 pruning.
  m_root = m_reducedErrorPruning
    ? new PruneableClassifierTree(splitStrategy, !m_unpruned, m_numFolds,
      !m_noCleanup, m_Seed)
    : new C45PruneableClassifierTree(splitStrategy, !m_unpruned, m_CF,
      m_subtreeRaising, !m_noCleanup, m_collapseTree);

  m_root.buildClassifier(instances);

  // cleanup() is called on the concrete strategy type, hence the casts.
  if (!m_binarySplits) {
    ((C45ModelSelection) splitStrategy).cleanup();
  } else {
    ((BinC45ModelSelection) splitStrategy).cleanup();
  }
}
/**
 * Classifies an instance by delegating to the root of the tree, which is set by
 * {@link #buildClassifier(Instances)}.
 *
 * @param instance the instance to classify
 * @return the classification for the instance
 * @throws Exception if instance can't be classified successfully
 */
@Override
public double classifyInstance(Instance instance) throws Exception {
  final double predictedClass = m_root.classifyInstance(instance);
  return predictedClass;
}
/**
 * Computes the class probabilities for an instance by delegating to the root of
 * the tree, passing along the current Laplace setting.
 *
 * @param instance the instance to calculate the class probabilities for
 * @return the class probabilities
 * @throws Exception if distribution can't be computed successfully
 */
@Override
public final double[] distributionForInstance(Instance instance)
  throws Exception {
  final double[] classProbabilities =
    m_root.distributionForInstance(instance, m_useLaplace);
  return classProbabilities;
}
/**
 * Reports the kind of graph produced by {@link #graph()}.
 *
 * @return Drawable.TREE
 */
@Override
public int graphType() {
  final int type = Drawable.TREE;
  return type;
}
/**
 * Produces the graph description of the tree by delegating to the tree root.
 *
 * @return the graph describing the tree
 * @throws Exception if graph can't be computed
 */
@Override
public String graph() throws Exception {
  final String treeGraph = m_root.graph();
  return treeGraph;
}
/**
 * Produces the prefix-order representation of the tree by delegating to the
 * tree root.
 *
 * @return the tree in prefix order
 * @throws Exception if something goes wrong
 */
@Override
public String prefix() throws Exception {
  final String prefixNotation = m_root.prefix();
  return prefixNotation;
}
/**
 * Renders the tree as the source code of a Java class. The generated class has
 * a static {@code classify(Object[] i)} method; the tree root supplies two
 * fragments, the assignment code placed inside that method and the support
 * code placed after it.
 *
 * @param className the name of the Java class
 * @return the tree as a Java if-then type statement
 * @throws Exception if something goes wrong
 */
@Override
public String toSource(String className) throws Exception {
  final StringBuffer[] fragments = m_root.toSource(className);

  final StringBuilder code = new StringBuilder();
  code.append("class ").append(className).append(" {\n\n");
  code.append(" public static double classify(Object[] i)\n");
  code.append(" throws Exception {\n\n");
  code.append(" double p = Double.NaN;\n");
  code.append(fragments[0]); // Assignment code
  code.append(" return p;\n");
  code.append(" }\n");
  code.append(fragments[1]); // Support code
  code.append("}\n");
  return code.toString();
}
/**
* Returns an enumeration describing the available options.
*
* Valid options are:
*
*
* -U
* Use unpruned tree.
*
*
* -C confidence
* Set confidence threshold for pruning. (Default: 0.25)
*
*
* -M number
* Set minimum number of instances per leaf. (Default: 2)
*
*
* -R
* Use reduced error pruning. No subtree raising is performed.
*
*
* -N number
* Set number of folds for reduced error pruning. One fold is used as the
* pruning set. (Default: 3)
*
*
* -B
* Use binary splits for nominal attributes.
*
*
* -S
* Don't perform subtree raising.
*
*
* -L
* Do not clean up after the tree has been built.
*
* -A
* If set, Laplace smoothing is used for predicted probabilities.
*
*
* -Q
* The seed for reduced-error pruning.
*
*
* @return an enumeration of all the available options.
*/
@Override
public Enumeration