All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.classifiers.trees.m5.M5Base Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the stable version. Apart from bugfixes, this version does not receive any other updates.

There is a newer version: 3.8.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see .
 */

/*
 *    M5Base.java
 *    Copyright (C) 2000-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.trees.m5;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.classifiers.functions.LinearRegression;
import weka.core.AdditionalMeasureProducer;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.supervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.RemoveUseless;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;

/**
 * M5Base. Implements base routines for generating M5 Model trees and rules.
 * 

* * The original algorithm M5 was invented by Quinlan:
* * Quinlan J. R. (1992). Learning with continuous classes. Proceedings of the * Australian Joint Conference on Artificial Intelligence. 343--348. World * Scientific, Singapore. *

* * Yong Wang made improvements and created M5':
* * Wang, Y and Witten, I. H. (1997). Induction of model trees for predicting * continuous classes. Proceedings of the poster papers of the European * Conference on Machine Learning. University of Economics, Faculty of * Informatics and Statistics, Prague. *

* * Valid options are: *

* * -U
* Use unsmoothed predictions. *

* * -R
* Build regression tree/rule rather than model tree/rule * * @author Mark Hall ([email protected]) * @version $Revision: 14534 $ */ public abstract class M5Base extends AbstractClassifier implements AdditionalMeasureProducer, TechnicalInformationHandler { /** for serialization */ private static final long serialVersionUID = -4022221950191647679L; /** * the instances covered by the tree/rules */ private Instances m_instances; /** * the rule set */ protected ArrayList m_ruleSet; /** * generate a decision list instead of a single tree. */ private boolean m_generateRules; /** * use unsmoothed predictions */ private boolean m_unsmoothedPredictions; /** * filter to fill in missing values */ private ReplaceMissingValues m_replaceMissing; /** * filter to convert nominal attributes to binary */ private NominalToBinary m_nominalToBinary; /** * for removing useless attributes */ private RemoveUseless m_removeUseless; /** * Save instances at each node in an M5 tree for visualization purposes. */ protected boolean m_saveInstances = false; /** * Make a regression tree/rule instead of a model tree/rule */ protected boolean m_regressionTree; /** * Do not prune tree/rules */ protected boolean m_useUnpruned = false; /** * The minimum number of instances to allow at a leaf node */ protected double m_minNumInstances = 4; /** * Constructor */ public M5Base() { m_generateRules = false; m_unsmoothedPredictions = false; m_useUnpruned = false; m_minNumInstances = 4; } /** * Returns an instance of a TechnicalInformation object, containing detailed * information about the technical background of this class, e.g., paper * reference or book this class is based on. * * @return the technical information about this class */ @Override public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; TechnicalInformation additional; result = new TechnicalInformation(Type.INPROCEEDINGS); result.setValue(Field.AUTHOR, "Ross J. Quinlan"); result.setValue(Field.TITLE, "Learning with Continuous Classes"); result.setValue(Field.BOOKTITLE, "5th Australian Joint Conference on Artificial Intelligence"); result.setValue(Field.YEAR, "1992"); result.setValue(Field.PAGES, "343-348"); result.setValue(Field.PUBLISHER, "World Scientific"); result.setValue(Field.ADDRESS, "Singapore"); additional = result.add(Type.INPROCEEDINGS); additional.setValue(Field.AUTHOR, "Y. Wang and I. H. Witten"); additional.setValue(Field.TITLE, "Induction of model trees for predicting continuous classes"); additional.setValue(Field.BOOKTITLE, "Poster papers of the 9th European Conference on Machine Learning"); additional.setValue(Field.YEAR, "1997"); additional.setValue(Field.PUBLISHER, "Springer"); return result; } /** * Returns an enumeration describing the available options * * @return an enumeration of all the available options */ @Override public Enumeration

* * Valid options are: *

* * -U
* Use unsmoothed predictions. *

* * -R
* Build a regression tree rather than a model tree. *

* * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ @Override public void setOptions(String[] options) throws Exception { setUnpruned(Utils.getFlag('N', options)); setUseUnsmoothed(Utils.getFlag('U', options)); setBuildRegressionTree(Utils.getFlag('R', options)); String optionString = Utils.getOption('M', options); if (optionString.length() != 0) { setMinNumInstances((new Double(optionString)).doubleValue()); } super.setOptions(options); Utils.checkForRemainingOptions(options); } /** * Gets the current settings of the classifier. * * @return an array of strings suitable for passing to setOptions */ @Override public String[] getOptions() { Vector result = new Vector(); if (getUnpruned()) { result.add("-N"); } if (getUseUnsmoothed()) { result.add("-U"); } if (getBuildRegressionTree()) { result.add("-R"); } result.add("-M"); result.add("" + getMinNumInstances()); Collections.addAll(result, super.getOptions()); return result.toArray(new String[result.size()]); } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String unprunedTipText() { return "Whether unpruned tree/rules are to be generated."; } /** * Use unpruned tree/rules * * @param unpruned true if unpruned tree/rules are to be generated */ public void setUnpruned(boolean unpruned) { m_useUnpruned = unpruned; } /** * Get whether unpruned tree/rules are being generated * * @return true if unpruned tree/rules are to be generated */ public boolean getUnpruned() { return m_useUnpruned; } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String generateRulesTipText() { return "Whether to generate rules (decision list) rather than a tree."; } /** * Generate rules (decision list) rather than a tree * * @param u true if rules are to be generated */ protected void setGenerateRules(boolean u) { m_generateRules = u; } /** * get whether rules are being generated rather than a tree * * @return true if rules are to be generated */ protected boolean getGenerateRules() { return m_generateRules; } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String useUnsmoothedTipText() { return "Whether to use unsmoothed predictions."; } /** * Use unsmoothed predictions * * @param s true if unsmoothed predictions are to be used */ public void setUseUnsmoothed(boolean s) { m_unsmoothedPredictions = s; } /** * Get whether or not smoothing is being used * * @return true if unsmoothed predictions are to be used */ public boolean getUseUnsmoothed() { return m_unsmoothedPredictions; } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String buildRegressionTreeTipText() { return "Whether to generate a regression tree/rule instead of a model tree/rule."; } /** * Get the value of regressionTree. * * @return Value of regressionTree. */ public boolean getBuildRegressionTree() { return m_regressionTree; } /** * Set the value of regressionTree. * * @param newregressionTree Value to assign to regressionTree. */ public void setBuildRegressionTree(boolean newregressionTree) { m_regressionTree = newregressionTree; } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String minNumInstancesTipText() { return "The minimum number of instances to allow at a leaf node."; } /** * Set the minimum number of instances to allow at a leaf node * * @param minNum the minimum number of instances */ public void setMinNumInstances(double minNum) { m_minNumInstances = minNum; } /** * Get the minimum number of instances to allow at a leaf node * * @return a double value */ public double getMinNumInstances() { return m_minNumInstances; } /** * Returns default capabilities of the classifier, i.e., of LinearRegression. * * @return the capabilities of this classifier */ @Override public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); result.disableAll(); // attributes result.enable(Capabilities.Capability.NOMINAL_ATTRIBUTES); result.enable(Capabilities.Capability.NUMERIC_ATTRIBUTES); result.enable(Capabilities.Capability.DATE_ATTRIBUTES); result.enable(Capabilities.Capability.MISSING_VALUES); // class result.enable(Capabilities.Capability.NUMERIC_CLASS); result.enable(Capabilities.Capability.DATE_CLASS); result.enable(Capabilities.Capability.MISSING_CLASS_VALUES); return result; } /** * Generates the classifier. * * @param data set of instances serving as training data * @throws Exception if the classifier has not been generated successfully */ @Override public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); m_instances = new Instances(data); m_replaceMissing = new ReplaceMissingValues(); m_replaceMissing.setInputFormat(m_instances); m_instances = Filter.useFilter(m_instances, m_replaceMissing); m_nominalToBinary = new NominalToBinary(); m_nominalToBinary.setInputFormat(m_instances); m_instances = Filter.useFilter(m_instances, m_nominalToBinary); m_removeUseless = new RemoveUseless(); m_removeUseless.setInputFormat(m_instances); m_instances = Filter.useFilter(m_instances, m_removeUseless); m_instances.randomize(new Random(1)); m_ruleSet = new ArrayList(); Rule tempRule; if (m_generateRules) { Instances tempInst = m_instances; do { tempRule = new Rule(); tempRule.setSmoothing(!m_unsmoothedPredictions); tempRule.setRegressionTree(m_regressionTree); tempRule.setUnpruned(m_useUnpruned); tempRule.setSaveInstances(false); tempRule.setMinNumInstances(m_minNumInstances); tempRule.buildClassifier(tempInst); m_ruleSet.add(tempRule); // System.err.println("Built rule : "+tempRule.toString()); tempInst = tempRule.notCoveredInstances(); tempRule.freeNotCoveredInstances(); } while (tempInst.numInstances() > 0); } else { // just build a single tree tempRule = new Rule(); tempRule.setUseTree(true); // tempRule.setGrowFullTree(true); tempRule.setSmoothing(!m_unsmoothedPredictions); tempRule.setSaveInstances(m_saveInstances); tempRule.setRegressionTree(m_regressionTree); tempRule.setUnpruned(m_useUnpruned); tempRule.setMinNumInstances(m_minNumInstances); Instances temp_train; temp_train = m_instances; tempRule.buildClassifier(temp_train); m_ruleSet.add(tempRule); // System.err.print(tempRule.m_topOfTree.treeToString(0)); } // save space m_instances = new Instances(m_instances, 0); } /** * Calculates a prediction for an instance using a set of rules or an M5 model * tree * * @param inst the instance whos class value is to be predicted * @return the prediction * @throws Exception if a prediction can't be made. */ @Override public double classifyInstance(Instance inst) throws Exception { Rule temp; double prediction = 0; boolean success = false; m_replaceMissing.input(inst); inst = m_replaceMissing.output(); m_nominalToBinary.input(inst); inst = m_nominalToBinary.output(); m_removeUseless.input(inst); inst = m_removeUseless.output(); if (m_ruleSet == null) { throw new Exception("Classifier has not been built yet!"); } if (!m_generateRules) { temp = m_ruleSet.get(0); return temp.classifyInstance(inst); } boolean cont; int i; for (i = 0; i < m_ruleSet.size(); i++) { cont = false; temp = m_ruleSet.get(i); try { prediction = temp.classifyInstance(inst); success = true; } catch (Exception e) { cont = true; } if (!cont) { break; } } if (!success) { System.out.println("Error in predicting (DecList)"); } return prediction; } /** * Returns a description of the classifier * * @return a description of the classifier as a String */ @Override public String toString() { StringBuffer text = new StringBuffer(); Rule temp; if (m_ruleSet == null) { return "Classifier hasn't been built yet!"; } if (m_generateRules) { text.append("M5 " + ((m_useUnpruned == true) ? "unpruned " : "pruned ") + ((m_regressionTree == true) ? "regression " : "model ") + "rules "); if (!m_unsmoothedPredictions) { text.append("\n(using smoothed linear models) "); } text.append(":\n"); text.append("Number of Rules : " + m_ruleSet.size() + "\n\n"); for (int j = 0; j < m_ruleSet.size(); j++) { temp = m_ruleSet.get(j); text.append("Rule: " + (j + 1) + "\n"); text.append(temp.toString()); } } else { temp = m_ruleSet.get(0); text.append(temp.toString()); } return text.toString(); } /** * Returns an enumeration of the additional measure names * * @return an enumeration of the measure names */ @Override public Enumeration enumerateMeasures() { Vector newVector = new Vector(1); newVector.add("measureNumRules"); return newVector.elements(); } /** * Returns the value of the named measure * * @param additionalMeasureName the name of the measure to query for its value * @return the value of the named measure * @throws Exception if the named measure is not supported */ @Override public double getMeasure(String additionalMeasureName) { if (additionalMeasureName.compareToIgnoreCase("measureNumRules") == 0) { return measureNumRules(); } else { throw new IllegalArgumentException(additionalMeasureName + " not supported (M5)"); } } /** * return the number of rules * * @return the number of rules (same as # linear models & # leaves in the * tree) */ public double measureNumRules() { if (m_generateRules) { return m_ruleSet.size(); } return m_ruleSet.get(0).m_topOfTree.numberOfLinearModels(); } public RuleNode getM5RootNode() { Rule temp = m_ruleSet.get(0); return temp.getM5RootNode(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy