All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.datagenerators.classifiers.classification.RDG1 Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * RDG1.java
 * Copyright (C) 2000-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.datagenerators.classifiers.classification;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.WekaEnumeration;
import weka.datagenerators.ClassificationGenerator;
import weka.datagenerators.Test;

/**
 * A data generator that produces data randomly by producing a decision list.
 * <br>
 * The decision list consists of rules.<br>
 * Instances are generated randomly one by one. If the decision list fails to
 * classify the current instance, a new rule according to this current instance
 * is generated and added to the decision list.
 * <p>
 * The option -V switches on voting, which means that at the end of the
 * generation all instances are reclassified to the class value that is
 * supported by the most rules.
 * <p>
 * This data generator can generate 'boolean' attributes (= nominal with the
 * values {true, false}) and numeric attributes. The rules can be 'A' or 'NOT A'
 * for boolean values and 'B &lt; random_value' or 'B &gt;= random_value' for
 * numeric values.
 * <p>
 * Valid options are:
 *

 * -h
 *  Prints this help.
 * 
* *
 * -o <file>
 *  The name of the output file, otherwise the generated data is
 *  printed to stdout.
 * 
* *
 * -r <name>
 *  The name of the relation.
 * 
* *
 * -d
 *  Whether to print debug informations.
 * 
* *
 * -S
 *  The seed for random function (default 1)
 * 
* *
 * -n <num>
 *  The number of examples to generate (default 100)
 * 
* *
 * -a <num>
 *  The number of attributes (default 10).
 * 
* *
 * -c <num>
 *  The number of classes (default 2)
 * 
* *
 * -R <num>
 *  maximum size for rules (default 10)
 * 
* *
 * -M <num>
 *  minimum size for rules (default 1)
 * 
* *
 * -I <num>
 *  number of irrelevant attributes (default 0)
 * 
* *
 * -N
 *  number of numeric attributes (default 0)
 * 
* *
 * -V
 *  switch on voting (default is no voting)
 * 
 * <p>
 * The following is an example of a generated dataset:
 *
 * %
 * % weka.datagenerators.RDG1 -r expl -a 2 -c 3 -n 4 -N 1 -I 0 -M 2 -R 10 -S 2
 * %
 * &#64;relation expl
 * 
 * &#64;attribute a0 {false,true}
 * &#64;attribute a1 numeric
 * &#64;attribute class {c0,c1,c2}
 * 
 * &#64;data
 * 
 * true,0.496823,c0
 * false,0.743158,c1
 * false,0.408285,c1
 * false,0.993687,c2
 * %
 * % Number of attributes chosen as irrelevant = 0
 * %
 * % DECISIONLIST (number of rules = 3):
 * % RULE 0:   c0 := a1 < 0.986, a0
 * % RULE 1:   c1 := a1 < 0.95, not(a0)
 * % RULE 2:   c2 := not(a0), a1 >= 0.562
 * 
* * @author Gabi Schmidberger ([email protected]) * @version $Revision: 10203 $ */ public class RDG1 extends ClassificationGenerator { /** for serialization */ static final long serialVersionUID = 7751005204635320414L; /** * class to represent decisionlist */ private class RuleList implements Serializable, RevisionHandler { /** for serialization */ static final long serialVersionUID = 2830125413361938177L; /** rule list */ private ArrayList m_RuleList = null; /** class */ double m_ClassValue = 0.0; /** * returns the class value * * @return the class value */ public double getClassValue() { return m_ClassValue; } /** * sets the class value * * @param newClassValue the new classvalue */ public void setClassValue(double newClassValue) { m_ClassValue = newClassValue; } /** * adds the given test to the list * * @param newTest the test to add */ private void addTest(Test newTest) { if (m_RuleList == null) { m_RuleList = new ArrayList(); } m_RuleList.add(newTest); } /** * classifies the given example * * @param example the instance to classify * @return the classification * @throws Exception if classification fails */ private double classifyInstance(Instance example) throws Exception { boolean passedAllTests = true; for (Enumeration e = new WekaEnumeration(m_RuleList); passedAllTests && e.hasMoreElements();) { Test test = e.nextElement(); passedAllTests = test.passesTest(example); } if (passedAllTests) { return m_ClassValue; } else { return -1.0; } } /** * returns a string representation of the rule list * * @return the rule list as string */ @Override public String toString() { StringBuffer str = new StringBuffer(); str = str.append(" c" + (int) m_ClassValue + " := "); Enumeration e = new WekaEnumeration(m_RuleList); if (e.hasMoreElements()) { Test test = e.nextElement(); str = str.append(test.toPrologString()); } while (e.hasMoreElements()) { Test test = e.nextElement(); str = str.append(", " + test.toPrologString()); } return str.toString(); } /** * Returns the 
revision string. * * @return the revision */ @Override public String getRevision() { return RevisionUtils.extract("$Revision: 10203 $"); } } /* end class RuleList ***** */ /** Number of attribute the dataset should have */ protected int m_NumAttributes; /** Number of Classes the dataset should have */ protected int m_NumClasses; /** maximum rule size */ private int m_MaxRuleSize; /** minimum rule size */ private int m_MinRuleSize; /** number of irrelevant attributes. */ private int m_NumIrrelevant; /** number of numeric attribute */ private int m_NumNumeric; /** flag that stores if voting is wished */ private boolean m_VoteFlag = false; /** decision list */ private ArrayList m_DecisionList = null; /** * array defines which attributes are irrelevant, with: true = attribute is * irrelevant; false = attribute is not irrelevant */ boolean[] m_AttList_Irr; /** * initializes the generator with default values */ public RDG1() { super(); setNumAttributes(defaultNumAttributes()); setNumClasses(defaultNumClasses()); setMaxRuleSize(defaultMaxRuleSize()); setMinRuleSize(defaultMinRuleSize()); setNumIrrelevant(defaultNumIrrelevant()); setNumNumeric(defaultNumNumeric()); } /** * Returns a string describing this data generator. * * @return a description of the data generator suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "A data generator that produces data randomly by producing a decision list.\n" + "The decision list consists of rules.\n" + "Instances are generated randomly one by one. 
If decision list fails " + "to classify the current instance, a new rule according to this current " + "instance is generated and added to the decision list.\n\n" + "The option -V switches on voting, which means that at the end " + "of the generation all instances are " + "reclassified to the class value that is supported by the most rules.\n\n" + "This data generator can generate 'boolean' attributes (= nominal with " + "the values {true, false}) and numeric attributes. The rules can be " + "'A' or 'NOT A' for boolean values and 'B < random_value' or " + "'B >= random_value' for numeric values."; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options */ @Override public Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy