Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
weka.datagenerators.classifiers.classification.RDG1 Maven / Gradle / Ivy
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
/*
* RDG1.java
* Copyright (C) 2000-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.datagenerators.classifiers.classification;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.WekaEnumeration;
import weka.datagenerators.ClassificationGenerator;
import weka.datagenerators.Test;
/**
* A data generator that produces data randomly by
* producing a decision list.
* The decision list consists of rules.
* Instances are generated randomly one by one. If decision list fails to
* classify the current instance, a new rule according to this current instance
* is generated and added to the decision list.
*
* The option -V switches on voting, which means that at the end of the
* generation all instances are reclassified to the class value that is
* supported by the most rules.
*
* This data generator can generate 'boolean' attributes (= nominal with the
* values {true, false}) and numeric attributes. The rules can be 'A' or 'NOT A'
* for boolean values and 'B < random_value' or 'B >= random_value' for
* numeric values.
*
*
*
* Valid options are:
*
*
*
* -h
* Prints this help.
*
*
*
* -o <file>
* The name of the output file, otherwise the generated data is
* printed to stdout.
*
*
*
* -r <name>
* The name of the relation.
*
*
*
* -d
* Whether to print debug informations.
*
*
*
* -S
* The seed for random function (default 1)
*
*
*
* -n <num>
* The number of examples to generate (default 100)
*
*
*
* -a <num>
* The number of attributes (default 10).
*
*
*
* -c <num>
* The number of classes (default 2)
*
*
*
* -R <num>
* maximum size for rules (default 10)
*
*
*
* -M <num>
* minimum size for rules (default 1)
*
*
*
* -I <num>
* number of irrelevant attributes (default 0)
*
*
*
* -N
* number of numeric attributes (default 0)
*
*
*
* -V
* switch on voting (default is no voting)
*
*
*
*
* Following an example of a generated dataset:
*
*
* %
* % weka.datagenerators.RDG1 -r expl -a 2 -c 3 -n 4 -N 1 -I 0 -M 2 -R 10 -S 2
* %
* relation expl
*
* attribute a0 {false,true}
* attribute a1 numeric
* attribute class {c0,c1,c2}
*
* data
*
* true,0.496823,c0
* false,0.743158,c1
* false,0.408285,c1
* false,0.993687,c2
* %
* % Number of attributes chosen as irrelevant = 0
* %
* % DECISIONLIST (number of rules = 3):
* % RULE 0: c0 := a1 < 0.986, a0
* % RULE 1: c1 := a1 < 0.95, not(a0)
* % RULE 2: c2 := not(a0), a1 >= 0.562
*
*
* @author Gabi Schmidberger ([email protected] )
* @version $Revision: 10203 $
*/
public class RDG1 extends ClassificationGenerator {
/** for serialization */
static final long serialVersionUID = 7751005204635320414L;
/**
* class to represent decisionlist
*/
private class RuleList implements Serializable, RevisionHandler {
/** for serialization */
static final long serialVersionUID = 2830125413361938177L;
/** rule list */
private ArrayList m_RuleList = null;
/** class */
double m_ClassValue = 0.0;
/**
* returns the class value
*
* @return the class value
*/
public double getClassValue() {
return m_ClassValue;
}
/**
* sets the class value
*
* @param newClassValue the new classvalue
*/
public void setClassValue(double newClassValue) {
m_ClassValue = newClassValue;
}
/**
* adds the given test to the list
*
* @param newTest the test to add
*/
private void addTest(Test newTest) {
if (m_RuleList == null) {
m_RuleList = new ArrayList();
}
m_RuleList.add(newTest);
}
/**
* classifies the given example
*
* @param example the instance to classify
* @return the classification
* @throws Exception if classification fails
*/
private double classifyInstance(Instance example) throws Exception {
boolean passedAllTests = true;
for (Enumeration e = new WekaEnumeration(m_RuleList); passedAllTests
&& e.hasMoreElements();) {
Test test = e.nextElement();
passedAllTests = test.passesTest(example);
}
if (passedAllTests) {
return m_ClassValue;
} else {
return -1.0;
}
}
/**
* returns a string representation of the rule list
*
* @return the rule list as string
*/
@Override
public String toString() {
StringBuffer str = new StringBuffer();
str = str.append(" c" + (int) m_ClassValue + " := ");
Enumeration e = new WekaEnumeration(m_RuleList);
if (e.hasMoreElements()) {
Test test = e.nextElement();
str = str.append(test.toPrologString());
}
while (e.hasMoreElements()) {
Test test = e.nextElement();
str = str.append(", " + test.toPrologString());
}
return str.toString();
}
/**
* Returns the revision string.
*
* @return the revision
*/
@Override
public String getRevision() {
return RevisionUtils.extract("$Revision: 10203 $");
}
} /* end class RuleList ***** */
/** Number of attribute the dataset should have */
protected int m_NumAttributes;
/** Number of Classes the dataset should have */
protected int m_NumClasses;
/** maximum rule size */
private int m_MaxRuleSize;
/** minimum rule size */
private int m_MinRuleSize;
/** number of irrelevant attributes. */
private int m_NumIrrelevant;
/** number of numeric attribute */
private int m_NumNumeric;
/** flag that stores if voting is wished */
private boolean m_VoteFlag = false;
/** decision list */
private ArrayList m_DecisionList = null;
/**
* array defines which attributes are irrelevant, with: true = attribute is
* irrelevant; false = attribute is not irrelevant
*/
boolean[] m_AttList_Irr;
/**
* initializes the generator with default values
*/
public RDG1() {
super();
setNumAttributes(defaultNumAttributes());
setNumClasses(defaultNumClasses());
setMaxRuleSize(defaultMaxRuleSize());
setMinRuleSize(defaultMinRuleSize());
setNumIrrelevant(defaultNumIrrelevant());
setNumNumeric(defaultNumNumeric());
}
/**
* Returns a string describing this data generator.
*
* @return a description of the data generator suitable for displaying in the
* explorer/experimenter gui
*/
public String globalInfo() {
return "A data generator that produces data randomly by producing a decision list.\n"
+ "The decision list consists of rules.\n"
+ "Instances are generated randomly one by one. If decision list fails "
+ "to classify the current instance, a new rule according to this current "
+ "instance is generated and added to the decision list.\n\n"
+ "The option -V switches on voting, which means that at the end "
+ "of the generation all instances are "
+ "reclassified to the class value that is supported by the most rules.\n\n"
+ "This data generator can generate 'boolean' attributes (= nominal with "
+ "the values {true, false}) and numeric attributes. The rules can be "
+ "'A' or 'NOT A' for boolean values and 'B < random_value' or "
+ "'B >= random_value' for numeric values.";
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options
*/
@Override
public Enumeration listOptions() {
Vector result = enumToVector(super.listOptions());
result.addElement(new Option("\tThe number of attributes (default "
+ defaultNumAttributes() + ").", "a", 1, "-a "));
result.addElement(new Option("\tThe number of classes (default "
+ defaultNumClasses() + ")", "c", 1, "-c "));
result.addElement(new Option("\tmaximum size for rules (default "
+ defaultMaxRuleSize() + ") ", "R", 1, "-R "));
result.addElement(new Option("\tminimum size for rules (default "
+ defaultMinRuleSize() + ") ", "M", 1, "-M "));
result.addElement(new Option("\tnumber of irrelevant attributes (default "
+ defaultNumIrrelevant() + ")", "I", 1, "-I "));
result.addElement(new Option("\tnumber of numeric attributes (default "
+ defaultNumNumeric() + ")", "N", 1, "-N"));
result.addElement(new Option("\tswitch on voting (default is no voting)",
"V", 1, "-V"));
return result.elements();
}
/**
* Parses a list of options for this object.
*
*
* Valid options are:
*
*
*
* -h
* Prints this help.
*
*
*
* -o <file>
* The name of the output file, otherwise the generated data is
* printed to stdout.
*
*
*
* -r <name>
* The name of the relation.
*
*
*
* -d
* Whether to print debug informations.
*
*
*
* -S
* The seed for random function (default 1)
*
*
*
* -n <num>
* The number of examples to generate (default 100)
*
*
*
* -a <num>
* The number of attributes (default 10).
*
*
*
* -c <num>
* The number of classes (default 2)
*
*
*
* -R <num>
* maximum size for rules (default 10)
*
*
*
* -M <num>
* minimum size for rules (default 1)
*
*
*
* -I <num>
* number of irrelevant attributes (default 0)
*
*
*
* -N
* number of numeric attributes (default 0)
*
*
*
* -V
* switch on voting (default is no voting)
*
*
*
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*/
@Override
public void setOptions(String[] options) throws Exception {
String tmpStr;
super.setOptions(options);
tmpStr = Utils.getOption('a', options);
if (tmpStr.length() != 0) {
setNumAttributes(Integer.parseInt(tmpStr));
} else {
setNumAttributes(defaultNumAttributes());
}
tmpStr = Utils.getOption('c', options);
if (tmpStr.length() != 0) {
setNumClasses(Integer.parseInt(tmpStr));
} else {
setNumClasses(defaultNumClasses());
}
tmpStr = Utils.getOption('R', options);
if (tmpStr.length() != 0) {
setMaxRuleSize(Integer.parseInt(tmpStr));
} else {
setMaxRuleSize(defaultMaxRuleSize());
}
tmpStr = Utils.getOption('M', options);
if (tmpStr.length() != 0) {
setMinRuleSize(Integer.parseInt(tmpStr));
} else {
setMinRuleSize(defaultMinRuleSize());
}
tmpStr = Utils.getOption('I', options);
if (tmpStr.length() != 0) {
setNumIrrelevant(Integer.parseInt(tmpStr));
} else {
setNumIrrelevant(defaultNumIrrelevant());
}
if ((getNumAttributes() - getNumIrrelevant()) < getMinRuleSize()) {
throw new Exception("Possible rule size is below minimal rule size.");
}
tmpStr = Utils.getOption('N', options);
if (tmpStr.length() != 0) {
setNumNumeric(Integer.parseInt(tmpStr));
} else {
setNumNumeric(defaultNumNumeric());
}
setVoteFlag(Utils.getFlag('V', options));
}
/**
* Gets the current settings of the datagenerator RDG1.
*
* @return an array of strings suitable for passing to setOptions
*/
@Override
public String[] getOptions() {
Vector result = new Vector();
Collections.addAll(result, super.getOptions());
result.add("-a");
result.add("" + getNumAttributes());
result.add("-c");
result.add("" + getNumClasses());
result.add("-N");
result.add("" + getNumNumeric());
result.add("-I");
result.add("" + getNumIrrelevant());
result.add("-M");
result.add("" + getMinRuleSize());
result.add("-R");
result.add("" + getMaxRuleSize());
if (getVoteFlag()) {
result.add("-V");
}
return result.toArray(new String[result.size()]);
}
/**
* returns the default number of attributes
*
* @return the default number of attributes
*/
protected int defaultNumAttributes() {
return 10;
}
/**
* Sets the number of attributes the dataset should have.
*
* @param numAttributes the new number of attributes
*/
public void setNumAttributes(int numAttributes) {
m_NumAttributes = numAttributes;
}
/**
* Gets the number of attributes that should be produced.
*
* @return the number of attributes that should be produced
*/
public int getNumAttributes() {
return m_NumAttributes;
}
/**
* Returns the tip text for this property
*
* @return tip text for this property suitable for displaying in the
* explorer/experimenter gui
*/
public String numAttributesTipText() {
return "The number of attributes the generated data will contain.";
}
/**
* returns the default number of classes
*
* @return the default number of classes
*/
protected int defaultNumClasses() {
return 2;
}
/**
* Sets the number of classes the dataset should have.
*
* @param numClasses the new number of classes
*/
public void setNumClasses(int numClasses) {
m_NumClasses = numClasses;
}
/**
* Gets the number of classes the dataset should have.
*
* @return the number of classes the dataset should have
*/
public int getNumClasses() {
return m_NumClasses;
}
/**
* Returns the tip text for this property
*
* @return tip text for this property suitable for displaying in the
* explorer/experimenter gui
*/
public String numClassesTipText() {
return "The number of classes to generate.";
}
/**
* returns the default max size of rules
*
* @return the default max size of rules
*/
protected int defaultMaxRuleSize() {
return 10;
}
/**
* Gets the maximum number of tests in rules.
*
* @return the maximum number of tests allowed in rules
*/
public int getMaxRuleSize() {
return m_MaxRuleSize;
}
/**
* Sets the maximum number of tests in rules.
*
* @param newMaxRuleSize new maximum number of tests allowed in rules.
*/
public void setMaxRuleSize(int newMaxRuleSize) {
m_MaxRuleSize = newMaxRuleSize;
}
/**
* Returns the tip text for this property
*
* @return tip text for this property suitable for displaying in the
* explorer/experimenter gui
*/
public String maxRuleSizeTipText() {
return "The maximum number of tests in rules.";
}
/**
* returns the default min size of rules
*
* @return the default min size of rules
*/
protected int defaultMinRuleSize() {
return 1;
}
/**
* Gets the minimum number of tests in rules.
*
* @return the minimum number of tests allowed in rules
*/
public int getMinRuleSize() {
return m_MinRuleSize;
}
/**
* Sets the minimum number of tests in rules.
*
* @param newMinRuleSize new minimum number of test in rules.
*/
public void setMinRuleSize(int newMinRuleSize) {
m_MinRuleSize = newMinRuleSize;
}
/**
* Returns the tip text for this property
*
* @return tip text for this property suitable for displaying in the
* explorer/experimenter gui
*/
public String minRuleSizeTipText() {
return "The minimum number of tests in rules.";
}
/**
* returns the default number of irrelevant attributes
*
* @return the default number of irrelevant attributes
*/
protected int defaultNumIrrelevant() {
return 0;
}
/**
* Gets the number of irrelevant attributes.
*
* @return the number of irrelevant attributes
*/
public int getNumIrrelevant() {
return m_NumIrrelevant;
}
/**
* Sets the number of irrelevant attributes.
*
* @param newNumIrrelevant the number of irrelevant attributes.
*/
public void setNumIrrelevant(int newNumIrrelevant) {
m_NumIrrelevant = newNumIrrelevant;
}
/**
* Returns the tip text for this property
*
* @return tip text for this property suitable for displaying in the
* explorer/experimenter gui
*/
public String numIrrelevantTipText() {
return "The number of irrelevant attributes.";
}
/**
* returns the default number of numeric attributes
*
* @return the default number of numeric attributes
*/
protected int defaultNumNumeric() {
return 0;
}
/**
* Gets the number of numerical attributes.
*
* @return the number of numerical attributes.
*/
public int getNumNumeric() {
return m_NumNumeric;
}
/**
* Sets the number of numerical attributes.
*
* @param newNumNumeric the number of numerical attributes.
*/
public void setNumNumeric(int newNumNumeric) {
m_NumNumeric = newNumNumeric;
}
/**
* Returns the tip text for this property
*
* @return tip text for this property suitable for displaying in the
* explorer/experimenter gui
*/
public String numNumericTipText() {
return "The number of numerical attributes.";
}
/**
* Gets the vote flag.
*
* @return voting flag.
*/
public boolean getVoteFlag() {
return m_VoteFlag;
}
/**
* Sets the vote flag.
*
* @param newVoteFlag boolean with the new setting of the vote flag.
*/
public void setVoteFlag(boolean newVoteFlag) {
m_VoteFlag = newVoteFlag;
}
/**
* Returns the tip text for this property
*
* @return tip text for this property suitable for displaying in the
* explorer/experimenter gui
*/
public String voteFlagTipText() {
return "Whether to use voting or not.";
}
/**
* Gets the single mode flag.
*
* @return true if methode generateExample can be used.
*/
@Override
public boolean getSingleModeFlag() {
return (!getVoteFlag());
}
/**
* Gets the array that defines which of the attributes are seen to be
* irrelevant.
*
* @return the array that defines the irrelevant attributes
*/
public boolean[] getAttList_Irr() {
return m_AttList_Irr;
}
/**
* Sets the array that defines which of the attributes are seen to be
* irrelevant.
*
* @param newAttList_Irr array that defines the irrelevant attributes.
*/
public void setAttList_Irr(boolean[] newAttList_Irr) {
m_AttList_Irr = newAttList_Irr;
}
/**
* Returns the tip text for this property
*
* @return tip text for this property suitable for displaying in the
* explorer/experimenter gui
*/
public String attList_IrrTipText() {
return "The array with the indices of the irrelevant attributes.";
}
/**
* Initializes the format for the dataset produced.
*
* @return the output data format
* @throws Exception data format could not be defined
*/
@Override
public Instances defineDataFormat() throws Exception {
Instances dataset;
Random random = new Random(getSeed());
setRandom(random);
m_DecisionList = new ArrayList();
// number of examples is the same as given per option
setNumExamplesAct(getNumExamples());
// define dataset
dataset = defineDataset(random);
return dataset;
}
/**
* Generate an example of the dataset dataset.
*
* @return the instance generated
* @throws Exception if format not defined or generating
* examples one by one is not possible, because voting is chosen
*/
@Override
public Instance generateExample() throws Exception {
Random random = getRandom();
Instances format = getDatasetFormat();
if (format == null) {
throw new Exception("Dataset format not defined.");
}
if (getVoteFlag()) {
throw new Exception("Examples cannot be generated one by one.");
}
// generate values for all attributes
format = generateExamples(1, random, format);
return format.lastInstance();
}
/**
* Generate all examples of the dataset.
*
* @return the instance generated
* @throws Exception if format not defined or generating
* examples one by one is not possible, because voting is chosen
*/
@Override
public Instances generateExamples() throws Exception {
Random random = getRandom();
Instances format = getDatasetFormat();
if (format == null) {
throw new Exception("Dataset format not defined.");
}
// generate values for all attributes
format = generateExamples(getNumExamplesAct(), random, format);
// vote all examples, and set new class value
if (getVoteFlag()) {
format = voteDataset(format);
}
return format;
}
/**
* Generate all examples of the dataset.
*
* @param num the number of examples to generate
* @param random the random number generator to use
* @param format the dataset format
* @return the instance generated
* @throws Exception if format not defined or generating
* examples one by one is not possible, because voting is chosen
*/
public Instances generateExamples(int num, Random random, Instances format)
throws Exception {
if (format == null) {
throw new Exception("Dataset format not defined.");
}
// generate values for all attributes
for (int i = 0; i < num; i++) {
// over all examples to be produced
Instance example = generateExample(random, format);
// set class of example using decision list
boolean classDefined = classifyExample(example);
if (!classDefined) {
// set class with newly generated rule
example = updateDecisionList(random, example);
}
example.setDataset(format);
format.add(example);
}
return (format);
}
/**
* Generates a new rule for the decision list. and classifies the new example
*
* @param random random number generator
* @param example example used to update decision list
* @return the classified example
* @throws Exception if dataset format not defined
*/
private Instance updateDecisionList(Random random, Instance example)
throws Exception {
ArrayList TestList;
Instances format = getDatasetFormat();
if (format == null) {
throw new Exception("Dataset format not defined.");
}
TestList = generateTestList(random, example);
int maxSize = getMaxRuleSize() < TestList.size() ? getMaxRuleSize()
: TestList.size();
int ruleSize = ((int) (random.nextDouble() * (maxSize - getMinRuleSize())))
+ getMinRuleSize();
RuleList newRule = new RuleList();
for (int i = 0; i < ruleSize; i++) {
int testIndex = (int) (random.nextDouble() * TestList.size());
Test test = TestList.get(testIndex);
newRule.addTest(test);
TestList.remove(testIndex);
}
double newClassValue = 0.0;
if (m_DecisionList.size() > 0) {
RuleList r = (m_DecisionList.get(m_DecisionList.size() - 1));
double oldClassValue = (r.getClassValue());
newClassValue = (double) ((int) oldClassValue + 1) % getNumClasses();
}
newRule.setClassValue(newClassValue);
m_DecisionList.add(newRule);
example = (Instance) example.copy();
example.setDataset(format);
example.setClassValue(newClassValue);
return example;
}
/**
* Generates a new rule for the decision list and classifies the new example.
*
* @param random random number generator
* @param example the instance to classify
* @return a list of tests
* @throws Exception if dataset format not defined
*/
private ArrayList generateTestList(Random random, Instance example)
throws Exception {
Instances format = getDatasetFormat();
if (format == null) {
throw new Exception("Dataset format not defined.");
}
int numTests = getNumAttributes() - getNumIrrelevant();
ArrayList TestList = new ArrayList(numTests);
boolean[] irrelevant = getAttList_Irr();
for (int i = 0; i < getNumAttributes(); i++) {
if (!irrelevant[i]) {
Test newTest = null;
Attribute att = example.attribute(i);
if (att.isNumeric()) {
double newSplit = random.nextDouble();
boolean newNot = newSplit < example.value(i);
newTest = new Test(i, newSplit, format, newNot);
} else {
newTest = new Test(i, example.value(i), format, false);
}
TestList.add(newTest);
}
}
return TestList;
}
/**
* Generates an example with its classvalue set to missing and binds it to the
* datasets.
*
* @param random random number generator
* @param format dataset the example gets bind to
* @return the generated example
* @throws Exception if attribute type not supported
*/
private Instance generateExample(Random random, Instances format)
throws Exception {
double[] attributes;
Instance example;
attributes = new double[getNumAttributes() + 1];
for (int i = 0; i < getNumAttributes(); i++) {
double value = random.nextDouble();
if (format.attribute(i).isNumeric()) {
attributes[i] = value;
} else {
if (format.attribute(i).isNominal()) {
attributes[i] = (value > 0.5) ? 1.0 : 0.0;
} else {
throw new Exception("Attribute type is not supported.");
}
}
}
example = new DenseInstance(1.0, attributes);
example.setDataset(format);
example.setClassMissing();
return example;
}
/**
* Tries to classify an example.
*
* @param example the example to classify
* @return true if it could be classified
* @throws Exception if something goes wrong
*/
private boolean classifyExample(Instance example) throws Exception {
double classValue = -1.0;
for (Enumeration e = new WekaEnumeration(m_DecisionList); e
.hasMoreElements() && classValue < 0.0;) {
RuleList rl = e.nextElement();
classValue = rl.classifyInstance(example);
}
if (classValue >= 0.0) {
example.setClassValue(classValue);
return true;
} else {
return false;
}
}
/**
* Classify example with maximum vote the following way. With every rule in
* the decisionlist, it is evaluated if the given instance could be the class
* of the rule. Finally the class value that receives the highest number of
* votes is assigned to the example.
*
* @param example example to be reclassified
* @return instance with new class value
* @throws Exception if classification fails
*/
private Instance votedReclassifyExample(Instance example) throws Exception {
int classVotes[] = new int[getNumClasses()];
for (int i = 0; i < classVotes.length; i++) {
classVotes[i] = 0;
}
for (Enumeration e = new WekaEnumeration(m_DecisionList); e
.hasMoreElements();) {
RuleList rl = e.nextElement();
int classValue = (int) rl.classifyInstance(example);
if (classValue >= 0) {
classVotes[classValue]++;
}
}
int maxVote = 0;
int vote = -1;
for (int i = 0; i < classVotes.length; i++) {
if (classVotes[i] > maxVote) {
maxVote = classVotes[i];
vote = i;
}
}
if (vote >= 0) {
example.setClassValue(vote);
} else {
throw new Exception("Error in instance classification.");
}
return example;
}
/**
* Returns a dataset header.
*
* @param random random number generator
* @return dataset header
* @throws Exception if something goes wrong
*/
private Instances defineDataset(Random random) throws Exception {
boolean[] attList_Irr;
int[] attList_Num;
ArrayList attributes = new ArrayList();
Attribute attribute;
ArrayList nominalValues = new ArrayList(2);
nominalValues.add("false");
nominalValues.add("true");
ArrayList classValues = new ArrayList(getNumClasses());
Instances dataset;
// set randomly those attributes that are irrelevant
attList_Irr = defineIrrelevant(random);
setAttList_Irr(attList_Irr);
// set randomly those attributes that are numeric
attList_Num = defineNumeric(random);
// define dataset
for (int i = 0; i < getNumAttributes(); i++) {
if (attList_Num[i] == Attribute.NUMERIC) {
attribute = new Attribute("a" + i);
} else {
attribute = new Attribute("a" + i, nominalValues);
}
attributes.add(attribute);
}
for (int i = 0; i < getNumClasses(); i++) {
classValues.add("c" + i);
}
attribute = new Attribute("class", classValues);
attributes.add(attribute);
dataset = new Instances(getRelationNameToUse(), attributes,
getNumExamplesAct());
dataset.setClassIndex(getNumAttributes());
// set dataset format of this class
Instances format = new Instances(dataset, 0);
setDatasetFormat(format);
return dataset;
}
/**
* Defines randomly the attributes as irrelevant. Number of attributes to be
* set as irrelevant is either set with a preceeding call of
* setNumIrrelevant() or is per default 0.
*
* @param random the random number generator to use
* @return list of boolean values with one value for each attribute, and each
* value set true or false according to if the corresponding attribute
* was defined irrelevant or not
*/
private boolean[] defineIrrelevant(Random random) {
boolean[] irr = new boolean[getNumAttributes()];
// initialize
for (int i = 0; i < irr.length; i++) {
irr[i] = false;
}
// set randomly
int numIrr = 0;
for (int i = 0; (numIrr < getNumIrrelevant())
&& (i < getNumAttributes() * 5); i++) {
int maybeNext = (int) (random.nextDouble() * irr.length);
if (irr[maybeNext] == false) {
irr[maybeNext] = true;
numIrr++;
}
}
return irr;
}
/**
* Chooses randomly the attributes that get datatyp numeric.
*
* @param random the random number generator to use
* @return list of integer values, with one value for each attribute, and each
* value set to Attribut.NOMINAL or Attribut.NUMERIC
*/
private int[] defineNumeric(Random random) {
int[] num = new int[getNumAttributes()];
// initialize
for (int i = 0; i < num.length; i++) {
num[i] = Attribute.NOMINAL;
}
int numNum = 0;
for (int i = 0; (numNum < getNumNumeric()) && (i < getNumAttributes() * 5); i++) {
int maybeNext = (int) (random.nextDouble() * num.length);
if (num[maybeNext] != Attribute.NUMERIC) {
num[maybeNext] = Attribute.NUMERIC;
numNum++;
}
}
return num;
}
/**
* Generates a comment string that documentates the data generator. By default
* this string is added at the beginning of the produced output as ARFF file
* type, next after the options.
*
* @return string contains info about the generated rules
*/
@Override
public String generateStart() {
return "";
}
/**
* Compiles documentation about the data generation. This is the number of
* irrelevant attributes and the decisionlist with all rules. Considering that
* the decisionlist might get enhanced until the last instance is generated,
* this method should be called at the end of the data generation process.
*
* @return string with additional information about generated dataset
* @throws Exception no input structure has been defined
*/
@Override
public String generateFinished() throws Exception {
StringBuffer dLString = new StringBuffer();
// string for output at end of ARFF-File
boolean[] attList_Irr = getAttList_Irr();
Instances format = getDatasetFormat();
dLString.append("%\n% Number of attributes chosen as irrelevant = "
+ getNumIrrelevant() + "\n");
for (int i = 0; i < attList_Irr.length; i++) {
if (attList_Irr[i]) {
dLString.append("% " + format.attribute(i).name() + "\n");
}
}
dLString.append("%\n% DECISIONLIST (number of rules = "
+ m_DecisionList.size() + "):\n");
for (int i = 0; i < m_DecisionList.size(); i++) {
RuleList rl = m_DecisionList.get(i);
dLString.append("% RULE " + i + ": " + rl.toString() + "\n");
}
return dLString.toString();
}
/**
* Resets the class values of all instances using voting. For each instance
* the class value that satisfies the most rules is choosen as new class
* value.
*
* @param dataset the dataset to work on
* @return the changed instances
* @throws Exception if something goes wrong
*/
private Instances voteDataset(Instances dataset) throws Exception {
for (int i = 0; i < dataset.numInstances(); i++) {
Instance inst = dataset.firstInstance();
inst = votedReclassifyExample(inst);
dataset.add(inst);
dataset.delete(0);
}
return dataset;
}
/**
* Returns the revision string.
*
* @return the revision
*/
@Override
public String getRevision() {
return RevisionUtils.extract("$Revision: 10203 $");
}
/**
* Main method for testing this class.
*
* @param args should contain arguments for the data producer:
*/
public static void main(String[] args) {
runDataGenerator(new RDG1(), args);
}
}