All downloads are free. The search and download features use the official Maven repository.

weka.core.TestInstances Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * TestInstances.java
 * Copyright (C) 2006-2012 University of Waikato, Hamilton, New Zealand
 */

package weka.core;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Random;
import java.util.StringTokenizer;
import java.util.Vector;

import weka.core.Capabilities.Capability;

/**
 * Generates artificial datasets for testing. In case of Multi-Instance data the
 * settings for the number of attributes applies to the data inside the bag.
 * Originally based on code from the CheckClassifier.
 * 

 * <!-- options-start -->
 * Valid options are:
 * <p>
 *
 * <pre>
 * -relation &lt;name&gt;
 *  The name of the data set.
 * </pre>
 *
 * <pre>
 * -seed &lt;num&gt;
 *  The seed value.
 * </pre>
 *
 * <pre>
 * -num-instances &lt;num&gt;
 *  The number of instances in the datasets (default 20).
 * </pre>
 *
 * <pre>
 * -class-type &lt;num&gt;
 *  The class type, see constants in weka.core.Attribute
 *  (default 1=nominal).
 * </pre>
 *
 * <pre>
 * -class-values &lt;num&gt;
 *  The number of classes to generate (for nominal classes only)
 *  (default 2).
 * </pre>
 *
 * <pre>
 * -class-index &lt;num&gt;
 *  The class index, with -1=last, (default -1).
 * </pre>
 *
 * <pre>
 * -no-class
 *  Doesn't include a class attribute in the output.
 * </pre>
 *
 * <pre>
 * -nominal &lt;num&gt;
 *  The number of nominal attributes (default 1).
 * </pre>
 *
 * <pre>
 * -nominal-values &lt;num&gt;
 *  The number of values for nominal attributes (default 2).
 * </pre>
 *
 * <pre>
 * -numeric &lt;num&gt;
 *  The number of numeric attributes (default 0).
 * </pre>
 *
 * <pre>
 * -string &lt;num&gt;
 *  The number of string attributes (default 0).
 * </pre>
 *
 * <pre>
 * -words &lt;comma-separated-list&gt;
 *  The words to use in string attributes.
 * </pre>
 *
 * <pre>
 * -word-separators &lt;chars&gt;
 *  The word separators to use in string attributes.
 * </pre>
 *
 * <pre>
 * -date &lt;num&gt;
 *  The number of date attributes (default 0).
 * </pre>
 *
 * <pre>
 * -relational &lt;num&gt;
 *  The number of relational attributes (default 0).
 * </pre>
 *
 * <pre>
 * -relational-nominal &lt;num&gt;
 *  The number of nominal attributes in a rel. attribute (default 1).
 * </pre>
 *
 * <pre>
 * -relational-nominal-values &lt;num&gt;
 *  The number of values for nominal attributes in a rel. attribute (default 2).
 * </pre>
 *
 * <pre>
 * -relational-numeric &lt;num&gt;
 *  The number of numeric attributes in a rel. attribute (default 0).
 * </pre>
 *
 * <pre>
 * -relational-string &lt;num&gt;
 *  The number of string attributes in a rel. attribute (default 0).
 * </pre>
 *
 * <pre>
 * -relational-date &lt;num&gt;
 *  The number of date attributes in a rel. attribute (default 0).
 * </pre>
 *
 * <pre>
 * -num-instances-relational &lt;num&gt;
 *  The number of instances in relational/bag attributes (default 10).
 * </pre>
 *
 * <pre>
 * -multi-instance
 *  Generates multi-instance data.
 * </pre>
 *
 * <pre>
 * -W &lt;classname&gt;
 *  The Capabilities handler to base the dataset on.
 *  The other parameters can be used to override the ones
 *  determined from the handler. Additional parameters for
 *  handler can be passed on after the '--'.
 * </pre>
 *
 * <!-- options-end -->
*
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 11506 $
 * @see weka.classifiers.CheckClassifier
 */
public class TestInstances implements Cloneable, Serializable, OptionHandler,
  RevisionHandler {

  /** for serialization */
  private static final long serialVersionUID = -6263968936330390469L;

  /**
   * can be used for setting the class attribute index to last
   *
   * @see #setClassIndex(int)
   */
  public final static int CLASS_IS_LAST = -1;

  /**
   * can be used to avoid generating a class attribute
   *
   * @see #setClassIndex(int)
   */
  public final static int NO_CLASS = -2;

  /** the default list of words used in strings */
  public final static String[] DEFAULT_WORDS = { "The", "quick", "brown",
    "fox", "jumps", "over", "the", "lazy", "dog" };

  /** the default word separators used in strings (a single space) */
  public final static String DEFAULT_SEPARATORS = " ";

  /** the words used for generating String attributes/classes */
  protected String[] m_Words = DEFAULT_WORDS;

  /** the word separators used for generating String attributes/classes */
  protected String m_WordSeparators = DEFAULT_SEPARATORS;

  /** the name of the relation */
  protected String m_Relation = "Testdata";

  /** the seed value */
  protected int m_Seed = 1;

  /** the random number generator, initially seeded with {@link #m_Seed} */
  protected Random m_Random = new Random(m_Seed);

  /** the number of instances */
  protected int m_NumInstances = 20;

  /** the class type (a type constant of {@link Attribute}) */
  protected int m_ClassType = Attribute.NOMINAL;

  /** the number of classes (in case of NOMINAL class) */
  protected int m_NumClasses = 2;

  /**
   * the class index (-1 is LAST, -2 means no class)
   *
   * @see #CLASS_IS_LAST
   * @see #NO_CLASS
   */
  protected int m_ClassIndex = CLASS_IS_LAST;

  /** the number of nominal attributes */
  protected int m_NumNominal = 1;

  /** the number of values for nominal attributes */
  protected int m_NumNominalValues = 2;

  /** the number of numeric attributes */
  protected int m_NumNumeric = 0;

  /** the number of string attributes */
  protected int m_NumString = 0;

  /** the number of date attributes */
  protected int m_NumDate = 0;

  /** the number of relational attributes */
  protected int m_NumRelational = 0;

  /** the number of nominal attributes in a relational attribute */
  protected int m_NumRelationalNominal = 1;

  /** the number of values for nominal attributes in relational attributes */
  protected int m_NumRelationalNominalValues = 2;

  /** the number of numeric attributes in a relational attribute */
  protected int m_NumRelationalNumeric = 0;

  /** the number of string attributes in a relational attribute */
  protected int m_NumRelationalString = 0;

  /** the number of date attributes in a relational attribute */
  protected int m_NumRelationalDate = 0;

  /** whether to generate Multi-Instance data or not */
  protected boolean m_MultiInstance = false;

  /**
   * the number of instances in relational attributes (applies also for bags in
   * multi-instance)
   */
  protected int m_NumInstancesRelational = 10;

  /** the format of the multi-instance data */
  protected Instances[] m_RelationalFormat = null;

  /** the format of the multi-instance data of the class */
  protected Instances m_RelationalClassFormat = null;

  /** the generated data */
  protected Instances m_Data = null;

  /** the CapabilitiesHandler to get the Capabilities from */
  protected CapabilitiesHandler m_Handler = null;

  /**
   * The default constructor. Pushes every setting through its setter, using
   * the same values that the field initializers above already assign.
   * <p>
   * NOTE(review): the setters are defined outside this excerpt; if any of them
   * can be overridden, a subclass would receive these calls before its own
   * initialization has run -- confirm.
   */
  public TestInstances() {
    super();

    setRelation("Testdata");
    setSeed(1);
    setNumInstances(20);
    setClassType(Attribute.NOMINAL);
    setNumClasses(2);
    setClassIndex(CLASS_IS_LAST);
    setNumNominal(1);
    setNumNominalValues(2);
    setNumNumeric(0);
    setNumString(0);
    setNumDate(0);
    setNumRelational(0);
    setNumRelationalNominal(1);
    setNumRelationalNominalValues(2);
    setNumRelationalNumeric(0);
    setNumRelationalString(0);
    setNumRelationalDate(0);
    setNumInstancesRelational(10);
    setMultiInstance(false);
    // arrayToList is defined elsewhere in this class; presumably it turns the
    // word array into the list form that setWords accepts -- confirm
    setWords(arrayToList(DEFAULT_WORDS));
    setWordSeparators(DEFAULT_SEPARATORS);
  }

  /**
   * Creates a clone of the current object. The copy is built with the default
   * constructor (not via {@code super.clone()}) and then receives this
   * object's settings through {@link #assign(TestInstances)}.
   *
   * @return a clone of the current object
   */
  @Override
  public Object clone() {
    TestInstances result;

    result = new TestInstances();
    result.assign(this);

    return result;
  }

  /**
   * Updates itself with all the settings from the given TestInstances object,
   * by reading each value through the getter of <code>t</code> and applying it
   * with the matching setter of this object.
   * <p>
   * No setter for the generated data ({@link #m_Data}) or the capabilities
   * handler ({@link #m_Handler}) is called here.
   *
   * @param t the object to get the settings from
   */
  public void assign(TestInstances t) {
    setRelation(t.getRelation());
    setSeed(t.getSeed());
    setNumInstances(t.getNumInstances());
    setClassType(t.getClassType());
    setNumClasses(t.getNumClasses());
    setClassIndex(t.getClassIndex());
    setNumNominal(t.getNumNominal());
    setNumNominalValues(t.getNumNominalValues());
    setNumNumeric(t.getNumNumeric());
    setNumString(t.getNumString());
    setNumDate(t.getNumDate());
    setNumRelational(t.getNumRelational());
    setNumRelationalNominal(t.getNumRelationalNominal());
    setNumRelationalNominalValues(t.getNumRelationalNominalValues());
    setNumRelationalNumeric(t.getNumRelationalNumeric());
    setNumRelationalString(t.getNumRelationalString());
    setNumRelationalDate(t.getNumRelationalDate());
    setMultiInstance(t.getMultiInstance());
    // one format per relational attribute of t; this runs after
    // setNumRelational above -- presumably that setter sizes the format array,
    // so the order should be kept (confirm against setNumRelational)
    for (int i = 0; i < t.getNumRelational(); i++) {
      setRelationalFormat(i, t.getRelationalFormat(i));
    }
    setRelationalClassFormat(t.getRelationalClassFormat());
    setNumInstancesRelational(t.getNumInstancesRelational());
    setWords(t.getWords());
    setWordSeparators(t.getWordSeparators());
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  @Override
  public Enumeration




© 2015 - 2025 Weber Informatics LLC | Privacy Policy