Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
weka.core.CheckScheme Maven / Gradle / Ivy
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* CheckScheme.java
* Copyright (C) 2006-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.core;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.Random;
import java.util.StringTokenizer;
import java.util.Vector;
/**
* Abstract general class for testing schemes in Weka. Derived classes are
* also used for JUnit tests.
*
* @author FracPete (fracpete at waikato dot ac dot nz)
* @version $Revision: 11247 $
* @see TestInstances
*/
public abstract class CheckScheme
extends Check {
/** a class for postprocessing the test-data */
public static class PostProcessor
implements RevisionHandler {
/**
* Provides a hook for derived classes to further modify the data. Currently,
* the data is just passed through.
*
* @param data the data to process
* @return the processed data
*/
public Instances process(Instances data) {
return data;
}
/**
* Returns the revision string.
*
* @return the revision
*/
public String getRevision() {
return RevisionUtils.extract("$Revision: 11247 $");
}
}
/** The number of instances in the datasets */
protected int m_NumInstances = 20;
/** the number of nominal attributes */
protected int m_NumNominal = 2;
/** the number of numeric attributes */
protected int m_NumNumeric = 1;
/** the number of string attributes */
protected int m_NumString = 1;
/** the number of date attributes */
protected int m_NumDate = 1;
/** the number of relational attributes */
protected int m_NumRelational = 1;
/** the number of instances in relational attributes (applies also for bags
* in multi-instance) */
protected int m_NumInstancesRelational = 10;
/** for generating String attributes/classes */
protected String[] m_Words = TestInstances.DEFAULT_WORDS;
/** for generating String attributes/classes */
protected String m_WordSeparators = TestInstances.DEFAULT_SEPARATORS;
/** for post-processing the data even further */
protected PostProcessor m_PostProcessor = null;
/** whether classpath problems occurred */
protected boolean m_ClasspathProblems = false;
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
public Enumeration listOptions() {
Vector result = new Vector ();
result.addAll(Collections.list(super.listOptions()));
result.addElement(new Option(
"\tThe number of instances in the datasets (default 20).",
"N", 1, "-N "));
result.addElement(new Option(
"\tThe number of nominal attributes (default 2).",
"nominal", 1, "-nominal "));
result.addElement(new Option(
"\tThe number of values for nominal attributes (default 1).",
"nominal-values", 1, "-nominal-values "));
result.addElement(new Option(
"\tThe number of numeric attributes (default 1).",
"numeric", 1, "-numeric "));
result.addElement(new Option(
"\tThe number of string attributes (default 1).",
"string", 1, "-string "));
result.addElement(new Option(
"\tThe number of date attributes (default 1).",
"date", 1, "-date "));
result.addElement(new Option(
"\tThe number of relational attributes (default 1).",
"relational", 1, "-relational "));
result.addElement(new Option(
"\tThe number of instances in relational/bag attributes (default 10).",
"num-instances-relational", 1, "-num-instances-relational "));
result.addElement(new Option(
"\tThe words to use in string attributes.",
"words", 1, "-words "));
result.addElement(new Option(
"\tThe word separators to use in string attributes.",
"word-separators", 1, "-word-separators "));
return result.elements();
}
/**
* Parses a given list of options.
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*/
public void setOptions(String[] options) throws Exception {
String tmpStr;
super.setOptions(options);
tmpStr = Utils.getOption('N', options);
if (tmpStr.length() != 0)
setNumInstances(Integer.parseInt(tmpStr));
else
setNumInstances(20);
tmpStr = Utils.getOption("nominal", options);
if (tmpStr.length() != 0)
setNumNominal(Integer.parseInt(tmpStr));
else
setNumNominal(2);
tmpStr = Utils.getOption("numeric", options);
if (tmpStr.length() != 0)
setNumNumeric(Integer.parseInt(tmpStr));
else
setNumNumeric(1);
tmpStr = Utils.getOption("string", options);
if (tmpStr.length() != 0)
setNumString(Integer.parseInt(tmpStr));
else
setNumString(1);
tmpStr = Utils.getOption("date", options);
if (tmpStr.length() != 0)
setNumDate(Integer.parseInt(tmpStr));
else
setNumDate(1);
tmpStr = Utils.getOption("relational", options);
if (tmpStr.length() != 0)
setNumRelational(Integer.parseInt(tmpStr));
else
setNumRelational(1);
tmpStr = Utils.getOption("num-instances-relational", options);
if (tmpStr.length() != 0)
setNumInstancesRelational(Integer.parseInt(tmpStr));
else
setNumInstancesRelational(10);
tmpStr = Utils.getOption("words", options);
if (tmpStr.length() != 0)
setWords(tmpStr);
else
setWords(new TestInstances().getWords());
if (Utils.getOptionPos("word-separators", options) > -1) {
tmpStr = Utils.getOption("word-separators", options);
setWordSeparators(tmpStr);
}
else {
setWordSeparators(TestInstances.DEFAULT_SEPARATORS);
}
}
/**
* Gets the current settings of the CheckClassifier.
*
* @return an array of strings suitable for passing to setOptions
*/
public String[] getOptions() {
Vector result;
String[] options;
int i;
result = new Vector();
options = super.getOptions();
for (i = 0; i < options.length; i++)
result.add(options[i]);
result.add("-N");
result.add("" + getNumInstances());
result.add("-nominal");
result.add("" + getNumNominal());
result.add("-numeric");
result.add("" + getNumNumeric());
result.add("-string");
result.add("" + getNumString());
result.add("-date");
result.add("" + getNumDate());
result.add("-relational");
result.add("" + getNumRelational());
result.add("-words");
result.add("" + getWords());
result.add("-word-separators");
result.add("" + getWordSeparators());
return (String[]) result.toArray(new String[result.size()]);
}
/**
* sets the PostProcessor to use
*
* @param value the new PostProcessor
* @see #m_PostProcessor
*/
public void setPostProcessor(PostProcessor value) {
m_PostProcessor = value;
}
/**
* returns the current PostProcessor, can be null
*
* @return the current PostProcessor
*/
public PostProcessor getPostProcessor() {
return m_PostProcessor;
}
/**
* returns TRUE if the classifier returned a "not in classpath" Exception
*
* @return true if CLASSPATH problems occurred
*/
public boolean hasClasspathProblems() {
return m_ClasspathProblems;
}
/**
* Begin the tests, reporting results to System.out
*/
public abstract void doTests();
/**
* Sets the number of instances to use in the datasets (some classifiers
* might require more instances).
*
* @param value the number of instances to use
*/
public void setNumInstances(int value) {
m_NumInstances = value;
}
/**
* Gets the current number of instances to use for the datasets.
*
* @return the number of instances
*/
public int getNumInstances() {
return m_NumInstances;
}
/**
* sets the number of nominal attributes
*
* @param value the number of nominal attributes
*/
public void setNumNominal(int value) {
m_NumNominal = value;
}
/**
* returns the current number of nominal attributes
*
* @return the number of nominal attributes
*/
public int getNumNominal() {
return m_NumNominal;
}
/**
* sets the number of numeric attributes
*
* @param value the number of numeric attributes
*/
public void setNumNumeric(int value) {
m_NumNumeric = value;
}
/**
* returns the current number of numeric attributes
*
* @return the number of numeric attributes
*/
public int getNumNumeric() {
return m_NumNumeric;
}
/**
* sets the number of string attributes
*
* @param value the number of string attributes
*/
public void setNumString(int value) {
m_NumString = value;
}
/**
* returns the current number of string attributes
*
* @return the number of string attributes
*/
public int getNumString() {
return m_NumString;
}
/**
* sets the number of data attributes
*
* @param value the number of date attributes
*/
public void setNumDate(int value) {
m_NumDate = value;
}
/**
* returns the current number of date attributes
*
* @return the number of date attributes
*/
public int getNumDate() {
return m_NumDate;
}
/**
* sets the number of relational attributes
*
* @param value the number of relational attributes
*/
public void setNumRelational(int value) {
m_NumRelational = value;
}
/**
* returns the current number of relational attributes
*
* @return the number of relational attributes
*/
public int getNumRelational() {
return m_NumRelational;
}
/**
* sets the number of instances in relational/bag attributes to produce
*
* @param value the number of instances
*/
public void setNumInstancesRelational(int value) {
m_NumInstancesRelational = value;
}
/**
* returns the current number of instances in relational/bag attributes to produce
*
* @return the number of instances
*/
public int getNumInstancesRelational() {
return m_NumInstancesRelational;
}
/**
* turns the comma-separated list into an array
*
* @param value the list to process
* @return the list as array
*/
protected static String[] listToArray(String value) {
StringTokenizer tok;
Vector list;
list = new Vector();
tok = new StringTokenizer(value, ",");
while (tok.hasMoreTokens())
list.add(tok.nextToken());
return (String[]) list.toArray(new String[list.size()]);
}
/**
* turns the array into a comma-separated list
*
* @param value the array to process
* @return the array as list
*/
protected static String arrayToList(String[] value) {
String result;
int i;
result = "";
for (i = 0; i < value.length; i++) {
if (i > 0)
result += ",";
result += value[i];
}
return result;
}
/**
* returns a string representation of the attribute type
*
* @param type the attribute type to get a string rerpresentation for
* @return the string representation
*/
public static String attributeTypeToString(int type) {
String result;
switch (type) {
case Attribute.NUMERIC:
result = "numeric";
break;
case Attribute.NOMINAL:
result = "nominal";
break;
case Attribute.STRING:
result = "string";
break;
case Attribute.DATE:
result = "date";
break;
case Attribute.RELATIONAL:
result = "relational";
break;
default:
result = "???";
}
return result;
}
/**
* Sets the comma-separated list of words to use for generating strings. The
* list must contain at least 2 words, otherwise an exception will be thrown.
*
* @param value the list of words
* @throws IllegalArgumentException if not at least 2 words are provided
*/
public void setWords(String value) {
if (listToArray(value).length < 2)
throw new IllegalArgumentException("At least 2 words must be provided!");
m_Words = listToArray(value);
}
/**
* returns the words used for assembling strings in a comma-separated list.
*
* @return the words as comma-separated list
*/
public String getWords() {
return arrayToList(m_Words);
}
/**
* sets the word separators (chars) to use for assembling strings.
*
* @param value the characters to use as separators
*/
public void setWordSeparators(String value) {
m_WordSeparators = value;
}
/**
* returns the word separators (chars) to use for assembling strings.
*
* @return the current separators
*/
public String getWordSeparators() {
return m_WordSeparators;
}
/**
* Compare two datasets to see if they differ.
*
* @param data1 one set of instances
* @param data2 the other set of instances
* @throws Exception if the datasets differ
*/
protected void compareDatasets(Instances data1, Instances data2)
throws Exception {
if (!data2.equalHeaders(data1)) {
throw new Exception("header has been modified\n" + data2.equalHeadersMsg(data1));
}
if (!(data2.numInstances() == data1.numInstances())) {
throw new Exception("number of instances has changed");
}
for (int i = 0; i < data2.numInstances(); i++) {
Instance orig = data1.instance(i);
Instance copy = data2.instance(i);
for (int j = 0; j < orig.numAttributes(); j++) {
if (orig.isMissing(j)) {
if (!copy.isMissing(j)) {
throw new Exception("instances have changed");
}
} else if (orig.value(j) != copy.value(j)) {
throw new Exception("instances have changed");
}
if (orig.weight() != copy.weight()) {
throw new Exception("instance weights have changed");
}
}
}
}
/**
* Add missing values to a dataset.
*
* @param data the instances to add missing values to
* @param level the level of missing values to add (if positive, this
* is the probability that a value will be set to missing, if negative
* all but one value will be set to missing (not yet implemented))
* @param predictorMissing if true, predictor attributes will be modified
* @param classMissing if true, the class attribute will be modified
*/
protected void addMissing(Instances data, int level,
boolean predictorMissing, boolean classMissing) {
int classIndex = data.classIndex();
Random random = new Random(1);
for (int i = 0; i < data.numInstances(); i++) {
Instance current = data.instance(i);
for (int j = 0; j < data.numAttributes(); j++) {
if (((j == classIndex) && classMissing) ||
((j != classIndex) && predictorMissing)) {
if (random.nextInt(100) < level)
current.setMissing(j);
}
}
}
}
/**
* Provides a hook for derived classes to further modify the data.
*
* @param data the data to process
* @return the processed data
* @see #m_PostProcessor
*/
protected Instances process(Instances data) {
if (getPostProcessor() == null)
return data;
else
return getPostProcessor().process(data);
}
}