All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.estimators.CheckEstimator Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the stable version. Apart from bugfixes, this version does not receive any other updates.

There is a newer version: 3.8.6
Show newest version
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    CheckEstimator.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.estimators;

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TestInstances;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

/**
 * Class for examining the capabilities and finding problems with 
 * estimators. If you implement an estimator using the WEKA libraries,
 * you should run the checks on it to ensure robustness and correct
 * operation. Passing all the tests of this object does not mean
 * bugs in the estimator don't exist, but this will help find some
 * common ones. 

* * Typical usage:

* java weka.estimators.CheckEstimator -W estimator_name * estimator_options

* * This class uses code from the CheckEstimatorClass * ATTENTION! Current estimators can only * 1. split on a nominal class attribute * 2. build estimators for nominal and numeric attributes * 3. build estimators independently of the class type * The functionality to test on other class and attribute types * is left in big parts in the code. * * CheckEstimator reports on the following: *

    *
  • Estimator abilities *
      *
    • Possible command line options to the estimator
    • *
    • Whether the estimator can predict nominal, numeric, string, * date or relational class attributes. Warnings will be displayed if * performance is worse than ZeroR
    • *
    • Whether the estimator can be trained incrementally
    • *
    • Whether the estimator can build estimates for numeric attributes
    • *
    • Whether the estimator can handle nominal attributes
    • *
    • Whether the estimator can handle string attributes
    • *
    • Whether the estimator can handle date attributes
    • *
    • Whether the estimator can handle relational attributes
    • *
    • Whether the estimator can build estimates for multi-instance data
    • *
    • Whether the estimator can handle missing attribute values
    • *
    • Whether the estimator can handle missing class values
    • *
    • Whether a nominal estimator only handles 2 class problems
    • *
    • Whether the estimator can handle instance weights
    • *
    *
  • *
  • Correct functioning *
      *
    • Correct initialisation during addvalues (i.e. no result * changes when addValues called repeatedly)
    • *
    • Whether incremental training produces the same results * as during non-incremental training (which may or may not * be OK)
    • *
    • Whether the estimator alters the data passed to it * (number of instances, instance order, instance weights, etc)
    • *
    *
  • *
  • Degenerate cases *
      *
    • building estimator with zero training instances
    • *
    • all but one attribute value missing
    • *
    • all attribute values missing
    • *
    • all but one class values missing
    • *
    • all class values missing
    • *
    *
  • *
* Running CheckEstimator with the debug option set will output the * training and test datasets for any failed tests.

* * The weka.estimators.AbstractEstimatorTest uses this * class to test all the estimators. Any changes here, have to be * checked in that abstract test class, too.

* * Valid options are:

* *

 -D
 *  Turn on debugging output.
* *
 -S
 *  Silent mode - prints nothing to stdout.
* *
 -N <num>
 *  The number of instances in the datasets (default 100).
* *
 -W
 *  Full name of the estimator analysed.
 *  eg: weka.estimators.NormalEstimator
* *
 
 * Options specific to estimator weka.estimators.NormalEstimator:
 * 
* *
 -D
 *  If set, estimator is run in debug mode and
 *  may output additional info to the console
* * * Options after -- are passed to the designated estimator.

*
 * @author Len Trigg ([email protected])
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 1.5 $
 * @see TestInstances
 */
public class CheckEstimator implements OptionHandler, RevisionHandler {

  /*
   * Note about test methods:
   * - methods return array of booleans
   * - first index: success or not
   * - second index: acceptable or not (e.g., Exception is OK)
   * - in case the performance is worse than that of ZeroR both indices are true
   *
   * FracPete (fracpete at waikato dot ac dot nz)
   */

  /**
   * A class for postprocessing the test data. It is a non-static inner class,
   * so every instance is tied to an enclosing CheckEstimator instance.
   */
  public class PostProcessor implements RevisionHandler {

    /**
     * Provides a hook for derived classes to further modify the data. Currently,
     * the data is just passed through (the very same object is returned).
     *
     * @param data the data to process
     * @return the processed data
     */
    protected Instances process(Instances data) {
      return data;
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 1.5 $");
    }
  }

  /**
   * The estimator to be examined. Defaults to a NormalEstimator constructed
   * with the argument 0.000001.
   */
  protected Estimator m_Estimator = (Estimator) new weka.estimators.NormalEstimator(0.000001);

  /** The options to be passed to the base estimator.
*/ protected String[] m_EstimatorOptions; /** The results of the analysis as a string */ protected String m_AnalysisResults; /** Debugging mode, gives extra output if true */ protected boolean m_Debug = false; /** Silent mode, for no output at all to stdout */ protected boolean m_Silent = false; /** The number of instances in the datasets */ protected int m_NumInstances = 100; /** for post-processing the data even further */ protected PostProcessor m_PostProcessor = null; /** whether classpath problems occurred */ protected boolean m_ClasspathProblems = false; /** * class that contains info about the attribute types the estimator can estimate * estimator work on one attribute only */ public static class AttrTypes implements RevisionHandler { boolean nominal = false; boolean numeric = false; boolean string = false; boolean date = false; boolean relational = false; AttrTypes() { } AttrTypes (AttrTypes newTypes) { nominal = newTypes.nominal; numeric = newTypes.numeric; string = newTypes.string; date = newTypes.date; relational = newTypes.relational; } AttrTypes (int type) { if (type == Attribute.NOMINAL) nominal = true; if (type == Attribute.NUMERIC) numeric = true; if (type == Attribute.STRING) string = true; if (type == Attribute.DATE) date = true; if (type == Attribute.RELATIONAL) relational = true; } int getSetType() throws Exception { int sum = 0; int type = -1; if (nominal) { sum ++; type = Attribute.NOMINAL; } if (numeric) { sum ++; type = Attribute.NUMERIC; } if (string) { sum ++; type = Attribute.STRING; } if (date) { sum ++; type = Attribute.DATE; } if (relational) { sum ++; type = Attribute.RELATIONAL; } if (sum > 1) throw new Exception("Expected to have only one type set used wrongly."); if (type < 0) throw new Exception("No type set."); return type; } boolean oneIsSet() { return (nominal || numeric || string || date || relational); } public Vector getVectorOfAttrTypes() { Vector attrs = new Vector(); if (nominal) attrs.add(new Integer(Attribute.NOMINAL)); 
if (numeric) attrs.add(new Integer(Attribute.NUMERIC)); if (string) attrs.add(new Integer(Attribute.STRING)); if (date) attrs.add(new Integer(Attribute.DATE)); if (relational) attrs.add(new Integer(Attribute.RELATIONAL)); return attrs; } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision: 1.5 $"); } } /** * public class that contains info about the chosen attribute type * estimator work on one attribute only */ public static class EstTypes implements RevisionHandler { boolean incremental = false; boolean weighted = false; boolean supervised = false; /** * Constructor */ public EstTypes () { } /** * Constructor */ public EstTypes (boolean i, boolean w, boolean s) { incremental = i; weighted = w; supervised = s; } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision: 1.5 $"); } } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector newVector = new Vector(2); newVector.addElement(new Option( "\tTurn on debugging output.", "D", 0, "-D")); newVector.addElement(new Option( "\tSilent mode - prints nothing to stdout.", "S", 0, "-S")); newVector.addElement(new Option( "\tThe number of instances in the datasets (default 100).", "N", 1, "-N ")); newVector.addElement(new Option( "\tFull name of the estimator analysed.\n" +"\teg: weka.estimators.NormalEstimator", "W", 1, "-W")); if ((m_Estimator != null) && (m_Estimator instanceof OptionHandler)) { newVector.addElement(new Option("", "", 0, "\nOptions specific to estimator " + m_Estimator.getClass().getName() + ":")); Enumeration enu = ((OptionHandler)m_Estimator).listOptions(); while (enu.hasMoreElements()) newVector.addElement(enu.nextElement()); } return newVector.elements(); } /** * Parses a given list of options. * * Valid options are:

* *

 -D
   *  Turn on debugging output.
* *
 -S
   *  Silent mode - prints nothing to stdout.
* *
 -N <num>
   *  The number of instances in the datasets (default 100).
* *
 -W
   *  Full name of the estimator analysed.
   *  eg: weka.estimators.NormalEstimator
* *
 
   * Options specific to estimator weka.estimators.NormalEstimator:
   * 
* *
 -D
   *  If set, estimator is run in debug mode and
   *  may output additional info to the console
* * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String tmpStr; setDebug(Utils.getFlag('D', options)); setSilent(Utils.getFlag('S', options)); tmpStr = Utils.getOption('N', options); if (tmpStr.length() != 0) setNumInstances(Integer.parseInt(tmpStr)); else setNumInstances(100); tmpStr = Utils.getOption('W', options); if (tmpStr.length() == 0) throw new Exception("A estimator must be specified with the -W option."); setEstimator(Estimator.forName(tmpStr, Utils.partitionOptions(options))); } /** * Gets the current settings of the CheckEstimator. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { Vector result; String[] options; int i; result = new Vector(); if (getDebug()) result.add("-D"); if (getSilent()) result.add("-S"); result.add("-N"); result.add("" + getNumInstances()); if (getEstimator() != null) { result.add("-W"); result.add(getEstimator().getClass().getName()); } if ((m_Estimator != null) && (m_Estimator instanceof OptionHandler)) options = ((OptionHandler) m_Estimator).getOptions(); else options = new String[0]; if (options.length > 0) { result.add("--"); for (i = 0; i < options.length; i++) result.add(options[i]); } return (String[]) result.toArray(new String[result.size()]); } /** * sets the PostProcessor to use * * @param value the new PostProcessor * @see #m_PostProcessor */ public void setPostProcessor(PostProcessor value) { m_PostProcessor = value; } /** * returns the current PostProcessor, can be null * * @return the current PostProcessor */ public PostProcessor getPostProcessor() { return m_PostProcessor; } /** * returns TRUE if the estimator returned a "not in classpath" Exception * * @return true if CLASSPATH problems occurred */ public boolean hasClasspathProblems() { return m_ClasspathProblems; } /** * Begin the tests, reporting results to System.out */ public void 
doTests() { if (getEstimator() == null) { println("\n=== No estimator set ==="); return; } println("\n=== Check on Estimator: " + getEstimator().getClass().getName() + " ===\n"); m_ClasspathProblems = false; // Start tests with test for options canTakeOptions(); // test what type of estimator it is EstTypes estTypes = new EstTypes(); estTypes.incremental = incrementalEstimator()[0]; estTypes.weighted = weightedInstancesHandler()[0]; estTypes.supervised = supervisedEstimator()[0]; // in none of the estimators yet the functionality is depending on the class type // since this could change the basic structure taken from checkclassifiers is kept here int classType = Attribute.NOMINAL; AttrTypes attrTypes = testsPerClassType(classType, estTypes); // only nominal class can be split up so far canSplitUpClass(attrTypes, classType); } /** * Set debugging mode * * @param debug true if debug output should be printed */ public void setDebug(boolean debug) { m_Debug = debug; // disable silent mode, if necessary if (getDebug()) setSilent(false); } /** * Get whether debugging is turned on * * @return true if debugging output is on */ public boolean getDebug() { return m_Debug; } /** * Set slient mode, i.e., no output at all to stdout * * @param value whether silent mode is active or not */ public void setSilent(boolean value) { m_Silent = value; } /** * Get whether silent mode is turned on * * @return true if silent mode is on */ public boolean getSilent() { return m_Silent; } /** * Sets the number of instances to use in the datasets (some estimators * might require more instances). * * @param value the number of instances to use */ public void setNumInstances(int value) { m_NumInstances = value; } /** * Gets the current number of instances to use for the datasets. * * @return the number of instances */ public int getNumInstances() { return m_NumInstances; } /** * Set the estimator for boosting. * * @param newEstimator the Estimator to use. 
*/ public void setEstimator(Estimator newEstimator) { m_Estimator = newEstimator; } /** * Get the estimator used as the estimator * * @return the estimator used as the estimator */ public Estimator getEstimator() { return m_Estimator; } /** * prints the given message to stdout, if not silent mode * * @param msg the text to print to stdout */ protected void print(Object msg) { if (!getSilent()) System.out.print(msg); } /** * prints the given message (+ LF) to stdout, if not silent mode * * @param msg the message to println to stdout */ protected void println(Object msg) { print(msg + "\n"); } /** * prints a LF to stdout, if not silent mode */ protected void println() { print("\n"); } /** * Run a battery of tests for a given class attribute type * * @param classType true if the class attribute should be numeric * @param estTypes types the estimator is, like incremental, weighted, supervised etc * @return attribute types estimator can work with */ protected AttrTypes testsPerClassType(int classType, EstTypes estTypes) { // in none of the estimators yet is the estimation depending on the class type // since this could change the basic structure taken from checkclassifiers is kept here // test A: simple test - if can estimate AttrTypes attrTypes = new AttrTypes(); AttrTypes at = new AttrTypes(Attribute.NOMINAL); attrTypes.nominal = canEstimate(at, estTypes.supervised, classType)[0]; at = new AttrTypes(Attribute.NUMERIC); attrTypes.numeric = canEstimate(at, estTypes.supervised, classType)[0]; attrTypes.string = false; attrTypes.date = false; attrTypes.relational = false; // if (!multiInstance) // PRel = canEstimate(false, false, false, false, true, classType)[0]; // else // PRel = false; // one of the attribute types succeeded if (attrTypes.oneIsSet()) { Vector attributesSet = attrTypes.getVectorOfAttrTypes(); // make tests for each attribute for (int i = 0; i < attributesSet.size(); i++) { AttrTypes workAttrTypes = new AttrTypes(((Integer) 
attributesSet.elementAt(i)).intValue()); // test B: weights change estimate or not if (estTypes.weighted) instanceWeights(workAttrTypes, classType); if (classType == Attribute.NOMINAL) { int numClasses = 4; canHandleNClasses(workAttrTypes, numClasses); } // tests with class not the last attribute and the attribute not the first // if (!multiInstance) { int numAtt = 4; canHandleClassAsNthAttribute(workAttrTypes, numAtt, 0, classType, 1); //TODOTODOcanHandleAttrAsNthAttribute(workAttrTypes, numAtt, 2, classType); //} canHandleZeroTraining(workAttrTypes, classType); boolean handleMissingAttributes = canHandleMissing(workAttrTypes, classType, true, false, 20)[0]; if (handleMissingAttributes) canHandleMissing(workAttrTypes, classType, true, false, 100); boolean handleMissingClass = canHandleMissing(workAttrTypes, classType, false, true, 20)[0]; if (handleMissingClass) canHandleMissing(workAttrTypes, classType, false, true, 100); correctBuildInitialisation(workAttrTypes, classType); datasetIntegrity(workAttrTypes, classType, handleMissingAttributes, handleMissingClass); if (estTypes.incremental) incrementingEquality(workAttrTypes, classType); } } return attrTypes; } /** * Checks whether the scheme can take command line options. * * @return index 0 is true if the estimator can take options */ protected boolean[] canTakeOptions() { boolean[] result = new boolean[2]; print("options..."); if (m_Estimator instanceof OptionHandler) { println("yes"); if (m_Debug) { println("\n=== Full report ==="); Enumeration enu = ((OptionHandler)m_Estimator).listOptions(); while (enu.hasMoreElements()) { Option option = (Option) enu.nextElement(); print(option.synopsis() + "\n" + option.description() + "\n"); } println("\n"); } result[0] = true; } else { println("no"); result[0] = false; } return result; } /** * Checks whether the scheme can build models incrementally. 
* * @return index 0 is true if the estimator can train incrementally */ protected boolean[] incrementalEstimator() { boolean[] result = new boolean[2]; print("incremental estimator..."); if (m_Estimator instanceof IncrementalEstimator) { println("yes"); result[0] = true; } else { println("no"); result[0] = false; } return result; } /** * Checks whether the scheme says it can handle instance weights. * * @return true if the estimator handles instance weights */ protected boolean[] weightedInstancesHandler() { boolean[] result = new boolean[2]; print("weighted instances estimator..."); if (m_Estimator instanceof WeightedInstancesHandler) { println("yes"); result[0] = true; } else { println("no"); result[0] = false; } return result; } /** * Checks whether the estimator is supervised. * * @return true if the estimator handles instance weights */ protected boolean[] supervisedEstimator() { boolean[] result = new boolean[2]; result[0] = false; return result; } /** * Checks basic estimation of one attribute of the scheme, for simple non-troublesome * datasets. * * @param attrTypes the types the estimator can work with * @param classType the class type (NOMINAL, NUMERIC, etc.) 
* @return index 0 is true if the test was passed, index 1 is true if test * was acceptable */ protected boolean[] canEstimate(AttrTypes attrTypes, boolean supervised, int classType) { // supervised is ignored, no supervised estimators used yet print("basic estimation"); printAttributeSummary(attrTypes, classType); print("..."); FastVector accepts = new FastVector(); accepts.addElement("nominal"); accepts.addElement("numeric"); accepts.addElement("string"); accepts.addElement("date"); accepts.addElement("relational"); accepts.addElement("not in classpath"); int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2, missingLevel = 0; boolean attributeMissing = false, classMissing = false; int numAtts = 1, attrIndex = 0; return runBasicTest(attrTypes, numAtts, attrIndex, classType, missingLevel, attributeMissing, classMissing, numTrain, numTest, numClasses, accepts); } /** * Checks basic estimation of one attribute of the scheme, for simple non-troublesome * datasets. * * @param attrTypes the types the estimator can work with * @param classType the class type (NOMINAL, NUMERIC, etc.) */ protected void canSplitUpClass(AttrTypes attrTypes, int classType) { if (attrTypes.nominal) canSplitUpClass(Attribute.NOMINAL, classType); if (attrTypes.numeric) canSplitUpClass(Attribute.NUMERIC, classType); } /** * Checks basic estimation of one attribute of the scheme, for simple non-troublesome * datasets. * * @param attrType the type of the estimator * @param classType the class type (NOMINAL, NUMERIC, etc.) 
* @return index 0 is true if the test was passed, index 1 is true if test * was acceptable */ protected boolean[] canSplitUpClass(int attrType, int classType) { boolean[] result = new boolean[2]; FastVector accepts = new FastVector(); accepts.addElement("not in classpath"); // supervised is ignored, no supervised estimators used yet print("split per class type "); printAttributeSummary(attrType, Attribute.NOMINAL); print("..."); int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2; boolean attributeMissing = false, classMissing = false; int numAtts = 3, attrIndex = 0, classIndex = 1; Instances train = null; Vector test; Estimator estimator = null; boolean built = false; try { AttrTypes at = new AttrTypes(attrType); train = makeTestDataset(42, numTrain, numAtts, at, numClasses, classType, classIndex); // prepare training data set and test value list test = makeTestValueList(24, numTest, train, attrIndex, attrType); estimator = Estimator.makeCopies(getEstimator(), 1)[0]; } catch (Exception ex) { ex.printStackTrace(); throw new Error("Error setting up for tests: " + ex.getMessage()); } try { estimator.addValues(train, attrIndex, classType, classIndex); built = true; testWithTestValues(estimator, test); println("yes"); result[0] = true; } catch (Exception ex) { boolean acceptable = false; String msg; if (ex.getMessage() == null) msg = ""; else msg = ex.getMessage().toLowerCase(); if (msg.indexOf("not in classpath") > -1) m_ClasspathProblems = true; for (int i = 0; i < accepts.size(); i++) { if (msg.indexOf((String)accepts.elementAt(i)) >= 0) { acceptable = true; } } println("no" + (acceptable ? 
" (OK error message)" : "")); result[1] = acceptable; if (m_Debug) { println("\n=== Full Report ==="); print("Problem during"); if (built) { print(" testing"); } else { print(" training"); } println(": " + ex.getMessage() + "\n"); if (!acceptable) { if (accepts.size() > 0) { print("Error message doesn't mention "); for (int i = 0; i < accepts.size(); i++) { if (i != 0) { print(" or "); } print('"' + (String)accepts.elementAt(i) + '"'); } } println("here are the datasets:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); println("=== Test Dataset ===\n" + test.toString() + "\n\n"); } } } return result; } /** * Checks whether nominal schemes can handle more than two classes. * If a scheme is only designed for two-class problems it should * throw an appropriate exception for multi-class problems. * * @param attrTypes attribute types the estimator excepts * @param numClasses the number of classes to test * @return index 0 is true if the test was passed, index 1 is true if test * was acceptable */ protected boolean[] canHandleNClasses(AttrTypes attrTypes, int numClasses) { print("more than two class problems"); printAttributeSummary(attrTypes, Attribute.NOMINAL); print("..."); FastVector accepts = new FastVector(); accepts.addElement("number"); accepts.addElement("class"); int numTrain = getNumInstances(), numTest = getNumInstances(), missingLevel = 0; boolean attributeMissing = false, classMissing = false; int numAttr = 1, attrIndex = 0; return runBasicTest(attrTypes, numAttr, attrIndex, Attribute.NOMINAL, missingLevel, attributeMissing, classMissing, numTrain, numTest, numClasses, accepts); } /** * Checks whether the scheme can handle class attributes as Nth attribute. * * @param attrTypes the attribute types the estimator accepts * @param numAtts of attributes * @param attrIndex the index of the attribute * @param classType the class type (NUMERIC, NOMINAL, etc.) 
* @param classIndex the index of the class attribute (0-based, -1 means last attribute) * @return index 0 is true if the test was passed, index 1 is true if test * was acceptable * @see TestInstances#CLASS_IS_LAST */ protected boolean[] canHandleClassAsNthAttribute(AttrTypes attrTypes, int numAtts, int attrIndex, int classType, int classIndex) { if (classIndex == TestInstances.CLASS_IS_LAST) print("class attribute as last attribute"); else print("class attribute as " + (classIndex + 1) + ". attribute"); printAttributeSummary(attrTypes, classType); print("..."); FastVector accepts = new FastVector(); int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2, missingLevel = 0; boolean attributeMissing = false, classMissing = false; return runBasicTest(attrTypes, numAtts, attrIndex, classType, classIndex, missingLevel, attributeMissing, classMissing, numTrain, numTest, numClasses, accepts); } /** * Checks whether the scheme can handle zero training instances. * * @param attrTypes attribute types that can be estimated * @param classType the class type (NUMERIC, NOMINAL, etc.) * @return index 0 is true if the test was passed, index 1 is true if test * was acceptable */ protected boolean[] canHandleZeroTraining(AttrTypes attrTypes, int classType) { print("handle zero training instances"); printAttributeSummary(attrTypes, classType); print("..."); FastVector accepts = new FastVector(); accepts.addElement("train"); accepts.addElement("value"); int numTrain = 0, numTest = getNumInstances(), numClasses = 2, missingLevel = 0; boolean attributeMissing = false, classMissing = false; int numAtts = 1; int attrIndex = 0; return runBasicTest( attrTypes, numAtts, attrIndex, classType, missingLevel, attributeMissing, classMissing, numTrain, numTest, numClasses, accepts); } /** * Checks whether the scheme correctly initialises models when * buildEstimator is called. This test calls buildEstimator with * one training dataset and records performance on a test set. 
* buildEstimator is then called on a training set with different * structure, and then again with the original training set. The * performance on the test set is compared with the original results * and any performance difference noted as incorrect build initialisation. * * @param attrTypes attribute types that can be estimated * @param classType the class type (NUMERIC, NOMINAL, etc.) * @return index 0 is true if the test was passed, index 1 is true if the * scheme performs worse than ZeroR, but without error (index 0 is * false) */ protected boolean[] correctBuildInitialisation(AttrTypes attrTypes, int classType) { boolean[] result = new boolean[2]; print("correct initialisation during buildEstimator"); printAttributeSummary(attrTypes, classType); print("..."); int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2, missingLevel = 0; boolean attributeMissing = false, classMissing = false; Instances train1 = null; Instances test1 = null; Instances train2 = null; Instances test2 = null; Estimator estimator = null; Estimator estimator1 = null; boolean built = false; int stage = 0; int attrIndex1 = 1; int attrIndex2 = 2; try { // Make two sets of train/test splits with different // numbers of attributes train1 = makeTestDataset(42, numTrain, 2, attrTypes, numClasses, classType); train2 = makeTestDataset(84, numTrain, 3, attrTypes, numClasses, classType); if (missingLevel > 0) { addMissing(train1, missingLevel, attributeMissing, classMissing, attrIndex1); addMissing(train2, missingLevel, attributeMissing, classMissing, attrIndex2); } estimator = Estimator.makeCopies(getEstimator(), 1)[0]; } catch (Exception ex) { throw new Error("Error setting up for tests: " + ex.getMessage()); } try { //TESTING?? 
stage = 0; estimator.addValues(train1, attrIndex1); built = true; estimator1 = estimator.makeCopies(getEstimator(), 1)[0]; stage = 1; built = false; estimator.addValues(train2, attrIndex2); built = true; stage = 2; built = false; estimator.addValues(train1, attrIndex1); built = true; stage = 3; if (!estimator.equals(estimator1)) { if (m_Debug) { println("\n=== Full report ===\n" + "\nFirst build estimator\n"+ estimator.toString() + "\n\n"); println("\nSecond build estimator\n"+ estimator.toString() + "\n\n"); } throw new Exception("Results differ between buildEstimator calls"); } println("yes"); result[0] = true; if (false && m_Debug) { println("\n=== Full report ===\n" + "\nFirst buildEstimator()" + "\n\n"); println("\nSecond buildEstimator()" + "\n\n"); } } catch (Exception ex) { String msg = ex.getMessage().toLowerCase(); if (msg.indexOf("worse than zeror") >= 0) { println("warning: performs worse than ZeroR"); result[0] = true; result[1] = true; } else { println("no"); result[0] = false; } if (m_Debug) { println("\n=== Full Report ==="); print("Problem during"); if (built) { print(" testing"); } else { print(" training"); } switch (stage) { case 0: print(" of dataset 1"); break; case 1: print(" of dataset 2"); break; case 2: print(" of dataset 1 (2nd build)"); break; case 3: print(", comparing results from builds of dataset 1"); break; } println(": " + ex.getMessage() + "\n"); println("here are the datasets:\n"); println("=== Train1 Dataset ===\n" + train1.toString() + "\n"); println("=== Test1 Dataset ===\n" + test1.toString() + "\n\n"); println("=== Train2 Dataset ===\n" + train2.toString() + "\n"); println("=== Test2 Dataset ===\n" + test2.toString() + "\n\n"); } } return result; } /** * Checks basic missing value handling of the scheme. If the missing * values cause an exception to be thrown by the scheme, this will be * recorded. * * @param attrTypes attribute types that can be estimated * @param classType the class type (NUMERIC, NOMINAL, etc.) 
* @param attributeMissing true if the missing values may be in * the attributes * @param classMissing true if the missing values may be in the class * @param missingLevel the percentage of missing values * @return index 0 is true if the test was passed, index 1 is true if test * was acceptable */ protected boolean[] canHandleMissing(AttrTypes attrTypes, int classType, boolean attributeMissing, boolean classMissing, int missingLevel) { if (missingLevel == 100) print("100% "); print("missing"); if (attributeMissing) { print(" attribute"); if (classMissing) print(" and"); } if (classMissing) print(" class"); print(" values"); printAttributeSummary(attrTypes, classType); print("..."); FastVector accepts = new FastVector(); accepts.addElement("missing"); accepts.addElement("value"); accepts.addElement("train"); int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2; int numAtts = 1, attrIndex = 0; return runBasicTest(attrTypes, numAtts, attrIndex, classType, missingLevel, attributeMissing, classMissing, numTrain, numTest, numClasses, accepts); } /** * Checks whether an incremental scheme produces the same model when * trained incrementally as when batch trained. The model itself * cannot be compared, so we compare the evaluation on test data * for both models. It is possible to get a false positive on this * test (likelihood depends on the estimator). * * @param attrTypes attribute types that can be estimated * @param classType the class type (NUMERIC, NOMINAL, etc.) 
* @return index 0 is true if the test was passed */ protected boolean[] incrementingEquality(AttrTypes attrTypes, int classType) { print("incremental training produces the same results" + " as batch training"); printAttributeSummary(attrTypes, classType); print("..."); int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2, missingLevel = 0; boolean attributeMissing = false, classMissing = false; boolean[] result = new boolean[2]; Instances train = null; Estimator [] estimators = null; boolean built = false; int attrIndex = 0; Vector test; try { train = makeTestDataset(42, numTrain, 1, attrTypes, numClasses, classType ); // prepare training data set and test value list test = makeTestValueList(24, numTest, train, attrIndex, attrTypes.getSetType()); if (missingLevel > 0) { addMissing(train, missingLevel, attributeMissing, classMissing, attrIndex); } estimators = Estimator.makeCopies(getEstimator(), 2); estimators[0].addValues(train, attrIndex); } catch (Exception ex) { throw new Error("Error setting up for tests: " + ex.getMessage()); } try { for (int i = 0; i < train.numInstances(); i++) { ((IncrementalEstimator)estimators[1]).addValue(train.instance(i).value(attrIndex), 1.0); } built = true; if (!estimators[0].equals(estimators[1])) { println("no"); result[0] = false; if (m_Debug) { println("\n=== Full Report ==="); println("Results differ between batch and " + "incrementally built models.\n" + "Depending on the estimator, this may be OK"); println("Here are the results:\n"); println("batch built results\n" + estimators[0].toString()); println("incrementally built results\n" + estimators[1].toString()); println("Here are the datasets:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); println("=== Test Dataset ===\n" + test.toString() + "\n\n"); } } else { println("yes"); result[0] = true; } } catch (Exception ex) { result[0] = false; print("Problem during"); if (built) print(" testing"); else print(" training"); println(": " 
+ ex.getMessage() + "\n"); } return result; } /** * Checks whether the estimator can handle instance weights. * This test compares the estimator performance on two datasets * that are identical except for the training weights. If the * results change, then the estimator must be using the weights. It * may be possible to get a false positive from this test if the * weight changes aren't significant enough to induce a change * in estimator performance (but the weights are chosen to minimize * the likelihood of this). * * @param attrTypes attribute types that can be estimated * @param classType the class type (NUMERIC, NOMINAL, etc.) * @return index 0 true if the test was passed */ protected boolean[] instanceWeights(AttrTypes attrTypes, int classType) { print("estimator uses instance weights"); printAttributeSummary(attrTypes, classType); print("..."); int numTrain = 2 * getNumInstances(), numTest = getNumInstances(), numClasses = 2, missingLevel = 0; boolean attributeMissing = false, classMissing = false; boolean[] result = new boolean[2]; Instances train = null; Vector test = null; Estimator [] estimators = null; Vector resultProbsO = null; Vector resultProbsW = null; boolean built = false; boolean evalFail = false; int attrIndex = 0; try { train = makeTestDataset(42, numTrain, 1, attrTypes, numClasses, classType); // prepare training data set and test value list test = makeTestValueList(24, numTest, train, attrIndex, attrTypes.getSetType()); if (missingLevel > 0) { addMissing(train, missingLevel, attributeMissing, classMissing, attrIndex); } estimators = Estimator.makeCopies(getEstimator(), 2); estimators[0].addValues(train, attrIndex); resultProbsO = testWithTestValues(estimators[0], test); } catch (Exception ex) { throw new Error("Error setting up for tests: " + ex.getMessage()); } try { // Now modify instance weights and re-built for (int i = 0; i < train.numInstances(); i++) { train.instance(i).setWeight(0); } Random random = new Random(1); for (int i = 0; i < 
train.numInstances() / 2; i++) { int inst = Math.abs(random.nextInt()) % train.numInstances(); int weight = Math.abs(random.nextInt()) % 10 + 1; train.instance(inst).setWeight(weight); } estimators[1].addValues(train, attrIndex); resultProbsW = testWithTestValues(estimators[1], test); built = true; if (resultProbsO.equals(resultProbsW)) { // println("no"); evalFail = true; throw new Exception("evalFail"); } println("yes"); result[0] = true; } catch (Exception ex) { println("no"); result[0] = false; if (m_Debug) { println("\n=== Full Report ==="); if (evalFail) { println("Results don't differ between non-weighted and " + "weighted instance models."); println("Here are the results:\n"); println(probsToString(resultProbsO)); } else { print("Problem during"); if (built) { print(" testing"); } else { print(" training"); } println(": " + ex.getMessage() + "\n"); } println("Here are the datasets:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); println("=== Train Weights ===\n"); for (int i = 0; i < train.numInstances(); i++) { println(" " + (i + 1) + " " + train.instance(i).weight()); } println("=== Test Dataset ===\n" + test.toString() + "\n\n"); println("(test weights all 1.0\n"); } } return result; } /** * Checks whether the scheme alters the training dataset during * training. If the scheme needs to modify the training * data it should take a copy of the training data. Currently checks * for changes to header structure, number of instances, order of * instances, instance weights. * * @param attrTypes attribute types that can be estimated * @param classType the class type (NUMERIC, NOMINAL, etc.) 
* @param attributeMissing true if we know the estimator can handle * (at least) moderate missing attribute values * @param classMissing true if we know the estimator can handle * (at least) moderate missing class values * @return index 0 is true if the test was passed */ protected boolean[] datasetIntegrity(AttrTypes attrTypes, int classType, boolean attributeMissing, boolean classMissing) { Estimator estimator = null; print("estimator doesn't alter original datasets"); printAttributeSummary(attrTypes, classType); print("..."); int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2, missingLevel = 100; boolean[] result = new boolean[2]; Instances train = null; boolean built = false; try { train = makeTestDataset(42, numTrain, 1, attrTypes, numClasses, classType); int attrIndex = 0; if (missingLevel > 0) { addMissing(train, missingLevel, attributeMissing, classMissing, attrIndex); } estimator = Estimator.makeCopies(getEstimator(), 1)[0]; } catch (Exception ex) { throw new Error("Error setting up for tests: " + ex.getMessage()); } try { Instances trainCopy = new Instances(train); int attrIndex = 0; estimator.addValues(trainCopy, attrIndex); compareDatasets(train, trainCopy); built = true; println("yes"); result[0] = true; } catch (Exception ex) { println("no"); result[0] = false; if (m_Debug) { println("\n=== Full Report ==="); print("Problem during"); if (built) { print(" testing"); } else { print(" training"); } println(": " + ex.getMessage() + "\n"); println("Here are the datasets:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); } } return result; } /** * Runs a text on the datasets with the given characteristics. * * @param attrTypes attribute types that can be estimated * @param numAtts number of attributes * @param attrIndex attribute index * @param classType the class type (NUMERIC, NOMINAL, etc.) 
* @param missingLevel the percentage of missing values * @param attributeMissing true if the missing values may be in * the attributes * @param classMissing true if the missing values may be in the class * @param numTrain the number of instances in the training set * @param numTest the number of instaces in the test set * @param numClasses the number of classes * @param accepts the acceptable string in an exception * @return index 0 is true if the test was passed, index 1 is true if test * was acceptable */ protected boolean[] runBasicTest(AttrTypes attrTypes, int numAtts, int attrIndex, int classType, int missingLevel, boolean attributeMissing, boolean classMissing, int numTrain, int numTest, int numClasses, FastVector accepts) { return runBasicTest(attrTypes, numAtts, attrIndex, classType, TestInstances.CLASS_IS_LAST, missingLevel, attributeMissing, classMissing, numTrain, numTest, numClasses, accepts); } /** * Runs a text on the datasets with the given characteristics. * * @param attrTypes attribute types that can be estimated * @param numAtts number of attributes * @param classType the class type (NUMERIC, NOMINAL, etc.) 
* @param classIndex the attribute index of the class * @param missingLevel the percentage of missing values * @param attributeMissing true if the missing values may be in * the attributes * @param classMissing true if the missing values may be in the class * @param numTrain the number of instances in the training set * @param numTest the number of instaces in the test set * @param numClasses the number of classes * @param accepts the acceptable string in an exception * @return index 0 is true if the test was passed, index 1 is true if test * was acceptable */ protected boolean[] runBasicTest(AttrTypes attrTypes, int numAtts, int attrIndex, int classType, int classIndex, int missingLevel, boolean attributeMissing, boolean classMissing, int numTrain, int numTest, int numClasses, FastVector accepts) { boolean[] result = new boolean[2]; Instances train = null; Vector test = null; Estimator estimator = null; boolean built = false; try { train = makeTestDataset(42, numTrain, numAtts, attrTypes, numClasses, classType, classIndex); // prepare training data set and test value list if (numTrain > 0) { test = makeTestValueList(24, numTest, train, attrIndex, attrTypes.getSetType()); } else { double min = -10.0; double max = 8.0; test = makeTestValueList(24, numTest, min, max, attrTypes.getSetType()); } if (missingLevel > 0) { addMissing(train, missingLevel, attributeMissing, classMissing, attrIndex); } estimator = Estimator.makeCopies(getEstimator(), 1)[0]; } catch (Exception ex) { ex.printStackTrace(); throw new Error("Error setting up for tests: " + ex.getMessage()); } try { estimator.addValues(train, attrIndex); built = true; testWithTestValues(estimator, test); println("yes"); result[0] = true; } catch (Exception ex) { boolean acceptable = false; String msg; if (ex.getMessage() == null) msg = ""; else msg = ex.getMessage().toLowerCase(); if (msg.indexOf("not in classpath") > -1) m_ClasspathProblems = true; for (int i = 0; i < accepts.size(); i++) { if 
(msg.indexOf((String)accepts.elementAt(i)) >= 0) { acceptable = true; } } println("no" + (acceptable ? " (OK error message)" : "")); result[1] = acceptable; if (m_Debug) { println("\n=== Full Report ==="); print("Problem during"); if (built) { print(" testing"); } else { print(" training"); } println(": " + ex.getMessage() + "\n"); if (!acceptable) { if (accepts.size() > 0) { print("Error message doesn't mention "); for (int i = 0; i < accepts.size(); i++) { if (i != 0) { print(" or "); } print('"' + (String)accepts.elementAt(i) + '"'); } } println("here are the datasets:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); println("=== Test Dataset ===\n" + test.toString() + "\n\n"); } } } return result; } /** * Compare two datasets to see if they differ. * * @param data1 one set of instances * @param data2 the other set of instances * @throws Exception if the datasets differ */ protected void compareDatasets(Instances data1, Instances data2) throws Exception { if (!data2.equalHeaders(data1)) { throw new Exception("header has been modified"); } if (!(data2.numInstances() == data1.numInstances())) { throw new Exception("number of instances has changed"); } for (int i = 0; i < data2.numInstances(); i++) { Instance orig = data1.instance(i); Instance copy = data2.instance(i); for (int j = 0; j < orig.numAttributes(); j++) { if (orig.isMissing(j)) { if (!copy.isMissing(j)) { throw new Exception("instances have changed"); } } else if (orig.value(j) != copy.value(j)) { throw new Exception("instances have changed"); } if (orig.weight() != copy.weight()) { throw new Exception("instance weights have changed"); } } } } /** * Add missing values to a dataset. 
* * @param data the instances to add missing values to * @param level the level of missing values to add (if positive, this * is the probability that a value will be set to missing, if negative * all but one value will be set to missing (not yet implemented)) * @param attributeMissing if true, attributes will be modified * @param classMissing if true, the class attribute will be modified * @param attrIndex index of the attribute */ protected void addMissing(Instances data, int level, boolean attributeMissing, boolean classMissing, int attrIndex) { int classIndex = data.classIndex(); Random random = new Random(1); for (int i = 0; i < data.numInstances(); i++) { Instance current = data.instance(i); for (int j = 0; j < data.numAttributes(); j++) { if (((j == classIndex) && classMissing) || ((j == attrIndex) && attributeMissing)) { if (Math.abs(random.nextInt()) % 100 < level) current.setMissing(j); } } } } /** * Make a simple set of instances, which can later be modified * for use in specific tests. * * @param seed the random number seed * @param numInstances the number of instances to generate * @param numAttr the number of attributes * @param attrTypes the attribute types * @param numClasses the number of classes (if nominal class) * @param classType the class type (NUMERIC, NOMINAL, etc.) * @return the test dataset * @throws Exception if the dataset couldn't be generated * @see #process(Instances) */ protected Instances makeTestDataset(int seed, int numInstances, int numAttr, AttrTypes attrTypes, int numClasses, int classType) throws Exception { return makeTestDataset( seed, numInstances, numAttr, attrTypes, numClasses, classType, TestInstances.CLASS_IS_LAST); } /** * Make a simple set of instances with variable position of the class * attribute, which can later be modified for use in specific tests. 
* * @param seed the random number seed * @param numInstances the number of instances to generate * @param numAttr the number of attributes to generate * @param attrTypes the type of attrbute that is excepted * @param numClasses the number of classes (if nominal class) * @param classType the class type (NUMERIC, NOMINAL, etc.) * @param classIndex the index of the class (0-based, -1 as last) * @return the test dataset * @throws Exception if the dataset couldn't be generated * @see TestInstances#CLASS_IS_LAST * @see #process(Instances) */ protected Instances makeTestDataset(int seed, int numInstances, int numAttr, AttrTypes attrTypes, int numClasses, int classType, int classIndex) throws Exception { TestInstances dataset = new TestInstances(); dataset.setSeed(seed); dataset.setNumInstances(numInstances); dataset.setNumNominal (attrTypes.nominal ? numAttr : 0); dataset.setNumNumeric (attrTypes.numeric ? numAttr : 0); dataset.setNumString (attrTypes.string ? numAttr : 0); dataset.setNumDate (attrTypes.date ? numAttr : 0); dataset.setNumRelational(attrTypes.relational ? numAttr : 0); dataset.setNumClasses(numClasses); dataset.setClassType(classType); dataset.setClassIndex(classIndex); return process(dataset.generate()); } /** * Make a simple set of values. Only one of the num'type' parameters should be larger 0. * (just to make parameter similar to the makeTestDataset parameters) * * @param seed the random number seed * @param numValues the number of values to generate * @param data the dataset to make test examples for * @param attrIndex index of the attribute * @param attrType the class type (NUMERIC, NOMINAL, etc.) 
* @throws Exception if the dataset couldn't be generated * @see #process(Instances) */ protected Vector makeTestValueList(int seed, int numValues, Instances data, int attrIndex, int attrType) throws Exception { // get min max double []minMax = getMinimumMaximum(data, attrIndex); double minValue = minMax[0]; double maxValue = minMax[1]; // make value list and put into a VECTOR double range = maxValue - minValue; Vector values = new Vector(numValues); Random random = new Random(seed); if (attrType == Attribute.NOMINAL) { for (int i = 0; i < numValues; i++) { Double v = new Double((Math.abs(random.nextInt()) % (int)range)+ (int)minValue); values.add(v); } } if (attrType == Attribute.NUMERIC) { for (int i = 0; i < numValues; i++) { Double v = new Double(random.nextDouble() * range + minValue); values.add(v); } } return values; } /** * Make a simple set of values. Only one of the num'type' parameters should be larger 0. * (just to make parameter similar to the makeTestDataset parameters) * * @param seed the random number seed * @param numValues the number of values to generate * @param minValue the minimal data value * @param maxValue the maximal data value * @param attrType the class type (NUMERIC, NOMINAL, etc.) * @throws Exception if the dataset couldn't be generated * @see #process(Instances) */ protected Vector makeTestValueList(int seed, int numValues, double minValue, double maxValue, int attrType) throws Exception { // make value list and put into a VECTOR double range = maxValue - minValue; Vector values = new Vector(numValues); Random random = new Random(seed); if (attrType == Attribute.NOMINAL) { for (int i = 0; i < numValues; i++) { Double v = new Double((Math.abs(random.nextInt()) % (int)range)+ (int)minValue); values.add(v); } } if (attrType == Attribute.NUMERIC) { for (int i = 0; i < numValues; i++) { Double v = new Double(random.nextDouble() * range + minValue); values.add(v); } } return values; } /** * Test with test values. 
* * @param est estimator to be tested * @param test vector with test values * **/ protected Vector testWithTestValues(Estimator est, Vector test) { Vector results = new Vector(); for (int i = 0; i < test.size(); i++) { double testValue = ((Double)(test.elementAt(i))).doubleValue(); double prob = est.getProbability(testValue); Double p = new Double(prob); results.add(p); } return results; } /** * Gets the minimum and maximum of the values a the first attribute * of the given data set * * @param inst the instance * @param attrIndex the index of the attribut to find min and max * @return the array with the minimum value on index 0 and the max on index 1 */ protected double[] getMinimumMaximum(Instances inst, int attrIndex) { double []minMax = new double[2]; try { int num = getMinMax(inst, attrIndex, minMax); } catch (Exception ex) { ex.printStackTrace(); System.out.println(ex.getMessage()); } return minMax; // double minValue = minMax[0]; // double maxValue = minMax[1]; } /** * Find the minimum and the maximum of the attribute and return it in * the last parameter.. 
* @param inst instances used to build the estimator * @param attrIndex index of the attribute * @param minMax the array to return minimum and maximum in * @return number of not missing values * @exception Exception if parameter minMax wasn't initialized properly */ public static int getMinMax(Instances inst, int attrIndex, double [] minMax) throws Exception { double min = Double.NaN; double max = Double.NaN; Instance instance = null; int numNotMissing = 0; if ((minMax == null) || (minMax.length < 2)) { throw new Exception("Error in Program, privat method getMinMax"); } Enumeration enumInst = inst.enumerateInstances(); if (enumInst.hasMoreElements()) { do { instance = (Instance) enumInst.nextElement(); } while (instance.isMissing(attrIndex) && (enumInst.hasMoreElements())); // add values if not missing if (!instance.isMissing(attrIndex)) { numNotMissing++; min = instance.value(attrIndex); max = instance.value(attrIndex); } while (enumInst.hasMoreElements()) { instance = (Instance) enumInst.nextElement(); if (!instance.isMissing(attrIndex)) { numNotMissing++; if (instance.value(attrIndex) < min) { min = (instance.value(attrIndex)); } else { if (instance.value(attrIndex) > max) { max = (instance.value(attrIndex)); } } } } } minMax[0] = min; minMax[1] = max; return numNotMissing; } /** * Print the probabilities after testing * @param probs vector with probability values * @return string with probability values printed */ private String probsToString(Vector probs) { StringBuffer txt = new StringBuffer (" "); for (int i = 0; i < probs.size(); i++) { txt.append("" + ((Double)(probs.elementAt(i))).doubleValue() + " "); } return txt.toString(); } /** * Provides a hook for derived classes to further modify the data. 
* * @param data the data to process * @return the processed data * @see #m_PostProcessor */ protected Instances process(Instances data) { if (getPostProcessor() == null) return data; else return getPostProcessor().process(data); } /** * Print out a short summary string for the dataset characteristics * * @param attrTypes the attribute types used (NUMERIC, NOMINAL, etc.) * @param classType the class type (NUMERIC, NOMINAL, etc.) */ protected void printAttributeSummary(AttrTypes attrTypes, int classType) { String str = ""; if (attrTypes.numeric) str += " numeric"; if (attrTypes.nominal) { if (str.length() > 0) str += " &"; str += " nominal"; } if (attrTypes.string) { if (str.length() > 0) str += " &"; str += " string"; } if (attrTypes.date) { if (str.length() > 0) str += " &"; str += " date"; } if (attrTypes.relational) { if (str.length() > 0) str += " &"; str += " relational"; } str += " attributes)"; switch (classType) { case Attribute.NUMERIC: str = " (numeric class," + str; break; case Attribute.NOMINAL: str = " (nominal class," + str; break; case Attribute.STRING: str = " (string class," + str; break; case Attribute.DATE: str = " (date class," + str; break; case Attribute.RELATIONAL: str = " (relational class," + str; break; } print(str); } /** * Print out a short summary string for the dataset characteristics * * @param attrType the attribute type (NUMERIC, NOMINAL, etc.) * @param classType the class type (NUMERIC, NOMINAL, etc.) 
*/ protected void printAttributeSummary(int attrType, int classType) { String str = ""; switch (attrType) { case Attribute.NUMERIC: str = " numeric" + str; break; case Attribute.NOMINAL: str = " nominal" + str; break; case Attribute.STRING: str = " string" + str; break; case Attribute.DATE: str = " date" + str; break; case Attribute.RELATIONAL: str = " relational" + str; break; } str += " attribute(s))"; switch (classType) { case Attribute.NUMERIC: str = " (numeric class," + str; break; case Attribute.NOMINAL: str = " (nominal class," + str; break; case Attribute.STRING: str = " (string class," + str; break; case Attribute.DATE: str = " (date class," + str; break; case Attribute.RELATIONAL: str = " (relational class," + str; break; } print(str); } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision: 1.5 $"); } /** * Test method for this class * * @param args the commandline parameters */ public static void main(String [] args) { try { CheckEstimator check = new CheckEstimator(); try { check.setOptions(args); Utils.checkForRemainingOptions(args); } catch (Exception ex) { String result = ex.getMessage() + "\n\n" + check.getClass().getName().replaceAll(".*\\.", "") + " Options:\n\n"; Enumeration enu = check.listOptions(); while (enu.hasMoreElements()) { Option option = (Option) enu.nextElement(); result += option.synopsis() + "\n" + option.description() + "\n"; } throw new Exception(result); } check.doTests(); } catch (Exception ex) { System.err.println(ex.getMessage()); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy