All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.estimators.CheckEstimator Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    CheckEstimator.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.estimators;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TestInstances;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;

/**
 * Class for examining the capabilities and finding problems with estimators. If
 * you implement an estimator using the WEKA libraries, you should run the checks
 * on it to ensure robustness and correct operation. Passing all the tests of
 * this object does not mean bugs in the estimator don't exist, but this will
 * help find some common ones.
 * 

* * Typical usage: *

* java weka.estimators.CheckEstimator -W estimator_name * estimator_options *

 *
 * This class uses code from the CheckEstimatorClass. ATTENTION! Current
 * estimators can only 1. split on a nominal class attribute 2. build estimators
 * for nominal and numeric attributes 3. build estimators independently of the
 * class type. The functionality to test on other class and attribute types is
 * left in big parts in the code.
 *
 * CheckEstimator reports on the following:

    *
  • Estimator abilities *
      *
    • Possible command line options to the estimator
    • *
    • Whether the estimator can predict nominal, numeric, string, date or relational class attributes. Warnings will be displayed if performance is worse than ZeroR
    • *
    • Whether the estimator can be trained incrementally
    • *
    • Whether the estimator can build estimates for numeric attributes
    • *
    • Whether the estimator can handle nominal attributes
    • *
    • Whether the estimator can handle string attributes
    • *
    • Whether the estimator can handle date attributes
    • *
    • Whether the estimator can handle relational attributes
    • *
    • Whether the estimator can build estimates for multi-instance data
    • *
    • Whether the estimator can handle missing attribute values
    • *
    • Whether the estimator can handle missing class values
    • *
    • Whether a nominal estimator only handles 2 class problems
    • *
    • Whether the estimator can handle instance weights
    • *
    *
  • *
  • Correct functioning *
      *
    • Correct initialisation during addValues (i.e. no result changes when addValues is called repeatedly)
    • *
    • Whether incremental training produces the same results as during non-incremental training (which may or may not be OK)
    • *
    • Whether the estimator alters the data passed to it (number of instances, instance order, instance weights, etc)
    • *
    *
  • *
  • Degenerate cases *
      *
    • building estimator with zero training instances
    • *
    • all but one of the attribute values missing
    • *
    • all attribute values missing
    • *
    • all but one class values missing
    • *
    • all class values missing
    • *
    *
  • *
 * Running CheckEstimator with the debug option set will output the training and
 * test datasets for any failed tests.

 *
 * The weka.estimators.AbstractEstimatorTest uses this class to
 * test all the estimators. Any changes here have to be checked in that
 * abstract test class, too.

* * Valid options are: *

* *

 * -D
 *  Turn on debugging output.
 * 
* *
 * -S
 *  Silent mode - prints nothing to stdout.
 * 
* *
 * -N <num>
 *  The number of instances in the datasets (default 100).
 * 
* *
 * -W
 *  Full name of the estimator analysed.
 *  eg: weka.estimators.NormalEstimator
 * 
* *
 * Options specific to estimator weka.estimators.NormalEstimator:
 * 
* *
 * -D
 *  If set, estimator is run in debug mode and
 *  may output additional info to the console
 * 
* * * * Options after -- are passed to the designated estimator. *

* * @author Len Trigg ([email protected]) * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 11247 $ * @see TestInstances */ public class CheckEstimator implements OptionHandler, RevisionHandler { /* * Note about test methods: - methods return array of booleans - first index: * success or not - second index: acceptable or not (e.g., Exception is OK) - * in case the performance is worse than that of ZeroR both indices are true * * FracPete (fracpete at waikato dot ac dot nz) */ /** * a class for postprocessing the test-data */ public class PostProcessor implements RevisionHandler { /** * Provides a hook for derived classes to further modify the data. * Currently, the data is just passed through. * * @param data the data to process * @return the processed data */ protected Instances process(Instances data) { return data; } /** * Returns the revision string. * * @return the revision */ @Override public String getRevision() { return RevisionUtils.extract("$Revision: 11247 $"); } } /*** The estimator to be examined */ protected Estimator m_Estimator = new weka.estimators.NormalEstimator( 0.000001); /** The options to be passed to the base estimator. 
*/ protected String[] m_EstimatorOptions; /** The results of the analysis as a string */ protected String m_AnalysisResults; /** Debugging mode, gives extra output if true */ protected boolean m_Debug = false; /** Silent mode, for no output at all to stdout */ protected boolean m_Silent = false; /** The number of instances in the datasets */ protected int m_NumInstances = 100; /** for post-processing the data even further */ protected PostProcessor m_PostProcessor = null; /** whether classpath problems occurred */ protected boolean m_ClasspathProblems = false; /** * class that contains info about the attribute types the estimator can * estimate estimator work on one attribute only */ public static class AttrTypes implements RevisionHandler { boolean nominal = false; boolean numeric = false; boolean string = false; boolean date = false; boolean relational = false; AttrTypes() { } AttrTypes(AttrTypes newTypes) { nominal = newTypes.nominal; numeric = newTypes.numeric; string = newTypes.string; date = newTypes.date; relational = newTypes.relational; } AttrTypes(int type) { if (type == Attribute.NOMINAL) { nominal = true; } if (type == Attribute.NUMERIC) { numeric = true; } if (type == Attribute.STRING) { string = true; } if (type == Attribute.DATE) { date = true; } if (type == Attribute.RELATIONAL) { relational = true; } } int getSetType() throws Exception { int sum = 0; int type = -1; if (nominal) { sum++; type = Attribute.NOMINAL; } if (numeric) { sum++; type = Attribute.NUMERIC; } if (string) { sum++; type = Attribute.STRING; } if (date) { sum++; type = Attribute.DATE; } if (relational) { sum++; type = Attribute.RELATIONAL; } if (sum > 1) { throw new Exception("Expected to have only one type set used wrongly."); } if (type < 0) { throw new Exception("No type set."); } return type; } boolean oneIsSet() { return (nominal || numeric || string || date || relational); } public Vector getVectorOfAttrTypes() { Vector attrs = new Vector(); if (nominal) { attrs.add(new 
Integer(Attribute.NOMINAL)); } if (numeric) { attrs.add(new Integer(Attribute.NUMERIC)); } if (string) { attrs.add(new Integer(Attribute.STRING)); } if (date) { attrs.add(new Integer(Attribute.DATE)); } if (relational) { attrs.add(new Integer(Attribute.RELATIONAL)); } return attrs; } /** * Returns the revision string. * * @return the revision */ @Override public String getRevision() { return RevisionUtils.extract("$Revision: 11247 $"); } } /** * public class that contains info about the chosen attribute type estimator * work on one attribute only */ public static class EstTypes implements RevisionHandler { boolean incremental = false; boolean weighted = false; boolean supervised = false; /** * Constructor */ public EstTypes() { } /** * Constructor */ public EstTypes(boolean i, boolean w, boolean s) { incremental = i; weighted = w; supervised = s; } /** * Returns the revision string. * * @return the revision */ @Override public String getRevision() { return RevisionUtils.extract("$Revision: 11247 $"); } } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ @Override public Enumeration

* *

   * -D
   *  Turn on debugging output.
   * 
* *
   * -S
   *  Silent mode - prints nothing to stdout.
   * 
* *
   * -N <num>
   *  The number of instances in the datasets (default 100).
   * 
* *
   * -W
   *  Full name of the estimator analysed.
   *  eg: weka.estimators.NormalEstimator
   * 
* *
   * Options specific to estimator weka.estimators.NormalEstimator:
   * 
* *
   * -D
   *  If set, estimator is run in debug mode and
   *  may output additional info to the console
   * 
* * * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ @Override public void setOptions(String[] options) throws Exception { String tmpStr; setDebug(Utils.getFlag('D', options)); setSilent(Utils.getFlag('S', options)); tmpStr = Utils.getOption('N', options); if (tmpStr.length() != 0) { setNumInstances(Integer.parseInt(tmpStr)); } else { setNumInstances(100); } tmpStr = Utils.getOption('W', options); if (tmpStr.length() == 0) { throw new Exception("A estimator must be specified with the -W option."); } setEstimator(Estimator.forName(tmpStr, Utils.partitionOptions(options))); } /** * Gets the current settings of the CheckEstimator. * * @return an array of strings suitable for passing to setOptions */ @Override public String[] getOptions() { Vector result = new Vector(); if (getDebug()) { result.add("-D"); } if (getSilent()) { result.add("-S"); } result.add("-N"); result.add("" + getNumInstances()); if (getEstimator() != null) { result.add("-W"); result.add(getEstimator().getClass().getName()); } if ((m_Estimator != null) && (m_Estimator instanceof OptionHandler)) { String[] options = ((OptionHandler) m_Estimator).getOptions(); if (options.length > 0) { result.add("--"); Collections.addAll(result, options); } } return result.toArray(new String[result.size()]); } /** * sets the PostProcessor to use * * @param value the new PostProcessor * @see #m_PostProcessor */ public void setPostProcessor(PostProcessor value) { m_PostProcessor = value; } /** * returns the current PostProcessor, can be null * * @return the current PostProcessor */ public PostProcessor getPostProcessor() { return m_PostProcessor; } /** * returns TRUE if the estimator returned a "not in classpath" Exception * * @return true if CLASSPATH problems occurred */ public boolean hasClasspathProblems() { return m_ClasspathProblems; } /** * Begin the tests, reporting results to System.out */ public void doTests() { if (getEstimator() == null) { 
println("\n=== No estimator set ==="); return; } println("\n=== Check on Estimator: " + getEstimator().getClass().getName() + " ===\n"); m_ClasspathProblems = false; // Start tests with test for options canTakeOptions(); // test what type of estimator it is EstTypes estTypes = new EstTypes(); estTypes.incremental = incrementalEstimator()[0]; estTypes.weighted = weightedInstancesHandler()[0]; estTypes.supervised = supervisedEstimator()[0]; // in none of the estimators yet the functionality is depending on the class // type // since this could change the basic structure taken from checkclassifiers // is kept here int classType = Attribute.NOMINAL; AttrTypes attrTypes = testsPerClassType(classType, estTypes); // only nominal class can be split up so far canSplitUpClass(attrTypes, classType); } /** * Set debugging mode * * @param debug true if debug output should be printed */ public void setDebug(boolean debug) { m_Debug = debug; // disable silent mode, if necessary if (getDebug()) { setSilent(false); } } /** * Get whether debugging is turned on * * @return true if debugging output is on */ public boolean getDebug() { return m_Debug; } /** * Set slient mode, i.e., no output at all to stdout * * @param value whether silent mode is active or not */ public void setSilent(boolean value) { m_Silent = value; } /** * Get whether silent mode is turned on * * @return true if silent mode is on */ public boolean getSilent() { return m_Silent; } /** * Sets the number of instances to use in the datasets (some estimators might * require more instances). * * @param value the number of instances to use */ public void setNumInstances(int value) { m_NumInstances = value; } /** * Gets the current number of instances to use for the datasets. * * @return the number of instances */ public int getNumInstances() { return m_NumInstances; } /** * Set the estimator for boosting. * * @param newEstimator the Estimator to use. 
*/ public void setEstimator(Estimator newEstimator) { m_Estimator = newEstimator; } /** * Get the estimator used as the estimator * * @return the estimator used as the estimator */ public Estimator getEstimator() { return m_Estimator; } /** * prints the given message to stdout, if not silent mode * * @param msg the text to print to stdout */ protected void print(Object msg) { if (!getSilent()) { System.out.print(msg); } } /** * prints the given message (+ LF) to stdout, if not silent mode * * @param msg the message to println to stdout */ protected void println(Object msg) { print(msg + "\n"); } /** * prints a LF to stdout, if not silent mode */ protected void println() { print("\n"); } /** * Run a battery of tests for a given class attribute type * * @param classType true if the class attribute should be numeric * @param estTypes types the estimator is, like incremental, weighted, * supervised etc * @return attribute types estimator can work with */ protected AttrTypes testsPerClassType(int classType, EstTypes estTypes) { // in none of the estimators yet is the estimation depending on the class // type // since this could change the basic structure taken from checkclassifiers // is kept here // test A: simple test - if can estimate AttrTypes attrTypes = new AttrTypes(); AttrTypes at = new AttrTypes(Attribute.NOMINAL); attrTypes.nominal = canEstimate(at, estTypes.supervised, classType)[0]; at = new AttrTypes(Attribute.NUMERIC); attrTypes.numeric = canEstimate(at, estTypes.supervised, classType)[0]; attrTypes.string = false; attrTypes.date = false; attrTypes.relational = false; // if (!multiInstance) // PRel = canEstimate(false, false, false, false, true, classType)[0]; // else // PRel = false; // one of the attribute types succeeded if (attrTypes.oneIsSet()) { Vector attributesSet = attrTypes.getVectorOfAttrTypes(); // make tests for each attribute for (int i = 0; i < attributesSet.size(); i++) { AttrTypes workAttrTypes = new AttrTypes(attributesSet.elementAt(i) 
.intValue()); // test B: weights change estimate or not if (estTypes.weighted) { instanceWeights(workAttrTypes, classType); } if (classType == Attribute.NOMINAL) { int numClasses = 4; canHandleNClasses(workAttrTypes, numClasses); } // tests with class not the last attribute and the attribute not the // first // if (!multiInstance) { int numAtt = 4; canHandleClassAsNthAttribute(workAttrTypes, numAtt, 0, classType, 1); // TODOTODOcanHandleAttrAsNthAttribute(workAttrTypes, numAtt, 2, // classType); // } canHandleZeroTraining(workAttrTypes, classType); boolean handleMissingAttributes = canHandleMissing(workAttrTypes, classType, true, false, 20)[0]; if (handleMissingAttributes) { canHandleMissing(workAttrTypes, classType, true, false, 100); } boolean handleMissingClass = canHandleMissing(workAttrTypes, classType, false, true, 20)[0]; if (handleMissingClass) { canHandleMissing(workAttrTypes, classType, false, true, 100); } correctBuildInitialisation(workAttrTypes, classType); datasetIntegrity(workAttrTypes, classType, handleMissingAttributes, handleMissingClass); if (estTypes.incremental) { incrementingEquality(workAttrTypes, classType); } } } return attrTypes; } /** * Checks whether the scheme can take command line options. * * @return index 0 is true if the estimator can take options */ protected boolean[] canTakeOptions() { boolean[] result = new boolean[2]; print("options..."); if (m_Estimator instanceof OptionHandler) { println("yes"); if (m_Debug) { println("\n=== Full report ==="); Enumeration




© 2015 - 2025 Weber Informatics LLC | Privacy Policy