All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.classifiers.CheckClassifier Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    CheckClassifier.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.CheckScheme;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.MultiInstanceCapabilitiesHandler;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SerializationHelper;
import weka.core.TestInstances;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;

/**
 * Class for examining the capabilities and finding problems with classifiers.
 * If you implement a classifier using the WEKA libraries, you should run the
 * checks on it to ensure robustness and correct operation. Passing all the
 * tests of this object does not mean bugs in the classifier don't exist, but
 * this will help find some common ones.
 * 

 * <p>
 * Typical usage:
 * <p>
 * <code>java weka.classifiers.CheckClassifier -W classifier_name
 * classifier_options</code>
 * <p>
 * CheckClassifier reports on the following:

 * <ul>
 * <li>Classifier abilities
 * <ul>
 * <li>Possible command line options to the classifier</li>
 * <li>Whether the classifier can predict nominal, numeric, string, date or
 * relational class attributes. Warnings will be displayed if performance is
 * worse than ZeroR</li>
 * <li>Whether the classifier can be trained incrementally</li>
 * <li>Whether the classifier can handle numeric predictor attributes</li>
 * <li>Whether the classifier can handle nominal predictor attributes</li>
 * <li>Whether the classifier can handle string predictor attributes</li>
 * <li>Whether the classifier can handle date predictor attributes</li>
 * <li>Whether the classifier can handle relational predictor attributes</li>
 * <li>Whether the classifier can handle multi-instance data</li>
 * <li>Whether the classifier can handle missing predictor values</li>
 * <li>Whether the classifier can handle missing class values</li>
 * <li>Whether a nominal classifier only handles 2 class problems</li>
 * <li>Whether the classifier can handle instance weights</li>
 * </ul>
 * </li>
 * <li>Correct functioning
 * <ul>
 * <li>Correct initialisation during buildClassifier (i.e. no result changes
 * when buildClassifier called repeatedly)</li>
 * <li>Whether incremental training produces the same results as during
 * non-incremental training (which may or may not be OK)</li>
 * <li>Whether the classifier alters the data passed to it (number of instances,
 * instance order, instance weights, etc)</li>
 * <li>Whether the toString() method works correctly before the classifier has
 * been built.</li>
 * </ul>
 * </li>
 * <li>Degenerate cases
 * <ul>
 * <li>building classifier with zero training instances</li>
 * <li>all but one predictor attribute values missing</li>
 * <li>all predictor attribute values missing</li>
 * <li>all but one class values missing</li>
 * <li>all class values missing</li>
 * </ul>
 * </li>
 * </ul>
 * Running CheckClassifier with the debug option set will output the training
 * and test datasets for any failed tests.
 * <p>
 * The <code>weka.classifiers.AbstractClassifierTest</code> uses this class to
 * test all the classifiers. Any changes here have to be checked in that
 * abstract test class, too.
 * <p>
 * Valid options are:

* *

 * -D
 *  Turn on debugging output.
 * 
* *
 * -S
 *  Silent mode - prints nothing to stdout.
 * 
* *
 * -N <num>
 *  The number of instances in the datasets (default 20).
 * 
* *
 * -nominal <num>
 *  The number of nominal attributes (default 2).
 * 
* *
 * -nominal-values <num>
 *  The number of values for nominal attributes (default 1).
 * 
* *
 * -numeric <num>
 *  The number of numeric attributes (default 1).
 * 
* *
 * -string <num>
 *  The number of string attributes (default 1).
 * 
* *
 * -date <num>
 *  The number of date attributes (default 1).
 * 
* *
 * -relational <num>
 *  The number of relational attributes (default 1).
 * 
* *
 * -num-instances-relational <num>
 *  The number of instances in relational/bag attributes (default 10).
 * 
* *
 * -words <comma-separated-list>
 *  The words to use in string attributes.
 * 
* *
 * -word-separators <chars>
 *  The word separators to use in string attributes.
 * 
* *
 * -W
 *  Full name of the classifier analysed.
 *  eg: weka.classifiers.bayes.NaiveBayes
 *  (default weka.classifiers.rules.ZeroR)
 * 
* *
 * Options specific to classifier weka.classifiers.rules.ZeroR:
 * 
* *
 * -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
 * 
* * * * Options after -- are passed to the designated classifier. *

* * @author Len Trigg ([email protected]) * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 11253 $ * @see TestInstances */ public class CheckClassifier extends CheckScheme { /* * Note about test methods: - methods return array of booleans - first index: * success or not - second index: acceptable or not (e.g., Exception is OK) - * in case the performance is worse than that of ZeroR both indices are true * * FracPete (fracpete at waikato dot ac dot nz) */ /*** The classifier to be examined */ protected Classifier m_Classifier = new weka.classifiers.rules.ZeroR(); /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ @Override public Enumeration

* *

   * -D
   *  Turn on debugging output.
   * 
* *
   * -S
   *  Silent mode - prints nothing to stdout.
   * 
* *
   * -N <num>
   *  The number of instances in the datasets (default 20).
   * 
* *
   * -nominal <num>
   *  The number of nominal attributes (default 2).
   * 
* *
   * -nominal-values <num>
   *  The number of values for nominal attributes (default 1).
   * 
* *
   * -numeric <num>
   *  The number of numeric attributes (default 1).
   * 
* *
   * -string <num>
   *  The number of string attributes (default 1).
   * 
* *
   * -date <num>
   *  The number of date attributes (default 1).
   * 
* *
   * -relational <num>
   *  The number of relational attributes (default 1).
   * 
* *
   * -num-instances-relational <num>
   *  The number of instances in relational/bag attributes (default 10).
   * 
* *
   * -words <comma-separated-list>
   *  The words to use in string attributes.
   * 
* *
   * -word-separators <chars>
   *  The word separators to use in string attributes.
   * 
* *
   * -W
   *  Full name of the classifier analysed.
   *  eg: weka.classifiers.bayes.NaiveBayes
   *  (default weka.classifiers.rules.ZeroR)
   * 
* *
   * Options specific to classifier weka.classifiers.rules.ZeroR:
   * 
* *
   * -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console
   * 
* * * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ @Override public void setOptions(String[] options) throws Exception { String tmpStr; super.setOptions(options); tmpStr = Utils.getOption('W', options); if (tmpStr.length() == 0) { tmpStr = weka.classifiers.rules.ZeroR.class.getName(); } setClassifier((Classifier) forName("weka.classifiers", Classifier.class, tmpStr, Utils.partitionOptions(options))); } /** * Gets the current settings of the CheckClassifier. * * @return an array of strings suitable for passing to setOptions */ @Override public String[] getOptions() { Vector result; String[] options; result = new Vector(); Collections.addAll(result, super.getOptions()); if (getClassifier() != null) { result.add("-W"); result.add(getClassifier().getClass().getName()); } if ((m_Classifier != null) && (m_Classifier instanceof OptionHandler)) { options = ((OptionHandler) m_Classifier).getOptions(); if (options.length > 0) { result.add("--"); Collections.addAll(result, options); } } return result.toArray(new String[result.size()]); } /** * Begin the tests, reporting results to System.out */ @Override public void doTests() { if (getClassifier() == null) { println("\n=== No classifier set ==="); return; } println("\n=== Check on Classifier: " + getClassifier().getClass().getName() + " ===\n"); // Start tests m_ClasspathProblems = false; println("--> Checking for interfaces"); canTakeOptions(); boolean updateableClassifier = updateableClassifier()[0]; boolean weightedInstancesHandler = weightedInstancesHandler()[0]; boolean multiInstanceHandler = multiInstanceHandler()[0]; println("--> Classifier tests"); declaresSerialVersionUID(); testToString(); testsPerClassType(Attribute.NOMINAL, updateableClassifier, weightedInstancesHandler, multiInstanceHandler); testsPerClassType(Attribute.NUMERIC, updateableClassifier, weightedInstancesHandler, multiInstanceHandler); testsPerClassType(Attribute.DATE, 
updateableClassifier, weightedInstancesHandler, multiInstanceHandler); testsPerClassType(Attribute.STRING, updateableClassifier, weightedInstancesHandler, multiInstanceHandler); testsPerClassType(Attribute.RELATIONAL, updateableClassifier, weightedInstancesHandler, multiInstanceHandler); } /** * Set the classifier for boosting. * * @param newClassifier the Classifier to use. */ public void setClassifier(Classifier newClassifier) { m_Classifier = newClassifier; } /** * Get the classifier used as the classifier * * @return the classifier used as the classifier */ public Classifier getClassifier() { return m_Classifier; } /** * Run a battery of tests for a given class attribute type * * @param classType true if the class attribute should be numeric * @param updateable true if the classifier is updateable * @param weighted true if the classifier says it handles weights * @param multiInstance true if the classifier is a multi-instance classifier */ protected void testsPerClassType(int classType, boolean updateable, boolean weighted, boolean multiInstance) { boolean PNom = canPredict(true, false, false, false, false, multiInstance, classType)[0]; boolean PNum = canPredict(false, true, false, false, false, multiInstance, classType)[0]; boolean PStr = canPredict(false, false, true, false, false, multiInstance, classType)[0]; boolean PDat = canPredict(false, false, false, true, false, multiInstance, classType)[0]; boolean PRel; if (!multiInstance) { PRel = canPredict(false, false, false, false, true, multiInstance, classType)[0]; } else { PRel = false; } if (PNom || PNum || PStr || PDat || PRel) { if (weighted) { instanceWeights(PNom, PNum, PStr, PDat, PRel, multiInstance, classType); } canHandleOnlyClass(PNom, PNum, PStr, PDat, PRel, classType); if (classType == Attribute.NOMINAL) { canHandleNClasses(PNom, PNum, PStr, PDat, PRel, multiInstance, 4); } if (!multiInstance) { canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, 0); 
canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, 1); } canHandleZeroTraining(PNom, PNum, PStr, PDat, PRel, multiInstance, classType); boolean handleMissingPredictors = canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, true, false, 20)[0]; if (handleMissingPredictors) { canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, true, false, 100); } boolean handleMissingClass = canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, false, true, 20)[0]; if (handleMissingClass) { canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, false, true, 100); } correctBuildInitialisation(PNom, PNum, PStr, PDat, PRel, multiInstance, classType); datasetIntegrity(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, handleMissingPredictors, handleMissingClass); doesntUseTestClassVal(PNom, PNum, PStr, PDat, PRel, multiInstance, classType); if (updateable) { updatingEquality(PNom, PNum, PStr, PDat, PRel, multiInstance, classType); } } } /** * Checks whether the scheme's toString() method works even though the * classifies hasn't been built yet. * * @return index 0 is true if the toString() method works fine */ protected boolean[] testToString() { boolean[] result = new boolean[2]; print("toString..."); try { Classifier copy = m_Classifier.getClass().newInstance(); copy.toString(); result[0] = true; println("yes"); } catch (Exception e) { result[0] = false; println("no"); if (m_Debug) { println("\n=== Full report ==="); e.printStackTrace(); println("\n"); } } return result; } /** * tests for a serialVersionUID. Fails in case the scheme doesn't declare a * UID. 
* * @return index 0 is true if the scheme declares a UID */ protected boolean[] declaresSerialVersionUID() { boolean[] result = new boolean[2]; print("serialVersionUID..."); result[0] = !SerializationHelper.needsUID(m_Classifier.getClass()); if (result[0]) { println("yes"); } else { println("no"); } return result; } /** * Checks whether the scheme can take command line options. * * @return index 0 is true if the classifier can take options */ protected boolean[] canTakeOptions() { boolean[] result = new boolean[2]; print("options..."); if (m_Classifier instanceof OptionHandler) { println("yes"); if (m_Debug) { println("\n=== Full report ==="); Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy