weka.classifiers.Evaluation Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-stable Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the stable version. Apart from bugfixes, this version does not receive any other updates.
There is a newer version: 3.8.6
Show newest version
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Evaluation.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers;

import java.beans.BeanInfo;
import java.beans.Introspector;
import java.beans.MethodDescriptor;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.lang.reflect.Method;
import java.util.Date;
import java.util.Enumeration;
import java.util.Random;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import weka.classifiers.evaluation.NominalPrediction;
import weka.classifiers.evaluation.ThresholdCurve;
import weka.classifiers.pmml.consumer.PMMLClassifier;
import weka.classifiers.xml.XMLClassifier;
import weka.core.Drawable;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Summarizable;
import weka.core.Utils;
import weka.core.Version;
import weka.core.converters.ConverterUtils.DataSink;
import weka.core.converters.ConverterUtils.DataSource;
import weka.core.pmml.PMMLFactory;
import weka.core.pmml.PMMLModel;
import weka.core.xml.KOML;
import weka.core.xml.XMLOptions;
import weka.core.xml.XMLSerialization;
import weka.estimators.Estimator;
import weka.estimators.KernelEstimator;

/**
 * Class for evaluating machine learning models.
 * 
 * 
 * -------------------------------------------------------------------
 * 

 * 
 * General options when evaluating a learning scheme from the command-line:
 * 

 * 
 * -t filename 

 * Name of the file with the training data. (required)
 * 

 * 
 * -T filename 

 * Name of the file with the test data. If missing a cross-validation is
 * performed.
 * 

 * 
 * -c index 

 * Index of the class attribute (1, 2, ...; default: last).
 * 

 * 
 * -x number 

 * The number of folds for the cross-validation (default: 10).
 * 

 * 
 * -no-cv 

 * No cross validation. If no test file is provided, no evaluation is done.
 * 

 * 
 * -split-percentage percentage 

 * Sets the percentage for the train/test set split, e.g., 66.
 * 

 * 
 * -preserve-order 

 * Preserves the order in the percentage split instead of randomizing the data
 * first with the seed value ('-s').
 * 

 * 
 * -s seed 

 * Random number seed for the cross-validation and percentage split (default:
 * 1).
 * 

 * 
 * -m filename 

 * The name of a file containing a cost matrix.
 * 

 * 
 * -l filename 

 * Loads classifier from the given file. In case the filename ends with ".xml",
 * a PMML file is loaded or, if that fails, options are loaded from XML.
 * 

 * 
 * -d filename 

 * Saves classifier built from the training data into the given file. In case
 * the filename ends with ".xml" the options are saved XML, not the model.
 * 

 * 
 * -v 

 * Outputs no statistics for the training data.
 * 

 * 
 * -o 

 * Outputs statistics only, not the classifier.
 * 

 * 
 * -i 

 * Outputs information-retrieval statistics per class.
 * 

 * 
 * -k 

 * Outputs information-theoretic statistics.
 * 

 * 
 * -p range 

 * Outputs predictions for test instances (or the train instances if no test
 * instances provided and -no-cv is used), along with the attributes in the
 * specified range (and nothing else). Use '-p 0' if no attributes are desired.
 * 

 * 
 * -distribution 

 * Outputs the distribution instead of only the prediction in conjunction with
 * the '-p' option (only nominal classes).
 * 

 * 
 * -r 

 * Outputs cumulative margin distribution (and nothing else).
 * 

 * 
 * -g 

 * Only for classifiers that implement "Graphable." Outputs the graph
 * representation of the classifier (and nothing else).
 * 

 * 
 * -xml filename | xml-string 

 * Retrieves the options from the XML-data instead of the command line.
 * 

 * 
 * -threshold-file file 

 * The file to save the threshold data to. The format is determined by the
 * extensions, e.g., '.arff' for ARFF format or '.csv' for CSV.
 * 

 * 
 * -threshold-label label 

 * The class label to determine the threshold data for (default is the first
 * label)
 * 

 * 
 * -------------------------------------------------------------------
 * 

 * 
 * Example usage as the main of a classifier (called FunkyClassifier):
 *  
 * public static void main(String [] args) {
 *   runClassifier(new FunkyClassifier(), args);
 * }
 * 
 
 * 
 * 
 * ------------------------------------------------------------------
 * 

 * 
 * Example usage from within an application:  
 * Instances trainInstances = ... instances got from somewhere
 * Instances testInstances = ... instances got from somewhere
 * Classifier scheme = ... scheme got from somewhere
 * 
 * Evaluation evaluation = new Evaluation(trainInstances);
 * evaluation.evaluateModel(scheme, testInstances);
 * System.out.println(evaluation.toSummaryString());
 * 
 
 * 
 * 
 * @author Eibe Frank ([email protected])
 * @author Len Trigg ([email protected])
 * @version $Revision: 10974 $
 */
public class Evaluation implements Summarizable, RevisionHandler {

  /** The number of classes. */
  protected int m_NumClasses;

  /** The number of folds for a cross-validation. */
  protected int m_NumFolds;

  /** The weight of all incorrectly classified instances. */
  protected double m_Incorrect;

  /** The weight of all correctly classified instances. */
  protected double m_Correct;

  /** The weight of all unclassified instances. */
  protected double m_Unclassified;

  /*** The weight of all instances that had no class assigned to them. */
  protected double m_MissingClass;

  /** The weight of all instances that had a class assigned to them. */
  protected double m_WithClass;

  /** Array for storing the confusion matrix. */
  protected double[][] m_ConfusionMatrix;

  /** The names of the classes. */
  protected String[] m_ClassNames;

  /** Is the class nominal or numeric? */
  protected boolean m_ClassIsNominal;

  /** The prior probabilities of the classes */
  protected double[] m_ClassPriors;

  /** The sum of counts for priors */
  protected double m_ClassPriorsSum;

  /** The cost matrix (if given). */
  protected CostMatrix m_CostMatrix;

  /** The total cost of predictions (includes instance weights) */
  protected double m_TotalCost;

  /** Sum of errors. */
  protected double m_SumErr;

  /** Sum of absolute errors. */
  protected double m_SumAbsErr;

  /** Sum of squared errors. */
  protected double m_SumSqrErr;

  /** Sum of class values. */
  protected double m_SumClass;

  /** Sum of squared class values. */
  protected double m_SumSqrClass;

  /*** Sum of predicted values. */
  protected double m_SumPredicted;

  /** Sum of squared predicted values. */
  protected double m_SumSqrPredicted;

  /** Sum of predicted * class values. */
  protected double m_SumClassPredicted;

  /** Sum of absolute errors of the prior */
  protected double m_SumPriorAbsErr;

  /** Sum of absolute errors of the prior */
  protected double m_SumPriorSqrErr;

  /** Total Kononenko & Bratko Information */
  protected double m_SumKBInfo;

  /*** Resolution of the margin histogram */
  protected static int k_MarginResolution = 500;

  /** Cumulative margin distribution */
  protected double m_MarginCounts[];

  /** Number of non-missing class training instances seen */
  protected int m_NumTrainClassVals;

  /** Array containing all numeric training class values seen */
  protected double[] m_TrainClassVals;

  /** Array containing all numeric training class weights */
  protected double[] m_TrainClassWeights;

  /** Numeric class error estimator for prior */
  protected Estimator m_PriorErrorEstimator;

  /** Numeric class error estimator for scheme */
  protected Estimator m_ErrorEstimator;

  /**
   * The minimum probablility accepted from an estimator to avoid taking log(0)
   * in Sf calculations.
   */
  protected static final double MIN_SF_PROB = Double.MIN_VALUE;

  /** Total entropy of prior predictions */
  protected double m_SumPriorEntropy;

  /** Total entropy of scheme predictions */
  protected double m_SumSchemeEntropy;

  /** The list of predictions that have been generated (for computing AUC) */
  private FastVector m_Predictions;

  /**
   * enables/disables the use of priors, e.g., if no training set is present in
   * case of de-serialized schemes
   */
  protected boolean m_NoPriors = false;

  /**
   * Initializes all the counters for the evaluation. Use
   * useNoPriors() if the dataset is the test set and you can't
   * initialize with the priors from the training set via
   * setPriors(Instances).
   * 
   * @param data set of training instances, to get some header information and
   *          prior class distribution information
   * @throws Exception if the class is not defined
   * @see #useNoPriors()
   * @see #setPriors(Instances)
   */
  public Evaluation(Instances data) throws Exception {

    this(data, null);
  }

  /**
   * Initializes all the counters for the evaluation and also takes a cost
   * matrix as parameter. Use useNoPriors() if the dataset is the
   * test set and you can't initialize with the priors from the training set via
   * setPriors(Instances).
   * 
   * @param data set of training instances, to get some header information and
   *          prior class distribution information
   * @param costMatrix the cost matrix---if null, default costs will be used
   * @throws Exception if cost matrix is not compatible with data, the class is
   *           not defined or the class is numeric
   * @see #useNoPriors()
   * @see #setPriors(Instances)
   */
  public Evaluation(Instances data, CostMatrix costMatrix) throws Exception {

    m_NumClasses = data.numClasses();
    m_NumFolds = 1;
    m_ClassIsNominal = data.classAttribute().isNominal();

    if (m_ClassIsNominal) {
      m_ConfusionMatrix = new double[m_NumClasses][m_NumClasses];
      m_ClassNames = new String[m_NumClasses];
      for (int i = 0; i < m_NumClasses; i++) {
        m_ClassNames[i] = data.classAttribute().value(i);
      }
    }
    m_CostMatrix = costMatrix;
    if (m_CostMatrix != null) {
      if (!m_ClassIsNominal) {
        throw new Exception("Class has to be nominal if cost matrix "
          + "given!");
      }
      if (m_CostMatrix.size() != m_NumClasses) {
        throw new Exception("Cost matrix not compatible with data!");
      }
    }
    m_ClassPriors = new double[m_NumClasses];
    setPriors(data);
    m_MarginCounts = new double[k_MarginResolution + 1];
  }

  /**
   * Returns the area under ROC for those predictions that have been collected
   * in the evaluateClassifier(Classifier, Instances) method. Returns
   * Instance.missingValue() if the area is not available.
   * 
   * @param classIndex the index of the class to consider as "positive"
   * @return the area under the ROC curve or not a number
   */
  public double areaUnderROC(int classIndex) {

    // Check if any predictions have been collected
    if (m_Predictions == null) {
      return Instance.missingValue();
    } else {
      ThresholdCurve tc = new ThresholdCurve();
      Instances result = tc.getCurve(m_Predictions, classIndex);
      return ThresholdCurve.getROCArea(result);
    }
  }

  /**
   * Calculates the weighted (by class size) AUC.
   * 
   * @return the weighted AUC.
   */
  public double weightedAreaUnderROC() {
    double[] classCounts = new double[m_NumClasses];
    double classCountSum = 0;

    for (int i = 0; i < m_NumClasses; i++) {
      for (int j = 0; j < m_NumClasses; j++) {
        classCounts[i] += m_ConfusionMatrix[i][j];
      }
      classCountSum += classCounts[i];
    }

    double aucTotal = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      double temp = areaUnderROC(i);
      if (!Instance.isMissingValue(temp)) {
        aucTotal += (temp * classCounts[i]);
      }
    }

    return aucTotal / classCountSum;
  }

  /**
   * Returns a copy of the confusion matrix.
   * 
   * @return a copy of the confusion matrix as a two-dimensional array
   */
  public double[][] confusionMatrix() {

    double[][] newMatrix = new double[m_ConfusionMatrix.length][0];

    for (int i = 0; i < m_ConfusionMatrix.length; i++) {
      newMatrix[i] = new double[m_ConfusionMatrix[i].length];
      System.arraycopy(m_ConfusionMatrix[i], 0, newMatrix[i], 0,
        m_ConfusionMatrix[i].length);
    }
    return newMatrix;
  }

  /**
   * Performs a (stratified if class is nominal) cross-validation for a
   * classifier on a set of instances. Now performs a deep copy of the
   * classifier before each call to buildClassifier() (just in case the
   * classifier is not initialized properly).
   * 
   * @param classifier the classifier with any options set.
   * @param data the data on which the cross-validation is to be performed
   * @param numFolds the number of folds for the cross-validation
   * @param random random number generator for randomization
   * @param forPredictionsString varargs parameter that, if supplied, is
   *          expected to hold a StringBuffer to print predictions to, a Range
   *          of attributes to output and a Boolean (true if the distribution is
   *          to be printed)
   * @throws Exception if a classifier could not be generated successfully or
   *           the class is not defined
   */
  public void crossValidateModel(Classifier classifier, Instances data,
    int numFolds, Random random, Object... forPredictionsPrinting)
    throws Exception {

    // Make a copy of the data we can reorder
    data = new Instances(data);
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
      data.stratify(numFolds);
    }

    // We assume that the first element is a StringBuffer, the second a Range
    // (attributes
    // to output) and the third a Boolean (whether or not to output a
    // distribution instead
    // of just a classification)
    if (forPredictionsPrinting.length > 0) {
      // print the header first
      StringBuffer buff = (StringBuffer) forPredictionsPrinting[0];
      Range attsToOutput = (Range) forPredictionsPrinting[1];
      boolean printDist = ((Boolean) forPredictionsPrinting[2]).booleanValue();
      printClassificationsHeader(data, attsToOutput, printDist, buff);
    }

    // Do the folds
    for (int i = 0; i < numFolds; i++) {
      Instances train = data.trainCV(numFolds, i, random);
      setPriors(train);
      Classifier copiedClassifier = Classifier.makeCopy(classifier);
      copiedClassifier.buildClassifier(train);
      Instances test = data.testCV(numFolds, i);
      evaluateModel(copiedClassifier, test, forPredictionsPrinting);
    }
    m_NumFolds = numFolds;
  }

  /**
   * Performs a (stratified if class is nominal) cross-validation for a
   * classifier on a set of instances.
   * 
   * @param classifierString a string naming the class of the classifier
   * @param data the data on which the cross-validation is to be performed
   * @param numFolds the number of folds for the cross-validation
   * @param options the options to the classifier. Any options
   * @param random the random number generator for randomizing the data accepted
   *          by the classifier will be removed from this array.
   * @throws Exception if a classifier could not be generated successfully or
   *           the class is not defined
   */
  public void crossValidateModel(String classifierString, Instances data,
    int numFolds, String[] options, Random random) throws Exception {

    crossValidateModel(Classifier.forName(classifierString, options), data,
      numFolds, random);
  }

  /**
   * Evaluates a classifier with the options given in an array of strings.
   * 
   * 
   * Valid options are:
   * 

   * 
   * -t filename 

   * Name of the file with the training data. (required)
   * 

   * 
   * -T filename 

   * Name of the file with the test data. If missing a cross-validation is
   * performed.
   * 

   * 
   * -c index 

   * Index of the class attribute (1, 2, ...; default: last).
   * 

   * 
   * -x number 

   * The number of folds for the cross-validation (default: 10).
   * 

   * 
   * -no-cv 

   * No cross validation. If no test file is provided, no evaluation is done.
   * 

   * 
   * -split-percentage percentage 

   * Sets the percentage for the train/test set split, e.g., 66.
   * 

   * 
   * -preserve-order 

   * Preserves the order in the percentage split instead of randomizing the data
   * first with the seed value ('-s').
   * 

   * 
   * -s seed 

   * Random number seed for the cross-validation and percentage split (default:
   * 1).
   * 

   * 
   * -m filename 

   * The name of a file containing a cost matrix.
   * 

   * 
   * -l filename 

   * Loads classifier from the given file. In case the filename ends with
   * ".xml",a PMML file is loaded or, if that fails, options are loaded from
   * XML.
   * 

   * 
   * -d filename 

   * Saves classifier built from the training data into the given file. In case
   * the filename ends with ".xml" the options are saved XML, not the model.
   * 

   * 
   * -v 

   * Outputs no statistics for the training data.
   * 

   * 
   * -o 

   * Outputs statistics only, not the classifier.
   * 

   * 
   * -i 

   * Outputs detailed information-retrieval statistics per class.
   * 

   * 
   * -k 

   * Outputs information-theoretic statistics.
   * 

   * 
   * -p range 

   * Outputs predictions for test instances (or the train instances if no test
   * instances provided and -no-cv is used), along with the attributes in the
   * specified range (and nothing else). Use '-p 0' if no attributes are
   * desired.
   * 

   * 
   * -distribution 

   * Outputs the distribution instead of only the prediction in conjunction with
   * the '-p' option (only nominal classes).
   * 

   * 
   * -r 

   * Outputs cumulative margin distribution (and nothing else).
   * 

   * 
   * -g 

   * Only for classifiers that implement "Graphable." Outputs the graph
   * representation of the classifier (and nothing else).
   * 

   * 
   * -xml filename | xml-string 

   * Retrieves the options from the XML-data instead of the command line.
   * 

   * 
   * -threshold-file file 

   * The file to save the threshold data to. The format is determined by the
   * extensions, e.g., '.arff' for ARFF format or '.csv' for CSV.
   * 

   * 
   * -threshold-label label 

   * The class label to determine the threshold data for (default is the first
   * label)
   * 

   * 
   * @param classifierString class of machine learning classifier as a string
   * @param options the array of string containing the options
   * @throws Exception if model could not be evaluated successfully
   * @return a string describing the results
   */
  public static String evaluateModel(String classifierString, String[] options)
    throws Exception {

    Classifier classifier;

    // Create classifier
    try {
      classifier = (Classifier) Class.forName(classifierString).newInstance();
    } catch (Exception e) {
      throw new Exception("Can't find class with name " + classifierString
        + '.');
    }
    return evaluateModel(classifier, options);
  }

  /**
   * A test method for this class. Just extracts the first command line argument
   * as a classifier class name and calls evaluateModel.
   * 
   * @param args an array of command line arguments, the first of which must be
   *          the class name of a classifier.
   */
  public static void main(String[] args) {

    try {
      if (args.length == 0) {
        throw new Exception("The first argument must be the class name"
          + " of a classifier");
      }
      String classifier = args[0];
      args[0] = "";
      System.out.println(evaluateModel(classifier, args));
    } catch (Exception ex) {
      ex.printStackTrace();
      System.err.println(ex.getMessage());
    }
  }

  /**
   * Evaluates a classifier with the options given in an array of strings.
   * 

   * 
   * Valid options are:
   * 

   * 
   * -t name of training file 

   * Name of the file with the training data. (required)
   * 

   * 
   * -T name of test file 

   * Name of the file with the test data. If missing a cross-validation is
   * performed.
   * 

   * 
   * -c class index 

   * Index of the class attribute (1, 2, ...; default: last).
   * 

   * 
   * -x number of folds 

   * The number of folds for the cross-validation (default: 10).
   * 

   * 
   * -no-cv 

   * No cross validation. If no test file is provided, no evaluation is done.
   * 

   * 
   * -split-percentage percentage 

   * Sets the percentage for the train/test set split, e.g., 66.
   * 

   * 
   * -preserve-order 

   * Preserves the order in the percentage split instead of randomizing the data
   * first with the seed value ('-s').
   * 

   * 
   * -s seed 

   * Random number seed for the cross-validation and percentage split (default:
   * 1).
   * 

   * 
   * -m file with cost matrix 

   * The name of a file containing a cost matrix.
   * 

   * 
   * -l filename 

   * Loads classifier from the given file. In case the filename ends with
   * ".xml",a PMML file is loaded or, if that fails, options are loaded from
   * XML.
   * 

   * 
   * -d filename 

   * Saves classifier built from the training data into the given file. In case
   * the filename ends with ".xml" the options are saved XML, not the model.
   * 

   * 
   * -v 

   * Outputs no statistics for the training data.
   * 

   * 
   * -o 

   * Outputs statistics only, not the classifier.
   * 

   * 
   * -i 

   * Outputs detailed information-retrieval statistics per class.
   * 

   * 
   * -k 

   * Outputs information-theoretic statistics.
   * 

   * 
   * -p range 

   * Outputs predictions for test instances (or the train instances if no test
   * instances provided and -no-cv is used), along with the attributes in the
   * specified range (and nothing else). Use '-p 0' if no attributes are
   * desired.
   * 

   * 
   * -distribution 

   * Outputs the distribution instead of only the prediction in conjunction with
   * the '-p' option (only nominal classes).
   * 

   * 
   * -r 

   * Outputs cumulative margin distribution (and nothing else).
   * 

   * 
   * -g 

   * Only for classifiers that implement "Graphable." Outputs the graph
   * representation of the classifier (and nothing else).
   * 

   * 
   * -xml filename | xml-string 

   * Retrieves the options from the XML-data instead of the command line.
   * 

   * 
   * @param classifier machine learning classifier
   * @param options the array of string containing the options
   * @throws Exception if model could not be evaluated successfully
   * @return a string describing the results
   */
  public static String evaluateModel(Classifier classifier, String[] options)
    throws Exception {

    Instances train = null, tempTrain, test = null, template = null;
    int seed = 1, folds = 10, classIndex = -1;
    boolean noCrossValidation = false;
    String trainFileName, testFileName, sourceClass, classIndexString, seedString, foldsString, objectInputFileName, objectOutputFileName, attributeRangeString;
    boolean noOutput = false, printClassifications = false, trainStatistics =
      true, printMargins = false, printComplexityStatistics = false, printGraph =
      false, classStatistics = false, printSource = false;
    StringBuffer text = new StringBuffer();
    DataSource trainSource = null, testSource = null;
    ObjectInputStream objectInputStream = null;
    BufferedInputStream xmlInputStream = null;
    CostMatrix costMatrix = null;
    StringBuffer schemeOptionsText = null;
    Range attributesToOutput = null;
    long trainTimeStart = 0, trainTimeElapsed = 0, testTimeStart = 0, testTimeElapsed =
      0;
    String xml = "";
    String[] optionsTmp = null;
    Classifier classifierBackup;
    boolean printDistribution = false;
    int actualClassIndex = -1; // 0-based class index
    String splitPercentageString = "";
    double splitPercentage = -1;
    boolean preserveOrder = false;
    boolean trainSetPresent = false;
    boolean testSetPresent = false;
    String thresholdFile;
    String thresholdLabel;
    StringBuffer predsBuff = null; // predictions from cross-validation

    // help requested?
    if (Utils.getFlag("h", options) || Utils.getFlag("help", options)) {

      // global info requested as well?
      boolean globalInfo =
        Utils.getFlag("synopsis", options) || Utils.getFlag("info", options);

      throw new Exception("\nHelp requested."
        + makeOptionString(classifier, globalInfo));
    }

    try {
      // do we get the input from XML instead of normal parameters?
      xml = Utils.getOption("xml", options);
      if (!xml.equals("")) {
        options = new XMLOptions(xml).toArray();
      }

      // is the input model only the XML-Options, i.e. w/o built model?
      optionsTmp = new String[options.length];
      for (int i = 0; i < options.length; i++) {
        optionsTmp[i] = options[i];
      }

      String tmpO = Utils.getOption('l', optionsTmp);
      // if (Utils.getOption('l', optionsTmp).toLowerCase().endsWith(".xml")) {
      if (tmpO.endsWith(".xml")) {
        // try to load file as PMML first
        boolean success = false;
        try {
          PMMLModel pmmlModel = PMMLFactory.getPMMLModel(tmpO);
          if (pmmlModel instanceof PMMLClassifier) {
            classifier = ((PMMLClassifier) pmmlModel);
            success = true;
          }
        } catch (IllegalArgumentException ex) {
          success = false;
        }
        if (!success) {
          // load options from serialized data ('-l' is automatically erased!)
          XMLClassifier xmlserial = new XMLClassifier();
          Classifier cl =
            (Classifier) xmlserial.read(Utils.getOption('l', options));

          // merge options
          optionsTmp = new String[options.length + cl.getOptions().length];
          System.arraycopy(cl.getOptions(), 0, optionsTmp, 0,
            cl.getOptions().length);
          System.arraycopy(options, 0, optionsTmp, cl.getOptions().length,
            options.length);
          options = optionsTmp;
        }
      }

      noCrossValidation = Utils.getFlag("no-cv", options);
      // Get basic options (options the same for all schemes)
      classIndexString = Utils.getOption('c', options);
      if (classIndexString.length() != 0) {
        if (classIndexString.equals("first")) {
          classIndex = 1;
        } else if (classIndexString.equals("last")) {
          classIndex = -1;
        } else {
          classIndex = Integer.parseInt(classIndexString);
        }
      }
      trainFileName = Utils.getOption('t', options);
      objectInputFileName = Utils.getOption('l', options);
      objectOutputFileName = Utils.getOption('d', options);
      testFileName = Utils.getOption('T', options);
      foldsString = Utils.getOption('x', options);
      if (foldsString.length() != 0) {
        folds = Integer.parseInt(foldsString);
      }
      seedString = Utils.getOption('s', options);
      if (seedString.length() != 0) {
        seed = Integer.parseInt(seedString);
      }
      if (trainFileName.length() == 0) {
        if (objectInputFileName.length() == 0) {
          throw new Exception("No training file and no object "
            + "input file given.");
        }
        if (testFileName.length() == 0) {
          throw new Exception("No training file and no test " + "file given.");
        }
      } else if ((objectInputFileName.length() != 0)
        && ((!(classifier instanceof UpdateableClassifier)) || (testFileName
          .length() == 0))) {
        throw new Exception("Classifier not incremental, or no "
          + "test file provided: can't " + "use both train and model file.");
      }
      try {
        if (trainFileName.length() != 0) {
          trainSetPresent = true;
          trainSource = new DataSource(trainFileName);
        }
        if (testFileName.length() != 0) {
          testSetPresent = true;
          testSource = new DataSource(testFileName);
        }
        if (objectInputFileName.length() != 0) {
          if (objectInputFileName.endsWith(".xml")) {
            // if this is the case then it means that a PMML classifier was
            // successfully loaded earlier in the code
            objectInputStream = null;
            xmlInputStream = null;
          } else {
            InputStream is = new FileInputStream(objectInputFileName);
            if (objectInputFileName.endsWith(".gz")) {
              is = new GZIPInputStream(is);
            }
            // load from KOML?
            if (!(objectInputFileName.endsWith(".koml") && KOML.isPresent())) {
              objectInputStream = new ObjectInputStream(is);
              xmlInputStream = null;
            } else {
              objectInputStream = null;
              xmlInputStream = new BufferedInputStream(is);
            }
          }
        }
      } catch (Exception e) {
        throw new Exception("Can't open file " + e.getMessage() + '.');
      }
      if (testSetPresent) {
        template = test = testSource.getStructure();
        if (classIndex != -1) {
          test.setClassIndex(classIndex - 1);
        } else {
          if ((test.classIndex() == -1) || (classIndexString.length() != 0)) {
            test.setClassIndex(test.numAttributes() - 1);
          }
        }
        actualClassIndex = test.classIndex();
      } else {
        // percentage split
        splitPercentageString = Utils.getOption("split-percentage", options);
        if (splitPercentageString.length() != 0) {
          if (foldsString.length() != 0) {
            throw new Exception(
              "Percentage split cannot be used in conjunction with "
                + "cross-validation ('-x').");
          }
          splitPercentage = Double.parseDouble(splitPercentageString);
          if ((splitPercentage <= 0) || (splitPercentage >= 100)) {
            throw new Exception("Percentage split value needs be >0 and <100.");
          }
        } else {
          splitPercentage = -1;
        }
        preserveOrder = Utils.getFlag("preserve-order", options);
        if (preserveOrder) {
          if (splitPercentage == -1) {
            throw new Exception(
              "Percentage split ('-split-percentage') is missing.");
          }
        }
        // create new train/test sources
        if (splitPercentage > 0) {
          testSetPresent = true;
          Instances tmpInst = trainSource.getDataSet(actualClassIndex);
          if (!preserveOrder) {
            tmpInst.randomize(new Random(seed));
          }
          int trainSize =
            (int) Math.round(tmpInst.numInstances() * splitPercentage / 100);
          int testSize = tmpInst.numInstances() - trainSize;
          Instances trainInst = new Instances(tmpInst, 0, trainSize);
          Instances testInst = new Instances(tmpInst, trainSize, testSize);
          trainSource = new DataSource(trainInst);
          testSource = new DataSource(testInst);
          template = test = testSource.getStructure();
          if (classIndex != -1) {
            test.setClassIndex(classIndex - 1);
          } else {
            if ((test.classIndex() == -1) || (classIndexString.length() != 0)) {
              test.setClassIndex(test.numAttributes() - 1);
            }
          }
          actualClassIndex = test.classIndex();
        }
      }
      if (trainSetPresent) {
        template = train = trainSource.getStructure();
        if (classIndex != -1) {
          train.setClassIndex(classIndex - 1);
        } else {
          if ((train.classIndex() == -1) || (classIndexString.length() != 0)) {
            train.setClassIndex(train.numAttributes() - 1);
          }
        }
        actualClassIndex = train.classIndex();
        if ((testSetPresent) && !test.equalHeaders(train)) {
          throw new IllegalArgumentException(
            "Train and test file not compatible!");
        }
      }
      if (template == null) {
        throw new Exception("No actual dataset provided to use as template");
      }
      costMatrix =
        handleCostOption(Utils.getOption('m', options), template.numClasses());

      classStatistics = Utils.getFlag('i', options);
      noOutput = Utils.getFlag('o', options);
      trainStatistics = !Utils.getFlag('v', options);
      printComplexityStatistics = Utils.getFlag('k', options);
      printMargins = Utils.getFlag('r', options);
      printGraph = Utils.getFlag('g', options);
      sourceClass = Utils.getOption('z', options);
      printSource = (sourceClass.length() != 0);
      printDistribution = Utils.getFlag("distribution", options);
      thresholdFile = Utils.getOption("threshold-file", options);
      thresholdLabel = Utils.getOption("threshold-label", options);

      // Check -p option
      try {
        attributeRangeString = Utils.getOption('p', options);
      } catch (Exception e) {
        throw new Exception(e.getMessage()
          + "\nNOTE: the -p option has changed. "
          + "It now expects a parameter specifying a range of attributes "
          + "to list with the predictions. Use '-p 0' for none.");
      }
      if (attributeRangeString.length() != 0) {
        printClassifications = true;
        noOutput = true;
        if (!attributeRangeString.equals("0")) {
          attributesToOutput = new Range(attributeRangeString);
        }
      }

      if (!printClassifications && printDistribution) {
        throw new Exception("Cannot print distribution without '-p' option!");
      }

      // if no training file given, we don't have any priors
      if ((!trainSetPresent) && (printComplexityStatistics)) {
        throw new Exception(
          "Cannot print complexity statistics ('-k') without training file ('-t')!");
      }

      // If a model file is given, we can't process
      // scheme-specific options
      if (objectInputFileName.length() != 0) {
        Utils.checkForRemainingOptions(options);
      } else {

        // Set options for classifier
        if (classifier instanceof OptionHandler) {
          for (String option : options) {
            if (option.length() != 0) {
              if (schemeOptionsText == null) {
                schemeOptionsText = new StringBuffer();
              }
              if (option.indexOf(' ') != -1) {
                schemeOptionsText.append('"' + option + "\" ");
              } else {
                schemeOptionsText.append(option + " ");
              }
            }
          }
          ((OptionHandler) classifier).setOptions(options);
        }
      }
      Utils.checkForRemainingOptions(options);
    } catch (Exception e) {
      throw new Exception("\nWeka exception: " + e.getMessage()
        + makeOptionString(classifier, false));
    }

    // Setup up evaluation objects
    Evaluation trainingEvaluation =
      new Evaluation(new Instances(template, 0), costMatrix);
    Evaluation testingEvaluation =
      new Evaluation(new Instances(template, 0), costMatrix);

    // disable use of priors if no training file given
    if (!trainSetPresent) {
      testingEvaluation.useNoPriors();
    }

    if (objectInputFileName.length() != 0) {
      // Load classifier from file
      if (objectInputStream != null) {
        classifier = (Classifier) objectInputStream.readObject();
        // try and read a header (if present)
        Instances savedStructure = null;
        try {
          savedStructure = (Instances) objectInputStream.readObject();
        } catch (Exception ex) {
          // don't make a fuss
        }
        if (savedStructure != null) {
          // test for compatibility with template
          if (!template.equalHeaders(savedStructure)) {
            throw new Exception("training and test set are not compatible");
          }
        }
        objectInputStream.close();
      } else if (xmlInputStream != null) {
        // whether KOML is available has already been checked (objectInputStream
        // would null otherwise)!
        classifier = (Classifier) KOML.read(xmlInputStream);
        xmlInputStream.close();
      }
    }

    // backup of fully setup classifier for cross-validation
    classifierBackup = Classifier.makeCopy(classifier);

    // Build the classifier if no object file provided
    if ((classifier instanceof UpdateableClassifier)
      && (testSetPresent || noCrossValidation) && (costMatrix == null)
      && (trainSetPresent)) {
      // Build classifier incrementally
      trainingEvaluation.setPriors(train);
      testingEvaluation.setPriors(train);
      trainTimeStart = System.currentTimeMillis();
      if (objectInputFileName.length() == 0) {
        classifier.buildClassifier(train);
      }
      Instance trainInst;
      while (trainSource.hasMoreElements(train)) {
        trainInst = trainSource.nextElement(train);
        trainingEvaluation.updatePriors(trainInst);
        testingEvaluation.updatePriors(trainInst);
        ((UpdateableClassifier) classifier).updateClassifier(trainInst);
      }
      trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    } else if (objectInputFileName.length() == 0) {
      // Build classifier in one go
      tempTrain = trainSource.getDataSet(actualClassIndex);
      trainingEvaluation.setPriors(tempTrain);
      testingEvaluation.setPriors(tempTrain);
      trainTimeStart = System.currentTimeMillis();
      classifier.buildClassifier(tempTrain);
      trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    }

    // Save the classifier if an object output file is provided
    if (objectOutputFileName.length() != 0) {
      OutputStream os = new FileOutputStream(objectOutputFileName);
      // binary
      if (!(objectOutputFileName.endsWith(".xml") || (objectOutputFileName
        .endsWith(".koml") && KOML.isPresent()))) {
        if (objectOutputFileName.endsWith(".gz")) {
          os = new GZIPOutputStream(os);
        }
        ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
        objectOutputStream.writeObject(classifier);
        if (template != null) {
          objectOutputStream.writeObject(template);
        }
        objectOutputStream.flush();
        objectOutputStream.close();
      }
      // KOML/XML
      else {
        BufferedOutputStream xmlOutputStream = new BufferedOutputStream(os);
        if (objectOutputFileName.endsWith(".xml")) {
          XMLSerialization xmlSerial = new XMLClassifier();
          xmlSerial.write(xmlOutputStream, classifier);
        } else
        // whether KOML is present has already been checked
        // if not present -> ".koml" is interpreted as binary - see above
        if (objectOutputFileName.endsWith(".koml")) {
          KOML.write(xmlOutputStream, classifier);
        }
        xmlOutputStream.close();
      }
    }

    // If classifier is drawable output string describing graph
    if ((classifier instanceof Drawable) && (printGraph)) {
      return ((Drawable) classifier).graph();
    }

    // Output the classifier as equivalent source
    if ((classifier instanceof Sourcable) && (printSource)) {
      return wekaStaticWrapper((Sourcable) classifier, sourceClass);
    }

    // Output model
    if (!(noOutput || printMargins)) {
      if (classifier instanceof OptionHandler) {
        if (schemeOptionsText != null) {
          text.append("\nOptions: " + schemeOptionsText);
          text.append("\n");
        }
      }
      text.append("\n" + classifier.toString() + "\n");
    }

    if (!printMargins && (costMatrix != null)) {
      text.append("\n=== Evaluation Cost Matrix ===\n\n");
      text.append(costMatrix.toString());
    }

    // Output test instance predictions only
    if (printClassifications) {
      DataSource source = testSource;
      predsBuff = new StringBuffer();
      // no test set -> use train set
      if (source == null && noCrossValidation) {
        source = trainSource;
        predsBuff.append("\n=== Predictions on training data ===\n\n");
      } else {
        predsBuff.append("\n=== Predictions on test data ===\n\n");
      }
      if (source != null) {
        /*
         * return printClassifications(classifierClassifications, new
         * Instances(template, 0), source, actualClassIndex + 1,
         * attributesToOutput, printDistribution);
         */
        printClassifications(classifier, new Instances(template, 0), source,
          actualClassIndex + 1, attributesToOutput, printDistribution,
          predsBuff);
        // return predsText.toString();
      }
    }

    // Compute error estimate from training data
    if ((trainStatistics) && (trainSetPresent)) {

      if ((classifier instanceof UpdateableClassifier)
        && (testSetPresent || noCrossValidation) && (costMatrix == null)) {

        // Classifier was trained incrementally, so we have to
        // reset the source.
        trainSource.reset();

        // Incremental testing
        train = trainSource.getStructure(actualClassIndex);
        testTimeStart = System.currentTimeMillis();
        Instance trainInst;
        while (trainSource.hasMoreElements(train)) {
          trainInst = trainSource.nextElement(train);
          trainingEvaluation.evaluateModelOnce(classifier, trainInst);
        }
        testTimeElapsed = System.currentTimeMillis() - testTimeStart;
      } else {
        testTimeStart = System.currentTimeMillis();
        trainingEvaluation.evaluateModel(classifier,
          trainSource.getDataSet(actualClassIndex));
        testTimeElapsed = System.currentTimeMillis() - testTimeStart;
      }

      // Print the results of the training evaluation
      if (printMargins) {
        return trainingEvaluation.toCumulativeMarginDistributionString();
      } else {
        if (!printClassifications) {
          text.append("\nTime taken to build model: "
            + Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");

          if (splitPercentage > 0) {
            text.append("\nTime taken to test model on training split: ");
          } else {
            text.append("\nTime taken to test model on training data: ");
          }
          text.append(Utils.doubleToString(testTimeElapsed / 1000.0, 2)
            + " seconds");

          if (splitPercentage > 0) {
            text.append(trainingEvaluation.toSummaryString(
              "\n\n=== Error on training" + " split ===\n",
              printComplexityStatistics));
          } else {
            text.append(trainingEvaluation.toSummaryString(
              "\n\n=== Error on training" + " data ===\n",
              printComplexityStatistics));
          }

          if (template.classAttribute().isNominal()) {
            if (classStatistics) {
              text.append("\n\n" + trainingEvaluation.toClassDetailsString());
            }

            text.append("\n\n" + trainingEvaluation.toMatrixString());
          }
        }
      }
    }

    // Compute proper error estimates
    if (testSource != null) {
      // Testing is on the supplied test data
      testSource.reset();
      test = testSource.getStructure(test.classIndex());
      Instance testInst;
      while (testSource.hasMoreElements(test)) {
        testInst = testSource.nextElement(test);
        testingEvaluation.evaluateModelOnceAndRecordPrediction(classifier,
          testInst);
      }

      if (splitPercentage > 0) {
        if (!printClassifications) {
          text.append("\n\n"
            + testingEvaluation.toSummaryString(
              "=== Error on test split ===\n", printComplexityStatistics));
        }
      } else {
        if (!printClassifications) {
          text.append("\n\n"
            + testingEvaluation.toSummaryString("=== Error on test data ===\n",
              printComplexityStatistics));
        }
      }

    } else if (trainSource != null) {
      if (!noCrossValidation) {
        // Testing is via cross-validation on training data
        Random random = new Random(seed);
        // use untrained (!) classifier for cross-validation
        classifier = Classifier.makeCopy(classifierBackup);
        if (!printClassifications) {
          testingEvaluation.crossValidateModel(classifier,
            trainSource.getDataSet(actualClassIndex), folds, random);
          if (template.classAttribute().isNumeric()) {
            text.append("\n\n\n"
              + testingEvaluation.toSummaryString("=== Cross-validation ===\n",
                printComplexityStatistics));
          } else {
            text.append("\n\n\n"
              + testingEvaluation.toSummaryString("=== Stratified "
                + "cross-validation ===\n", printComplexityStatistics));
          }
        } else {
          predsBuff = new StringBuffer();
          predsBuff.append("\n=== Predictions under cross-validation ===\n\n");
          testingEvaluation.crossValidateModel(classifier,
            trainSource.getDataSet(actualClassIndex), folds, random, predsBuff,
            attributesToOutput, new Boolean(printDistribution));
          /*
           * if (template.classAttribute().isNumeric()) { text.append("\n\n\n" +
           * testingEvaluation. toSummaryString("=== Cross-validation ===\n",
           * printComplexityStatistics)); } else { text.append("\n\n\n" +
           * testingEvaluation. toSummaryString("=== Stratified " +
           * "cross-validation ===\n", printComplexityStatistics)); }
           */
        }
      }
    }
    if (template.classAttribute().isNominal() && !printClassifications
      && (!noCrossValidation || (testSource != null))) {

      if (classStatistics) {
        text.append("\n\n" + testingEvaluation.toClassDetailsString());
      }
      text.append("\n\n" + testingEvaluation.toMatrixString());
    }

    // predictions from cross-validation?
    if (predsBuff != null) {
      text.append("\n" + predsBuff);
    }

    if ((thresholdFile.length() != 0) && template.classAttribute().isNominal()) {
      int labelIndex = 0;
      if (thresholdLabel.length() != 0) {
        labelIndex = template.classAttribute().indexOfValue(thresholdLabel);
      }
      if (labelIndex == -1) {
        throw new IllegalArgumentException("Class label '" + thresholdLabel
          + "' is unknown!");
      }
      ThresholdCurve tc = new ThresholdCurve();
      Instances result =
        tc.getCurve(testingEvaluation.predictions(), labelIndex);
      DataSink.write(thresholdFile, result);
    }

    return text.toString();
  }

  /**
   * Attempts to load a cost matrix.
   * 
   * @param costFileName the filename of the cost matrix
   * @param numClasses the number of classes that should be in the cost matrix
   *          (only used if the cost file is in old format).
   * @return a CostMatrix value, or null if costFileName is empty
   * @throws Exception if an error occurs.
   */
  protected static CostMatrix handleCostOption(String costFileName,
    int numClasses) throws Exception {

    if ((costFileName != null) && (costFileName.length() != 0)) {
      System.out
        .println("NOTE: The behaviour of the -m option has changed between WEKA 3.0"
          + " and WEKA 3.1. -m now carries out cost-sensitive *evaluation*"
          + " only. For cost-sensitive *prediction*, use one of the"
          + " cost-sensitive metaschemes such as"
          + " weka.classifiers.meta.CostSensitiveClassifier or"
          + " weka.classifiers.meta.MetaCost");

      Reader costReader = null;
      try {
        costReader = new BufferedReader(new FileReader(costFileName));
      } catch (Exception e) {
        throw new Exception("Can't open file " + e.getMessage() + '.');
      }
      try {
        // First try as a proper cost matrix format
        return new CostMatrix(costReader);
      } catch (Exception ex) {
        try {
          // Now try as the poxy old format :-)
          // System.err.println("Attempting to read old format cost file");
          try {
            costReader.close(); // Close the old one
            costReader = new BufferedReader(new FileReader(costFileName));
          } catch (Exception e) {
            throw new Exception("Can't open file " + e.getMessage() + '.');
          }
          CostMatrix costMatrix = new CostMatrix(numClasses);
          // System.err.println("Created default cost matrix");
          costMatrix.readOldFormat(costReader);
          return costMatrix;
          // System.err.println("Read old format");
        } catch (Exception e2) {
          // re-throw the original exception
          // System.err.println("Re-throwing original exception");
          throw ex;
        }
      }
    } else {
      return null;
    }
  }

  /**
   * Evaluates the classifier on a given set of instances. Note that the data
   * must have exactly the same format (e.g. order of attributes) as the data
   * used to train the classifier! Otherwise the results will generally be
   * meaningless.
   * 
   * @param classifier machine learning classifier
   * @param data set of test instances for evaluation
   * @param forPredictionsString varargs parameter that, if supplied, is
   *          expected to hold a StringBuffer to print predictions to, a Range
   *          of attributes to output and a Boolean (true if the distribution is
   *          to be printed)
   * @return the predictions
   * @throws Exception if model could not be evaluated successfully
   */
  public double[] evaluateModel(Classifier classifier, Instances data,
    Object... forPredictionsPrinting) throws Exception {
    // for predictions printing
    StringBuffer buff = null;
    Range attsToOutput = null;
    boolean printDist = false;

    double predictions[] = new double[data.numInstances()];

    if (forPredictionsPrinting.length > 0) {
      buff = (StringBuffer) forPredictionsPrinting[0];
      attsToOutput = (Range) forPredictionsPrinting[1];
      printDist = ((Boolean) forPredictionsPrinting[2]).booleanValue();
    }

    // Need to be able to collect predictions if appropriate (for AUC)

    for (int i = 0; i < data.numInstances(); i++) {
      predictions[i] =
        evaluateModelOnceAndRecordPrediction(classifier, data.instance(i));
      if (buff != null) {
        buff.append(predictionText(classifier, data.instance(i), i,
          attsToOutput, printDist));
      }
    }

    return predictions;
  }

  /**
   * Evaluates the classifier on a single instance and records the prediction
   * (if the class is nominal).
   * 
   * @param classifier machine learning classifier
   * @param instance the test instance to be classified
   * @return the prediction made by the clasifier
   * @throws Exception if model could not be evaluated successfully or the data
   *           contains string attributes
   */
  public double evaluateModelOnceAndRecordPrediction(Classifier classifier,
    Instance instance) throws Exception {

    Instance classMissing = (Instance) instance.copy();
    double pred = 0;
    classMissing.setDataset(instance.dataset());
    classMissing.setClassMissing();
    if (m_ClassIsNominal) {
      if (m_Predictions == null) {
        m_Predictions = new FastVector();
      }
      double[] dist = classifier.distributionForInstance(classMissing);
      pred = Utils.maxIndex(dist);
      if (dist[(int) pred] <= 0) {
        pred = Instance.missingValue();
      }
      updateStatsForClassifier(dist, instance);
      m_Predictions.addElement(new NominalPrediction(instance.classValue(),
        dist, instance.weight()));
    } else {
      pred = classifier.classifyInstance(classMissing);
      updateStatsForPredictor(pred, instance);
    }
    return pred;
  }

  /**
   * Evaluates the classifier on a single instance.
   * 
   * @param classifier machine learning classifier
   * @param instance the test instance to be classified
   * @return the prediction made by the clasifier
   * @throws Exception if model could not be evaluated successfully or the data
   *           contains string attributes
   */
  public double evaluateModelOnce(Classifier classifier, Instance instance)
    throws Exception {

    Instance classMissing = (Instance) instance.copy();
    double pred = 0;
    classMissing.setDataset(instance.dataset());
    classMissing.setClassMissing();
    if (m_ClassIsNominal) {
      double[] dist = classifier.distributionForInstance(classMissing);
      pred = Utils.maxIndex(dist);
      if (dist[(int) pred] <= 0) {
        pred = Instance.missingValue();
      }
      updateStatsForClassifier(dist, instance);
    } else {
      pred = classifier.classifyInstance(classMissing);
      updateStatsForPredictor(pred, instance);
    }
    return pred;
  }

  /**
   * Evaluates the supplied distribution on a single instance.
   * 
   * @param dist the supplied distribution
   * @param instance the test instance to be classified
   * @return the prediction
   * @throws Exception if model could not be evaluated successfully
   */
  public double evaluateModelOnce(double[] dist, Instance instance)
    throws Exception {
    double pred;
    if (m_ClassIsNominal) {
      pred = Utils.maxIndex(dist);
      if (dist[(int) pred] <= 0) {
        pred = Instance.missingValue();
      }
      updateStatsForClassifier(dist, instance);
    } else {
      pred = dist[0];
      updateStatsForPredictor(pred, instance);
    }
    return pred;
  }

  /**
   * Evaluates the supplied distribution on a single instance.
   * 
   * @param dist the supplied distribution
   * @param instance the test instance to be classified
   * @return the prediction
   * @throws Exception if model could not be evaluated successfully
   */
  public double evaluateModelOnceAndRecordPrediction(double[] dist,
    Instance instance) throws Exception {
    double pred;
    if (m_ClassIsNominal) {
      if (m_Predictions == null) {
        m_Predictions = new FastVector();
      }
      pred = Utils.maxIndex(dist);
      if (dist[(int) pred] <= 0) {
        pred = Instance.missingValue();
      }
      updateStatsForClassifier(dist, instance);
      m_Predictions.addElement(new NominalPrediction(instance.classValue(),
        dist, instance.weight()));
    } else {
      pred = dist[0];
      updateStatsForPredictor(pred, instance);
    }
    return pred;
  }

  /**
   * Evaluates the supplied prediction on a single instance.
   * 
   * @param prediction the supplied prediction
   * @param instance the test instance to be classified
   * @throws Exception if model could not be evaluated successfully
   */
  public void evaluateModelOnce(double prediction, Instance instance)
    throws Exception {

    if (m_ClassIsNominal) {
      updateStatsForClassifier(makeDistribution(prediction), instance);
    } else {
      updateStatsForPredictor(prediction, instance);
    }
  }

  /**
   * Returns the predictions that have been collected.
   * 
   * @return a reference to the FastVector containing the predictions that have
   *         been collected. This should be null if no predictions have been
   *         collected (e.g. if the class is numeric).
   */
  public FastVector predictions() {

    return m_Predictions;
  }

  /**
   * Wraps a static classifier in enough source to test using the weka class
   * libraries.
   * 
   * @param classifier a Sourcable Classifier
   * @param className the name to give to the source code class
   * @return the source for a static classifier that can be tested with weka
   *         libraries.
   * @throws Exception if code-generation fails
   */
  public static String wekaStaticWrapper(Sourcable classifier, String className)
    throws Exception {

    StringBuffer result = new StringBuffer();
    String staticClassifier = classifier.toSource(className);

    result.append("// Generated with Weka " + Version.VERSION + "\n");
    result.append("//\n");
    result
      .append("// This code is public domain and comes with no warranty.\n");
    result.append("//\n");
    result.append("// Timestamp: " + new Date() + "\n");
    result.append("\n");
    result.append("package weka.classifiers;\n");
    result.append("\n");
    result.append("import weka.core.Attribute;\n");
    result.append("import weka.core.Capabilities;\n");
    result.append("import weka.core.Capabilities.Capability;\n");
    result.append("import weka.core.Instance;\n");
    result.append("import weka.core.Instances;\n");
    result.append("import weka.core.RevisionUtils;\n");
    result.append("import weka.classifiers.Classifier;\n");
    result.append("\n");
    result.append("public class WekaWrapper\n");
    result.append("  extends Classifier {\n");

    // globalInfo
    result.append("\n");
    result.append("  /**\n");
    result.append("   * Returns only the toString() method.\n");
    result.append("   *\n");
    result.append("   * @return a string describing the classifier\n");
    result.append("   */\n");
    result.append("  public String globalInfo() {\n");
    result.append("    return toString();\n");
    result.append("  }\n");

    // getCapabilities
    result.append("\n");
    result.append("  /**\n");
    result.append("   * Returns the capabilities of this classifier.\n");
    result.append("   *\n");
    result.append("   * @return the capabilities\n");
    result.append("   */\n");
    result.append("  public Capabilities getCapabilities() {\n");
    result.append(((Classifier) classifier).getCapabilities().toSource(
      "result", 4));
    result.append("    return result;\n");
    result.append("  }\n");

    // buildClassifier
    result.append("\n");
    result.append("  /**\n");
    result.append("   * only checks the data against its capabilities.\n");
    result.append("   *\n");
    result.append("   * @param i the training data\n");
    result.append("   */\n");
    result
      .append("  public void buildClassifier(Instances i) throws Exception {\n");
    result.append("    // can classifier handle the data?\n");
    result.append("    getCapabilities().testWithFail(i);\n");
    result.append("  }\n");

    // classifyInstance
    result.append("\n");
    result.append("  /**\n");
    result.append("   * Classifies the given instance.\n");
    result.append("   *\n");
    result.append("   * @param i the instance to classify\n");
    result.append("   * @return the classification result\n");
    result.append("   */\n");
    result
      .append("  public double classifyInstance(Instance i) throws Exception {\n");
    result.append("    Object[] s = new Object[i.numAttributes()];\n");
    result.append("    \n");
    result.append("    for (int j = 0; j < s.length; j++) {\n");
    result.append("      if (!i.isMissing(j)) {\n");
    result.append("        if (i.attribute(j).isNominal())\n");
    result.append("          s[j] = new String(i.stringValue(j));\n");
    result.append("        else if (i.attribute(j).isNumeric())\n");
    result.append("          s[j] = new Double(i.value(j));\n");
    result.append("      }\n");
    result.append("    }\n");
    result.append("    \n");
    result.append("    // set class value to missing\n");
    result.append("    s[i.classIndex()] = null;\n");
    result.append("    \n");
    result.append("    return " + className + ".classify(s);\n");
    result.append("  }\n");

    // getRevision
    result.append("\n");
    result.append("  /**\n");
    result.append("   * Returns the revision string.\n");
    result.append("   * \n");
    result.append("   * @return        the revision\n");
    result.append("   */\n");
    result.append("  public String getRevision() {\n");
    result.append("    return RevisionUtils.extract(\"1.0\");\n");
    result.append("  }\n");

    // toString
    result.append("\n");
    result.append("  /**\n");
    result
      .append("   * Returns only the classnames and what classifier it is based on.\n");
    result.append("   *\n");
    result.append("   * @return a short description\n");
    result.append("   */\n");
    result.append("  public String toString() {\n");
    result.append("    return \"Auto-generated classifier wrapper, based on "
      + classifier.getClass().getName() + " (generated with Weka "
      + Version.VERSION + ").\\n" + "\" + this.getClass().getName() + \"/"
      + className + "\";\n");
    result.append("  }\n");

    // main
    result.append("\n");
    result.append("  /**\n");
    result.append("   * Runs the classfier from commandline.\n");
    result.append("   *\n");
    result.append("   * @param args the commandline arguments\n");
    result.append("   */\n");
    result.append("  public static void main(String args[]) {\n");
    result.append("    runClassifier(new WekaWrapper(), args);\n");
    result.append("  }\n");
    result.append("}\n");

    // actual classifier code
    result.append("\n");
    result.append(staticClassifier);

    return result.toString();
  }

  /**
   * Gets the number of test instances that had a known class value (actually
   * the sum of the weights of test instances with known class value).
   * 
   * @return the number of test instances with known class
   */
  public final double numInstances() {

    return m_WithClass;
  }

  /**
   * Gets the number of instances incorrectly classified (that is, for which an
   * incorrect prediction was made). (Actually the sum of the weights of these
   * instances)
   * 
   * @return the number of incorrectly classified instances
   */
  public final double incorrect() {

    return m_Incorrect;
  }

  /**
   * Gets the percentage of instances incorrectly classified (that is, for which
   * an incorrect prediction was made).
   * 
   * @return the percent of incorrectly classified instances (between 0 and 100)
   */
  public final double pctIncorrect() {

    return 100 * m_Incorrect / m_WithClass;
  }

  /**
   * Gets the total cost, that is, the cost of each prediction times the weight
   * of the instance, summed over all instances.
   * 
   * @return the total cost
   */
  public final double totalCost() {

    return m_TotalCost;
  }

  /**
   * Gets the average cost, that is, total cost of misclassifications (incorrect
   * plus unclassified) over the total number of instances.
   * 
   * @return the average cost.
   */
  public final double avgCost() {

    return m_TotalCost / m_WithClass;
  }

  /**
   * Gets the number of instances correctly classified (that is, for which a
   * correct prediction was made). (Actually the sum of the weights of these
   * instances)
   * 
   * @return the number of correctly classified instances
   */
  public final double correct() {

    return m_Correct;
  }

  /**
   * Gets the percentage of instances correctly classified (that is, for which a
   * correct prediction was made).
   * 
   * @return the percent of correctly classified instances (between 0 and 100)
   */
  public final double pctCorrect() {

    return 100 * m_Correct / m_WithClass;
  }

  /**
   * Gets the number of instances not classified (that is, for which no
   * prediction was made by the classifier). (Actually the sum of the weights of
   * these instances)
   * 
   * @return the number of unclassified instances
   */
  public final double unclassified() {

    return m_Unclassified;
  }

  /**
   * Gets the percentage of instances not classified (that is, for which no
   * prediction was made by the classifier).
   * 
   * @return the percent of unclassified instances (between 0 and 100)
   */
  public final double pctUnclassified() {

    return 100 * m_Unclassified / m_WithClass;
  }

  /**
   * Returns the estimated error rate or the root mean squared error (if the
   * class is numeric). If a cost matrix was given this error rate gives the
   * average cost.
   * 
   * @return the estimated error rate (between 0 and 1, or between 0 and maximum
   *         cost)
   */
  public final double errorRate() {

    if (!m_ClassIsNominal) {
      return Math.sqrt(m_SumSqrErr / (m_WithClass - m_Unclassified));
    }
    if (m_CostMatrix == null) {
      return m_Incorrect / m_WithClass;
    } else {
      return avgCost();
    }
  }

  /**
   * Returns value of kappa statistic if class is nominal.
   * 
   * @return the value of the kappa statistic
   */
  public final double kappa() {

    double[] sumRows = new double[m_ConfusionMatrix.length];
    double[] sumColumns = new double[m_ConfusionMatrix.length];
    double sumOfWeights = 0;
    for (int i = 0; i < m_ConfusionMatrix.length; i++) {
      for (int j = 0; j < m_ConfusionMatrix.length; j++) {
        sumRows[i] += m_ConfusionMatrix[i][j];
        sumColumns[j] += m_ConfusionMatrix[i][j];
        sumOfWeights += m_ConfusionMatrix[i][j];
      }
    }
    double correct = 0, chanceAgreement = 0;
    for (int i = 0; i < m_ConfusionMatrix.length; i++) {
      chanceAgreement += (sumRows[i] * sumColumns[i]);
      correct += m_ConfusionMatrix[i][i];
    }
    chanceAgreement /= (sumOfWeights * sumOfWeights);
    correct /= sumOfWeights;

    if (chanceAgreement < 1) {
      return (correct - chanceAgreement) / (1 - chanceAgreement);
    } else {
      return 1;
    }
  }

  /**
   * Returns the correlation coefficient if the class is numeric.
   * 
   * @return the correlation coefficient
   * @throws Exception if class is not numeric
   */
  public final double correlationCoefficient() throws Exception {

    if (m_ClassIsNominal) {
      throw new Exception("Can't compute correlation coefficient: "
        + "class is nominal!");
    }

    double correlation = 0;
    double varActual =
      m_SumSqrClass - m_SumClass * m_SumClass / (m_WithClass - m_Unclassified);
    double varPredicted =
      m_SumSqrPredicted - m_SumPredicted * m_SumPredicted
        / (m_WithClass - m_Unclassified);
    double varProd =
      m_SumClassPredicted - m_SumClass * m_SumPredicted
        / (m_WithClass - m_Unclassified);

    if (varActual * varPredicted <= 0) {
      correlation = 0.0;
    } else {
      correlation = varProd / Math.sqrt(varActual * varPredicted);
    }

    return correlation;
  }

  /**
   * Returns the mean absolute error. Refers to the error of the predicted
   * values for numeric classes, and the error of the predicted probability
   * distribution for nominal classes.
   * 
   * @return the mean absolute error
   */
  public final double meanAbsoluteError() {

    return m_SumAbsErr / (m_WithClass - m_Unclassified);
  }

  /**
   * Returns the mean absolute error of the prior.
   * 
   * @return the mean absolute error
   */
  public final double meanPriorAbsoluteError() {

    if (m_NoPriors) {
      return Double.NaN;
    }

    return m_SumPriorAbsErr / m_WithClass;
  }

  /**
   * Returns the relative absolute error.
   * 
   * @return the relative absolute error
   * @throws Exception if it can't be computed
   */
  public final double relativeAbsoluteError() throws Exception {

    if (m_NoPriors) {
      return Double.NaN;
    }

    return 100 * meanAbsoluteError() / meanPriorAbsoluteError();
  }

  /**
   * Returns the root mean squared error.
   * 
   * @return the root mean squared error
   */
  public final double rootMeanSquaredError() {

    return Math.sqrt(m_SumSqrErr / (m_WithClass - m_Unclassified));
  }

  /**
   * Returns the root mean prior squared error.
   * 
   * @return the root mean prior squared error
   */
  public final double rootMeanPriorSquaredError() {

    if (m_NoPriors) {
      return Double.NaN;
    }

    return Math.sqrt(m_SumPriorSqrErr / m_WithClass);
  }

  /**
   * Returns the root relative squared error if the class is numeric.
   * 
   * @return the root relative squared error
   */
  public final double rootRelativeSquaredError() {

    if (m_NoPriors) {
      return Double.NaN;
    }

    return 100.0 * rootMeanSquaredError() / rootMeanPriorSquaredError();
  }

  /**
   * Calculate the entropy of the prior distribution
   * 
   * @return the entropy of the prior distribution
   * @throws Exception if the class is not nominal
   */
  public final double priorEntropy() throws Exception {

    if (!m_ClassIsNominal) {
      throw new Exception("Can't compute entropy of class prior: "
        + "class numeric!");
    }

    if (m_NoPriors) {
      return Double.NaN;
    }

    double entropy = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      entropy -=
        m_ClassPriors[i] / m_ClassPriorsSum
          * Utils.log2(m_ClassPriors[i] / m_ClassPriorsSum);
    }
    return entropy;
  }

  /**
   * Return the total Kononenko & Bratko Information score in bits
   * 
   * @return the K&B information score
   * @throws Exception if the class is not nominal
   */
  public final double KBInformation() throws Exception {

    if (!m_ClassIsNominal) {
      throw new Exception("Can't compute K&B Info score: " + "class numeric!");
    }

    if (m_NoPriors) {
      return Double.NaN;
    }

    return m_SumKBInfo;
  }

  /**
   * Return the Kononenko & Bratko Information score in bits per instance.
   * 
   * @return the K&B information score
   * @throws Exception if the class is not nominal
   */
  public final double KBMeanInformation() throws Exception {

    if (!m_ClassIsNominal) {
      throw new Exception("Can't compute K&B Info score: " + "class numeric!");
    }

    if (m_NoPriors) {
      return Double.NaN;
    }

    return m_SumKBInfo / (m_WithClass - m_Unclassified);
  }

  /**
   * Return the Kononenko & Bratko Relative Information score
   * 
   * @return the K&B relative information score
   * @throws Exception if the class is not nominal
   */
  public final double KBRelativeInformation() throws Exception {

    if (!m_ClassIsNominal) {
      throw new Exception("Can't compute K&B Info score: " + "class numeric!");
    }

    if (m_NoPriors) {
      return Double.NaN;
    }

    return 100.0 * KBInformation() / priorEntropy();
  }

  /**
   * Returns the total entropy for the null model
   * 
   * @return the total null model entropy
   */
  public final double SFPriorEntropy() {

    if (m_NoPriors) {
      return Double.NaN;
    }

    return m_SumPriorEntropy;
  }

  /**
   * Returns the entropy per instance for the null model
   * 
   * @return the null model entropy per instance
   */
  public final double SFMeanPriorEntropy() {

    if (m_NoPriors) {
      return Double.NaN;
    }

    return m_SumPriorEntropy / m_WithClass;
  }

  /**
   * Returns the total entropy for the scheme
   * 
   * @return the total scheme entropy
   */
  public final double SFSchemeEntropy() {

    if (m_NoPriors) {
      return Double.NaN;
    }

    return m_SumSchemeEntropy;
  }

  /**
   * Returns the entropy per instance for the scheme
   * 
   * @return the scheme entropy per instance
   */
  public final double SFMeanSchemeEntropy() {

    if (m_NoPriors) {
      return Double.NaN;
    }

    return m_SumSchemeEntropy / (m_WithClass - m_Unclassified);
  }

  /**
   * Returns the total SF, which is the null model entropy minus the scheme
   * entropy.
   * 
   * @return the total SF
   */
  public final double SFEntropyGain() {

    if (m_NoPriors) {
      return Double.NaN;
    }

    return m_SumPriorEntropy - m_SumSchemeEntropy;
  }

  /**
   * Returns the SF per instance, which is the null model entropy minus the
   * scheme entropy, per instance.
   * 
   * @return the SF per instance
   */
  public final double SFMeanEntropyGain() {

    if (m_NoPriors) {
      return Double.NaN;
    }

    return (m_SumPriorEntropy - m_SumSchemeEntropy)
      / (m_WithClass - m_Unclassified);
  }

  /**
   * Output the cumulative margin distribution as a string suitable for input
   * for gnuplot or similar package.
   * 
   * @return the cumulative margin distribution
   * @throws Exception if the class attribute is nominal
   */
  public String toCumulativeMarginDistributionString() throws Exception {

    if (!m_ClassIsNominal) {
      throw new Exception("Class must be nominal for margin distributions");
    }
    String result = "";
    double cumulativeCount = 0;
    double margin;
    for (int i = 0; i <= k_MarginResolution; i++) {
      if (m_MarginCounts[i] != 0) {
        cumulativeCount += m_MarginCounts[i];
        margin = i * 2.0 / k_MarginResolution - 1.0;
        result =
          result + Utils.doubleToString(margin, 7, 3) + ' '
            + Utils.doubleToString(cumulativeCount * 100 / m_WithClass, 7, 3)
            + '\n';
      } else if (i == 0) {
        result =
          Utils.doubleToString(-1.0, 7, 3) + ' '
            + Utils.doubleToString(0, 7, 3) + '\n';
      }
    }
    return result;
  }

  /**
   * Calls toSummaryString() with no title and no complexity stats
   * 
   * @return a summary description of the classifier evaluation
   */
  @Override
  public String toSummaryString() {

    return toSummaryString("", false);
  }

  /**
   * Calls toSummaryString() with a default title.
   * 
   * @param printComplexityStatistics if true, complexity statistics are
   *          returned as well
   * @return the summary string
   */
  public String toSummaryString(boolean printComplexityStatistics) {

    return toSummaryString("=== Summary ===\n", printComplexityStatistics);
  }

  /**
   * Outputs the performance statistics in summary form. Lists number (and
   * percentage) of instances classified correctly, incorrectly and
   * unclassified. Outputs the total number of instances classified, and the
   * number of instances (if any) that had no class value provided.
   * 
   * @param title the title for the statistics
   * @param printComplexityStatistics if true, complexity statistics are
   *          returned as well
   * @return the summary as a String
   */
  public String toSummaryString(String title, boolean printComplexityStatistics) {

    StringBuffer text = new StringBuffer();

    if (printComplexityStatistics && m_NoPriors) {
      printComplexityStatistics = false;
      System.err
        .println("Priors disabled, cannot print complexity statistics!");
    }

    text.append(title + "\n");
    try {
      if (m_WithClass > 0) {
        if (m_ClassIsNominal) {

          text.append("Correctly Classified Instances     ");
          text.append(Utils.doubleToString(correct(), 12, 4) + "     "
            + Utils.doubleToString(pctCorrect(), 12, 4) + " %\n");
          text.append("Incorrectly Classified Instances   ");
          text.append(Utils.doubleToString(incorrect(), 12, 4) + "     "
            + Utils.doubleToString(pctIncorrect(), 12, 4) + " %\n");
          text.append("Kappa statistic                    ");
          text.append(Utils.doubleToString(kappa(), 12, 4) + "\n");

          if (m_CostMatrix != null) {
            text.append("Total Cost                         ");
            text.append(Utils.doubleToString(totalCost(), 12, 4) + "\n");
            text.append("Average Cost                       ");
            text.append(Utils.doubleToString(avgCost(), 12, 4) + "\n");
          }
          if (printComplexityStatistics) {
            text.append("K&B Relative Info Score            ");
            text.append(Utils.doubleToString(KBRelativeInformation(), 12, 4)
              + " %\n");
            text.append("K&B Information Score              ");
            text.append(Utils.doubleToString(KBInformation(), 12, 4) + " bits");
            text.append(Utils.doubleToString(KBMeanInformation(), 12, 4)
              + " bits/instance\n");
          }
        } else {
          text.append("Correlation coefficient            ");
          text.append(Utils.doubleToString(correlationCoefficient(), 12, 4)
            + "\n");
        }
        if (printComplexityStatistics) {
          text.append("Class complexity | order 0         ");
          text.append(Utils.doubleToString(SFPriorEntropy(), 12, 4) + " bits");
          text.append(Utils.doubleToString(SFMeanPriorEntropy(), 12, 4)
            + " bits/instance\n");
          text.append("Class complexity | scheme          ");
          text.append(Utils.doubleToString(SFSchemeEntropy(), 12, 4) + " bits");
          text.append(Utils.doubleToString(SFMeanSchemeEntropy(), 12, 4)
            + " bits/instance\n");
          text.append("Complexity improvement     (Sf)    ");
          text.append(Utils.doubleToString(SFEntropyGain(), 12, 4) + " bits");
          text.append(Utils.doubleToString(SFMeanEntropyGain(), 12, 4)
            + " bits/instance\n");
        }

        text.append("Mean absolute error                ");
        text.append(Utils.doubleToString(meanAbsoluteError(), 12, 4) + "\n");
        text.append("Root mean squared error            ");
        text.append(Utils.doubleToString(rootMeanSquaredError(), 12, 4) + "\n");
        if (!m_NoPriors) {
          text.append("Relative absolute error            ");
          text.append(Utils.doubleToString(relativeAbsoluteError(), 12, 4)
            + " %\n");
          text.append("Root relative squared error        ");
          text.append(Utils.doubleToString(rootRelativeSquaredError(), 12, 4)
            + " %\n");
        }
      }
      if (Utils.gr(unclassified(), 0)) {
        text.append("UnClassified Instances             ");
        text.append(Utils.doubleToString(unclassified(), 12, 4) + "     "
          + Utils.doubleToString(pctUnclassified(), 12, 4) + " %\n");
      }
      text.append("Total Number of Instances          ");
      text.append(Utils.doubleToString(m_WithClass, 12, 4) + "\n");
      if (m_MissingClass > 0) {
        text.append("Ignored Class Unknown Instances            ");
        text.append(Utils.doubleToString(m_MissingClass, 12, 4) + "\n");
      }
    } catch (Exception ex) {
      // Should never occur since the class is known to be nominal
      // here
      System.err.println("Arggh - Must be a bug in Evaluation class");
    }

    return text.toString();
  }

  /**
   * Calls toMatrixString() with a default title.
   * 
   * @return the confusion matrix as a string
   * @throws Exception if the class is numeric
   */
  public String toMatrixString() throws Exception {

    return toMatrixString("=== Confusion Matrix ===\n");
  }

  /**
   * Outputs the performance statistics as a classification confusion matrix.
   * For each class value, shows the distribution of predicted class values.
   * 
   * @param title the title for the confusion matrix
   * @return the confusion matrix as a String
   * @throws Exception if the class is numeric
   */
  public String toMatrixString(String title) throws Exception {

    StringBuffer text = new StringBuffer();
    char[] IDChars =
    { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
      'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z' };
    int IDWidth;
    boolean fractional = false;

    if (!m_ClassIsNominal) {
      throw new Exception("Evaluation: No confusion matrix possible!");
    }

    // Find the maximum value in the matrix
    // and check for fractional display requirement
    double maxval = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      for (int j = 0; j < m_NumClasses; j++) {
        double current = m_ConfusionMatrix[i][j];
        if (current < 0) {
          current *= -10;
        }
        if (current > maxval) {
          maxval = current;
        }
        double fract = current - Math.rint(current);
        if (!fractional && ((Math.log(fract) / Math.log(10)) >= -2)) {
          fractional = true;
        }
      }
    }

    IDWidth =
      1 + Math.max(
        (int) (Math.log(maxval) / Math.log(10) + (fractional ? 3 : 0)),
        (int) (Math.log(m_NumClasses) / Math.log(IDChars.length)));
    text.append(title).append("\n");
    for (int i = 0; i < m_NumClasses; i++) {
      if (fractional) {
        text.append(" ").append(num2ShortID(i, IDChars, IDWidth - 3))
          .append("   ");
      } else {
        text.append(" ").append(num2ShortID(i, IDChars, IDWidth));
      }
    }
    text.append("   <-- classified as\n");
    for (int i = 0; i < m_NumClasses; i++) {
      for (int j = 0; j < m_NumClasses; j++) {
        text.append(" ").append(
          Utils.doubleToString(m_ConfusionMatrix[i][j], IDWidth,
            (fractional ? 2 : 0)));
      }
      text.append(" | ").append(num2ShortID(i, IDChars, IDWidth)).append(" = ")
        .append(m_ClassNames[i]).append("\n");
    }
    return text.toString();
  }

  /**
   * Generates a breakdown of the accuracy for each class (with default title),
   * incorporating various information-retrieval statistics, such as true/false
   * positive rate, precision/recall/F-Measure. Should be useful for ROC curves,
   * recall/precision curves.
   * 
   * @return the statistics presented as a string
   * @throws Exception if class is not nominal
   */
  public String toClassDetailsString() throws Exception {

    return toClassDetailsString("=== Detailed Accuracy By Class ===\n");
  }

  /**
   * Generates a breakdown of the accuracy for each class, incorporating various
   * information-retrieval statistics, such as true/false positive rate,
   * precision/recall/F-Measure. Should be useful for ROC curves,
   * recall/precision curves.
   * 
   * @param title the title to prepend the stats string with
   * @return the statistics presented as a string
   * @throws Exception if class is not nominal
   */
  public String toClassDetailsString(String title) throws Exception {

    if (!m_ClassIsNominal) {
      throw new Exception("Evaluation: No confusion matrix possible!");
    }

    StringBuffer text =
      new StringBuffer(title + "\n               TP Rate   FP Rate"
        + "   Precision   Recall" + "  F-Measure   ROC Area  Class\n");
    for (int i = 0; i < m_NumClasses; i++) {
      text.append(
        "               " + Utils.doubleToString(truePositiveRate(i), 7, 3))
        .append("   ");
      text.append(Utils.doubleToString(falsePositiveRate(i), 7, 3)).append(
        "    ");
      text.append(Utils.doubleToString(precision(i), 7, 3)).append("   ");
      text.append(Utils.doubleToString(recall(i), 7, 3)).append("   ");
      text.append(Utils.doubleToString(fMeasure(i), 7, 3)).append("    ");

      double rocVal = areaUnderROC(i);
      if (Instance.isMissingValue(rocVal)) {
        text.append("  ?    ").append("    ");
      } else {
        text.append(Utils.doubleToString(rocVal, 7, 3)).append("    ");
      }
      text.append(m_ClassNames[i]).append('\n');
    }

    text.append("Weighted Avg.  "
      + Utils.doubleToString(weightedTruePositiveRate(), 7, 3));
    text
      .append("   " + Utils.doubleToString(weightedFalsePositiveRate(), 7, 3));
    text.append("    " + Utils.doubleToString(weightedPrecision(), 7, 3));
    text.append("   " + Utils.doubleToString(weightedRecall(), 7, 3));
    text.append("   " + Utils.doubleToString(weightedFMeasure(), 7, 3));
    text.append("    " + Utils.doubleToString(weightedAreaUnderROC(), 7, 3));
    text.append("\n");

    return text.toString();
  }

  /**
   * Calculate the number of true positives with respect to a particular class.
   * This is defined as
   * 

   * 
   * 
   * correctly classified positives
   * 
   * 
   * @param classIndex the index of the class to consider as "positive"
   * @return the true positive rate
   */
  public double numTruePositives(int classIndex) {

    double correct = 0;
    for (int j = 0; j < m_NumClasses; j++) {
      if (j == classIndex) {
        correct += m_ConfusionMatrix[classIndex][j];
      }
    }
    return correct;
  }

  /**
   * Calculate the true positive rate with respect to a particular class. This
   * is defined as
   * 
   * 
   * 
   * correctly classified positives
   * ------------------------------
   *       total positives
   * 
   * 
   * @param classIndex the index of the class to consider as "positive"
   * @return the true positive rate
   */
  public double truePositiveRate(int classIndex) {

    double correct = 0, total = 0;
    for (int j = 0; j < m_NumClasses; j++) {
      if (j == classIndex) {
        correct += m_ConfusionMatrix[classIndex][j];
      }
      total += m_ConfusionMatrix[classIndex][j];
    }
    if (total == 0) {
      return 0;
    }
    return correct / total;
  }

  /**
   * Calculates the weighted (by class size) true positive rate.
   * 
   * @return the weighted true positive rate.
   */
  public double weightedTruePositiveRate() {
    double[] classCounts = new double[m_NumClasses];
    double classCountSum = 0;

    for (int i = 0; i < m_NumClasses; i++) {
      for (int j = 0; j < m_NumClasses; j++) {
        classCounts[i] += m_ConfusionMatrix[i][j];
      }
      classCountSum += classCounts[i];
    }

    double truePosTotal = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      double temp = truePositiveRate(i);
      truePosTotal += (temp * classCounts[i]);
    }

    return truePosTotal / classCountSum;
  }

  /**
   * Calculate the number of true negatives with respect to a particular class.
   * This is defined as
   * 
   * 
   * 
   * correctly classified negatives
   * 
   * 
   * @param classIndex the index of the class to consider as "positive"
   * @return the true positive rate
   */
  public double numTrueNegatives(int classIndex) {

    double correct = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      if (i != classIndex) {
        for (int j = 0; j < m_NumClasses; j++) {
          if (j != classIndex) {
            correct += m_ConfusionMatrix[i][j];
          }
        }
      }
    }
    return correct;
  }

  /**
   * Calculate the true negative rate with respect to a particular class. This
   * is defined as
   * 
   * 
   * 
   * correctly classified negatives
   * ------------------------------
   *       total negatives
   * 
   * 
   * @param classIndex the index of the class to consider as "positive"
   * @return the true positive rate
   */
  public double trueNegativeRate(int classIndex) {

    double correct = 0, total = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      if (i != classIndex) {
        for (int j = 0; j < m_NumClasses; j++) {
          if (j != classIndex) {
            correct += m_ConfusionMatrix[i][j];
          }
          total += m_ConfusionMatrix[i][j];
        }
      }
    }
    if (total == 0) {
      return 0;
    }
    return correct / total;
  }

  /**
   * Calculates the weighted (by class size) true negative rate.
   * 
   * @return the weighted true negative rate.
   */
  public double weightedTrueNegativeRate() {
    double[] classCounts = new double[m_NumClasses];
    double classCountSum = 0;

    for (int i = 0; i < m_NumClasses; i++) {
      for (int j = 0; j < m_NumClasses; j++) {
        classCounts[i] += m_ConfusionMatrix[i][j];
      }
      classCountSum += classCounts[i];
    }

    double trueNegTotal = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      double temp = trueNegativeRate(i);
      trueNegTotal += (temp * classCounts[i]);
    }

    return trueNegTotal / classCountSum;
  }

  /**
   * Calculate number of false positives with respect to a particular class.
   * This is defined as
   * 
   * 
   * 
   * incorrectly classified negatives
   * 
   * 
   * @param classIndex the index of the class to consider as "positive"
   * @return the false positive rate
   */
  public double numFalsePositives(int classIndex) {

    double incorrect = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      if (i != classIndex) {
        for (int j = 0; j < m_NumClasses; j++) {
          if (j == classIndex) {
            incorrect += m_ConfusionMatrix[i][j];
          }
        }
      }
    }
    return incorrect;
  }

  /**
   * Calculate the false positive rate with respect to a particular class. This
   * is defined as
   * 
   * 
   * 
   * incorrectly classified negatives
   * --------------------------------
   *        total negatives
   * 
   * 
   * @param classIndex the index of the class to consider as "positive"
   * @return the false positive rate
   */
  public double falsePositiveRate(int classIndex) {

    double incorrect = 0, total = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      if (i != classIndex) {
        for (int j = 0; j < m_NumClasses; j++) {
          if (j == classIndex) {
            incorrect += m_ConfusionMatrix[i][j];
          }
          total += m_ConfusionMatrix[i][j];
        }
      }
    }
    if (total == 0) {
      return 0;
    }
    return incorrect / total;
  }

  /**
   * Calculates the weighted (by class size) false positive rate.
   * 
   * @return the weighted false positive rate.
   */
  public double weightedFalsePositiveRate() {
    double[] classCounts = new double[m_NumClasses];
    double classCountSum = 0;

    for (int i = 0; i < m_NumClasses; i++) {
      for (int j = 0; j < m_NumClasses; j++) {
        classCounts[i] += m_ConfusionMatrix[i][j];
      }
      classCountSum += classCounts[i];
    }

    double falsePosTotal = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      double temp = falsePositiveRate(i);
      falsePosTotal += (temp * classCounts[i]);
    }

    return falsePosTotal / classCountSum;
  }

  /**
   * Calculate number of false negatives with respect to a particular class.
   * This is defined as
   * 
   * 
   * 
   * incorrectly classified positives
   * 
   * 
   * @param classIndex the index of the class to consider as "positive"
   * @return the false positive rate
   */
  public double numFalseNegatives(int classIndex) {

    double incorrect = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      if (i == classIndex) {
        for (int j = 0; j < m_NumClasses; j++) {
          if (j != classIndex) {
            incorrect += m_ConfusionMatrix[i][j];
          }
        }
      }
    }
    return incorrect;
  }

  /**
   * Calculate the false negative rate with respect to a particular class. This
   * is defined as
   * 
   * 
   * 
   * incorrectly classified positives
   * --------------------------------
   *        total positives
   * 
   * 
   * @param classIndex the index of the class to consider as "positive"
   * @return the false positive rate
   */
  public double falseNegativeRate(int classIndex) {

    double incorrect = 0, total = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      if (i == classIndex) {
        for (int j = 0; j < m_NumClasses; j++) {
          if (j != classIndex) {
            incorrect += m_ConfusionMatrix[i][j];
          }
          total += m_ConfusionMatrix[i][j];
        }
      }
    }
    if (total == 0) {
      return 0;
    }
    return incorrect / total;
  }

  /**
   * Calculates the weighted (by class size) false negative rate.
   * 
   * @return the weighted false negative rate.
   */
  public double weightedFalseNegativeRate() {
    double[] classCounts = new double[m_NumClasses];
    double classCountSum = 0;

    for (int i = 0; i < m_NumClasses; i++) {
      for (int j = 0; j < m_NumClasses; j++) {
        classCounts[i] += m_ConfusionMatrix[i][j];
      }
      classCountSum += classCounts[i];
    }

    double falseNegTotal = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      double temp = falseNegativeRate(i);
      falseNegTotal += (temp * classCounts[i]);
    }

    return falseNegTotal / classCountSum;
  }

  /**
   * Calculate the recall with respect to a particular class. This is defined as
   * 
   * 
   * 
   * correctly classified positives
   * ------------------------------
   *       total positives
   * 
   * 
   * (Which is also the same as the truePositiveRate.)
   * 
   * @param classIndex the index of the class to consider as "positive"
   * @return the recall
   */
  public double recall(int classIndex) {

    return truePositiveRate(classIndex);
  }

  /**
   * Calculates the weighted (by class size) recall.
   * 
   * @return the weighted recall.
   */
  public double weightedRecall() {
    return weightedTruePositiveRate();
  }

  /**
   * Calculate the precision with respect to a particular class. This is defined
   * as
   * 

   * 
   * 
   * correctly classified positives
   * ------------------------------
   *  total predicted as positive
   * 
   * 
   * @param classIndex the index of the class to consider as "positive"
   * @return the precision
   */
  public double precision(int classIndex) {

    double correct = 0, total = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      if (i == classIndex) {
        correct += m_ConfusionMatrix[i][classIndex];
      }
      total += m_ConfusionMatrix[i][classIndex];
    }
    if (total == 0) {
      return 0;
    }
    return correct / total;
  }

  /**
   * Calculates the weighted (by class size) false precision.
   * 
   * @return the weighted precision.
   */
  public double weightedPrecision() {
    double[] classCounts = new double[m_NumClasses];
    double classCountSum = 0;

    for (int i = 0; i < m_NumClasses; i++) {
      for (int j = 0; j < m_NumClasses; j++) {
        classCounts[i] += m_ConfusionMatrix[i][j];
      }
      classCountSum += classCounts[i];
    }

    double precisionTotal = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      double temp = precision(i);
      precisionTotal += (temp * classCounts[i]);
    }

    return precisionTotal / classCountSum;
  }

  /**
   * Calculate the F-Measure with respect to a particular class. This is defined
   * as
   * 
   * 
   * 
   * 2 * recall * precision
   * ----------------------
   *   recall + precision
   * 
   * 
   * @param classIndex the index of the class to consider as "positive"
   * @return the F-Measure
   */
  public double fMeasure(int classIndex) {

    double precision = precision(classIndex);
    double recall = recall(classIndex);
    if ((precision + recall) == 0) {
      return 0;
    }
    return 2 * precision * recall / (precision + recall);
  }

  /**
   * Calculates the weighted (by class size) F-Measure.
   * 
   * @return the weighted F-Measure.
   */
  public double weightedFMeasure() {
    double[] classCounts = new double[m_NumClasses];
    double classCountSum = 0;

    for (int i = 0; i < m_NumClasses; i++) {
      for (int j = 0; j < m_NumClasses; j++) {
        classCounts[i] += m_ConfusionMatrix[i][j];
      }
      classCountSum += classCounts[i];
    }

    double fMeasureTotal = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      double temp = fMeasure(i);
      fMeasureTotal += (temp * classCounts[i]);
    }

    return fMeasureTotal / classCountSum;
  }

  /**
   * Sets the class prior probabilities
   * 
   * @param train the training instances used to determine the prior
   *          probabilities
   * @throws Exception if the class attribute of the instances is not set
   */
  public void setPriors(Instances train) throws Exception {
    m_NoPriors = false;

    if (!m_ClassIsNominal) {

      m_NumTrainClassVals = 0;
      m_TrainClassVals = null;
      m_TrainClassWeights = null;
      m_PriorErrorEstimator = null;
      m_ErrorEstimator = null;

      for (int i = 0; i < train.numInstances(); i++) {
        Instance currentInst = train.instance(i);
        if (!currentInst.classIsMissing()) {
          addNumericTrainClass(currentInst.classValue(), currentInst.weight());
        }
      }

    } else {
      for (int i = 0; i < m_NumClasses; i++) {
        m_ClassPriors[i] = 1;
      }
      m_ClassPriorsSum = m_NumClasses;
      for (int i = 0; i < train.numInstances(); i++) {
        if (!train.instance(i).classIsMissing()) {
          m_ClassPriors[(int) train.instance(i).classValue()] +=
            train.instance(i).weight();
          m_ClassPriorsSum += train.instance(i).weight();
        }
      }
    }
  }

  /**
   * Get the current weighted class counts
   * 
   * @return the weighted class counts
   */
  public double[] getClassPriors() {
    return m_ClassPriors;
  }

  /**
   * Updates the class prior probabilities (when incrementally training)
   * 
   * @param instance the new training instance seen
   * @throws Exception if the class of the instance is not set
   */
  public void updatePriors(Instance instance) throws Exception {
    if (!instance.classIsMissing()) {
      if (!m_ClassIsNominal) {
        if (!instance.classIsMissing()) {
          addNumericTrainClass(instance.classValue(), instance.weight());
        }
      } else {
        m_ClassPriors[(int) instance.classValue()] += instance.weight();
        m_ClassPriorsSum += instance.weight();
      }
    }
  }

  /**
   * disables the use of priors, e.g., in case of de-serialized schemes that
   * have no access to the original training set, but are evaluated on a set
   * set.
   */
  public void useNoPriors() {
    m_NoPriors = true;
  }

  /**
   * Tests whether the current evaluation object is equal to another evaluation
   * object
   * 
   * @param obj the object to compare against
   * @return true if the two objects are equal
   */
  @Override
  public boolean equals(Object obj) {

    if ((obj == null) || !(obj.getClass().equals(this.getClass()))) {
      return false;
    }
    Evaluation cmp = (Evaluation) obj;
    if (m_ClassIsNominal != cmp.m_ClassIsNominal) {
      return false;
    }
    if (m_NumClasses != cmp.m_NumClasses) {
      return false;
    }

    if (m_Incorrect != cmp.m_Incorrect) {
      return false;
    }
    if (m_Correct != cmp.m_Correct) {
      return false;
    }
    if (m_Unclassified != cmp.m_Unclassified) {
      return false;
    }
    if (m_MissingClass != cmp.m_MissingClass) {
      return false;
    }
    if (m_WithClass != cmp.m_WithClass) {
      return false;
    }

    if (m_SumErr != cmp.m_SumErr) {
      return false;
    }
    if (m_SumAbsErr != cmp.m_SumAbsErr) {
      return false;
    }
    if (m_SumSqrErr != cmp.m_SumSqrErr) {
      return false;
    }
    if (m_SumClass != cmp.m_SumClass) {
      return false;
    }
    if (m_SumSqrClass != cmp.m_SumSqrClass) {
      return false;
    }
    if (m_SumPredicted != cmp.m_SumPredicted) {
      return false;
    }
    if (m_SumSqrPredicted != cmp.m_SumSqrPredicted) {
      return false;
    }
    if (m_SumClassPredicted != cmp.m_SumClassPredicted) {
      return false;
    }

    if (m_ClassIsNominal) {
      for (int i = 0; i < m_NumClasses; i++) {
        for (int j = 0; j < m_NumClasses; j++) {
          if (m_ConfusionMatrix[i][j] != cmp.m_ConfusionMatrix[i][j]) {
            return false;
          }
        }
      }
    }

    return true;
  }

  /**
   * Prints the predictions for the given dataset into a String variable.
   * 
   * @param classifier the classifier to use
   * @param train the training data
   * @param testSource the test set
   * @param classIndex the class index (1-based), if -1 ot does not override the
   *          class index is stored in the data file (by using the last
   *          attribute)
   * @param attributesToOutput the indices of the attributes to output
   * @return the generated predictions for the attribute range
   * @throws Exception if test file cannot be opened
   */
  public static void printClassifications(Classifier classifier,
    Instances train, DataSource testSource, int classIndex,
    Range attributesToOutput, StringBuffer predsText) throws Exception {

    printClassifications(classifier, train, testSource, classIndex,
      attributesToOutput, false, predsText);
  }

  /**
   * Prints the header for the predictions output into a supplied StringBuffer
   * 
   * @param test structure of the test set to print predictions for
   * @param attributesToOutput indices of the attributes to output
   * @param printDistribution prints the complete distribution for nominal
   *          attributes, not just the predicted value
   * @param text the StringBuffer to print to
   */
  protected static void printClassificationsHeader(Instances test,
    Range attributesToOutput, boolean printDistribution, StringBuffer text) {
    // print header
    if (test.classAttribute().isNominal()) {
      if (printDistribution) {
        text.append(" inst#     actual  predicted error distribution");
      } else {
        text.append(" inst#     actual  predicted error prediction");
      }
    } else {
      text.append(" inst#     actual  predicted      error");
    }
    if (attributesToOutput != null) {
      attributesToOutput.setUpper(test.numAttributes() - 1);
      text.append(" (");
      boolean first = true;
      for (int i = 0; i < test.numAttributes(); i++) {
        if (i == test.classIndex()) {
          continue;
        }

        if (attributesToOutput.isInRange(i)) {
          if (!first) {
            text.append(",");
          }
          text.append(test.attribute(i).name());
          first = false;
        }
      }
      text.append(")");
    }
    text.append("\n");
  }

  /**
   * Prints the predictions for the given dataset into a supplied StringBuffer
   * 
   * @param classifier the classifier to use
   * @param train the training data
   * @param testSource the test set
   * @param classIndex the class index (1-based), if -1 ot does not override the
   *          class index is stored in the data file (by using the last
   *          attribute)
   * @param attributesToOutput the indices of the attributes to output
   * @param printDistribution prints the complete distribution for nominal
   *          classes, not just the predicted value
   * @param text StringBuffer to hold the printed predictions
   * @throws Exception if test file cannot be opened
   */
  public static void printClassifications(Classifier classifier,
    Instances train, DataSource testSource, int classIndex,
    Range attributesToOutput, boolean printDistribution, StringBuffer text)
    throws Exception {

    if (testSource != null) {
      Instances test = testSource.getStructure();
      if (classIndex != -1) {
        test.setClassIndex(classIndex - 1);
      } else {
        if (test.classIndex() == -1) {
          test.setClassIndex(test.numAttributes() - 1);
        }
      }

      // print the header
      printClassificationsHeader(test, attributesToOutput, printDistribution,
        text);

      // print predictions
      int i = 0;
      testSource.reset();
      test = testSource.getStructure(test.classIndex());
      while (testSource.hasMoreElements(test)) {
        Instance inst = testSource.nextElement(test);
        text.append(predictionText(classifier, inst, i, attributesToOutput,
          printDistribution));
        i++;
      }
    }
    // return text.toString();
  }

  /**
   * store the prediction made by the classifier as a string
   * 
   * @param classifier the classifier to use
   * @param inst the instance to generate text from
   * @param instNum the index in the dataset
   * @param attributesToOutput the indices of the attributes to output
   * @param printDistribution prints the complete distribution for nominal
   *          classes, not just the predicted value
   * @return the prediction as a String
   * @throws Exception if something goes wrong
   * @see #printClassifications(Classifier, Instances, String, int, Range,
   *      boolean)
   */
  protected static String predictionText(Classifier classifier, Instance inst,
    int instNum, Range attributesToOutput, boolean printDistribution)

    throws Exception {

    StringBuffer result = new StringBuffer();
    int width = 10;
    int prec = 3;

    Instance withMissing = (Instance) inst.copy();
    withMissing.setDataset(inst.dataset());
    withMissing.setMissing(withMissing.classIndex());
    double predValue = classifier.classifyInstance(withMissing);

    // index
    result.append(Utils.padLeft("" + (instNum + 1), 6));

    if (inst.dataset().classAttribute().isNumeric()) {
      // actual
      if (inst.classIsMissing()) {
        result.append(" " + Utils.padLeft("?", width));
      } else {
        result.append(" "
          + Utils.doubleToString(inst.classValue(), width, prec));
      }
      // predicted
      if (Instance.isMissingValue(predValue)) {
        result.append(" " + Utils.padLeft("?", width));
      } else {
        result.append(" " + Utils.doubleToString(predValue, width, prec));
      }
      // error
      if (Instance.isMissingValue(predValue) || inst.classIsMissing()) {
        result.append(" " + Utils.padLeft("?", width));
      } else {
        result.append(" "
          + Utils.doubleToString(predValue - inst.classValue(), width, prec));
      }
    } else {
      // actual
      result.append(" "
        + Utils.padLeft(
          ((int) inst.classValue() + 1) + ":"
            + inst.toString(inst.classIndex()), width));
      // predicted
      if (Instance.isMissingValue(predValue)) {
        result.append(" " + Utils.padLeft("?", width));
      } else {
        result.append(" "
          + Utils.padLeft(((int) predValue + 1) + ":"
            + inst.dataset().classAttribute().value((int) predValue), width));
      }
      // error?
      if (!Instance.isMissingValue(predValue) && !inst.classIsMissing()
        && ((int) predValue + 1 != (int) inst.classValue() + 1)) {
        result.append(" " + "  +  ");
      } else {
        result.append(" " + "     ");
      }
      // prediction/distribution
      if (printDistribution) {
        if (Instance.isMissingValue(predValue)) {
          result.append(" " + "?");
        } else {
          result.append(" ");
          double[] dist = classifier.distributionForInstance(withMissing);
          for (int n = 0; n < dist.length; n++) {
            if (n > 0) {
              result.append(",");
            }
            if (n == (int) predValue) {
              result.append("*");
            }
            result.append(Utils.doubleToString(dist[n], prec));
          }
        }
      } else {
        if (Instance.isMissingValue(predValue)) {
          result.append(" " + "?");
        } else {
          result.append(" "
            + Utils.doubleToString(
              classifier.distributionForInstance(withMissing)[(int) predValue],
              prec));
        }
      }
    }

    // attributes
    result.append(" " + attributeValuesString(withMissing, attributesToOutput)
      + "\n");

    return result.toString();
  }

  /**
   * Builds a string listing the attribute values in a specified range of
   * indices, separated by commas and enclosed in brackets.
   * 
   * @param instance the instance to print the values from
   * @param attRange the range of the attributes to list
   * @return a string listing values of the attributes in the range
   */
  protected static String attributeValuesString(Instance instance,
    Range attRange) {
    StringBuffer text = new StringBuffer();
    if (attRange != null) {
      boolean firstOutput = true;
      attRange.setUpper(instance.numAttributes() - 1);
      for (int i = 0; i < instance.numAttributes(); i++) {
        if (attRange.isInRange(i) && i != instance.classIndex()) {
          if (firstOutput) {
            text.append("(");
          } else {
            text.append(",");
          }
          text.append(instance.toString(i));
          firstOutput = false;
        }
      }
      if (!firstOutput) {
        text.append(")");
      }
    }
    return text.toString();
  }

  /**
   * Make up the help string giving all the command line options
   * 
   * @param classifier the classifier to include options for
   * @param globalInfo include the global information string for the classifier
   *          (if available).
   * @return a string detailing the valid command line options
   */
  protected static String makeOptionString(Classifier classifier,
    boolean globalInfo) {

    StringBuffer optionsText = new StringBuffer("");

    // General options
    optionsText.append("\n\nGeneral options:\n\n");
    optionsText.append("-h or -help\n");
    optionsText.append("\tOutput help information.\n");
    optionsText.append("-synopsis or -info\n");
    optionsText.append("\tOutput synopsis for classifier (use in conjunction "
      + " with -h)\n");
    optionsText.append("-t \n");
    optionsText.append("\tSets training file.\n");
    optionsText.append("-T \n");
    optionsText
      .append("\tSets test file. If missing, a cross-validation will be performed\n");
    optionsText.append("\ton the training data.\n");
    optionsText.append("-c \n");
    optionsText.append("\tSets index of class attribute (default: last).\n");
    optionsText.append("-x \n");
    optionsText
      .append("\tSets number of folds for cross-validation (default: 10).\n");
    optionsText.append("-no-cv\n");
    optionsText.append("\tDo not perform any cross validation.\n");
    optionsText.append("-split-percentage \n");
    optionsText
      .append("\tSets the percentage for the train/test set split, e.g., 66.\n");
    optionsText.append("-preserve-order\n");
    optionsText.append("\tPreserves the order in the percentage split.\n");
    optionsText.append("-s \n");
    optionsText
      .append("\tSets random number seed for cross-validation or percentage split\n");
    optionsText.append("\t(default: 1).\n");
    optionsText.append("-m \n");
    optionsText.append("\tSets file with cost matrix.\n");
    optionsText.append("-l \n");
    optionsText
      .append("\tSets model input file. In case the filename ends with '.xml',\n");
    optionsText
      .append("\ta PMML file is loaded or, if that fails, options are loaded\n");
    optionsText.append("\tfrom the XML file.\n");
    optionsText.append("-d \n");
    optionsText
      .append("\tSets model output file. In case the filename ends with '.xml',\n");
    optionsText
      .append("\tonly the options are saved to the XML file, not the model.\n");
    optionsText.append("-v\n");
    optionsText.append("\tOutputs no statistics for training data.\n");
    optionsText.append("-o\n");
    optionsText.append("\tOutputs statistics only, not the classifier.\n");
    optionsText.append("-i\n");
    optionsText.append("\tOutputs detailed information-retrieval");
    optionsText.append(" statistics for each class.\n");
    optionsText.append("-k\n");
    optionsText.append("\tOutputs information-theoretic statistics.\n");
    optionsText.append("-p \n");
    optionsText
      .append("\tOnly outputs predictions for test instances (or the train\n"
        + "\tinstances if no test instances provided and -no-cv is used),\n"
        + "\talong with attributes (0 for none).\n");
    optionsText.append("-distribution\n");
    optionsText
      .append("\tOutputs the distribution instead of only the prediction\n");
    optionsText
      .append("\tin conjunction with the '-p' option (only nominal classes).\n");
    optionsText.append("-r\n");
    optionsText.append("\tOnly outputs cumulative margin distribution.\n");
    if (classifier instanceof Sourcable) {
      optionsText.append("-z \n");
      optionsText.append("\tOnly outputs the source representation"
        + " of the classifier,\n\tgiving it the supplied" + " name.\n");
    }
    if (classifier instanceof Drawable) {
      optionsText.append("-g\n");
      optionsText.append("\tOnly outputs the graph representation"
        + " of the classifier.\n");
    }
    optionsText.append("-xml filename | xml-string\n");
    optionsText
      .append("\tRetrieves the options from the XML-data instead of the "
        + "command line.\n");
    optionsText.append("-threshold-file \n");
    optionsText
      .append("\tThe file to save the threshold data to.\n"
        + "\tThe format is determined by the extensions, e.g., '.arff' for ARFF \n"
        + "\tformat or '.csv' for CSV.\n");
    optionsText.append("-threshold-label \n");
    optionsText
      .append("\tThe class label to determine the threshold data for\n"
        + "\t(default is the first label)\n");

    // Get scheme-specific options
    if (classifier instanceof OptionHandler) {
      optionsText.append("\nOptions specific to "
        + classifier.getClass().getName() + ":\n\n");
      Enumeration enu = ((OptionHandler) classifier).listOptions();
      while (enu.hasMoreElements()) {
        Option option = (Option) enu.nextElement();
        optionsText.append(option.synopsis() + '\n');
        optionsText.append(option.description() + "\n");
      }
    }

    // Get global information (if available)
    if (globalInfo) {
      try {
        String gi = getGlobalInfo(classifier);
        optionsText.append(gi);
      } catch (Exception ex) {
        // quietly ignore
      }
    }
    return optionsText.toString();
  }

  /**
   * Return the global info (if it exists) for the supplied classifier
   * 
   * @param classifier the classifier to get the global info for
   * @return the global info (synopsis) for the classifier
   * @throws Exception if there is a problem reflecting on the classifier
   */
  protected static String getGlobalInfo(Classifier classifier) throws Exception {
    BeanInfo bi = Introspector.getBeanInfo(classifier.getClass());
    MethodDescriptor[] methods;
    methods = bi.getMethodDescriptors();
    Object[] args = {};
    String result =
      "\nSynopsis for " + classifier.getClass().getName() + ":\n\n";

    for (MethodDescriptor method : methods) {
      String name = method.getDisplayName();
      Method meth = method.getMethod();
      if (name.equals("globalInfo")) {
        String globalInfo = (String) (meth.invoke(classifier, args));
        result += globalInfo;
        break;
      }
    }

    return result;
  }

  /**
   * Method for generating indices for the confusion matrix.
   * 
   * @param num integer to format
   * @param IDChars the characters to use
   * @param IDWidth the width of the entry
   * @return the formatted integer as a string
   */
  protected String num2ShortID(int num, char[] IDChars, int IDWidth) {

    char ID[] = new char[IDWidth];
    int i;

    for (i = IDWidth - 1; i >= 0; i--) {
      ID[i] = IDChars[num % IDChars.length];
      num = num / IDChars.length - 1;
      if (num < 0) {
        break;
      }
    }
    for (i--; i >= 0; i--) {
      ID[i] = ' ';
    }

    return new String(ID);
  }

  /**
   * Convert a single prediction into a probability distribution with all zero
   * probabilities except the predicted value which has probability 1.0;
   * 
   * @param predictedClass the index of the predicted class
   * @return the probability distribution
   */
  protected double[] makeDistribution(double predictedClass) {

    double[] result = new double[m_NumClasses];
    if (Instance.isMissingValue(predictedClass)) {
      return result;
    }
    if (m_ClassIsNominal) {
      result[(int) predictedClass] = 1.0;
    } else {
      result[0] = predictedClass;
    }
    return result;
  }

  /**
   * Updates all the statistics about a classifiers performance for the current
   * test instance.
   * 
   * @param predictedDistribution the probabilities assigned to each class
   * @param instance the instance to be classified
   * @throws Exception if the class of the instance is not set
   */
  protected void updateStatsForClassifier(double[] predictedDistribution,
    Instance instance) throws Exception {

    int actualClass = (int) instance.classValue();

    if (!instance.classIsMissing()) {
      updateMargins(predictedDistribution, actualClass, instance.weight());

      // Determine the predicted class (doesn't detect multiple
      // classifications)
      int predictedClass = -1;
      double bestProb = 0.0;
      for (int i = 0; i < m_NumClasses; i++) {
        if (predictedDistribution[i] > bestProb) {
          predictedClass = i;
          bestProb = predictedDistribution[i];
        }
      }

      m_WithClass += instance.weight();

      // Determine misclassification cost
      if (m_CostMatrix != null) {
        if (predictedClass < 0) {
          // For missing predictions, we assume the worst possible cost.
          // This is pretty harsh.
          // Perhaps we could take the negative of the cost of a correct
          // prediction (-m_CostMatrix.getElement(actualClass,actualClass)),
          // although often this will be zero
          m_TotalCost +=
            instance.weight() * m_CostMatrix.getMaxCost(actualClass, instance);
        } else {
          m_TotalCost +=
            instance.weight()
              * m_CostMatrix.getElement(actualClass, predictedClass, instance);
        }
      }

      // Update counts when no class was predicted
      if (predictedClass < 0) {
        m_Unclassified += instance.weight();
        return;
      }

      double predictedProb =
        Math.max(MIN_SF_PROB, predictedDistribution[actualClass]);
      double priorProb =
        Math.max(MIN_SF_PROB, m_ClassPriors[actualClass] / m_ClassPriorsSum);
      if (predictedProb >= priorProb) {
        m_SumKBInfo +=
          (Utils.log2(predictedProb) - Utils.log2(priorProb))
            * instance.weight();
      } else {
        m_SumKBInfo -=
          (Utils.log2(1.0 - predictedProb) - Utils.log2(1.0 - priorProb))
            * instance.weight();
      }

      m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
      m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();

      updateNumericScores(predictedDistribution,
        makeDistribution(instance.classValue()), instance.weight());

      // Update other stats
      m_ConfusionMatrix[actualClass][predictedClass] += instance.weight();
      if (predictedClass != actualClass) {
        m_Incorrect += instance.weight();
      } else {
        m_Correct += instance.weight();
      }
    } else {
      m_MissingClass += instance.weight();
    }
  }

  /**
   * Updates all the statistics about a predictors performance for the current
   * test instance.
   * 
   * @param predictedValue the numeric value the classifier predicts
   * @param instance the instance to be classified
   * @throws Exception if the class of the instance is not set
   */
  protected void updateStatsForPredictor(double predictedValue,
    Instance instance) throws Exception {

    if (!instance.classIsMissing()) {

      // Update stats
      m_WithClass += instance.weight();
      if (Instance.isMissingValue(predictedValue)) {
        m_Unclassified += instance.weight();
        return;
      }
      m_SumClass += instance.weight() * instance.classValue();
      m_SumSqrClass +=
        instance.weight() * instance.classValue() * instance.classValue();
      m_SumClassPredicted +=
        instance.weight() * instance.classValue() * predictedValue;
      m_SumPredicted += instance.weight() * predictedValue;
      m_SumSqrPredicted += instance.weight() * predictedValue * predictedValue;

      if (m_ErrorEstimator == null) {
        setNumericPriorsFromBuffer();
      }
      double predictedProb =
        Math.max(
          m_ErrorEstimator.getProbability(predictedValue
            - instance.classValue()), MIN_SF_PROB);
      double priorProb =
        Math.max(m_PriorErrorEstimator.getProbability(instance.classValue()),
          MIN_SF_PROB);

      m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
      m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();
      m_ErrorEstimator.addValue(predictedValue - instance.classValue(),
        instance.weight());

      updateNumericScores(makeDistribution(predictedValue),
        makeDistribution(instance.classValue()), instance.weight());

    } else {
      m_MissingClass += instance.weight();
    }
  }

  /**
   * Update the cumulative record of classification margins
   * 
   * @param predictedDistribution the probability distribution predicted for the
   *          current instance
   * @param actualClass the index of the actual instance class
   * @param weight the weight assigned to the instance
   */
  protected void updateMargins(double[] predictedDistribution, int actualClass,
    double weight) {

    double probActual = predictedDistribution[actualClass];
    double probNext = 0;

    for (int i = 0; i < m_NumClasses; i++) {
      if ((i != actualClass) && (predictedDistribution[i] > probNext)) {
        probNext = predictedDistribution[i];
      }
    }

    double margin = probActual - probNext;
    int bin = (int) ((margin + 1.0) / 2.0 * k_MarginResolution);
    m_MarginCounts[bin] += weight;
  }

  /**
   * Update the numeric accuracy measures. For numeric classes, the accuracy is
   * between the actual and predicted class values. For nominal classes, the
   * accuracy is between the actual and predicted class probabilities.
   * 
   * @param predicted the predicted values
   * @param actual the actual value
   * @param weight the weight associated with this prediction
   */
  protected void updateNumericScores(double[] predicted, double[] actual,
    double weight) {

    double diff;
    double sumErr = 0, sumAbsErr = 0, sumSqrErr = 0;
    double sumPriorAbsErr = 0, sumPriorSqrErr = 0;
    for (int i = 0; i < m_NumClasses; i++) {
      diff = predicted[i] - actual[i];
      sumErr += diff;
      sumAbsErr += Math.abs(diff);
      sumSqrErr += diff * diff;
      diff = (m_ClassPriors[i] / m_ClassPriorsSum) - actual[i];
      sumPriorAbsErr += Math.abs(diff);
      sumPriorSqrErr += diff * diff;
    }
    m_SumErr += weight * sumErr / m_NumClasses;
    m_SumAbsErr += weight * sumAbsErr / m_NumClasses;
    m_SumSqrErr += weight * sumSqrErr / m_NumClasses;
    m_SumPriorAbsErr += weight * sumPriorAbsErr / m_NumClasses;
    m_SumPriorSqrErr += weight * sumPriorSqrErr / m_NumClasses;
  }

  /**
   * Adds a numeric (non-missing) training class value and weight to the buffer
   * of stored values.
   * 
   * @param classValue the class value
   * @param weight the instance weight
   */
  protected void addNumericTrainClass(double classValue, double weight) {

    if (m_TrainClassVals == null) {
      m_TrainClassVals = new double[100];
      m_TrainClassWeights = new double[100];
    }
    if (m_NumTrainClassVals == m_TrainClassVals.length) {
      double[] temp = new double[m_TrainClassVals.length * 2];
      System.arraycopy(m_TrainClassVals, 0, temp, 0, m_TrainClassVals.length);
      m_TrainClassVals = temp;

      temp = new double[m_TrainClassWeights.length * 2];
      System.arraycopy(m_TrainClassWeights, 0, temp, 0,
        m_TrainClassWeights.length);
      m_TrainClassWeights = temp;
    }
    m_TrainClassVals[m_NumTrainClassVals] = classValue;
    m_TrainClassWeights[m_NumTrainClassVals] = weight;
    m_NumTrainClassVals++;
  }

  /**
   * Sets up the priors for numeric class attributes from the training class
   * values that have been seen so far.
   */
  protected void setNumericPriorsFromBuffer() {

    double numPrecision = 0.01; // Default value
    if (m_NumTrainClassVals > 1) {
      double[] temp = new double[m_NumTrainClassVals];
      System.arraycopy(m_TrainClassVals, 0, temp, 0, m_NumTrainClassVals);
      int[] index = Utils.sort(temp);
      double lastVal = temp[index[0]];
      double deltaSum = 0;
      int distinct = 0;
      for (int i = 1; i < temp.length; i++) {
        double current = temp[index[i]];
        if (current != lastVal) {
          deltaSum += current - lastVal;
          lastVal = current;
          distinct++;
        }
      }
      if (distinct > 0) {
        numPrecision = deltaSum / distinct;
      }
    }
    m_PriorErrorEstimator = new KernelEstimator(numPrecision);
    m_ErrorEstimator = new KernelEstimator(numPrecision);
    m_ClassPriors[0] = m_ClassPriorsSum = 0;
    for (int i = 0; i < m_NumTrainClassVals; i++) {
      m_ClassPriors[0] += m_TrainClassVals[i] * m_TrainClassWeights[i];
      m_ClassPriorsSum += m_TrainClassWeights[i];
      m_PriorErrorEstimator.addValue(m_TrainClassVals[i],
        m_TrainClassWeights[i]);
    }
  }

  /**
   * Returns the revision string.
   * 
   * @return the revision
   */
  @Override
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 10974 $");
  }
}