weka.classifiers.evaluation.output.prediction.XML Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of weka-dev Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.
There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see .
 */

/*
 * XML.java
 * Copyright (C) 2009-2012 University of Waikato, Hamilton, New Zealand
 */

package weka.classifiers.evaluation.output.prediction;

import weka.classifiers.Classifier;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Utils;
import weka.core.Version;
import weka.core.xml.XMLDocument;

/**
 
 * Outputs the predictions in XML.

 * 

 * The following DTD is used:

 * 

 * <!DOCTYPE predictions

 * [

 *   <!ELEMENT predictions (prediction*)>

 *   <!ATTLIST predictions version CDATA "3.5.8">

 *   <!ATTLIST predictions name CDATA #REQUIRED>

 * 

 *   <!ELEMENT prediction ((actual_label,predicted_label,error,(prediction|distribution),attributes?)|(actual_value,predicted_value,error,attributes?))>

 *   <!ATTLIST prediction index CDATA #REQUIRED>

 * 

 *   <!ELEMENT actual_label ANY>

 *   <!ATTLIST actual_label index CDATA #REQUIRED>

 *   <!ELEMENT predicted_label ANY>

 *   <!ATTLIST predicted_label index CDATA #REQUIRED>

 *   <!ELEMENT error ANY>

 *   <!ELEMENT prediction ANY>

 *   <!ELEMENT distribution (class_label+)>

 *   <!ELEMENT class_label ANY>

 *   <!ATTLIST class_label index CDATA #REQUIRED>

 *   <!ATTLIST class_label predicted (yes|no) "no">

 *   <!ELEMENT actual_value ANY>

 *   <!ELEMENT predicted_value ANY>

 *   <!ELEMENT attributes (attribute+)>

 *   <!ELEMENT attribute ANY>

 *   <!ATTLIST attribute index CDATA #REQUIRED>

 *   <!ATTLIST attribute name CDATA #REQUIRED>

 *   <!ATTLIST attribute type (numeric|date|nominal|string|relational) #REQUIRED>

 * ]

 * >
 * 
 
 *
 
 * Valid options are: 

 * 
 * 
 -p <range>
 *  The range of attributes to print in addition to the classification.
 *  (default: none)
 * 
 *  -distribution
 *  Whether to turn on the output of the class distribution.
 *  Only for nominal class attributes.
 *  (default: off)
 * 
 *  -decimals <num>
 *  The number of digits after the decimal point.
 *  (default: 3)
 * 
 *  -file <path>
 *  The file to store the output in, instead of outputting it on stdout.
 *  Gets ignored if the supplied path is a directory.
 *  (default: .)
 * 
 *  -suppress
 *  In case the data gets stored in a file, then this flag can be used
 *  to suppress the regular output.
 *  (default: not suppressed)
 * 
 
 *
 * @author  fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 8937 $
 */
public class XML
  extends AbstractOutput {
  
  /** for serialization. */
  private static final long serialVersionUID = -3165514277316824801L;

  /** the DocType definition. */
  public final static String DTD_DOCTYPE = XMLDocument.DTD_DOCTYPE;
  
  /** the Element definition. */
  public final static String DTD_ELEMENT = XMLDocument.DTD_ELEMENT;
  
  /** the AttList definition. */
  public final static String DTD_ATTLIST = XMLDocument.DTD_ATTLIST;
  
  /** the optional marker. */
  public final static String DTD_OPTIONAL = XMLDocument.DTD_OPTIONAL;
  
  /** the at least one marker. */
  public final static String DTD_AT_LEAST_ONE = XMLDocument.DTD_AT_LEAST_ONE;
  
  /** the zero or more marker. */
  public final static String DTD_ZERO_OR_MORE = XMLDocument.DTD_ZERO_OR_MORE;
  
  /** the option separator. */
  public final static String DTD_SEPARATOR = XMLDocument.DTD_SEPARATOR;
  
  /** the CDATA placeholder. */
  public final static String DTD_CDATA = XMLDocument.DTD_CDATA; 
  
  /** the ANY placeholder. */
  public final static String DTD_ANY = XMLDocument.DTD_ANY; 
  
  /** the #PCDATA placeholder. */
  public final static String DTD_PCDATA = XMLDocument.DTD_PCDATA; 
  
  /** the #IMPLIED placeholder. */
  public final static String DTD_IMPLIED = XMLDocument.DTD_IMPLIED; 
  
  /** the #REQUIRED placeholder. */
  public final static String DTD_REQUIRED = XMLDocument.DTD_REQUIRED; 

  /** the "version" attribute. */
  public final static String ATT_VERSION = XMLDocument.ATT_VERSION;
 
  /** the "name" attribute. */
  public final static String ATT_NAME = XMLDocument.ATT_NAME;
  
  /** the "type" attribute. */
  public final static String ATT_TYPE = "type";

  /** the value "yes". */
  public final static String VAL_YES = XMLDocument.VAL_YES;
  
  /** the value "no". */
  public final static String VAL_NO = XMLDocument.VAL_NO;
  
  /** the predictions tag. */
  public final static String TAG_PREDICTIONS = "predictions";
  
  /** the prediction tag. */
  public final static String TAG_PREDICTION = "prediction";

  /** the actual_nominal tag. */
  public final static String TAG_ACTUAL_LABEL = "actual_label";

  /** the predicted_nominal tag. */
  public final static String TAG_PREDICTED_LABEL = "predicted_label";

  /** the error tag. */
  public final static String TAG_ERROR = "error";

  /** the distribution tag. */
  public final static String TAG_DISTRIBUTION = "distribution";

  /** the class_label tag. */
  public final static String TAG_CLASS_LABEL = "class_label";

  /** the actual_numeric tag. */
  public final static String TAG_ACTUAL_VALUE = "actual_value";

  /** the predicted_numeric tag. */
  public final static String TAG_PREDICTED_VALUE = "predicted_value";

  /** the attributes tag. */
  public final static String TAG_ATTRIBUTES = "attributes";

  /** the attribute tag. */
  public final static String TAG_ATTRIBUTE = "attribute";

  /** the index attribute. */
  public final static String ATT_INDEX = "index";

  /** the predicted attribute. */
  public final static String ATT_PREDICTED = "predicted";
  
  /** the DTD. */
  public final static String DTD = 
    "\n"
    + "  \n"
    + "  \n"
    + "\n"
    + "  \n"
    + "  \n"
    + "\n"
    + "  \n"
    + "  \n"
    + "  \n"
    + "  \n"
    + "  \n"
    + "  \n"
    + "  \n"
    + "  \n"
    + "  \n"
    + "  \n"
    + "  \n"
    + "  \n"
    + "  \n"
    + "  \n"
    + "  \n"
    + "  \n"
    + "  \n"
    + "]\n"
    + ">";
  
  /**
   * Returns a string describing the output generator.
   * 
   * @return 		a description suitable for
   * 			displaying in the GUI
   */
  public String globalInfo() {
    return 
        "Outputs the predictions in XML.\n\n"
      + "The following DTD is used:\n\n"
      + DTD;
  }
  
  /**
   * Returns a short display text, to be used in comboboxes.
   * 
   * @return 		a short display text
   */
  public String getDisplay() {
    return "XML";
  }

  /**
   * Replaces certain characters with their XML entities.
   * 
   * @param s		the string to process
   * @return		the processed string
   */
  protected String sanitize(String s) {
    String 	result;
    
    result = s;
    result = result.replaceAll("&", "&");
    result = result.replaceAll("<", "<");
    result = result.replaceAll(">", ">");
    result = result.replaceAll("\"", """);
    
    return result;
  }
  
  /**
   * Performs the actual printing of the header.
   */
  protected void doPrintHeader() {
    append("\n");
    append("\n");
    append(DTD + "\n\n");
    append("<" + TAG_PREDICTIONS + " " + ATT_VERSION + "=\"" + Version.VERSION + "\"" + " " + ATT_NAME + "=\"" + sanitize(m_Header.relationName()) + "\">\n");
  }

  /**
   * Builds a string listing the attribute values in a specified range of indices,
   * separated by commas and enclosed in brackets.
   *
   * @param instance 	the instance to print the values from
   * @return 		a string listing values of the attributes in the range
   */
  protected String attributeValuesString(Instance instance) {
    StringBuffer text = new StringBuffer();
    if (m_Attributes != null) {
      text.append("    <" + TAG_ATTRIBUTES + ">\n");
      m_Attributes.setUpper(instance.numAttributes() - 1);
      for (int i=0; i");
	  text.append(sanitize(instance.toString(i)));
	  text.append("\n");
	}
      }
      text.append("    \n");
    }
    return text.toString();
  }
  
  /**
   * Store the prediction made by the classifier as a string.
   * 
   * @param dist        the distribution to use
   * @param inst        the instance to generate text from
   * @param index       the index in the dataset
   * @throws Exception  if something goes wrong
   */
  protected void doPrintClassification(double[] dist, Instance inst, int index) throws Exception {
    int prec = m_NumDecimals;

    Instance withMissing = (Instance)inst.copy();
    withMissing.setDataset(inst.dataset());
    
    double predValue = 0;
    if (Utils.sum(dist) == 0) {
      predValue = Utils.missingValue();
    } else {
      if (inst.classAttribute().isNominal()) {
        predValue = Utils.maxIndex(dist);
      } else {
        predValue = dist[0];                         
      }
    }
    
    // opening tag
    append("  <" + TAG_PREDICTION + " " + ATT_INDEX + "=\"" + (index+1) + "\">\n");

    if (inst.dataset().classAttribute().isNumeric()) {
      // actual
      append("    <" + TAG_ACTUAL_VALUE + ">");
      if (inst.classIsMissing())
        append("?");
      else
        append(Utils.doubleToString(inst.classValue(), prec));
      append("\n");
      // predicted
      append("    <" + TAG_PREDICTED_VALUE + ">");
      if (inst.classIsMissing())
        append("?");
      else
        append(Utils.doubleToString(predValue, prec));
      append("\n");
      // error
      append("    <" + TAG_ERROR + ">");
      if (Utils.isMissingValue(predValue) || inst.classIsMissing())
        append("?");
      else
        append(Utils.doubleToString(predValue - inst.classValue(), prec));
      append("\n");
    } else {
      // actual
      append("    <" + TAG_ACTUAL_LABEL + " " + ATT_INDEX + "=\"" + ((int) inst.classValue()+1) + "\"" + ">");
      append(sanitize(inst.toString(inst.classIndex())));
      append("\n");
      // predicted
      append("    <" + TAG_PREDICTED_LABEL + " " + ATT_INDEX + "=\"" + ((int) predValue+1) + "\"" + ">");
      if (Utils.isMissingValue(predValue))
        append("?");
      else
        append(sanitize(inst.dataset().classAttribute().value((int)predValue)));
      append("\n");
      // error?
      append("    <" + TAG_ERROR + ">");
      if (!Utils.isMissingValue(predValue) && !inst.classIsMissing() && ((int) predValue+1 != (int) inst.classValue()+1))
        append(VAL_YES);
      else
        append(VAL_NO);
      append("\n");
      // prediction/distribution
      if (m_OutputDistribution) {
        append("    <" + TAG_DISTRIBUTION + ">\n");
        for (int n = 0; n < dist.length; n++) {
          append("      <" + TAG_CLASS_LABEL + " " + ATT_INDEX + "=\"" + (n+1) + "\"");
          if (!Utils.isMissingValue(predValue) && (n == (int) predValue))
            append(" " + ATT_PREDICTED + "=\"" + VAL_YES + "\"");
          append(">");
          append(Utils.doubleToString(dist[n], prec));
          append("\n");
        }
        append("    \n");
      }
      else {
        append("    <" + TAG_PREDICTION + ">");
        if (Utils.isMissingValue(predValue))
          append("?");
        else
          append(Utils.doubleToString(dist[(int)predValue], prec));
        append("\n");
      }
    }

    // attributes
    if (m_Attributes != null)
      append(attributeValuesString(withMissing));
    
    // closing tag
    append("  \n");
  }

  /**
   * Store the prediction made by the classifier as a string.
   * 
   * @param classifier	the classifier to use
   * @param inst	the instance to generate text from
   * @param index	the index in the dataset
   * @throws Exception	if something goes wrong
   */
  protected void doPrintClassification(Classifier classifier, Instance inst, int index) throws Exception {
    
    double[] d = classifier.distributionForInstance(inst);
    doPrintClassification(d, inst, index);    
  }
  
  /**
   * Does nothing.
   */
  protected void doPrintFooter() {
    append("\n");
  }
}