All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.core.stats.NominalStats Maven / Gradle / Ivy

Go to download

This package provides generic configuration class and distributed map/reduce style tasks for Weka

There is a newer version: 1.0.17
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    NominalStats
 *    Copyright (C) 2014 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.core.stats;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import weka.core.Attribute;
import weka.core.Utils;
import weka.distributed.CSVToARFFHeaderMapTask;

/**
 * Class for computing nominal statistics (primarily frequency counts).
 * <p>
 * Counts are kept per label in a sorted map, and the number of missing values
 * is tracked separately. The statistics can be encoded as a nominal summary
 * attribute via {@link #makeAttribute()} and decoded again via
 * {@link #attributeToStats(Attribute)}; each value of such an attribute has
 * the form {@code <label>_<count>}.
 * 
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 * @version $Revision: 10929 $
 */
public class NominalStats extends Stats implements Serializable {

  /** A map of values to counts (sorted by label) */
  protected Map<String, Count> m_counts = new TreeMap<String, Count>();

  /** The number of missing values for this nominal attribute */
  protected double m_numMissing;

  /** A "label" to use when storing the number of missing values */
  public static final String MISSING_LABEL = "**missing**";

  /** For serialization */
  private static final long serialVersionUID = -6176046647546730423L;

  /**
   * Class that encapsulates a count for nominal value
   * 
   * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
   */
  public static class Count implements Serializable {

    /** For serialization */
    private static final long serialVersionUID = 4310467271632108735L;

    /** The value of the count */
    public double m_count;
  }

  /**
   * Constructs a new NominalStats
   * 
   * @param attributeName the name of the data attribute that these stats
   *          pertain to
   */
  public NominalStats(String attributeName) {
    super(attributeName);
  }

  /**
   * Convert a summary meta attribute to a NominalStats. Every value of the
   * attribute is expected to have the form {@code <label>_<count>}; the text
   * after the last underscore is parsed as the count, and everything before
   * it is the label. A label equal to {@link #MISSING_LABEL} is added to the
   * missing count rather than to the label counts. The resulting stats use
   * the attribute's name as-is.
   * 
   * @param a the attribute to convert
   * @return a NominalStats
   * @throws IllegalArgumentException if the attribute is not nominal, if a
   *           value contains no underscore separator, or if the count portion
   *           of a value can't be parsed as a number
   */
  public static NominalStats attributeToStats(Attribute a)
    throws IllegalArgumentException {

    if (!a.isNominal()) {
      throw new IllegalArgumentException("Stats attribute is not nominal!");
    }

    NominalStats ns = new NominalStats(a.name());
    for (int j = 0; j < a.numValues(); j++) {
      String v = a.value(j);

      // Split on the LAST underscore, as labels themselves may contain
      // underscores. Without a separator substring() would fail with a
      // StringIndexOutOfBoundsException instead of the documented exception.
      int separator = v.lastIndexOf('_');
      if (separator < 0) {
        throw new IllegalArgumentException("Summary value '" + v
          + "' is not of the form <label>_<count>");
      }

      String label = v.substring(0, separator);
      String freqCount = v.substring(separator + 1);
      try {
        double fC = Double.parseDouble(freqCount);
        if (label.equals(NominalStats.MISSING_LABEL)) {
          ns.add(null, fC);
        } else {
          ns.add(label, fC);
        }
      } catch (NumberFormatException n) {
        throw new IllegalArgumentException("Unable to parse count '"
          + freqCount + "' in summary value '" + v + "'", n);
      }
    }

    return ns;
  }

  /**
   * Adds to the count for a given label. If the label is null then it adds to
   * the count for missing. A label that has not been seen before is created
   * with an initial count of zero before the value is added.
   * 
   * @param label the label to add the count to
   * @param value the count to add
   */
  public void add(String label, double value) {

    if (label == null) {
      m_numMissing += value;
    } else {

      Count c = m_counts.get(label);
      if (c == null) {
        c = new Count();
        m_counts.put(label, c);
      }

      c.m_count += value;
    }
  }

  /**
   * Get the set of labels seen by this NominalStats. The returned set is the
   * key set of the underlying map of counts (not a copy).
   * 
   * @return the set of labels
   */
  public Set<String> getLabels() {
    return m_counts.keySet();
  }

  /**
   * Get the count for a given label
   * 
   * @param label the label to get the count for
   * @return the count or missing value if the label is unknown
   */
  public double getCount(String label) {
    Count c = m_counts.get(label);

    if (c == null) {
      return Utils.missingValue();
    }

    return c.m_count;
  }

  /**
   * Get the number of missing values for this attribute
   * 
   * @return the number of missing values seen
   */
  public double getNumMissing() {
    return m_numMissing;
  }

  /**
   * Get the index of the mode. If several labels share the highest count then
   * the first of them (in sorted label order) is chosen.
   * 
   * @return the index (in the sorted list of labels) of the mode, or -1 if no
   *         labels have been seen
   */
  public int getMode() {
    double max = -1;
    int maxIndex = -1;

    int index = 0;
    for (Map.Entry<String, Count> e : m_counts.entrySet()) {
      double count = e.getValue().m_count;
      // strict comparison so that the first label wins ties
      if (count > max) {
        max = count;
        maxIndex = index;
      }
      index++;
    }

    return maxIndex;
  }

  /**
   * Get the most frequent label (not including missing values). If several
   * labels share the highest count then the first of them (in sorted label
   * order) is chosen.
   * 
   * @return the most frequent label, or the empty string if no labels have
   *         been seen
   */
  public String getModeLabel() {
    double max = -1;
    String maxLabel = "";

    for (Map.Entry<String, Count> e : m_counts.entrySet()) {
      double count = e.getValue().m_count;
      // strict comparison so that the first label wins ties
      if (count > max) {
        max = count;
        maxLabel = e.getKey();
      }
    }

    return maxLabel;
  }

  /**
   * Set the number of missing values for this attribute
   * 
   * @param missing the number of missing values
   */
  public void setNumMissing(double missing) {
    m_numMissing = missing;
  }

  /**
   * Encode these stats as a nominal summary attribute. One value of the form
   * {@code <label>_<count>} is created for each label (in sorted label
   * order), followed by a final value holding the number of missing values
   * under {@link #MISSING_LABEL}. The attribute's name is the attribute name
   * of these stats prefixed with
   * {@code CSVToARFFHeaderMapTask.ARFF_SUMMARY_ATTRIBUTE_PREFIX}.
   * 
   * @return the summary attribute
   */
  @Override
  public Attribute makeAttribute() {
    ArrayList<String> vals = new ArrayList<String>();

    for (Map.Entry<String, Count> e : m_counts.entrySet()) {
      vals.add(e.getKey() + "_" + e.getValue().m_count);
    }

    // the missing count always goes last
    vals.add(MISSING_LABEL + "_" + m_numMissing);

    Attribute a =
      new Attribute(CSVToARFFHeaderMapTask.ARFF_SUMMARY_ATTRIBUTE_PREFIX
        + m_attributeName, vals);

    return a;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy