// weka.classifiers.bayes.NaiveBayesMultinomialUpdateable Maven / Gradle / Ivy
//
// Go to download
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    NaiveBayesMultinomialUpdateable.java
 *    Copyright (C) 2003 University of Waikato, Hamilton, New Zealand
 *    Copyright (C) 2007 Jiang Su (incremental version)
 */

package weka.classifiers.bayes;

import weka.classifiers.UpdateableClassifier;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionUtils;
import weka.core.Utils;

/**
 
 * Class for building and using a multinomial Naive Bayes classifier. For more information see,<br/>
 * <br/>
 * Andrew Mccallum, Kamal Nigam: A Comparison of Event Models for Naive Bayes Text Classification. In: AAAI-98 Workshop on 'Learning for Text Categorization', 1998.<br/>
 * <br/>
 * The core equation for this classifier:<br/>
 * <br/>
 * P[Ci|D] = (P[D|Ci] x P[Ci]) / P[D] (Bayes rule)<br/>
 * <br/>
 * where Ci is class i and D is a document.<br/>
 * <br/>
 * Incremental version of the algorithm.
 * <p/>
 
 *
 
 * BibTeX:
 * <pre>
 * &#64;inproceedings{Mccallum1998,
 *    author = {Andrew Mccallum and Kamal Nigam},
 *    booktitle = {AAAI-98 Workshop on 'Learning for Text Categorization'},
 *    title = {A Comparison of Event Models for Naive Bayes Text Classification},
 *    year = {1998}
 * }
 * </pre>
 * <p/>
 
 *
 
 * Valid options are: <p/>
 * 
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 * 
 
 *
 * @author Andrew Golightly ([email protected])
 * @author Bernhard Pfahringer ([email protected])
 * @author Jiang Su
 * @version $Revision: 1.3 $
 */
public class NaiveBayesMultinomialUpdateable
  extends NaiveBayesMultinomial
  implements UpdateableClassifier {

  /** Fixed version identifier used for serialization of this class. */
  private static final long serialVersionUID = -7204398796974263186L;
  
  /**
   * The word count per class, indexed by class. buildClassifier allocates
   * one entry per class and seeds each entry with laplace * m_numAttributes.
   */
  protected double[] m_wordsPerClass;
  
  /**
   * Builds the descriptive text for this classifier: the description
   * supplied by the superclass, a blank line, and a note that this is the
   * incremental version of the algorithm.
   *
   * @return 		the description text, suitable for displaying in the
   * 			explorer/experimenter gui
   */
  public String globalInfo() {
    String description = super.globalInfo();
    description = description + "\n\n";
    description = description + "Incremental version of the algorithm.";
    return description;
  }

  /**
   * Generates the classifier.
   *
   * @param instances 	set of instances serving as training data
   * @throws Exception 	if the classifier has not been generated successfully
   */
  public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    m_headerInfo = new Instances(instances, 0);
    m_numClasses = instances.numClasses();
    m_numAttributes = instances.numAttributes();
    m_probOfWordGivenClass = new double[m_numClasses][];
    m_wordsPerClass = new double[m_numClasses];
    m_probOfClass = new double[m_numClasses];

    // initialising the matrix of word counts
    // NOTE: Laplace estimator introduced in case a word that does not 
    // appear for a class in the training set does so for the test set
    double laplace = 1;
    for (int c = 0; c < m_numClasses; c++) {
      m_probOfWordGivenClass[c] = new double[m_numAttributes];
      m_probOfClass[c]   = laplace;
      m_wordsPerClass[c] = laplace * m_numAttributes;
      for(int att = 0; att