weka.classifiers.bayes.net.ADNode Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-stable Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the stable version. Apart from bugfixes, this version does not receive any other updates.
There is a newer version: 3.8.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see .
 */

/*
 * ADNode.java
 * Copyright (C) 2002-2012 University of Waikato, Hamilton, New Zealand
 * 
 */

package weka.classifiers.bayes.net;

import java.io.FileReader;
import java.io.Serializable;
import java.util.ArrayList;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;

/**
 * The ADNode class implements the ADTree datastructure which increases the
 * speed with which sub-contingency tables can be constructed from a data set in
 * an Instances object. For details, see:
 * 
 * 
 *  Andrew W. Moore, Mary S. Lee (1998).
 * Cached Sufficient Statistics for Efficient Machine Learning with Large
 * Datasets. Journal of Artificial Intelligence Research. 8:67-91. 
 * 

 * 
 *  BibTeX:
 * 
 * 
 * @article{Moore1998,
 *    author = {Andrew W. Moore and Mary S. Lee},
 *    journal = {Journal of Artificial Intelligence Research},
 *    pages = {67-91},
 *    title = {Cached Sufficient Statistics for Efficient Machine Learning with Large Datasets},
 *    volume = {8},
 *    year = {1998}
 * }
 * 
 * 
 * 
 * 
 * @author Remco Bouckaert ([email protected])
 * @version $Revision: 10153 $
 */
public class ADNode implements Serializable, TechnicalInformationHandler,
  RevisionHandler {

  /** for serialization */
  static final long serialVersionUID = 397409728366910204L;

  final static int MIN_RECORD_SIZE = 0;

  /** list of VaryNode children **/
  public VaryNode[] m_VaryNodes;
  /**
   * list of Instance children (either m_Instances or m_VaryNodes is
   * instantiated)
   **/
  public Instance[] m_Instances;

  /** count **/
  public int m_nCount;

  /** first node in VaryNode array **/
  public int m_nStartNode;

  /** Creates new ADNode */
  public ADNode() {
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing detailed
   * information about the technical background of this class, e.g., paper
   * reference or book this class is based on.
   * 
   * @return the technical information about this class
   */
  @Override
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;

    result = new TechnicalInformation(Type.ARTICLE);
    result.setValue(Field.AUTHOR, "Andrew W. Moore and Mary S. Lee");
    result.setValue(Field.YEAR, "1998");
    result
      .setValue(
        Field.TITLE,
        "Cached Sufficient Statistics for Efficient Machine Learning with Large Datasets");
    result.setValue(Field.JOURNAL,
      "Journal of Artificial Intelligence Research");
    result.setValue(Field.VOLUME, "8");
    result.setValue(Field.PAGES, "67-91");

    return result;
  }

  /**
   * create sub tree
   * 
   * @param iNode index of the lowest node in the tree
   * @param nRecords set of records in instances to be considered
   * @param instances data set
   * @return VaryNode representing part of an ADTree
   **/
  public static VaryNode makeVaryNode(int iNode, ArrayList nRecords,
    Instances instances) {
    VaryNode _VaryNode = new VaryNode(iNode);
    int nValues = instances.attribute(iNode).numValues();

    // reserve memory and initialize
    @SuppressWarnings("unchecked")
    ArrayList[] nChildRecords = new ArrayList[nValues];
    for (int iChild = 0; iChild < nValues; iChild++) {
      nChildRecords[iChild] = new ArrayList();
    }
    // divide the records among children
    for (int iRecord = 0; iRecord < nRecords.size(); iRecord++) {
      int iInstance = nRecords.get(iRecord).intValue();
      nChildRecords[(int) instances.instance(iInstance).value(iNode)]
        .add(new Integer(iInstance));
    }

    // find most common value
    int nCount = nChildRecords[0].size();
    int nMCV = 0;
    for (int iChild = 1; iChild < nValues; iChild++) {
      if (nChildRecords[iChild].size() > nCount) {
        nCount = nChildRecords[iChild].size();
        nMCV = iChild;
      }
    }
    _VaryNode.m_nMCV = nMCV;

    // determine child nodes
    _VaryNode.m_ADNodes = new ADNode[nValues];
    for (int iChild = 0; iChild < nValues; iChild++) {
      if (iChild == nMCV || nChildRecords[iChild].size() == 0) {
        _VaryNode.m_ADNodes[iChild] = null;
      } else {
        _VaryNode.m_ADNodes[iChild] = makeADTree(iNode + 1,
          nChildRecords[iChild], instances);
      }
    }
    return _VaryNode;
  } // MakeVaryNode

  /**
   * create sub tree
   * 
   * @param iNode index of the lowest node in the tree
   * @param nRecords set of records in instances to be considered
   * @param instances data set
   * @return ADNode representing an ADTree
   */
  public static ADNode makeADTree(int iNode, ArrayList nRecords,
    Instances instances) {
    ADNode _ADNode = new ADNode();
    _ADNode.m_nCount = nRecords.size();
    _ADNode.m_nStartNode = iNode;
    if (nRecords.size() < MIN_RECORD_SIZE) {
      _ADNode.m_Instances = new Instance[nRecords.size()];
      for (int iInstance = 0; iInstance < nRecords.size(); iInstance++) {
        _ADNode.m_Instances[iInstance] = instances.instance(nRecords.get(
          iInstance).intValue());
      }
    } else {
      _ADNode.m_VaryNodes = new VaryNode[instances.numAttributes() - iNode];
      for (int iNode2 = iNode; iNode2 < instances.numAttributes(); iNode2++) {
        _ADNode.m_VaryNodes[iNode2 - iNode] = makeVaryNode(iNode2, nRecords,
          instances);
      }
    }
    return _ADNode;
  } // MakeADTree

  /**
   * create AD tree from set of instances
   * 
   * @param instances data set
   * @return ADNode representing an ADTree
   */
  public static ADNode makeADTree(Instances instances) {
    ArrayList nRecords = new ArrayList(
      instances.numInstances());
    for (int iRecord = 0; iRecord < instances.numInstances(); iRecord++) {
      nRecords.add(new Integer(iRecord));
    }
    return makeADTree(0, nRecords, instances);
  } // MakeADTree

  /**
   * get counts for specific instantiation of a set of nodes
   * 
   * @param nCounts - array for storing counts
   * @param nNodes - array of node indexes
   * @param nOffsets - offset for nodes in nNodes in nCounts
   * @param iNode - index into nNode indicating current node
   * @param iOffset - Offset into nCounts due to nodes below iNode
   * @param bSubstract - indicate whether counts should be added or substracted
   */
  public void getCounts(int[] nCounts, int[] nNodes, int[] nOffsets, int iNode,
    int iOffset, boolean bSubstract) {
    // for (int iNode2 = 0; iNode2 < nCounts.length; iNode2++) {
    // System.out.print(nCounts[iNode2] + " ");
    // }
    // System.out.println();
    if (iNode >= nNodes.length) {
      if (bSubstract) {
        nCounts[iOffset] -= m_nCount;
      } else {
        nCounts[iOffset] += m_nCount;
      }
      return;
    } else {
      if (m_VaryNodes != null) {
        m_VaryNodes[nNodes[iNode] - m_nStartNode].getCounts(nCounts, nNodes,
          nOffsets, iNode, iOffset, this, bSubstract);
      } else {
        for (Instance instance : m_Instances) {
          int iOffset2 = iOffset;
          for (int iNode2 = iNode; iNode2 < nNodes.length; iNode2++) {
            iOffset2 = iOffset2 + nOffsets[iNode2]
              * (int) instance.value(nNodes[iNode2]);
          }
          if (bSubstract) {
            nCounts[iOffset2]--;
          } else {
            nCounts[iOffset2]++;
          }
        }
      }
    }
  } // getCounts

  /**
   * print is used for debugging only and shows the ADTree in ASCII graphics
   */
  public void print() {
    String sTab = new String();
    for (int i = 0; i < m_nStartNode; i++) {
      sTab = sTab + "  ";
    }
    System.out.println(sTab + "Count = " + m_nCount);
    if (m_VaryNodes != null) {
      for (int iNode = 0; iNode < m_VaryNodes.length; iNode++) {
        System.out.println(sTab + "Node " + (iNode + m_nStartNode));
        m_VaryNodes[iNode].print(sTab);
      }
    } else {
      System.out.println(m_Instances);
    }
  }

  /**
   * for testing only
   * 
   * @param argv the commandline options
   */
  public static void main(String[] argv) {
    try {
      Instances instances = new Instances(new FileReader("\\iris.2.arff"));
      ADNode ADTree = ADNode.makeADTree(instances);
      int[] nCounts = new int[12];
      int[] nNodes = new int[3];
      int[] nOffsets = new int[3];
      nNodes[0] = 0;
      nNodes[1] = 3;
      nNodes[2] = 4;
      nOffsets[0] = 2;
      nOffsets[1] = 1;
      nOffsets[2] = 4;
      ADTree.print();
      ADTree.getCounts(nCounts, nNodes, nOffsets, 0, 0, false);

    } catch (Throwable t) {
      t.printStackTrace();
    }
  } // main

  /**
   * Returns the revision string.
   * 
   * @return the revision
   */
  @Override
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 10153 $");
  }
} // class ADNode