All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.classifiers.bayes.net.search.SearchAlgorithm Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see .
 */

/*
 * SearchAlgorithm.java
 * Copyright (C) 2003-2012 University of Waikato, Hamilton, New Zealand
 * 
 */
package weka.classifiers.bayes.net.search;

import java.io.Serializable;
import java.util.Enumeration;
import java.util.Vector;

import weka.classifiers.bayes.BayesNet;
import weka.classifiers.bayes.net.BIFReader;
import weka.classifiers.bayes.net.ParentSet;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;

/**
 * This is the base class for all search algorithms for learning Bayes networks.
 * It contains some common code, used by other network structure search
 * algorithms, and should not be used by itself.
 * 
 *  
 * 
 * @author Remco Bouckaert
 * @version $Revision: 10153 $
 */
public class SearchAlgorithm implements OptionHandler, Serializable,
  RevisionHandler {

  /** for serialization */
  static final long serialVersionUID = 6164792240778525312L;

  /**
   * Holds upper bound on number of parents
   */
  protected int m_nMaxNrOfParents = 1;

  /**
   * determines whether initial structure is an empty graph or a Naive Bayes
   * network
   */
  protected boolean m_bInitAsNaiveBayes = true;

  /**
   * Determines whether after structure is found a MarkovBlanketClassifier
   * correction should be applied If this is true, m_bInitAsNaiveBayes is
   * overridden and interpreted as false.
   */
  protected boolean m_bMarkovBlanketClassifier = false;

  /**
   * File name containing initial network structure. This can be used as
   * starting point for structure search It will be ignored if not speficied.
   * When specified, it overrides the InitAsNaivBayes flag.
   */
  protected String m_sInitalBIFFile;

  /** c'tor **/
  public SearchAlgorithm() {
  } // SearchAlgorithm

  /**
   * AddArcMakesSense checks whether adding the arc from iAttributeTail to
   * iAttributeHead does not already exists and does not introduce a cycle
   * 
   * @param bayesNet
   * @param instances
   * @param iAttributeHead index of the attribute that becomes head of the arrow
   * @param iAttributeTail index of the attribute that becomes tail of the arrow
   * @return true if adding arc is allowed, otherwise false
   */
  protected boolean addArcMakesSense(BayesNet bayesNet, Instances instances,
    int iAttributeHead, int iAttributeTail) {
    if (iAttributeHead == iAttributeTail) {
      return false;
    }

    // sanity check: arc should not be in parent set already
    if (isArc(bayesNet, iAttributeHead, iAttributeTail)) {
      return false;
    }

    // sanity check: arc should not introduce a cycle
    int nNodes = instances.numAttributes();
    boolean[] bDone = new boolean[nNodes];

    for (int iNode = 0; iNode < nNodes; iNode++) {
      bDone[iNode] = false;
    }

    // check for cycles
    bayesNet.getParentSet(iAttributeHead).addParent(iAttributeTail, instances);

    for (int iNode = 0; iNode < nNodes; iNode++) {

      // find a node for which all parents are 'done'
      boolean bFound = false;

      for (int iNode2 = 0; !bFound && iNode2 < nNodes; iNode2++) {
        if (!bDone[iNode2]) {
          boolean bHasNoParents = true;

          for (int iParent = 0; iParent < bayesNet.getParentSet(iNode2)
            .getNrOfParents(); iParent++) {
            if (!bDone[bayesNet.getParentSet(iNode2).getParent(iParent)]) {
              bHasNoParents = false;
            }
          }

          if (bHasNoParents) {
            bDone[iNode2] = true;
            bFound = true;
          }
        }
      }

      if (!bFound) {
        bayesNet.getParentSet(iAttributeHead).deleteLastParent(instances);

        return false;
      }
    }

    bayesNet.getParentSet(iAttributeHead).deleteLastParent(instances);

    return true;
  } // AddArcMakesCycle

  /**
   * reverseArcMakesSense checks whether the arc from iAttributeTail to
   * iAttributeHead exists and reversing does not introduce a cycle
   * 
   * @param bayesNet
   * @param instances
   * @param iAttributeHead index of the attribute that is head of the arrow
   * @param iAttributeTail index of the attribute that is tail of the arrow
   * @return true if the arc from iAttributeTail to iAttributeHead exists and
   *         reversing does not introduce a cycle
   */
  protected boolean reverseArcMakesSense(BayesNet bayesNet,
    Instances instances, int iAttributeHead, int iAttributeTail) {

    if (iAttributeHead == iAttributeTail) {
      return false;
    }

    // sanity check: arc should be in parent set already
    if (!isArc(bayesNet, iAttributeHead, iAttributeTail)) {
      return false;
    }

    // sanity check: arc should not introduce a cycle
    int nNodes = instances.numAttributes();
    boolean[] bDone = new boolean[nNodes];

    for (int iNode = 0; iNode < nNodes; iNode++) {
      bDone[iNode] = false;
    }

    // check for cycles
    bayesNet.getParentSet(iAttributeTail).addParent(iAttributeHead, instances);

    for (int iNode = 0; iNode < nNodes; iNode++) {

      // find a node for which all parents are 'done'
      boolean bFound = false;

      for (int iNode2 = 0; !bFound && iNode2 < nNodes; iNode2++) {
        if (!bDone[iNode2]) {
          ParentSet parentSet = bayesNet.getParentSet(iNode2);
          boolean bHasNoParents = true;
          for (int iParent = 0; iParent < parentSet.getNrOfParents(); iParent++) {
            if (!bDone[parentSet.getParent(iParent)]) {

              // this one has a parent which is not 'done' UNLESS it is the arc
              // to be reversed
              if (!(iNode2 == iAttributeHead && parentSet.getParent(iParent) == iAttributeTail)) {
                bHasNoParents = false;
              }
            }
          }

          if (bHasNoParents) {
            bDone[iNode2] = true;
            bFound = true;
          }
        }
      }

      if (!bFound) {
        bayesNet.getParentSet(iAttributeTail).deleteLastParent(instances);
        return false;
      }
    }

    bayesNet.getParentSet(iAttributeTail).deleteLastParent(instances);
    return true;
  } // ReverseArcMakesCycle

  /**
   * IsArc checks whether the arc from iAttributeTail to iAttributeHead already
   * exists
   * 
   * @param bayesNet
   * @param iAttributeHead index of the attribute that becomes head of the arrow
   * @param iAttributeTail index of the attribute that becomes tail of the arrow
   * @return true if the arc from iAttributeTail to iAttributeHead already
   *         exists
   */
  protected boolean isArc(BayesNet bayesNet, int iAttributeHead,
    int iAttributeTail) {
    for (int iParent = 0; iParent < bayesNet.getParentSet(iAttributeHead)
      .getNrOfParents(); iParent++) {
      if (bayesNet.getParentSet(iAttributeHead).getParent(iParent) == iAttributeTail) {
        return true;
      }
    }

    return false;
  } // IsArc

  /**
   * Returns an enumeration describing the available options.
   * 
   * @return an enumeration of all the available options.
   */
  @Override
  public Enumeration

* * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ @Override public void setOptions(String[] options) throws Exception { } // setOptions /** * Gets the current settings of the Classifier. * * @return an array of strings suitable for passing to setOptions */ @Override public String[] getOptions() { return new String[0]; } // getOptions /** * a string representation of the algorithm * * @return a string representation */ @Override public String toString() { return "SearchAlgorithm\n"; } // toString /** * buildStructure determines the network structure/graph of the network. The * default behavior is creating a network where all nodes have the first node * as its parent (i.e., a BayesNet that behaves like a naive Bayes * classifier). This method can be overridden by derived classes to restrict * the class of network structures that are acceptable. * * @param bayesNet the network * @param instances the data to use * @throws Exception if something goes wrong */ public void buildStructure(BayesNet bayesNet, Instances instances) throws Exception { if (m_sInitalBIFFile != null && !m_sInitalBIFFile.equals("")) { BIFReader initialNet = new BIFReader().processFile(m_sInitalBIFFile); for (int iAttribute = 0; iAttribute < instances.numAttributes(); iAttribute++) { int iNode = initialNet.getNode(bayesNet.getNodeName(iAttribute)); for (int iParent = 0; iParent < initialNet.getNrOfParents(iAttribute); iParent++) { String sParent = initialNet.getNodeName(initialNet.getParent(iNode, iParent)); int nParent = 0; while (nParent < bayesNet.getNrOfNodes() && !bayesNet.getNodeName(nParent).equals(sParent)) { nParent++; } if (nParent < bayesNet.getNrOfNodes()) { bayesNet.getParentSet(iAttribute).addParent(nParent, instances); } else { System.err .println("Warning: Node " + sParent + " is ignored. It is found in initial network but not in data set."); } } } } else if (m_bInitAsNaiveBayes) { int iClass = instances.classIndex(); // initialize parent sets to have arrow from classifier node to // each of the other nodes for (int iAttribute = 0; iAttribute < instances.numAttributes(); iAttribute++) { if (iAttribute != iClass) { bayesNet.getParentSet(iAttribute).addParent(iClass, instances); } } } search(bayesNet, instances); if (m_bMarkovBlanketClassifier) { doMarkovBlanketCorrection(bayesNet, instances); } } // buildStructure /** * * @param bayesNet * @param instances */ protected void search(BayesNet bayesNet, Instances instances) throws Exception { // placeholder with implementation in derived classes } // search /** * for each node in the network make sure it is in the Markov blanket of the * classifier node, and if not, add arrows so that it is. If the node is an * ancestor of the classifier node, add arrow pointing to the classifier node, * otherwise, add arrow pointing to attribute node. * * @param bayesNet * @param instances */ protected void doMarkovBlanketCorrection(BayesNet bayesNet, Instances instances) { // Add class node as parent if it is not in the Markov Boundary int iClass = instances.classIndex(); ParentSet ancestors = new ParentSet(); int nOldSize = 0; ancestors.addParent(iClass, instances); while (nOldSize != ancestors.getNrOfParents()) { nOldSize = ancestors.getNrOfParents(); for (int iNode = 0; iNode < nOldSize; iNode++) { int iCurrent = ancestors.getParent(iNode); ParentSet p = bayesNet.getParentSet(iCurrent); for (int iParent = 0; iParent < p.getNrOfParents(); iParent++) { if (!ancestors.contains(p.getParent(iParent))) { ancestors.addParent(p.getParent(iParent), instances); } } } } for (int iAttribute = 0; iAttribute < instances.numAttributes(); iAttribute++) { boolean bIsInMarkovBoundary = (iAttribute == iClass) || bayesNet.getParentSet(iAttribute).contains(iClass) || bayesNet.getParentSet(iClass).contains(iAttribute); for (int iAttribute2 = 0; !bIsInMarkovBoundary && iAttribute2 < instances.numAttributes(); iAttribute2++) { bIsInMarkovBoundary = bayesNet.getParentSet(iAttribute2).contains( iAttribute) && bayesNet.getParentSet(iAttribute2).contains(iClass); } if (!bIsInMarkovBoundary) { if (ancestors.contains(iAttribute)) { if (bayesNet.getParentSet(iClass).getCardinalityOfParents() < 1024) { bayesNet.getParentSet(iClass).addParent(iAttribute, instances); } else { // too bad } } else { bayesNet.getParentSet(iAttribute).addParent(iClass, instances); } } } } // doMarkovBlanketCorrection /** * * @param bMarkovBlanketClassifier */ protected void setMarkovBlanketClassifier(boolean bMarkovBlanketClassifier) { m_bMarkovBlanketClassifier = bMarkovBlanketClassifier; } /** * * @return */ protected boolean getMarkovBlanketClassifier() { return m_bMarkovBlanketClassifier; } /** * @return a string to describe the MaxNrOfParentsoption. */ public String maxNrOfParentsTipText() { return "Set the maximum number of parents a node in the Bayes net can have." + " When initialized as Naive Bayes, setting this parameter to 1 results in" + " a Naive Bayes classifier. When set to 2, a Tree Augmented Bayes Network (TAN)" + " is learned, and when set >2, a Bayes Net Augmented Bayes Network (BAN)" + " is learned. By setting it to a value much larger than the number of nodes" + " in the network (the default of 100000 pretty much guarantees this), no" + " restriction on the number of parents is enforced"; } // maxNrOfParentsTipText /** * @return a string to describe the InitAsNaiveBayes option. */ public String initAsNaiveBayesTipText() { return "When set to true (default), the initial network used for structure learning" + " is a Naive Bayes Network, that is, a network with an arrow from the classifier" + " node to each other node. When set to false, an empty network is used as initial" + " network structure"; } // initAsNaiveBayesTipText /** * @return a string to describe the MarkovBlanketClassifier option. */ protected String markovBlanketClassifierTipText() { return "When set to true (default is false), after a network structure is learned" + " a Markov Blanket correction is applied to the network structure. This ensures" + " that all nodes in the network are part of the Markov blanket of the classifier" + " node."; } // markovBlanketClassifierTipText /** * Returns the revision string. * * @return the revision */ @Override public String getRevision() { return RevisionUtils.extract("$Revision: 10153 $"); } } // class SearchAlgorithm





© 2015 - 2024 Weber Informatics LLC | Privacy Policy