All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.classifiers.bayes.net.search.local.K2 Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see .
 */

/*
 * K2.java
 * Copyright (C) 2001-2012 University of Waikato, Hamilton, New Zealand
 * 
 */
package weka.classifiers.bayes.net.search.local;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.classifiers.bayes.BayesNet;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;

/**
 *  This Bayes Network learning algorithm uses a hill
 * climbing algorithm restricted by an order on the variables.
*
* For more information see:
*
* G.F. Cooper, E. Herskovits (1990). A Bayesian method for constructing * Bayesian belief networks from databases.
*
* G. Cooper, E. Herskovits (1992). A Bayesian method for the induction of * probabilistic networks from data. Machine Learning. 9(4):309-347.
*
* Works with nominal variables and no missing values only. *

* * * BibTeX: * *

 * @proceedings{Cooper1990,
 *    author = {G.F. Cooper and E. Herskovits},
 *    booktitle = {Proceedings of the Conference on Uncertainty in AI},
 *    pages = {86-94},
 *    title = {A Bayesian method for constructing Bayesian belief networks from databases},
 *    year = {1990}
 * }
 * 
 * @article{Cooper1992,
 *    author = {G. Cooper and E. Herskovits},
 *    journal = {Machine Learning},
 *    number = {4},
 *    pages = {309-347},
 *    title = {A Bayesian method for the induction of probabilistic networks from data},
 *    volume = {9},
 *    year = {1992}
 * }
 * 
*

* * * Valid options are: *

* *

 * -N
 *  Initial structure is empty (instead of Naive Bayes)
 * 
* *
 * -P <nr of parents>
 *  Maximum number of parents
 * 
* *
 * -R
 *  Random order.
 *  (default false)
 * 
* *
 * -mbc
 *  Applies a Markov Blanket correction to the network structure, 
 *  after a network structure is learned. This ensures that all 
 *  nodes in the network are part of the Markov blanket of the 
 *  classifier node.
 * 
* *
 * -S [BAYES|MDL|ENTROPY|AIC|CROSS_CLASSIC|CROSS_BAYES]
 *  Score type (BAYES, BDeu, MDL, ENTROPY and AIC)
 * 
* * * * @author Remco Bouckaert ([email protected]) * @version $Revision: 11247 $ */ public class K2 extends LocalScoreSearchAlgorithm implements TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = 6176545934752116631L; /** Holds flag to indicate ordering should be random **/ boolean m_bRandomOrder = false; /** * Returns an instance of a TechnicalInformation object, containing detailed * information about the technical background of this class, e.g., paper * reference or book this class is based on. * * @return the technical information about this class */ @Override public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; TechnicalInformation additional; result = new TechnicalInformation(Type.PROCEEDINGS); result.setValue(Field.AUTHOR, "G.F. Cooper and E. Herskovits"); result.setValue(Field.YEAR, "1990"); result .setValue(Field.TITLE, "A Bayesian method for constructing Bayesian belief networks from databases"); result.setValue(Field.BOOKTITLE, "Proceedings of the Conference on Uncertainty in AI"); result.setValue(Field.PAGES, "86-94"); additional = result.add(Type.ARTICLE); additional.setValue(Field.AUTHOR, "G. Cooper and E. Herskovits"); additional.setValue(Field.YEAR, "1992"); additional .setValue(Field.TITLE, "A Bayesian method for the induction of probabilistic networks from data"); additional.setValue(Field.JOURNAL, "Machine Learning"); additional.setValue(Field.VOLUME, "9"); additional.setValue(Field.NUMBER, "4"); additional.setValue(Field.PAGES, "309-347"); return result; } /** * search determines the network structure/graph of the network with the K2 * algorithm, restricted by its initial structure (which can be an empty * graph, or a Naive Bayes graph. * * @param bayesNet the network * @param instances the data to work with * @throws Exception if something goes wrong */ @Override public void search(BayesNet bayesNet, Instances instances) throws Exception { int nOrder[] = new int[instances.numAttributes()]; nOrder[0] = instances.classIndex(); int nAttribute = 0; for (int iOrder = 1; iOrder < instances.numAttributes(); iOrder++) { if (nAttribute == instances.classIndex()) { nAttribute++; } nOrder[iOrder] = nAttribute++; } if (m_bRandomOrder) { // generate random ordering (if required) Random random = new Random(); int iClass; if (getInitAsNaiveBayes()) { iClass = 0; } else { iClass = -1; } for (int iOrder = 0; iOrder < instances.numAttributes(); iOrder++) { int iOrder2 = random.nextInt(instances.numAttributes()); if (iOrder != iClass && iOrder2 != iClass) { int nTmp = nOrder[iOrder]; nOrder[iOrder] = nOrder[iOrder2]; nOrder[iOrder2] = nTmp; } } } // determine base scores double[] fBaseScores = new double[instances.numAttributes()]; for (int iOrder = 0; iOrder < instances.numAttributes(); iOrder++) { int iAttribute = nOrder[iOrder]; fBaseScores[iAttribute] = calcNodeScore(iAttribute); } // K2 algorithm: greedy search restricted by ordering for (int iOrder = 1; iOrder < instances.numAttributes(); iOrder++) { int iAttribute = nOrder[iOrder]; double fBestScore = fBaseScores[iAttribute]; boolean bProgress = (bayesNet.getParentSet(iAttribute).getNrOfParents() < getMaxNrOfParents()); while (bProgress) { int nBestAttribute = -1; for (int iOrder2 = 0; iOrder2 < iOrder; iOrder2++) { int iAttribute2 = nOrder[iOrder2]; double fScore = calcScoreWithExtraParent(iAttribute, iAttribute2); if (fScore > fBestScore) { fBestScore = fScore; nBestAttribute = iAttribute2; } } if (nBestAttribute != -1) { bayesNet.getParentSet(iAttribute) .addParent(nBestAttribute, instances); fBaseScores[iAttribute] = fBestScore; bProgress = (bayesNet.getParentSet(iAttribute).getNrOfParents() < getMaxNrOfParents()); } else { bProgress = false; } } } } // buildStructure /** * Sets the max number of parents * * @param nMaxNrOfParents the max number of parents */ public void setMaxNrOfParents(int nMaxNrOfParents) { m_nMaxNrOfParents = nMaxNrOfParents; } /** * Gets the max number of parents. * * @return the max number of parents */ public int getMaxNrOfParents() { return m_nMaxNrOfParents; } /** * Sets whether to init as naive bayes * * @param bInitAsNaiveBayes whether to init as naive bayes */ public void setInitAsNaiveBayes(boolean bInitAsNaiveBayes) { m_bInitAsNaiveBayes = bInitAsNaiveBayes; } /** * Gets whether to init as naive bayes * * @return whether to init as naive bayes */ public boolean getInitAsNaiveBayes() { return m_bInitAsNaiveBayes; } /** * Set random order flag * * @param bRandomOrder the random order flag */ public void setRandomOrder(boolean bRandomOrder) { m_bRandomOrder = bRandomOrder; } // SetRandomOrder /** * Get random order flag * * @return the random order flag */ public boolean getRandomOrder() { return m_bRandomOrder; } // getRandomOrder /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ @Override public Enumeration




© 2015 - 2025 Weber Informatics LLC | Privacy Policy