weka.classifiers.bayes.net.search.ci.ICSSearchAlgorithm Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-stable Show documentation
Show all versions of weka-stable Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This is the stable version. Apart from bugfixes, this version
does not receive any other updates.
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* ICSSearchAlgorithm.java
* Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.bayes.net.search.ci;
import weka.classifiers.bayes.BayesNet;
import weka.classifiers.bayes.net.ParentSet;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import java.io.FileReader;
import java.util.Enumeration;
import java.util.Vector;
/**
* This Bayes Network learning algorithm uses conditional independence tests to find a skeleton, finds V-nodes and applies a set of rules to find the directions of the remaining arrows.
*
*
* Valid options are:
*
* -cardinality <num>
* When determining whether an edge exists a search is performed
* for a set Z that separates the nodes. MaxCardinality determines
* the maximum size of the set Z. This greatly influences the
* length of the search. (default 2)
*
* -mbc
* Applies a Markov Blanket correction to the network structure,
* after a network structure is learned. This ensures that all
* nodes in the network are part of the Markov blanket of the
* classifier node.
*
* -S [BAYES|MDL|ENTROPY|AIC|CROSS_CLASSIC|CROSS_BAYES]
* Score type (BAYES, BDeu, MDL, ENTROPY and AIC)
*
*
* @author Remco Bouckaert
* @version $Revision: 1.8 $
*/
public class ICSSearchAlgorithm
extends CISearchAlgorithm {
/** for serialization */
static final long serialVersionUID = -2510985917284798576L;
/**
* returns the name of the attribute with the given index
*
* @param iAttribute the index of the attribute
* @return the name of the attribute
*/
String name(int iAttribute) {
return m_instances.attribute(iAttribute).name();
}
/**
* returns the number of attributes
*
* @return the number of attributes
*/
int maxn() {
return m_instances.numAttributes();
}
/** maximum size of separating set **/
private int m_nMaxCardinality = 2;
/**
* sets the cardinality
*
* @param nMaxCardinality the max cardinality
*/
public void setMaxCardinality(int nMaxCardinality) {
m_nMaxCardinality = nMaxCardinality;
}
/**
* returns the max cardinality
*
* @return the max cardinality
*/
public int getMaxCardinality() {
return m_nMaxCardinality;
}
class SeparationSet
implements RevisionHandler {
public int [] m_set;
/**
* constructor
*/
public SeparationSet() {
m_set= new int [getMaxCardinality() + 1];
} // c'tor
public boolean contains(int nItem) {
for (int iItem = 0; iItem < getMaxCardinality() && m_set[iItem] != -1; iItem++) {
if (m_set[iItem] == nItem) {
return true;
}
}
return false;
} // contains
/**
* Returns the revision string.
*
* @return the revision
*/
public String getRevision() {
return RevisionUtils.extract("$Revision: 1.8 $");
}
} // class sepset
/**
* Search for Bayes network structure using ICS algorithm
* @param bayesNet datastructure to build network structure for
* @param instances data set to learn from
* @throws Exception if something goes wrong
*/
protected void search(BayesNet bayesNet, Instances instances) throws Exception {
// init
m_BayesNet = bayesNet;
m_instances = instances;
boolean edges[][] = new boolean [maxn() + 1][];
boolean [] [] arrows = new boolean [maxn() + 1][];
SeparationSet [] [] sepsets = new SeparationSet [maxn() + 1][];
for (int iNode = 0 ; iNode < maxn() + 1; iNode++) {
edges[iNode] = new boolean[maxn()];
arrows[iNode] = new boolean[maxn()];
sepsets[iNode] = new SeparationSet[maxn()];
}
calcDependencyGraph(edges, sepsets);
calcVeeNodes(edges, arrows, sepsets);
calcArcDirections(edges, arrows);
// transfrom into BayesNet datastructure
for (int iNode = 0; iNode < maxn(); iNode++) {
// clear parent set of AttributeX
ParentSet oParentSet = m_BayesNet.getParentSet(iNode);
while (oParentSet.getNrOfParents() > 0) {
oParentSet.deleteLastParent(m_instances);
}
for (int iParent = 0; iParent < maxn(); iParent++) {
if (arrows[iParent][iNode]) {
oParentSet.addParent(iParent, m_instances);
}
}
}
} // search
/** CalcDependencyGraph determines the skeleton of the BayesNetwork by
* starting with a complete graph and removing edges (a--b) if it can
* find a set Z such that a and b conditionally independent given Z.
* The set Z is found by trying all possible subsets of nodes adjacent
* to a and b, first of size 0, then of size 1, etc. up to size
* m_nMaxCardinality
* @param edges boolean matrix representing the edges
* @param sepsets set of separating sets
*/
void calcDependencyGraph(boolean[][] edges, SeparationSet[][] sepsets) {
/*calc undirected graph a-b iff D(a,S,b) for all S)*/
SeparationSet oSepSet;
for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
/*start with complete graph*/
for (int iNode2 = 0; iNode2 < maxn(); iNode2++) {
edges[iNode1][iNode2] = true;
}
}
for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
edges[iNode1][iNode1] = false;
}
for (int iCardinality = 0; iCardinality <= getMaxCardinality(); iCardinality++) {
for (int iNode1 = 0; iNode1 <= maxn() - 2; iNode1++) {
for (int iNode2 = iNode1 + 1; iNode2 < maxn(); iNode2++) {
if (edges[iNode1][iNode2]) {
oSepSet = existsSepSet(iNode1, iNode2, iCardinality, edges);
if (oSepSet != null) {
edges[iNode1][iNode2] = false;
edges[iNode2][iNode1] = false;
sepsets[iNode1][iNode2] = oSepSet;
sepsets[iNode2][iNode1] = oSepSet;
// report separating set
System.err.print("I(" + name(iNode1) + ", {");
for (int iNode3 = 0; iNode3 < iCardinality; iNode3++) {
System.err.print(name(oSepSet.m_set[iNode3]) + " ");
}
System.err.print("} ," + name(iNode2) + ")\n");
}
}
}
}
// report current state of dependency graph
System.err.print(iCardinality + " ");
for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
System.err.print(name(iNode1) + " ");
}
System.err.print('\n');
for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
for (int iNode2 = 0; iNode2 < maxn(); iNode2++) {
if (edges[iNode1][iNode2])
System.err.print("X ");
else
System.err.print(". ");
}
System.err.print(name(iNode1) + " ");
System.err.print('\n');
}
}
} /*CalcDependencyGraph*/
/** ExistsSepSet tests if a separating set Z of node a and b exists of given
* cardiniality exists.
* The set Z is found by trying all possible subsets of nodes adjacent
* to both a and b of the requested cardinality.
* @param iNode1 index of first node a
* @param iNode2 index of second node b
* @param nCardinality size of the separating set Z
* @param edges
* @return SeparationSet containing set that separates iNode1 and iNode2 or null if no such set exists
*/
SeparationSet existsSepSet(int iNode1, int iNode2, int nCardinality, boolean [] [] edges)
{
/*Test if a separating set of node d and e exists of cardiniality k*/
// int iNode1_, iNode2_;
int iNode3, iZ;
SeparationSet Z = new SeparationSet();
Z.m_set[nCardinality] = -1;
// iNode1_ = iNode1;
// iNode2_ = iNode2;
// find first candidate separating set Z
if (nCardinality > 0) {
Z.m_set[0] = next(-1, iNode1, iNode2, edges);
iNode3 = 1;
while (iNode3 < nCardinality) {
Z.m_set[iNode3] = next(Z.m_set[iNode3 - 1], iNode1, iNode2, edges);
iNode3++;
}
}
if (nCardinality > 0) {
iZ = maxn() - Z.m_set[nCardinality - 1] - 1;
} else {
iZ = 0;
}
while (iZ >= 0)
{
//check if candidate separating set makes iNode2_ and iNode1_ independent
if (isConditionalIndependent(iNode2, iNode1, Z.m_set, nCardinality)) {
return Z;
}
// calc next candidate separating set
if (nCardinality > 0) {
Z.m_set[nCardinality - 1] = next(Z.m_set[nCardinality - 1], iNode1, iNode2, edges);
}
iZ = nCardinality - 1;
while (iZ >= 0 && Z.m_set[iZ] >= maxn()) {
iZ = nCardinality - 1;
while (iZ >= 0 && Z.m_set[iZ] >= maxn()) {
iZ--;
}
if (iZ < 0) {
break;
}
Z.m_set[iZ] = next(Z.m_set[iZ], iNode1, iNode2, edges);
for (iNode3 = iZ + 1; iNode3 < nCardinality; iNode3++) {
Z.m_set[iNode3] = next(Z.m_set[iNode3 - 1], iNode1, iNode2, edges);
}
iZ = nCardinality - 1;
}
}
return null;
} /*ExistsSepSet*/
/**
* determine index of node that makes next candidate separating set
* adjacent to iNode1 and iNode2, but not iNode2 itself
* @param x index of current node
* @param iNode1 first node
* @param iNode2 second node (must be larger than iNode1)
* @param edges skeleton so far
* @return int index of next node adjacent to iNode1 after x
*/
int next(int x, int iNode1, int iNode2, boolean [] [] edges)
{
x++;
while (x < maxn() && (!edges[iNode1][x] || !edges[iNode2][x] ||x == iNode2)) {
x++;
}
return x;
} /*next*/
/** CalcVeeNodes tries to find V-nodes, i.e. nodes a,b,c such that
* a->c<-b and a-/-b. These nodes are identified by finding nodes
* a,b,c in the skeleton such that a--c, c--b and a-/-b and furthermore
* c is not in the set Z that separates a and b
* @param edges skeleton
* @param arrows resulting partially directed skeleton after all V-nodes
* have been identified
* @param sepsets separating sets
*/
void calcVeeNodes(
boolean[][] edges,
boolean[][] arrows,
SeparationSet[][] sepsets) {
// start with complete empty graph
for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
for (int iNode2 = 0; iNode2 < maxn(); iNode2++) {
arrows[iNode1][iNode2] = false;
}
}
for (int iNode1 = 0; iNode1 < maxn() - 1; iNode1++) {
for (int iNode2 = iNode1 + 1; iNode2 < maxn(); iNode2++) {
if (!edges[iNode1][iNode2]) { /*i nonadj j*/
for (int iNode3 = 0; iNode3 < maxn(); iNode3++) {
if ((iNode3 != iNode1
&& iNode3 != iNode2
&& edges[iNode1][iNode3]
&& edges[iNode2][iNode3])
& (!sepsets[iNode1][iNode2].contains(iNode3))) {
arrows[iNode1][iNode3] = true; /*add arc i->k*/
arrows[iNode2][iNode3] = true; /*add arc j->k*/
}
}
}
}
}
} // CalcVeeNodes
/** CalcArcDirections assigns directions to edges that remain after V-nodes have
* been identified. The arcs are directed using the following rules:
Rule 1: i->j--k & i-/-k => j->k
Rule 2: i->j->k & i--k => i->k
Rule 3 m
/|\
i | k => m->j
i->j<-k \|/
j
Rule 4 m
/ \
i---k => i->m & k->m
i->j \ /
j
Rule 5: if no edges are directed then take a random one (first we can find)
* @param edges skeleton
* @param arrows resulting fully directed DAG
*/
void calcArcDirections(boolean[][] edges, boolean[][] arrows) {
/*give direction to remaining arcs*/
int i, j, k, m;
boolean bFound;
do {
bFound = false;
/*Rule 1: i->j--k & i-/-k => j->k*/
for (i = 0; i < maxn(); i++) {
for (j = 0; j < maxn(); j++) {
if (i != j && arrows[i][j]) {
for (k = 0; k < maxn(); k++) {
if (i != k
&& j != k
&& edges[j][k]
&& !edges[i][k]
&& !arrows[j][k]
&& !arrows[k][j]) {
arrows[j][k] = true;
bFound = true;
}
}
}
}
}
/*Rule 2: i->j->k & i--k => i->k*/
for (i = 0; i < maxn(); i++) {
for (j = 0; j < maxn(); j++) {
if (i != j && arrows[i][j]) {
for (k = 0; k < maxn(); k++) {
if (i != k
&& j != k
&& edges[i][k]
&& arrows[j][k]
&& !arrows[i][k]
&& !arrows[k][i]) {
arrows[i][k] = true;
bFound = true;
}
}
}
}
}
/* Rule 3 m
/|\
i | k => m->j
i->j<-k \|/
j
*/
for (i = 0; i < maxn(); i++) {
for (j = 0; j < maxn(); j++) {
if (i != j && arrows[i][j]) {
for (k = 0; k < maxn(); k++) {
if (k != i
&& k != j
&& arrows[k][j]
&& !edges[k][i]) {
for (m = 0; m < maxn(); m++) {
if (m != i
&& m != j
&& m != k
&& edges[m][i]
&& !arrows[m][i]
&& !arrows[i][m]
&& edges[m][j]
&& !arrows[m][j]
&& !arrows[j][m]
&& edges[m][k]
&& !arrows[m][k]
&& !arrows[k][m]) {
arrows[m][j] = true;
bFound = true;
}
}
}
}
}
}
}
/* Rule 4 m
/ \
i---k => i->m & k->m
i->j \ /
j
*/
for (i = 0; i < maxn(); i++) {
for (j = 0; j < maxn(); j++) {
if (i != j && arrows[j][i]) {
for (k = 0; k < maxn(); k++) {
if (k != i
&& k != j
&& edges[k][j]
&& !arrows[k][j]
&& !arrows[j][k]
&& edges[k][i]
&& !arrows[k][i]
&& !arrows[i][k]) {
for (m = 0; m < maxn(); m++) {
if (m != i
&& m != j
&& m != k
&& edges[m][i]
&& !arrows[m][i]
&& !arrows[i][m]
&& edges[m][k]
&& !arrows[m][k]
&& !arrows[k][m]) {
arrows[i][m] = true;
arrows[k][m] = true;
bFound = true;
}
}
}
}
}
}
}
/*Rule 5: if no edges are directed then take a random one (first we can find)*/
if (!bFound) {
i = 0;
while (!bFound && i < maxn()) {
j = 0;
while (!bFound && j < maxn()) {
if (edges[i][j]
&& !arrows[i][j]
&& !arrows[j][i]) {
arrows[i][j] = true;
bFound = true;
}
j++;
}
i++;
}
}
}
while (bFound);
} // CalcArcDirections
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
public Enumeration listOptions() {
Vector result = new Vector();
result.addElement(new Option(
"\tWhen determining whether an edge exists a search is performed \n"
+ "\tfor a set Z that separates the nodes. MaxCardinality determines \n"
+ "\tthe maximum size of the set Z. This greatly influences the \n"
+ "\tlength of the search. (default 2)",
"cardinality", 1, "-cardinality "));
Enumeration en = super.listOptions();
while (en.hasMoreElements())
result.addElement(en.nextElement());
return result.elements();
} // listOption
/**
* Parses a given list of options.
*
* Valid options are:
*
* -cardinality <num>
* When determining whether an edge exists a search is performed
* for a set Z that separates the nodes. MaxCardinality determines
* the maximum size of the set Z. This greatly influences the
* length of the search. (default 2)
*
* -mbc
* Applies a Markov Blanket correction to the network structure,
* after a network structure is learned. This ensures that all
* nodes in the network are part of the Markov blanket of the
* classifier node.
*
* -S [BAYES|MDL|ENTROPY|AIC|CROSS_CLASSIC|CROSS_BAYES]
* Score type (BAYES, BDeu, MDL, ENTROPY and AIC)
*
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*/
public void setOptions(String[] options) throws Exception {
String tmpStr;
tmpStr = Utils.getOption("cardinality", options);
if (tmpStr.length() != 0)
setMaxCardinality(Integer.parseInt(tmpStr));
else
setMaxCardinality(2);
super.setOptions(options);
} // setOptions
/**
* Gets the current settings of the Classifier.
*
* @return an array of strings suitable for passing to setOptions
*/
public String[] getOptions() {
Vector result;
String[] options;
int i;
result = new Vector();
options = super.getOptions();
for (i = 0; i < options.length; i++)
result.add(options[i]);
result.add("-cardinality");
result.add("" + getMaxCardinality());
return (String[]) result.toArray(new String[result.size()]);
} // getOptions
/**
* @return a string to describe the MaxCardinality option.
*/
public String maxCardinalityTipText() {
return "When determining whether an edge exists a search is performed for a set Z "+
"that separates the nodes. MaxCardinality determines the maximum size of the set Z. " +
"This greatly influences the length of the search. Default value is 2.";
} // maxCardinalityTipText
/**
* This will return a string describing the search algorithm.
* @return The string.
*/
public String globalInfo() {
return "This Bayes Network learning algorithm uses conditional independence tests " +
"to find a skeleton, finds V-nodes and applies a set of rules to find the directions " +
"of the remaining arrows.";
}
/**
* Returns the revision string.
*
* @return the revision
*/
public String getRevision() {
return RevisionUtils.extract("$Revision: 1.8 $");
}
/**
* for testing the class
*
* @param argv the commandline parameters
*/
static public void main(String [] argv) {
try {
BayesNet b = new BayesNet();
b.setSearchAlgorithm( new ICSSearchAlgorithm());
Instances instances = new Instances(new FileReader("C:\\eclipse\\workspace\\weka\\data\\contact-lenses.arff"));
instances.setClassIndex(instances.numAttributes() - 1);
b.buildClassifier(instances);
System.out.println(b.toString());
} catch (Exception e) {
e.printStackTrace();
}
} // main
} // class ICSSearchAlgorithm
© 2015 - 2025 Weber Informatics LLC | Privacy Policy