weka.gui.graphvisualizer.BIFParser Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-dev Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.
There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    BIFParser.java
 *    Copyright (C) 2003-2012 University of Waikato, Hamilton, New Zealand
 *
 */
package weka.gui.graphvisualizer;

import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.StringTokenizer;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

/**
 * This class parses an inputstream or a string in XMLBIF ver. 0.3 format, and
 * builds the datastructures that are passed to it through the constructor.
 * 
 * @author Ashraf M. Kibriya ([email protected])
 * @version $Revision: 10153 $ - 24 Apr 2003 - Initial version (Ashraf M.
 *          Kibriya)
 */
public class BIFParser implements GraphConstants {

  /** These hold the nodes and edges of the graph */
  protected ArrayList m_nodes;
  protected ArrayList m_edges;
  /**
   * This holds the name of the graph (i.e. the name of network tag in XMLBIF
   * input)
   */
  protected String graphName;
  /** This holds the string to be parsed */
  protected String inString;
  /** This holds the InputStream to be parsed */
  protected InputStream inStream;

  /**
   * Creates a parser whose XMLBIF input is supplied as a String.
   * 
   * @param input the text to be parsed (should not be null)
   * @param nodes list that receives the GraphNode objects (should be empty)
   * @param edges list that receives the GraphEdge objects (should be empty)
   */
  public BIFParser(String input, ArrayList nodes, ArrayList edges) {
    this.inString = input;
    this.m_nodes = nodes;
    this.m_edges = edges;
  }

  /**
   * Creates a parser whose XMLBIF input is supplied as an InputStream.
   * 
   * @param instream the stream to be parsed (should not be null)
   * @param nodes list that receives the GraphNode objects (should be empty)
   * @param edges list that receives the GraphEdge objects (should be empty)
   */
  public BIFParser(InputStream instream, ArrayList nodes, ArrayList edges) {
    this.inStream = instream;
    this.m_nodes = nodes;
    this.m_edges = edges;
  }

  /**
   * Parses the InputStream or the String that was passed in through the
   * constructor (the stream is used when it is non-null, otherwise the string)
   * as an XML document, and fills m_nodes and m_edges from its VARIABLE and
   * DEFINITION elements.
   * <p>
   * NOTE(review): this copy of the method appears to be damaged. The garbled
   * comment line just before the <code>try</code> block looks like the
   * remains of the end of this method fused with the signature of a separate
   * XMLBIF writer method (the closing brace is labelled "writeXMLBIF"). As
   * written there is no return statement, and the trailing code refers to
   * <code>filename</code> and <code>nodes</code>, which are not declared
   * anywhere in the visible method. The missing text must be restored from
   * the upstream source before this can compile.
   * 
   * @exception Exception if both the inString and inStream are null, i.e. no
   *              input has been provided
   * @exception BIFFormatException if the format of the input is not correct.
   *              The format should conform to XMLBIF version 0.3
   * @exception NumberFormatException if there is an invalid char in the
   *              probability table of a node.
   * @return returns the name of the graph
   */
  public String parse() throws Exception {
    Document dc = null;

    // NOTE(review): only element-content whitespace handling is configured
    // here; nothing disables DTDs or external entities. If the input can come
    // from an untrusted source, consider hardening the factory against XXE.
    javax.xml.parsers.DocumentBuilderFactory dbf = javax.xml.parsers.DocumentBuilderFactory
      .newInstance();
    dbf.setIgnoringElementContentWhitespace(true);
    javax.xml.parsers.DocumentBuilder db = dbf.newDocumentBuilder();

    // The stream takes precedence over the string when both are set
    if (inStream != null) {
      dc = db.parse(inStream);
    } else if (inString != null) {
      dc = db.parse(new org.xml.sax.InputSource(new StringReader(inString)));
    } else {
      throw new Exception("No input given");
    }

    NodeList nl = dc.getElementsByTagName("NETWORK");

    if (nl.getLength() == 0) {
      throw new BIFFormatException("NETWORK tag not found");
    }

    // take only the first network node
    // NOTE(review): item(0) / getFirstChild() are dereferenced unchecked, so a
    // NETWORK without a NAME (or with an empty NAME) would fail with a
    // NullPointerException rather than a BIFFormatException.
    NodeList templist = ((Element) nl.item(0)).getElementsByTagName("NAME");
    graphName = templist.item(0).getFirstChild().getNodeValue();
    // System.out.println("The name of the network is "+
    // templist.item(0).getFirstChild().getNodeValue());

    // Get all the variables
    nl = dc.getElementsByTagName("VARIABLE");
    for (int i = 0; i < nl.getLength(); i++) {

      templist = ((Element) nl.item(i)).getElementsByTagName("NAME");
      // Only the "too many names" case is rejected; zero NAME elements is not
      // checked before item(0) is used below.
      if (templist.getLength() > 1) {
        throw new BIFFormatException("More than one name tags found for "
          + "variable no. " + (i + 1));
      }

      // The variable name is passed as both of the first two constructor
      // arguments of the new node
      String nodename = templist.item(0).getFirstChild().getNodeValue();
      GraphNode n = new GraphNode(nodename, nodename, GraphNode.NORMAL);
      m_nodes.add(n);
      // getting nodes position
      // The first PROPERTY whose text starts with "position" is used: the
      // text between '(' and ',' becomes x, the text between ',' and ')'
      // becomes y. Integer.parseInt throws NumberFormatException on bad text.
      templist = ((Element) nl.item(i)).getElementsByTagName("PROPERTY");
      for (int j = 0; j < templist.getLength(); j++) {
        if (templist.item(j).getFirstChild().getNodeValue()
          .startsWith("position")) {
          String xy = templist.item(j).getFirstChild().getNodeValue();
          // System.out.println("x: "+
          // xy.substring(xy.indexOf('(')+1, xy.indexOf(','))+
          // " y: "+
          // xy.substring(xy.indexOf(',')+1, xy.indexOf(')'))
          // );
          n.x = Integer.parseInt(xy.substring(xy.indexOf('(') + 1,
            xy.indexOf(',')).trim());
          n.y = Integer.parseInt(xy.substring(xy.indexOf(',') + 1,
            xy.indexOf(')')).trim());
          break;
        }
      }
      // getting all the outcomes of the node
      templist = ((Element) nl.item(i)).getElementsByTagName("OUTCOME");
      n.outcomes = new String[templist.getLength()];
      for (int j = 0; j < templist.getLength(); j++) {
        n.outcomes[j] = templist.item(j).getFirstChild().getNodeValue();
        // System.out.println("Outcome["+j+"]: "+n.outcomes[j]);
      }
    } // end for (for variables)

    // Get all the edges and probability tables by getting all the definitions
    nl = dc.getElementsByTagName("DEFINITION");
    for (int i = 0; i < nl.getLength(); i++) {

      templist = ((Element) nl.item(i)).getElementsByTagName("FOR");
      // the Label of the node the edges are coming into
      String nid = templist.item(0).getFirstChild().getNodeValue();

      // getting the GraphNode object with the above label
      // NOTE(review): m_nodes is declared as a raw ArrayList in this copy, so
      // get() yields Object and this assignment needs a type argument or a
      // cast. Also, if no node has a matching ID the loop simply ends on the
      // last node in the list and that node is used; and get(0) throws when
      // no VARIABLE was parsed.
      GraphNode n = m_nodes.get(0);
      for (int j = 1; j < m_nodes.size() && !n.ID.equals(nid); j++) {
        n = m_nodes.get(j);
      }

      templist = ((Element) nl.item(i)).getElementsByTagName("GIVEN");
      int parntOutcomes = 1; // for creating the probability table later on
      // creating all the edges coming into the node
      for (int j = 0; j < templist.getLength(); j++) {
        nid = templist.item(j).getFirstChild().getNodeValue();

        // Same linear lookup (and same no-match behaviour) as for n above
        GraphNode n2 = m_nodes.get(0);
        for (int k = 1; k < m_nodes.size() && !n2.ID.equals(nid); k++) {
          n2 = m_nodes.get(k);
        }
        // Edge runs from the GIVEN node's index to the FOR node's index
        m_edges.add(new GraphEdge(m_nodes.indexOf(n2), m_nodes.indexOf(n), 1));

        // Number of table rows = product of the parents' outcome counts
        parntOutcomes *= n2.outcomes.length;
      }

      // creating the probability table for the node
      templist = ((Element) nl.item(i)).getElementsByTagName("TABLE");
      if (templist.getLength() > 1) {
        throw new BIFFormatException("More than one Probability Table for "
          + n.ID);
      }

      // Table values are separated by spaces, newlines or tabs
      String probs = templist.item(0).getFirstChild().getNodeValue();
      StringTokenizer tk = new StringTokenizer(probs, " \n\t");

      // NOTE(review): the two messages below look swapped. The first branch
      // fires when MORE values are expected than tokens were found (i.e. the
      // table has too few values) yet reports "more values than it should";
      // the second branch is the reverse.
      if (parntOutcomes * n.outcomes.length > tk.countTokens()) {
        throw new BIFFormatException("Probability Table for " + n.ID
          + " contains more values than it should");
      } else if (parntOutcomes * n.outcomes.length < tk.countTokens()) {
        throw new BIFFormatException("Probability Table for " + n.ID
          + " contains less values than it should");
      } else {
        // One row per combination of parent outcomes, one column per outcome
        // of this node; tokens are consumed in row-major order
        n.probs = new double[parntOutcomes][n.outcomes.length];
        for (int r = 0; r < parntOutcomes; r++) {
          for (int c = 0; c < n.outcomes.length; c++) {
            try {
              n.probs[r][c] = Double.parseDouble(tk.nextToken());
            } catch (NumberFormatException ne) {
              // rethrown unchanged, so this try/catch has no effect
              throw ne;
            }
          }
        }
      } // end of creating probability table
    } // endfor (for edges)

    // NOTE(review): the next two comment lines are garbled. Text appears to
    // be missing between "i" and "nodes" on the second line -- presumably the
    // rest of parse() (including its return) and the declaration of the
    // writer method whose body follows. Restore from the upstream source.
    // int tmpMatrix[][] = new int[m_nodes.size()][m_nodes.size()];
    // for(int i=0; i nodes, ArrayList edges) {
    // NOTE(review): 'filename' and 'nodes' used below are not declared in the
    // visible code (see the note above).
    // NOTE(review): if write() throws, outfile is never closed; the
    // IOException is only printed, so the caller cannot tell that writing
    // failed.
    try {
      FileWriter outfile = new FileWriter(filename);

      StringBuffer text = new StringBuffer();

      // NOTE(review): the literals below are almost all bare whitespace plus a
      // lone "]>"; presumably the XML declaration, DTD and element tags that
      // belonged here were stripped from this copy -- confirm against
      // upstream before relying on the output format.
      text.append("\n");
      text.append("\n");
      text.append("\n");
      text.append("	      \n");
      text.append("	\n");
      text.append("	\n");
      text.append("	\n");
      text.append("	      \n");
      text.append("	\n");
      text.append("	\n");
      text.append("	\n");
      text.append("	\n");
      text.append("	\n");
      text.append("	\n");
      text.append("]>\n");
      text.append("\n");
      text.append("\n");
      text.append("\n");
      text.append("\n");
      text.append("" + XMLNormalize(graphName) + "\n");

      // Writing all the node names and their outcomes
      // If outcome is null(ie if the graph was loaded from DOT file) then
      // simply write TRUE
      for (int nodeidx = 0; nodeidx < nodes.size(); nodeidx++) {
        GraphNode n = nodes.get(nodeidx);
        // Only nodes of type NORMAL are written
        if (n.nodeType != GraphNode.NORMAL) {
          continue;
        }

        text.append("\n");
        text.append("\t" + XMLNormalize(n.ID) + "\n");

        if (n.outcomes != null) {
          for (String outcome : n.outcomes) {
            text.append("\t" + XMLNormalize(outcome) + "\n");
          }
        } else {
          text.append("\ttrue\n");
        }

        // Same "position = (x,y)" text that parse() looks for when reading
        text.append("\tposition = (" + n.x + "," + n.y
          + ")\n");
        text.append("\n");
      }

      // Writing all the nodes definitions and their probability tables
      // If probability table is null then simply write 1 for all
      // the possible outcomes of the parents
      for (int nodeidx = 0; nodeidx < nodes.size(); nodeidx++) {
        GraphNode n = nodes.get(nodeidx);
        if (n.nodeType != GraphNode.NORMAL) {
          continue;
        }

        text.append("\n");
        text.append("" + XMLNormalize(n.ID) + "\n");
        int parntOutcomes = 1;
        if (n.prnts != null) {
          // Entries of prnts are used as indices into the node list
          for (int prnt2 : n.prnts) {
            GraphNode prnt = nodes.get(prnt2);
            text.append("\t" + XMLNormalize(prnt.ID) + "\n");
            if (prnt.outcomes != null) {
              parntOutcomes *= prnt.outcomes.length;
            }
          }
        }

        text.append("\n");
        // NOTE(review): the comment above speaks of a null probability table,
        // but the check here is on n.outcomes; n.probs itself is read without
        // a null check when outcomes are present.
        for (int i = 0; i < parntOutcomes; i++) {
          if (n.outcomes != null) {
            for (int outidx = 0; outidx < n.outcomes.length; outidx++) {
              text.append(n.probs[i][outidx] + " ");
            }
          } else {
            text.append("1");
          }
          text.append('\n');
        }
        text.append("\n");
        text.append("\n");
      }
      text.append("\n");
      text.append("\n");

      outfile.write(text.toString());
      outfile.close();
    } catch (IOException ex) {
      ex.printStackTrace();
    }
  }// writeXMLBIF

  /**
   * XMLNormalize converts the five standard XML entities in a string g.e. the
   * string V&D's is returned as V&D's
   * 
   * @author Remco Bouckaert ([email protected])
   * @param sStr string to normalize
   * @return normalized string
   */
  private static String XMLNormalize(String sStr) {
    StringBuffer sStr2 = new StringBuffer();
    for (int iStr = 0; iStr < sStr.length(); iStr++) {
      char c = sStr.charAt(iStr);
      switch (c) {
      case '&':
        sStr2.append("&");
        break;
      case '\'':
        sStr2.append("'");
        break;
      case '\"':
        sStr2.append(""");
        break;
      case '<':
        sStr2.append("<");
        break;
      case '>':
        sStr2.append(">");
        break;
      default:
        sStr2.append(c);
      }
    }
    return sStr2.toString();
  } // XMLNormalize

} // BIFParser