All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.core.xml.XMLDocument Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This version represents the developer version, the "bleeding edge" of development, you could say. New functionality gets added to this version.

There is a newer version: 3.9.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * XMLDocument.java
 * Copyright (C) 2004-2012 University of Waikato, Hamilton, New Zealand
 */

package weka.core.xml;

import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.Writer;
import java.util.Vector;

import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

import weka.core.RevisionHandler;
import weka.core.RevisionUtils;

/**
 * This class offers some methods for generating, reading and writing 
 * XML documents.
* It can only handle UTF-8. * * @see #PI * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 8034 $ */ public class XMLDocument implements RevisionHandler { /** the parsing instructions "<?xml version=\"1.0\" encoding=\"utf-8\"?>" * (may not show up in Javadoc due to tags!). */ public final static String PI = ""; // DTD placeholders /** the DocType definition. */ public final static String DTD_DOCTYPE = "DOCTYPE"; /** the Element definition. */ public final static String DTD_ELEMENT = "ELEMENT"; /** the AttList definition. */ public final static String DTD_ATTLIST = "ATTLIST"; /** the optional marker. */ public final static String DTD_OPTIONAL = "?"; /** the at least one marker. */ public final static String DTD_AT_LEAST_ONE = "+"; /** the zero or more marker. */ public final static String DTD_ZERO_OR_MORE = "*"; /** the option separator. */ public final static String DTD_SEPARATOR = "|"; /** the CDATA placeholder. */ public final static String DTD_CDATA = "CDATA"; /** the ANY placeholder. */ public final static String DTD_ANY = "ANY"; /** the #PCDATA placeholder. */ public final static String DTD_PCDATA = "#PCDATA"; /** the #IMPLIED placeholder. */ public final static String DTD_IMPLIED = "#IMPLIED"; /** the #REQUIRED placeholder. */ public final static String DTD_REQUIRED = "#REQUIRED"; // often used attributes /** the "version" attribute. */ public final static String ATT_VERSION = "version"; /** the "name" attribute. */ public final static String ATT_NAME = "name"; // often used values /** the value "yes". */ public final static String VAL_YES = "yes"; /** the value "no". */ public final static String VAL_NO = "no"; // members /** the factory for DocumentBuilder. */ protected DocumentBuilderFactory m_Factory = null; /** the instance of a DocumentBuilder. */ protected DocumentBuilder m_Builder = null; /** whether to use a validating parser or not. */ protected boolean m_Validating = false; /** the DOM document. 
*/ protected Document m_Document = null; /** the DOCTYPE node as String. */ protected String m_DocType = null; /** the root node as String. */ protected String m_RootNode = null; /** for XPath queries. */ protected XPath m_XPath = null; /** * initializes the factory with non-validating parser. * * @throws Exception if the construction fails */ public XMLDocument() throws Exception { m_Factory = DocumentBuilderFactory.newInstance(); m_XPath = XPathFactory.newInstance(XPathFactory.DEFAULT_OBJECT_MODEL_URI).newXPath(); setDocType(null); setRootNode(null); setValidating(false); } /** * Creates a new instance of XMLDocument. * * @param xml the xml to parse (if " * Note: this does clear the current DOM document! * * @param validating whether to use a validating parser * @throws Exception if the instantiating of the DocumentBuilder fails */ public void setValidating(boolean validating) throws Exception { m_Validating = validating; m_Factory.setValidating(validating); m_Builder = m_Factory.newDocumentBuilder(); clear(); } /** * returns the parsed DOM document. * * @return the parsed DOM document */ public Document getDocument() { return m_Document; } /** * sets the DOM document to use. * * @param newDocument the DOM document to use */ public void setDocument(Document newDocument) { m_Document = newDocument; } /** * sets the DOCTYPE-String to use in the XML output. Performs NO checking! * if it is null the DOCTYPE is omitted. * * @param docType the DOCTYPE definition to use in XML output */ public void setDocType(String docType) { m_DocType = docType; } /** * returns the current DOCTYPE, can be null. * * @return the current DOCTYPE definition, can be null */ public String getDocType() { return m_DocType; } /** * sets the root node to use in the XML output. Performs NO checking with * DOCTYPE! 
* * @param rootNode the root node to use in the XML output */ public void setRootNode(String rootNode) { if (rootNode == null) m_RootNode = "root"; else m_RootNode = rootNode; } /** * returns the current root node. * * @return the current root node */ public String getRootNode() { return m_RootNode; } /** * sets up an empty DOM document, with the current DOCTYPE and root node. * * @see #setRootNode(String) * @see #setDocType(String) */ public void clear() { newDocument(getDocType(), getRootNode()); } /** * creates a new Document with the given information. * * @param docType the DOCTYPE definition (no checking happens!), can be null * @param rootNode the name of the root node (must correspond to the one * given in docType) * @return returns the just created DOM document for convenience */ public Document newDocument(String docType, String rootNode) { m_Document = getBuilder().newDocument(); m_Document.appendChild(m_Document.createElement(rootNode)); setDocType(docType); return getDocument(); } /** * parses the given XML string (can be XML or a filename) and returns a * DOM Document. * * @param xml the xml to parse (if " -1) return read(new ByteArrayInputStream(xml.getBytes())); else return read(new File(xml)); } /** * parses the given file and returns a DOM document. * * @param file the XML file to parse * @return the parsed DOM document * @throws Exception if something goes wrong with the parsing */ public Document read(File file) throws Exception { m_Document = getBuilder().parse(file); return getDocument(); } /** * parses the given stream and returns a DOM document. * * @param stream the XML stream to parse * @return the parsed DOM document * @throws Exception if something goes wrong with the parsing */ public Document read(InputStream stream) throws Exception { m_Document = getBuilder().parse(stream); return getDocument(); } /** * parses the given reader and returns a DOM document. 
* * @param reader the XML reader to parse * @return the parsed DOM document * @throws Exception if something goes wrong with the parsing */ public Document read(Reader reader) throws Exception { m_Document = getBuilder().parse(new InputSource(reader)); return getDocument(); } /** * writes the current DOM document into the given file. * * @param file the filename to write to * @throws Exception if something goes wrong with the parsing */ public void write(String file) throws Exception { write(new File(file)); } /** * writes the current DOM document into the given file. * * @param file the filename to write to * @throws Exception if something goes wrong with the parsing */ public void write(File file) throws Exception { write(new BufferedWriter(new FileWriter(file))); } /** * writes the current DOM document into the given stream. * * @param stream the filename to write to * @throws Exception if something goes wrong with the parsing */ public void write(OutputStream stream) throws Exception { String xml; xml = toString(); stream.write(xml.getBytes(), 0, xml.length()); stream.flush(); } /** * writes the current DOM document into the given writer. * * @param writer the filename to write to * @throws Exception if something goes wrong with the parsing */ public void write(Writer writer) throws Exception { writer.write(toString()); writer.flush(); } /** * returns all non tag-children from the given node. * * @param parent the node to get the children from * @return a vector containing all the non-text children */ public static Vector getChildTags(Node parent) { return getChildTags(parent, ""); } /** * returns all non tag-children from the given node. 
* * @param parent the node to get the children from * @param name the name of the tags to return, "" for all * @return a vector containing all the non-text children */ public static Vector getChildTags(Node parent, String name) { Vector result; int i; NodeList list; result = new Vector(); list = parent.getChildNodes(); for (i = 0; i < list.getLength(); i++) { if (!(list.item(i) instanceof Element)) continue; // only tags with a certain name? if (name.length() != 0) { if (!((Element) list.item(i)).getTagName().equals(name)) continue; } result.add((Element)list.item(i)); } return result; } /** * Returns the specified result of the XPath expression. * Can return null if an error occurred. * * @param xpath the XPath expression to run on the document * @param type the type of the result * @return the result */ protected Object eval(String xpath, QName type) { Object result; try { result = m_XPath.evaluate(xpath, m_Document, type); } catch (Exception e) { e.printStackTrace(); result = null; } return result; } /** * Returns the nodes that the given xpath expression will find in the * document. Can return null if an error occurred. * * @param xpath the XPath expression to run on the document * @return the nodelist */ public NodeList findNodes(String xpath) { return (NodeList) eval(xpath, XPathConstants.NODESET); } /** * Returns the node represented by the XPath expression. * Can return null if an error occurred. * * @param xpath the XPath expression to run on the document * @return the node */ public Node getNode(String xpath) { return (Node) eval(xpath, XPathConstants.NODE); } /** * Evaluates and returns the boolean result of the XPath expression. * * @param xpath the expression to evaluate * @return the result of the evaluation, null in case of an error */ public Boolean evalBoolean(String xpath) { return (Boolean) eval(xpath, XPathConstants.BOOLEAN); } /** * Evaluates and returns the double result of the XPath expression. 
* * @param xpath the expression to evaluate * @return the result of the evaluation, null in case of * an error */ public Double evalDouble(String xpath) { return (Double) eval(xpath, XPathConstants.NUMBER); } /** * Evaluates and returns the boolean result of the XPath expression. * * @param xpath the expression to evaluate * @return the result of the evaluation */ public String evalString(String xpath) { return (String) eval(xpath, XPathConstants.STRING); } /** * returns the text between the opening and closing tag of a node * (performs a trim() on the result). * * @param node the node to get the text from * @return the content of the given node */ public static String getContent(Element node) { NodeList list; Node item; int i; String result; result = ""; list = node.getChildNodes(); for (i = 0; i < list.getLength(); i++) { item = list.item(i); if (item.getNodeType() == Node.TEXT_NODE) result += item.getNodeValue(); } return result.trim(); } /** * turns the given node into a XML-stringbuffer according to the depth. * * @param buf the stringbuffer so far * @param parent the current node * @param depth the current depth * @return the new XML-stringbuffer */ protected StringBuffer toString(StringBuffer buf, Node parent, int depth) { NodeList list; Node node; int i; int n; String indent; NamedNodeMap atts; // build indent indent = ""; for (i = 0; i < depth; i++) indent += " "; if (parent.getNodeType() == Node.TEXT_NODE) { if (!parent.getNodeValue().trim().equals("")) buf.append(indent + parent.getNodeValue().trim() + "\n"); } else if (parent.getNodeType() == Node.COMMENT_NODE) { buf.append(indent + "\n"); } else { buf.append(indent + "<" + parent.getNodeName()); // attributes? if (parent.hasAttributes()) { atts = parent.getAttributes(); for (n = 0; n < atts.getLength(); n++) { node = atts.item(n); buf.append(" " + node.getNodeName() + "=\"" + node.getNodeValue() + "\""); } } // children? if (parent.hasChildNodes()) { list = parent.getChildNodes(); // just a text node? 
if ( (list.getLength() == 1) && (list.item(0).getNodeType() == Node.TEXT_NODE) ) { buf.append(">"); buf.append(list.item(0).getNodeValue().trim()); buf.append("\n"); } else { buf.append(">\n"); for (n = 0; n < list.getLength(); n++) { node = list.item(n); toString(buf, node, depth + 1); } buf.append(indent + "\n"); } } else { buf.append("/>\n"); } } return buf; } /** * prints the current DOM document to standard out. */ public void print() { System.out.println(toString()); } /** * returns the current DOM document as XML-string. * * @return the document as XML-string representation */ public String toString() { String header; header = PI + "\n\n"; if (getDocType() != null) header += getDocType() + "\n\n"; return toString(new StringBuffer(header), getDocument().getDocumentElement(), 0).toString(); } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision: 8034 $"); } /** * for testing only. takes the name of an XML file as first arg, reads that * file, prints it to stdout and if a second filename is given, writes the * parsed document to that again. * * @param args the commandline arguments * @throws Exception if something goes wrong */ public static void main(String[] args) throws Exception { XMLDocument doc; if (args.length > 0) { doc = new XMLDocument(); // read doc.read(args[0]); // print to stdout doc.print(); // output? if (args.length > 1) { doc.write(args[1]); } } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy