/*
 * com.mockrunner.util.web.XmlUtil
 * Artifact: mockrunner-core - core classes common to all Mockrunner modules.
 */
package com.mockrunner.util.web;
import java.io.StringReader;
import java.util.List;
import org.apache.xerces.parsers.DOMParser;
import org.cyberneko.html.HTMLConfiguration;
import org.jdom.Element;
import org.jdom.input.DOMBuilder;
import org.jdom.output.XMLOutputter;
import org.xml.sax.InputSource;
import com.mockrunner.base.NestedApplicationException;
/**
 * Utility class for HTML and XML parsing.
 * <p>
 * All methods are static. Failures raised by the underlying parser or
 * JDOM calls are rethrown as unchecked {@link NestedApplicationException}s
 * wrapping the original exception.
 */
public class XmlUtil
{
    /**
     * Convenience method for HTML fragments. Returns the first child
     * element of the body as JDOM {@code Element}.
     * <p>
     * If an HTML document looks like this:
     * <pre>
     * &lt;html&gt;
     * &lt;head&gt;
     * &lt;/head&gt;
     * &lt;body&gt;
     * &lt;h1&gt;
     * &lt;/h1&gt;
     * &lt;/body&gt;
     * &lt;/html&gt;
     * </pre>
     * the method returns the {@code h1} tag as {@code Element}.
     * <p>
     * The body element is looked up as {@code BODY} first, then as
     * {@code body}, and finally by a case-insensitive match (e.g.
     * {@code Body}), because {@link #getHTMLParser} does not normalize
     * the case of element names.
     *
     * @param document the {@code org.jdom.Document}
     * @return the first child element of the body, or {@code null} if
     *         the root element has no body child or the body has no
     *         child elements
     */
    public static Element getBodyFragmentFromJDOMDocument(org.jdom.Document document)
    {
        Element root = document.getRootElement();
        // Exact lookups first so the historical precedence
        // ("BODY" before "body") is preserved.
        Element body = root.getChild("BODY");
        if(null == body)
        {
            body = root.getChild("body");
        }
        if(null == body)
        {
            // Mixed-case tags such as <Body> are not found by the exact lookups.
            body = findChildIgnoreCase(root, "body");
        }
        if(null == body)
        {
            return null;
        }
        List children = body.getChildren();
        if(null == children || children.isEmpty())
        {
            return null;
        }
        return (Element)children.get(0);
    }

    /**
     * Returns the first direct child of {@code parent} whose name equals
     * {@code name} ignoring case, or {@code null} if there is none.
     * Only children without a namespace are considered, which mirrors
     * the behavior of {@code Element.getChild(String)}.
     */
    private static Element findChildIgnoreCase(Element parent, String name)
    {
        List children = parent.getChildren();
        for(int index = 0; index < children.size(); index++)
        {
            Element child = (Element)children.get(index);
            if("".equals(child.getNamespaceURI()) && name.equalsIgnoreCase(child.getName()))
            {
                return child;
            }
        }
        return null;
    }

    /**
     * Delegates to {@link #getBodyFragmentFromJDOMDocument}.
     *
     * @param document the {@code org.jdom.Document}
     * @return the result of {@link #getBodyFragmentFromJDOMDocument}
     * @deprecated use {@link #getBodyFragmentFromJDOMDocument}
     */
    public static Element getBodyFragmentJDOMDocument(org.jdom.Document document)
    {
        return getBodyFragmentFromJDOMDocument(document);
    }

    /**
     * Returns the document's XML content as a string, produced by a
     * default-configured {@code XMLOutputter}.
     *
     * @param document the {@code org.jdom.Document}
     * @return the output as string
     * @throws NestedApplicationException if the outputter fails
     */
    public static String createStringFromJDOMDocument(org.jdom.Document document)
    {
        try
        {
            return new XMLOutputter().outputString(document);
        }
        catch(Exception exc)
        {
            throw new NestedApplicationException(exc);
        }
    }

    /**
     * Creates a JDOM {@code Document} from a specified
     * W3C {@code Document}.
     *
     * @param document the {@code org.w3c.dom.Document}
     * @return the {@code org.jdom.Document}
     */
    public static org.jdom.Document createJDOMDocument(org.w3c.dom.Document document)
    {
        return new DOMBuilder().build(document);
    }

    /**
     * Returns a parser suitable for parsing HTML documents.
     * The NekoHTML configuration is used with the element names
     * property set to {@code "match"} and the attribute names property
     * set to {@code "no-change"}, so that the parser does not force
     * tag and attribute names to a fixed case.
     * This method is used by {@link #parseHTML}.
     *
     * @return instance of {@code org.apache.xerces.parsers.DOMParser}
     *         with Neko configuration
     * @throws NestedApplicationException if the configuration cannot
     *         be created or a property cannot be set
     */
    public static DOMParser getHTMLParser()
    {
        try
        {
            HTMLConfiguration config = new HTMLConfiguration();
            config.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
            config.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");
            return new DOMParser(config);
        }
        catch(Exception exc)
        {
            throw new NestedApplicationException(exc);
        }
    }

    /**
     * Parses the specified HTML with the NekoHTML parser.
     * If you want to use another HTML parser or configure
     * the NekoHTML parser with special features, you can use
     * the {@link #parse} method.
     *
     * @param source the HTML as String
     * @return the parsed document as {@code org.w3c.dom.Document}
     * @throws NestedApplicationException if the parser cannot be
     *         created or parsing fails
     */
    public static org.w3c.dom.Document parseHTML(String source)
    {
        // No try/catch here: getHTMLParser() and parse() already convert every
        // Exception into a NestedApplicationException. Catching again would
        // wrap that exception a second time and bury the real cause.
        return parse(getHTMLParser(), source);
    }

    /**
     * Parses the specified XML with the specified parser.
     * The main purpose of this method is to use the NekoHTML
     * parser with custom features and properties. If you can live
     * with the settings provided by Mockrunner, you can use
     * {@link #parseHTML}.
     *
     * @param parser the parser (must extend
     *        {@code org.apache.xerces.parsers.DOMParser}),
     *        e.g. the one returned by {@link #getHTMLParser}
     * @param source the XML as String
     * @return the parsed document as {@code org.w3c.dom.Document}
     * @throws NestedApplicationException if parsing fails for any reason
     */
    public static org.w3c.dom.Document parse(DOMParser parser, String source)
    {
        try
        {
            parser.parse(new InputSource(new StringReader(source)));
            return parser.getDocument();
        }
        catch(Exception exc)
        {
            throw new NestedApplicationException(exc);
        }
    }
}