All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.mockrunner.util.web.XmlUtil Maven / Gradle / Ivy

There is a newer version: 2.0.7
Show newest version
package com.mockrunner.util.web;

import java.io.StringReader;
import java.util.List;

import org.apache.xerces.parsers.DOMParser;
import org.cyberneko.html.HTMLConfiguration;
import org.jdom.Element;
import org.jdom.input.DOMBuilder;
import org.jdom.output.XMLOutputter;
import org.xml.sax.InputSource;

import com.mockrunner.base.NestedApplicationException;

/**
 * Util class for HTML and XML parsing.
 */
public class XmlUtil
{ 
    /**
     * Convinience method for HTML fragments. Returns the body
     * as JDOM Element.
     * 
     * If an HTML documents looks like this:
     * 
     * <html>
     * <head>
     * </head>
     * <body>
     * <h1>
     * </h1>
     * </body>
     * </html>
     * 
* * the method returns the h1 tag as Element. * @param document the org.jdom.Document * @return the body Element */ public static Element getBodyFragmentFromJDOMDocument(org.jdom.Document document) { Element element = document.getRootElement().getChild("BODY"); if(null == element) { element = document.getRootElement().getChild("body"); } if(null != element) { List childs = element.getChildren(); if(null != childs && childs.size() > 0) return (Element)childs.get(0); } return null; } /** * @deprecated use {@link #getBodyFragmentFromJDOMDocument} */ public static Element getBodyFragmentJDOMDocument(org.jdom.Document document) { return getBodyFragmentFromJDOMDocument(document); } /** * Returns the documents XML content as a string. * @param document the org.jdom.Document * @return the output as string */ public static String createStringFromJDOMDocument(org.jdom.Document document) { try { return new XMLOutputter().outputString(document); } catch(Exception exc) { throw new NestedApplicationException(exc); } } /** * Creates a JDOM Document from a specified * W3C Document. * @param document the org.w3c.dom.Document * @return the org.jdom.Document */ public static org.jdom.Document createJDOMDocument(org.w3c.dom.Document document) { return new DOMBuilder().build(document); } /** * Returns a parser suitable for parsing HTML documents. * The NekoHTML parser is used with some settings to * preserve case of tag names and disable namespace processing. * This method is used by {@link #parseHTML}. * @return instance of org.apache.xerces.parsers.DOMParser * with Neko configuration */ public static DOMParser getHTMLParser() { try { HTMLConfiguration config = new HTMLConfiguration(); config.setProperty("http://cyberneko.org/html/properties/names/elems", "match"); config.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change"); DOMParser parser = new DOMParser(config); return parser; } catch(Exception exc) { throw new NestedApplicationException(exc); } } /** * Parses the specified HTML with the NekoHTML parser. * If you want to use another HTML parser or configure * the NekoHTML parser with special features, you can use * the parse method. * @param source the HTML as String * @return the parsed document as org.w3c.dom.Document */ public static org.w3c.dom.Document parseHTML(String source) { try { return parse(getHTMLParser(), source); } catch(Exception exc) { throw new NestedApplicationException(exc); } } /** * Parses the specified XML with the specified parser. * The main purpose of this method is to use the NekoHTML * parser with custom features and properties. If you can live * with the settings provided by Mockrunner, you can use * {@link #parseHTML}. * @param parser the parser (must extend * org.apache.xerces.parsers.DOMParser), * e.g. the one returned by {@link #getHTMLParser} * @param source the XML as String * @return the parsed document as org.w3c.dom.Document */ public static org.w3c.dom.Document parse(DOMParser parser, String source) { try { parser.parse(new InputSource(new StringReader(source))); return parser.getDocument(); } catch(Exception exc) { throw new NestedApplicationException(exc); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy