All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.codehaus.staxmate.dom.DOMConverter Maven / Gradle / Ivy

Go to download

StaxMate is a light-weight framework that adds convenience to streaming XML processing without significant additional overhead. It builds on top of Stax (JSR-173) compliant XML processors such as Woodstox or Sjsxp (the default Stax implementation of JDK 1.6) and offers two basic abstractions: Cursors, which build on XMLStreamReaders, and Output objects, which build on XMLStreamWriters.

There is a newer version: 2.0.1
Show newest version
package org.codehaus.staxmate.dom;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.*;

import javax.xml.XMLConstants;
import javax.xml.stream.*;

import org.codehaus.stax2.XMLStreamReader2;
import org.codehaus.stax2.XMLStreamWriter2;
import org.codehaus.stax2.ri.Stax2ReaderAdapter;
import org.codehaus.stax2.ri.Stax2WriterAdapter;

/**
 * Class that can build DOM trees and fragments using
 * Stax stream readers, and write them out using
 * Stax stream writers.
 */
public class DOMConverter
{
    /*
    ////////////////////////////////////////////////////////
    // Input configuration
    ////////////////////////////////////////////////////////
     */

    protected final DocumentBuilder _docBuilder;

    /**
     * Whether ignorable white space should be ignored, ie not added
     * in the resulting JDOM tree. If true, it will be ignored; if false,
     * it will be added in the tree. Default value if false.
     */
    protected boolean _inputCfgIgnoreWs = false;

    /*
    ////////////////////////////////////////////////////////
    // Output configuration
    ////////////////////////////////////////////////////////
     */

    /*
    ////////////////////////////////////////////////////////
    // State
    ////////////////////////////////////////////////////////
     */

    /*
    ////////////////////////////////////////////////////////
    // Construction
    ////////////////////////////////////////////////////////
     */

    public DOMConverter()
    {
        this(_constructBuilder());
    }

    public DOMConverter(DocumentBuilder b)
    {
        _docBuilder = b;
    }

    private final static DocumentBuilder _constructBuilder()
    {
        try {
            return DocumentBuilderFactory.newInstance().newDocumentBuilder();
        } catch (ParserConfigurationException pe) {
            // should seldom (~= never) occur, so:
            throw new IllegalStateException(pe);
        }
    }


    /*
    ////////////////////////////////////////////////////////
    // Public API, configuration
    ////////////////////////////////////////////////////////
     */

    /**
     * Method used to change whether the build methods will add ignorable
     * (element) white space in the DOM tree or not.
     *

* Whether all-whitespace text segment is ignorable white space or * not is based on DTD read in, as per XML specifications (white space * is only significant in mixed content or pure text elements). */ public void setIgnoreWhitespace(boolean state) { _inputCfgIgnoreWs = state; } /* //////////////////////////////////////////////////////// // Public API, input (DOM from stax stream reader) //////////////////////////////////////////////////////// */ /** * This method will create a {@link org.w3c.dom.Document} instance using * the default JAXP DOM document construction mechanism and * populated using the given StAX stream reader. * Namespace-awareness will be enabled for the * {@link DocumentBuilderFactory} constructed; if this is not wanted, * caller should construct DocumentBuilder separately. *

* Note: underlying stream reader will be closed by the method, but * generally this does NOT mean that the stream will be (as per * Stax 1.0 specs which defined behavior of XMLStreamReader.close(). * * @param r Stream reader from which input is read. * @return Document - DOM document object. * @throws XMLStreamException If the reader threw such exception (to * indicate a parsing or I/O problem) */ public Document buildDocument(XMLStreamReader r) throws XMLStreamException { // Let's enable namespace awareness by default DocumentBuilder db; try{ DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); db = DocumentBuilderFactory.newInstance().newDocumentBuilder(); } catch (ParserConfigurationException pce) { throw new XMLStreamException(pce); } return buildDocument(r, db); } /** * This method will create a {@link org.w3c.dom.Document} instance using * given DocumentBuilder and * populated using the given StAX stream reader. *

* Note: underlying stream reader will be closed by the method, but * generally this does NOT mean that the stream will be (as per * Stax 1.0 specs which defined behavior of XMLStreamReader.close(). * * @param r Stream reader from which input is read. * @return Document - DOM document object. * @throws XMLStreamException If the reader threw such exception (to * indicate a parsing or I/O problem) */ public Document buildDocument(XMLStreamReader r, DocumentBuilder docbuilder) throws XMLStreamException { Document doc = docbuilder.newDocument(); buildDocument(r, doc); return doc; } /** * This method will populate given {@link org.w3c.dom.Document} using * the given StAX stream reader instance. *

* Note: underlying stream reader will be closed by the method, but * generally this does NOT mean that the stream will be (as per * Stax 1.0 specs which defined behavior of XMLStreamReader.close(). * * @param r Stream reader from which input is read. * @throws XMLStreamException If the reader threw such exception (to * indicate a parsing or I/O problem) */ public void buildDocument(XMLStreamReader r, Document doc) throws XMLStreamException { _build(r, doc); r.close(); } /** * This method takes a XMLStreamReader and builds up * a JDOM tree. Recursion has been eliminated by using nodes' * parent/child relationship; this improves performance somewhat * (classic recursion-by-iteration-and-explicit stack transformation) * * @param r0 Stream reader to use for reading the document from which * to build the tree * @param doc Document being built. */ protected void _build(XMLStreamReader r0, Document doc) throws XMLStreamException { XMLStreamReader2 sr = Stax2ReaderAdapter.wrapIfNecessary(r0); QNameRecycler recycler = new QNameRecycler(); boolean nsAware = _isNamespaceAware(sr); Node current = doc; // At top level main_loop: while (true) { int evtType = sr.next(); Node child; switch (evtType) { case XMLStreamConstants.CDATA: child = doc.createCDATASection(sr.getText()); break; case XMLStreamConstants.SPACE: if (_inputCfgIgnoreWs) { continue main_loop; } /* Oh great. DOM is brain-dead in that ignorable white space * can not be added, even though it is legal, and often * reported by StAX/SAX impls... */ if (current == doc) { // better just ignore, thus... 
continue; } // fall through case XMLStreamConstants.CHARACTERS: child = doc.createTextNode(sr.getText()); break; case XMLStreamConstants.COMMENT: child = doc.createComment(sr.getText()); break; case XMLStreamConstants.END_DOCUMENT: break main_loop; case XMLStreamConstants.END_ELEMENT: current = current.getParentNode(); if (current == null) { current = doc; } continue main_loop; case XMLStreamConstants.ENTITY_DECLARATION: case XMLStreamConstants.NOTATION_DECLARATION: /* Shouldn't really get these, but maybe some stream readers * do provide the info. If so, better ignore it -- DTD event * should have most/all we need. */ continue main_loop; case XMLStreamConstants.ENTITY_REFERENCE: child = doc.createEntityReference(sr.getLocalName()); break; case XMLStreamConstants.PROCESSING_INSTRUCTION: child = doc.createProcessingInstruction(sr.getPITarget(), sr.getPIData()); break; case XMLStreamConstants.START_ELEMENT: // Ok, need to add a new element... { String ln = sr.getLocalName(); Element newElem; if (nsAware) { String qname = sr.getPrefixedName(); newElem = doc.createElementNS(sr.getNamespaceURI(), qname); } else { // if non-ns-aware, things are simpler: newElem = doc.createElement(ln); } /* Silly old DOM: must mix in namespace declarations * in there... 
*/ for (int i = 0, len = sr.getNamespaceCount(); i < len; ++i) { String prefix = sr.getNamespacePrefix(i); String qname; if (prefix == null || prefix.length() == 0) { qname = "xmlns"; } else { qname = recycler.getQualified("xmlns", prefix); } newElem.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI, qname, sr.getNamespaceURI(i)); } // And then the attributes: for (int i = 0, len = sr.getAttributeCount(); i < len; ++i) { ln = sr.getAttributeLocalName(i); if (nsAware) { String prefix = sr.getAttributePrefix(i); if (prefix != null && prefix.length() > 0) { ln = recycler.getQualified(prefix, ln); } newElem.setAttributeNS(sr.getAttributeNamespace(i), ln, sr.getAttributeValue(i)); } else { newElem.setAttribute(ln, sr.getAttributeValue(i)); } } // And then 'push' new element... current.appendChild(newElem); current = newElem; continue main_loop; } case XMLStreamConstants.START_DOCUMENT: /* This should only be received at the beginning of document... * so, should we indicate the problem or not? */ /* For now, let it pass: maybe some (broken) readers pass * that info as first event in beginning of doc? */ continue main_loop; case XMLStreamConstants.DTD: /* !!! Note: StAX does not expose enough information about * doctype declaration (specifically, public and system id!); * (altough StAX2 would...) * * Worse, DOM1/2 do not specify a way to create the DocType * node, even if StAX provided it. This is pretty silly, * all in all. 
*/ continue main_loop; // Should never get these, from a stream reader: /* (commented out entries are just FYI; default catches * them all) */ //case XMLStreamConstants.ATTRIBUTE: //case XMLStreamConstants.NAMESPACE: default: throw new XMLStreamException("Unrecognized iterator event type: "+sr.getEventType()+"; should not receive such types (broken stream reader?)"); } if (child != null) { current.appendChild(child); } } } /* //////////////////////////////////////////////////////// // Public API, output (DOM written using stax stream writer) //////////////////////////////////////////////////////// */ /** * Method for writing out given DOM document using specified * stream writer. *

* Note: only regular XMLStreamWriter.close() is * called on the stream writer. This usually means that the underlying * stream is not closed (as per Stax 1.0 specification). */ public void writeDocument(Document doc, XMLStreamWriter sw0) throws XMLStreamException { XMLStreamWriter2 sw = Stax2WriterAdapter.wrapIfNecessary(sw0); sw.writeStartDocument(); for (Node child = doc.getFirstChild(); child != null; child = child.getNextSibling()) { _writeNode(sw, child); } sw.writeEndDocument(); sw.close(); } public void writeFragment(NodeList nodes, XMLStreamWriter sw0) throws XMLStreamException { XMLStreamWriter2 sw = Stax2WriterAdapter.wrapIfNecessary(sw0); for (int i = 0, len = nodes.getLength(); i < len; ++i) { _writeNode(sw, (Node) nodes.item(i)); } } public void writeFragment(Node node, XMLStreamWriter sw0) throws XMLStreamException { XMLStreamWriter2 sw = Stax2WriterAdapter.wrapIfNecessary(sw0); _writeNode(sw, node); } /* //////////////////////////////////////////////////////// // Helper methods, property detection //////////////////////////////////////////////////////// */ protected static boolean _isNamespaceAware(XMLStreamReader r) throws XMLStreamException { Object o = r.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE); /* StAX defaults to namespace aware, so let's use similar * logics (although all compliant implementations really should * return a valid value) */ if ((o instanceof Boolean) && !((Boolean) o).booleanValue()) { return false; } return true; } /* Not used, 06-Mar-2009, tatu protected static boolean _isRepairing(XMLStreamWriter sw) throws XMLStreamException { Object o = sw.getProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES); return (o instanceof Boolean) && ((Boolean) o).booleanValue(); } */ /* //////////////////////////////////////////////////////// // Helper methods, output //////////////////////////////////////////////////////// */ protected void _writeNode(XMLStreamWriter2 sw, Node node) throws XMLStreamException { switch (node.getNodeType()) { 
case Node.ELEMENT_NODE: _writeElement(sw, (Element) node); break; case Node.TEXT_NODE: // Do we care about whether it's actually CDATA? sw.writeCharacters(node.getNodeValue()); break; case Node.CDATA_SECTION_NODE: sw.writeCData(node.getNodeValue()); break; case Node.COMMENT_NODE: sw.writeComment(node.getNodeValue()); break; case Node.ENTITY_REFERENCE_NODE: sw.writeEntityRef(node.getNodeName()); break; case Node.PROCESSING_INSTRUCTION_NODE: String target = node.getNodeName(); String data = node.getNodeValue(); if (data == null || data.length() == 0) { sw.writeProcessingInstruction(target); } else { sw.writeProcessingInstruction(target, data); } break; case Node.DOCUMENT_TYPE_NODE: sw.writeDTD(_buildDTD((DocumentType) node)); break; default: throw new XMLStreamException("Unrecognized or unexpected node class: "+node.getClass().getName()); } } protected String _buildDTD(DocumentType doctype) { /* For StAX 1.0, need to construct it: for StAX2 we could * pass these as they are... */ StringBuilder sb = new StringBuilder(); sb.append(" 0) { // but have sys id sb.append("SYSTEM \""); sb.append(sysId); sb.append('"'); } } else { sb.append("PUBLIC \""); sb.append(pubId); sb.append("\" \""); // System id can not be null, if so sb.append(sysId); sb.append('"'); } String intSubset = doctype.getInternalSubset(); if (intSubset != null && intSubset.length() > 0) { sb.append(" ["); sb.append(intSubset); sb.append(']'); } sb.append('>'); return sb.toString(); } /** * Method called to output an element node and all of its children * (recursively). 
* * @param elem Element to output */ protected void _writeElement(XMLStreamWriter2 sw, Element elem) throws XMLStreamException { String elemPrefix = elem.getPrefix(); if (elemPrefix == null) { elemPrefix = ""; } String elemUri = elem.getNamespaceURI(); if (elemUri == null) { elemUri = ""; } sw.writeStartElement(elemPrefix, elem.getLocalName(), elemUri); /* And in any case, may have attributes; list also contains * namespace declarations (stupid DOM) */ NamedNodeMap attrs = elem.getAttributes(); for (int i = 0, len = attrs.getLength(); i < len; ++i) { Attr attr = (Attr) attrs.item(i); String aPrefix = attr.getPrefix(); String ln = attr.getLocalName(); String value = attr.getValue(); /* With attributes things are bit simpler: they will never use * the default namespace, so if prefix is empty, they will bound * to the empty namespace. */ if (aPrefix == null || aPrefix.length() == 0) { // no NS if ("xmlns".equals(ln)) { sw.writeDefaultNamespace(value); } else { sw.writeAttribute(ln, value); } } else { // Ok: is it a namespace declaration? if ("xmlns".equals(aPrefix)) { sw.writeNamespace(ln, value); } else { sw.writeAttribute(aPrefix, attr.getNamespaceURI(), ln, value); } } } // And then children, recursively: for (Node child = elem.getFirstChild(); child != null; child = child.getNextSibling()) { _writeNode(sw, child); } sw.writeEndElement(); } /* //////////////////////////////////////////////////////// // Helper classes //////////////////////////////////////////////////////// */ /** * We can do simple reuse of commonly seen names */ final static class QNameRecycler { String _lastPrefix = null; String _lastLocalName = null; String _lastQName = null; public QNameRecycler() { } public String getQualified(String prefix, String localName) { /* This mostly/only helps with empty/text-only elements... * might make sense to do 'real' caching... 
*/ if (localName == _lastLocalName && prefix == _lastPrefix) { return _lastQName; } _lastLocalName = localName; _lastPrefix = prefix; StringBuilder sb = new StringBuilder(1 + prefix.length() + localName.length()); sb.append(prefix).append(':').append(localName); _lastQName = sb.toString(); return _lastQName; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy