org.dom4j.io.XMPPPacketReader Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of xmppserver Show documentation
The newest version!
/*
 * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved.
 *
 * This software is open source.
 * See the bottom of this file for the licence.
 *
 */

package org.dom4j.io;

import org.dom4j.*;
import org.jivesoftware.openfire.net.MXParser;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
import org.xmlpull.v1.XmlPullParserFactory;

import java.io.*;
import java.net.URL;
import java.util.Arrays;
import java.util.Collection;

/**
 * <p>XMPPPacketReader is a Reader of DOM4J documents that uses the fast
 * XML Pull Parser 3.x.</p>
 *
 * <p>It is very fast for use in SOAP style environments.</p>
 *
 * @author Pelle Braendgaard
 * @author James Strachan
 */
public class XMPPPacketReader {

    /**
     * DocumentFactory used to create new document objects
     */
    private DocumentFactory factory;

    /**
     * XmlPullParser used to parse XML
     */
    private MXParser xppParser;

    /**
     * XmlPullParser used to parse XML
     */
    private XmlPullParserFactory xppFactory;

    /**
     * DispatchHandler to call when each Element is encountered
     */
    private DispatchHandler dispatchHandler;

    /**
     * Last time a full Document was read or a heartbeat was received. Hearbeats
     * are represented as whitespaces received while a Document is not being parsed.
     */
    private long lastActive = System.currentTimeMillis();

    /**
     * Stream of various endpoints (eg: s2s, c2s) use different default namespaces. To be able to use a stanza that's
     * parsed on one type of endpoint in the context of another endpoint, we explicitly ignore these namespaced. This
     * allows us to forward, for instance, a stanza received via C2S (which has the "jabber:client" default namespace)
     * on a S2S stream (which has the "jabber:server" default namespace).
     *
     * @see RFC 6120, 4.8.3. XMPP Content Namespaces
     */
    public static final Collection IGNORED_NAMESPACE_ON_STANZA = Arrays.asList( "jabber:client", "jabber:server", "jabber:connectionmanager", "jabber:component:accept", "http://jabber.org/protocol/httpbind" );

    public XMPPPacketReader() {
    }

    public XMPPPacketReader(DocumentFactory factory) {
        this.factory = factory;
    }


    /**
     * Reads a Document from the given File
     *
     * @param file is the File to read from.
     * @return the newly created Document instance
     * @throws DocumentException              if an error occurs during parsing.
     * @throws java.net.MalformedURLException if a URL could not be made for the given File
     */
    public Document read(File file) throws DocumentException, IOException, XmlPullParserException {
        String systemID = file.getAbsolutePath();
        return read(new BufferedReader(new FileReader(file)), systemID);
    }

    /**
     * Reads a Document from the given URL
     *
     * @param url URL to read from.
     * @return the newly created Document instance
     * @throws DocumentException if an error occurs during parsing.
     */
    public Document read(URL url) throws DocumentException, IOException, XmlPullParserException {
        String systemID = url.toExternalForm();
        return read(createReader(url.openStream()), systemID);
    }

    /**
     * Reads a Document from the given URL or filename.
     * 
     * If the systemID contains a ':' character then it is
     * assumed to be a URL otherwise its assumed to be a file name.
     * If you want finer grained control over this mechansim then please
     * explicitly pass in either a {@link URL} or a {@link File} instance
     * instead of a {@link String} to denote the source of the document.
     * 
     *
     * @param systemID is a URL for a document or a file name.
     * @return the newly created Document instance
     * @throws DocumentException              if an error occurs during parsing.
     * @throws java.net.MalformedURLException if a URL could not be made for the given File
     */
    public Document read(String systemID) throws DocumentException, IOException, XmlPullParserException {
        if (systemID.indexOf(':') >= 0) {
            // lets assume its a URL
            return read(new URL(systemID));
        }
        else {
            // lets assume that we are given a file name
            return read(new File(systemID));
        }
    }

    /**
     * Reads a Document from the given stream
     *
     * @param in InputStream to read from.
     * @return the newly created Document instance
     * @throws DocumentException if an error occurs during parsing.
     */
    public Document read(InputStream in) throws DocumentException, IOException, XmlPullParserException {
        return read(createReader(in));
    }

    /**
     * Reads a Document from the given stream
     *
     * @param charSet the charSet that the input is encoded in
     * @param in InputStream to read from.
     * @return the newly created Document instance
     * @throws DocumentException if an error occurs during parsing.
     */
    public Document read(String charSet, InputStream in)
            throws DocumentException, IOException, XmlPullParserException
    {
        return read(createReader(in, charSet));
    }

    /**
     * Reads a Document from the given Reader
     *
     * @param reader is the reader for the input
     * @return the newly created Document instance
     * @throws DocumentException if an error occurs during parsing.
     */
    public Document read(Reader reader) throws DocumentException, IOException, XmlPullParserException {
        getXPPParser().setInput(reader);
        return parseDocument();
    }

    /**
     * Reads a Document from the given array of characters
     *
     * @param text is the text to parse
     * @return the newly created Document instance
     * @throws DocumentException if an error occurs during parsing.
     */
    public Document read(char[] text) throws DocumentException, IOException, XmlPullParserException {
        getXPPParser().setInput(new CharArrayReader(text));
        return parseDocument();
    }

    /**
     * Reads a Document from the given stream
     *
     * @param in       InputStream to read from.
     * @param systemID is the URI for the input
     * @return the newly created Document instance
     * @throws DocumentException if an error occurs during parsing.
     */
    public Document read(InputStream in, String systemID) throws DocumentException, IOException, XmlPullParserException {
        return read(createReader(in), systemID);
    }

    /**
     * Reads a Document from the given Reader
     *
     * @param reader   is the reader for the input
     * @param systemID is the URI for the input
     * @return the newly created Document instance
     * @throws DocumentException if an error occurs during parsing.
     */
    public Document read(Reader reader, String systemID) throws DocumentException, IOException, XmlPullParserException {
        Document document = read(reader);
        document.setName(systemID);
        return document;
    }


    // Properties
    //-------------------------------------------------------------------------

    public MXParser getXPPParser() throws XmlPullParserException {
        if (xppParser == null) {
            xppParser = (MXParser) getXPPFactory().newPullParser();
        }
        return xppParser;
    }

    public XmlPullParserFactory getXPPFactory() throws XmlPullParserException {
        if (xppFactory == null) {
            xppFactory = XmlPullParserFactory.newInstance(MXParser.class.getName(), null);
        }
        xppFactory.setNamespaceAware(true);
        return xppFactory;
    }

    public void setXPPFactory(XmlPullParserFactory xppFactory) {
        this.xppFactory = xppFactory;
    }

    /**
     * @return the DocumentFactory used to create document objects
     */
    public DocumentFactory getDocumentFactory() {
        if (factory == null) {
            factory = DocumentFactory.getInstance();
        }
        return factory;
    }

    /**
     * This sets the DocumentFactory used to create new documents.
     * This method allows the building of custom DOM4J tree objects to be implemented
     * easily using a custom derivation of {@link DocumentFactory}
     *
     * @param factory DocumentFactory used to create DOM4J objects
     */
    public void setDocumentFactory(DocumentFactory factory) {
        this.factory = factory;
    }


    /**
     * Adds the ElementHandler to be called when the
     * specified path is encounted.
     *
     * @param path    is the path to be handled
     * @param handler is the ElementHandler to be called
     *                by the event based processor.
     */
    public void addHandler(String path, ElementHandler handler) {
        getDispatchHandler().addHandler(path, handler);
    }

    /**
     * Removes the ElementHandler from the event based
     * processor, for the specified path.
     *
     * @param path is the path to remove the ElementHandler for.
     */
    public void removeHandler(String path) {
        getDispatchHandler().removeHandler(path);
    }

    /**
     * When multiple ElementHandler instances have been
     * registered, this will set a default ElementHandler
     * to be called for any path which does NOT have a handler
     * registered.
     *
     * @param handler is the ElementHandler to be called
     *                by the event based processor.
     */
    public void setDefaultHandler(ElementHandler handler) {
        getDispatchHandler().setDefaultHandler(handler);
    }

    /**
     * Returns the last time a full Document was read or a heartbeat was received. Hearbeats
     * are represented as whitespaces or \n received while a Document is not being parsed.
     *
     * @return the time in milliseconds when the last document or heartbeat was received.
     */
    public long getLastActive() {
        long lastHeartbeat = 0;
        try {
            lastHeartbeat = getXPPParser().getLastHeartbeat();
        }
        catch (XmlPullParserException e) {}
        return lastActive > lastHeartbeat ? lastActive : lastHeartbeat;
    }

    /*
     * DANIELE: Add parse document by string
     */
    public Document parseDocument(String xml) throws DocumentException {
        /*
        // Long way with reuse of DocumentFactory.
        DocumentFactory df = getDocumentFactory();
        SAXReader reader = new SAXReader( df );
        Document document = reader.read( new StringReader( xml );*/

        // Simple way
        // TODO Optimize. Do not create a sax reader for each parsing
        Document document = DocumentHelper.parseText(xml);

        return document;
    }

    // Implementation methods
    //-------------------------------------------------------------------------
    public Document parseDocument() throws DocumentException, IOException, XmlPullParserException {
        DocumentFactory df = getDocumentFactory();
        Document document = df.createDocument();
        Element parent = null;
        XmlPullParser pp = getXPPParser();
        int count = 0;
        while (true) {
            int type = -1;
            type = pp.nextToken();
            switch (type) {
                case XmlPullParser.PROCESSING_INSTRUCTION: {
                    String text = pp.getText();
                    int loc = text.indexOf(" ");
                    if (loc >= 0) {
                        document.addProcessingInstruction(text.substring(0, loc),
                                text.substring(loc + 1));
                    }
                    else {
                        document.addProcessingInstruction(text, "");
                    }
                    break;
                }
                case XmlPullParser.COMMENT: {
                    if (parent != null) {
                        parent.addComment(pp.getText());
                    }
                    else {
                        document.addComment(pp.getText());
                    }
                    break;
                }
                case XmlPullParser.CDSECT: {
                    String text = pp.getText();
                    if (parent != null) {
                        parent.addCDATA(text);
                    }
                    else {
                        if (text.trim().length() > 0) {
                            throw new DocumentException("Cannot have text content outside of the root document");
                        }
                    }
                    break;

                }
                case XmlPullParser.ENTITY_REF: {
                    String text = pp.getText();
                    if (parent != null) {
                        parent.addText(text);
                    }
                    else {
                        if (text.trim().length() > 0) {
                            throw new DocumentException("Cannot have an entityref outside of the root document");
                        }
                    }
                    break;
                }
                case XmlPullParser.END_DOCUMENT: {
                    return document;
                }
                case XmlPullParser.START_TAG: {
                    QName qname = (pp.getPrefix() == null) ? df.createQName(pp.getName(), pp.getNamespace()) : df.createQName(pp.getName(), pp.getPrefix(), pp.getNamespace());
                    Element newElement;

                    // Strip namespace from all default-namespaced elements if
                    // all ancestors have the same namespace and it's a content
                    // namespace.
                    boolean dropNamespace = false;
                    if (pp.getPrefix() == null && IGNORED_NAMESPACE_ON_STANZA.contains(qname.getNamespaceURI())) {
                        // Default namespaced element which is in a content namespace,
                        // so we'll drop. Example, stanzas, 
                        dropNamespace = true;
                        for (Element el = parent; el != null; el = el.getParent()) {
                            final String defaultNS = el.getNamespaceForPrefix("").getURI();
                            if (defaultNS.equals("")) {
                                // We've cleared this one already, just bail.
                                break;
                            }
                            if (!defaultNS.equals(qname.getNamespaceURI())) {
                                // But if there's an ancestor element, we shouldn't drop
                                // after all. Example: forwarded message.
                                dropNamespace = false;
                                break;
                            }
                        }
                    }
                    if ( dropNamespace ) {
                        newElement = df.createElement(pp.getName());
                    }
                    else {
                        newElement = df.createElement(qname);
                    }
                    int nsStart = pp.getNamespaceCount(pp.getDepth() - 1);
                    int nsEnd = pp.getNamespaceCount(pp.getDepth());
                    for (int i = nsStart; i < nsEnd; i++) {
                        final String namespacePrefix = pp.getNamespacePrefix( i );
                        final String namespaceUri = pp.getNamespaceUri( i );
                        if ( namespacePrefix != null ) {
                            newElement.addNamespace(namespacePrefix, namespaceUri);
                        } else if ( parent == null && IGNORED_NAMESPACE_ON_STANZA.contains( namespaceUri ) ) {
                            // Don't copy.
                        } else if ( !(dropNamespace && namespaceUri.equals( qname.getNamespaceURI() ) ) ) {
                            // Do not include certain default namespace on the root-element ('stream') or stanza level. This makes stanzas re-usable between, for example, c2s and s2s.
                            newElement.addNamespace( "", namespaceUri );
                        }
                    }
                    for (int i = 0; i < pp.getAttributeCount(); i++) {
                        QName qa = (pp.getAttributePrefix(i) == null) ? df.createQName(pp.getAttributeName(i)) : df.createQName(pp.getAttributeName(i), pp.getAttributePrefix(i), pp.getAttributeNamespace(i));
                        newElement.addAttribute(qa, pp.getAttributeValue(i));
                    }
                    if (parent != null) {
                        parent.add(newElement);
                    }
                    else {
                        document.add(newElement);
                    }
                    parent = newElement;
                    count++;
                    break;
                }
                case XmlPullParser.END_TAG: {
                    if (parent != null) {
                        parent = parent.getParent();
                    }
                    count--;
                    if (count < 1) {
                        // Update the last time a Document was received
                        lastActive = System.currentTimeMillis();
                        return document;
                    }
                    break;
                }
                case XmlPullParser.TEXT: {
                    String text = pp.getText();
                    if (parent != null) {
                        parent.addText(text);
                    }
                    else {
                        if (text.trim().length() > 0) {
                            throw new DocumentException("Cannot have text content outside of the root document");
                        }
                    }
                    break;
                }
                default:
                {

                }
            }
        }
    }

    protected DispatchHandler getDispatchHandler() {
        if (dispatchHandler == null) {
            dispatchHandler = new DispatchHandler();
        }
        return dispatchHandler;
    }

    protected void setDispatchHandler(DispatchHandler dispatchHandler) {
        this.dispatchHandler = dispatchHandler;
    }

    /**
     * Factory method to create a Reader from the given InputStream.
     */
    protected Reader createReader(InputStream in) throws IOException {
        return new BufferedReader(new InputStreamReader(in));
    }

    private Reader createReader(InputStream in, String charSet) throws UnsupportedEncodingException {
        return new BufferedReader(new InputStreamReader(in, charSet));
    }
}

/*
 * Redistribution and use of this software and associated documentation
 * ("Software"), with or without modification, are permitted provided
 * that the following conditions are met:
 *
 * 1. Redistributions of source code must retain copyright
 *    statements and notices.  Redistributions must also contain a
 *    copy of this document.
 *
 * 2. Redistributions in binary form must reproduce the
 *    above copyright notice, this list of conditions and the
 *    following disclaimer in the documentation and/or other
 *    materials provided with the distribution.
 *
 * 3. The name "DOM4J" must not be used to endorse or promote
 *    products derived from this Software without prior written
 *    permission of MetaStuff, Ltd.  For written permission,
 *    please contact dom4j-info@metastuff.com.
 *
 * 4. Products derived from this Software may not be called "DOM4J"
 *    nor may "DOM4J" appear in their names without prior written
 *    permission of MetaStuff, Ltd. DOM4J is a registered
 *    trademark of MetaStuff, Ltd.
 *
 * 5. Due credit should be given to the DOM4J Project -
 *    http://www.dom4j.org
 *
 * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
 * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
 * METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved.
 *
 */