// org.dom4j.io.XPP3Reader Maven / Gradle / Ivy
//
// Go to download
/*
 * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
 *
 * This software is open source.
 * See the bottom of this file for the licence.
 */

package org.dom4j.io;

import java.io.BufferedReader;
import java.io.CharArrayReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentFactory;
import org.dom4j.Element;
import org.dom4j.ElementHandler;
import org.dom4j.QName;

import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserFactory;

/**
 * <code>XPP3Reader</code> is a Reader of DOM4J documents that uses the fast XML Pull Parser 3.x. It is very fast for use in SOAP style
 * environments.
 * <p>
 * The pull parser and its factory are created lazily on first use and then kept in fields, so the same parser instance is reused by every <code>read</code> call made on one reader
 * object.
 * 
 * @author  Pelle Braendgaard
 * @author  James Strachan
 * @version $Revision: 1.3 $
 */
public class XPP3Reader {
	/** Message of the exception raised when character data is found outside of the root element */
	private static final String TEXT_OUTSIDE_ROOT = "Cannot have text content outside of the root document";

	/** DocumentFactory used to create new document objects */
	private DocumentFactory factory;

	/** XmlPullParser used to parse XML */
	private XmlPullParser xppParser;

	/** XmlPullParserFactory used to create the XmlPullParser */
	private XmlPullParserFactory xppFactory;

	/** DispatchHandler to call when each Element is encountered */
	private DispatchHandler dispatchHandler;

	/**
	 * Creates a reader that falls back to {@link DocumentFactory#getInstance()} when a document factory is first needed.
	 */
	public XPP3Reader() {
	}

	/**
	 * Creates a reader that builds its documents with the given factory.
	 * 
	 * @param factory DocumentFactory used to create DOM4J objects
	 */
	public XPP3Reader(DocumentFactory factory) {
		this.factory = factory;
	}

	/**
	 * Reads a Document from the given File. The reader opened on the file is closed before this method returns, whether or not parsing succeeded.
	 * 
	 * @param  file                   is the File to read from.
	 * @return                        the newly created Document instance, named after the absolute path of the file
	 * @throws DocumentException      if an error occurs during parsing.
	 * @throws IOException            if the file cannot be opened or read
	 */
	public Document read(File file) throws DocumentException, IOException {
		String systemID = file.getAbsolutePath();

		// NOTE(review): FileReader decodes with the platform default charset; an encoding named in the XML declaration is not consulted here - confirm this is intended.
		Reader reader = new BufferedReader(new FileReader(file));

		try {
			return read(reader, systemID);
		} finally {
			// the reader was opened by this method, so it is this method's job to release it
			reader.close();
		}
	}

	/**
	 * Reads a Document from the given URL. The stream opened on the URL is closed before this method returns, whether or not parsing succeeded.
	 * 
	 * @param  url                    URL to read from.
	 * @return                        the newly created Document instance, named after the external form of the URL
	 * @throws DocumentException      if an error occurs during parsing.
	 * @throws IOException            if the URL cannot be opened or read
	 */
	public Document read(URL url) throws DocumentException, IOException {
		String systemID = url.toExternalForm();
		InputStream in = url.openStream();

		try {
			return read(createReader(in), systemID);
		} finally {
			// closing the stream opened above also covers the case where createReader itself fails
			in.close();
		}
	}

	/**
	 * Reads a Document from the given URL or filename.
	 * <p>
	 * If the systemID contains a ':' character then it is assumed to be a URL, otherwise it is assumed to be a file name. Note that this test is purely textual, so any string
	 * containing ':' (a drive-letter path, for instance) takes the URL route. If you want finer grained control over this mechanism then please explicitly pass in either a
	 * {@link URL} or a {@link File} instance instead of a {@link String} to denote the source of the document.
	 * 
	 * @param  systemID               is a URL for a document or a file name.
	 * @return                        the newly created Document instance
	 * @throws DocumentException      if an error occurs during parsing.
	 * @throws IOException            if a URL could not be made from the given string, or the source cannot be opened or read
	 */
	public Document read(String systemID) throws DocumentException, IOException {
		if (systemID.indexOf(':') < 0) {
			// no scheme separator: treat the string as a file name
			return read(new File(systemID));
		}

		// a ':' is present: treat the string as a URL
		return read(new URL(systemID));
	}

	/**
	 * Reads a Document from the given stream. The stream is wrapped by {@link #createReader(InputStream)}; it is not closed by this method.
	 * 
	 * @param  in                     InputStream to read from.
	 * @return                        the newly created Document instance
	 * @throws DocumentException      if an error occurs during parsing.
	 * @throws IOException            if the input cannot be read
	 */
	public Document read(InputStream in) throws DocumentException, IOException {
		return read(createReader(in));
	}

	/**
	 * Reads a Document from the given Reader. The reader is not closed by this method.
	 * 
	 * @param  reader                 is the reader for the input
	 * @return                        the newly created Document instance
	 * @throws DocumentException      if an error occurs during parsing.
	 * @throws IOException            if the input cannot be read
	 */
	public Document read(Reader reader) throws DocumentException, IOException {
		getXPPParser().setInput(reader);

		return parseDocument();
	}

	/**
	 * Reads a Document from the given array of characters
	 * 
	 * @param  text                   is the text to parse
	 * @return                        the newly created Document instance
	 * @throws DocumentException      if an error occurs during parsing.
	 * @throws IOException            if the input cannot be read
	 */
	public Document read(char[] text) throws DocumentException, IOException {
		getXPPParser().setInput(new CharArrayReader(text));

		return parseDocument();
	}

	/**
	 * Reads a Document from the given stream and names it after the given system ID. The stream is not closed by this method.
	 * 
	 * @param  in                     InputStream to read from.
	 * @param  systemID               is the URI for the input
	 * @return                        the newly created Document instance
	 * @throws DocumentException      if an error occurs during parsing.
	 * @throws IOException            if the input cannot be read
	 */
	public Document read(InputStream in, String systemID) throws DocumentException, IOException {
		return read(createReader(in), systemID);
	}

	/**
	 * Reads a Document from the given Reader and names it after the given system ID. The reader is not closed by this method.
	 * 
	 * @param  reader                 is the reader for the input
	 * @param  systemID               is the URI for the input; it is stored as the name of the document
	 * @return                        the newly created Document instance
	 * @throws DocumentException      if an error occurs during parsing.
	 * @throws IOException            if the input cannot be read
	 */
	public Document read(Reader reader, String systemID) throws DocumentException, IOException {
		Document document = read(reader);
		document.setName(systemID);

		return document;
	}

	// Properties
	// -------------------------------------------------------------------------

	// NOTE(review): in the standard XmlPull API the factory and parser calls used below declare the checked XmlPullParserException, which this file neither imports nor
	// declares - confirm against the XmlPull version on the classpath before changing any throws clause.

	/**
	 * Returns the pull parser used by this reader, creating it from {@link #getXPPFactory()} on first use.
	 * 
	 * @return the XmlPullParser shared by all read calls on this reader
	 */
	public XmlPullParser getXPPParser()  {
		if (xppParser == null) {
			xppParser = getXPPFactory().newPullParser();
		}

		return xppParser;
	}

	/**
	 * Returns the factory used to create the pull parser, creating a default one on first use. The factory is switched to namespace aware mode on every call, including a factory
	 * supplied through {@link #setXPPFactory(XmlPullParserFactory)}.
	 * 
	 * @return the namespace aware XmlPullParserFactory
	 */
	public XmlPullParserFactory getXPPFactory()  {
		if (xppFactory == null) {
			xppFactory = XmlPullParserFactory.newInstance();
		}

		xppFactory.setNamespaceAware(true);

		return xppFactory;
	}

	/**
	 * Sets the factory used to create the pull parser. A parser that has already been created is kept, so the new factory only matters if no parser exists yet.
	 * 
	 * @param xPPfactory the XmlPullParserFactory to use
	 */
	public void setXPPFactory(XmlPullParserFactory xPPfactory) {
		this.xppFactory = xPPfactory;
	}

	/**
	 * Returns the factory used to create document objects, falling back to {@link DocumentFactory#getInstance()} when none has been set.
	 * 
	 * @return the DocumentFactory used to create document objects
	 */
	public DocumentFactory getDocumentFactory() {
		if (factory == null) {
			factory = DocumentFactory.getInstance();
		}

		return factory;
	}

	/**
	 * This sets the DocumentFactory used to create new documents. This method allows the building of custom DOM4J tree objects to be implemented easily using a custom derivation of
	 * {@link DocumentFactory}
	 * 
	 * @param documentFactory DocumentFactory used to create DOM4J objects
	 */
	public void setDocumentFactory(DocumentFactory documentFactory) {
		this.factory = documentFactory;
	}

	// NOTE(review): the three handler methods below only register handlers on the DispatchHandler; parseDocument() in this class never consults that handler, so whether the
	// handlers are ever invoked depends on code outside this class - confirm.

	/**
	 * Adds the ElementHandler to be called when the specified path is encountered.
	 * 
	 * @param path    is the path to be handled
	 * @param handler is the ElementHandler to be called by the event based processor.
	 */
	public void addHandler(String path, ElementHandler handler) {
		getDispatchHandler().addHandler(path, handler);
	}

	/**
	 * Removes the ElementHandler from the event based processor, for the specified path.
	 * 
	 * @param path is the path to remove the ElementHandler for.
	 */
	public void removeHandler(String path) {
		getDispatchHandler().removeHandler(path);
	}

	/**
	 * When multiple ElementHandler instances have been registered, this will set a default ElementHandler to be called for any path which does NOT have a handler
	 * registered.
	 * 
	 * @param handler is the ElementHandler to be called by the event based processor.
	 */
	public void setDefaultHandler(ElementHandler handler) {
		getDispatchHandler().setDefaultHandler(handler);
	}

	// Implementation methods
	// -------------------------------------------------------------------------

	/**
	 * Pulls tokens from the parser until the end of the document and assembles them into a new Document. The input must already have been set on the parser.
	 * <p>
	 * Comments and processing instructions are attached to the element that encloses them, or to the document when they occur outside of the root element. Text, CDATA sections
	 * and the replacement text of entity references are appended to the enclosing element. Token types not listed in the switch are skipped.
	 * 
	 * @return                   the newly created Document instance
	 * @throws DocumentException if text or a CDATA section is found outside of the root element
	 * @throws IOException       if the input cannot be read
	 */
	protected Document parseDocument() throws DocumentException, IOException {
		DocumentFactory df = getDocumentFactory();
		Document document = df.createDocument();
		// element currently being filled; null while the parser is outside of the root element
		Element parent = null;
		XmlPullParser pp = getXPPParser();
		pp.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);

		while (true) {
			int type = pp.nextToken();

			switch (type) {
				case XmlPullParser.PROCESSING_INSTRUCTION: {
					attachProcessingInstruction(document, parent, pp.getText());

					break;
				}

				case XmlPullParser.COMMENT: {
					if (parent != null) {
						parent.addComment(pp.getText());
					} else {
						document.addComment(pp.getText());
					}

					break;
				}

				case XmlPullParser.CDSECT: {
					if (parent == null) {
						throw new DocumentException(TEXT_OUTSIDE_ROOT);
					}

					parent.addCDATA(pp.getText());

					break;
				}

				case XmlPullParser.ENTITY_REF: {
					// nextToken() reports references such as &amp; as their own token, so the replacement text has to be appended here or it would vanish from the element content
					String replacement = pp.getText();

					if ((parent != null) && (replacement != null)) {
						parent.addText(replacement);
					}

					break;
				}

				case XmlPullParser.END_DOCUMENT:
					return document;

				case XmlPullParser.START_TAG: {
					Element newElement = buildElement(pp, df);

					if (parent != null) {
						parent.add(newElement);
					} else {
						document.add(newElement);
					}

					// descend: following content belongs to the element just opened
					parent = newElement;

					break;
				}

				case XmlPullParser.END_TAG: {
					// ascend; the root element has no parent element, so this becomes null again
					if (parent != null) {
						parent = parent.getParent();
					}

					break;
				}

				case XmlPullParser.TEXT: {
					String text = pp.getText();

					if (parent == null) {
						throw new DocumentException(TEXT_OUTSIDE_ROOT);
					}

					parent.addText(text);

					break;
				}

				default:
					break;
			}
		}
	}

	/**
	 * Splits the raw processing instruction text at its first space into target and data and adds the instruction to the enclosing element, or to the document when there is no
	 * enclosing element.
	 */
	private void attachProcessingInstruction(Document document, Element parent, String text) {
		int loc = text.indexOf(" ");
		String target = (loc >= 0) ? text.substring(0, loc) : text;
		String data = (loc >= 0) ? text.substring(loc + 1) : "";

		if (parent != null) {
			parent.addProcessingInstruction(target, data);
		} else {
			document.addProcessingInstruction(target, data);
		}
	}

	/**
	 * Creates the element for the start tag the parser is positioned on, including the prefixed namespaces declared on that tag and its attributes.
	 */
	private Element buildElement(XmlPullParser pp, DocumentFactory df) throws DocumentException, IOException {
		QName qname;

		if (pp.getPrefix() == null) {
			qname = df.createQName(pp.getName(), pp.getNamespace());
		} else {
			qname = df.createQName(pp.getName(), pp.getPrefix(), pp.getNamespace());
		}

		Element element = df.createElement(qname);

		// the namespaces declared on this very tag are those counted between the previous depth and the current one
		int nsStart = pp.getNamespaceCount(pp.getDepth() - 1);
		int nsEnd = pp.getNamespaceCount(pp.getDepth());

		for (int i = nsStart; i < nsEnd; i++) {
			String nsPrefix = pp.getNamespacePrefix(i);

			// declarations without a prefix are not added explicitly
			if (nsPrefix != null) {
				element.addNamespace(nsPrefix, pp.getNamespaceUri(i));
			}
		}

		int attributeCount = pp.getAttributeCount();

		for (int i = 0; i < attributeCount; i++) {
			QName attributeName;

			if (pp.getAttributePrefix(i) == null) {
				attributeName = df.createQName(pp.getAttributeName(i));
			} else {
				attributeName = df.createQName(pp.getAttributeName(i), pp.getAttributePrefix(i), pp.getAttributeNamespace(i));
			}

			element.addAttribute(attributeName, pp.getAttributeValue(i));
		}

		return element;
	}

	/**
	 * Returns the dispatch handler that holds the registered ElementHandlers, creating it on first use.
	 * 
	 * @return the DispatchHandler of this reader
	 */
	protected DispatchHandler getDispatchHandler() {
		if (dispatchHandler == null) {
			dispatchHandler = new DispatchHandler();
		}

		return dispatchHandler;
	}

	/**
	 * Replaces the dispatch handler that holds the registered ElementHandlers.
	 * 
	 * @param dispatchHandler the DispatchHandler to use
	 */
	protected void setDispatchHandler(DispatchHandler dispatchHandler) {
		this.dispatchHandler = dispatchHandler;
	}

	/**
	 * Factory method to create a Reader from the given InputStream. The stream is decoded with the platform default charset, because no charset is passed to the
	 * {@link InputStreamReader}, and the result is buffered.
	 * 
	 * @param  in          the stream to wrap
	 * @return             a buffered Reader over the stream
	 * @throws IOException declared so that overriding implementations may fail while creating the reader
	 */
	protected Reader createReader(InputStream in) throws IOException {
		// NOTE(review): an encoding named in the XML declaration is not consulted here - confirm the platform default is intended.
		return new BufferedReader(new InputStreamReader(in));
	}
}

/*
 * Redistribution and use of this software and associated documentation ("Software"), with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of
 * source code must retain copyright statements and notices. Redistributions must also contain a copy of this document. 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name "DOM4J" must not be used to endorse or promote products
 * derived from this Software without prior written permission of MetaStuff, Ltd. For written permission, please contact dom4j-info@metastuff.com. 4. Products derived from this Software may not be
 * called "DOM4J" nor may "DOM4J" appear in their names without prior written permission of MetaStuff, Ltd. DOM4J is a registered trademark of MetaStuff, Ltd. 5. Due credit should be given to the
 * DOM4J Project - http://www.dom4j.org THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE. Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
 */