
// org.dom4j.io.XPP3Reader (Maven / Gradle / Ivy)
/*
* Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
*
* This software is open source.
* See the bottom of this file for the licence.
*/
package org.dom4j.io;
import java.io.BufferedReader;
import java.io.CharArrayReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentFactory;
import org.dom4j.Element;
import org.dom4j.ElementHandler;
import org.dom4j.QName;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserFactory;
/**
 * <code>XPP3Reader</code> is a reader of DOM4J documents that uses the fast
 * XML Pull Parser 3.x. It is very fast for use in SOAP style environments.
 *
 * <p>
 * An instance caches a single {@link XmlPullParser} and re-targets it on every
 * <code>read</code> call.
 * </p>
 *
 * @author Pelle Braendgaard
 * @author James Strachan
 * @version $Revision: 1.3 $
 */
public class XPP3Reader {
    /** <code>DocumentFactory</code> used to create new document objects */
    private DocumentFactory factory;

    /** <code>XmlPullParser</code> used to parse XML */
    private XmlPullParser xppParser;

    /** <code>XmlPullParserFactory</code> used to create the parser */
    private XmlPullParserFactory xppFactory;

    /** DispatchHandler to call when each <code>Element</code> is encountered */
    private DispatchHandler dispatchHandler;

    /**
     * Creates a reader that lazily falls back to the default
     * {@link DocumentFactory} (see {@link #getDocumentFactory()}).
     */
    public XPP3Reader() {
    }

    /**
     * Creates a reader that builds its documents with the given factory.
     *
     * @param factory the <code>DocumentFactory</code> used to create DOM4J
     *            objects
     */
    public XPP3Reader(DocumentFactory factory) {
        this.factory = factory;
    }

    /**
     * Reads a Document from the given <code>File</code>. The file is opened
     * by this method and is closed again before the method returns, whether or
     * not parsing succeeded.
     *
     * @param file is the <code>File</code> to read from.
     * @return the newly created Document instance; its name is set to the
     *         absolute path of the file
     * @throws DocumentException if an error occurs during parsing.
     * @throws IOException if the file cannot be opened, read or closed
     */
    public Document read(File file) throws DocumentException, IOException {
        String systemID = file.getAbsolutePath();
        // FileReader decodes with the platform default charset.
        Reader reader = new BufferedReader(new FileReader(file));
        try {
            return read(reader, systemID);
        } finally {
            // This method opened the reader, so it is responsible for closing it.
            reader.close();
        }
    }

    /**
     * Reads a Document from the given <code>URL</code>. The stream opened on
     * the URL is closed again before the method returns, whether or not
     * parsing succeeded.
     *
     * @param url <code>URL</code> to read from.
     * @return the newly created Document instance; its name is set to the
     *         external form of the URL
     * @throws DocumentException if an error occurs during parsing.
     * @throws IOException if the URL cannot be opened, read or closed
     */
    public Document read(URL url) throws DocumentException, IOException {
        String systemID = url.toExternalForm();
        InputStream in = url.openStream();
        try {
            return read(createReader(in), systemID);
        } finally {
            // This method opened the stream, so it is responsible for closing it.
            in.close();
        }
    }

    /**
     * Reads a Document from the given URL or filename.
     *
     * <p>
     * If the systemID contains a <code>':'</code> character then it is assumed
     * to be a URL, otherwise it is assumed to be a file name. Note that this
     * means a file name containing a colon (for example one starting with a
     * drive letter) is handed to {@link URL}. If you want finer grained
     * control over this mechanism then please explicitly pass in either a
     * {@link URL} or a {@link File} instance instead of a {@link String} to
     * denote the source of the document.
     * </p>
     *
     * @param systemID is a URL for a document or a file name.
     * @return the newly created Document instance
     * @throws DocumentException if an error occurs during parsing.
     * @throws IOException if a URL could not be made for the given systemID or
     *             the source cannot be read
     */
    public Document read(String systemID) throws DocumentException, IOException {
        if (systemID.indexOf(':') >= 0) {
            // lets assume its a URL
            return read(new URL(systemID));
        } else {
            // lets assume that we are given a file name
            return read(new File(systemID));
        }
    }

    /**
     * Reads a Document from the given stream. The stream is wrapped using
     * {@link #createReader(InputStream)} and is not closed by this method.
     *
     * @param in <code>InputStream</code> to read from.
     * @return the newly created Document instance
     * @throws DocumentException if an error occurs during parsing.
     * @throws IOException if the stream cannot be read
     */
    public Document read(InputStream in) throws DocumentException, IOException {
        return read(createReader(in));
    }

    /**
     * Reads a Document from the given <code>Reader</code>. The reader is not
     * closed by this method.
     *
     * @param reader is the reader for the input
     * @return the newly created Document instance
     * @throws DocumentException if an error occurs during parsing.
     * @throws IOException if the reader cannot be read
     */
    public Document read(Reader reader) throws DocumentException, IOException {
        getXPPParser().setInput(reader);
        return parseDocument();
    }

    /**
     * Reads a Document from the given array of characters.
     *
     * @param text is the text to parse
     * @return the newly created Document instance
     * @throws DocumentException if an error occurs during parsing.
     * @throws IOException if the text cannot be read
     */
    public Document read(char[] text) throws DocumentException, IOException {
        getXPPParser().setInput(new CharArrayReader(text));
        return parseDocument();
    }

    /**
     * Reads a Document from the given stream. The stream is wrapped using
     * {@link #createReader(InputStream)} and is not closed by this method.
     *
     * @param in <code>InputStream</code> to read from.
     * @param systemID is the URI for the input; it becomes the name of the
     *            returned document
     * @return the newly created Document instance
     * @throws DocumentException if an error occurs during parsing.
     * @throws IOException if the stream cannot be read
     */
    public Document read(InputStream in, String systemID) throws DocumentException, IOException {
        return read(createReader(in), systemID);
    }

    /**
     * Reads a Document from the given <code>Reader</code>. The reader is not
     * closed by this method.
     *
     * @param reader is the reader for the input
     * @param systemID is the URI for the input; it becomes the name of the
     *            returned document
     * @return the newly created Document instance
     * @throws DocumentException if an error occurs during parsing.
     * @throws IOException if the reader cannot be read
     */
    public Document read(Reader reader, String systemID) throws DocumentException, IOException {
        Document document = read(reader);
        document.setName(systemID);
        return document;
    }

    // Properties
    // -------------------------------------------------------------------------

    /**
     * Returns the pull parser used by this reader, creating it from
     * {@link #getXPPFactory()} on first use. The same instance is returned on
     * every later call.
     *
     * @return the <code>XmlPullParser</code> used to parse XML
     */
    public XmlPullParser getXPPParser() {
        if (xppParser == null) {
            xppParser = getXPPFactory().newPullParser();
        }
        return xppParser;
    }

    /**
     * Returns the parser factory, creating a default one on first use.
     * Namespace awareness is switched on for the factory on every call,
     * including for a factory supplied through
     * {@link #setXPPFactory(XmlPullParserFactory)}.
     *
     * @return the <code>XmlPullParserFactory</code> used to create parsers
     */
    public XmlPullParserFactory getXPPFactory() {
        if (xppFactory == null) {
            xppFactory = XmlPullParserFactory.newInstance();
        }
        xppFactory.setNamespaceAware(true);
        return xppFactory;
    }

    /**
     * Sets the factory used to create the pull parser. A parser that has
     * already been created by {@link #getXPPParser()} stays cached and is not
     * replaced by this call.
     *
     * @param xPPfactory the <code>XmlPullParserFactory</code> to use
     */
    public void setXPPFactory(XmlPullParserFactory xPPfactory) {
        this.xppFactory = xPPfactory;
    }

    /**
     * Returns the factory used to create document objects, falling back to
     * {@link DocumentFactory#getInstance()} when none has been set.
     *
     * @return the <code>DocumentFactory</code> used to create document
     *         objects
     */
    public DocumentFactory getDocumentFactory() {
        if (factory == null) {
            factory = DocumentFactory.getInstance();
        }
        return factory;
    }

    /**
     * This sets the <code>DocumentFactory</code> used to create new
     * documents. This method allows the building of custom DOM4J tree objects
     * to be implemented easily using a custom derivation of
     * {@link DocumentFactory}.
     *
     * @param documentFactory <code>DocumentFactory</code> used to create
     *            DOM4J objects
     */
    public void setDocumentFactory(DocumentFactory documentFactory) {
        this.factory = documentFactory;
    }

    /**
     * Adds the <code>ElementHandler</code> to be called when the specified
     * path is encountered. The handler is registered on the reader's
     * {@link DispatchHandler}; the parse loop in this class does not itself
     * consult that handler.
     *
     * @param path is the path to be handled
     * @param handler is the <code>ElementHandler</code> to be called by the
     *            event based processor.
     */
    public void addHandler(String path, ElementHandler handler) {
        getDispatchHandler().addHandler(path, handler);
    }

    /**
     * Removes the <code>ElementHandler</code> from the event based processor,
     * for the specified path.
     *
     * @param path is the path to remove the <code>ElementHandler</code> for.
     */
    public void removeHandler(String path) {
        getDispatchHandler().removeHandler(path);
    }

    /**
     * When multiple <code>ElementHandler</code> instances have been
     * registered, this will set a default <code>ElementHandler</code> to be
     * called for any path which does NOT have a handler registered.
     *
     * @param handler is the <code>ElementHandler</code> to be called by the
     *            event based processor.
     */
    public void setDefaultHandler(ElementHandler handler) {
        getDispatchHandler().setDefaultHandler(handler);
    }

    // Implementation methods
    // -------------------------------------------------------------------------

    /**
     * Pulls tokens from the parser until the end of the document and builds
     * the corresponding DOM4J tree. The parser input must already have been
     * set by the caller.
     *
     * @return the newly created Document instance
     * @throws DocumentException if character data, a CDATA section or a
     *             resolved entity reference appears outside of the root
     *             element
     * @throws IOException if the underlying input cannot be read
     */
    protected Document parseDocument() throws DocumentException, IOException {
        DocumentFactory df = getDocumentFactory();
        Document document = df.createDocument();
        // Element currently being populated; null while outside the root.
        Element parent = null;
        XmlPullParser pp = getXPPParser();
        pp.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);

        while (true) {
            int type = pp.nextToken();

            switch (type) {
                case XmlPullParser.PROCESSING_INSTRUCTION: {
                    // The token text is "target data"; split on the first space.
                    String text = pp.getText();
                    int loc = text.indexOf(" ");
                    String target = (loc >= 0) ? text.substring(0, loc) : text;
                    String data = (loc >= 0) ? text.substring(loc + 1) : "";

                    // Keep the instruction where it occurred instead of always
                    // hoisting it to document level.
                    if (parent != null) {
                        parent.addProcessingInstruction(target, data);
                    } else {
                        document.addProcessingInstruction(target, data);
                    }

                    break;
                }

                case XmlPullParser.COMMENT: {
                    if (parent != null) {
                        parent.addComment(pp.getText());
                    } else {
                        document.addComment(pp.getText());
                    }

                    break;
                }

                case XmlPullParser.CDSECT: {
                    if (parent != null) {
                        parent.addCDATA(pp.getText());
                    } else {
                        String msg = "Cannot have text content outside of the " + "root document";
                        throw new DocumentException(msg);
                    }

                    break;
                }

                case XmlPullParser.ENTITY_REF: {
                    // nextToken() reports entity references separately from the
                    // surrounding text, so the replacement text has to be
                    // appended here or it is lost from the element content.
                    String replacement = pp.getText();

                    // A null replacement means the parser could not resolve the
                    // reference; such references are skipped.
                    if (replacement != null) {
                        if (parent != null) {
                            parent.addText(replacement);
                        } else {
                            String msg = "Cannot have text content outside of the " + "root document";
                            throw new DocumentException(msg);
                        }
                    }

                    break;
                }

                case XmlPullParser.END_DOCUMENT:
                    return document;

                case XmlPullParser.START_TAG: {
                    QName qname = (pp.getPrefix() == null)
                            ? df.createQName(pp.getName(), pp.getNamespace())
                            : df.createQName(pp.getName(), pp.getPrefix(), pp.getNamespace());
                    Element newElement = df.createElement(qname);

                    // Namespaces declared on this very tag are the ones added
                    // between the previous depth and the current depth.
                    int nsStart = pp.getNamespaceCount(pp.getDepth() - 1);
                    int nsEnd = pp.getNamespaceCount(pp.getDepth());

                    for (int i = nsStart; i < nsEnd; i++) {
                        // Declarations without a prefix are not added here.
                        if (pp.getNamespacePrefix(i) != null) {
                            newElement.addNamespace(pp.getNamespacePrefix(i), pp.getNamespaceUri(i));
                        }
                    }

                    for (int i = 0; i < pp.getAttributeCount(); i++) {
                        QName qa = (pp.getAttributePrefix(i) == null)
                                ? df.createQName(pp.getAttributeName(i))
                                : df.createQName(pp.getAttributeName(i), pp.getAttributePrefix(i),
                                        pp.getAttributeNamespace(i));
                        newElement.addAttribute(qa, pp.getAttributeValue(i));
                    }

                    if (parent != null) {
                        parent.add(newElement);
                    } else {
                        document.add(newElement);
                    }

                    // Descend: following content belongs to the new element.
                    parent = newElement;

                    break;
                }

                case XmlPullParser.END_TAG: {
                    // Ascend back to the enclosing element (null at the root).
                    if (parent != null) {
                        parent = parent.getParent();
                    }

                    break;
                }

                case XmlPullParser.TEXT: {
                    String text = pp.getText();

                    if (parent != null) {
                        parent.addText(text);
                    } else {
                        String msg = "Cannot have text content outside of the " + "root document";
                        throw new DocumentException(msg);
                    }

                    break;
                }

                default:
                    // Any other token type is ignored.
                    break;
            }
        }
    }

    /**
     * Returns the dispatch handler that holds the registered
     * <code>ElementHandler</code> instances, creating it on first use.
     *
     * @return the <code>DispatchHandler</code> of this reader
     */
    protected DispatchHandler getDispatchHandler() {
        if (dispatchHandler == null) {
            dispatchHandler = new DispatchHandler();
        }

        return dispatchHandler;
    }

    /**
     * Replaces the dispatch handler used by this reader.
     *
     * @param dispatchHandler the <code>DispatchHandler</code> to use
     */
    protected void setDispatchHandler(DispatchHandler dispatchHandler) {
        this.dispatchHandler = dispatchHandler;
    }

    /**
     * Factory method to create a Reader from the given InputStream. The
     * default implementation decodes the bytes with the platform default
     * charset and buffers the result.
     *
     * @param in the stream to wrap
     * @return a buffered <code>Reader</code> over the stream
     * @throws IOException if the reader cannot be created
     */
    protected Reader createReader(InputStream in) throws IOException {
        return new BufferedReader(new InputStreamReader(in));
    }
}
/*
* Redistribution and use of this software and associated documentation ("Software"), with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of
* source code must retain copyright statements and notices. Redistributions must also contain a copy of this document. 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name "DOM4J" must not be used to endorse or promote products
* derived from this Software without prior written permission of MetaStuff, Ltd. For written permission, please contact [email protected]. 4. Products derived from this Software may not be
* called "DOM4J" nor may "DOM4J" appear in their names without prior written permission of MetaStuff, Ltd. DOM4J is a registered trademark of MetaStuff, Ltd. 5. Due credit should be given to the
* DOM4J Project - http://www.dom4j.org THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL METASTUFF, LTD. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE. Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
*/