// groovy.util.XmlParser Maven / Gradle / Ivy
/*
* Copyright 2003-2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package groovy.util;
import groovy.xml.FactorySupport;
import groovy.xml.QName;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.DTDHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
* A helper class for parsing XML into a tree of Node instances for a
* simple way of processing XML. This parser does not preserve the XML
* InfoSet - if that's what you need try using W3C DOM, dom4j, JDOM, XOM etc.
* This parser ignores comments and processing instructions and converts
* the XML into a Node for each element in the XML with attributes
* and child Nodes and Strings. This simple model is sufficient for
* most simple use cases of processing XML.
*
* Example usage:
* <pre>
* def xml = '&lt;root&gt;&lt;one a1="uno!"/&gt;&lt;two&gt;Some text!&lt;/two&gt;&lt;/root&gt;'
* def rootNode = new XmlParser().parseText(xml)
* assert rootNode.name() == 'root'
* assert rootNode.one[0].@a1 == 'uno!'
* assert rootNode.two.text() == 'Some text!'
* rootNode.children().each { assert it.name() in ['one','two'] }
* </pre>
*
*
* @author James Strachan
* @author Paul King
*/
public class XmlParser implements ContentHandler {
// Text buffer for the parser. NOTE(review): the code that fills and flushes it
// is outside this chunk - presumably character data collected between element events.
private StringBuilder bodyText = new StringBuilder();
// Working stack, emptied in endDocument(). NOTE(review): presumably holds the
// chain of open element Nodes; the push/pop code is outside this chunk.
private List stack = new ArrayList();
// NOTE(review): not assigned or read in this chunk - presumably set from the
// ContentHandler setDocumentLocator callback.
private Locator locator;
// Underlying SAX reader; assigned by every constructor and the target of the
// delegated XMLReader accessor methods.
private XMLReader reader;
// Result of the current/last parse: reset to null in startDocument() and
// returned by the parse(...) methods.
private Node parent;
// Backing field for isTrimWhitespace()/setTrimWhitespace(); off by default.
private boolean trimWhitespace = false;
// Backing field for isKeepIgnorableWhitespace()/setKeepIgnorableWhitespace(); off by default.
private boolean keepIgnorableWhitespace = false;
// Set by the three-argument constructor and by setNamespaceAware(); stays false
// when the parser is built from an existing XMLReader or SAXParser.
private boolean namespaceAware;
/**
* Creates a non-validating and namespace-aware <code>XmlParser</code>
* which does not allow DOCTYPE declarations in documents.
*
* @throws ParserConfigurationException if no parser which satisfies the requested configuration can be created.
* @throws SAXException for SAX errors.
*/
public XmlParser() throws ParserConfigurationException, SAXException {
// Defaults: validating = false, namespaceAware = true.
this(false, true);
}
/**
* Creates a <code>XmlParser</code> which does not allow DOCTYPE declarations in documents.
*
* @param validating <code>true</code> if the parser should validate documents as they are parsed; <code>false</code> otherwise.
* @param namespaceAware <code>true</code> if the parser should provide support for XML namespaces; <code>false</code> otherwise.
* @throws ParserConfigurationException if no parser which satisfies the requested configuration can be created.
* @throws SAXException for SAX errors.
*/
public XmlParser(boolean validating, boolean namespaceAware) throws ParserConfigurationException, SAXException {
// Third argument is allowDocTypeDeclaration: DOCTYPE declarations are rejected by default.
this(validating, namespaceAware, false);
}
/**
* Creates a <code>XmlParser</code>.
*
* @param validating <code>true</code> if the parser should validate documents as they are parsed; <code>false</code> otherwise.
* @param namespaceAware <code>true</code> if the parser should provide support for XML namespaces; <code>false</code> otherwise.
* @param allowDocTypeDeclaration <code>true</code> if the parser should provide support for DOCTYPE declarations; <code>false</code> otherwise.
* @throws ParserConfigurationException if no parser which satisfies the requested configuration can be created.
* @throws SAXException for SAX errors.
*/
public XmlParser(boolean validating, boolean namespaceAware, boolean allowDocTypeDeclaration) throws ParserConfigurationException, SAXException {
SAXParserFactory factory = FactorySupport.createSaxParserFactory();
factory.setNamespaceAware(namespaceAware);
this.namespaceAware = namespaceAware;
factory.setValidating(validating);
setQuietly(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
setQuietly(factory, "http://apache.org/xml/features/disallow-doctype-decl", !allowDocTypeDeclaration);
reader = factory.newSAXParser().getXMLReader();
}
public XmlParser(XMLReader reader) {
// The supplied reader is used as-is: no validation, namespace or DOCTYPE features
// are configured here, and the namespaceAware field keeps its default (false).
this.reader = reader;
}
public XmlParser(SAXParser parser) throws SAXException {
reader = parser.getXMLReader();
}
private void setQuietly(SAXParserFactory factory, String feature, boolean value) {
    // Best-effort: a factory that does not accept the feature is simply left unchanged.
    try {
        factory.setFeature(feature, value);
    } catch (SAXNotSupportedException ignored) {
        // feature cannot be set to the requested value - deliberately ignored
    } catch (SAXNotRecognizedException ignored) {
        // feature name is not known to this factory - deliberately ignored
    } catch (ParserConfigurationException ignored) {
        // factory cannot be configured this way - deliberately ignored
    }
}
/**
* Returns the current trim whitespace setting.
*
* @return true if whitespace will be trimmed
*/
public boolean isTrimWhitespace() {
    // Plain accessor for the trimWhitespace flag.
    return this.trimWhitespace;
}
/**
* Sets the trim whitespace setting value.
*
* @param trimWhitespace the desired setting value
*/
public void setTrimWhitespace(boolean trimWhitespace) {
// Stores the flag only; nothing else is reconfigured here.
this.trimWhitespace = trimWhitespace;
}
/**
* Returns the current keep ignorable whitespace setting.
*
* @return true if ignorable whitespace will be kept (default false)
*/
public boolean isKeepIgnorableWhitespace() {
    // Plain accessor for the keepIgnorableWhitespace flag.
    return this.keepIgnorableWhitespace;
}
/**
* Sets the keep ignorable whitespace setting value.
*
* @param keepIgnorableWhitespace the desired new value
*/
public void setKeepIgnorableWhitespace(boolean keepIgnorableWhitespace) {
// Stores the flag only; nothing else is reconfigured here.
this.keepIgnorableWhitespace = keepIgnorableWhitespace;
}
/**
* Parses the content of the given file as XML turning it into a tree
* of Nodes.
*
* @param file the File containing the XML to be parsed
* @return the root node of the parsed tree of Nodes
* @throws SAXException Any SAX exception, possibly
* wrapping another exception.
* @throws IOException An IO exception from the parser,
* possibly from a byte stream or character stream
* supplied by the application.
*/
public Node parse(File file) throws IOException, SAXException {
    // This method opens the stream, so it is also responsible for releasing it.
    final InputStream stream = new FileInputStream(file);
    try {
        final InputSource input = new InputSource(stream);
        // NOTE(review): the system id is built by string concatenation rather than
        // File.toURI(); kept unchanged so relative-reference resolution stays as before.
        input.setSystemId("file://" + file.getAbsolutePath());
        getXMLReader().parse(input);
    } finally {
        // Parsers normally close the stream as part of end-of-parse cleanup, but that
        // is not something to rely on when parsing aborts. Closing again is harmless.
        try {
            stream.close();
        } catch (IOException ignored) {
            // A failure to close a read-only stream cannot affect the parsed result
            // and must not mask an exception thrown by the parse itself.
        }
    }
    return parent;
}
/**
* Parse the content of the specified input source into a tree of Nodes.
*
* @param input the InputSource for the XML to parse
* @return the root node of the parsed tree of Nodes
* @throws SAXException Any SAX exception, possibly
* wrapping another exception.
* @throws IOException An IO exception from the parser,
* possibly from a byte stream or character stream
* supplied by the application.
*/
public Node parse(InputSource input) throws IOException, SAXException {
    // Run the parse; the resulting tree is exposed through the 'parent' field,
    // which startDocument() resets at the beginning of each document.
    getXMLReader().parse(input);
    return this.parent;
}
/**
* Parse the content of the specified input stream into a tree of Nodes.
*
* Note that using this method will not provide the parser with any URI
* for which to find DTDs etc
*
* @param input an InputStream containing the XML to be parsed
* @return the root node of the parsed tree of Nodes
* @throws SAXException Any SAX exception, possibly
* wrapping another exception.
* @throws IOException An IO exception from the parser,
* possibly from a byte stream or character stream
* supplied by the application.
*/
public Node parse(InputStream input) throws IOException, SAXException {
    // Wrap the byte stream in an InputSource (no system id is set) and parse it.
    getXMLReader().parse(new InputSource(input));
    return this.parent;
}
/**
* Parse the content of the specified reader into a tree of Nodes.
*
* Note that using this method will not provide the parser with any URI
* for which to find DTDs etc
*
* @param in a Reader to read the XML to be parsed
* @return the root node of the parsed tree of Nodes
* @throws SAXException Any SAX exception, possibly
* wrapping another exception.
* @throws IOException An IO exception from the parser,
* possibly from a byte stream or character stream
* supplied by the application.
*/
public Node parse(Reader in) throws IOException, SAXException {
    // Wrap the character stream in an InputSource (no system id is set) and parse it.
    getXMLReader().parse(new InputSource(in));
    return this.parent;
}
/**
* Parse the content of the specified URI into a tree of Nodes.
*
* @param uri a String containing a uri pointing to the XML to be parsed
* @return the root node of the parsed tree of Nodes
* @throws SAXException Any SAX exception, possibly
* wrapping another exception.
* @throws IOException An IO exception from the parser,
* possibly from a byte stream or character stream
* supplied by the application.
*/
public Node parse(String uri) throws IOException, SAXException {
    // The String is used as the InputSource's system identifier, not as XML text;
    // use parseText(String) for in-memory documents.
    getXMLReader().parse(new InputSource(uri));
    return this.parent;
}
/**
* A helper method to parse the given text as XML.
*
* @param text the XML text to parse
* @return the root node of the parsed tree of Nodes
* @throws SAXException Any SAX exception, possibly
* wrapping another exception.
* @throws IOException An IO exception from the parser,
* possibly from a byte stream or character stream
* supplied by the application.
*/
public Node parseText(String text) throws IOException, SAXException {
    // Expose the in-memory document as a character stream and reuse the Reader overload.
    final StringReader source = new StringReader(text);
    return parse(source);
}
/**
* Determine if namespace handling is enabled.
*
* @return true if namespace handling is enabled
*/
public boolean isNamespaceAware() {
    // Reports the flag stored on this instance; the underlying reader is not queried.
    return this.namespaceAware;
}
/**
* Enable and/or disable namespace handling.
*
* @param namespaceAware the new desired value
*/
public void setNamespaceAware(boolean namespaceAware) {
// Only the field is updated; the already-created reader is not reconfigured here.
// NOTE(review): how the flag is consumed is outside this chunk - confirm that
// changing it after construction has the intended effect.
this.namespaceAware = namespaceAware;
}
// Delegated XMLReader methods
//------------------------------------------------------------------------
/* (non-Javadoc)
* @see org.xml.sax.XMLReader#getDTDHandler()
*/
public DTDHandler getDTDHandler() {
    // Straight delegation to the wrapped reader.
    return reader.getDTDHandler();
}
/* (non-Javadoc)
* @see org.xml.sax.XMLReader#getEntityResolver()
*/
public EntityResolver getEntityResolver() {
    // Straight delegation to the wrapped reader.
    return reader.getEntityResolver();
}
/* (non-Javadoc)
* @see org.xml.sax.XMLReader#getErrorHandler()
*/
public ErrorHandler getErrorHandler() {
    // Straight delegation to the wrapped reader.
    return reader.getErrorHandler();
}
/* (non-Javadoc)
* @see org.xml.sax.XMLReader#getFeature(java.lang.String)
*/
public boolean getFeature(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
    // Ask the wrapped reader; its exceptions propagate unchanged.
    final boolean enabled = reader.getFeature(uri);
    return enabled;
}
/* (non-Javadoc)
* @see org.xml.sax.XMLReader#getProperty(java.lang.String)
*/
public Object getProperty(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
    // Ask the wrapped reader; its exceptions propagate unchanged.
    final Object value = reader.getProperty(uri);
    return value;
}
/* (non-Javadoc)
* @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
*/
public void setDTDHandler(final DTDHandler dtdHandler) {
    // Installed directly on the wrapped reader.
    reader.setDTDHandler(dtdHandler);
}
/* (non-Javadoc)
* @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
*/
public void setEntityResolver(final EntityResolver entityResolver) {
    // Installed directly on the wrapped reader.
    reader.setEntityResolver(entityResolver);
}
/* (non-Javadoc)
* @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
*/
public void setErrorHandler(final ErrorHandler errorHandler) {
    // Installed directly on the wrapped reader.
    reader.setErrorHandler(errorHandler);
}
/* (non-Javadoc)
* @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
*/
public void setFeature(final String uri, final boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
    // Applied directly to the wrapped reader; unlike setQuietly, failures are reported to the caller.
    reader.setFeature(uri, value);
}
/* (non-Javadoc)
* @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)
*/
public void setProperty(final String uri, final Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
    // Applied directly to the wrapped reader; its exceptions propagate unchanged.
    this.reader.setProperty(uri, value);
}
// ContentHandler interface
//-------------------------------------------------------------------------
public void startDocument() throws SAXException {
    // Forget the result of any earlier parse before a new document begins.
    this.parent = null;
}
public void endDocument() throws SAXException {
    // Empty the working stack; 'parent' is left alone so parse(...) can return it.
    this.stack.clear();
}
public void startElement(String namespaceURI, String localName, String qName, Attributes list)
throws SAXException {
addTextToNode();
Object nodeName = getElementName(namespaceURI, localName, qName);
int size = list.getLength();
Map