
org.openrdf.util.xml.SimpleSAXParser Maven / Gradle / Ivy
The newest version!
/* Sesame - Storage and Querying architecture for RDF and RDF Schema
* Copyright (C) 2001-2006 Aduna
*
* Contact:
* Aduna
* Prinses Julianaplein 14 b
* 3817 CS Amersfoort
* The Netherlands
* tel. +33 (0)33 465 99 87
* fax. +33 (0)33 465 99 87
*
* http://aduna-software.com/
* http://www.openrdf.org/
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.openrdf.util.xml;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
/**
 * An XML parser that generates "simple" SAX-like events from a limited subset
 * of XML documents. The SimpleSAXParser can parse simple XML documents; it
 * doesn't support processing instructions or elements that contain both
 * sub-elements and character data; character data is only supported in the
 * "leaves" of the XML element tree.
 *
 * <p>Example: parsing the following XML:
 *
 * <pre>
 * &lt;?xml version='1.0' encoding='UTF-8'?&gt;
 * &lt;xml-doc&gt;
 *   &lt;foo a="1" b="2&amp;amp;3"/&gt;
 *   &lt;bar&gt;Hello World!&lt;/bar&gt;
 * &lt;/xml-doc&gt;
 * </pre>
 *
 * will result in the following method calls to the
 * <tt>SimpleSAXListener</tt>:
 *
 * <pre>
 * startDocument()
 * startTag("xml-doc", emptyMap, "")
 *
 * startTag("foo", a_b_Map, "")
 * endTag("foo")
 *
 * startTag("bar", emptyMap, "Hello World!")
 * endTag("bar")
 *
 * endTag("xml-doc")
 * endDocument()
 * </pre>
 *
 * where <tt>a_b_Map</tt> maps the attribute names to their parsed values
 * (<tt>a</tt> to <tt>1</tt>, <tt>b</tt> to <tt>2&amp;3</tt>).
 *
 * <p>Parse calls on a single instance are serialized: the internal parse
 * method is <tt>synchronized</tt> on this object.
 */
public class SimpleSAXParser {

    /*-------------+
    | Variables    |
    +-------------*/

    /**
     * The XMLReader to use for parsing the XML.
     */
    private final XMLReader _xmlReader;

    /**
     * The listener to report the events to. It is read at the moment each
     * event is reported, so a listener set while a parse is in progress
     * receives the subsequent events.
     */
    private SimpleSAXListener _listener;

    /*--------------+
    | Constructors  |
    +--------------*/

    /**
     * Creates a new SimpleSAXParser that will use the supplied
     * <tt>XMLReader</tt> for parsing the XML. One must set a
     * <tt>SimpleSAXListener</tt> on this object before calling one of
     * the <tt>parse()</tt> methods.
     *
     * @param xmlReader The XMLReader to use for parsing.
     *
     * @see #setListener
     */
    public SimpleSAXParser(XMLReader xmlReader) {
        super();
        _xmlReader = xmlReader;
    }

    /**
     * Creates a new SimpleSAXParser that will try to create a new
     * <tt>XMLReader</tt> using <tt>org.openrdf.util.xml.XMLReaderFactory</tt>
     * for parsing the XML. One must set a <tt>SimpleSAXListener</tt> on
     * this object before calling one of the <tt>parse()</tt> methods.
     *
     * @exception SAXException If the SimpleSAXParser was unable to
     * create an XMLReader.
     *
     * @see #setListener
     * @see org.xml.sax.XMLReader
     * @see org.openrdf.util.xml.XMLReaderFactory
     */
    public SimpleSAXParser()
        throws SAXException
    {
        this(XMLReaderFactory.createXMLReader());
    }

    /*--------------+
    | Methods       |
    +--------------*/

    /**
     * Sets the (new) listener that should receive any events from
     * this parser. This listener will replace any previously set
     * listener.
     *
     * @param listener The (new) listener for events from this parser.
     */
    public void setListener(SimpleSAXListener listener) {
        _listener = listener;
    }

    /**
     * Gets the listener that currently will receive any events from
     * this parser.
     *
     * @return The listener for events from this parser, or <tt>null</tt>
     * if none has been set yet.
     */
    public SimpleSAXListener getListener() {
        return _listener;
    }

    /**
     * Parses the content of the supplied File as XML. The file is opened
     * by this method and closed again before it returns, whether or not
     * parsing succeeded.
     *
     * @param file The file containing the XML to parse.
     * @exception SAXException If reported by the XMLReader or the listener.
     * @exception IOException If the file could not be opened or read.
     * @exception IllegalStateException If no listener has been set.
     */
    public void parse(File file)
        throws SAXException, IOException
    {
        InputStream in = new FileInputStream(file);
        try {
            parse(in);
        }
        finally {
            try {
                in.close();
            }
            catch (IOException ignore) {
                // Deliberately ignored: closing is best effort and must not
                // mask an exception thrown by the parse itself.
            }
        }
    }

    /**
     * Parses the content of the supplied InputStream as XML. The stream is
     * not closed by this method.
     *
     * @param in An InputStream containing XML data.
     * @exception SAXException If reported by the XMLReader or the listener.
     * @exception IOException If reported by the XMLReader.
     * @exception IllegalStateException If no listener has been set.
     */
    public void parse(InputStream in)
        throws SAXException, IOException
    {
        _parse(new InputSource(in));
    }

    /**
     * Parses the content of the supplied Reader as XML. The reader is
     * not closed by this method.
     *
     * @param reader A Reader containing XML data.
     * @exception SAXException If reported by the XMLReader or the listener.
     * @exception IOException If reported by the XMLReader.
     * @exception IllegalStateException If no listener has been set.
     */
    public void parse(Reader reader)
        throws SAXException, IOException
    {
        _parse(new InputSource(reader));
    }

    /**
     * Parses the content of the supplied InputSource as XML, using a fresh
     * handler (and thus fresh parse state) for every call.
     *
     * @param inputSource An InputSource containing XML data.
     * @exception IllegalStateException If no listener has been set.
     */
    private synchronized void _parse(InputSource inputSource)
        throws SAXException, IOException
    {
        // Fail fast with a clear message instead of letting a
        // NullPointerException surface from the first SAX callback.
        if (_listener == null) {
            throw new IllegalStateException(
                "No SimpleSAXListener has been set; call setListener() before parse()");
        }

        _xmlReader.setContentHandler(new SimpleSAXDefaultHandler());
        _xmlReader.parse(inputSource);
    }

    /*--------------------------------------+
    | Inner class SimpleSAXDefaultHandler   |
    +--------------------------------------*/

    /**
     * SAX content handler that translates SAX events into calls on the
     * enclosing parser's listener. Reporting of a start tag is deferred
     * until either a child element starts (the tag is then reported with an
     * empty text) or the element ends (the tag is then reported with the
     * trimmed character data collected since it started).
     */
    class SimpleSAXDefaultHandler extends DefaultHandler {

        /*-------------+
        | Variables    |
        +-------------*/

        /**
         * StringBuffer used to collect text during parsing.
         */
        private StringBuffer _charBuf = new StringBuffer(512);

        /**
         * The tag name of a deferred start tag, or <tt>null</tt> if no
         * start tag is currently waiting to be reported.
         */
        private String _deferredStartTag = null;

        /**
         * The attributes (qualified name to value) of a deferred start tag.
         */
        private Map _deferredAttributes = null;

        /*-------------+
        | Constructors |
        +-------------*/

        public SimpleSAXDefaultHandler() {
            super();
        }

        /*-------------+
        | Methods      |
        +-------------*/

        // overrides DefaultHandler.startDocument()
        public void startDocument()
            throws SAXException
        {
            _listener.startDocument();
        }

        // overrides DefaultHandler.endDocument()
        public void endDocument()
            throws SAXException
        {
            _listener.endDocument();
        }

        // overrides DefaultHandler.characters()
        public void characters(char[] ch, int start, int length)
            throws SAXException
        {
            // Only accumulate here; the text is reported in endElement().
            _charBuf.append(ch, start, length);
        }

        // overrides DefaultHandler.startElement()
        public void startElement(
            String namespaceURI, String localName,
            String qName, Attributes attributes)
            throws SAXException
        {
            // A still-deferred start tag turns out to have a child element,
            // so it is reported without any text.
            if (_deferredStartTag != null) {
                _reportDeferredStartElement("");
            }

            // Make current tag the new deferred start tag
            _deferredStartTag = qName;

            // Copy attributes; the Attributes object itself is not retained
            int attCount = attributes.getLength();
            if (attCount == 0) {
                _deferredAttributes = Collections.EMPTY_MAP;
            }
            else {
                _deferredAttributes = new HashMap(attCount * 2);
                for (int i = 0; i < attCount; i++) {
                    _deferredAttributes.put(
                        attributes.getQName(i), attributes.getValue(i));
                }
            }

            // Discard any text seen before this element started
            _charBuf.setLength(0);
        }

        // overrides DefaultHandler.endElement()
        public void endElement(String namespaceURI, String localName, String qName)
            throws SAXException
        {
            // A still-deferred start tag belongs to a leaf element: report
            // it together with the trimmed character data collected so far.
            if (_deferredStartTag != null) {
                _reportDeferredStartElement(_charBuf.toString().trim());
            }

            // Report the end tag
            _listener.endTag(qName);

            // Clear character buffer
            _charBuf.setLength(0);
        }

        /**
         * Reports the deferred start tag to the listener with the supplied
         * text and clears the deferred state.
         *
         * @param text The text to report along with the start tag.
         */
        private void _reportDeferredStartElement(String text)
            throws SAXException
        {
            _listener.startTag(_deferredStartTag, _deferredAttributes, text);
            _deferredStartTag = null;
            _deferredAttributes = null;
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy