All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openrdf.util.xml.SimpleSAXParser Maven / Gradle / Ivy

The newest version!
/*  Sesame - Storage and Querying architecture for RDF and RDF Schema
 *  Copyright (C) 2001-2006 Aduna
 *
 *  Contact: 
 *  	Aduna
 *  	Prinses Julianaplein 14 b
 *  	3817 CS Amersfoort
 *  	The Netherlands
 *  	tel. +33 (0)33 465 99 87
 *  	fax. +33 (0)33 465 99 87
 *
 *  	http://aduna-software.com/
 *  	http://www.openrdf.org/
 *  
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.openrdf.util.xml;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
 * An XML parser that generates "simple" SAX-like events from a limited subset
 * of XML documents. The SimpleSAXParser can parse simple XML documents; it
 * doesn't support processing instructions or elements that contain both
 * sub-element and character data; character data is only supported in the
 * "leaves" of the XML element tree.
 *
 * 

Example:

*

* Parsing the following XML: *

 * <?xml version='1.0' encoding='UTF-8'?>
 * <xml-doc>
 *   <foo a="1" b="2&amp;3"/>
 *   <bar>Hello World!</bar>
 * </xml-doc>
 *
*

* will result in the following method calls to the * SimpleSAXListener: *

 * startDocument()
 * startTag("xml-doc", emptyMap, "")
 *
 * startTag("foo", a_b_Map, "")
 * endTag("foo")
 *
 * startTag("bar", emptyMap, "Hello World!")
 * endTag("bar")
 *
 * endTag("xml-doc")
 * endDocument()
 * 
**/ public class SimpleSAXParser { /*-------------+ | Variables | +-------------*/ /** * The XMLReader to use for parsing the XML. **/ private XMLReader _xmlReader; /** * The listener to report the events to. **/ private SimpleSAXListener _listener; /*--------------+ | Constructors | +--------------*/ /** * Creates a new SimpleSAXParser that will use the supplied * XMLReader for parsing the XML. One must set a * SimpleSAXListener on this object before calling one of * the parse() methods. * * @param xmlReader The XMLReader to use for parsing. * * @see #setListener **/ public SimpleSAXParser(XMLReader xmlReader) { super(); _xmlReader = xmlReader; } /** * Creates a new SimpleSAXParser that will try to create a new * XMLReader using org.openrdf.util.xml.XMLReaderFactory * for parsing the XML. One must set a SimpleSAXListener on * this object before calling one of the parse() methods. * * @exception SAXException If the SimpleSAXParser was unable to * create an XMLReader. * * @see #setListener * @see org.xml.sax.XMLReader * @see org.openrdf.util.xml.XMLReaderFactory **/ public SimpleSAXParser() throws SAXException { this(XMLReaderFactory.createXMLReader()); } /*--------------+ | Methods | +--------------*/ /** * Sets the (new) listener that should receive any events from * this parser. This listener will replace any previously set * listener. * * @param listener The (new) listener for events from this parser. **/ public void setListener(SimpleSAXListener listener) { _listener = listener; } /** * Gets the listener that currently will receive any events from * this parser. * * @return The listener for events from this parser. **/ public SimpleSAXListener getListener() { return _listener; } /** * Parses the content of the supplied File as XML. * * @param file The file containing the XML to parse. **/ public void parse(File file) throws SAXException, IOException { InputStream in = new FileInputStream(file); try { parse(in); } finally { try { in.close(); } catch (IOException ignore) {} } } /** * Parses the content of the supplied InputStream as XML. * * @param in An InputStream containing XML data. **/ public void parse(InputStream in) throws SAXException, IOException { _parse(new InputSource(in)); } /** * Parses the content of the supplied Reader as XML. * * @param reader A Reader containing XML data. **/ public void parse(Reader reader) throws SAXException, IOException { _parse(new InputSource(reader)); } /** * Parses the content of the supplied InputSource as XML. * * @param inputSource An InputSource containing XML data. **/ private synchronized void _parse(InputSource inputSource) throws SAXException, IOException { _xmlReader.setContentHandler(new SimpleSAXDefaultHandler()); _xmlReader.parse(inputSource); } /*--------------------------------------+ | Inner class SimpleSAXDefaultHandler | +--------------------------------------*/ class SimpleSAXDefaultHandler extends DefaultHandler { /*-------------+ | Variables | +-------------*/ /** * StringBuffer used to collect text during parsing. **/ private StringBuffer _charBuf = new StringBuffer(512); /** * The tag name of a deferred start tag. **/ private String _deferredStartTag = null; /** * The attributes of a deferred start tag. **/ private Map _deferredAttributes = null; /*-------------+ | Constructors | +-------------*/ public SimpleSAXDefaultHandler() { super(); } /*-------------+ | Methods | +-------------*/ // overrides DefaultHandler.startDocument() public void startDocument() throws SAXException { _listener.startDocument(); } // overrides DefaultHandler.endDocument() public void endDocument() throws SAXException { _listener.endDocument(); } // overrides DefaultHandler.characters() public void characters(char[] ch, int start, int length) throws SAXException { _charBuf.append(ch, start, length); } // overrides DefaultHandler.startElement() public void startElement( String namespaceURI, String localName, String qName, Attributes attributes) throws SAXException { // Report any deferred start tag if (_deferredStartTag != null) { _reportDeferredStartElement(); } // Make current tag new deferred start tag _deferredStartTag = qName; // Copy attributes to _deferredAttributes int attCount = attributes.getLength(); if (attCount == 0) { _deferredAttributes = Collections.EMPTY_MAP; } else { _deferredAttributes = new HashMap(attCount * 2); for (int i = 0; i < attCount; i++) { _deferredAttributes.put( attributes.getQName(i), attributes.getValue(i)); } } // Clear character buffer _charBuf.setLength(0); } private void _reportDeferredStartElement() throws SAXException { _listener.startTag(_deferredStartTag, _deferredAttributes, ""); _deferredStartTag = null; _deferredAttributes = null; } // overrides DefaultHandler.endElement() public void endElement(String namespaceURI, String localName, String qName) throws SAXException { if (_deferredStartTag != null) { // Check if any character data has been collected in the _charBuf String text = _charBuf.toString().trim(); // Report deferred start tag _listener.startTag(_deferredStartTag, _deferredAttributes, text); _deferredStartTag = null; _deferredAttributes = null; } // Report the end tag _listener.endTag(qName); // Clear character buffer _charBuf.setLength(0); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy