All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.eclipse.rdf4j.common.xml.SimpleSAXParser Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *******************************************************************************/

package org.eclipse.rdf4j.common.xml;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
 * An XML parser that generates "simple" SAX-like events from a limited subset of XML documents. The
 * SimpleSAXParser can parse simple XML documents; it doesn't support processing instructions or elements that
 * contain both sub-element and character data; character data is only supported in the "leaves" of the XML
 * element tree.
 * 

Example:

*

* Parsing the following XML: * *

 * <?xml version='1.0' encoding='UTF-8'?>
 * <xml-doc>
 *   <foo a="1" b="2&amp;3"/>
 *   <bar>Hello World!</bar>
 * </xml-doc>
 * 
*

* will result in the following method calls to the SimpleSAXListener: * *

 * startDocument()
 * startTag("xml-doc", emptyMap, "")
 *
 * startTag("foo", a_b_Map, "")
 * endTag("foo")
 *
 * startTag("bar", emptyMap, "Hello World!")
 * endTag("bar")
 *
 * endTag("xml-doc")
 * endDocument()
 * 
*/ public class SimpleSAXParser { /*-----------* * Variables * *-----------*/ /** * The XMLReader to use for parsing the XML. */ private XMLReader xmlReader; /** * The listener to report the events to. */ private SimpleSAXListener listener; /** * Flag indicating whether leading and trailing whitespace in text elements should be preserved. */ private boolean preserveWhitespace = false; /*--------------* * Constructors * *--------------*/ /** * Creates a new SimpleSAXParser that will use the supplied XMLReader for parsing the XML. One * must set a SimpleSAXListener on this object before calling one of the parse() * methods. * * @param xmlReader * The XMLReader to use for parsing. * @see #setListener */ public SimpleSAXParser(XMLReader xmlReader) { super(); this.xmlReader = xmlReader; } /** * Creates a new SimpleSAXParser that will try to create a new XMLReader using * info.aduna.xml.XMLReaderFactory for parsing the XML. One must set a SimpleSAXListener * on this object before calling one of the parse() methods. * * @throws SAXException * If the SimpleSAXParser was unable to create an XMLReader. * @see #setListener * @see org.xml.sax.XMLReader * @see org.eclipse.rdf4j.common.xml.XMLReaderFactory */ public SimpleSAXParser() throws SAXException { this(XMLReaderFactory.createXMLReader()); } /*---------* * Methods * *---------*/ /** * Sets the (new) listener that should receive any events from this parser. This listener will replace any * previously set listener. * * @param listener * The (new) listener for events from this parser. */ public void setListener(SimpleSAXListener listener) { this.listener = listener; } /** * Gets the listener that currently will receive any events from this parser. * * @return The listener for events from this parser. */ public SimpleSAXListener getListener() { return listener; } /** * Sets whether leading and trailing whitespace characters in text elements should be preserved. Such * whitespace characters are discarded by default. */ public void setPreserveWhitespace(boolean preserveWhitespace) { this.preserveWhitespace = preserveWhitespace; } /** * Checks whether leading and trailing whitespace characters in text elements are preserved. Defaults to * false. */ public boolean isPreserveWhitespace() { return preserveWhitespace; } /** * Parses the content of the supplied File as XML. * * @param file * The file containing the XML to parse. */ public void parse(File file) throws SAXException, IOException { InputStream in = new FileInputStream(file); try { parse(in); } finally { try { in.close(); } catch (IOException ignore) { } } } /** * Parses the content of the supplied InputStream as XML. * * @param in * An InputStream containing XML data. */ public void parse(InputStream in) throws SAXException, IOException { parse(new InputSource(in)); } /** * Parses the content of the supplied Reader as XML. * * @param reader * A Reader containing XML data. */ public void parse(Reader reader) throws SAXException, IOException { parse(new InputSource(reader)); } /** * Parses the content of the supplied InputSource as XML. * * @param inputSource * An InputSource containing XML data. */ private synchronized void parse(InputSource inputSource) throws SAXException, IOException { xmlReader.setContentHandler(new SimpleSAXDefaultHandler()); xmlReader.parse(inputSource); } /*-------------------------------------* * Inner class SimpleSAXDefaultHandler * *-------------------------------------*/ class SimpleSAXDefaultHandler extends DefaultHandler { /*-----------* * Variables * *-----------*/ /** * StringBuilder used to collect text during parsing. */ private StringBuilder charBuf = new StringBuilder(512); /** * The tag name of a deferred start tag. */ private String deferredStartTag = null; /** * The attributes of a deferred start tag. */ private Map deferredAttributes = null; /*--------------* * Constructors * *--------------*/ public SimpleSAXDefaultHandler() { super(); } /*---------* * Methods * *---------*/ // overrides DefaultHandler.startDocument() public void startDocument() throws SAXException { listener.startDocument(); } // overrides DefaultHandler.endDocument() public void endDocument() throws SAXException { listener.endDocument(); } // overrides DefaultHandler.characters() public void characters(char[] ch, int start, int length) throws SAXException { charBuf.append(ch, start, length); } // overrides DefaultHandler.startElement() public void startElement(String namespaceURI, String localName, String qName, Attributes attributes) throws SAXException { // Report any deferred start tag if (deferredStartTag != null) { reportDeferredStartElement(); } // Make current tag new deferred start tag deferredStartTag = localName; // Copy attributes to deferredAttributes int attCount = attributes.getLength(); if (attCount == 0) { deferredAttributes = Collections.emptyMap(); } else { deferredAttributes = new LinkedHashMap(attCount * 2); for (int i = 0; i < attCount; i++) { deferredAttributes.put(attributes.getQName(i), attributes.getValue(i)); } } // Clear character buffer charBuf.setLength(0); } private void reportDeferredStartElement() throws SAXException { listener.startTag(deferredStartTag, deferredAttributes, ""); deferredStartTag = null; deferredAttributes = null; } // overrides DefaultHandler.endElement() public void endElement(String namespaceURI, String localName, String qName) throws SAXException { if (deferredStartTag != null) { // Check if any character data has been collected in the charBuf String text = charBuf.toString(); if (!preserveWhitespace) { text = text.trim(); } // Report deferred start tag listener.startTag(deferredStartTag, deferredAttributes, text); deferredStartTag = null; deferredAttributes = null; } // Report the end tag listener.endTag(localName); // Clear character buffer charBuf.setLength(0); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy