org.eclipse.rdf4j.common.xml.SimpleSAXParser Maven / Gradle / Ivy
The newest version!
/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.common.xml;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
/**
 * An XML parser that generates "simple" SAX-like events from a limited subset of XML documents. The SimpleSAXParser can
 * parse simple XML documents; it doesn't support processing instructions or elements that contain both sub-elements and
 * character data; character data is only supported in the "leaves" of the XML element tree.
 * <p>
 * <b>Example:</b>
 * <p>
 * Parsing the following XML:
 *
 * <pre>
 * &lt;?xml version='1.0' encoding='UTF-8'?&gt;
 * &lt;xml-doc&gt;
 *   &lt;foo a="1" b="2&amp;amp;3"/&gt;
 *   &lt;bar&gt;Hello World!&lt;/bar&gt;
 * &lt;/xml-doc&gt;
 * </pre>
 *
 * will result in the following method calls to the SimpleSAXListener:
 *
 * <pre>
 * startDocument()
 * startTag("xml-doc", emptyMap, "")
 *
 * startTag("foo", a_b_Map, "")
 * endTag("foo")
 *
 * startTag("bar", emptyMap, "Hello World!")
 * endTag("bar")
 *
 * endTag("xml-doc")
 * endDocument()
 * </pre>
 */
public class SimpleSAXParser {

    /*-----------*
     * Variables *
     *-----------*/

    /**
     * The XMLReader to use for parsing the XML.
     */
    private final XMLReader xmlReader;

    /**
     * The listener to report the events to.
     */
    private SimpleSAXListener listener;

    /**
     * Flag indicating whether leading and trailing whitespace in text elements should be preserved.
     */
    private boolean preserveWhitespace = false;

    /**
     * A Locator indicating a position in the text that is currently being parsed by the SAX parser.
     */
    private Locator locator;

    /*--------------*
     * Constructors *
     *--------------*/

    /**
     * Creates a new SimpleSAXParser that will use the supplied XMLReader for parsing the XML. One must set a
     * SimpleSAXListener on this object before calling one of the parse() methods.
     *
     * @param xmlReader The XMLReader to use for parsing.
     * @see #setListener
     */
    public SimpleSAXParser(XMLReader xmlReader) {
        this.xmlReader = xmlReader;
    }

    /**
     * Creates a new SimpleSAXParser that will try to create a new XMLReader using
     * {@link org.eclipse.rdf4j.common.xml.XMLReaderFactory} for parsing the XML. One must set a SimpleSAXListener on
     * this object before calling one of the parse() methods.
     *
     * @throws SAXException If the SimpleSAXParser was unable to create an XMLReader.
     * @see #setListener
     * @see org.xml.sax.XMLReader
     * @see org.eclipse.rdf4j.common.xml.XMLReaderFactory
     */
    public SimpleSAXParser() throws SAXException {
        this(XMLReaderFactory.createXMLReader());
    }

    /*---------*
     * Methods *
     *---------*/

    /**
     * Sets the (new) listener that should receive any events from this parser. This listener will replace any
     * previously set listener.
     *
     * @param listener The (new) listener for events from this parser.
     */
    public void setListener(SimpleSAXListener listener) {
        this.listener = listener;
    }

    /**
     * Gets the listener that currently will receive any events from this parser.
     *
     * @return The listener for events from this parser.
     */
    public SimpleSAXListener getListener() {
        return listener;
    }

    /**
     * Gets the Locator that was most recently handed to this parser's content handler by the underlying XMLReader.
     *
     * @return The last Locator received through <var>setDocumentLocator</var>, or <var>null</var> if none has been
     *         received yet.
     */
    public Locator getLocator() {
        return locator;
    }

    /**
     * Sets whether leading and trailing whitespace characters in text elements should be preserved. Such whitespace
     * characters are discarded by default.
     *
     * @param preserveWhitespace <var>true</var> to report text exactly as collected, <var>false</var> to trim it.
     */
    public void setPreserveWhitespace(boolean preserveWhitespace) {
        this.preserveWhitespace = preserveWhitespace;
    }

    /**
     * Checks whether leading and trailing whitespace characters in text elements are preserved. Defaults to
     * <var>false</var>.
     *
     * @return <var>true</var> if whitespace is preserved, <var>false</var> if text is trimmed.
     */
    public boolean isPreserveWhitespace() {
        return preserveWhitespace;
    }

    /**
     * Parses the content of the supplied File as XML. The file is opened for the duration of the parse and closed
     * afterwards, whether or not parsing succeeds.
     *
     * @param file The file containing the XML to parse.
     * @throws SAXException If parsing fails.
     * @throws IOException  If the file cannot be opened or read.
     */
    public void parse(File file) throws SAXException, IOException {
        try (InputStream in = new FileInputStream(file)) {
            parse(in);
        }
    }

    /**
     * Parses the content of the supplied InputStream as XML.
     *
     * @param in An InputStream containing XML data.
     * @throws SAXException If parsing fails.
     * @throws IOException  If reading from the stream fails.
     */
    public void parse(InputStream in) throws SAXException, IOException {
        parse(new InputSource(in));
    }

    /**
     * Parses the content of the supplied Reader as XML.
     *
     * @param reader A Reader containing XML data.
     * @throws SAXException If parsing fails.
     * @throws IOException  If reading from the reader fails.
     */
    public void parse(Reader reader) throws SAXException, IOException {
        parse(new InputSource(reader));
    }

    /**
     * Parses the content of the supplied InputSource as XML. A fresh content handler is installed on the XMLReader
     * for every call, so no deferred-tag or text state carries over between parses.
     *
     * @param inputSource An InputSource containing XML data.
     * @throws IllegalStateException If no listener has been set.
     * @throws SAXException          If parsing fails.
     * @throws IOException           If reading the input fails.
     */
    public synchronized void parse(InputSource inputSource) throws SAXException, IOException {
        // Fail fast with a clear message instead of a NullPointerException from inside a SAX callback.
        if (listener == null) {
            throw new IllegalStateException("No SimpleSAXListener set; call setListener() before parse()");
        }

        xmlReader.setContentHandler(new SimpleSAXDefaultHandler());
        xmlReader.parse(inputSource);
    }

    /*-------------------------------------*
     * Inner class SimpleSAXDefaultHandler *
     *-------------------------------------*/

    /**
     * SAX content handler that translates regular SAX events into SimpleSAXListener events. A start tag is not
     * reported immediately; it is deferred until either a child element starts (reported with empty text) or the
     * element ends (reported with the collected character data).
     */
    class SimpleSAXDefaultHandler extends DefaultHandler {

        /*-----------*
         * Variables *
         *-----------*/

        /**
         * StringBuilder used to collect text during parsing.
         */
        private final StringBuilder charBuf = new StringBuilder(512);

        /**
         * The tag name of a deferred start tag.
         */
        private String deferredStartTag = null;

        /**
         * The attributes of a deferred start tag, keyed by qualified attribute name.
         */
        private Map<String, String> deferredAttributes = null;

        /*--------------*
         * Constructors *
         *--------------*/

        public SimpleSAXDefaultHandler() {
            super();
        }

        /*---------*
         * Methods *
         *---------*/

        // overrides DefaultHandler.startDocument()
        @Override
        public void startDocument() throws SAXException {
            listener.startDocument();
        }

        // overrides DefaultHandler.endDocument()
        @Override
        public void endDocument() throws SAXException {
            listener.endDocument();
        }

        // overrides DefaultHandler.characters()
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            // The SAX parser may deliver text in several chunks; accumulate until the element ends.
            charBuf.append(ch, start, length);
        }

        // overrides DefaultHandler.startElement()
        @Override
        public void startElement(String namespaceURI, String localName, String qName, Attributes attributes)
                throws SAXException {
            // A pending start tag turns out to have a child element: report it without text.
            if (deferredStartTag != null) {
                reportDeferredStartElement("");
            }

            // Make current tag new deferred start tag
            deferredStartTag = tagName(localName, qName);

            // Copy attributes to deferredAttributes; the Attributes object is only valid during this callback.
            int attCount = attributes.getLength();
            if (attCount == 0) {
                deferredAttributes = Collections.emptyMap();
            } else {
                deferredAttributes = new LinkedHashMap<>(attCount * 2);
                for (int i = 0; i < attCount; i++) {
                    deferredAttributes.put(attributes.getQName(i), attributes.getValue(i));
                }
            }

            // Clear character buffer: text preceding a child element is not reported.
            charBuf.setLength(0);
        }

        // overrides DefaultHandler.endElement()
        @Override
        public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
            if (deferredStartTag != null) {
                // Leaf element: report the start tag together with the collected character data.
                String text = charBuf.toString();
                if (!preserveWhitespace) {
                    text = text.trim();
                }
                reportDeferredStartElement(text);
            }

            // Report the end tag
            listener.endTag(tagName(localName, qName));

            // Clear character buffer
            charBuf.setLength(0);
        }

        @Override
        public void setDocumentLocator(Locator loc) {
            locator = loc;
        }

        /**
         * Reports the deferred start tag to the listener with the supplied text and clears the deferred state.
         *
         * @param text The character data to report for the element.
         */
        private void reportDeferredStartElement(String text) throws SAXException {
            listener.startTag(deferredStartTag, deferredAttributes, text);
            deferredStartTag = null;
            deferredAttributes = null;
        }

        /**
         * Picks the tag name to report: the local name when available, otherwise the qualified name. SAX supplies
         * an empty local name when the XMLReader does not perform namespace processing.
         */
        private String tagName(String localName, String qName) {
            return (localName == null || localName.isEmpty()) ? qName : localName;
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy