All downloads are free. Search and download functionality uses the official Maven repository.

org.eclipse.rdf4j.common.xml.SimpleSAXParser Maven / Gradle / Ivy

The newest version!
/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *******************************************************************************/

package org.eclipse.rdf4j.common.xml;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
 * An XML parser that generates "simple" SAX-like events from a limited subset of XML documents. The SimpleSAXParser can
 * parse simple XML documents; it doesn't support processing instructions or elements that contain both sub-element and
 * character data; character data is only supported in the "leaves" of the XML element tree.
 * 

Example:

*

* Parsing the following XML: * *

 * <?xml version='1.0' encoding='UTF-8'?>
 * <xml-doc>
 *   <foo a="1" b="2&amp;3"/>
 *   <bar>Hello World!</bar>
 * </xml-doc>
 * 
*

* will result in the following method calls to the SimpleSAXListener: * *

 * startDocument()
 * startTag("xml-doc", emptyMap, "")
 *
 * startTag("foo", a_b_Map, "")
 * endTag("foo")
 *
 * startTag("bar", emptyMap, "Hello World!")
 * endTag("bar")
 *
 * endTag("xml-doc")
 * endDocument()
 * 
*/ public class SimpleSAXParser { /*-----------* * Variables * *-----------*/ /** * The XMLReader to use for parsing the XML. */ private final XMLReader xmlReader; /** * The listener to report the events to. */ private SimpleSAXListener listener; /** * Flag indicating whether leading and trailing whitespace in text elements should be preserved. */ private boolean preserveWhitespace = false; /** * A Locator indicating a position in the text that is currently being parsed by the SAX parser. */ private Locator locator; /*--------------* * Constructors * *--------------*/ /** * Creates a new SimpleSAXParser that will use the supplied XMLReader for parsing the XML. One must set a * SimpleSAXListener on this object before calling one of the parse() methods. * * @param xmlReader The XMLReader to use for parsing. * @see #setListener */ public SimpleSAXParser(XMLReader xmlReader) { super(); this.xmlReader = xmlReader; } /** * Creates a new SimpleSAXParser that will try to create a new XMLReader using * info.aduna.xml.XMLReaderFactory for parsing the XML. One must set a SimpleSAXListener on * this object before calling one of the parse() methods. * * @throws SAXException If the SimpleSAXParser was unable to create an XMLReader. * @see #setListener * @see org.xml.sax.XMLReader * @see org.eclipse.rdf4j.common.xml.XMLReaderFactory */ public SimpleSAXParser() throws SAXException { this(XMLReaderFactory.createXMLReader()); } /*---------* * Methods * *---------*/ /** * Sets the (new) listener that should receive any events from this parser. This listener will replace any * previously set listener. * * @param listener The (new) listener for events from this parser. */ public void setListener(SimpleSAXListener listener) { this.listener = listener; } /** * Gets the listener that currently will receive any events from this parser. * * @return The listener for events from this parser. 
*/ public SimpleSAXListener getListener() { return listener; } public Locator getLocator() { return locator; } /** * Sets whether leading and trailing whitespace characters in text elements should be preserved. Such whitespace * characters are discarded by default. */ public void setPreserveWhitespace(boolean preserveWhitespace) { this.preserveWhitespace = preserveWhitespace; } /** * Checks whether leading and trailing whitespace characters in text elements are preserved. Defaults to * false. */ public boolean isPreserveWhitespace() { return preserveWhitespace; } /** * Parses the content of the supplied File as XML. * * @param file The file containing the XML to parse. */ public void parse(File file) throws SAXException, IOException { try (InputStream in = new FileInputStream(file)) { parse(in); } } /** * Parses the content of the supplied InputStream as XML. * * @param in An InputStream containing XML data. */ public void parse(InputStream in) throws SAXException, IOException { parse(new InputSource(in)); } /** * Parses the content of the supplied Reader as XML. * * @param reader A Reader containing XML data. */ public void parse(Reader reader) throws SAXException, IOException { parse(new InputSource(reader)); } /** * Parses the content of the supplied InputSource as XML. * * @param inputSource An InputSource containing XML data. */ public synchronized void parse(InputSource inputSource) throws SAXException, IOException { xmlReader.setContentHandler(new SimpleSAXDefaultHandler()); xmlReader.parse(inputSource); } /*-------------------------------------* * Inner class SimpleSAXDefaultHandler * *-------------------------------------*/ class SimpleSAXDefaultHandler extends DefaultHandler { /*-----------* * Variables * *-----------*/ /** * StringBuilder used to collect text during parsing. */ private final StringBuilder charBuf = new StringBuilder(512); /** * The tag name of a deferred start tag. 
*/ private String deferredStartTag = null; /** * The attributes of a deferred start tag. */ private Map deferredAttributes = null; /*--------------* * Constructors * *--------------*/ public SimpleSAXDefaultHandler() { super(); } /*---------* * Methods * *---------*/ // overrides DefaultHandler.startDocument() @Override public void startDocument() throws SAXException { listener.startDocument(); } // overrides DefaultHandler.endDocument() @Override public void endDocument() throws SAXException { listener.endDocument(); } // overrides DefaultHandler.characters() @Override public void characters(char[] ch, int start, int length) throws SAXException { charBuf.append(ch, start, length); } // overrides DefaultHandler.startElement() @Override public void startElement(String namespaceURI, String localName, String qName, Attributes attributes) throws SAXException { // Report any deferred start tag if (deferredStartTag != null) { reportDeferredStartElement(); } // Make current tag new deferred start tag deferredStartTag = localName; // Copy attributes to deferredAttributes int attCount = attributes.getLength(); if (attCount == 0) { deferredAttributes = Collections.emptyMap(); } else { deferredAttributes = new LinkedHashMap<>(attCount * 2); for (int i = 0; i < attCount; i++) { deferredAttributes.put(attributes.getQName(i), attributes.getValue(i)); } } // Clear character buffer charBuf.setLength(0); } private void reportDeferredStartElement() throws SAXException { listener.startTag(deferredStartTag, deferredAttributes, ""); deferredStartTag = null; deferredAttributes = null; } // overrides DefaultHandler.endElement() @Override public void endElement(String namespaceURI, String localName, String qName) throws SAXException { if (deferredStartTag != null) { // Check if any character data has been collected in the charBuf String text = charBuf.toString(); if (!preserveWhitespace) { text = text.trim(); } // Report deferred start tag listener.startTag(deferredStartTag, 
deferredAttributes, text); deferredStartTag = null; deferredAttributes = null; } // Report the end tag listener.endTag(localName); // Clear character buffer charBuf.setLength(0); } @Override public void setDocumentLocator(Locator loc) { locator = loc; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy