All Downloads are FREE. Search and download functionalities are using the official Maven repository.

sirius.kernel.xml.XMLReader Maven / Gradle / Ivy

Go to download

Provides common core classes and the microkernel powering all Sirius applications

There is a newer version: 12.9.1
Show newest version
/*
 * Made with all the love in the world
 * by scireum in Remshalden, Germany
 *
 * Copyright by scireum GmbH
 * http://www.scireum.de - info@scireum.de
 */

package sirius.kernel.xml;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.xml.sax.Attributes;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import sirius.kernel.async.CallContext;
import sirius.kernel.async.TaskContext;
import sirius.kernel.health.Exceptions;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

/**
 * A combination of DOM and SAX parser which permits to parse very large XML files while conveniently handling sub tree
 * using a DOM and xpath api.
 * 

* Used SAX to parse a given XML file. A set of {@link NodeHandler} objects can be given, which get notified if * a sub-tree below a given tag was parsed. This sub-tree is available as DOM and can conveniently be processed * using xpath. */ public class XMLReader extends DefaultHandler { private TaskContext taskContext; private Map handlers = Maps.newTreeMap(); private List activeHandlers = Lists.newArrayList(); private DocumentBuilder documentBuilder; /** * Creates a new XMLReader. *

* Use {@link #addHandler(String, NodeHandler)} tobind handlers to tags and then call one of the parse * methods to process the XML file. *

* To interrupt processing use {@link TaskContext#cancel()}. */ public XMLReader() { try { documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); taskContext = CallContext.getCurrent().get(TaskContext.class); } catch (ParserConfigurationException e) { throw Exceptions.handle(e); } } @Override public void characters(char[] ch, int start, int length) throws SAXException { // Delegate to active handlers... String cData = new String(ch).substring(start, start + length); for (SAX2DOMHandler handler : activeHandlers) { handler.text(cData); } } @Override public void endDocument() throws SAXException { // Consider iterating over all activeHandler which are not complete // yet and raise an exception. // For now this is simply ignored to make processing more robust. } @Override public void endElement(String uri, String localName, String name) throws SAXException { // Delegate to active handlers and deletes them if they are finished... activeHandlers.removeIf(handler -> handler.endElement(name)); } @Override public void processingInstruction(String target, String data) throws SAXException { // Delegate to active handlers... for (SAX2DOMHandler handler : activeHandlers) { handler.processingInstruction(target, data); } } @Override public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { // Delegate to active handlers... for (SAX2DOMHandler handler : activeHandlers) { handler.createElement(name, attributes); } // Start a new handler is necessary NodeHandler handler = handlers.get(name); if (handler != null) { SAX2DOMHandler saxHandler = new SAX2DOMHandler(handler, documentBuilder.newDocument()); saxHandler.createElement(name, attributes); activeHandlers.add(saxHandler); } // Check if the user tried to interrupt parsing.... if (!taskContext.isActive()) { throw new UserInterruptException(); } } /** * Registers a new handler for a qualified name of a node. *

* Handlers are invoked after the complete node was read. Namespaces are ignored for now which eases * the processing a lot (especially for xpath related tasks). Namespaces however * could be easily added by replacing String with QName here. * * @param name the qualified name of the tag which should be parsed and processed * @param handler the NodeHandler used to process the parsed DOM sub-tree */ public void addHandler(String name, NodeHandler handler) { handlers.put(name, handler); } /** * Parses the given stream. * * @param stream the stream to parse * @throws IOException if parsing the XML fails either due to an IO error or due to an SAXException (when * processing a malformed XML). */ public void parse(InputStream stream) throws IOException { parse(stream, null); } /** * Used to handle the an abort via {@link TaskContext} */ static class UserInterruptException extends RuntimeException { private static final long serialVersionUID = -7454219131982518216L; } /** * Parses the given stream using the given locator and interrupt signal. * * @param stream the stream containing the XML data * @param resourceLocator the resource locator used to discover dependent resources * @throws IOException if parsing the XML fails either due to an IO error or due to an SAXException (when * processing a malformed XML). 
*/ public void parse(InputStream stream, Function resourceLocator) throws IOException { try { SAXParserFactory factory = SAXParserFactory.newInstance(); SAXParser saxParser = factory.newSAXParser(); org.xml.sax.XMLReader reader = saxParser.getXMLReader(); reader.setEntityResolver(new EntityResolver() { @Override public InputSource resolveEntity(String publicId, String systemId) throws IOException { return tryResolveEntity(systemId, resourceLocator); } }); reader.setContentHandler(this); reader.parse(new InputSource(stream)); } catch (ParserConfigurationException | SAXException e) { throw new IOException(e); } catch (UserInterruptException e) { // IGNORED - this is used to cancel parsing if the used tried to // cancel a process. } finally { stream.close(); } } private InputSource tryResolveEntity(String systemId, Function resourceLocator) throws IOException { URL url = new URL(systemId); if (!"file".equals(url.getProtocol())) { return emptyResource(); } File file = new File(url.getFile()); if (file.exists()) { return new InputSource(new FileInputStream(file)); } if (resourceLocator == null) { return emptyResource(); } InputStream stream = resourceLocator.apply(file.getName()); if (stream != null) { return new InputSource(stream); } return emptyResource(); } private InputSource emptyResource() { return new InputSource(new StringReader("")); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy