All Downloads are FREE. Search and download functionalities are using the official Maven repository.

cdc.io.data.xml.XmlDataReader Maven / Gradle / Ivy

The newest version!
package cdc.io.data.xml;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
import java.util.function.Function;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.DefaultHandler2;

import cdc.io.compress.CompressionUtils;
import cdc.io.compress.Compressor;
import cdc.io.data.Child;
import cdc.io.data.Comment;
import cdc.io.data.Document;
import cdc.io.data.Element;
import cdc.io.data.NodeType;
import cdc.io.data.Parent;
import cdc.io.data.Text;
import cdc.io.data.TextKind;
import cdc.io.data.util.AttributeNameConverter;
import cdc.io.data.util.AttributePredicate;
import cdc.io.data.util.AttributeValueConverter;
import cdc.io.data.util.ElementNameConverter;
import cdc.io.data.util.ElementPredicate;
import cdc.io.data.util.TextContentConverter;
import cdc.io.xml.StAXSupport;
import cdc.io.xml.XmlUtils;
import cdc.util.lang.Checks;

/**
 * Class used to read an XML source and produce a Document.
 * <p>
 * It is possible, during loading, to:
 * <ul>
 * <li>filter (keep or remove) attributes
 * <li>convert attribute names
 * <li>convert attribute values
 * <li>filter (keep or remove) elements at creation time (pre) and when all their children are known (post).
 * <li>convert element names
 * <li>ignore (remove) comments
 * <li>ignore (remove) spaces
 * </ul>
 * This can be used to create only necessary nodes and attributes
 * in memory and adapt their names or content.
 * <p>
 * It is possible to attach an {@link IssueHandler} to process warnings, recoverable and fatal errors.
 * <p>
* WARNING this class will become immutable in the future. Use {@link XmlDataReader.Builder}. * * @author Damien Carbonne */ public class XmlDataReader { private static final Logger LOGGER = LogManager.getLogger(XmlDataReader.class); private final Set features = EnumSet.noneOf(Feature.class); private IssueHandler issueHandler = null; /** * The entity resolver. */ private EntityResolver entityResolver = null; /** * The filter attributes. *

* Only accepted attributes are kept and transformed. */ private AttributePredicate attributeFilter = AttributePredicate.ANY_ATTRIBUTE; /** * Attribute name converter. *

* Applied on accepted attributes. */ private AttributeNameConverter attributeNameConverter = AttributeNameConverter.IDENTITY; /** * Attribute value converter. *

* Applied on accepted attributes. */ private AttributeValueConverter attributeValueConverter = AttributeValueConverter.INDENTITY; /** * The filter to use when the element is created. */ private ElementPredicate elementPreFilter = ElementPredicate.ANY_ELEMENT; /** * The filter to use when all the children of an element are created. */ private ElementPredicate elementPostFilter = ElementPredicate.ANY_ELEMENT; /** * Element name converter. *

* Applied on elements that are pre accepted. */ private ElementNameConverter elementNameConverter = ElementNameConverter.IDENTITY; /** * Text content converter. */ private TextContentConverter textContentConverter = TextContentConverter.IDENTITY; private static final String FILTER = "filter"; private static final String CONVERTER = "converter"; public enum Feature { /** * If enabled, comments are loaded (as comment nodes). *

* They are ignored by default. */ LOAD_COMMENTS, /** * If enabled, spaces are loaded (as text nodes). *

* They are ignored by default. */ LOAD_SPACES, /** * If enabled, DTD is loaded if present. *

* DTD is ignored by default. *

* WARNING: when there is a DTD, parsing may depend on conformity of XML content to DTD. * For example, characters can be parsed as characters or spaces. So, enabling or disabling * {@link #LOAD_SPACES} may change result. *

* WARNING: DTD is not fully compliant with {@link #USE_SAX}, one should use {@link #USE_STAX}. *

* WARNING: Use of DTD is discouraged with this library. It is XML-like, but it is not 100% XML. * One day it may diverge more from XML. */ LOAD_DTD, /** * If enabled, mixed content is allowed. */ ALLOW_MIXED_CONTENT, /** * If enabled, CDATA are preserved. *

* Otherwise, they are transformed to normal text. * WARNING:Some parsers don't support this feature. */ PRESERVE_CDATA, /** * If enabled, a dummy entity resolver is used. *

* This may be used to ignore DTD.
* However, if entities are used, result will be wrong. */ DUMMY_ENTITY_RESOLVER, USE_SAX, USE_STAX } private XmlDataReader(Builder builder) { this.features.addAll(builder.features); this.issueHandler = builder.issueHandler; this.entityResolver = builder.entityResolver; this.attributeFilter = Checks.isNotNull(builder.attributeFilter, "attributeFilter"); this.attributeNameConverter = Checks.isNotNull(builder.attributeNameConverter, "attributeNameConverter"); this.attributeValueConverter = Checks.isNotNull(builder.attributeValueConverter, "attributeValueConverter"); this.elementPreFilter = Checks.isNotNull(builder.elementPreFilter, "elementPreFilter"); this.elementPostFilter = Checks.isNotNull(builder.elementPostFilter, "elementPostFilter"); this.elementNameConverter = Checks.isNotNull(builder.elementNameConverter, "elementNameConverter"); this.textContentConverter = Checks.isNotNull(builder.textContentConverter, "textContentConverter"); } /** * Builds an XmlDataRaeder. * * @deprecated Use {@link XmlDataReader.Builder}. */ @Deprecated(since = "2024-05-01", forRemoval = true) public XmlDataReader() { super(); } /** * Builds an XmlDataRaeder. * * @param features The enabled features. * @deprecated Use {@link XmlDataReader.Builder}. */ @Deprecated(since = "2024-05-01", forRemoval = true) public XmlDataReader(Feature... features) { for (final Feature feature : features) { setEnabled(feature, true); } } /** * Builds an XmlDataRaeder. * * @param handler The issue handler. * @deprecated Use {@link XmlDataReader.Builder}. */ @Deprecated(since = "2024-05-01", forRemoval = true) public void setIssueHandler(IssueHandler handler) { this.issueHandler = handler; } public final IssueHandler getIssueHandler() { return issueHandler; } /** * Returns {@code true} when a feature is enabled. * * @param feature The feature. * @return {@code true} if {@code feature} is enabled. 
*/ public final boolean isEnabled(Feature feature) { return features.contains(feature); } /** * Enables or disables a feature. * * @param feature The feature. * @param enabled If {@code true}, the feature is enabled. It is disabled otherwise. * @deprecated Use {@link XmlDataReader.Builder}. */ @Deprecated(since = "2024-05-01", forRemoval = true) public final void setEnabled(Feature feature, boolean enabled) { if (enabled) { features.add(feature); } else { features.remove(feature); } } /** * @return The entity resolver. */ public EntityResolver getEntityResolver() { return entityResolver; } /** * Sets the entity resolver. *

* WARNING: This has interactions with {@link Feature#DUMMY_ENTITY_RESOLVER}. * * @param resolver The entity resolver. * @deprecated Use {@link XmlDataReader.Builder}. */ @Deprecated(since = "2024-05-01", forRemoval = true) public void setEntityResolver(EntityResolver resolver) { this.entityResolver = resolver; } /** * @return The attribute filter. */ public AttributePredicate getAttributeFilter() { return attributeFilter; } /** * Sets the attribute filter. *

* The name and value that are passed to the filter are the original name and value, before any name or value conversion * happens. * * @param filter The filter. * @throws IllegalArgumentException When {@code filter} is null. * @deprecated Use {@link XmlDataReader.Builder}. */ @Deprecated(since = "2024-05-01", forRemoval = true) public void setAttributeFilter(AttributePredicate filter) { Checks.isNotNull(filter, FILTER); this.attributeFilter = filter; } /** * @return The attribute name converter. */ public AttributeNameConverter getAttributeNameConverter() { return attributeNameConverter; } /** * Sets the attribute name converter. *

* The conversion is applied after attribute filtering. * * @param converter The converter. * @throws IllegalArgumentException When {@code converter} is null. * @deprecated Use {@link XmlDataReader.Builder}. */ @Deprecated(since = "2024-05-01", forRemoval = true) public void setAttributeNameConverter(AttributeNameConverter converter) { Checks.isNotNull(converter, CONVERTER); this.attributeNameConverter = converter; } /** * @return The attribute value converter. */ public AttributeValueConverter getAttributeValueConverter() { return attributeValueConverter; } /** * Sets the attribute value converter. *

* The conversion is applied after attribute filtering. * * @param converter The converter. * @throws IllegalArgumentException When {@code converter} is null. * @deprecated Use {@link XmlDataReader.Builder}. */ @Deprecated(since = "2024-05-01", forRemoval = true) public void setAttributeValueConverter(AttributeValueConverter converter) { Checks.isNotNull(converter, CONVERTER); this.attributeValueConverter = converter; } /** * @return the element pre filter. */ public ElementPredicate getElementPreFilter() { return elementPreFilter; } /** * Sets the element pre filter. *

* WARNING:
* <ul>
* <li>The element name is the original one (before any conversion happens).
* <li>The attribute names and values are the converted ones (after all conversions have happened).
* <li>Only remaining attributes are available.
* </ul>
* * @param filter The filter. * @throws IllegalArgumentException When {@code filter} is null. * @deprecated Use {@link XmlDataReader.Builder}. */ @Deprecated(since = "2024-05-01", forRemoval = true) public void setElementPreFilter(ElementPredicate filter) { Checks.isNotNull(filter, FILTER); this.elementPreFilter = filter; } /** * @return the element post filter. */ public ElementPredicate getElementPostFilter() { return elementPostFilter; } /** * Sets the element post filter. *

* WARNING:
* <ul>
* <li>The element name is the converted one (after conversion has happened).
* <li>The attribute names and values are the converted ones (after all conversions have happened).
* <li>Only remaining attributes are available.
* </ul>
* * @param filter The filter. * @throws IllegalArgumentException When {@code filter} is null. * @deprecated Use {@link XmlDataReader.Builder}. */ @Deprecated(since = "2024-05-01", forRemoval = true) public void setElementPostFilter(ElementPredicate filter) { Checks.isNotNull(filter, FILTER); this.elementPostFilter = filter; } /** * @return The element name converter. */ public ElementNameConverter getElementNameConverter() { return elementNameConverter; } /** * Sets the element name converter. * * @param converter The converter. * @throws IllegalArgumentException When {@code converter} is null. * @deprecated Use {@link XmlDataReader.Builder}. */ @Deprecated(since = "2024-05-01", forRemoval = true) public void setElementNameConverter(ElementNameConverter converter) { Checks.isNotNull(converter, CONVERTER); this.elementNameConverter = converter; } public TextContentConverter getTextContentConverter() { return textContentConverter; } /** * Sets the text converter. * * @param converter The text converter. * @deprecated Use {@link XmlDataReader.Builder}. 
*/ @Deprecated(since = "2024-05-01", forRemoval = true) public void setTextContentConverter(TextContentConverter converter) { Checks.isNotNull(converter, CONVERTER); this.textContentConverter = converter; } private static class DummyEntityResolver implements EntityResolver { public DummyEntityResolver() { super(); } @Override public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { LOGGER.debug("resolveEntity('{}', '{}')", publicId, systemId); return new InputSource(new StringReader("")); } } private XMLReader configureReader(SAXParser parser, SAXHandler handler) throws SAXException { final XMLReader reader = parser.getXMLReader(); reader.setContentHandler(handler); reader.setErrorHandler(handler); reader.setDTDHandler(handler); if (isEnabled(Feature.LOAD_COMMENTS) || isEnabled(Feature.PRESERVE_CDATA)) { reader.setProperty("http://xml.org/sax/properties/lexical-handler", handler); } reader.setProperty("http://xml.org/sax/properties/declaration-handler", handler); // Set dummy entity resolver before user defined one if (isEnabled(Feature.DUMMY_ENTITY_RESOLVER)) { reader.setEntityResolver(new DummyEntityResolver()); } if (getEntityResolver() != null) { if (isEnabled(Feature.DUMMY_ENTITY_RESOLVER)) { LOGGER.warn("Dummy entity resolver overwritten by user defined one"); } reader.setEntityResolver(getEntityResolver()); } return reader; } public Document read(InputStream is, Compressor compressor) throws IOException { LOGGER.debug("read(is=..., {})", compressor); return read(is, null, compressor); } public Document read(InputStream is) throws IOException { LOGGER.debug("read(is=...)"); return read(is, Compressor.NONE); } public Element readRoot(InputStream is, Compressor compressor) throws IOException { return Document.getRootElement(read(is, compressor)); } public Element readRoot(InputStream is) throws IOException { return Document.getRootElement(read(is)); } public Document read(InputStream is, String systemId, Compressor 
compressor) throws IOException { LOGGER.debug("read(is=..., '{}', {})", systemId, compressor); final InputStream adapted = compressor == null ? is : CompressionUtils.adapt(is, compressor); if (isEnabled(Feature.USE_STAX)) { final XMLInputFactory factory = XMLInputFactory.newInstance(); if (isEnabled(Feature.PRESERVE_CDATA)) { if (factory.isPropertySupported(StAXSupport.REPORT_CDATA)) { factory.setProperty(StAXSupport.REPORT_CDATA, Boolean.TRUE); } else { LOGGER.warn(StAXSupport.REPORT_CDATA + " is not supported"); } } factory.setProperty(XMLInputFactory.IS_VALIDATING, Boolean.FALSE); factory.setProperty(XMLInputFactory.SUPPORT_DTD, isEnabled(Feature.LOAD_DTD)); try { final XMLStreamReader reader; if (systemId == null) { reader = factory.createXMLStreamReader(systemId, adapted); } else { reader = factory.createXMLStreamReader(adapted); } final StAXHandler handler = new StAXHandler(this, reader); handler.read(); return handler.getDocument(); } catch (final XMLStreamException e) { throw new IOException(e); } } else { final SAXParserFactory factory = SAXParserFactory.newInstance(); final SAXHandler handler = new SAXHandler(this); try { final SAXParser parser = factory.newSAXParser(); final XMLReader reader = configureReader(parser, handler); final InputSource source = new InputSource(adapted); if (systemId != null) { source.setSystemId(systemId); } reader.parse(source); return handler.getDocument(); } catch (final ParserConfigurationException e) { LOGGER.trace(e); } catch (final SAXException e) { throw new IOException(e); } } return null; } /** * Reads an InputStream. * * @param is The InputStream. * @param systemId The systemId which is needed for resolving relative URIs. * @return The corresponding Document. * @throws IOException When an IO error occurs. 
*/ public Document read(InputStream is, String systemId) throws IOException { LOGGER.debug("read(is=..., '{}')", systemId); return read(is, systemId, null); } public Element readRoot(InputStream is, String systemId, Compressor compressor) throws IOException { return Document.getRootElement(read(is, systemId, compressor)); } /** * Reads an InputStream. * * @param is The InputStream. * @param systemId The systemId which is needed for resolving relative URIs. * @return The root element of the corresponding Document. * @throws IOException When an IO error occurs. */ public Element readRoot(InputStream is, String systemId) throws IOException { return Document.getRootElement(read(is, systemId)); } /** * Reads a string. * * @param s The string. * @param charset The charset. Must be compliant with string content. * @return The corresponding Document. * @throws IOException When an IO error occurs. */ public Document read(String s, Charset charset) throws IOException { return read(new ByteArrayInputStream(s.getBytes(charset))); } /** * Reads a string. * * @param s The string. * @param charset The charset. Must be compliant with string content. * @return The root element of the corresponding Document. * @throws IOException When an IO error occurs. */ public Element readRoot(String s, Charset charset) throws IOException { return Document.getRootElement(read(s, charset)); } /** * Reads an URL. * * @param url The URL. * @param compressor The compressor used to compress file. * @return The corresponding Document. * @throws IOException When an IO error occurs. */ public Document read(URL url, Compressor compressor) throws IOException { LOGGER.debug("read(url='{}', {})", url, compressor); try (final InputStream is = CompressionUtils.adapt(url.openStream(), compressor)) { return read(is); } } /** * Reads an URL. * * @param url The URL. * @return The corresponding Document. * @throws IOException When an IO error occurs. 
*/ public Document read(URL url) throws IOException { return read(url, Compressor.NONE); } /** * Reads an URL. * * @param url The URL. * @param compressor The compressor used to compress file. * @return The root element of the corresponding Document. * @throws IOException When an IO error occurs. */ public Element readRoot(URL url, Compressor compressor) throws IOException { return Document.getRootElement(read(url, compressor)); } /** * Reads an URL. * * @param url The URL. * @return The root element of the corresponding Document. * @throws IOException When an IO error occurs. */ public Element readRoot(URL url) throws IOException { return readRoot(url, Compressor.NONE); } /** * Reads a file. * * @param filename The file name. * @param compressor The compressor used to compress file. * @return The corresponding Document. * @throws IOException When an IO error occurs. */ public Document read(String filename, Compressor compressor) throws IOException { LOGGER.debug("read(filename='{}', {}", filename, compressor); try (InputStream is = new BufferedInputStream(CompressionUtils.adapt(new FileInputStream(filename), compressor))) { return read(is, filename); } } /** * Reads a file. * * @param filename The file name. * @return The corresponding Document. * @throws IOException When an IO error occurs. */ public Document read(String filename) throws IOException { return read(filename, Compressor.NONE); } /** * Reads a file. * * @param filename The file name. * @param compressor The compressor used to compress file. * @return The root element of the corresponding Document. * @throws IOException When an IO error occurs. */ public Element readRoot(String filename, Compressor compressor) throws IOException { return Document.getRootElement(read(filename, compressor)); } /** * Reads a file. * * @param filename The file name. * @return The root element of the corresponding Document. * @throws IOException When an IO error occurs. 
*/ public Element readRoot(String filename) throws IOException { return readRoot(filename, Compressor.NONE); } /** * Reads a file. * * @param file The file. * @param compressor The compressor used to compress file. * @return The corresponding Document. * @throws IOException When an IO error occurs. */ public Document read(File file, Compressor compressor) throws IOException { LOGGER.debug("read(file='{}', {})", file, compressor); return read(file.getPath(), compressor); } /** * Reads a file. * * @param file The file. * @return The corresponding Document. * @throws IOException When an IO error occurs. */ public Document read(File file) throws IOException { return read(file.getPath(), Compressor.NONE); } /** * Reads a file. * * @param file The file. * @param compressor The compressor used to compress file. * @return The root element of the corresponding Document. * @throws IOException When an IO error occurs. */ public Element readRoot(File file, Compressor compressor) throws IOException { return Document.getRootElement(read(file, compressor)); } /** * Reads a file. * * @param file The file. * @return The root element of the corresponding Document. * @throws IOException When an IO error occurs. */ public Element readRoot(File file) throws IOException { return readRoot(file, Compressor.NONE); } public static XmlDataReader create(Feature... features) { return XmlDataReader.builder() .features(features) .build(); } public static Document load(InputStream is, Feature... features) throws IOException { final XmlDataReader reader = create(features); return reader.read(is); } public static Element loadRoot(InputStream is, Feature... features) throws IOException { final XmlDataReader reader = create(features); return reader.readRoot(is); } public static Document load(InputStream is, String systemId, Feature... 
features) throws IOException { final XmlDataReader reader = create(features); return reader.read(is, systemId); } public static Element loadRoot(InputStream is, String systemId, Compressor compressor, Feature... features) throws IOException { final XmlDataReader reader = create(features); return reader.readRoot(is, systemId, compressor); } public static Element loadRoot(InputStream is, String systemId, Feature... features) throws IOException { final XmlDataReader reader = create(features); return reader.readRoot(is, systemId); } public static Document load(String s, Charset charset, Feature... features) throws IOException { final XmlDataReader reader = create(features); return reader.read(s, charset); } public static Element loadRoot(String s, Charset charset, Feature... features) throws IOException { final XmlDataReader reader = create(features); return reader.readRoot(s, charset); } public static Document load(URL url, Compressor compressor, Feature... features) throws IOException { final XmlDataReader reader = create(features); return reader.read(url, compressor); } public static Document load(URL url, Feature... features) throws IOException { return load(url, Compressor.NONE, features); } public static Element loadRoot(URL url, Compressor compressor, Feature... features) throws IOException { final XmlDataReader reader = create(features); return reader.readRoot(url, compressor); } public static Element loadRoot(URL url, Feature... features) throws IOException { return loadRoot(url, Compressor.NONE, features); } public static Document load(String filename, Compressor compressor, Feature... features) throws IOException { final XmlDataReader reader = create(features); return reader.read(filename, compressor); } public static Document load(String filename, Feature... features) throws IOException { return load(filename, Compressor.NONE, features); } public static Element loadRoot(String filename, Compressor compressor, Feature... 
features) throws IOException { final XmlDataReader reader = create(features); return reader.readRoot(filename, compressor); } public static Element loadRoot(String filename, Feature... features) throws IOException { return loadRoot(filename, Compressor.NONE, features); } public static Document load(File file, Compressor compressor, Feature... features) throws IOException { final XmlDataReader reader = create(features); return reader.read(file, compressor); } public static Document load(File file, Feature... features) throws IOException { return load(file, Compressor.NONE, features); } public static Element loadRoot(File file, Compressor compressor, Feature... features) throws IOException { final XmlDataReader reader = create(features); return reader.readRoot(file, compressor); } public static Element loadRoot(File file, Feature... features) throws IOException { return loadRoot(file, Compressor.NONE, features); } public static Builder builder() { return new Builder(); } /** * XmlDataReader Builder. 
*/
    public static final class Builder {
        /** The enabled features. Initially empty. */
        private final Set<Feature> features = EnumSet.noneOf(Feature.class);
        private IssueHandler issueHandler;
        private EntityResolver entityResolver;
        private AttributePredicate attributeFilter = AttributePredicate.ANY_ATTRIBUTE;
        private AttributeNameConverter attributeNameConverter = AttributeNameConverter.IDENTITY;
        private AttributeValueConverter attributeValueConverter = AttributeValueConverter.INDENTITY;
        private ElementPredicate elementPreFilter = ElementPredicate.ANY_ELEMENT;
        private ElementPredicate elementPostFilter = ElementPredicate.ANY_ELEMENT;
        private ElementNameConverter elementNameConverter = ElementNameConverter.IDENTITY;
        private TextContentConverter textContentConverter = TextContentConverter.IDENTITY;

        private Builder() {
        }

        /**
         * Enables or disables a feature.
         *
         * @param feature The feature.
         * @param enabled If {@code true}, the feature is enabled. It is disabled otherwise.
         * @return This builder.
         */
        public Builder feature(Feature feature,
                               boolean enabled) {
            if (enabled) {
                this.features.add(feature);
            } else {
                this.features.remove(feature);
            }
            return this;
        }

        /**
         * Enables a feature.
         *
         * @param feature The feature.
         * @return This builder.
         */
        public Builder feature(Feature feature) {
            return feature(feature, true);
        }

        /**
         * Enables several features.
         *
         * @param features The features to enable.
         * @return This builder.
         */
        public Builder features(Feature... features) {
            Collections.addAll(this.features, features);
            return this;
        }

        /**
         * Sets the issue handler.
         *
         * @param issueHandler The issue handler.
         * @return This builder.
         */
        public Builder issueHandler(IssueHandler issueHandler) {
            this.issueHandler = issueHandler;
            return this;
        }

        /**
         * Sets the entity resolver.
         *
         * @param entityResolver The entity resolver.
         * @return This builder.
         */
        public Builder entityResolver(EntityResolver entityResolver) {
            this.entityResolver = entityResolver;
            return this;
        }

        /**
         * Sets the attribute filter.
         *
         * @param attributeFilter The attribute filter.
         * @return This builder.
         */
        public Builder attributeFilter(AttributePredicate attributeFilter) {
            this.attributeFilter = attributeFilter;
            return this;
        }

        /**
         * Sets the attribute name converter.
         *
         * @param attributeNameConverter The attribute name converter.
         * @return This builder.
         */
        public Builder attributeNameConverter(AttributeNameConverter attributeNameConverter) {
            this.attributeNameConverter = attributeNameConverter;
            return this;
        }

        /**
         * Sets the attribute value converter.
         *
         * @param attributeValueConverter The attribute value converter.
         * @return This builder.
         */
        public Builder attributeValueConverter(AttributeValueConverter attributeValueConverter) {
            this.attributeValueConverter = attributeValueConverter;
            return this;
        }

        /**
         * Sets the element pre filter.
         *
         * @param elementPreFilter The element pre filter.
         * @return This builder.
         */
        public Builder elementPreFilter(ElementPredicate elementPreFilter) {
            this.elementPreFilter = elementPreFilter;
            return this;
        }

        /**
         * Sets the element post filter.
         *
         * @param elementPostFilter The element post filter.
         * @return This builder.
         */
        public Builder elementPostFilter(ElementPredicate elementPostFilter) {
            this.elementPostFilter = elementPostFilter;
            return this;
        }

        /**
         * Sets the element name converter.
         *
         * @param elementNameConverter The element name converter.
         * @return This builder.
         */
        public Builder elementNameConverter(ElementNameConverter elementNameConverter) {
            this.elementNameConverter = elementNameConverter;
            return this;
        }

        /**
         * Sets the text content converter.
         *
         * @param textContentConverter The text content converter.
         * @return This builder.
         */
        public Builder textContentConverter(TextContentConverter textContentConverter) {
            this.textContentConverter = textContentConverter;
            return this;
        }

        /**
         * @return A new XmlDataReader configured from this builder.
         */
        public XmlDataReader build() {
            return new XmlDataReader(this);
        }
    }

    /**
     * Internal utility class used to build data.
     * <p>
     * Contexts form a chain: each context knows its parent and lazily creates
     * one child that is reused by every subsequent push.
     */
    private static class Context {
        private final Context parent;
        private Context child;
        /**
         * If true, parsed data are kept, discarded otherwise.
         */
        boolean keep = true;

        private Context(Context parent) {
            this.parent = parent;
        }

        /**
         * Creates a context that has no parent.
         */
        public Context() {
            this(null);
        }

        /**
         * Sets the keep flag. Data can only be kept when the parent context keeps data.
         * <p>
         * Must not be called on a context that has no parent.
         *
         * @param keep The requested keep flag.
         */
        private void setKeep(boolean keep) {
            this.keep = parent.keep && keep;
        }

        /**
         * Moves to the child context, creating it on first use.
         *
         * @param keep The keep flag requested for the child context.
         * @return The child context.
         */
        public Context push(boolean keep) {
            if (child == null) {
                child = new Context(this);
            }
            child.setKeep(keep);
            return child;
        }

        /**
         * @return The parent context.
         */
        public Context pop() {
            return parent;
        }
    }

    /**
     * Index-based access to the attributes of the current element.
     */
    private interface AttributesExtractor {
        int getLength();

        String getQName(int index);

        String getValue(int index);
    }

    /**
     * Internal utility used to build data.
     *

* It is used with StAX and SAX. * * @param The exception type. */ private static class DataBuilder { private final XmlDataReader caller; private final Function exceptionBuilder; private Document document = null; private Parent currentParent = null; private TextKind kind = TextKind.STANDARD; private final StringBuilder chars = new StringBuilder(); private final boolean preserveCData; private final boolean featureLoadSpaces; private final boolean featureAllowMixedContent; private final boolean featureLoadDTD; private boolean charsIsWhiteSpace = true; public DataBuilder(XmlDataReader caller, Function exceptionBuilder) { this.caller = caller; this.exceptionBuilder = exceptionBuilder; this.preserveCData = caller.isEnabled(Feature.PRESERVE_CDATA); this.featureLoadSpaces = caller.isEnabled(Feature.LOAD_SPACES); this.featureAllowMixedContent = caller.isEnabled(Feature.ALLOW_MIXED_CONTENT); this.featureLoadDTD = caller.isEnabled(Feature.LOAD_DTD); } /** * Top of context stack. *

* At beginning there is a sentinel.
* The {@code currentParent} node corresponds to the top-most {@code KEEP}. */ private Context context = new Context(); private void addText() { final Text text = new Text(currentParent); if (preserveCData) { text.setKind(kind); } text.setContent(caller.textContentConverter.convertTextContent(currentParent, chars.toString())); } private void flushText(boolean preserve) throws E { if (chars.length() > 0) { if (charsIsWhiteSpace) { if (featureLoadSpaces || (preserve && currentParent.getChildrenCount() == 0)) { addText(); } } else if (currentParent.getChildrenCount() > 1 && !featureAllowMixedContent) { throw exceptionBuilder.apply("Mixed content not allowed"); } else { addText(); } chars.setLength(0); charsIsWhiteSpace = true; } } /** * @return {@code true} if {@code currentParent} has a text child close to the end. */ private boolean currentParentHasCloseTextChild() { final List children = currentParent.getChildren(); for (int index = children.size() - 1; index >= 0; index--) { final Child child = children.get(index); final NodeType childType = child.getType(); if (childType == NodeType.ELEMENT) { // No need to continue, even if technically a text child can exist further. // The usage of this method makes this assumption valid. 
return false; } else if (childType == NodeType.TEXT) { return true; } // If child is a comment, continue // It can not be a document } return false; } public Document getDocument() { return document; } public void startDocument() throws E { document = new Document(); currentParent = document; flushText(false); } public void startElement(String qName, AttributesExtractor atts) throws E { if (context.keep) { flushText(false); if (!featureAllowMixedContent && currentParentHasCloseTextChild()) { throw exceptionBuilder.apply("Mixed content not allowed"); } final Element element = new Element(caller.elementNameConverter.convertElementName(currentParent, qName)); for (int index = 0; index < atts.getLength(); index++) { final String name = atts.getQName(index); final String value = atts.getValue(index); if (caller.attributeFilter.accepts(element, name, value)) { element.addAttribute(caller.attributeNameConverter.convertAttributeName(element, name), caller.attributeValueConverter.convertAttributeValue(element, name, value)); } } if (caller.elementPreFilter.accepts(currentParent, element)) { context = context.push(true); currentParent.addChild(element); currentParent = element; } else { context = context.push(false); } } else { context = context.push(false); } } public void endElement(String qName) throws E { if (context.keep) { final Element current = (Element) currentParent; flushText(true); currentParent = current.getParent(); if (!caller.elementPostFilter.accepts(currentParent, current)) { current.detach(); } } context = context.pop(); } public void characters(char[] ch, int start, int length) { if (context.keep) { chars.append(ch, start, length); charsIsWhiteSpace = charsIsWhiteSpace && XmlUtils.isWhiteSpace(ch, start, length); } } public void comment(char[] ch, int start, int length) throws E { if (context.keep) { flushText(false); final Comment comment = new Comment(new String(ch, start, length)); currentParent.addChild(comment); } } public void setTextKind(TextKind 
kind) throws E { if (this.kind != kind) { flushText(true); // FIXME } this.kind = kind; } } /** * Internal class used to read XML using StAX. * * @author Damien Carbonne */ private static class StAXHandler { private final XMLStreamReader reader; private final DataBuilder builder; private final AttributesExtractor wrapper = new AttributesExtractor() { @Override public int getLength() { return reader.getAttributeCount(); } @Override public String getQName(int index) { return reader.getAttributeLocalName(index); } @Override public String getValue(int index) { return reader.getAttributeValue(index); } }; public StAXHandler(XmlDataReader caller, XMLStreamReader reader) { this.reader = reader; this.builder = new DataBuilder<>(caller, XMLStreamException::new); } public void read() throws XMLStreamException { while (reader.hasNext()) { final int eventType = reader.getEventType(); switch (eventType) { case XMLStreamConstants.START_DOCUMENT: builder.startDocument(); break; case XMLStreamConstants.COMMENT: builder.comment(reader.getTextCharacters(), reader.getTextStart(), reader.getTextLength()); break; case XMLStreamConstants.CHARACTERS: builder.setTextKind(TextKind.STANDARD); builder.characters(reader.getTextCharacters(), reader.getTextStart(), reader.getTextLength()); break; case XMLStreamConstants.SPACE: if (builder.featureLoadSpaces) { builder.setTextKind(TextKind.STANDARD); builder.characters(reader.getTextCharacters(), reader.getTextStart(), reader.getTextLength()); } break; case XMLStreamConstants.START_ELEMENT: builder.startElement(reader.getName().getLocalPart(), wrapper); break; case XMLStreamConstants.END_ELEMENT: builder.endElement(reader.getName().getLocalPart()); break; case XMLStreamConstants.CDATA: builder.setTextKind(TextKind.CDATA); builder.characters(reader.getTextCharacters(), reader.getTextStart(), reader.getTextLength()); break; case XMLStreamConstants.DTD: if (builder.featureLoadDTD) { builder.document.setDTD(reader.getText()); } break; case 
XMLStreamConstants.ATTRIBUTE: case XMLStreamConstants.END_DOCUMENT: case XMLStreamConstants.ENTITY_DECLARATION: case XMLStreamConstants.ENTITY_REFERENCE: case XMLStreamConstants.NAMESPACE: case XMLStreamConstants.NOTATION_DECLARATION: case XMLStreamConstants.PROCESSING_INSTRUCTION: default: // Ignore break; } reader.next(); } } public Document getDocument() { return builder.getDocument(); } } /** * Internal class used to load XML source using SAX. * * @author Damien Carbonne */ private static class SAXHandler extends DefaultHandler2 { private final XmlDataReader caller; private final DataBuilder builder; private final StringBuilder dtd = new StringBuilder(); private boolean inDTD = false; private final String eol = "\n "; private class Wrapper implements AttributesExtractor { Attributes atts = null; public Wrapper() { super(); } @Override public int getLength() { return atts.getLength(); } @Override public String getQName(int index) { return atts.getQName(index); } @Override public String getValue(int index) { return atts.getValue(index); } } private final Wrapper wrapper = new Wrapper(); public SAXHandler(XmlDataReader caller) { this.caller = caller; this.builder = new DataBuilder<>(caller, SAXException::new); } public Document getDocument() { return builder.getDocument(); } @Override public void setDocumentLocator(Locator locator) { // Ignore } @Override public void startDocument() throws SAXException { builder.startDocument(); } @Override public void endDocument() throws SAXException { // Ignore } @Override public void startPrefixMapping(String prefix, String uri) throws SAXException { // Ignore } @Override public void endPrefixMapping(String prefix) throws SAXException { // Ignore } @Override public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { wrapper.atts = atts; builder.startElement(qName, wrapper); } @Override public void endElement(String uri, String localName, String qName) throws SAXException { 
builder.endElement(qName);
        }

        @Override
        public void characters(char[] ch,
                               int start,
                               int length) throws SAXException {
            builder.characters(ch, start, length);
        }

        @Override
        public void ignorableWhitespace(char[] ch,
                                        int start,
                                        int length) throws SAXException {
            // Ignore
        }

        @Override
        public void processingInstruction(String target,
                                          String data) throws SAXException {
            LOGGER.debug("processingInstruction({}, {})", target, data);
            if (inDTD) {
                // TODO
            }
        }

        @Override
        public void skippedEntity(String name) throws SAXException {
            // Ignore
        }

        @Override
        public InputSource resolveEntity(String publicId,
                                         String systemId) throws SAXException, IOException {
            LOGGER.debug("resolveEntity({}, {})", publicId, systemId);
            return null;
        }

        // NOTE(review): In the DTD callbacks below, every string literal that
        // contained markup ("<!...>") was truncated in the available source, and
        // startDTD/endDTD were fused into a single body. The literals have been
        // rebuilt from the XML 1.0 declaration syntax. The exact original layout
        // (spacing, quoting, escaping) must be confirmed against the upstream file.

        /**
         * Appends an external identifier ({@code PUBLIC "..." "..."} or
         * {@code SYSTEM "..."}) to the DTD buffer. Nothing is appended when both
         * identifiers are null.
         *
         * @param publicId The public identifier, possibly null.
         * @param systemId The system identifier, possibly null.
         */
        private void appendExternalId(String publicId,
                                      String systemId) {
            if (publicId != null) {
                dtd.append(" PUBLIC \"").append(publicId).append('"');
                if (systemId != null) {
                    dtd.append(" \"").append(systemId).append('"');
                }
            } else if (systemId != null) {
                dtd.append(" SYSTEM \"").append(systemId).append('"');
            }
        }

        @Override
        public void startDTD(String name,
                             String publicId,
                             String systemId) throws SAXException {
            LOGGER.debug("startDTD({}, {}, {})", name, publicId, systemId);
            inDTD = true;
            dtd.append("<!DOCTYPE ").append(name);
            appendExternalId(publicId, systemId);
            dtd.append(" [")
               .append(eol);
        }

        @Override
        public void endDTD() throws SAXException {
            LOGGER.debug("endDTD()");
            dtd.append("]>");
            inDTD = false;
            builder.document.setDTD(dtd.toString());
        }

        @Override
        public void notationDecl(String name,
                                 String publicId,
                                 String systemId) throws SAXException {
            LOGGER.debug("DTD notationDecl({}, {}, {})", name, publicId, systemId);
            dtd.append("<!NOTATION ").append(name);
            appendExternalId(publicId, systemId);
            dtd.append('>')
               .append(eol);
        }

        @Override
        public void unparsedEntityDecl(String name,
                                       String publicId,
                                       String systemId,
                                       String notationName) throws SAXException {
            LOGGER.debug("DTD unparsedEntityDecl({}, {}, {}, {})", name, publicId, systemId, notationName);
            dtd.append("<!ENTITY ").append(name);
            appendExternalId(publicId, systemId);
            dtd.append(" NDATA ").append(notationName)
               .append('>')
               .append(eol);
        }

        @Override
        public void elementDecl(String name,
                                String model) throws SAXException {
            LOGGER.debug("DTD elementDecl({}, {})", name, model);
            dtd.append("<!ELEMENT ").append(name)
               .append(' ').append(model)
               .append('>')
               .append(eol);
        }

        @Override
        public void attributeDecl(String eName,
                                  String aName,
                                  String type,
                                  String mode,
                                  String value) throws SAXException {
            LOGGER.debug("DTD attributeDecl({}, {}, {}, {}, {})", eName, aName, type, mode, value);
            dtd.append("<!ATTLIST ").append(eName)
               .append(' ').append(aName)
               .append(' ').append(type);
            // SAX reports a null mode or value when none applies.
            if (mode != null) {
                dtd.append(' ').append(mode);
            }
            if (value != null) {
                dtd.append(" \"").append(value).append('"');
            }
            dtd.append('>')
               .append(eol);
        }

        @Override
        public void internalEntityDecl(String name,
                                       String value) throws SAXException {
            LOGGER.debug("DTD internalEntityDecl({}, {})", name, value);
            // NOTE(review): parameter entities are reported by SAX with a leading
            // '%' in the name, and the value is written unescaped. Confirm how
            // the original handled both.
            dtd.append("<!ENTITY ").append(name)
               .append(" \"").append(value)
               .append("\">")
               .append(eol);
        }

        @Override
        public void externalEntityDecl(String name,
                                       String publicId,
                                       String systemId) throws SAXException {
            LOGGER.debug("DTD externalEntityDecl({}, {}, {})", name, publicId, systemId);
            dtd.append("<!ENTITY ").append(name);
            appendExternalId(publicId, systemId);
            dtd.append('>')
               .append(eol);
        }

        @Override
        public void startEntity(String name) throws SAXException {
            LOGGER.debug("startEntity({})", name);
            // Ignore
        }

        @Override
        public void endEntity(String name) throws SAXException {
            LOGGER.debug("endEntity({})", name);
            // Ignore
        }

        @Override
        public void startCDATA() throws SAXException {
            LOGGER.debug("startCDATA");
            builder.setTextKind(TextKind.CDATA);
        }

        @Override
        public void endCDATA() throws SAXException {
            LOGGER.debug("endCDATA");
            builder.setTextKind(TextKind.STANDARD);
        }

        @Override
        public void comment(char[] ch,
                            int start,
                            int length) throws SAXException {
            LOGGER.debug("comment(...)");
            if (inDTD) {
                // Comments met inside the DTD are kept as part of the DTD text.
                dtd.append("<!--").append(ch, start, length)
                   .append("-->")
                   .append(eol);
            } else {
                builder.comment(ch, start, length);
            }
        }

        /**
         * Reports a warning to the caller's issue handler, or logs it when no
         * issue handler is set.
         */
        @Override
        public void warning(SAXParseException exception) throws SAXException {
            if (caller.issueHandler == null) {
                LOGGER.warn("{}:{} {}", exception.getLineNumber(), exception.getColumnNumber(), exception.getMessage());
            } else {
                caller.issueHandler.warning(exception.getMessage(),
                                            exception.getSystemId(),
                                            exception.getPublicId(),
                                            exception.getLineNumber(),
                                            exception.getColumnNumber());
            }
        }

        /**
         * Reports an error to the caller's issue handler, or logs it when no
         * issue handler is set. Parsing is not interrupted by this method.
         */
        @Override
        public void error(SAXParseException exception) throws SAXException {
            if (caller.issueHandler == null) {
                LOGGER.error("{}:{} {}", exception.getLineNumber(), exception.getColumnNumber(), exception.getMessage());
            } else {
                caller.issueHandler.error(exception.getMessage(),
                                          exception.getSystemId(),
                                          exception.getPublicId(),
                                          exception.getLineNumber(),
                                          exception.getColumnNumber());
            }
        }

        /**
         * Reports a fatal error to the caller's issue handler, or logs it when no
         * issue handler is set, then rethrows the exception.
         */
        @Override
        public void fatalError(SAXParseException exception) throws SAXException {
            if (caller.issueHandler == null) {
                LOGGER.fatal("{}:{} {}", exception.getLineNumber(), exception.getColumnNumber(), exception.getMessage());
            } else {
                caller.issueHandler.fatal(exception.getMessage(),
                                          exception.getSystemId(),
                                          exception.getPublicId(),
                                          exception.getLineNumber(),
                                          exception.getColumnNumber());
            }
            throw exception;
        }
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy