// cdc.io.data.xml.XmlDataReader (Maven / Gradle / Ivy)
package cdc.io.data.xml;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
import java.util.function.Function;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.DefaultHandler2;
import cdc.io.compress.CompressionUtils;
import cdc.io.compress.Compressor;
import cdc.io.data.Child;
import cdc.io.data.Comment;
import cdc.io.data.Document;
import cdc.io.data.Element;
import cdc.io.data.NodeType;
import cdc.io.data.Parent;
import cdc.io.data.Text;
import cdc.io.data.TextKind;
import cdc.io.data.util.AttributeNameConverter;
import cdc.io.data.util.AttributePredicate;
import cdc.io.data.util.AttributeValueConverter;
import cdc.io.data.util.ElementNameConverter;
import cdc.io.data.util.ElementPredicate;
import cdc.io.data.util.TextContentConverter;
import cdc.io.xml.StAXSupport;
import cdc.io.xml.XmlUtils;
import cdc.util.lang.Checks;
/**
 * Class used to read an XML source and produce a Document.
 * <p>
 * It is possible, during loading, to:
 * <ul>
 * <li>filter (keep or remove) attributes</li>
 * <li>convert attributes names</li>
 * <li>convert attributes values</li>
 * <li>filter (keep or remove) elements at creation time (pre) and when all their children are known (post)</li>
 * <li>convert elements names</li>
 * <li>ignore (remove) comments</li>
 * <li>ignore (remove) spaces</li>
 * </ul>
 * This can be used to create only necessary nodes and attributes
 * in memory and adapt their names or content.
 * <p>
 * It is possible to attach an {@link IssueHandler} to process warnings, recoverable and fatal errors.
 * <p>
 * <b>WARNING:</b> this class will become immutable in the future. Use {@link XmlDataReader.Builder}.
 *
 * @author Damien Carbonne
 */
public class XmlDataReader {
/** Logger used by this class and its nested helpers. */
private static final Logger LOGGER = LogManager.getLogger(XmlDataReader.class);

/**
 * The enabled features.
 * <p>
 * The element type is declared explicitly to avoid a raw {@code Set}.
 */
private final Set<Feature> features = EnumSet.noneOf(Feature.class);

/** The issue handler. May be {@code null}. */
private IssueHandler issueHandler = null;

/**
 * The entity resolver. May be {@code null}.
 */
private EntityResolver entityResolver = null;

/**
 * The attribute filter.
 * <p>
 * Only accepted attributes are kept and transformed.
 */
private AttributePredicate attributeFilter = AttributePredicate.ANY_ATTRIBUTE;

/**
 * Attribute name converter.
 * <p>
 * Applied on accepted attributes.
 */
private AttributeNameConverter attributeNameConverter = AttributeNameConverter.IDENTITY;

/**
 * Attribute value converter.
 * <p>
 * Applied on accepted attributes.
 */
private AttributeValueConverter attributeValueConverter = AttributeValueConverter.INDENTITY;

/**
 * The filter to use when the element is created.
 */
private ElementPredicate elementPreFilter = ElementPredicate.ANY_ELEMENT;

/**
 * The filter to use when all the children of an element are created.
 */
private ElementPredicate elementPostFilter = ElementPredicate.ANY_ELEMENT;

/**
 * Element name converter.
 * <p>
 * Applied on elements that are pre accepted.
 */
private ElementNameConverter elementNameConverter = ElementNameConverter.IDENTITY;

/**
 * Text content converter.
 */
private TextContentConverter textContentConverter = TextContentConverter.IDENTITY;

/** Argument name passed to {@code Checks.isNotNull} by the filter setters. */
private static final String FILTER = "filter";

/** Argument name passed to {@code Checks.isNotNull} by the converter setters. */
private static final String CONVERTER = "converter";
/**
* Enumeration of features that can be enabled on an {@link XmlDataReader}.
*
* A feature that is not explicitly enabled is disabled.
*/
public enum Feature {
/**
* If enabled, comments are loaded (as comment nodes).
*
* They are ignored by default.
*/
LOAD_COMMENTS,
/**
* If enabled, spaces are loaded (as text nodes).
*
* They are ignored by default.
*/
LOAD_SPACES,
/**
* If enabled, DTD is loaded if present.
*
* DTD is ignored by default.
*
* WARNING: when there is a DTD, parsing may depend on conformity of XML content to DTD.
* For example, characters can be parsed as characters or spaces. So, enabling or disabling
* {@link #LOAD_SPACES} may change result.
*
* WARNING: DTD is not fully compliant with {@link #USE_SAX}, one should use {@link #USE_STAX}.
*
* WARNING: Use of DTD is discouraged with this library. It is XML-like, but it is not 100% XML.
* One day it may diverge more from XML.
*/
LOAD_DTD,
/**
* If enabled, mixed content is allowed.
*/
ALLOW_MIXED_CONTENT,
/**
* If enabled, CDATA are preserved.
*
* Otherwise, they are transformed to normal text.
* WARNING:Some parsers don't support this feature.
*/
PRESERVE_CDATA,
/**
* If enabled, a dummy entity resolver is used.
*
* This may be used to ignore DTD.
* However, if entities are used, result will be wrong.
*/
DUMMY_ENTITY_RESOLVER,
/**
* Requests the use of a SAX parser.
*
* NOTE(review): the stream reading code only tests {@link #USE_STAX} and uses SAX
* in all other cases, so this flag looks informative only - confirm.
*/
USE_SAX,
/**
* If enabled, a StAX parser is used to read the XML source.
*
* Otherwise, a SAX parser is used.
*/
USE_STAX
}
/**
* Creates a reader configured from a builder.
*
* Features, issue handler and entity resolver are copied as they are.
* Each filter and converter is validated with {@code Checks.isNotNull},
* in declaration order, so the first missing one is the one reported.
*
* @param builder The builder holding the configuration.
*/
private XmlDataReader(Builder builder) {
this.features.addAll(builder.features);
this.issueHandler = builder.issueHandler;
this.entityResolver = builder.entityResolver;
this.attributeFilter = Checks.isNotNull(builder.attributeFilter, "attributeFilter");
this.attributeNameConverter = Checks.isNotNull(builder.attributeNameConverter, "attributeNameConverter");
this.attributeValueConverter = Checks.isNotNull(builder.attributeValueConverter, "attributeValueConverter");
this.elementPreFilter = Checks.isNotNull(builder.elementPreFilter, "elementPreFilter");
this.elementPostFilter = Checks.isNotNull(builder.elementPostFilter, "elementPostFilter");
this.elementNameConverter = Checks.isNotNull(builder.elementNameConverter, "elementNameConverter");
this.textContentConverter = Checks.isNotNull(builder.textContentConverter, "textContentConverter");
}
/**
* Builds an XmlDataReader with no feature enabled and default filters and converters.
*
* @deprecated Use {@link XmlDataReader.Builder}.
*/
@Deprecated(since = "2024-05-01", forRemoval = true)
public XmlDataReader() {
super();
}
/**
 * Builds an XmlDataReader and enables the given features.
 *
 * @param features The features to enable.
 * @deprecated Use {@link XmlDataReader.Builder}.
 */
@Deprecated(since = "2024-05-01", forRemoval = true)
public XmlDataReader(Feature... features) {
    for (int index = 0; index < features.length; index++) {
        setEnabled(features[index], true);
    }
}
/**
* Sets the issue handler.
*
* @param handler The issue handler. No null check is done.
* @deprecated Use {@link XmlDataReader.Builder}.
*/
@Deprecated(since = "2024-05-01", forRemoval = true)
public void setIssueHandler(IssueHandler handler) {
this.issueHandler = handler;
}
/**
* @return The issue handler, possibly {@code null} when none was set.
*/
public final IssueHandler getIssueHandler() {
return issueHandler;
}
/**
* Returns {@code true} when a feature is enabled.
*
* A feature is enabled when it belongs to the set of features of this reader.
*
* @param feature The feature.
* @return {@code true} if {@code feature} is enabled.
*/
public final boolean isEnabled(Feature feature) {
return features.contains(feature);
}
/**
 * Enables or disables a feature.
 *
 * @param feature The feature.
 * @param enabled If {@code true}, the feature is added to enabled features.
 *            It is removed from them otherwise.
 * @deprecated Use {@link XmlDataReader.Builder}.
 */
@Deprecated(since = "2024-05-01", forRemoval = true)
public final void setEnabled(Feature feature,
                             boolean enabled) {
    if (!enabled) {
        features.remove(feature);
        return;
    }
    features.add(feature);
}
/**
* @return The user defined entity resolver, or {@code null} when none was set.
*/
public EntityResolver getEntityResolver() {
return entityResolver;
}
/**
* Sets the entity resolver.
*
* WARNING: This has interactions with {@link Feature#DUMMY_ENTITY_RESOLVER}:
* when configuring a SAX reader, a non null user defined resolver replaces the dummy one.
*
* @param resolver The entity resolver. May be {@code null}.
* @deprecated Use {@link XmlDataReader.Builder}.
*/
@Deprecated(since = "2024-05-01", forRemoval = true)
public void setEntityResolver(EntityResolver resolver) {
this.entityResolver = resolver;
}
/**
* @return The attribute filter used to decide which attributes are kept.
*/
public AttributePredicate getAttributeFilter() {
return attributeFilter;
}
/**
 * Sets the attribute filter.
 * <p>
 * The name and value that are passed to the filter are the original name and value,
 * before any name or value conversion happens.
 *
 * @param filter The filter.
 * @throws IllegalArgumentException When {@code filter} is null.
 * @deprecated Use {@link XmlDataReader.Builder}.
 */
@Deprecated(since = "2024-05-01", forRemoval = true)
public void setAttributeFilter(AttributePredicate filter) {
    // Validation and assignment in one step: nothing is assigned when the check fails.
    this.attributeFilter = Checks.isNotNull(filter, FILTER);
}
/**
* @return The attribute name converter applied to accepted attributes.
*/
public AttributeNameConverter getAttributeNameConverter() {
return attributeNameConverter;
}
/**
 * Sets the attribute name converter.
 * <p>
 * The conversion is applied after attribute filtering.
 *
 * @param converter The converter.
 * @throws IllegalArgumentException When {@code converter} is null.
 * @deprecated Use {@link XmlDataReader.Builder}.
 */
@Deprecated(since = "2024-05-01", forRemoval = true)
public void setAttributeNameConverter(AttributeNameConverter converter) {
    // Validation and assignment in one step: nothing is assigned when the check fails.
    this.attributeNameConverter = Checks.isNotNull(converter, CONVERTER);
}
/**
* @return The attribute value converter applied to accepted attributes.
*/
public AttributeValueConverter getAttributeValueConverter() {
return attributeValueConverter;
}
/**
 * Sets the attribute value converter.
 * <p>
 * The conversion is applied after attribute filtering.
 *
 * @param converter The converter.
 * @throws IllegalArgumentException When {@code converter} is null.
 * @deprecated Use {@link XmlDataReader.Builder}.
 */
@Deprecated(since = "2024-05-01", forRemoval = true)
public void setAttributeValueConverter(AttributeValueConverter converter) {
    // Validation and assignment in one step: nothing is assigned when the check fails.
    this.attributeValueConverter = Checks.isNotNull(converter, CONVERTER);
}
/**
* @return The element pre filter, used when an element is created.
*/
public ElementPredicate getElementPreFilter() {
return elementPreFilter;
}
/**
 * Sets the element pre filter.
 * <p>
 * WARNING:
 * <ul>
 * <li>The element name is the original one (before any conversion happens).
 * NOTE(review): the data builder of this class creates the element with its converted name
 * before calling this filter - confirm which one is intended.</li>
 * <li>The attributes names and values are the converted ones (after all conversions have happened).</li>
 * <li>Only remaining attributes are available.</li>
 * </ul>
 *
 * @param filter The filter.
 * @throws IllegalArgumentException When {@code filter} is null.
 * @deprecated Use {@link XmlDataReader.Builder}.
 */
@Deprecated(since = "2024-05-01", forRemoval = true)
public void setElementPreFilter(ElementPredicate filter) {
    // Validation and assignment in one step: nothing is assigned when the check fails.
    this.elementPreFilter = Checks.isNotNull(filter, FILTER);
}
/**
* @return The element post filter, used when all the children of an element are created.
*/
public ElementPredicate getElementPostFilter() {
return elementPostFilter;
}
/**
 * Sets the element post filter.
 * <p>
 * WARNING:
 * <ul>
 * <li>The element name is the converted one (after conversion has happened).</li>
 * <li>The attributes names and values are the converted ones (after all conversions have happened).</li>
 * <li>Only remaining attributes are available.</li>
 * </ul>
 *
 * @param filter The filter.
 * @throws IllegalArgumentException When {@code filter} is null.
 * @deprecated Use {@link XmlDataReader.Builder}.
 */
@Deprecated(since = "2024-05-01", forRemoval = true)
public void setElementPostFilter(ElementPredicate filter) {
    // Validation and assignment in one step: nothing is assigned when the check fails.
    this.elementPostFilter = Checks.isNotNull(filter, FILTER);
}
/**
* @return The element name converter applied when elements are created.
*/
public ElementNameConverter getElementNameConverter() {
return elementNameConverter;
}
/**
 * Sets the element name converter.
 *
 * @param converter The converter.
 * @throws IllegalArgumentException When {@code converter} is null.
 * @deprecated Use {@link XmlDataReader.Builder}.
 */
@Deprecated(since = "2024-05-01", forRemoval = true)
public void setElementNameConverter(ElementNameConverter converter) {
    // Validation and assignment in one step: nothing is assigned when the check fails.
    this.elementNameConverter = Checks.isNotNull(converter, CONVERTER);
}
/**
* @return The text content converter applied to the content of created text nodes.
*/
public TextContentConverter getTextContentConverter() {
return textContentConverter;
}
/**
 * Sets the text content converter.
 *
 * @param converter The text content converter.
 * @throws IllegalArgumentException When {@code converter} is null.
 * @deprecated Use {@link XmlDataReader.Builder}.
 */
@Deprecated(since = "2024-05-01", forRemoval = true)
public void setTextContentConverter(TextContentConverter converter) {
    // Validation and assignment in one step: nothing is assigned when the check fails.
    this.textContentConverter = Checks.isNotNull(converter, CONVERTER);
}
/**
 * Entity resolver that resolves any entity to an empty character source.
 * <p>
 * It is used when {@link Feature#DUMMY_ENTITY_RESOLVER} is enabled.
 */
private static class DummyEntityResolver implements EntityResolver {
    public DummyEntityResolver() {
        super();
    }

    /**
     * Logs the request and returns an empty source, whatever the identifiers are.
     *
     * @param publicId The public identifier of the entity (only logged).
     * @param systemId The system identifier of the entity (only logged).
     * @return A new InputSource reading an empty string.
     */
    @Override
    public InputSource resolveEntity(String publicId,
                                     String systemId) throws SAXException, IOException {
        LOGGER.debug("resolveEntity('{}', '{}')", publicId, systemId);
        final StringReader empty = new StringReader("");
        return new InputSource(empty);
    }
}
/**
 * Retrieves the XMLReader of a SAX parser and plugs a handler into it.
 * <p>
 * The handler is registered as content, error, DTD and declaration handler.
 * It is registered as lexical handler only when comments must be loaded or CDATA preserved.
 * <p>
 * The dummy entity resolver, when enabled, is set first. A user defined resolver,
 * when there is one, is set afterwards and therefore replaces it.
 *
 * @param parser The SAX parser.
 * @param handler The handler.
 * @return The configured XMLReader of {@code parser}.
 * @throws SAXException When the XMLReader can not be obtained or a property can not be set.
 */
private XMLReader configureReader(SAXParser parser,
                                  SAXHandler handler) throws SAXException {
    final XMLReader xmlReader = parser.getXMLReader();
    xmlReader.setContentHandler(handler);
    xmlReader.setErrorHandler(handler);
    xmlReader.setDTDHandler(handler);

    final boolean needsLexicalHandler = isEnabled(Feature.LOAD_COMMENTS) || isEnabled(Feature.PRESERVE_CDATA);
    if (needsLexicalHandler) {
        xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
    }
    xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", handler);

    // Set dummy entity resolver before user defined one
    final boolean useDummy = isEnabled(Feature.DUMMY_ENTITY_RESOLVER);
    if (useDummy) {
        xmlReader.setEntityResolver(new DummyEntityResolver());
    }
    if (getEntityResolver() != null) {
        if (useDummy) {
            LOGGER.warn("Dummy entity resolver overwritten by user defined one");
        }
        xmlReader.setEntityResolver(getEntityResolver());
    }
    return xmlReader;
}
/**
* Reads an InputStream, without any systemId.
*
* @param is The InputStream.
* @param compressor The compressor forwarded to the 3-argument read method.
* @return The corresponding Document.
* @throws IOException When an IO error occurs.
*/
public Document read(InputStream is,
Compressor compressor) throws IOException {
LOGGER.debug("read(is=..., {})", compressor);
return read(is, null, compressor);
}
/**
* Reads an InputStream, using {@code Compressor.NONE} and no systemId.
*
* @param is The InputStream.
* @return The corresponding Document.
* @throws IOException When an IO error occurs.
*/
public Document read(InputStream is) throws IOException {
LOGGER.debug("read(is=...)");
return read(is, Compressor.NONE);
}
/**
 * Reads an InputStream, without any systemId.
 *
 * @param is The InputStream.
 * @param compressor The compressor forwarded to the matching read method.
 * @return The root element of the corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public Element readRoot(InputStream is,
                        Compressor compressor) throws IOException {
    final Document loaded = read(is, compressor);
    return Document.getRootElement(loaded);
}
/**
 * Reads an InputStream.
 *
 * @param is The InputStream.
 * @return The root element of the corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public Element readRoot(InputStream is) throws IOException {
    final Document loaded = read(is);
    return Document.getRootElement(loaded);
}
/**
 * Reads an InputStream and builds the corresponding Document.
 * <p>
 * A StAX parser is used when {@link Feature#USE_STAX} is enabled, a SAX parser otherwise.
 *
 * @param is The InputStream.
 * @param systemId The systemId, used as base to resolve relative URIs. May be {@code null}.
 * @param compressor The compressor passed to {@code CompressionUtils.adapt} to wrap {@code is}.
 *            When {@code null}, {@code is} is read as it is.
 * @return The corresponding Document.
 * @throws IOException When an IO error occurs, when parsing fails,
 *             or when the SAX parser can not be configured.
 */
public Document read(InputStream is,
                     String systemId,
                     Compressor compressor) throws IOException {
    LOGGER.debug("read(is=..., '{}', {})", systemId, compressor);
    final InputStream adapted = compressor == null ? is : CompressionUtils.adapt(is, compressor);
    if (isEnabled(Feature.USE_STAX)) {
        final XMLInputFactory factory = XMLInputFactory.newInstance();
        if (isEnabled(Feature.PRESERVE_CDATA)) {
            if (factory.isPropertySupported(StAXSupport.REPORT_CDATA)) {
                factory.setProperty(StAXSupport.REPORT_CDATA, Boolean.TRUE);
            } else {
                LOGGER.warn(StAXSupport.REPORT_CDATA + " is not supported");
            }
        }
        factory.setProperty(XMLInputFactory.IS_VALIDATING, Boolean.FALSE);
        factory.setProperty(XMLInputFactory.SUPPORT_DTD, isEnabled(Feature.LOAD_DTD));
        try {
            final XMLStreamReader reader;
            // The systemId must be forwarded when there is one: the test used to be inverted,
            // so that a provided systemId was dropped.
            if (systemId == null) {
                reader = factory.createXMLStreamReader(adapted);
            } else {
                reader = factory.createXMLStreamReader(systemId, adapted);
            }
            final StAXHandler handler = new StAXHandler(this, reader);
            handler.read();
            return handler.getDocument();
        } catch (final XMLStreamException e) {
            throw new IOException(e);
        }
    } else {
        final SAXParserFactory factory = SAXParserFactory.newInstance();
        final SAXHandler handler = new SAXHandler(this);
        try {
            final SAXParser parser = factory.newSAXParser();
            final XMLReader reader = configureReader(parser, handler);
            final InputSource source = new InputSource(adapted);
            if (systemId != null) {
                source.setSystemId(systemId);
            }
            reader.parse(source);
            return handler.getDocument();
        } catch (final ParserConfigurationException | SAXException e) {
            // A parser that can not be created is reported like a parsing failure,
            // instead of being traced and hidden behind a null Document.
            throw new IOException(e);
        }
    }
}
/**
* Reads an InputStream.
*
* No compressor is used: {@code null} is passed as compressor.
*
* @param is The InputStream.
* @param systemId The systemId which is needed for resolving relative URIs.
* @return The corresponding Document.
* @throws IOException When an IO error occurs.
*/
public Document read(InputStream is,
String systemId) throws IOException {
LOGGER.debug("read(is=..., '{}')", systemId);
return read(is, systemId, null);
}
/**
 * Reads an InputStream.
 *
 * @param is The InputStream.
 * @param systemId The systemId which is needed for resolving relative URIs.
 * @param compressor The compressor forwarded to the matching read method.
 * @return The root element of the corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public Element readRoot(InputStream is,
                        String systemId,
                        Compressor compressor) throws IOException {
    final Document loaded = read(is, systemId, compressor);
    return Document.getRootElement(loaded);
}
/**
 * Reads an InputStream and extracts the root element.
 *
 * @param is The InputStream.
 * @param systemId The systemId which is needed for resolving relative URIs.
 * @return The root element of the corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public Element readRoot(InputStream is,
                        String systemId) throws IOException {
    final Document loaded = read(is, systemId);
    return Document.getRootElement(loaded);
}
/**
 * Reads a string.
 * <p>
 * The string is encoded to bytes with {@code charset} and read as a stream.
 *
 * @param s The string.
 * @param charset The charset. Must be compliant with string content.
 * @return The corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public Document read(String s,
                     Charset charset) throws IOException {
    final byte[] encoded = s.getBytes(charset);
    final InputStream source = new ByteArrayInputStream(encoded);
    return read(source);
}
/**
 * Reads a string and extracts the root element.
 *
 * @param s The string.
 * @param charset The charset. Must be compliant with string content.
 * @return The root element of the corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public Element readRoot(String s,
                        Charset charset) throws IOException {
    final Document loaded = read(s, charset);
    return Document.getRootElement(loaded);
}
/**
 * Reads an URL.
 *
 * @param url The URL.
 * @param compressor The compressor used to compress file.
 * @return The corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public Document read(URL url,
                     Compressor compressor) throws IOException {
    LOGGER.debug("read(url='{}', {})", url, compressor);
    // The raw stream is a resource of its own, so that it is closed
    // even when wrapping it with the compressor fails.
    try (InputStream raw = url.openStream();
            InputStream is = CompressionUtils.adapt(raw, compressor)) {
        return read(is);
    }
}
/**
* Reads an URL, using {@code Compressor.NONE}.
*
* @param url The URL.
* @return The corresponding Document.
* @throws IOException When an IO error occurs.
*/
public Document read(URL url) throws IOException {
return read(url, Compressor.NONE);
}
/**
 * Reads an URL and extracts the root element.
 *
 * @param url The URL.
 * @param compressor The compressor used to compress file.
 * @return The root element of the corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public Element readRoot(URL url,
                        Compressor compressor) throws IOException {
    final Document loaded = read(url, compressor);
    return Document.getRootElement(loaded);
}
/**
* Reads an URL, using {@code Compressor.NONE}.
*
* @param url The URL.
* @return The root element of the corresponding Document.
* @throws IOException When an IO error occurs.
*/
public Element readRoot(URL url) throws IOException {
return readRoot(url, Compressor.NONE);
}
/**
 * Reads a file.
 * <p>
 * The file name is used as systemId.
 *
 * @param filename The file name.
 * @param compressor The compressor used to compress file.
 * @return The corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public Document read(String filename,
                     Compressor compressor) throws IOException {
    LOGGER.debug("read(filename='{}', {})", filename, compressor);
    // The file stream is a resource of its own, so that it is closed
    // even when wrapping it with the compressor fails.
    try (InputStream raw = new FileInputStream(filename);
            InputStream is = new BufferedInputStream(CompressionUtils.adapt(raw, compressor))) {
        return read(is, filename);
    }
}
/**
* Reads a file, using {@code Compressor.NONE}.
*
* @param filename The file name.
* @return The corresponding Document.
* @throws IOException When an IO error occurs.
*/
public Document read(String filename) throws IOException {
return read(filename, Compressor.NONE);
}
/**
 * Reads a file and extracts the root element.
 *
 * @param filename The file name.
 * @param compressor The compressor used to compress file.
 * @return The root element of the corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public Element readRoot(String filename,
                        Compressor compressor) throws IOException {
    final Document loaded = read(filename, compressor);
    return Document.getRootElement(loaded);
}
/**
* Reads a file, using {@code Compressor.NONE}.
*
* @param filename The file name.
* @return The root element of the corresponding Document.
* @throws IOException When an IO error occurs.
*/
public Element readRoot(String filename) throws IOException {
return readRoot(filename, Compressor.NONE);
}
/**
* Reads a file.
*
* The path of the file is passed to the file name based read method.
*
* @param file The file.
* @param compressor The compressor used to compress file.
* @return The corresponding Document.
* @throws IOException When an IO error occurs.
*/
public Document read(File file,
Compressor compressor) throws IOException {
LOGGER.debug("read(file='{}', {})", file, compressor);
return read(file.getPath(), compressor);
}
/**
* Reads a file, using {@code Compressor.NONE}.
*
* The path of the file is passed to the file name based read method.
*
* @param file The file.
* @return The corresponding Document.
* @throws IOException When an IO error occurs.
*/
public Document read(File file) throws IOException {
return read(file.getPath(), Compressor.NONE);
}
/**
 * Reads a file and extracts the root element.
 *
 * @param file The file.
 * @param compressor The compressor used to compress file.
 * @return The root element of the corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public Element readRoot(File file,
                        Compressor compressor) throws IOException {
    final Document loaded = read(file, compressor);
    return Document.getRootElement(loaded);
}
/**
* Reads a file, using {@code Compressor.NONE}.
*
* @param file The file.
* @return The root element of the corresponding Document.
* @throws IOException When an IO error occurs.
*/
public Element readRoot(File file) throws IOException {
return readRoot(file, Compressor.NONE);
}
/**
 * Creates a reader that has the given features enabled
 * and uses default filters and converters.
 *
 * @param features The features to enable.
 * @return A new XmlDataReader.
 */
public static XmlDataReader create(Feature... features) {
    final Builder configuration = XmlDataReader.builder();
    configuration.features(features);
    return configuration.build();
}
/**
 * Reads an InputStream with a new reader.
 *
 * @param is The InputStream.
 * @param features The features to enable.
 * @return The corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public static Document load(InputStream is,
                            Feature... features) throws IOException {
    return create(features).read(is);
}
/**
 * Reads an InputStream with a new reader.
 *
 * @param is The InputStream.
 * @param features The features to enable.
 * @return The root element of the corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public static Element loadRoot(InputStream is,
                               Feature... features) throws IOException {
    return create(features).readRoot(is);
}
/**
 * Reads an InputStream with a new reader.
 *
 * @param is The InputStream.
 * @param systemId The systemId which is needed for resolving relative URIs.
 * @param features The features to enable.
 * @return The corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public static Document load(InputStream is,
                            String systemId,
                            Feature... features) throws IOException {
    return create(features).read(is, systemId);
}
/**
 * Reads an InputStream with a new reader.
 *
 * @param is The InputStream.
 * @param systemId The systemId which is needed for resolving relative URIs.
 * @param compressor The compressor forwarded to the reader.
 * @param features The features to enable.
 * @return The root element of the corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public static Element loadRoot(InputStream is,
                               String systemId,
                               Compressor compressor,
                               Feature... features) throws IOException {
    return create(features).readRoot(is, systemId, compressor);
}
/**
 * Reads an InputStream with a new reader.
 *
 * @param is The InputStream.
 * @param systemId The systemId which is needed for resolving relative URIs.
 * @param features The features to enable.
 * @return The root element of the corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public static Element loadRoot(InputStream is,
                               String systemId,
                               Feature... features) throws IOException {
    return create(features).readRoot(is, systemId);
}
/**
 * Reads a string with a new reader.
 *
 * @param s The string.
 * @param charset The charset. Must be compliant with string content.
 * @param features The features to enable.
 * @return The corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public static Document load(String s,
                            Charset charset,
                            Feature... features) throws IOException {
    return create(features).read(s, charset);
}
/**
 * Reads a string with a new reader.
 *
 * @param s The string.
 * @param charset The charset. Must be compliant with string content.
 * @param features The features to enable.
 * @return The root element of the corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public static Element loadRoot(String s,
                               Charset charset,
                               Feature... features) throws IOException {
    return create(features).readRoot(s, charset);
}
/**
 * Reads an URL with a new reader.
 *
 * @param url The URL.
 * @param compressor The compressor used to compress file.
 * @param features The features to enable.
 * @return The corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public static Document load(URL url,
                            Compressor compressor,
                            Feature... features) throws IOException {
    return create(features).read(url, compressor);
}
/**
* Reads an URL with a new reader, using {@code Compressor.NONE}.
*
* @param url The URL.
* @param features The features to enable.
* @return The corresponding Document.
* @throws IOException When an IO error occurs.
*/
public static Document load(URL url,
Feature... features) throws IOException {
return load(url, Compressor.NONE, features);
}
/**
 * Reads an URL with a new reader.
 *
 * @param url The URL.
 * @param compressor The compressor used to compress file.
 * @param features The features to enable.
 * @return The root element of the corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public static Element loadRoot(URL url,
                               Compressor compressor,
                               Feature... features) throws IOException {
    return create(features).readRoot(url, compressor);
}
/**
* Reads an URL with a new reader, using {@code Compressor.NONE}.
*
* @param url The URL.
* @param features The features to enable.
* @return The root element of the corresponding Document.
* @throws IOException When an IO error occurs.
*/
public static Element loadRoot(URL url,
Feature... features) throws IOException {
return loadRoot(url, Compressor.NONE, features);
}
/**
 * Reads a file with a new reader.
 *
 * @param filename The file name.
 * @param compressor The compressor used to compress file.
 * @param features The features to enable.
 * @return The corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public static Document load(String filename,
                            Compressor compressor,
                            Feature... features) throws IOException {
    return create(features).read(filename, compressor);
}
/**
* Reads a file with a new reader, using {@code Compressor.NONE}.
*
* @param filename The file name.
* @param features The features to enable.
* @return The corresponding Document.
* @throws IOException When an IO error occurs.
*/
public static Document load(String filename,
Feature... features) throws IOException {
return load(filename, Compressor.NONE, features);
}
/**
 * Reads a file with a new reader.
 *
 * @param filename The file name.
 * @param compressor The compressor used to compress file.
 * @param features The features to enable.
 * @return The root element of the corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public static Element loadRoot(String filename,
                               Compressor compressor,
                               Feature... features) throws IOException {
    return create(features).readRoot(filename, compressor);
}
/**
* Reads a file with a new reader, using {@code Compressor.NONE}.
*
* @param filename The file name.
* @param features The features to enable.
* @return The root element of the corresponding Document.
* @throws IOException When an IO error occurs.
*/
public static Element loadRoot(String filename,
Feature... features) throws IOException {
return loadRoot(filename, Compressor.NONE, features);
}
/**
 * Reads a file with a new reader.
 *
 * @param file The file.
 * @param compressor The compressor used to compress file.
 * @param features The features to enable.
 * @return The corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public static Document load(File file,
                            Compressor compressor,
                            Feature... features) throws IOException {
    return create(features).read(file, compressor);
}
/**
* Reads a file with a new reader, using {@code Compressor.NONE}.
*
* @param file The file.
* @param features The features to enable.
* @return The corresponding Document.
* @throws IOException When an IO error occurs.
*/
public static Document load(File file,
Feature... features) throws IOException {
return load(file, Compressor.NONE, features);
}
/**
 * Reads a file with a new reader.
 *
 * @param file The file.
 * @param compressor The compressor used to compress file.
 * @param features The features to enable.
 * @return The root element of the corresponding Document.
 * @throws IOException When an IO error occurs.
 */
public static Element loadRoot(File file,
                               Compressor compressor,
                               Feature... features) throws IOException {
    return create(features).readRoot(file, compressor);
}
/**
* Reads a file with a new reader, using {@code Compressor.NONE}.
*
* @param file The file.
* @param features The features to enable.
* @return The root element of the corresponding Document.
* @throws IOException When an IO error occurs.
*/
public static Element loadRoot(File file,
Feature... features) throws IOException {
return loadRoot(file, Compressor.NONE, features);
}
/**
* @return A new {@link Builder}, with no feature enabled and default filters and converters.
*/
public static Builder builder() {
return new Builder();
}
/**
 * XmlDataReader Builder.
 * <p>
 * By default, no feature is enabled, there is no issue handler and no entity resolver,
 * filters accept anything and converters are identities.
 * <p>
 * Filters and converters must not be {@code null}: this is checked by {@link #build()}.
 */
public static final class Builder {
    /** The enabled features. Typed to avoid a raw {@code Set}. */
    private final Set<Feature> features = EnumSet.noneOf(Feature.class);
    private IssueHandler issueHandler;
    private EntityResolver entityResolver;
    private AttributePredicate attributeFilter = AttributePredicate.ANY_ATTRIBUTE;
    private AttributeNameConverter attributeNameConverter = AttributeNameConverter.IDENTITY;
    private AttributeValueConverter attributeValueConverter = AttributeValueConverter.INDENTITY;
    private ElementPredicate elementPreFilter = ElementPredicate.ANY_ELEMENT;
    private ElementPredicate elementPostFilter = ElementPredicate.ANY_ELEMENT;
    private ElementNameConverter elementNameConverter = ElementNameConverter.IDENTITY;
    private TextContentConverter textContentConverter = TextContentConverter.IDENTITY;

    private Builder() {
    }

    /**
     * Enables or disables a feature.
     *
     * @param feature The feature.
     * @param enabled If {@code true}, the feature is enabled. It is disabled otherwise.
     * @return This builder.
     */
    public Builder feature(Feature feature,
                           boolean enabled) {
        if (enabled) {
            this.features.add(feature);
        } else {
            this.features.remove(feature);
        }
        return this;
    }

    /**
     * Enables a feature.
     *
     * @param feature The feature.
     * @return This builder.
     */
    public Builder feature(Feature feature) {
        return feature(feature, true);
    }

    /**
     * Enables several features. Already enabled features remain enabled.
     *
     * @param features The features to enable.
     * @return This builder.
     */
    public Builder features(Feature... features) {
        Collections.addAll(this.features, features);
        return this;
    }

    /**
     * Sets the issue handler.
     *
     * @param issueHandler The issue handler. May be {@code null}.
     * @return This builder.
     */
    public Builder issueHandler(IssueHandler issueHandler) {
        this.issueHandler = issueHandler;
        return this;
    }

    /**
     * Sets the entity resolver.
     *
     * @param entityResolver The entity resolver. May be {@code null}.
     * @return This builder.
     */
    public Builder entityResolver(EntityResolver entityResolver) {
        this.entityResolver = entityResolver;
        return this;
    }

    /**
     * Sets the attribute filter.
     *
     * @param attributeFilter The attribute filter.
     * @return This builder.
     */
    public Builder attributeFilter(AttributePredicate attributeFilter) {
        this.attributeFilter = attributeFilter;
        return this;
    }

    /**
     * Sets the attribute name converter.
     *
     * @param attributeNameConverter The attribute name converter.
     * @return This builder.
     */
    public Builder attributeNameConverter(AttributeNameConverter attributeNameConverter) {
        this.attributeNameConverter = attributeNameConverter;
        return this;
    }

    /**
     * Sets the attribute value converter.
     *
     * @param attributeValueConverter The attribute value converter.
     * @return This builder.
     */
    public Builder attributeValueConverter(AttributeValueConverter attributeValueConverter) {
        this.attributeValueConverter = attributeValueConverter;
        return this;
    }

    /**
     * Sets the filter used when an element is created.
     *
     * @param elementPreFilter The element pre filter.
     * @return This builder.
     */
    public Builder elementPreFilter(ElementPredicate elementPreFilter) {
        this.elementPreFilter = elementPreFilter;
        return this;
    }

    /**
     * Sets the filter used when all the children of an element are created.
     *
     * @param elementPostFilter The element post filter.
     * @return This builder.
     */
    public Builder elementPostFilter(ElementPredicate elementPostFilter) {
        this.elementPostFilter = elementPostFilter;
        return this;
    }

    /**
     * Sets the element name converter.
     *
     * @param elementNameConverter The element name converter.
     * @return This builder.
     */
    public Builder elementNameConverter(ElementNameConverter elementNameConverter) {
        this.elementNameConverter = elementNameConverter;
        return this;
    }

    /**
     * Sets the text content converter.
     *
     * @param textContentConverter The text content converter.
     * @return This builder.
     */
    public Builder textContentConverter(TextContentConverter textContentConverter) {
        this.textContentConverter = textContentConverter;
        return this;
    }

    /**
     * Creates a reader from the current configuration.
     * <p>
     * Filters and converters are validated by the XmlDataReader constructor at this point.
     *
     * @return A new XmlDataReader.
     */
    public XmlDataReader build() {
        return new XmlDataReader(this);
    }
}
/**
 * Internal utility class used to build data.
 * <p>
 * Contexts form a chain: each context knows its parent and lazily creates
 * one child that is reused by successive pushes.
 */
private static class Context {
    private final Context parent;
    private Context child;
    /**
     * If true, parsed data are kept, discarded otherwise.
     */
    boolean keep = true;

    private Context(Context parent) {
        this.parent = parent;
    }

    /**
     * Creates a context that has no parent.
     */
    public Context() {
        this(null);
    }

    /**
     * Sets the keep flag: data can be kept only if the parent context keeps data.
     *
     * @param keep The requested value.
     */
    private void setKeep(boolean keep) {
        this.keep = parent.keep && keep;
    }

    /**
     * Moves to the child context, creating it on first use.
     *
     * @param keep The keep flag requested for the child context.
     * @return The child context.
     */
    public Context push(boolean keep) {
        Context next = child;
        if (next == null) {
            next = new Context(this);
            child = next;
        }
        next.setKeep(keep);
        return next;
    }

    /**
     * @return The parent context.
     */
    public Context pop() {
        return parent;
    }
}
private static interface AttributesExtractor {
public int getLength();
public String getQName(int index);
public String getValue(int index);
}
/**
* Internal utility used to build data.
*
* It is used with StAX and SAX.
*
* @param The exception type.
*/
private static class DataBuilder {
private final XmlDataReader caller;
private final Function exceptionBuilder;
private Document document = null;
private Parent currentParent = null;
private TextKind kind = TextKind.STANDARD;
private final StringBuilder chars = new StringBuilder();
private final boolean preserveCData;
private final boolean featureLoadSpaces;
private final boolean featureAllowMixedContent;
private final boolean featureLoadDTD;
private boolean charsIsWhiteSpace = true;
public DataBuilder(XmlDataReader caller,
Function exceptionBuilder) {
this.caller = caller;
this.exceptionBuilder = exceptionBuilder;
this.preserveCData = caller.isEnabled(Feature.PRESERVE_CDATA);
this.featureLoadSpaces = caller.isEnabled(Feature.LOAD_SPACES);
this.featureAllowMixedContent = caller.isEnabled(Feature.ALLOW_MIXED_CONTENT);
this.featureLoadDTD = caller.isEnabled(Feature.LOAD_DTD);
}
/**
* Top of context stack.
*
* At beginning there is a sentinel.
* The {@code currentParent} node corresponds to the top-most {@code KEEP}.
*/
private Context context = new Context();
private void addText() {
final Text text = new Text(currentParent);
if (preserveCData) {
text.setKind(kind);
}
text.setContent(caller.textContentConverter.convertTextContent(currentParent, chars.toString()));
}
private void flushText(boolean preserve) throws E {
if (chars.length() > 0) {
if (charsIsWhiteSpace) {
if (featureLoadSpaces || (preserve && currentParent.getChildrenCount() == 0)) {
addText();
}
} else if (currentParent.getChildrenCount() > 1 && !featureAllowMixedContent) {
throw exceptionBuilder.apply("Mixed content not allowed");
} else {
addText();
}
chars.setLength(0);
charsIsWhiteSpace = true;
}
}
/**
* @return {@code true} if {@code currentParent} has a text child close to the end.
*/
private boolean currentParentHasCloseTextChild() {
final List extends Child> children = currentParent.getChildren();
for (int index = children.size() - 1; index >= 0; index--) {
final Child child = children.get(index);
final NodeType childType = child.getType();
if (childType == NodeType.ELEMENT) {
// No need to continue, even if technically a text child can exist further.
// The usage of this method makes this assumption valid.
return false;
} else if (childType == NodeType.TEXT) {
return true;
}
// If child is a comment, continue
// It can not be a document
}
return false;
}
public Document getDocument() {
return document;
}
public void startDocument() throws E {
document = new Document();
currentParent = document;
flushText(false);
}
public void startElement(String qName,
AttributesExtractor atts) throws E {
if (context.keep) {
flushText(false);
if (!featureAllowMixedContent && currentParentHasCloseTextChild()) {
throw exceptionBuilder.apply("Mixed content not allowed");
}
final Element element = new Element(caller.elementNameConverter.convertElementName(currentParent, qName));
for (int index = 0; index < atts.getLength(); index++) {
final String name = atts.getQName(index);
final String value = atts.getValue(index);
if (caller.attributeFilter.accepts(element, name, value)) {
element.addAttribute(caller.attributeNameConverter.convertAttributeName(element, name),
caller.attributeValueConverter.convertAttributeValue(element, name, value));
}
}
if (caller.elementPreFilter.accepts(currentParent, element)) {
context = context.push(true);
currentParent.addChild(element);
currentParent = element;
} else {
context = context.push(false);
}
} else {
context = context.push(false);
}
}
public void endElement(String qName) throws E {
if (context.keep) {
final Element current = (Element) currentParent;
flushText(true);
currentParent = current.getParent();
if (!caller.elementPostFilter.accepts(currentParent, current)) {
current.detach();
}
}
context = context.pop();
}
public void characters(char[] ch,
int start,
int length) {
if (context.keep) {
chars.append(ch, start, length);
charsIsWhiteSpace = charsIsWhiteSpace && XmlUtils.isWhiteSpace(ch, start, length);
}
}
public void comment(char[] ch,
int start,
int length) throws E {
if (context.keep) {
flushText(false);
final Comment comment = new Comment(new String(ch, start, length));
currentParent.addChild(comment);
}
}
public void setTextKind(TextKind kind) throws E {
if (this.kind != kind) {
flushText(true); // FIXME
}
this.kind = kind;
}
}
/**
* Internal class used to read XML using StAX.
*
* @author Damien Carbonne
*/
private static class StAXHandler {
/** The StAX reader that provides parsing events. */
private final XMLStreamReader reader;

/**
 * The builder that receives parsing events.
 * <p>
 * It is typed with XMLStreamException, the exception created in the constructor,
 * so that its methods do not declare a broader exception than the one thrown by this handler.
 */
private final DataBuilder<XMLStreamException> builder;

/**
 * Adapter exposing the attributes of the current event of {@code reader}.
 * <p>
 * Attribute names are the local names returned by the StAX reader.
 */
private final AttributesExtractor wrapper = new AttributesExtractor() {
    @Override
    public int getLength() {
        return reader.getAttributeCount();
    }

    @Override
    public String getQName(int index) {
        return reader.getAttributeLocalName(index);
    }

    @Override
    public String getValue(int index) {
        return reader.getAttributeValue(index);
    }
};
public StAXHandler(XmlDataReader caller,
XMLStreamReader reader) {
this.reader = reader;
this.builder = new DataBuilder<>(caller, XMLStreamException::new);
}
public void read() throws XMLStreamException {
while (reader.hasNext()) {
final int eventType = reader.getEventType();
switch (eventType) {
case XMLStreamConstants.START_DOCUMENT:
builder.startDocument();
break;
case XMLStreamConstants.COMMENT:
builder.comment(reader.getTextCharacters(), reader.getTextStart(), reader.getTextLength());
break;
case XMLStreamConstants.CHARACTERS:
builder.setTextKind(TextKind.STANDARD);
builder.characters(reader.getTextCharacters(), reader.getTextStart(), reader.getTextLength());
break;
case XMLStreamConstants.SPACE:
if (builder.featureLoadSpaces) {
builder.setTextKind(TextKind.STANDARD);
builder.characters(reader.getTextCharacters(), reader.getTextStart(), reader.getTextLength());
}
break;
case XMLStreamConstants.START_ELEMENT:
builder.startElement(reader.getName().getLocalPart(), wrapper);
break;
case XMLStreamConstants.END_ELEMENT:
builder.endElement(reader.getName().getLocalPart());
break;
case XMLStreamConstants.CDATA:
builder.setTextKind(TextKind.CDATA);
builder.characters(reader.getTextCharacters(), reader.getTextStart(), reader.getTextLength());
break;
case XMLStreamConstants.DTD:
if (builder.featureLoadDTD) {
builder.document.setDTD(reader.getText());
}
break;
case XMLStreamConstants.ATTRIBUTE:
case XMLStreamConstants.END_DOCUMENT:
case XMLStreamConstants.ENTITY_DECLARATION:
case XMLStreamConstants.ENTITY_REFERENCE:
case XMLStreamConstants.NAMESPACE:
case XMLStreamConstants.NOTATION_DECLARATION:
case XMLStreamConstants.PROCESSING_INSTRUCTION:
default:
// Ignore
break;
}
reader.next();
}
}
public Document getDocument() {
return builder.getDocument();
}
}
/**
* Internal class used to load XML source using SAX.
*
* @author Damien Carbonne
*/
private static class SAXHandler extends DefaultHandler2 {
private final XmlDataReader caller;
private final DataBuilder builder;
private final StringBuilder dtd = new StringBuilder();
private boolean inDTD = false;
private final String eol = "\n ";
private class Wrapper implements AttributesExtractor {
Attributes atts = null;
public Wrapper() {
super();
}
@Override
public int getLength() {
return atts.getLength();
}
@Override
public String getQName(int index) {
return atts.getQName(index);
}
@Override
public String getValue(int index) {
return atts.getValue(index);
}
}
private final Wrapper wrapper = new Wrapper();
public SAXHandler(XmlDataReader caller) {
this.caller = caller;
this.builder = new DataBuilder<>(caller, SAXException::new);
}
public Document getDocument() {
return builder.getDocument();
}
@Override
public void setDocumentLocator(Locator locator) {
// Ignore
}
@Override
public void startDocument() throws SAXException {
builder.startDocument();
}
@Override
public void endDocument() throws SAXException {
// Ignore
}
@Override
public void startPrefixMapping(String prefix,
String uri) throws SAXException {
// Ignore
}
@Override
public void endPrefixMapping(String prefix) throws SAXException {
// Ignore
}
@Override
public void startElement(String uri,
String localName,
String qName,
Attributes atts) throws SAXException {
wrapper.atts = atts;
builder.startElement(qName, wrapper);
}
@Override
public void endElement(String uri,
String localName,
String qName) throws SAXException {
builder.endElement(qName);
}
@Override
public void characters(char[] ch,
int start,
int length) throws SAXException {
builder.characters(ch, start, length);
}
@Override
public void ignorableWhitespace(char[] ch,
int start,
int length) throws SAXException {
// Ignore
}
@Override
public void processingInstruction(String target,
String data) throws SAXException {
LOGGER.debug("processingInstruction({}, {})", target, data);
if (inDTD) {
// TODO
}
}
@Override
public void skippedEntity(String name) throws SAXException {
// Ignore
}
@Override
public InputSource resolveEntity(String publicId,
String systemId) throws SAXException, IOException {
LOGGER.debug("resolveEntity({}, {})", publicId, systemId);
return null;
}
@Override
public void startDTD(String name,
String publicId,
String systemId) throws SAXException {
LOGGER.debug("startDTD({}, {}, {})", name, publicId, systemId);
inDTD = true;
dtd.append("");
inDTD = false;
builder.document.setDTD(dtd.toString());
}
@Override
public void notationDecl(String name,
String publicId,
String systemId) throws SAXException {
LOGGER.debug("DTD notationDecl({}, {}, {})", name, publicId, systemId);
dtd.append("')
.append(eol);
}
@Override
public void unparsedEntityDecl(String name,
String publicId,
String systemId,
String notationName) throws SAXException {
LOGGER.debug("DTD unparsedEntityDecl({}, {}, {}, {})", name, publicId, systemId, notationName);
dtd.append("')
.append(eol);
}
@Override
public void elementDecl(String name,
String model) throws SAXException {
LOGGER.debug("DTD elementDecl({}, {})", name, model);
dtd.append("')
.append(eol);
}
@Override
public void attributeDecl(String eName,
String aName,
String type,
String mode,
String value) throws SAXException {
LOGGER.debug("DTD attributeDecl({}, {}, {}, {}, {})", eName, aName, type, mode, value);
dtd.append("')
.append(eol);
}
@Override
public void internalEntityDecl(String name,
String value) throws SAXException {
LOGGER.debug("DTD internalEntityDecl({}, {})", name, value);
dtd.append("")
.append(eol);
}
@Override
public void externalEntityDecl(String name,
String publicId,
String systemId) throws SAXException {
LOGGER.debug("DTD externalEntityDecl({}, {}, {})", name, publicId, systemId);
dtd.append("')
.append(eol);
}
@Override
public void startEntity(String name) throws SAXException {
LOGGER.debug("startEntity({})", name);
// Ignore
}
@Override
public void endEntity(String name) throws SAXException {
LOGGER.debug("endEntity({})", name);
// Ignore
}
@Override
public void startCDATA() throws SAXException {
LOGGER.debug("startCDATA");
builder.setTextKind(TextKind.CDATA);
}
@Override
public void endCDATA() throws SAXException {
LOGGER.debug("endCDATA");
builder.setTextKind(TextKind.STANDARD);
}
@Override
public void comment(char[] ch,
int start,
int length) throws SAXException {
LOGGER.debug("comment(...)");
if (inDTD) {
dtd.append("")
.append(eol);
} else {
builder.comment(ch, start, length);
}
}
@Override
public void warning(SAXParseException exception) throws SAXException {
if (caller.issueHandler == null) {
LOGGER.warn("{}:{} {}", exception.getLineNumber(), exception.getColumnNumber(), exception.getMessage());
} else {
caller.issueHandler.warning(exception.getMessage(),
exception.getSystemId(),
exception.getPublicId(),
exception.getLineNumber(),
exception.getColumnNumber());
}
}
@Override
public void error(SAXParseException exception) throws SAXException {
if (caller.issueHandler == null) {
LOGGER.error("{}:{} {}", exception.getLineNumber(), exception.getColumnNumber(), exception.getMessage());
} else {
caller.issueHandler.error(exception.getMessage(),
exception.getSystemId(),
exception.getPublicId(),
exception.getLineNumber(),
exception.getColumnNumber());
}
}
@Override
public void fatalError(SAXParseException exception) throws SAXException {
if (caller.issueHandler == null) {
LOGGER.fatal("{}:{} {}", exception.getLineNumber(), exception.getColumnNumber(), exception.getMessage());
} else {
caller.issueHandler.fatal(exception.getMessage(),
exception.getSystemId(),
exception.getPublicId(),
exception.getLineNumber(),
exception.getColumnNumber());
}
throw exception;
}
}
}