All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.easybatch.xml.XmlRecordReader Maven / Gradle / Ivy

The newest version!
/**
 * The MIT License
 *
 *   Copyright (c) 2020, Mahmoud Ben Hassine ([email protected])
 *
 *   Permission is hereby granted, free of charge, to any person obtaining a copy
 *   of this software and associated documentation files (the "Software"), to deal
 *   in the Software without restriction, including without limitation the rights
 *   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 *   copies of the Software, and to permit persons to whom the Software is
 *   furnished to do so, subject to the following conditions:
 *
 *   The above copyright notice and this permission notice shall be included in
 *   all copies or substantial portions of the Software.
 *
 *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 *   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 *   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 *   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 *   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 *   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 *   THE SOFTWARE.
 */
package org.easybatch.xml;

import org.easybatch.core.reader.RecordReader;
import org.easybatch.core.record.Header;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.*;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Date;
import java.util.Iterator;

/**
 * A record reader that reads xml records from an xml input stream.
 *
 * This reader produces {@link XmlRecord} instances.
 *
 * @author Mahmoud Ben Hassine ([email protected])
 */
public class XmlRecordReader implements RecordReader {

    private static final Logger LOGGER = LoggerFactory.getLogger(XmlRecordReader.class.getName());

    private String rootElementName;
    private InputStream xmlInputStream;
    private Charset charset;
    private XMLEventReader xmlEventReader;
    private long currentRecordNumber;

    /**
     * Create a new {@link XmlRecordReader}.
     *
     * @param rootElementName to extract as record
     * @param xmlInputStream to read
     * @deprecated This constructor is deprecated since v5.3 and will be removed in v6.
     * Use {@link XmlRecordReader#XmlRecordReader(java.io.InputStream, java.lang.String)} instead
     */
    @Deprecated
    public XmlRecordReader(final String rootElementName, final InputStream xmlInputStream) {
        this(rootElementName, xmlInputStream, Charset.defaultCharset().name());
    }

    /**
     * Create a new {@link XmlRecordReader}.
     *
     * @param xmlInputStream to read
     * @param rootElementName to extract as record
     */
    public XmlRecordReader(final InputStream xmlInputStream, final String rootElementName) {
        this(xmlInputStream, rootElementName, Charset.defaultCharset());
    }

    /**
     * Create a new {@link XmlRecordReader}.
     *
     * @param rootElementName to extract as record
     * @param xmlInputStream to read
     * @param charset of the input stream
     * @deprecated This constructor is deprecated since v5.3 and will be removed in v6.
     * Use {@link XmlRecordReader#XmlRecordReader(java.io.InputStream, java.lang.String, java.nio.charset.Charset)} instead
     */
    @Deprecated
    public XmlRecordReader(final String rootElementName, final InputStream xmlInputStream, final String charset) {
        this.rootElementName = rootElementName;
        this.xmlInputStream = xmlInputStream;
        this.charset = Charset.forName(charset);
    }

    /**
     * Create a new {@link XmlRecordReader}.
     *
     * @param rootElementName to extract as record
     * @param xmlInputStream to read
     * @param charset of the input stream
     */
    public XmlRecordReader(final InputStream xmlInputStream, final String rootElementName, final Charset charset) {
        this.rootElementName = rootElementName;
        this.xmlInputStream = xmlInputStream;
        this.charset = charset;
    }

    @Override
    public void open() throws Exception {
        currentRecordNumber = 0;
        xmlEventReader = XMLInputFactory.newInstance().createXMLEventReader(xmlInputStream, charset.name());
    }

    @Override
    public XmlRecord readRecord() throws Exception {
        if (hasNextRecord()) {
            StringBuilder stringBuilder = new StringBuilder("");
            while (!nextTagIsRootElementEnd()) {
                XMLEvent xmlEvent = xmlEventReader.nextEvent();
                if (xmlEvent.isStartElement()) {
                    escapeStartElementAttributes(stringBuilder, xmlEvent);
                } else if (xmlEvent.isEndElement()) {
                    writeEndElement(stringBuilder, xmlEvent);
                } else {
                    if (xmlEvent.isCharacters()) {
                        stringBuilder.append(escape(xmlEvent.asCharacters().getData()));
                    }
                }
            }
            writeEndElement(stringBuilder, xmlEventReader.nextEvent());
            Header header = new Header(++currentRecordNumber, getDataSourceName(), new Date());
            return new XmlRecord(header, stringBuilder.toString());
        } else {
            return null;
        }
    }

    protected String getDataSourceName() {
        return "XML stream";
    }

    @Override
    public void close() throws Exception {
        if (xmlEventReader != null) {
            xmlEventReader.close(); // TODO should close underlying input stream (See Javadoc)
        }
    }

    private boolean hasNextRecord() {
        try {
            while (!nextTagIsRootElementStart()) {
                XMLEvent xmlEvent = xmlEventReader.nextEvent();
                if (xmlEvent instanceof EndDocument) {
                    return false;
                }
            }
            return true;
        } catch (Exception e) {
            LOGGER.debug("Unable to peek next xml record", e);
            return false;
        }
    }

    private boolean nextTagIsRootElementStart() throws XMLStreamException {
        return xmlEventReader.peek().isStartElement() &&
                xmlEventReader.peek().asStartElement().getName().getLocalPart().equalsIgnoreCase(rootElementName);
    }

    private boolean nextTagIsRootElementEnd() throws XMLStreamException {
        return xmlEventReader.peek().isEndElement() &&
                xmlEventReader.peek().asEndElement().getName().getLocalPart().equalsIgnoreCase(rootElementName);
    }

    private void writeEndElement(StringBuilder stringBuilder, XMLEvent xmlEvent) {
        if (xmlEvent.isEndElement()) {
            EndElement endElement = xmlEvent.asEndElement();
            stringBuilder.append("");
        }
    }

    private void escapeStartElementAttributes(StringBuilder stringBuilder, XMLEvent xmlEvent) {
        StartElement startElement = xmlEvent.asStartElement();
        stringBuilder.append("<").append(startElement.getName().getLocalPart());
        Iterator iterator = startElement.getAttributes();
        while (iterator.hasNext()) {
            Attribute attribute = (Attribute) iterator.next();
            stringBuilder.append(" ")
                    .append(attribute.getName().getLocalPart())
                    .append("='")
                    .append(escape(attribute.getValue()))
                    .append("'");
        }
        stringBuilder.append(">");
    }

    private String escape(String xmlToEscape) {
        return xmlToEscape.replaceAll("&", "&")
                .replaceAll("'", "'")
                .replaceAll("\"", """)
                .replaceAll("<", "<")
                .replaceAll(">", ">")
                ;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy