All downloads are free. Search and download functionality uses the official Maven repository.

com.tectonica.xmlchunk.XmlChunkerContext Maven / Gradle / Ivy

There is a newer version: 2024-10-onix308-fix
Show newest version
/*
 * Copyright (C) 2012-2023 Zach Melamed
 *
 * Latest version available online at https://github.com/zach-m/jonix
 * Contact me at [email protected]
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.tectonica.xmlchunk;

import org.w3c.dom.Element;

import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.stax.StAXSource;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

/**
 * Pull-style reader that walks an XML stream and cuts it into chunks.
 *
 * <p>Depth is counted from the document: the start-document event sets the depth to 0 and every start-element
 * increases it by one, so the root element is at depth 1. While reading:
 * <ul>
 * <li>every element that opens at a depth <em>smaller</em> than {@code targetDepth} is reported as its
 * {@code StartElement} event;</li>
 * <li>every element that opens at exactly {@code targetDepth} is buffered, together with everything nested in it,
 * and reported as a DOM {@link Element} once its end-element is read.</li>
 * </ul>
 *
 * <p>Instances hold mutable parsing state ({@code depth}, the event buffer) without any synchronization, so a
 * single instance should not be shared between threads.
 */
public class XmlChunkerContext {
    static final XMLInputFactory inputFactory;
    static final TransformerFactory transformerFactory;
    static final XmlChunkerEndDocument endDocumentEvent = new XmlChunkerEndDocument();

    static {
        inputFactory = XMLInputFactory.newInstance();

        // no need to validate, or even try to access, the remote DTD file
        inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);

        // no need to validate internal entities - this is important because ONIX files are designed to contain HTML
        // sections inside them. these sections may include entities (such as &nbsp;) that aren't XML-compatible
        inputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.FALSE);

        // this is our Transformer of choice
        // NOTE(review): this is a JVM-wide setting, i.e. it also affects every other caller of
        // TransformerFactory.newInstance() in the process. It is kept as-is because other code may rely on it.
        System.setProperty("javax.xml.transform.TransformerFactory",
            "com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl");
        transformerFactory = TransformerFactory.newInstance();
    }

    /** Source of the XML events; closed by {@link #nextObject()} once it has no more events. */
    private final XMLEventReader reader;

    /** Depth at which elements are collected into DOM chunks. */
    private final int targetDepth;

    /** Current nesting depth; {@code -1} until the start-document event has been read. */
    private int depth = -1;

    /** Events of the chunk currently being collected, or {@code null} when not inside a chunk. */
    private List<XMLEvent> events;

    /** The start-document event of the stream, reused as the first event of every chunk. */
    private XMLEvent startDocumentEvent;

    /**
     * Creates a context that reads XML from the given stream.
     *
     * @param is          the stream to read the XML from
     * @param encoding    the character encoding passed to the underlying {@link XMLEventReader}
     * @param targetDepth the depth of the elements to be returned as chunks (the root element is at depth 1)
     * @throws XMLStreamException if the event reader cannot be created
     */
    public XmlChunkerContext(InputStream is, String encoding, int targetDepth) throws XMLStreamException {
        this.reader = inputFactory.createXMLEventReader(is, encoding);
        this.targetDepth = targetDepth;
    }

    /**
     * @return the current nesting depth, or {@code -1} if the start-document event has not been read yet
     */
    int getDepth() {
        return depth;
    }

    /**
     * Advances to the next chunk, skipping any {@code StartElement} events that {@link #nextObject()} reports along
     * the way.
     *
     * @return the next element found at the target depth, or {@code null} when the stream is exhausted
     * @throws XMLStreamException if reading the underlying stream fails
     */
    public Element nextChunk() throws XMLStreamException {
        Object next;
        while ((next = nextObject()) != null) {
            if (next instanceof Element) {
                return (Element) next;
            }
        }
        return null;
    }

    /**
     * Reads events until there is something to report.
     *
     * @return a {@code StartElement} event for an element shallower than the target depth, a DOM {@link Element}
     *         for a complete element at the target depth, or {@code null} when the stream is exhausted (in which
     *         case the underlying event reader is closed)
     * @throws XMLStreamException if reading the underlying stream fails
     */
    public Object nextObject() throws XMLStreamException {
        while (reader.hasNext()) {
            XMLEvent event = reader.nextEvent();
            Object nextObject = null;

            if (event.isStartDocument()) {
                depth = 0;
                startDocumentEvent = event;
            } else if (depth < 0) {
                // nothing is processed before the start-document event has been seen
                continue;
            }

            if (event.isStartElement()) {
                depth++;
                if (depth < targetDepth) {
                    nextObject = event.asStartElement(); // i.e. will return a StartElement event
                } else if (depth == targetDepth) {
                    // a new chunk begins: start buffering (this very event is added right below)
                    events = new ArrayList<>();
                }
            }

            // while inside a chunk, every event (including the opening and closing ones) is buffered
            if (events != null) {
                events.add(event);
            }

            if (event.isEndElement()) {
                if (depth == targetDepth) {
                    nextObject = elementFromEvents(); // i.e. will return an XML Element
                    events = null;
                }
                depth--;
            }

            if (nextObject != null) {
                return nextObject;
            }
        }
        reader.close();
        return null;
    }

    /**
     * Turns the buffered events of the current chunk into a DOM element, by replaying them (wrapped between the
     * original start-document event and a synthetic end-document event) through a transformer created with
     * {@code newTransformer()}, i.e. without a stylesheet.
     *
     * @return the first child of the node produced by the transformation
     * @throws XMLStreamException declared for callers; not thrown directly by this method
     * @throws RuntimeException   wrapping the {@link TransformerException} if the transformation fails
     */
    private Element elementFromEvents() throws XMLStreamException {
        List<XMLEvent> domEvents = new ArrayList<>(events.size() + 2);
        domEvents.add(startDocumentEvent);
        domEvents.addAll(events);
        domEvents.add(endDocumentEvent);

        XmlChunkerReader xmlEventReader = new XmlChunkerReader(domEvents, reader);

        DOMResult dom = new DOMResult();

        try {
            Transformer transformer = transformerFactory.newTransformer();
            transformer.transform(new StAXSource(xmlEventReader), dom);
            return (Element) dom.getNode().getFirstChild();
        } catch (TransformerException e) {
            throw new RuntimeException("Failed to build a DOM element from the buffered XML events", e);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy