All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.saxon.evpull.StaxEventBridge Maven / Gradle / Ivy

Go to download

Provides a basic XSLT 2.0 and XQuery 1.0 processor (W3C Recommendations, January 2007). Command line interfaces and implementations of several Java APIs (DOM, XPath, s9api) are also included.

The newest version!
package net.sf.saxon.evpull;

import net.sf.saxon.Configuration;
import net.sf.saxon.event.*;
import net.sf.saxon.expr.ExpressionLocation;
import net.sf.saxon.om.*;
import net.sf.saxon.pull.UnparsedEntity;
import net.sf.saxon.tinytree.CharSlice;
import net.sf.saxon.trans.SaxonErrorCode;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.type.Type;
import net.sf.saxon.value.Whitespace;

import javax.xml.stream.*;
import javax.xml.stream.events.EntityDeclaration;
import javax.xml.transform.TransformerException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

/**
 * This class implements the Saxon EventIterator API on top of a standard StAX parser
 * (or any other StAX XMLStreamReader implementation)
 */

public class StaxEventBridge implements EventIterator, SaxonLocator, SourceLocationProvider {

    private Configuration config;
    private XMLStreamReader reader;
    private PipelineConfiguration pipe;
    private List unparsedEntities = null;
    PullEvent currentEvent = null;
    int depth = 0;
    boolean ignoreIgnorable = false;

    /**
     * Create a new instance of the class
     */

    public StaxEventBridge() {

    }

    /**
     * Supply an input stream containing XML to be parsed. A StAX parser is created using
     * the JAXP XMLInputFactory.
     * @param systemId The Base URI of the input document
     * @param inputStream the stream containing the XML to be parsed
     * @throws net.sf.saxon.trans.XPathException if an error occurs creating the StAX parser
     */

    public void setInputStream(String systemId, InputStream inputStream) throws XPathException {
        try {
            XMLInputFactory factory = XMLInputFactory.newInstance();
            //XMLInputFactory factory = new WstxInputFactory();
            factory.setXMLReporter(new StaxErrorReporter());
            reader = factory.createXMLStreamReader(systemId, inputStream);
        } catch (XMLStreamException e) {
            throw new XPathException(e);
        }
    }

    /**
     * Supply an XMLStreamReader: the events reported by this XMLStreamReader will be translated
     * into EventIterator events
     * @param reader the supplier of XML events, typically an XML parser
     */

    public void setXMLStreamReader(XMLStreamReader reader) {
        this.reader = reader;
    }

    /**
     * Set configuration information. This must only be called before any events
     * have been read.
     * @param pipe the pipeline configuration
     */

    public void setPipelineConfiguration(PipelineConfiguration pipe) {
        this.pipe = new PipelineConfiguration(pipe);
        this.pipe.setLocationProvider(this);
        config = pipe.getConfiguration();
        ignoreIgnorable = config.getStripsWhiteSpace() != Whitespace.NONE;
    }

    /**
     * Get configuration information.
     * @return the pipeline configuration
     */

    public PipelineConfiguration getPipelineConfiguration() {
        return pipe;
    }

    /**
     * Get the XMLStreamReader used by this StaxBridge. This is available only after
     * setInputStream() or setXMLStreamReader() has been called
     * @return the instance of XMLStreamReader allocated when setInputStream() was called,
     * or the instance supplied directly to setXMLStreamReader()
     */

    public XMLStreamReader getXMLStreamReader() {
        return reader;
    }

    /**
     * Get the name pool
     * @return the name pool
     */

    public NamePool getNamePool() {
        return pipe.getConfiguration().getNamePool();
    }

    /**
     * Get the next event
     * @return the next event; or null to indicate the end of the event stream
     */

    public PullEvent next() throws XPathException {
        if (currentEvent == null) {
            // StAX isn't reporting START_DOCUMENT so we supply it ourselves
            currentEvent = StartDocumentEvent.getInstance();
            return currentEvent;
        }
        if (currentEvent instanceof EndDocumentEvent) {
            try {
                reader.close();
            } catch (XMLStreamException e) {
                //
            }
            return null;
        }
        try {
            if (reader.hasNext()) {
                int event = reader.next();
                //System.err.println("Read event " + event);
                currentEvent = translate(event);
            } else {
                currentEvent = null;
            }
        } catch (XMLStreamException e) {
            String message = e.getMessage();
            // Following code recognizes the messages produced by the Sun Zephyr parser
            if (message.startsWith("ParseError at")) {
                int c = message.indexOf("\nMessage: ");
                if (c > 0) {
                    message = message.substring(c + 10);
                }
            }
            XPathException err = new XPathException("Error reported by XML parser: " + message);
            err.setErrorCode(SaxonErrorCode.SXXP0003);
            err.setLocator(translateLocation(e.getLocation()));
            throw err;
        }
        return currentEvent;
    }

    /**
     * Translate a StAX event into a Saxon PullEvent
     * @param event the StAX event
     * @return the Saxon PullEvent
     * @throws XPathException
     */

    private PullEvent translate(int event) throws XPathException {
            //System.err.println("EVENT " + event);
            switch (event) {
                case XMLStreamConstants.ATTRIBUTE:
                    return next();          // attributes are reported as part of StartElement
                case XMLStreamConstants.CDATA:
                case XMLStreamConstants.CHARACTERS:
                    if (depth == 0 && reader.isWhiteSpace()) {
                        return next();
                    } else {
                        Orphan o = new Orphan(config);
                        o.setNodeKind(Type.TEXT);
                        CharSlice value = new CharSlice(
                            reader.getTextCharacters(), reader.getTextStart(), reader.getTextLength());
                        o.setStringValue(value);
                        return o;
                    }
                case XMLStreamConstants.COMMENT: {
                    Orphan o = new Orphan(config);
                    o.setNodeKind(Type.COMMENT);
                    CharSlice value = new CharSlice(
                            reader.getTextCharacters(), reader.getTextStart(), reader.getTextLength());
                    o.setStringValue(value);
                    return o;
                }
                case XMLStreamConstants.DTD:
                    unparsedEntities = (List)reader.getProperty("javax.xml.stream.entities");
                    return next();
                case XMLStreamConstants.END_DOCUMENT:
                    return EndDocumentEvent.getInstance();
                case XMLStreamConstants.END_ELEMENT:
                    depth--;
                    return EndElementEvent.getInstance();
                case XMLStreamConstants.ENTITY_DECLARATION:
                    return next();
                case XMLStreamConstants.ENTITY_REFERENCE:
                    return next();
                case XMLStreamConstants.NAMESPACE:
                    return next();      // namespaces are reported as part of StartElement
                case XMLStreamConstants.NOTATION_DECLARATION:
                    return next();
                case XMLStreamConstants.PROCESSING_INSTRUCTION:{
                    Orphan o = new Orphan(config);
                    o.setNodeKind(Type.PROCESSING_INSTRUCTION);
                    String local = reader.getPITarget();
                    o.setNameCode(getNamePool().allocate("", "", local));
                    o.setStringValue(reader.getText());
                    return o;
                }
                case XMLStreamConstants.SPACE:
                    if (depth == 0) {
                        return next();
                    } else if (ignoreIgnorable) {
                        // (Brave attempt, but Woodstox doesn't seem to report ignorable whitespace)
                        return next();
                    } else {
                        Orphan o = new Orphan(config);
                        o.setNodeKind(Type.TEXT);
                        o.setStringValue(reader.getText());
                        return o;
                    }
                case XMLStreamConstants.START_DOCUMENT:
                    return next();  // we supplied the START_DOCUMENT ourselves
                case XMLStreamConstants.START_ELEMENT:
                    depth++;
                    StartElementEvent see = new StartElementEvent(config);
                    String elocal = reader.getLocalName();
                    String euri = reader.getNamespaceURI();
                    String eprefix = reader.getPrefix();
                    if (eprefix == null) {
                        eprefix = "";
                    }
                    if (euri == null) {
                        euri = "";
                    }
                    int enc = getNamePool().allocate(eprefix, euri, elocal);
                    see.setNameCode(enc);
                    see.setTypeCode(StandardNames.XS_UNTYPED);
                    int attCount = reader.getAttributeCount();
                    for (int index=0; index
     * 

The return value is the public identifier of the document * entity or of the external parsed entity in which the markup * triggering the event appears.

* * @return A string containing the public identifier, or * null if none is available. * @see #getSystemId */ public String getPublicId() { return reader.getLocation().getPublicId(); } /** * Return the system identifier for the current document event. *

*

The return value is the system identifier of the document * entity or of the external parsed entity in which the markup * triggering the event appears.

*

*

If the system identifier is a URL, the parser must resolve it * fully before passing it to the application. For example, a file * name must always be provided as a file:... URL, and other * kinds of relative URI are also resolved against their bases.

* * @return A string containing the system identifier, or null * if none is available. * @see #getPublicId */ public String getSystemId() { return reader.getLocation().getSystemId(); } /** * Return the line number where the current document event ends. * Lines are delimited by line ends, which are defined in * the XML specification. *

*

Warning: The return value from the method * is intended only as an approximation for the sake of diagnostics; * it is not intended to provide sufficient information * to edit the character content of the original XML document. * In some cases, these "line" numbers match what would be displayed * as columns, and in others they may not match the source text * due to internal entity expansion.

*

*

The return value is an approximation of the line number * in the document entity or external parsed entity where the * markup triggering the event appears.

*

*

If possible, the SAX driver should provide the line position * of the first character after the text associated with the document * event. The first line is line 1.

* * @return The line number, or -1 if none is available. * @see #getColumnNumber */ public int getLineNumber() { return reader.getLocation().getLineNumber(); } /** * Return the column number where the current document event ends. * This is one-based number of Java char values since * the last line end. *

*

Warning: The return value from the method * is intended only as an approximation for the sake of diagnostics; * it is not intended to provide sufficient information * to edit the character content of the original XML document. * For example, when lines contain combining character sequences, wide * characters, surrogate pairs, or bi-directional text, the value may * not correspond to the column in a text editor's display.

*

*

The return value is an approximation of the column number * in the document entity or external parsed entity where the * markup triggering the event appears.

*

*

If possible, the SAX driver should provide the line position * of the first character after the text associated with the document * event. The first column in each line is column 1.

* * @return The column number, or -1 if none is available. * @see #getLineNumber */ public int getColumnNumber() { return reader.getLocation().getColumnNumber(); } public String getSystemId(long locationId) { return getSystemId(); } public int getLineNumber(long locationId) { return getLineNumber(); } /** * Get a list of unparsed entities. * * @return a list of unparsed entities, or null if the information is not available, or * an empty list if there are no unparsed entities. Each item in the list will * be an instance of {@link net.sf.saxon.pull.UnparsedEntity} */ public List getUnparsedEntities() { if (unparsedEntities == null) { return null; } List list = new ArrayList(unparsedEntities.size()); for (int i=0; i 1) { emitter.setOutputStream(new FileOutputStream(args[1])); } else { emitter.setOutputStream(System.out); } NamespaceReducer r = new NamespaceReducer(emitter); EventIteratorToReceiver.copy(puller, r); System.err.println("Elapsed time: " + (System.currentTimeMillis() - startTime) + "ms"); } } /** * Determine whether the EventIterator returns a flat sequence of events, or whether it can return * nested event iterators * * @return true if the next() method is guaranteed never to return an EventIterator */ public boolean isFlatSequence() { return false; } } // // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License"); // you may not use this file except in compliance with the License. You may obtain a copy of the // License at http://www.mozilla.org/MPL/ // // Software distributed under the License is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the License for the specific language governing rights and limitations under the License. // // The Original Code is: all this file // // The Initial Developer of the Original Code is Michael H. Kay. // // Contributor(s): //




© 2015 - 2025 Weber Informatics LLC | Privacy Policy