All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.enhydra.xml.xmlc.parsers.xerces.XercesParser Maven / Gradle / Ivy

The newest version!
/*
 * enhydra Java Application Server Project
 * 
 * The contents of this file are subject to the Enhydra Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License on
 * the Enhydra web site ( http://www.enhydra.org/ ).
 * 
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
 * the License for the specific terms governing rights and limitations
 * under the License.
 * 
 * The Initial Developer of the Enhydra Application Server is Lutris
 * Technologies, Inc. The Enhydra Application Server and portions created
 * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
 * All Rights Reserved.
 * 
 * Contributor(s):
 * 
 * $Id: XercesParser.java,v 1.3 2005/01/26 08:29:24 jkjome Exp $
 */

package org.enhydra.xml.xmlc.parsers.xerces;

import java.io.IOException;

import org.enhydra.apache.xerces.framework.XMLAttrList;
import org.enhydra.apache.xerces.framework.XMLContentSpec;
import org.enhydra.apache.xerces.framework.XMLDocumentHandler;
import org.enhydra.apache.xerces.framework.XMLParser;
import org.enhydra.apache.xerces.readers.XMLEntityHandler;
import org.enhydra.apache.xerces.utils.QName;
import org.enhydra.apache.xerces.validators.common.XMLAttributeDecl;
import org.enhydra.xml.io.ErrorReporter;
import org.enhydra.xml.io.XMLEntityResolver;
import org.enhydra.xml.xmlc.XMLCError;
import org.enhydra.xml.xmlc.XMLCException;
import org.enhydra.xml.xmlc.dom.XMLCDocument;
import org.enhydra.xml.xmlc.dom.XMLCDomFactory;
import org.enhydra.xml.xmlc.metadata.MetaData;
import org.enhydra.xml.xmlc.metadata.Parser;
import org.enhydra.xml.xmlc.misc.LineNumberMap;
import org.enhydra.xml.xmlc.parsers.DocBuilder;
import org.enhydra.xml.xmlc.parsers.ParseTracer;
import org.enhydra.xml.xmlc.parsers.XMLCParser;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/*
 * FIXME:
 *   - How to handle built-in entities (see xerces DOMParser).
 *   - Split handlers and entity resolvers into inner classes,
 *     or turn this into a minimal parser, or just use the native DOMParser
 *     (however, it must work with other DOMs).
 *   - DOM builder doesn't build ElementDecl nodes correctly.
 *   - Need a hack to org.enhydra.apache.xerces.dom.EntityReferenceImpl to
 *     not create entity references read-only.  Need to figure out
 *     how to make this conform to what Xerces does (like to Entity
 *     nodes).
 */

/**
 * Parse using the Xerces parser.
 */
public class XercesParser extends XMLParser
    implements XMLCParser, EntityResolver,
               XMLDocumentHandler, XMLDocumentHandler.DTDHandler {

    /**
     * Error handler class that handles mapping of line numbers
     */
    private class MappingErrorHandler implements ErrorHandler {
        private ErrorReporter fErrorReporter;
        private LineNumberMap fLineNumberMap;

        /**
         * Constructor.
         */
        public MappingErrorHandler(ErrorReporter errorReporter,
                                   LineNumberMap lineNumberMap) {
            fErrorReporter = errorReporter;
            fLineNumberMap = lineNumberMap;
        }

        /**
         * Wrap a SAXParseException with a SAXParseException with the
         * location mapped.  If there is no line number map, return the
         * exception unchanged.
         */
        private SAXParseException mapException(SAXParseException exception) {
            if (fLineNumberMap == null) {
                return exception;
            } else {
                LineNumberMap.Line line
                    = fLineNumberMap.getLineFromLineNum(exception.getLineNumber());
                return new SAXParseException(exception.getMessage(),
                                             exception.getPublicId(),
                                             line.getFileName(),
                                             line.getLineNum(),
                                             exception.getColumnNumber(),
                                             exception);
            }
        }

        /**
         * Receive notification of a SAX recoverable error.
         */
        public void error(SAXParseException exception) throws SAXException {
            fErrorReporter.error(mapException(exception));
        }

        /**
         * Receive notification of a SAX warning.
         */
        public void warning(SAXParseException exception) throws SAXException {
            fErrorReporter.warning(mapException(exception));
        }

        /**
         * Receive notification of a SAX non-recoverable error.
         */
        public void fatalError(SAXParseException exception) throws SAXException {
            fErrorReporter.fatalError(mapException(exception));
        }
    }

    /**
     * Tracer that the handler callbacks report to.  A new instance is
     * created at the start of each parse() call.
     */
    private XercesTracer fTracer;

    /**
     * The document builder that the handler callbacks feed.  A new instance
     * is created at the start of each parse() call.
     */
    private DocBuilder fDocBuilder;

    /**
     * Is a CDATASection being processed?  Set by startCDATA(), cleared by
     * endCDATA(), and consulted by characters() to choose between a CDATA
     * section and a text node.
     */
    private boolean fProcessingCDATASection = false;

    /**
     * Processing state: currently processing a document.  This is the state
     * at the start of a parse and again after endDTD().
     */
    private static final int PROCESSING_DOCUMENT = 0;

    /**
     * Processing state: currently processing the external subset.  Entered
     * by startDTD() when a public or system id is present.
     */
    private static final int PROCESSING_EXTERNAL_SUBSET = 1;

    /**
     * Processing state: currently processing the internal subset.  Entered
     * by startDTD() when neither a public nor a system id is present.
     */
    private static final int PROCESSING_INTERNAL_SUBSET = 2;

    /**
     * Part of the document being processed; one of the PROCESSING_*
     * constants.
     */
    private int fProcessingState;

    /**
     * Classpath/XCatalog entity resolver.  Created and configured in
     * parse(); used by resolveEntity().
     */
    private XMLEntityResolver fResolver;

    /*
     * String pool indexes of built-in character entities (amp, lt, gt,
     * apos, quot).  Assigned by initCharEntity().
     */
    private int fAmpIndex;
    private int fLtIndex;
    private int fGtIndex;
    private int fAposIndex;
    private int fQuotIndex;


    /**
     * Parse a document and return the XMLC document built from it.
     * <p>
     * The parser is configured to call back into this object as document
     * handler, DTD handler and entity resolver; Java encoding names and
     * namespace processing are enabled.  Validation is enabled unless the
     * metadata explicitly turns it off.
     *
     * @param input source of the document to parse.
     * @param lineNumberMap if not null, parse errors are passed through a
     *        handler that maps their line numbers with this map before
     *        they reach <code>errorReporter</code>; if null,
     *        <code>errorReporter</code> is installed directly.
     * @param domFactory factory handed to the document builder.
     * @param metaData supplies the parser settings (validation flag and
     *        XCatalog URLs).
     * @param errorReporter receiver of parse errors.
     * @param tracer verbose tracing sink; must not be null, as it is
     *        queried for whether tracing is enabled.
     * @return the document produced by the document builder.
     * @see XMLCParser
     */
    public XMLCDocument parse(InputSource input,
                              LineNumberMap lineNumberMap,
                              XMLCDomFactory domFactory,
                              MetaData metaData,
                              ErrorReporter errorReporter,
                              ParseTracer tracer)
        throws IOException, XMLCException, SAXException {

        Parser parser = metaData.getParser();
        fTracer = new XercesTracer(fStringPool, tracer);
        fDocBuilder = new DocBuilder(domFactory);

        // Reset per-parse state.  The CDATA flag must be cleared here: if an
        // earlier parse on this instance aborted inside a CDATA section,
        // endCDATA() was never called and the flag would still be set.
        fProcessingState = PROCESSING_DOCUMENT;
        fProcessingCDATASection = false;

        // Configure parser.
        initCharEntity();
        initHandlers(true, this, this);
        setEntityResolver(this);
        if (lineNumberMap != null) {
            setErrorHandler(new MappingErrorHandler(errorReporter,
                                                    lineNumberMap));
        } else {
            setErrorHandler(errorReporter);
        }
        setAllowJavaEncodings(true);
        setNamespaces(true);

        // Validation defaults to on when the metadata does not specify it.
        Boolean validate = parser.getValidate();
        setValidation((validate == null) ? true : validate.booleanValue());

        // Setup entity resolver
        fResolver = new XMLEntityResolver();
        if (tracer.enabled()) {
            fResolver.setDebugWriter(tracer);
        }
        // Add defaults before adding catalogs so defaults can be overridden.
        fResolver.setDefaultResolving();

        String[] xCatalog = parser.getXCatalogURLs();
        for (int idx = 0; idx < xCatalog.length; idx++) {
            fResolver.loadCatalog(new InputSource(xCatalog[idx]));
        }

        super.parse(input);
        fDocBuilder.finish();

        return fDocBuilder.getDocument();

    }

    /**
     * Look up the text stored in the parser's string pool under a handle.
     *
     * @param index string pool handle, as received by the handler callbacks.
     * @return whatever the string pool returns for that handle.
     */
    private String getString(int index) {
        final String pooled = this.fStringPool.toString(index);
        return pooled;
    }

    /*
     * Register the names of the five built-in character entities with the
     * string pool and remember the index returned for each, so that
     * isCharEntity() can recognize them by index.
     */
    private void initCharEntity() {
        this.fAmpIndex = this.fStringPool.addSymbol("amp");
        this.fLtIndex = this.fStringPool.addSymbol("lt");
        this.fGtIndex = this.fStringPool.addSymbol("gt");
        this.fAposIndex = this.fStringPool.addSymbol("apos");
        this.fQuotIndex = this.fStringPool.addSymbol("quot");
    }

    /**
     * Determine if an entity is one of the standard character entities
     * (amp, gt, lt, apos, quot).
     *
     * @param entityName string pool index of the entity name.
     * @return true if the index equals one of the indexes recorded by
     *         initCharEntity().
     */
    boolean isCharEntity(int entityName) {
        if (entityName == fAmpIndex) {
            return true;
        }
        if (entityName == fGtIndex) {
            return true;
        }
        if (entityName == fLtIndex) {
            return true;
        }
        if (entityName == fAposIndex) {
            return true;
        }
        return entityName == fQuotIndex;
    }

    /**
     * Resolve an entity through the configured entity resolver, if any, and
     * report the outcome to the tracer.
     *
     * @param publicId public identifier of the entity.
     * @param systemId system identifier of the entity.
     * @return the source supplied by the resolver, or null when no resolver
     *         has been set up.
     * @see EntityResolver#resolveEntity
     */
    public InputSource resolveEntity(String publicId,
                                     String systemId)
        throws SAXException, IOException {

        final InputSource resolved = (fResolver == null)
            ? null
            : fResolver.resolveEntity(publicId, systemId);
        fTracer.resolveEntity(publicId, systemId, resolved);
        return resolved;
    }

    /**
     * Handle start of document.  Only the tracer is notified.
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#startDocument
     */
    public void startDocument() throws Exception {
        this.fTracer.startDocument();
    }

    /**
     * Handle end of document.  Only the tracer is notified.
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#endDocument
     */
    public void endDocument() throws Exception {
        this.fTracer.endDocument();
    }

    /**
     * Handle the XML declaration: trace it, then record the version and
     * encoding strings on the document builder.  The standalone value is
     * traced but not otherwise used.
     *
     * @param version string pool handle of the version.
     * @param encoding string pool handle of the encoding.
     * @param standalone string pool handle of the standalone value.
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#xmlDecl
     */
    public void xmlDecl(int version, int encoding, int standalone) throws Exception {
        fTracer.xmlDecl(version, encoding, standalone);

        final String versionText = getString(version);
        fDocBuilder.setXMLVersion(versionText);

        final String encodingText = getString(encoding);
        fDocBuilder.setEncoding(encodingText);
    }

    /**
     * Handle a text declaration.  Only the tracer is notified.
     *
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#textDecl
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#textDecl
     */
    public void textDecl(int version, int encoding) throws Exception {
        this.fTracer.textDecl(version, encoding);
    }

    /**
     * Handle start of a namespace declaration scope.  Only the tracer is
     * notified; the document builder needs nothing here because the
     * namespace URI arrives with each element's QName.
     *
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#startNamespaceDeclScope
     */
    public void startNamespaceDeclScope(int prefix,
                                        int uri) throws Exception {
        // Nothing to do beyond tracing, as URI is in QName
        this.fTracer.startNamespaceDeclScope(prefix, uri);
    }

    /**
     * Handle end of a namespace declaration scope.  Only the tracer is
     * notified.
     *
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#endNamespaceDeclScope
     */
    public void endNamespaceDeclScope(int prefix) throws Exception {
        this.fTracer.endNamespaceDeclScope(prefix);
    }

    /**
     * Handle start of element: trace it, open the element on the document
     * builder, then add its attributes.
     * <p>
     * The attribute list is walked as a chain of handles starting at
     * <code>attrListHandle</code> and ending at the first negative handle.
     * Attributes that the list does not flag as specified are skipped.
     *
     * @param element name of the element; its uri and rawname are string
     *        pool handles.
     * @param attrList attribute list holding the element's attributes.
     * @param attrListHandle handle of the first attribute, negative if none.
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#startElement
     */
    public void startElement(QName element,
                             XMLAttrList attrList,
                             int attrListHandle) throws Exception {
        fTracer.startElement(element, attrList, attrListHandle);

        final String elementUri = getString(element.uri);
        final String elementName = getString(element.rawname);
        fDocBuilder.startElement(elementUri, elementName);

        for (int attr = attrListHandle; attr >= 0;
             attr = attrList.getNextAttr(attr)) {
            if (!attrList.isSpecified(attr)) {
                continue;
            }
            final String attrUri = getString(attrList.getAttrURI(attr));
            final String attrName = getString(attrList.getAttrName(attr));
            final String attrValue = getString(attrList.getAttValue(attr));
            fDocBuilder.addAttribute(attrUri, attrName, attrValue);
        }
    }

    /**
     * Handle end of element: trace it and tell the document builder to
     * finish the current element.
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#endElement
     */
    public void endElement(QName element) throws Exception {
        this.fTracer.endElement(element);
        this.fDocBuilder.finishElement();
    }

    /**
     * Determine if an entity reference should be processed.
     * <ul>
     * <li>The entity reference callbacks are called at the start and for the
     *     external DTD with a null name (a negative handle); these are
     *     ignored.</li>
     * <li>References seen while not in the document-processing state are
     *     ignored.</li>
     * <li>References inside attribute values are ignored, as these are
     *     reported before the start of the element.</li>
     * </ul>
     *
     * @param entityName string pool handle of the entity name.
     * @param entityContext context constant supplied by the parser.
     * @return true if the reference should be reflected in the document.
     */
    private boolean shouldProcessEntityReference(int entityName,
                                                 int entityContext) {
        if (entityName < 0) {
            return false;
        }
        if (fProcessingState != PROCESSING_DOCUMENT) {
            return false;
        }
        return entityContext != XMLEntityHandler.ENTITYREF_IN_ATTVALUE;
    }

    /**
     * Handle the start of an entity reference.  The reference is always
     * traced.  An entity reference is started on the document builder only
     * if the reference should be processed and is not one of the standard
     * character entities; for those no node is started, so their content is
     * appended directly in place.
     *
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#startEntityReference
     */
    public void startEntityReference(int entityName,
                                     int entityType,
                                     int entityContext) throws Exception {
        final boolean process
            = shouldProcessEntityReference(entityName, entityContext);
        fTracer.startEntityReference(entityName, entityType, entityContext,
                                     process);
        if (!process || isCharEntity(entityName)) {
            return;
        }
        fDocBuilder.startEntityReference(getString(entityName));
    }

    /**
     * Handle the end of an entity reference.  The reference is always
     * traced.  The document builder's entity reference is ended under the
     * same condition that startEntityReference() uses to start one.
     *
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#endEntityReference
     */
    public void endEntityReference(int entityName,
                                   int entityType,
                                   int entityContext) throws Exception {
        final boolean process
            = shouldProcessEntityReference(entityName, entityContext);
        fTracer.endEntityReference(entityName, entityType, entityContext, process);

        if (!process || isCharEntity(entityName)) {
            return;
        }
        fDocBuilder.endEntityReference();
    }

    /**
     * String pool variant of the characters callback.  Not used: it always
     * throws an XMLCError.
     * NOTE(review): presumably never invoked because parse() registers the
     * handlers with initHandlers(true, ...) -- confirm.
     *
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#characters
     */
    public void characters(int data) throws Exception {
        final String message
            = "fatal error: method that should not be invoked called";
        throw new XMLCError(message);
    }

    /**
     * String pool variant of the ignorable whitespace callback.  Not used:
     * it always throws an XMLCError.
     * NOTE(review): presumably never invoked because parse() registers the
     * handlers with initHandlers(true, ...) -- confirm.
     *
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#ignorableWhitespace
     */
    public void ignorableWhitespace(int data) throws Exception {
        final String message
            = "fatal error: method that should not be invoked called";
        throw new XMLCError(message);
    }

    /**
     * Handle start of CDATA section: trace it and flag that character data
     * received from now on belongs to a CDATA section.
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#startCDATA
     */
    public void startCDATA() {
        this.fTracer.startCDATA();
        this.fProcessingCDATASection = true;
    }

    /**
     * Handle end of CDATA section: trace it and clear the flag set by
     * startCDATA().
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#endCDATA
     */
    public void endCDATA() {
        this.fTracer.endCDATA();
        this.fProcessingCDATASection = false;
    }

    /**
     * Handle processing instruction.  It is always traced, but is added to
     * the document only while in the document-processing state;
     * instructions reported while a DTD subset is being processed are not
     * added.
     *
     * @param target string pool handle of the target.
     * @param data string pool handle of the data.
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#processingInstruction
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#processingInstruction
     */
    public void processingInstruction(int target,
                                      int data) throws Exception {
        fTracer.processingInstruction(target, data);
        if (fProcessingState != PROCESSING_DOCUMENT) {
            return;
        }
        final String targetText = getString(target);
        final String dataText = getString(data);
        fDocBuilder.addProcessingInstruction(targetText, dataText);
    }

    /**
     * Handle a comment.  It is always traced, but is added to the document
     * only while in the document-processing state.
     *
     * @param comment string pool handle of the comment text.
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#comment
     */
    public void comment(int comment) throws Exception {
        fTracer.comment(comment);
        if (fProcessingState != PROCESSING_DOCUMENT) {
            return;
        }
        fDocBuilder.addComment(getString(comment));
    }

    /**
     * Handle characters: trace them, then add them to the document as a
     * CDATA section when inside a CDATA section, otherwise as a text node.
     *
     * @param ch buffer holding the characters.
     * @param start offset of the first character in the buffer.
     * @param length number of characters.
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#characters
     */
    public void characters(char[] ch,
                           int start,
                           int length) throws Exception {
        fTracer.characters(ch, start, length);

        final String text = new String(ch, start, length);
        if (fProcessingCDATASection) {
            fDocBuilder.addCDATASection(text);
            return;
        }
        fDocBuilder.addTextNode(text);
    }

    /**
     * Handle ignorable whitespace.  It is traced but not added to the
     * document.
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#ignorableWhitespace
     */
    public void ignorableWhitespace(char[] ch,
                                    int start,
                                    int length) throws Exception {
        //FIXME: is this right, we ignore them.
        this.fTracer.ignorableWhitespace(ch, start, length);
    }

    /**
     * Handle start of the DTD: trace it, switch the processing state, and
     * record the document type name, public id and system id on the
     * document builder.
     * <p>
     * The state becomes "internal subset" when both ids are negative
     * handles (absent) and "external subset" otherwise.
     *
     * @param rootElement name of the root element; rawname is a string pool
     *        handle.
     * @param publicId string pool handle of the public id.
     * @param systemId string pool handle of the system id.
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#startDTD
     */
    public void startDTD(QName rootElement,
                         int publicId,
                         int systemId) {
        fTracer.startDTD(rootElement, publicId, systemId);

        final boolean noExternalId = (publicId < 0) && (systemId < 0);
        fProcessingState = noExternalId
            ? PROCESSING_INTERNAL_SUBSET
            : PROCESSING_EXTERNAL_SUBSET;

        final String docTypeName = getString(rootElement.rawname);
        fDocBuilder.setDocumentTypeName(docTypeName);
        final String publicIdText = getString(publicId);
        fDocBuilder.setPublicId(publicIdText);
        final String systemIdText = getString(systemId);
        fDocBuilder.setSystemId(systemIdText);
    }

    /**
     * Handle the internal subset: trace it and record its text on the
     * document builder.
     *
     * @param internalSubset string pool handle of the internal subset text.
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#internalSubset
     */
    public void internalSubset(int internalSubset) {
        this.fTracer.internalSubset(internalSubset);
        final String subsetText = getString(internalSubset);
        this.fDocBuilder.setInternalSubset(subsetText);
    }

    /**
     * Handle end of the DTD: trace it and return to the document-processing
     * state.
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#endDTD
     */
    public void endDTD() {
        this.fTracer.endDTD();
        this.fProcessingState = PROCESSING_DOCUMENT;
    }

    /**
     * Recursively search a content spec tree for a #PCDATA leaf.
     * <p>
     * The content spec object is passed in to avoid reallocating one per
     * node; its contents are overwritten by every lookup.
     *
     * @param contentSpecIndex index of the node to examine.
     * @param provider source of content spec nodes.
     * @param contentSpec scratch object that receives each node.
     * @return true if the node is a #PCDATA leaf or one is found below it;
     *         false otherwise, including when the provider has no node for
     *         the index.
     */
    private boolean searchForPCData(int contentSpecIndex,
                                    XMLContentSpec.Provider provider,
                                    XMLContentSpec contentSpec) {
        if (!provider.getContentSpec(contentSpecIndex, contentSpec)) {
            return false;
        }
        // Copy the children out first: the recursive calls below reuse, and
        // therefore overwrite, contentSpec.
        final int left = contentSpec.value;
        final int right = contentSpec.otherValue;

        switch (contentSpec.type) {
        case XMLContentSpec.CONTENTSPECNODE_LEAF:
            // A leaf with both values -1 is the #PCDATA leaf.
            return (left == -1) && (right == -1);

        case XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE:
        case XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE:
        case XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE:
            // search left side only
            return searchForPCData(left, provider, contentSpec);

        case XMLContentSpec.CONTENTSPECNODE_CHOICE:
        case XMLContentSpec.CONTENTSPECNODE_SEQ:
            // search left side, then right side
            return searchForPCData(left, provider, contentSpec)
                || searchForPCData(right, provider, contentSpec);

        default:
            // Every other node type (the CONTENTSPECNODE_ANY* variants and
            // CONTENTSPECNODE_ALL included): don't recurse.
            return false;
        }
    }

    /**
     * Determine if #PCDATA is part of the content spec.  Logic for this
     * method was taken from
     * org.enhydra.apache.xerces.framework.XMLContentSpec.
     *
     * @param contentSpecIndex index of the root node of the content spec.
     * @param contentSpecProvider source of content spec nodes.
     * @return true if a #PCDATA leaf is found in the content spec.
     */
    private boolean hasPCData(int contentSpecIndex,
                              XMLContentSpec.Provider contentSpecProvider) {
        // One scratch object is shared by the whole recursive search.
        final XMLContentSpec scratch = new XMLContentSpec();
        return searchForPCData(contentSpecIndex, contentSpecProvider, scratch);
    }

    /**
     * Handle an element declaration: trace it and, if its content spec
     * contains #PCDATA, register the element's raw name with the document
     * builder as a PCDATA-content element.
     *
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#elementDecl
     */
    public void elementDecl(QName elementDecl,
                            int contentSpecType,
                            int contentSpecIndex,
                            XMLContentSpec.Provider contentSpecProvider) throws Exception {
        fTracer.elementDecl(elementDecl, contentSpecType, contentSpecIndex,
                            contentSpecProvider);
        if (!hasPCData(contentSpecIndex, contentSpecProvider)) {
            return;
        }
        final String elementName = getString(elementDecl.rawname);
        fDocBuilder.addPCDataContentElement(elementName);
    }

    /**
     * Handle an attribute list declaration: trace it and, if the attribute
     * is of type ID, register the element/attribute pair with the document
     * builder as an id attribute.
     * NOTE(review): names are taken from QName.localpart here, while
     * elementDecl() and startElement() use rawname -- confirm this is
     * intended for prefixed names.
     *
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#attlistDecl
     */
    public void attlistDecl(QName elementDecl,
                            QName attributeDecl,
                            int attType,
                            boolean attList,
                            String enumString,
                            int attDefaultType,
                            int attDefaultValue) throws Exception {
        fTracer.attlistDecl(elementDecl,  attributeDecl, attType, attList,
                            enumString, attDefaultType, attDefaultValue);

        if (attType != XMLAttributeDecl.TYPE_ID) {
            return;
        }
        final String elementName = getString(elementDecl.localpart);
        final String attributeName = getString(attributeDecl.localpart);
        fDocBuilder.addIdAttribute(elementName, attributeName);
    }

    /**
     * DTD callback for an internal parameter entity declaration.  Only the
     * tracer is notified.
     *
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#internalPEDecl
     */
    public void internalPEDecl(int entityName,
                               int entityValue) {
        this.fTracer.internalPEDecl(entityName, entityValue);
    }

    /**
     * DTD callback for an external parameter entity declaration.  Only the
     * tracer is notified.
     *
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#externalPEDecl
     */
    public void externalPEDecl(int entityName,
                               int publicId,
                               int systemId) {
        this.fTracer.externalPEDecl(entityName, publicId, systemId);
    }

    /**
     * DTD callback for an internal entity declaration.  Only the tracer is
     * notified.
     *
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#internalEntityDecl
     */
    public void internalEntityDecl(int entityName,
                                   int entityValue) {
        this.fTracer.internalEntityDecl(entityName, entityValue);
    }

    /**
     * DTD callback for an external entity declaration.  Only the tracer is
     * notified.
     *
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#externalEntityDecl
     */
    public void externalEntityDecl(int entityName,
                                   int publicId,
                                   int systemId) {
        this.fTracer.externalEntityDecl(entityName, publicId, systemId);
    }

    /**
     * DTD callback for an unparsed entity declaration.  Only the tracer is
     * notified.
     *
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#unparsedEntityDecl
     */
    public void unparsedEntityDecl(int entityName,
                                   int publicId,
                                   int systemId,
                                   int notationName) {
        this.fTracer.unparsedEntityDecl(entityName, publicId,
                                        systemId, notationName);
    }

    /**
     * DTD callback for a notation declaration.  Only the tracer is
     * notified.
     *
     * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#notationDecl
     */
    public void notationDecl(int notationName,
                             int publicId,
                             int systemId) {
        this.fTracer.notationDecl(notationName, publicId, systemId);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy