All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl Maven / Gradle / Ivy

The newest version!
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright (c) 1997-2010 Oracle and/or its affiliates. All rights reserved.
 *
 * The contents of this file are subject to the terms of either the GNU
 * General Public License Version 2 only ("GPL") or the Common Development
 * and Distribution License("CDDL") (collectively, the "License").  You
 * may not use this file except in compliance with the License.  You can
 * obtain a copy of the License at
 * https://glassfish.dev.java.net/public/CDDL+GPL_1_1.html
 * or packager/legal/LICENSE.txt.  See the License for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing the software, include this License Header Notice in each
 * file and include the License file at packager/legal/LICENSE.txt.
 *
 * GPL Classpath Exception:
 * Oracle designates this particular file as subject to the "Classpath"
 * exception as provided by Oracle in the GPL Version 2 section of the License
 * file that accompanied this code.
 *
 * Modifications:
 * If applicable, add the following below the License Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyright [year] [name of copyright owner]"
 *
 * Contributor(s):
 * If you wish your version of this file to be governed by only the CDDL or
 * only the GPL Version 2, indicate your decision by adding "[Contributor]
 * elects to include this software in this distribution under the [CDDL or GPL
 * Version 2] license."  If you don't indicate a single choice of license, a
 * recipient has the option to distribute your version of this file under
 * either the CDDL, the GPL Version 2 or to extend the choice of license to
 * its licensees as provided above.  However, if you add GPL Version 2 code
 * and therefore, elected the GPL Version 2 license, then the option applies
 * only if the new code is made subject to such option by the copyright
 * holder.
 *
 *
 * This file incorporates work covered by the following copyright and
 * permission notice:
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id: XMLDocumentFragmentScannerImpl.java,v 1.19 2010-11-02 19:54:55 joehw Exp $
 * %W% %E%
 */


package com.sun.org.apache.xerces.internal.impl;

import com.sun.xml.internal.stream.XMLBufferListener;
import com.sun.xml.internal.stream.XMLEntityStorage;
import com.sun.xml.internal.stream.XMLInputFactoryImpl;
import com.sun.xml.internal.stream.dtd.DTDGrammarUtil;

import java.io.EOFException;
import java.io.IOException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.events.XMLEvent;
import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
import com.sun.org.apache.xerces.internal.util.AugmentationsImpl;
import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl;
import com.sun.org.apache.xerces.internal.util.XMLChar;
import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
import com.sun.org.apache.xerces.internal.util.XMLSymbols;
import com.sun.org.apache.xerces.internal.xni.QName;
import com.sun.org.apache.xerces.internal.xni.XMLAttributes;
import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler;
import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier;
import com.sun.org.apache.xerces.internal.xni.XMLString;
import com.sun.org.apache.xerces.internal.xni.XNIException;
import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent;
import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner;
import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource;
import com.sun.org.apache.xerces.internal.xni.Augmentations;
import com.sun.org.apache.xerces.internal.impl.Constants;
import com.sun.org.apache.xerces.internal.impl.XMLEntityHandler;
import com.sun.org.apache.xerces.internal.util.SecurityManager;
import com.sun.org.apache.xerces.internal.util.NamespaceSupport;
import com.sun.org.apache.xerces.internal.xni.NamespaceContext;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.events.XMLEvent;

/**
 *
 * This class is responsible for scanning the structure and content
 * of document fragments.
 *
 * This class has been modified as per the new design which is more suited to
 * efficiently build pull parser. Lot of improvements have been done and
 * the code has been added to support stax functionality/features.
 *
 * @author Neeraj Bajaj SUN Microsystems
 * @author K.Venugopal SUN Microsystems
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Arnaud  Le Hors, IBM
 * @author Eric Ye, IBM
 * @author Sunitha Reddy, SUN Microsystems
 * @version $Id: XMLDocumentFragmentScannerImpl.java,v 1.19 2010-11-02 19:54:55 joehw Exp $
 *
 */
public class XMLDocumentFragmentScannerImpl
        extends XMLScanner
        implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener {
    
    //
    // Constants
    //
    
    protected int fElementAttributeLimit;
    
    /** External subset resolver. **/
    protected ExternalSubsetResolver fExternalSubsetResolver;
    
    // scanner states
    
    //XXX this should be divided into more states.
    /** Scanner state: start of markup. */
    protected static final int SCANNER_STATE_START_OF_MARKUP = 21;
    
    /** Scanner state: content. */
    protected static final int SCANNER_STATE_CONTENT = 22;
    
    /** Scanner state: processing instruction. */
    protected static final int SCANNER_STATE_PI = 23;
    
    /** Scanner state: DOCTYPE. */
    protected static final int SCANNER_STATE_DOCTYPE = 24;
    
    /** Scanner state: XML Declaration */
    protected static final int SCANNER_STATE_XML_DECL = 25;
    
    /** Scanner state: root element. */
    protected static final int SCANNER_STATE_ROOT_ELEMENT = 26;
    
    /** Scanner state: comment. */
    protected static final int SCANNER_STATE_COMMENT = 27;
    
    /** Scanner state: reference. */
    protected static final int SCANNER_STATE_REFERENCE = 28;
    
    //  reading attribute name 'type'
    protected static final int SCANNER_STATE_ATTRIBUTE = 29;
    
    //  //reading attribute value.
    protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30;
    
    /** Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL*/
    //protected static final int SCANNER_STATE_TRAILING_MISC = 32;
    
    /** Scanner state: end of input. */
    protected static final int SCANNER_STATE_END_OF_INPUT = 33;
    
    /** Scanner state: terminated. */
    protected static final int SCANNER_STATE_TERMINATED = 34;
    
    /** Scanner state: CDATA section. */
    protected static final int SCANNER_STATE_CDATA = 35;
    
    /** Scanner state: Text declaration. */
    protected static final int SCANNER_STATE_TEXT_DECL = 36;
    
    /** Scanner state: character data. */
    protected static final int SCANNER_STATE_CHARACTER_DATA = 37;
    
    /** Scanner state: start element tag. */
    protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38;
    
    /** Scanner state: end element tag. */
    protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39;
    
    protected static final int SCANNER_STATE_CHAR_REFERENCE = 40;
    protected static final int SCANNER_STATE_BUILT_IN_REFS = 41;
    
    // feature identifiers
    
    
    /** Feature identifier: notify built-in references. */
    protected static final String NOTIFY_BUILTIN_REFS =
            Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;
    
    /** Property identifier: entity resolver. */
    protected static final String ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;
        
    // recognized features and properties
    
    /** Recognized features. */
    private static final String[] RECOGNIZED_FEATURES = {
                NAMESPACES,
                VALIDATION,
                NOTIFY_BUILTIN_REFS,
                NOTIFY_CHAR_REFS,
                Constants.STAX_REPORT_CDATA_EVENT                
    };
    
    /** Feature defaults. */
    private static final Boolean[] FEATURE_DEFAULTS = {
                Boolean.TRUE,
                null,
                Boolean.FALSE,
                Boolean.FALSE,
                Boolean.TRUE
    };
    
    /** Recognized properties. */
    private static final String[] RECOGNIZED_PROPERTIES = {
        SYMBOL_TABLE,
                ERROR_REPORTER,
                ENTITY_MANAGER,
    };
    
    /** Property defaults. */
    private static final Object[] PROPERTY_DEFAULTS = {
                null,
                null,
                null,
    };
    
    protected static final char [] cdata = {'[','C','D','A','T','A','['};
    protected static final char [] xmlDecl = {'<','?','x','m','l'};
    protected static final char [] endTag = {'<','/'};
    // debugging
    
    /** Debug scanner state. */
    private static final boolean DEBUG_SCANNER_STATE = false;
    
    /** Debug driver. */
    private static final boolean DEBUG_DISPATCHER = false;
    
    /** Debug content driver scanning. */
    protected static final boolean DEBUG_START_END_ELEMENT = false; 
    
    
    /** Debug driver next */
    protected static final boolean DEBUG_NEXT = false ;
    
    /** Debug driver next */
    protected static final boolean DEBUG = false;
    protected static final boolean DEBUG_COALESCE = false;
    //
    // Data
    //
    
    // protected data
    
    /** Document handler. */
    protected XMLDocumentHandler fDocumentHandler;
    protected int fScannerLastState ;
    
    /** Entity Storage */
    protected XMLEntityStorage fEntityStore;
    
    /** Entity stack. */
    protected int[] fEntityStack = new int[4];
    
    /** Markup depth. */
    protected int fMarkupDepth;
    
    //is the element empty
    protected boolean fEmptyElement ;
    
    //track whether we are reading attributes; this is useful when
    //there is a callback
    protected boolean fReadingAttributes = false;
    
    /** Scanner state. */
    protected int fScannerState;
    
    /** SubScanner state: inside scanContent method. */
    protected boolean fInScanContent = false;
    protected boolean fLastSectionWasCData = false;
    protected boolean fLastSectionWasEntityReference = false;
    protected boolean fLastSectionWasCharacterData = false;        
    
    /** has external dtd */
    protected boolean fHasExternalDTD;
    
    /** Standalone. */
    protected boolean fStandaloneSet;
    protected boolean fStandalone;
    protected String fVersion;
    
    // element information
    
    /** Current element. */
    protected QName fCurrentElement;
    
    /** Element stack. */
    protected ElementStack fElementStack = new ElementStack();
    protected ElementStack2 fElementStack2 = new ElementStack2();
    
    // other info
    
    /** Document system identifier.
     * REVISIT:  So what's this used for?  - NG
     * protected String fDocumentSystemId;
     ******/
    
    protected String fPITarget ;
    
    //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values
    protected XMLString fPIData  = new XMLString();
    
    // features
    
    
    /** Notify built-in references. */
    protected boolean fNotifyBuiltInRefs = false;
    
    //STAX related properties
    //defaultValues.
    protected boolean fReplaceEntityReferences = true;
    protected boolean fSupportExternalEntities = false;
    protected boolean fReportCdataEvent = false ;
    protected boolean fIsCoalesce = false ;
    protected String fDeclaredEncoding =  null;
    /** Disallow doctype declaration. */
    protected boolean fDisallowDoctype = false;
    
    // drivers
    
    /** Active driver. */
    protected Driver fDriver;
    
    /** Content driver. */
    protected Driver fContentDriver = createContentDriver();
    
    // temporary variables
    
    /** Element QName. */
    protected QName fElementQName = new QName();
    
    /** Attribute QName. */
    protected QName fAttributeQName = new QName();
    
    /**
     * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class
     * implements Iterator interface so we can directly give Attributes in the form of
     * iterator.
     */
    protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl();
    
    
    /** String. */
    protected XMLString fTempString = new XMLString();
    
    /** String. */
    protected XMLString fTempString2 = new XMLString();
    
    /** Array of 3 strings. */
    private String[] fStrings = new String[3];
    
    /** Making the buffer accessible to derived class -- String buffer. */
    protected XMLStringBuffer fStringBuffer = new XMLStringBuffer();
    
    /** Making the buffer accessible to derived class -- String buffer. */
    protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
    
    /** stores character data. */
    /** Making the buffer accesible to derived class -- stores PI data */
    protected XMLStringBuffer fContentBuffer = new XMLStringBuffer();
    
    /** Single character array. */
    private final char[] fSingleChar = new char[1];
    private String fCurrentEntityName = null;
    
    // New members
    protected boolean fScanToEnd = false;
    
    protected DTDGrammarUtil dtdGrammarUtil= null;
    
    protected boolean fAddDefaultAttr = false;
    
    protected boolean foundBuiltInRefs = false;
    
    protected SecurityManager fSecurityManager = null;
    
    //skip element algorithm
    static final short MAX_DEPTH_LIMIT = 5 ;
    static final short ELEMENT_ARRAY_LENGTH = 200 ;
    static final short MAX_POINTER_AT_A_DEPTH = 4 ;
    static final boolean DEBUG_SKIP_ALGORITHM = false;
    //create an element array of length equal to ELEMENT_ARRAY_LENGTH
    String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ;
    //pointer location where last element was skipped
    short fLastPointerLocation = 0 ;
    short fElementPointer = 0 ;
    //2D array to store pointer info
    short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ;
    protected String fElementRawname ;
    protected boolean fShouldSkip = false;
    protected boolean fAdd = false ;
    protected boolean fSkip = false;
    
    /** Reusable Augmentations. */
    private Augmentations fTempAugmentations = null;
    //
    // Constructors
    //
    
    /**
     * Default constructor. Performs no work beyond the superclass
     * constructor and the field initializers declared on this class.
     */
    public XMLDocumentFragmentScannerImpl() {
        super();
    } // <init>()
    
    //
    // XMLDocumentScanner methods
    //
    
    /**
     * Sets the input source. This scanner is registered as the entity
     * handler on the entity manager, and the given source is then started
     * as the entity named "$fragment$".
     *
     * @param inputSource The input source.
     *
     * @throws IOException Thrown on i/o error.
     */
    public void setInputSource(XMLInputSource inputSource) throws IOException {
        final String fragmentEntityName = "$fragment$";
        // the handler must be installed before the entity is started
        fEntityManager.setEntityHandler(this);
        fEntityManager.startEntity(fragmentEntityName, inputSource, false, true);
    } // setInputSource(XMLInputSource)
    
    /**
     * Scans a document by pulling events from {@link #next()} and forwarding
     * some of them to the registered document handler.
     * <p>
     * Only CHARACTERS, PROCESSING_INSTRUCTION, COMMENT and CDATA events
     * produce a handler callback here; the other recognized event types are
     * accepted without a callback from this method. An event type that is
     * not listed in the switch raises an {@link InternalError}.
     * <p>
     * NOTE(review): after an event is dispatched, one more event is fetched
     * and only compared against END_DOCUMENT. When <code>complete</code> is
     * false this method then returns, and the following invocation starts
     * with a fresh call to next() -- confirm whether skipping that
     * intermediate event is intended.
     *
     * @param complete True if the scanner should scan the document
     *                 completely, pushing all events to the registered
     *                 document handler. A value of false indicates that
     *                 that the scanner should only scan the next portion
     *                 of the document and return.
     *
     * @return True if there is more to scan, false once END_DOCUMENT has
     *         been reached (endDocument is reported to the handler first).
     *
     * @throws IOException  Declared by the signature; may come from next().
     * @throws XNIException Declared by the signature; may come from next()
     *                      or from the document handler.
     */
    public boolean scanDocument(boolean complete)
    throws IOException, XNIException {
        
        // keep dispatching "events"
        fEntityManager.setEntityHandler(this);
        
        int eventType = next();
        for (;;) {
            switch (eventType) {
                case XMLStreamConstants.CHARACTERS :
                    fDocumentHandler.characters(getCharacterData(), null);
                    break;
                    
                case XMLStreamConstants.PROCESSING_INSTRUCTION :
                    fDocumentHandler.processingInstruction(getPITarget(), getPIData(), null);
                    break;
                    
                case XMLStreamConstants.COMMENT :
                    fDocumentHandler.comment(getCharacterData(), null);
                    break;
                    
                case XMLStreamConstants.CDATA :
                    // the CDATA text is taken from getCharacterData()
                    fDocumentHandler.startCDATA(null);
                    fDocumentHandler.characters(getCharacterData(), null);
                    fDocumentHandler.endCDATA(null);
                    break;
                    
                // Events below get no callback from this method. Per the
                // original notes: entity reference callbacks are given in
                // startEntity, DTD callbacks are handled in the DTD scanner,
                // and the end element callback is given in scanEndElement.
                case XMLStreamConstants.START_DOCUMENT :
                case XMLStreamConstants.START_ELEMENT :
                case XMLStreamConstants.SPACE :
                case XMLStreamConstants.ENTITY_REFERENCE :
                case XMLStreamConstants.DTD :
                case XMLStreamConstants.NOTATION_DECLARATION :
                case XMLStreamConstants.ENTITY_DECLARATION :
                case XMLStreamConstants.NAMESPACE :
                case XMLStreamConstants.ATTRIBUTE :
                case XMLStreamConstants.END_ELEMENT :
                    break;
                    
                default :
                    throw new InternalError("processing event: " + eventType);
            }
            
            eventType = next();
            
            if (eventType == XMLStreamConstants.END_DOCUMENT) {
                fDocumentHandler.endDocument(null);
                return false;
            }
            if (!complete) {
                // caller asked for a single step; more input remains
                return true;
            }
        }
        
    } // scanDocument(boolean):boolean
    
    
    
    /**
     * Returns the reusable element QName. When the last scanner state was
     * END_ELEMENT, the QName is first refreshed with the values of the
     * element most recently popped from the element stack.
     *
     * @return the shared <code>fElementQName</code> instance
     */
    public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){
        final boolean lastStateWasEndElement = (XMLEvent.END_ELEMENT == fScannerLastState);
        if (lastStateWasEndElement) {
            fElementQName.setValues(fElementStack.getLastPoppedElement());
        }
        return fElementQName;
    }
    
    /**
     * Returns the next state on the input by delegating to the active
     * driver.
     *
     * @return int the value returned by the active driver's next()
     *
     * @throws IOException  Declared by the signature; raised by the driver.
     * @throws XNIException Declared by the signature; raised by the driver.
     */
    public int next() throws IOException, XNIException {
        final int nextEvent = fDriver.next();
        return nextEvent;
    }
    
    //
    // XMLComponent methods
    //
    
    /**
     * Resets the component. The component can query the component manager
     * about any features and properties that affect the operation of the
     * component.
     * <p>
     * After the superclass reset, this method reads the CDATA-reporting and
     * built-in-reference features, the security manager and the entity
     * resolver, clears the per-document scanner state, and installs the
     * content driver in the CONTENT scanner state.
     *
     * @param componentManager The component manager.
     *
     * @throws XMLConfigurationException Declared by the signature for
     *                      errors raised while the component is initialized
     *                      from the component manager.
     */
    public void reset(XMLComponentManager componentManager)
    throws XMLConfigurationException {
        
        super.reset(componentManager);
        
        // features and properties supplied by the component manager
        fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true);
        
        fSecurityManager = (SecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null);
        if (fSecurityManager == null) {
            // no security manager: the limit is recorded as 0
            fElementAttributeLimit = 0;
        } else {
            fElementAttributeLimit = fSecurityManager.getElementAttrLimit();
        }
        
        fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false);
        
        final Object entityResolver = componentManager.getProperty(ENTITY_RESOLVER, null);
        if (entityResolver instanceof ExternalSubsetResolver) {
            fExternalSubsetResolver = (ExternalSubsetResolver) entityResolver;
        } else {
            fExternalSubsetResolver = null;
        }
        
        // per-document scanner state
        fMarkupDepth = 0;
        fCurrentElement = null;
        fElementStack.clear();
        fHasExternalDTD = false;
        fStandaloneSet = false;
        fStandalone = false;
        fInScanContent = false;
        
        // skip-element algorithm flags
        fShouldSkip = false;
        fAdd = false;
        fSkip = false;
        
        // attribute scanning
        fReadingAttributes = false;
        
        //xxx: external entities are supported in Xerces
        // it would be good to define feature for this case
        fSupportExternalEntities = true;
        fReplaceEntityReferences = true;
        fIsCoalesce = false;
        
        // setup Driver
        setScannerState(SCANNER_STATE_CONTENT);
        setDriver(fContentDriver);
        fEntityStore = fEntityManager.getEntityStore();
        
        dtdGrammarUtil = null;
        
    } // reset(XMLComponentManager)
    
    
    /**
     * Resets the scanner from a StAX property manager.
     * <p>
     * The namespace-aware, replace-entity-references and
     * support-external-entities properties are unboxed without a null
     * check. The report-CDATA and coalescing properties are applied only
     * when they are non-null; otherwise the current field values are kept.
     * When coalescing is on, CDATA events are not reported and entity
     * references are always replaced. The scanner state and the driver are
     * not changed by this method.
     *
     * @param propertyManager The property manager to read settings from.
     */
    public void reset(PropertyManager propertyManager){
        
        super.reset(propertyManager);
        
        final Boolean namespaceAware =
                (Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE);
        fNamespaces = namespaceAware.booleanValue();
        fNotifyBuiltInRefs = false;
        
        // per-document scanner state
        fMarkupDepth = 0;
        fCurrentElement = null;
        fShouldSkip = false;
        fAdd = false;
        fSkip = false;
        fElementStack.clear();
        fHasExternalDTD = false;
        fStandaloneSet = false;
        fStandalone = false;
        
        final Boolean replaceEntityRefs =
                (Boolean)propertyManager.getProperty(XMLInputFactoryImpl.IS_REPLACING_ENTITY_REFERENCES);
        fReplaceEntityReferences = replaceEntityRefs.booleanValue();
        
        final Boolean supportExternalEntities =
                (Boolean)propertyManager.getProperty(XMLInputFactoryImpl.IS_SUPPORTING_EXTERNAL_ENTITIES);
        fSupportExternalEntities = supportExternalEntities.booleanValue();
        
        final Boolean reportCdata =
                (Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT);
        if (reportCdata != null) {
            fReportCdataEvent = reportCdata.booleanValue();
        }
        
        final Boolean coalescing =
                (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING);
        if (coalescing != null) {
            fIsCoalesce = coalescing.booleanValue();
        }
        
        if (fIsCoalesce) {
            // coalescing overrides both settings; otherwise the values
            // chosen above (or set by the application) are kept
            fReportCdataEvent = false;
            fReplaceEntityReferences = true;
        }
        
        fEntityStore = fEntityManager.getEntityStore();
        
        dtdGrammarUtil = null;
        
    } // reset(PropertyManager)
    
    /**
     * Returns the feature identifiers recognized by this component. A copy
     * of the internal list is returned on each call, so callers cannot
     * modify the list held by this class.
     *
     * @return a new array holding the recognized feature identifiers
     */
    public String[] getRecognizedFeatures() {
        final String[] featuresCopy = RECOGNIZED_FEATURES.clone();
        return featuresCopy;
    } // getRecognizedFeatures():String[]
    
    /**
     * Sets the state of a feature. This method is called by the component
     * manager any time after reset when a feature changes state.
     * 

* Note: Components should silently ignore features * that do not affect the operation of the component. * * @param featureId The feature identifier. * @param state The state of the feature. * * @throws SAXNotRecognizedException The component should not throw * this exception. * @throws SAXNotSupportedException The component should not throw * this exception. */ public void setFeature(String featureId, boolean state) throws XMLConfigurationException { super.setFeature(featureId, state); // Xerces properties if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { fNotifyBuiltInRefs = state; } } } // setFeature(String,boolean) /** * Returns a list of property identifiers that are recognized by * this component. This method may return null if no properties * are recognized by this component. */ public String[] getRecognizedProperties() { return (String[])(RECOGNIZED_PROPERTIES.clone()); } // getRecognizedProperties():String[] /** * Sets the value of a property. This method is called by the component * manager any time after reset when a property changes value. *

* Note: Components should silently ignore properties * that do not affect the operation of the component. * * @param propertyId The property identifier. * @param value The value of the property. * * @throws SAXNotRecognizedException The component should not throw * this exception. * @throws SAXNotSupportedException The component should not throw * this exception. */ public void setProperty(String propertyId, Object value) throws XMLConfigurationException { super.setProperty(propertyId, value); // Xerces properties if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { fEntityManager = (XMLEntityManager)value; return; } if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? (ExternalSubsetResolver) value : null; return; } } // Xerces properties if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { fEntityManager = (XMLEntityManager)value; } return; } } // setProperty(String,Object) /** * Returns the default state for a feature, or null if this * component does not want to report a default value for this * feature. * * @param featureId The feature identifier. * * @since Xerces 2.2.0 */ public Boolean getFeatureDefault(String featureId) { for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { if (RECOGNIZED_FEATURES[i].equals(featureId)) { return FEATURE_DEFAULTS[i]; } } return null; } // getFeatureDefault(String):Boolean /** * Returns the default state for a property, or null if this * component does not want to report a default value for this * property. 
* * @param propertyId The property identifier. * * @since Xerces 2.2.0 */ public Object getPropertyDefault(String propertyId) { for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { return PROPERTY_DEFAULTS[i]; } } return null; } // getPropertyDefault(String):Object // // XMLDocumentSource methods // /** * setDocumentHandler * * @param documentHandler */ public void setDocumentHandler(XMLDocumentHandler documentHandler) { fDocumentHandler = documentHandler; //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); } // setDocumentHandler(XMLDocumentHandler) /** Returns the document handler */ public XMLDocumentHandler getDocumentHandler(){ return fDocumentHandler; } // // XMLEntityHandler methods // /** * This method notifies of the start of an entity. The DTD has the * pseudo-name of "[dtd]" parameter entity names start with '%'; and * general entities are just specified by their name. * * @param name The name of the entity. * @param identifier The resource identifier. * @param encoding The auto-detected IANA encoding name of the entity * stream. This value will be null in those situations * where the entity encoding is not auto-detected (e.g. * internal entities or a document entity that is * parsed from a java.io.Reader). * * @throws XNIException Thrown by handler to signal an error. 
*/ public void startEntity(String name, XMLResourceIdentifier identifier, String encoding, Augmentations augs) throws XNIException { // keep track of this entity before fEntityDepth is increased if (fEntityDepth == fEntityStack.length) { int[] entityarray = new int[fEntityStack.length * 2]; System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); fEntityStack = entityarray; } fEntityStack[fEntityDepth] = fMarkupDepth; super.startEntity(name, identifier, encoding, augs); // WFC: entity declared in external subset in standalone doc if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", new Object[]{name}); } /** we are not calling the handlers yet.. */ // call handler if (fDocumentHandler != null && !fScanningAttribute) { if (!name.equals("[xml]")) { fDocumentHandler.startGeneralEntity(name, identifier, encoding, null); } } } // startEntity(String,XMLResourceIdentifier,String) /** * This method notifies the end of an entity. The DTD has the pseudo-name * of "[dtd]" parameter entity names start with '%'; and general entities * are just specified by their name. * * @param name The name of the entity. * * @throws XNIException Thrown by handler to signal an error. 
*/ public void endEntity(String name, Augmentations augs) throws IOException, XNIException { /** * // flush possible pending output buffer - see scanContent * if (fInScanContent && fStringBuffer.length != 0 * && fDocumentHandler != null) { * fDocumentHandler.characters(fStringBuffer, null); * fStringBuffer.length = 0; // make sure we know it's been flushed * } */ super.endEntity(name, augs); // make sure markup is properly balanced if (fMarkupDepth != fEntityStack[fEntityDepth]) { reportFatalError("MarkupEntityMismatch", null); } /**/ // call handler if (fDocumentHandler != null && !fScanningAttribute) { if (!name.equals("[xml]")) { fDocumentHandler.endGeneralEntity(name, null); } } } // endEntity(String) // // Protected methods // // Driver factory methods /** Creates a content Driver. */ protected Driver createContentDriver() { return new FragmentContentDriver(); } // createContentDriver():Driver // scanning methods /** * Scans an XML or text declaration. *

     * <p>
     * <pre>
     * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
     * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
     * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
     * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
     *                 | ('"' ('yes' | 'no') '"'))
     *
     * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
     * </pre>
* * @param scanningTextDecl True if a text declaration is to * be scanned instead of an XML * declaration. */ protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) throws IOException, XNIException { // scan decl super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); fMarkupDepth--; // pseudo-attribute values String version = fStrings[0]; String encoding = fStrings[1]; String standalone = fStrings[2]; fDeclaredEncoding = encoding; // set standalone fStandaloneSet = standalone != null; fStandalone = fStandaloneSet && standalone.equals("yes"); ///xxx see where its used.. this is not used anywhere. it may be useful for entity to store this information //but this information is only related with Document Entity. fEntityManager.setStandalone(fStandalone); // call handler if (fDocumentHandler != null) { if (scanningTextDecl) { fDocumentHandler.textDecl(version, encoding, null); } else { fDocumentHandler.xmlDecl(version, encoding, standalone, null); } } if(version != null){ fEntityScanner.setVersion(version); fEntityScanner.setXMLVersion(version); } // set encoding on reader, only if encoding was not specified by the application explicitly if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { fEntityScanner.setEncoding(encoding); } } // scanXMLDeclOrTextDecl(boolean) public String getPITarget(){ return fPITarget ; } public XMLStringBuffer getPIData(){ return fContentBuffer ; } //XXX: why not this function behave as per the state of the parser? public XMLString getCharacterData(){ if(fUsebuffer){ return fContentBuffer ; }else{ return fTempString; } } /** * Scans a processing data. This is needed to handle the situation * where a document starts with a processing instruction whose * target name starts with "xml". (e.g. 
xmlfoo) * * @param target The PI target * @param data The XMLStringBuffer to fill in with the data */ protected void scanPIData(String target, XMLStringBuffer data) throws IOException, XNIException { super.scanPIData(target, data); //set the PI target and values fPITarget = target ; fMarkupDepth--; } // scanPIData(String) /** * Scans a comment. *

     * <p>
     * <pre>
     * [15] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
     * </pre>
     * <p>

* Note: Called after scanning past '<!--' */ protected void scanComment() throws IOException, XNIException { fContentBuffer.clear(); scanComment(fContentBuffer); //getTextCharacters can also be called for reading comments fUsebuffer = true; fMarkupDepth--; } // scanComment() //xxx value returned by this function may not remain valid if another event is scanned. public String getComment(){ return fContentBuffer.toString(); } void addElement(String rawname){ if(fElementPointer < ELEMENT_ARRAY_LENGTH){ //storing element raw name in a linear list of array fElementArray[fElementPointer] = rawname ; //storing elemnetPointer for particular element depth if(DEBUG_SKIP_ALGORITHM){ StringBuffer sb = new StringBuffer() ; sb.append(" Storing element information ") ; sb.append(" fElementPointer = " + fElementPointer) ; sb.append(" fElementRawname = " + fElementQName.rawname) ; sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); System.out.println(sb.toString()) ; } //store pointer information only when element depth is less MAX_DEPTH_LIMIT if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ short column = storePointerForADepth(fElementPointer); if(column > 0){ short pointer = getElementPointer((short)fElementStack.fDepth, (short)(column - 1) ); //identity comparison shouldn't take much time and we can rely on this //since its guaranteed to have same object id for same string. if(rawname == fElementArray[pointer]){ fShouldSkip = true ; fLastPointerLocation = pointer ; //reset the things and return. resetPointer((short)fElementStack.fDepth , column) ; fElementArray[fElementPointer] = null ; return ; }else{ fShouldSkip = false ; } } } fElementPointer++ ; } } void resetPointer(short depth, short column){ fPointerInfo[depth] [column] = (short)0; } //returns column information at which pointer was stored. 
short storePointerForADepth(short elementPointer){ short depth = (short) fElementStack.fDepth ; //Stores element pointer locations at particular depth , only 4 pointer locations //are stored at particular depth for now. for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ if(canStore(depth, i)){ fPointerInfo[depth][i] = elementPointer ; if(DEBUG_SKIP_ALGORITHM){ StringBuffer sb = new StringBuffer() ; sb.append(" Pointer information ") ; sb.append(" fElementPointer = " + fElementPointer) ; sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); sb.append(" column = " + i ) ; System.out.println(sb.toString()) ; } return i; } //else //pointer was not stored because we reached the limit } return -1 ; } boolean canStore(short depth, short column){ //colum = 0 , means first element at particular depth //column = 1, means second element at particular depth // calle should make sure that it doesn't call for value outside allowed co-ordinates return fPointerInfo[depth][column] == 0 ? true : false ; } short getElementPointer(short depth, short column){ //colum = 0 , means first element at particular depth //column = 1, means second element at particular depth // calle should make sure that it doesn't call for value outside allowed co-ordinates return fPointerInfo[depth][column] ; } //this function assumes that string passed is not null and skips //the following string from the buffer this makes sure boolean skipFromTheBuffer(String rawname) throws IOException{ if(fEntityScanner.skipString(rawname)){ char c = (char)fEntityScanner.peekChar() ; //If the start element was completely skipped we should encounter either ' '(space), //or '/' (in case of empty element) or '>' if( c == ' ' || c == '/' || c == '>'){ fElementRawname = rawname ; return true ; } else{ return false; } } else return false ; } boolean skipQElement(String rawname) throws IOException{ final int c = fEntityScanner.getChar(rawname.length()); //if this character is still valid element name -- this means 
string can't match if(XMLChar.isName(c)){ return false; }else{ return fEntityScanner.skipString(rawname); } } protected boolean skipElement() throws IOException { if(!fShouldSkip) return false ; if(fLastPointerLocation != 0){ //Look at the next element stored in the array list.. we might just get a match. String rawname = fElementArray[fLastPointerLocation + 1] ; if(rawname != null && skipFromTheBuffer(rawname)){ fLastPointerLocation++ ; if(DEBUG_SKIP_ALGORITHM){ System.out.println("Element " + fElementRawname + " was SKIPPED at pointer location = " + fLastPointerLocation); } return true ; } else{ //reset it back to zero... we haven't got the correct subset yet. fLastPointerLocation = 0 ; } } //xxx: we can put some logic here as from what column it should start looking //for now we always start at 0 //fallback to tolerant algorithm, it would look for differnt element stored at different //depth and get us the pointer location. return fShouldSkip && skipElement((short)0); } //start of the column at which it should try searching boolean skipElement(short column) throws IOException { short depth = (short)fElementStack.fDepth ; if(depth > MAX_DEPTH_LIMIT){ return fShouldSkip = false ; } for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ short pointer = getElementPointer(depth , i ) ; if(pointer == 0){ return fShouldSkip = false ; } if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ if(DEBUG_SKIP_ALGORITHM){ System.out.println(); System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + fElementStack.fDepth + " column = " + column ); System.out.println(); } fLastPointerLocation = pointer ; return fShouldSkip = true ; } } return fShouldSkip = false ; } /** * Scans a start element. This method will handle the binding of * namespace information and notifying the handler of the start * of the element. *

     * <p>
     * <pre>
     * [44] EmptyElemTag ::= '&lt;' Name (S Attribute)* S? '/>'
     * [40] STag ::= '&lt;' Name (S Attribute)* S? '>'
     * </pre>
     * <p>
     * Note: This method assumes that the leading
     * '&lt;' character has been consumed.
     * <p>

* Note: This method uses the fElementQName and * fAttributes variables. The contents of these variables will be * destroyed. The caller should copy important information out of * these variables before calling this method. * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT * * @return True if element is empty. (i.e. It matches * production [44]. */ // fElementQName will have the details of element just read.. // fAttributes will have the details of all the attributes. protected boolean scanStartElement() throws IOException, XNIException { if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()"); //when skipping is true and no more elements should be added if(fSkip && !fAdd){ //get the stored element -- if everything goes right this should match the //token in the buffer QName name = fElementStack.getNext(); if(DEBUG_SKIP_ALGORITHM){ System.out.println("Trying to skip String = " + name.rawname); } //Be conservative -- if skipping fails -- stop. 
fSkip = fEntityScanner.skipString(name.rawname); if(fSkip){ if(DEBUG_SKIP_ALGORITHM){ System.out.println("Element SUCESSFULLY skipped = " + name.rawname); } fElementStack.push(); fElementQName = name; }else{ //if skipping fails reposition the stack or fallback to normal way of processing fElementStack.reposition(); if(DEBUG_SKIP_ALGORITHM){ System.out.println("Element was NOT skipped, REPOSITIONING stack" ); } } } //we are still at the stage of adding elements //the elements were not matched or //fSkip is not set to true if(!fSkip || fAdd){ //get the next element from the stack fElementQName = fElementStack.nextElement(); // name if (fNamespaces) { fEntityScanner.scanQName(fElementQName); } else { String name = fEntityScanner.scanName(); fElementQName.setValues(null, name, name, null); } if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString()); if(DEBUG_SKIP_ALGORITHM){ if(fAdd){ System.out.println("Elements are being ADDED -- elemet added is = " + fElementQName.rawname + " at count = " + fElementStack.fCount); } } } //when the elements are being added , we need to check if we are set for skipping the elements if(fAdd){ //this sets the value of fAdd variable fElementStack.matchElement(fElementQName); } //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName fCurrentElement = fElementQName; String rawname = fElementQName.rawname; fEmptyElement = false; fAttributes.removeAllAttributes(); if(!seekCloseOfStartTag()){ fReadingAttributes = true; fAttributeCacheUsedCount =0; fStringBufferIndex =0; fAddDefaultAttr = true; do { scanAttribute(fAttributes); if (fSecurityManager != null && fAttributes.getLength() > fElementAttributeLimit){ fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "ElementAttributeLimit", new Object[]{rawname, new Integer(fAttributes.getLength()) }, XMLErrorReporter.SEVERITY_FATAL_ERROR ); } } while (!seekCloseOfStartTag()); fReadingAttributes=false; } if (fEmptyElement) { //decrease 
the markup depth.. fMarkupDepth--; // check that this element was opened in the same entity if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { reportFatalError("ElementEntityMismatch", new Object[]{fCurrentElement.rawname}); } // call handler if (fDocumentHandler != null) { fDocumentHandler.emptyElement(fElementQName, fAttributes, null); } //We should not be popping out the context here in endELement becaause the namespace context is still //valid when parser is at the endElement state. //if (fNamespaces) { // fNamespaceContext.popContext(); //} //pop the element off the stack.. fElementStack.popElement(); } else { if(dtdGrammarUtil != null) dtdGrammarUtil.startElement(fElementQName, fAttributes); if(fDocumentHandler != null){ //complete element and attributes are traversed in this function so we can send a callback //here. //we shouldn't be sending callback in scanDocument() fDocumentHandler.startElement(fElementQName, fAttributes, null); } } if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() + "<<< scanStartElement(): "+fEmptyElement); return fEmptyElement; } // scanStartElement():boolean /** * Looks for the close of start tag, i.e. if it finds '>' or '/>' * Characters are consumed. */ protected boolean seekCloseOfStartTag() throws IOException, XNIException { // spaces boolean sawSpace = fEntityScanner.skipSpaces(); // end tag? final int c = fEntityScanner.peekChar(); if (c == '>') { fEntityScanner.scanChar(); return true; } else if (c == '/') { fEntityScanner.scanChar(); if (!fEntityScanner.skipChar('>')) { reportFatalError("ElementUnterminated", new Object[]{fElementQName.rawname}); } fEmptyElement = true; return true; } else if (!isValidNameStartChar(c) || !sawSpace) { reportFatalError("ElementUnterminated", new Object[]{fElementQName.rawname}); } return false; } public boolean hasAttributes(){ return fAttributes.getLength() > 0 ? true : false ; } /** * Scans an attribute. *

     * <p>
     * <pre>
     * [41] Attribute ::= Name Eq AttValue
     * </pre>
     * <p>
     * Note: This method assumes that the next
     * character on the stream is the first character of the attribute
     * name.
     * <p>

* Note: This method uses the fAttributeQName and * fQName variables. The contents of these variables will be * destroyed. * * @param attributes The attributes list for the scanned attribute. */ /** * protected void scanAttribute(AttributeIteratorImpl attributes) * throws IOException, XNIException { * if (DEBUG_START_END_ELEMENT) System.out.println(">>> scanAttribute()"); * * * // name * if (fNamespaces) { * fEntityScanner.scanQName(fAttributeQName); * } * else { * String name = fEntityScanner.scanName(); * fAttributeQName.setValues(null, name, name, null); * } * * // equals * fEntityScanner.skipSpaces(); * if (!fEntityScanner.skipChar('=')) { * reportFatalError("EqRequiredInAttribute", * new Object[]{fAttributeQName.rawname}); * } * fEntityScanner.skipSpaces(); * * * // content * int oldLen = attributes.getLength(); */ /**xxx there is one check of duplicate attribute that has been removed. * attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); * * // WFC: Unique Att Spec * if (oldLen == attributes.getLength()) { * reportFatalError("AttributeNotUnique", * new Object[]{fCurrentElement.rawname, * fAttributeQName.rawname}); * } */ /* //REVISIT: one more case needs to be included: external PE and standalone is no boolean isVC = fHasExternalDTD && !fStandalone; scanAttributeValue(fTempString, fTempString2, fAttributeQName.rawname, attributes, oldLen, isVC); //attributes.setValue(oldLen, fTempString.toString()); //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); //attributes.setSpecified(oldLen, true); AttributeImpl attribute = new AttributeImpl(fAttributeQName.prefix,fAttributeQName.localpart,fAttributeQName.uri,fTempString.toString(),fTempString2.toString(),XMLSymbols.fCDATASymbol,true); fAttributes.addAttribute(attribute); if (DEBUG_START_END_ELEMENT) System.out.println("<<< scanAttribute()"); } // scanAttribute(XMLAttributes) */ /** return the attribute iterator implementation */ public XMLAttributesIteratorImpl 
getAttributeIterator(){ if(dtdGrammarUtil != null && fAddDefaultAttr){ dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes); fAddDefaultAttr = false; } return fAttributes; } /** return if standalone is set */ public boolean standaloneSet(){ return fStandaloneSet; } /** return if the doucment is standalone */ public boolean isStandAlone(){ return fStandalone ; } /** * Scans an attribute name value pair. *

     * <p>
     * <pre>
     * [41] Attribute ::= Name Eq AttValue
     * </pre>
     * <p>
     * Note: This method assumes that the next
     * character on the stream is the first character of the attribute
     * name.
     * <p>

* Note: This method uses the fAttributeQName and * fQName variables. The contents of these variables will be * destroyed. * * @param attributes The attributes list for the scanned attribute. */ protected void scanAttribute(XMLAttributes attributes) throws IOException, XNIException { if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()"); // name if (fNamespaces) { fEntityScanner.scanQName(fAttributeQName); } else { String name = fEntityScanner.scanName(); fAttributeQName.setValues(null, name, name, null); } // equals fEntityScanner.skipSpaces(); if (!fEntityScanner.skipChar('=')) { reportFatalError("EqRequiredInAttribute", new Object[] {fCurrentElement.rawname, fAttributeQName.rawname}); } fEntityScanner.skipSpaces(); int attIndex = 0 ; //REVISIT: one more case needs to be included: external PE and standalone is no boolean isVC = fHasExternalDTD && !fStandalone; //fTempString would store attribute value ///fTempString2 would store attribute non-normalized value //this function doesn't use 'attIndex'. We are adding the attribute later //after we have figured out that current attribute is not namespace declaration //since scanAttributeValue doesn't use attIndex parameter therefore we //can safely add the attribute later.. XMLString tmpStr = getString(); scanAttributeValue(tmpStr, fTempString2, fAttributeQName.rawname, attributes, attIndex, isVC); // content int oldLen = attributes.getLength(); //if the attribute name already exists.. new value is replaced with old value attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); // WFC: Unique Att Spec //attributes count will be same if the current attribute name already exists for this element name. //this means there are two duplicate attributes. 
if (oldLen == attributes.getLength()) { reportFatalError("AttributeNotUnique", new Object[]{fCurrentElement.rawname, fAttributeQName.rawname}); } //tmpString contains attribute value //we are passing null as the attribute value attributes.setValue(attIndex, null, tmpStr); ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); attributes.setSpecified(attIndex, true); if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()"); } // scanAttribute(XMLAttributes) /** * Scans element content. * * @return Returns the next character on the stream. */ //CHANGED: //EARLIER: scanContent() //NOW: scanContent(XMLStringBuffer) //It makes things easy if this functions takes XMLStringBuffer as parameter.. //this function appends the data to the buffer. protected int scanContent(XMLStringBuffer content) throws IOException, XNIException { //set the fTempString length to 0 before passing it on to scanContent //scanContent sets the correct co-ordinates as per the content read fTempString.length = 0; int c = fEntityScanner.scanContent(fTempString); content.append(fTempString); fTempString.length = 0; if (c == '\r') { // happens when there is the character reference //xxx: We know the next chracter.. we should just skip it and add ']' directlry fEntityScanner.scanChar(); content.append((char)c); c = -1; } else if (c == ']') { //fStringBuffer.clear(); //xxx: We know the next chracter.. we should just skip it and add ']' directlry content.append((char)fEntityScanner.scanChar()); // remember where we are in case we get an endEntity before we // could flush the buffer out - this happens when we're parsing an // entity which ends with a ] fInScanContent = true; // // We work on a single character basis to handle cases such as: // ']]]>' which we might otherwise miss. 
// if (fEntityScanner.skipChar(']')) { content.append(']'); while (fEntityScanner.skipChar(']')) { content.append(']'); } if (fEntityScanner.skipChar('>')) { reportFatalError("CDEndInContent", null); } } fInScanContent = false; c = -1; } if (fDocumentHandler != null && content.length > 0) { //fDocumentHandler.characters(content, null); } return c; } // scanContent():int /** * Scans a CDATA section. *

* Note: This method uses the fTempString and * fStringBuffer variables. * * @param complete True if the CDATA section is to be scanned * completely. * * @return True if CDATA is completely scanned. */ //CHANGED: protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete) throws IOException, XNIException { // call handler if (fDocumentHandler != null) { //fDocumentHandler.startCDATA(null); } while (true) { //scanData will fill the contentBuffer if (!fEntityScanner.scanData("]]>", contentBuffer)) { break ; /** We dont need all this code if we pass ']]>' as delimeter.. * int brackets = 2; * while (fEntityScanner.skipChar(']')) { * brackets++; * } * * //When we find more than 2 square brackets * if (fDocumentHandler != null && brackets > 2) { * //we dont need to clear the buffer.. * //contentBuffer.clear(); * for (int i = 2; i < brackets; i++) { * contentBuffer.append(']'); * } * fDocumentHandler.characters(contentBuffer, null); * } * * if (fEntityScanner.skipChar('>')) { * break; * } * if (fDocumentHandler != null) { * //we dont need to clear the buffer now.. * //contentBuffer.clear(); * contentBuffer.append("]]"); * fDocumentHandler.characters(contentBuffer, null); * } **/ } else { int c = fEntityScanner.peekChar(); if (c != -1 && isInvalidLiteral(c)) { if (XMLChar.isHighSurrogate(c)) { //contentBuffer.clear(); //scan surrogates if any.... scanSurrogates(contentBuffer); } else { reportFatalError("InvalidCharInCDSect", new Object[]{Integer.toString(c,16)}); fEntityScanner.scanChar(); } } //by this time we have also read surrogate contents if any... if (fDocumentHandler != null) { //fDocumentHandler.characters(contentBuffer, null); } } } fMarkupDepth--; if (fDocumentHandler != null && contentBuffer.length > 0) { //fDocumentHandler.characters(contentBuffer, null); } // call handler if (fDocumentHandler != null) { //fDocumentHandler.endCDATA(null); } return true; } // scanCDATASection(XMLStringBuffer, boolean):boolean /** * Scans an end element. *

     * <p>
     * <pre>
     * [42] ETag ::= '&lt;/' Name S? '>'
     * </pre>
     * <p>

* <strong>Note:</strong> This method uses the fElementQName variable.
     * The contents of this variable will be destroyed. The caller should
     * copy the needed information out of this variable before calling
     * this method.
     * <p>
     * NOTE(review): the body below never references fElementQName; it works
     * on the QName popped from fElementStack, so the note above looks stale.
     *
     * @return The value of fMarkupDepth after the end tag has been consumed.
     */
    protected int scanEndElement() throws IOException, XNIException {
        if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()");

        // pop context
        QName endElementName = fElementStack.popElement();

        String rawname = endElementName.rawname;
        if(DEBUG)System.out.println("endElementName = " + endElementName.toString());

        // Take advantage of the fact that the next string _should_ be the raw
        // name of the element that was just popped.
        // In scanners most of the time is consumed on checks done for XML
        // characters; we can optimize on it and avoid the checks done for
        // endElement, and we also avoid the symbol table lookup.
        // This should work both for namespace processing true or false...

        //REVISIT: if the string is not the same as expected.. we need to do better error handling..
        //We can skip this for now... In any case if the string doesn't match -- document is not well formed.
        if (!fEntityScanner.skipString(endElementName.rawname)) {
            reportFatalError("ETagRequired", new Object[]{rawname});
        }

        // end: optional white space followed by the closing '>'
        fEntityScanner.skipSpaces();
        if (!fEntityScanner.skipChar('>')) {
            reportFatalError("ETagUnterminated", new Object[]{rawname});
        }
        fMarkupDepth--;

        //we have increased the depth for two markup "<" characters
        fMarkupDepth--;

        // check that this element was opened in the same entity
        if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
            reportFatalError("ElementEntityMismatch", new Object[]{rawname});
        }

        //We should not be popping out the context here in endElement because the namespace context is still
        //valid when parser is at the endElement state.
        //if (fNamespaces) {
        //  fNamespaceContext.popContext();
        //}

        // call handler
        if (fDocumentHandler != null ) {
            //end element is scanned in this function so we can send a callback
            //here.
            //we shouldn't be sending callback in scanDocument()

            fDocumentHandler.endElement(endElementName, null);
        }
        // let the DTD grammar helper (when one is installed) see the end tag too
        if(dtdGrammarUtil != null)
            dtdGrammarUtil.endElement(endElementName);

        return fMarkupDepth;

    } // scanEndElement():int

    /**
     * Scans a character reference.
     *

* <pre>
     * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
     * </pre>
*/ protected void scanCharReference() throws IOException, XNIException { fStringBuffer2.clear(); int ch = scanCharReferenceValue(fStringBuffer2, null); fMarkupDepth--; if (ch != -1) { // call handler if (fDocumentHandler != null) { if (fNotifyCharRefs) { fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); } Augmentations augs = null; if (fValidation && ch <= 0x20) { if (fTempAugmentations != null) { fTempAugmentations.removeAllItems(); } else { fTempAugmentations = new AugmentationsImpl(); } augs = fTempAugmentations; augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); } //xxx: How do we deal with this - how to return charReferenceValues //now this is being commented because this is taken care in scanDocument() //fDocumentHandler.characters(fStringBuffer2, null); if (fNotifyCharRefs) { fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); } } } } // scanCharReference() /** * Scans an entity reference. * * @return returns true if the new entity is started. If it was built-in entity * 'false' is returned. * @throws IOException Thrown if i/o error occurs. * @throws XNIException Thrown if handler throws exception upon * notification. 
*/ protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException { String name = fEntityScanner.scanName(); if (name == null) { reportFatalError("NameRequiredInReference", null); } if (!fEntityScanner.skipChar(';')) { reportFatalError("SemicolonRequiredInReference", new Object []{name}); } if (fEntityStore.isUnparsedEntity(name)) { reportFatalError("ReferenceToUnparsedEntity", new Object[]{name}); } fMarkupDepth--; fCurrentEntityName = name; // handle built-in entities if (name == fAmpSymbol) { handleCharacter('&', fAmpSymbol, content); fScannerState = SCANNER_STATE_BUILT_IN_REFS; return ; } else if (name == fLtSymbol) { handleCharacter('<', fLtSymbol, content); fScannerState = SCANNER_STATE_BUILT_IN_REFS; return ; } else if (name == fGtSymbol) { handleCharacter('>', fGtSymbol, content); fScannerState = SCANNER_STATE_BUILT_IN_REFS; return ; } else if (name == fQuotSymbol) { handleCharacter('"', fQuotSymbol, content); fScannerState = SCANNER_STATE_BUILT_IN_REFS; return ; } else if (name == fAposSymbol) { handleCharacter('\'', fAposSymbol, content); fScannerState = SCANNER_STATE_BUILT_IN_REFS; return ; } //1. if the entity is external and support to external entities is not required // 2. or entities should not be replaced //3. or if it is built in entity reference. 
if((fEntityStore.isExternalEntity(name) && !fSupportExternalEntities) || (!fEntityStore.isExternalEntity(name) && !fReplaceEntityReferences) || foundBuiltInRefs){ fScannerState = SCANNER_STATE_REFERENCE; return ; } // start general entity if (!fEntityStore.isDeclaredEntity(name)) { //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception if (fDisallowDoctype && fReplaceEntityReferences) { reportFatalError("EntityNotDeclared", new Object[]{name}); return; } //REVISIT: one more case needs to be included: external PE and standalone is no if ( fHasExternalDTD && !fStandalone) { if (fValidation) fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); } else reportFatalError("EntityNotDeclared", new Object[]{name}); } //we are starting the entity even if the entity was not declared //if that was the case it its taken care in XMLEntityManager.startEntity() //we immediately call the endEntity. Application gets to know if there was //any entity that was not declared. fEntityManager.startEntity(name, false); //set the scaner state to content.. parser will automatically revive itself at any point of time. //setScannerState(SCANNER_STATE_CONTENT); //return true ; } // scanEntityReference() // utility methods /** * Calls document handler with a single character resulting from * built-in entity resolution. * * @param c * @param entity built-in name * @param XMLStringBuffer append the character to buffer * * we really dont need to call this function -- this function is only required when * we integrate with rest of Xerces2. SO maintaining the current behavior and still * calling this function to hanlde built-in entity reference. 
* */ private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException { foundBuiltInRefs = true; content.append(c); if (fDocumentHandler != null) { fSingleChar[0] = c; if (fNotifyBuiltInRefs) { fDocumentHandler.startGeneralEntity(entity, null, null, null); } fTempString.setValues(fSingleChar, 0, 1); //fDocumentHandler.characters(fTempString, null); if (fNotifyBuiltInRefs) { fDocumentHandler.endGeneralEntity(entity, null); } } } // handleCharacter(char) // helper methods /** * Sets the scanner state. * * @param state The new scanner state. */ protected final void setScannerState(int state) { fScannerState = state; if (DEBUG_SCANNER_STATE) { System.out.print("### setScannerState: "); //System.out.print(fScannerState); System.out.print(getScannerStateName(state)); System.out.println(); } } // setScannerState(int) /** * Sets the Driver. * * @param Driver The new Driver. */ protected final void setDriver(Driver driver) { fDriver = driver; if (DEBUG_DISPATCHER) { System.out.print("%%% setDriver: "); System.out.print(getDriverName(driver)); System.out.println(); } } // // Private methods // /** Returns the scanner state name. 
*/ protected String getScannerStateName(int state) { switch (state) { case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; case SCANNER_STATE_CHARACTER_DATA: return "SCANNER_STATE_CHARACTER_DATA" ; } return "??? ("+state+')'; } // getScannerStateName(int):String public String getEntityName(){ //return the cached name return fCurrentEntityName; } /** Returns the driver name. */ public String getDriverName(Driver driver) { if (DEBUG_DISPATCHER) { if (driver != null) { String name = driver.getClass().getName(); int index = name.lastIndexOf('.'); if (index != -1) { name = name.substring(index + 1); index = name.lastIndexOf('$'); if (index != -1) { name = name.substring(index + 1); } } return name; } } return "null"; } // getDriverName():String // // Classes // /** * @author Neeraj Bajaj, Sun Microsystems. */ protected static final class Element { // // Data // /** Symbol. */ public QName qname; //raw name stored as characters public char[] fRawname; /** The next Element entry. 
*/ public Element next; // // Constructors // /** * Constructs a new Element from the given QName and next Element * reference. */ public Element(QName qname, Element next) { this.qname.setValues(qname); this.fRawname = qname.rawname.toCharArray(); this.next = next; } } // class Element /** * Element stack. * * @author Neeraj Bajaj, Sun Microsystems. */ protected class ElementStack2 { // // Data // /** The stack data. */ protected QName [] fQName = new QName[20]; //Element depth protected int fDepth; //total number of elements protected int fCount; //current position protected int fPosition; //Mark refers to the position protected int fMark; protected int fLastDepth ; // // Constructors // /** Default constructor. */ public ElementStack2() { for (int i = 0; i < fQName.length; i++) { fQName[i] = new QName(); } fMark = fPosition = 1; } // () public void resize(){ /** * int length = fElements.length; * Element [] temp = new Element[length * 2]; * System.arraycopy(fElements, 0, temp, 0, length); * fElements = temp; */ //resize QNames int oldLength = fQName.length; QName [] tmp = new QName[oldLength * 2]; System.arraycopy(fQName, 0, tmp, 0, oldLength); fQName = tmp; for (int i = oldLength; i < fQName.length; i++) { fQName[i] = new QName(); } } // // Public methods // /** Check if the element scanned during the start element *matches the stored element. * *@return true if the match suceeds. 
*/
        public boolean matchElement(QName element) {
            //last depth is the depth when last element was pushed
            //if last depth is greater than current depth
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("fLastDepth = " + fLastDepth);
                System.out.println("fDepth = " + fDepth);
            }
            boolean match = false;
            // A match is only attempted when the depth has just decreased and
            // the current depth is at most 2.
            if(fLastDepth > fDepth && fDepth <= 2){
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname);
                }
                // reference comparison of the raw names
                // (NOTE(review): presumably both are interned symbols -- confirm)
                if(element.rawname == fQName[fDepth].rawname){
                    fAdd = false;
                    //mark this position
                    //decrease the depth by 1 as arrays are 0 based
                    fMark = fDepth - 1;
                    //we found the match and from next element skipping will start, add 1
                    fPosition = fMark + 1 ;
                    match = true;
                    //Once we get match decrease the count -- this was increased by nextElement()
                    --fCount;
                    if(DEBUG_SKIP_ALGORITHM){
                        System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED");
                        System.out.println("fMark = " + fMark);
                        System.out.println("fPosition = " + fPosition);
                        System.out.println("fDepth = " + fDepth);
                        System.out.println("fCount = " + fCount);
                    }
                }else{
                    fAdd = true;
                    if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd);
                }
            }
            //store the last depth (the old value is stored, then fDepth is incremented)
            fLastDepth = fDepth++;
            return match;
        } // matchElement(QName):boolean

        /**
         * This function doesn't increase depth. The work is
         * broken down into two functions for efficiency, see matchElement(QName).
         * This function just returns the pointer to the object and its values are set.
*
         * @return QName reference to the next element in the list
         */
        public QName nextElement() {

            //if number of elements becomes equal to the length of array -- stop the skipping
            if (fCount == fQName.length) {
                fShouldSkip = false;
                fAdd = false;
                if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip);
                //xxx: this is not correct, we are returning the last element
                //this wont make any difference since flag has been set to 'false'
                return fQName[--fCount];
            }
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("fCount = " + fCount);
            }
            // hand out the slot at fCount, then advance the count
            return fQName[fCount++];

        }

        /** Note that this function is considerably different than nextElement()
         * This function just returns the previously stored elements
         */
        public QName getNext(){
            //when position reaches number of elements in the list..
            //set the position back to mark, making it a circular linked list.
            if(fPosition == fCount){
                fPosition = fMark;
            }
            return fQName[fPosition++];
        }

        /** Returns the current depth and then decrements it. */
        public int popElement(){
            return fDepth--;
        }

        /** Clears the stack without throwing away existing QName objects. */
        public void clear() {
            fLastDepth = 0;
            fDepth = 0;
            fCount = 0 ;
            fPosition = fMark = 1;
        } // clear()

    } // class ElementStack2

    /**
     * Element stack. This stack operates without synchronization, error
     * checking, and it re-uses objects instead of throwing popped items
     * away.
     *
     * @author Andy Clark, IBM
     */
    protected class ElementStack {

        //
        // Data
        //

        /** The stack data. */
        protected QName[] fElements;
        // per-depth index into fElements (written by push() and matchElement(),
        // read by popElement() and reposition())
        protected int [] fInt = new int[20];

        //Element depth
        protected int fDepth;
        //total number of elements
        protected int fCount;
        //current position
        protected int fPosition;
        //Mark refers to the position
        protected int fMark;

        protected int fLastDepth ;

        //
        // Constructors
        //

        /** Default constructor: pre-allocates 20 reusable QName objects. */
        public ElementStack() {
            fElements = new QName[20];
            for (int i = 0; i < fElements.length; i++) {
                fElements[i] = new QName();
            }
        } // <init>()

        //
        // Public methods
        //

        /**
         * Pushes an element on the stack.
         *

* <strong>Note:</strong> The QName values are copied into the
         * stack. In other words, the caller does <em>not</em> orphan
         * the element to the stack. Also, the QName object returned
         * is <em>not</em> orphaned to the caller. It should be
         * considered read-only.
         *
         * @param element The element to push onto the stack.
         *
         * @return Returns the actual QName object that stores the
         *         copied values.
         */
        //XXX: THIS FUNCTION IS NOT USED
        public QName pushElement(QName element) {
            // grow the array (doubling) once every slot is in use
            if (fDepth == fElements.length) {
                QName[] array = new QName[fElements.length * 2];
                System.arraycopy(fElements, 0, array, 0, fDepth);
                fElements = array;
                for (int i = fDepth; i < fElements.length; i++) {
                    fElements[i] = new QName();
                }
            }
            fElements[fDepth].setValues(element);
            return fElements[fDepth++];
        } // pushElement(QName):QName

        /** Note that this function is considerably different than nextElement()
         * This function just returns the previously stored elements.
         * Unlike ElementStack2.getNext(), the position is not advanced here;
         * push() does that.
         */
        public QName getNext(){
            //when position reaches number of elements in the list..
            //set the position back to mark, making it a circular linked list.
            if(fPosition == fCount){
                fPosition = fMark;
            }
            //store the position of last opened tag at particular depth
            //fInt[++fDepth] = fPosition;
            if(DEBUG_SKIP_ALGORITHM){
                System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname);
            }
            //return fElements[fPosition++];
            return fElements[fPosition];
        }

        /** This function should be called only when element was skipped successfully.
         * 1. Increase the depth - because element was successfully skipped.
         * 2. Store the position of the element token in array "last opened tag" at depth.
         * 3. increase the position counter so as to point to the next element in the array
         */
        public void push(){
            fInt[++fDepth] = fPosition++;
        }

        /** Check if the element scanned during the start element
         * matches the stored element.
         *
         * @return true if the match succeeds.
*/
        public boolean matchElement(QName element) {
            //last depth is the depth when last element was pushed
            //if last depth is greater than current depth
            //if(DEBUG_SKIP_ALGORITHM){
            //   System.out.println("Check if the element " + element.rawname + " matches");
            //  System.out.println("fLastDepth = " + fLastDepth);
            // System.out.println("fDepth = " + fDepth);
            //}
            boolean match = false;
            // A match is only attempted when the depth has just decreased and
            // the current depth is at most 3.
            if(fLastDepth > fDepth && fDepth <= 3){
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----");
                    System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname);
                }
                // reference comparison of the raw names
                // (NOTE(review): presumably both are interned symbols -- confirm)
                if(element.rawname == fElements[fDepth - 1].rawname){
                    fAdd = false;
                    //mark this position
                    //decrease the depth by 1 as arrays are 0 based
                    fMark = fDepth - 1;
                    //we found the match
                    fPosition = fMark;
                    match = true;
                    //Once we get match decrease the count -- this was increased by nextElement()
                    --fCount;
                    if(DEBUG_SKIP_ALGORITHM){
                        System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false");
                        System.out.println("fMark = " + fMark);
                        System.out.println("fPosition = " + fPosition);
                        System.out.println("fDepth = " + fDepth);
                        System.out.println("fCount = " + fCount);
                        System.out.println("---------MATCH SUCEEDED-----------------");
                        System.out.println("");
                    }
                }else{
                    fAdd = true;
                    if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd);
                }
            }
            //store the position for the current depth
            //when we are adding the elements, when skipping
            //starts even then this should be tracked ie. when
            //calling getNext()
            if(match){
                //from next element skipping will start, add 1
                fInt[fDepth] = fPosition++;
            } else{
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1));
                }
                //since fInt[fDepth] contains pointer to the element array which are 0 based.
                fInt[fDepth] = fCount - 1;
            }

            //if number of elements becomes equal to the length of array -- stop the skipping
            //xxx: should we do "fCount == fInt.length"
            if (fCount == fElements.length) {
                fSkip = false;
                fAdd = false;
                //reposition the stack -- it seems to be too complex document and there is no symmetry in structure
                reposition();
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED");
                    System.out.println("REPOSITIONING THE STACK");
                    System.out.println("-----------SKIPPING STOPPED----------");
                    System.out.println("");
                }
                // note: false is returned here even if a match was found above
                return false;
            }
            if(DEBUG_SKIP_ALGORITHM){
                if(match){
                    System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth);
                }else{
                    System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth);
                }
            }
            //store the last depth
            fLastDepth = fDepth;
            return match;
        } // matchElement(QName):boolean

        /**
         * Returns the next element on the stack.
         *
         * @return Returns the actual QName object. Callee should
         * use this object to store the details of next element encountered.
         */
        public QName nextElement() {
            if(fSkip){
                fDepth++;
                //boundary checks are done in matchElement()
                return fElements[fCount++];
            } else if (fDepth == fElements.length) {
                // not skipping and the array is full: double its size
                QName[] array = new QName[fElements.length * 2];
                System.arraycopy(fElements, 0, array, 0, fDepth);
                fElements = array;
                for (int i = fDepth; i < fElements.length; i++) {
                    fElements[i] = new QName();
                }
            }

            return fElements[fDepth++];

        } // nextElement():QName

        /**
         * Pops an element off of the stack and returns the stored QName.
         *

* <strong>Note:</strong> The object returned is <em>not</em>
         * orphaned to the caller. Therefore, the caller should consider
         * the object to be read-only.
         */
        public QName popElement() {
            //return the same object that was pushed -- this would avoid
            //setting the values for every end element.
            //STRONG: this object is read only -- this object reference shouldn't be stored.
            if(fSkip || fAdd ){
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname);
                    System.out.println("");
                }
                // look the element up through the per-depth index, then go up one level
                return fElements[fInt[fDepth--]];
            } else{
                if(DEBUG_SKIP_ALGORITHM){
                    System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname );
                }
                // plain stack behaviour: decrement first, then read
                return fElements[--fDepth] ;
            }
            //element.setValues(fElements[--fDepth]);
        } // popElement():QName

        /** Reposition the stack. fInt [] contains all the opened tags at particular depth.
         * Transfer all the opened tags starting from depth '2' to the current depth and reposition them
         * as per the depth.
         */
        public void reposition(){
            for( int i = 2 ; i <= fDepth ; i++){
                fElements[i-1] = fElements[fInt[i]];
            }
            if(DEBUG_SKIP_ALGORITHM){
                for( int i = 0 ; i < fDepth ; i++){
                    System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname);
                }
            }
        }

        /** Clears the stack without throwing away existing QName objects. */
        public void clear() {
            fDepth = 0;
            fLastDepth = 0;
            fCount = 0 ;
            fPosition = fMark = 1;

        } // clear()

        /**
         * This function is as a result of optimization done for endElement --
         * we dont need to set the value for every end element encountered.
         * For well-formedness checks we can have the same QName object that was pushed.
         * The values will be set only if application need to know about the endElement.
         *
         * @return the QName stored at index fDepth
         */
        public QName getLastPoppedElement(){
            return fElements[fDepth];
        }
    } // class ElementStack

    /**
     * Drives the parser to the next state/event on the input. Parser is guaranteed
     * to stop at the next state/event.
* * Internally XML document is divided into several states. Each state represents * a sections of XML document. When this functions returns normally, it has read * the section of XML document and returns the state corresponding to section of * document which has been read. For optimizations, a particular driver * can read ahead of the section of document (state returned) just read and * can maintain a different internal state. * * * @author Neeraj Bajaj, Sun Microsystems */ protected interface Driver { /** * Drives the parser to the next state/event on the input. Parser is guaranteed * to stop at the next state/event. * * Internally XML document is divided into several states. Each state represents * a sections of XML document. When this functions returns normally, it has read * the section of XML document and returns the state corresponding to section of * document which has been read. For optimizations, a particular driver * can read ahead of the section of document (state returned) just read and * can maintain a different internal state. * * @return state representing the section of document just read. * * @throws IOException Thrown on i/o error. * @throws XNIException Thrown on parse error. */ public int next() throws IOException, XNIException; } // interface Driver /** * Driver to handle content scanning. This driver is capable of reading * the fragment of XML document. When it has finished reading fragment * of XML documents, it can pass the job of reading to another driver. * * This class has been modified as per the new design which is more suited to * efficiently build pull parser. Lot of performance improvements have been done and * the code has been added to support stax functionality/features. 
* * @author Neeraj Bajaj, Sun Microsystems * * * @author Andy Clark, IBM * @author Eric Ye, IBM */ protected class FragmentContentDriver implements Driver { // // Driver methods // private boolean fContinueDispatching = true; private boolean fScanningForMarkup = true; /** * decides the appropriate state of the parser */ private void startOfMarkup() throws IOException { fMarkupDepth++; final int ch = fEntityScanner.peekChar(); switch(ch){ case '?' :{ setScannerState(SCANNER_STATE_PI); fEntityScanner.skipChar(ch); break; } case '!' :{ fEntityScanner.skipChar(ch); if (fEntityScanner.skipChar('-')) { if (!fEntityScanner.skipChar('-')) { reportFatalError("InvalidCommentStart", null); } setScannerState(SCANNER_STATE_COMMENT); } else if (fEntityScanner.skipString(cdata)) { setScannerState(SCANNER_STATE_CDATA ); } else if (!scanForDoctypeHook()) { reportFatalError("MarkupNotRecognizedInContent", null); } break; } case '/' :{ setScannerState(SCANNER_STATE_END_ELEMENT_TAG); fEntityScanner.skipChar(ch); break; } default :{ if (isValidNameStartChar(ch)) { setScannerState(SCANNER_STATE_START_ELEMENT_TAG); } else { reportFatalError("MarkupNotRecognizedInContent", null); } } } }//startOfMarkup private void startOfContent() throws IOException { if (fEntityScanner.skipChar('<')) { setScannerState(SCANNER_STATE_START_OF_MARKUP); } else if (fEntityScanner.skipChar('&')) { setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE } else { //element content is there.. setScannerState(SCANNER_STATE_CHARACTER_DATA); } }//startOfContent /** * * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. * At any point of time when in doubt over the current state of the parser, the state should be * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of * the parser to one of its sub state. 
* sub states are defined in the parser on the basis of different XML component like
         * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc..
         * These sub states help the parser to have fine control over the parsing. These are the
         * different milepost, parser stops at each sub state (milepost). Based on this state it is
         * decided if parser needs to stop at next milepost ??
         *
         */
        public void decideSubState() throws IOException {
            // keep resolving until the state is no longer one of the two super states
            while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){

                switch (fScannerState) {

                    case SCANNER_STATE_CONTENT: {
                        startOfContent() ;
                        break;
                    }

                    case SCANNER_STATE_START_OF_MARKUP: {
                        startOfMarkup() ;
                        break;
                    }
                }
            }
        }//decideSubState

        /**
         * Drives the parser to the next state/event on the input. Parser is guaranteed
         * to stop at the next state/event. Internally XML document
         * is divided into several states. Each state represents a sections of XML
         * document. When this functions returns normally, it has read the section
         * of XML document and returns the state corresponding to section of
         * document which has been read. For optimizations, a particular driver
         * can read ahead of the section of document (state returned) just read and
         * can maintain a different internal state.
         *
         * State returned corresponds to Stax states.
         *
         * @return state representing the section of document just read.
         *
         * @throws IOException Thrown on i/o error.
         * @throws XNIException Thrown on parse error.
         */
        public int next() throws IOException, XNIException {
            while (true) {
                try {
                    if(DEBUG_NEXT){
                        System.out.println("NOW IN FragmentContentDriver");
                        System.out.println("Entering the FragmentContentDriver with = " + getScannerStateName(fScannerState));
                    }

                    //decide the actual sub state of the scanner. For more information refer to the javadoc of
                    //decideSubState.
switch (fScannerState) { case SCANNER_STATE_CONTENT: { final int ch = fEntityScanner.peekChar(); if (ch == '<') { fEntityScanner.scanChar(); setScannerState(SCANNER_STATE_START_OF_MARKUP); } else if (ch == '&') { fEntityScanner.scanChar(); setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE break; } else { //element content is there.. setScannerState(SCANNER_STATE_CHARACTER_DATA); break; } } case SCANNER_STATE_START_OF_MARKUP: { startOfMarkup(); break; }//case: SCANNER_STATE_START_OF_MARKUP }//end of switch //decideSubState() ; //do some special handling if isCoalesce is set to true. if(fIsCoalesce){ fUsebuffer = true ; //if the last section was character data if(fLastSectionWasCharacterData){ //if we dont encounter any CDATA or ENITY REFERENCE and current state is also not SCANNER_STATE_CHARACTER_DATA //return the last scanned charactrer data. if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ fLastSectionWasCharacterData = false; return XMLEvent.CHARACTERS; } }//if last section was CDATA or ENTITY REFERENCE //xxx: there might be another entity reference or CDATA after this //blah blah &<blah blah else if((fLastSectionWasCData || fLastSectionWasEntityReference)){ //and current state is not SCANNER_STATE_CHARACTER_DATA //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE //this means there is nothing more to be coalesced. //return the CHARACTERS event. 
if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ fLastSectionWasCData = false; fLastSectionWasEntityReference = false; return XMLEvent.CHARACTERS; } } } if(DEBUG_NEXT){ System.out.println("Actual scanner state set by decideSubState is = " + getScannerStateName(fScannerState)); } switch(fScannerState){ case XMLEvent.START_DOCUMENT : return XMLEvent.START_DOCUMENT; case SCANNER_STATE_START_ELEMENT_TAG :{ //xxx this function returns true when element is empty.. can be linked to end element event. //returns true if the element is empty fEmptyElement = scanStartElement() ; //if the element is empty the next event is "end element" if(fEmptyElement){ setScannerState(SCANNER_STATE_END_ELEMENT_TAG); }else{ //set the next possible state setScannerState(SCANNER_STATE_CONTENT); } return XMLEvent.START_ELEMENT ; } case SCANNER_STATE_CHARACTER_DATA: { if(DEBUG_COALESCE){ System.out.println("fLastSectionWasCData = " + fLastSectionWasCData); System.out.println("fIsCoalesce = " + fIsCoalesce); } //if last section was either entity reference or cdata or character data we should be using buffer fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData ; //When coalesce is set to true and last state was REFERENCE or CDATA or CHARACTER_DATA, buffer should not be cleared. 
if( fIsCoalesce && (fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData) ){ fLastSectionWasEntityReference = false; fLastSectionWasCData = false; fLastSectionWasCharacterData = true ; fUsebuffer = true; }else{ //clear the buffer fContentBuffer.clear(); } //set the fTempString length to 0 before passing it on to scanContent //scanContent sets the correct co-ordinates as per the content read fTempString.length = 0; int c = fEntityScanner.scanContent(fTempString); if(DEBUG){ System.out.println("fTempString = " + fTempString); } if(fEntityScanner.skipChar('<')){ //check if we have reached end of element if(fEntityScanner.skipChar('/')){ //increase the mark up depth fMarkupDepth++; fLastSectionWasCharacterData = false; setScannerState(SCANNER_STATE_END_ELEMENT_TAG); //check if its start of new element }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ fMarkupDepth++; fLastSectionWasCharacterData = false; setScannerState(SCANNER_STATE_START_ELEMENT_TAG); }else{ setScannerState(SCANNER_STATE_START_OF_MARKUP); //there can be cdata ahead if coalesce is true we should call again if(fIsCoalesce){ fUsebuffer = true; fLastSectionWasCharacterData = true; fContentBuffer.append(fTempString); fTempString.length = 0; continue; } } //in case last section was either entity reference or cdata or character data -- we should be using buffer if(fUsebuffer){ fContentBuffer.append(fTempString); fTempString.length = 0; } if(DEBUG){ System.out.println("NOT USING THE BUFFER, STRING = " + fTempString.toString()); } if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ if(DEBUG)System.out.println("Return SPACE EVENT"); return XMLEvent.SPACE; }else return XMLEvent.CHARACTERS; } else{ fUsebuffer = true ; if(DEBUG){ System.out.println("fContentBuffer = " + fContentBuffer); System.out.println("fTempString = " + fTempString); } fContentBuffer.append(fTempString); fTempString.length = 0; } if (c == '\r') { if(DEBUG){ 
System.out.println("'\r' character found"); } // happens when there is the character reference //xxx: We know the next chracter.. we should just skip it and add ']' directlry fEntityScanner.scanChar(); fUsebuffer = true; fContentBuffer.append((char)c); c = -1 ; } else if (c == ']') { //fStringBuffer.clear(); //xxx: We know the next chracter.. we should just skip it and add ']' directlry fUsebuffer = true; fContentBuffer.append((char)fEntityScanner.scanChar()); // remember where we are in case we get an endEntity before we // could flush the buffer out - this happens when we're parsing an // entity which ends with a ] fInScanContent = true; // We work on a single character basis to handle cases such as: // ']]]>' which we might otherwise miss. // if (fEntityScanner.skipChar(']')) { fContentBuffer.append(']'); while (fEntityScanner.skipChar(']')) { fContentBuffer.append(']'); } if (fEntityScanner.skipChar('>')) { reportFatalError("CDEndInContent", null); } } c = -1 ; fInScanContent = false; } do{ //xxx: we should be using only one buffer.. // we need not to grow the buffer only when isCoalesce() is not true; if (c == '<') { fEntityScanner.scanChar(); setScannerState(SCANNER_STATE_START_OF_MARKUP); break; }//xxx what should be the behavior if entity reference is present in the content ? else if (c == '&') { fEntityScanner.scanChar(); setScannerState(SCANNER_STATE_REFERENCE); break; }///xxx since this part is also characters, it should be merged... else if (c != -1 && isInvalidLiteral(c)) { if (XMLChar.isHighSurrogate(c)) { // special case: surrogates scanSurrogates(fContentBuffer) ; setScannerState(SCANNER_STATE_CONTENT); } else { reportFatalError("InvalidCharInContent", new Object[] { Integer.toString(c, 16)}); fEntityScanner.scanChar(); } break; } //xxx: scanContent also gives character callback. 
c = scanContent(fContentBuffer) ; //we should not be iterating again if fIsCoalesce is not set to true if(!fIsCoalesce){ setScannerState(SCANNER_STATE_CONTENT); break; } }while(true); //if (fDocumentHandler != null) { // fDocumentHandler.characters(fContentBuffer, null); //} if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); //if fIsCoalesce is true there might be more data so call fDriver.next() if(fIsCoalesce){ fLastSectionWasCharacterData = true ; continue; }else{ if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ if(DEBUG)System.out.println("Return SPACE EVENT"); return XMLEvent.SPACE; } else return XMLEvent.CHARACTERS ; } } case SCANNER_STATE_END_ELEMENT_TAG :{ if(fEmptyElement){ //set it back to false. fEmptyElement = false; setScannerState(SCANNER_STATE_CONTENT); //check the case when there is comment after single element document // and some comment after this return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; } else if(scanEndElement() == 0) { //It is last element of the document if (elementDepthIsZeroHook()) { //if element depth is zero , it indicates the end of the document //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function //xxx understand this point once again.. return XMLEvent.END_ELEMENT ; } } setScannerState(SCANNER_STATE_CONTENT); return XMLEvent.END_ELEMENT ; } case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: scanComment(); setScannerState(SCANNER_STATE_CONTENT); return XMLEvent.COMMENT; //break; } case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { //clear the buffer first fContentBuffer.clear() ; //xxx: which buffer should be passed. Ideally we shouldn't have //more than two buffers -- //xxx: where should we add the switch for buffering. 
scanPI(fContentBuffer); setScannerState(SCANNER_STATE_CONTENT); return XMLEvent.PROCESSING_INSTRUCTION; //break; } case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { //xxx: What if CDATA is the first event //<>]]>append //we should not clear the buffer only when the last state was either SCANNER_STATE_REFERENCE or //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ fLastSectionWasCData = true ; fLastSectionWasEntityReference = false; fLastSectionWasCharacterData = false; }//if we dont need to coalesce clear the buffer else{ fContentBuffer.clear(); } fUsebuffer = true; //CDATA section is completely read in all the case. scanCDATASection(fContentBuffer , true); setScannerState(SCANNER_STATE_CONTENT); //1. if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true //and just call fDispatche.next(). Since we have set the scanner state to //SCANNER_STATE_CONTENT (super state) parser will automatically recover and //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event //2. Check if application has set for reporting CDATA event //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent //return the cdata event as characters. if(fIsCoalesce){ fLastSectionWasCData = true ; //there might be more data to coalesce. continue; }else if(fReportCdataEvent){ return XMLEvent.CDATA; } else{ return XMLEvent.CHARACTERS; } } case SCANNER_STATE_REFERENCE :{ fMarkupDepth++; foundBuiltInRefs = false; //we should not clear the buffer only when the last state was either CDATA or //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ //fLastSectionWasEntityReference or fLastSectionWasCData are only //used when fIsCoalesce is set to true. 
fLastSectionWasEntityReference = true ; fLastSectionWasCData = false; fLastSectionWasCharacterData = false; }//if we dont need to coalesce clear the buffer else{ fContentBuffer.clear(); } fUsebuffer = true ; //take care of character reference if (fEntityScanner.skipChar('#')) { scanCharReferenceValue(fContentBuffer, null); fMarkupDepth--; if(!fIsCoalesce){ setScannerState(SCANNER_STATE_CONTENT); return XMLEvent.CHARACTERS; } } else { // this function also starts new entity scanEntityReference(fContentBuffer); //if there was built-in entity reference & coalesce is not true //return CHARACTERS if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ setScannerState(SCANNER_STATE_CONTENT); return XMLEvent.CHARACTERS; } //if there was a text declaration, call next() it will be taken care. if(fScannerState == SCANNER_STATE_TEXT_DECL){ fLastSectionWasEntityReference = true ; continue; } if(fScannerState == SCANNER_STATE_REFERENCE){ setScannerState(SCANNER_STATE_CONTENT); if (fReplaceEntityReferences && fEntityStore.isDeclaredEntity(fCurrentEntityName)) { // Skip the entity reference, we don't care continue; } return XMLEvent.ENTITY_REFERENCE; } } //Wether it was character reference, entity reference or built-in entity //set the next possible state to SCANNER_STATE_CONTENT setScannerState(SCANNER_STATE_CONTENT); fLastSectionWasEntityReference = true ; continue; } case SCANNER_STATE_TEXT_DECL: { // scan text decl if (fEntityScanner.skipString("





© 2015 - 2024 Weber Informatics LLC | Privacy Policy