All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jdk8u.jaxp.org.apache.xerces.external.impl.XMLEntityScanner Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package jdk8u.jaxp.org.apache.xerces.external.impl;


import jdk8u.jaxp.org.apache.xerces.external.impl.XMLScanner.NameType;
import jdk8u.jaxp.org.apache.xerces.external.impl.io.ASCIIReader;
import jdk8u.jaxp.org.apache.xerces.external.impl.io.UCSReader;
import jdk8u.jaxp.org.apache.xerces.external.impl.io.UTF8Reader;
import jdk8u.jaxp.org.apache.xerces.external.impl.msg.XMLMessageFormatter;
import jdk8u.jaxp.org.apache.xerces.external.util.EncodingMap;
import jdk8u.jaxp.org.apache.xerces.external.util.SymbolTable;
import jdk8u.jaxp.org.apache.xerces.external.util.XMLChar;
import jdk8u.jaxp.org.apache.xerces.external.util.XMLStringBuffer;
import jdk8u.jaxp.org.apache.xerces.external.utils.XMLLimitAnalyzer;
import jdk8u.jaxp.org.apache.xerces.external.utils.XMLSecurityManager;
import jdk8u.jaxp.org.apache.xerces.external.utils.XMLSecurityManager.Limit;
import jdk8u.jaxp.org.apache.xerces.external.xni.*;
import jdk8u.jaxp.org.apache.xerces.external.xni.parser.XMLComponentManager;
import jdk8u.jaxp.org.apache.xerces.external.xni.parser.XMLConfigurationException;
import jdk8u.jaxp.xml.external.stream.Entity;
import jdk8u.jaxp.xml.external.stream.Entity.ScannedEntity;
import jdk8u.jaxp.xml.external.stream.XMLBufferListener;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Locale;

/**
 * Implements the entity scanner methods.
 *
 * @author Neeraj Bajaj, Sun Microsystems
 * @author Andy Clark, IBM
 * @author Arnaud  Le Hors, IBM
 * @author K.Venugopal Sun Microsystems
 *
 */
public class XMLEntityScanner implements XMLLocator  {

    protected Entity.ScannedEntity fCurrentEntity = null;
    protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;

    protected XMLEntityManager fEntityManager;

    /** Security manager. */
    protected XMLSecurityManager fSecurityManager = null;

    /** Limit analyzer. */
    protected XMLLimitAnalyzer fLimitAnalyzer = null;

    /** Debug switching readers for encodings. */
    private static final boolean DEBUG_ENCODINGS = false;

    /** Listeners which should know when load is being called */
    private ArrayList listeners = new ArrayList<>();

    private static final boolean [] VALID_NAMES = new boolean[127];

    /**
     * Debug printing of buffer. This debugging flag works best when you
     * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
     * 64 characters.
     */
    private static final boolean DEBUG_BUFFER = false;
    private static final boolean DEBUG_SKIP_STRING = false;
    /**
     * To signal the end of the document entity, this exception will be thrown.
     */
    private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
        private static final long serialVersionUID = 980337771224675268L;
        public Throwable fillInStackTrace() {
            return this;
        }
    };

    protected SymbolTable fSymbolTable = null;
    protected XMLErrorReporter fErrorReporter = null;
    int [] whiteSpaceLookup = new int[100];
    int whiteSpaceLen = 0;
    boolean whiteSpaceInfoNeeded = true;

    /**
     * Allow Java encoding names. This feature identifier is:
     * http://apache.org/xml/features/allow-java-encodings
     */
    protected boolean fAllowJavaEncodings;

    //Will be used only during internal subsets.
    //for appending data.

    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Feature identifier: allow Java encodings. */
    protected static final String ALLOW_JAVA_ENCODINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;

    protected PropertyManager fPropertyManager = null ;

    boolean isExternal = false;
    static {

        for(int i=0x0041;i<=0x005A ; i++){
            VALID_NAMES[i]=true;
        }
        for(int i=0x0061;i<=0x007A; i++){
            VALID_NAMES[i]=true;
        }
        for(int i=0x0030;i<=0x0039; i++){
            VALID_NAMES[i]=true;
        }
        VALID_NAMES[45]=true;
        VALID_NAMES[46]=true;
        VALID_NAMES[58]=true;
        VALID_NAMES[95]=true;
    }

    // Remember, that the XML version has explicitly been set,
    // so that XMLStreamReader.getVersion() can find that out.
    protected boolean xmlVersionSetExplicitly = false;

    // indicates that the operation is for detecting XML version
    boolean detectingVersion = false;

    //
    // Constructors
    //

    /** Default constructor. */
    public XMLEntityScanner() {
    } // ()


    /**  private constructor, this class can only be instantiated within this class. Instance of this class should
     *    be obtained using getEntityScanner() or getEntityScanner(ScannedEntity scannedEntity)
     *    @see getEntityScanner()
     *    @see getEntityScanner(ScannedEntity)
     */
    public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) {
        fEntityManager = entityManager ;
        reset(propertyManager);
    } // ()


    // set buffer size:
    public final void setBufferSize(int size) {
        // REVISIT: Buffer size passed to entity scanner
        // was not being kept in synch with the actual size
        // of the buffers in each scanned entity. If any
        // of the buffers were actually resized, it was possible
        // that the parser would throw an ArrayIndexOutOfBoundsException
        // for documents which contained names which are longer than
        // the current buffer size. Conceivably the buffer size passed
        // to entity scanner could be used to determine a minimum size
        // for resizing, if doubling its size is smaller than this
        // minimum. -- mrglavas
        fBufferSize = size;
    }

    /**
     * Resets the components.
     */
    public void reset(PropertyManager propertyManager){
        fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ;
        fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ;
        resetCommon();
    }

    /**
     * Resets the component. The component can query the component manager
     * about any features and properties that affect the operation of the
     * component.
     *
     * @param componentManager The component manager.
     *
     * @throws SAXException Thrown by component on initialization error.
     *                      For example, if a feature or property is
     *                      required for the operation of the component, the
     *                      component manager may throw a
     *                      SAXNotRecognizedException or a
     *                      SAXNotSupportedException.
     */
    public void reset(XMLComponentManager componentManager)
    throws XMLConfigurationException {
        // xerces features
        fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false);

        //xerces properties
        fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
        fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
        resetCommon();
    } // reset(XMLComponentManager)


    public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager,
            XMLErrorReporter reporter) {
        fCurrentEntity = null;
        fSymbolTable = symbolTable;
        fEntityManager = entityManager;
        fErrorReporter = reporter;
        fLimitAnalyzer = fEntityManager.fLimitAnalyzer;
        fSecurityManager = fEntityManager.fSecurityManager;
    }

    private void resetCommon() {
        fCurrentEntity = null;
        whiteSpaceLen = 0;
        whiteSpaceInfoNeeded = true;
        listeners.clear();
        fLimitAnalyzer = fEntityManager.fLimitAnalyzer;
        fSecurityManager = fEntityManager.fSecurityManager;
    }

    /**
     * Returns the XML version of the current entity. This will normally be the
     * value from the XML or text declaration or defaulted by the parser. Note that
     * that this value may be different than the version of the processing rules
     * applied to the current entity. For instance, an XML 1.1 document may refer to
     * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire
     * document. Also note that, for a given entity, this value can only be considered
     * final once the XML or text declaration has been read or once it has been
     * determined that there is no such declaration.
     */
    public final String getXMLVersion() {
        if (fCurrentEntity != null) {
            return fCurrentEntity.xmlVersion;
        }
        return null;
    } // getXMLVersion():String

    /**
     * Sets the XML version. This method is used by the
     * scanners to report the value of the version pseudo-attribute
     * in an XML or text declaration.
     *
     * @param xmlVersion the XML version of the current entity
     */
    public final void setXMLVersion(String xmlVersion) {
        xmlVersionSetExplicitly = true;
        fCurrentEntity.xmlVersion = xmlVersion;
    } // setXMLVersion(String)


    /** set the instance of current scanned entity.
     *   @param ScannedEntity
     */

    public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){
        fCurrentEntity = scannedEntity ;
        if(fCurrentEntity != null){
            isExternal = fCurrentEntity.isExternal();
            if(DEBUG_BUFFER)
                System.out.println("Current Entity is "+scannedEntity.name);
        }
    }

    public  Entity.ScannedEntity getCurrentEntity(){
        return fCurrentEntity ;
    }
    //
    // XMLEntityReader methods
    //

    /**
     * Returns the base system identifier of the currently scanned
     * entity, or null if none is available.
     */
    public final String getBaseSystemId() {
        return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
    } // getBaseSystemId():String

    /**
     * @see jdk8u.jaxp.org.apache.xerces.external.xni.XMLResourceIdentifier#setBaseSystemId(String)
     */
    public void setBaseSystemId(String systemId) {
        //no-op
    }

    ///////////// Locator methods start.
    public final int getLineNumber(){
        //if the entity is closed, we should return -1
        //xxx at first place why such call should be there...
        return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1 ;
    }

    /**
     * @see jdk8u.jaxp.org.apache.xerces.external.xni.XMLLocator#setLineNumber(int)
     */
    public void setLineNumber(int line) {
        //no-op
    }


    public final int getColumnNumber(){
        //if the entity is closed, we should return -1
        //xxx at first place why such call should be there...
        return fCurrentEntity != null ? fCurrentEntity.columnNumber : -1 ;
    }

    /**
     * @see jdk8u.jaxp.org.apache.xerces.external.xni.XMLLocator#setColumnNumber(int)
     */
    public void setColumnNumber(int col) {
        // no-op
    }


    public final int getCharacterOffset(){
        return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ;
    }

    /** Returns the expanded system identifier.  */
    public final String getExpandedSystemId() {
        return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
    }

    /**
     * @see jdk8u.jaxp.org.apache.xerces.external.xni.XMLResourceIdentifier#setExpandedSystemId(String)
     */
    public void setExpandedSystemId(String systemId) {
        //no-op
    }

    /** Returns the literal system identifier.  */
    public final String getLiteralSystemId() {
        return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null;
    }

    /**
     * @see jdk8u.jaxp.org.apache.xerces.external.xni.XMLResourceIdentifier#setLiteralSystemId(String)
     */
    public void setLiteralSystemId(String systemId) {
        //no-op
    }

    /** Returns the public identifier.  */
    public final String getPublicId() {
        return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null;
    }

    /**
     * @see jdk8u.jaxp.org.apache.xerces.external.xni.XMLResourceIdentifier#setPublicId(String)
     */
    public void setPublicId(String publicId) {
        //no-op
    }

    ///////////////// Locator methods finished.

    /** the version of the current entity being scanned */
    public void setVersion(String version){
        fCurrentEntity.version = version;
    }

    public String getVersion(){
        if (fCurrentEntity != null)
            return fCurrentEntity.version ;
        return null;
    }

    /**
     * Returns the encoding of the current entity.
     * Note that, for a given entity, this value can only be
     * considered final once the encoding declaration has been read (or once it
     * has been determined that there is no such declaration) since, no encoding
     * having been specified on the XMLInputSource, the parser
     * will make an initial "guess" which could be in error.
     */
    public final String getEncoding() {
        if (fCurrentEntity != null) {
            return fCurrentEntity.encoding;
        }
        return null;
    } // getEncoding():String

    /**
     * Sets the encoding of the scanner. This method is used by the
     * scanners if the XMLDecl or TextDecl line contains an encoding
     * pseudo-attribute.
     * 

* Note: The underlying character reader on the * current entity will be changed to accomodate the new encoding. * However, the new encoding is ignored if the current reader was * not constructed from an input stream (e.g. an external entity * that is resolved directly to the appropriate java.io.Reader * object). * * @param encoding The IANA encoding name of the new encoding. * * @throws IOException Thrown if the new encoding is not supported. * * @see jdk8u.jaxp.org.apache.xerces.external.util.EncodingMap */ public final void setEncoding(String encoding) throws IOException { if (DEBUG_ENCODINGS) { System.out.println("$$$ setEncoding: "+encoding); } if (fCurrentEntity.stream != null) { // if the encoding is the same, don't change the reader and // re-use the original reader used by the OneCharReader // NOTE: Besides saving an object, this overcomes deficiencies // in the UTF-16 reader supplied with the standard Java // distribution (up to and including 1.3). The UTF-16 // decoder buffers 8K blocks even when only asked to read // a single char! -Ac if (fCurrentEntity.encoding == null || !fCurrentEntity.encoding.equals(encoding)) { // UTF-16 is a bit of a special case. If the encoding is UTF-16, // and we know the endian-ness, we shouldn't change readers. // If it's ISO-10646-UCS-(2|4), then we'll have to deduce // the endian-ness from the encoding we presently have. if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) { String ENCODING = encoding.toUpperCase(Locale.ENGLISH); if(ENCODING.equals("UTF-16")) return; if(ENCODING.equals("ISO-10646-UCS-4")) { if(fCurrentEntity.encoding.equals("UTF-16BE")) { fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE); } else { fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE); } return; } if(ENCODING.equals("ISO-10646-UCS-2")) { if(fCurrentEntity.encoding.equals("UTF-16BE")) { fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE); } else { fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE); } return; } } // wrap a new reader around the input stream, changing // the encoding if (DEBUG_ENCODINGS) { System.out.println("$$$ creating new reader from stream: "+ fCurrentEntity.stream); } //fCurrentEntity.stream.reset(); fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null); fCurrentEntity.encoding = encoding; } else { if (DEBUG_ENCODINGS) System.out.println("$$$ reusing old reader on stream"); } } } // setEncoding(String) /** Returns true if the current entity being scanned is external. */ public final boolean isExternal() { return fCurrentEntity.isExternal(); } // isExternal():boolean public int getChar(int relative) throws IOException{ if(arrangeCapacity(relative + 1, false)){ return fCurrentEntity.ch[fCurrentEntity.position + relative]; }else{ return -1; } }//getChar() /** * Returns the next character on the input. *

* Note: The character is not consumed. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ public int peekChar() throws IOException { if (DEBUG_BUFFER) { System.out.print("(peekChar: "); print(); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true, true); } // peek at character int c = fCurrentEntity.ch[fCurrentEntity.position]; // return peeked character if (DEBUG_BUFFER) { System.out.print(")peekChar: "); print(); if (isExternal) { System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'"); } else { System.out.println(" -> '"+(char)c+"'"); } } if (isExternal) { return c != '\r' ? c : '\n'; } else { return c; } } // peekChar():int /** * Returns the next character on the input. *

* Note: The character is consumed. * * @param nt The type of the name (element or attribute) * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ protected int scanChar(NameType nt) throws IOException { if (DEBUG_BUFFER) { System.out.print("(scanChar: "); print(); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true, true); } // scan character int offset = fCurrentEntity.position; int c = fCurrentEntity.ch[fCurrentEntity.position++]; if (c == '\n' || (c == '\r' && isExternal)) { fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { invokeListeners(1); fCurrentEntity.ch[0] = (char)c; load(1, false, false); offset = 0; } if (c == '\r' && isExternal) { if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') { fCurrentEntity.position--; } c = '\n'; } } // return character that was scanned if (DEBUG_BUFFER) { System.out.print(")scanChar: "); print(); System.out.println(" -> '"+(char)c+"'"); } fCurrentEntity.columnNumber++; if (!detectingVersion) { checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset); } return c; } // scanChar():int /** * Returns a string matching the NMTOKEN production appearing immediately * on the input as a symbol, or null if NMTOKEN Name string is present. *

* Note: The NMTOKEN characters are consumed. *

* Note: The string returned must be a symbol. The * SymbolTable can be used for this purpose. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. * * @see jdk8u.jaxp.org.apache.xerces.external.util.SymbolTable * @see jdk8u.jaxp.org.apache.xerces.external.util.XMLChar#isName */ protected String scanNmtoken() throws IOException { if (DEBUG_BUFFER) { System.out.print("(scanNmtoken: "); print(); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true, true); } // scan nmtoken int offset = fCurrentEntity.position; boolean vc = false; char c; while (true){ //while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) { c = fCurrentEntity.ch[fCurrentEntity.position]; if(c < 127){ vc = VALID_NAMES[c]; }else{ vc = XMLChar.isName(c); } if(!vc)break; if (++fCurrentEntity.position == fCurrentEntity.count) { int length = fCurrentEntity.position - offset; invokeListeners(length); if (length == fCurrentEntity.fBufferSize) { // bad luck we have to resize our buffer char[] tmp = new char[fCurrentEntity.fBufferSize * 2]; System.arraycopy(fCurrentEntity.ch, offset, tmp, 0, length); fCurrentEntity.ch = tmp; fCurrentEntity.fBufferSize *= 2; } else { System.arraycopy(fCurrentEntity.ch, offset, fCurrentEntity.ch, 0, length); } offset = 0; if (load(length, false, false)) { break; } } } int length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length; // return nmtoken String symbol = null; if (length > 0) { symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); } if (DEBUG_BUFFER) { System.out.print(")scanNmtoken: "); print(); System.out.println(" -> "+String.valueOf(symbol)); } return symbol; } // scanNmtoken():String /** * Returns a string matching the Name production appearing immediately * on the input as a symbol, or null if no Name string is present. *

* Note: The Name characters are consumed. *

* Note: The string returned must be a symbol. The * SymbolTable can be used for this purpose. * * @param nt The type of the name (element or attribute) * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. * * @see jdk8u.jaxp.org.apache.xerces.external.util.SymbolTable * @see jdk8u.jaxp.org.apache.xerces.external.util.XMLChar#isName * @see jdk8u.jaxp.org.apache.xerces.external.util.XMLChar#isNameStart */ protected String scanName(NameType nt) throws IOException { if (DEBUG_BUFFER) { System.out.print("(scanName: "); print(); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true, true); } // scan name int offset = fCurrentEntity.position; int length; if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { if (++fCurrentEntity.position == fCurrentEntity.count) { invokeListeners(1); fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; offset = 0; if (load(1, false, false)) { fCurrentEntity.columnNumber++; String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); if (DEBUG_BUFFER) { System.out.print(")scanName: "); print(); System.out.println(" -> "+String.valueOf(symbol)); } return symbol; } } boolean vc =false; while (true ){ //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ; char c = fCurrentEntity.ch[fCurrentEntity.position]; if(c < 127){ vc = VALID_NAMES[c]; }else{ vc = XMLChar.isName(c); } if(!vc)break; if ((length = checkBeforeLoad(fCurrentEntity, offset, offset)) > 0) { offset = 0; if (load(length, false, false)) { break; } } } } length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length; // return name String symbol; if (length > 0) { checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length); checkEntityLimit(nt, fCurrentEntity, offset, length); symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); } else symbol = null; if (DEBUG_BUFFER) { System.out.print(")scanName: "); print(); System.out.println(" -> "+String.valueOf(symbol)); } return symbol; } // scanName():String /** * Scans a qualified name from the input, setting the fields of the * QName structure appropriately. *

* Note: The qualified name characters are consumed. *

* Note: The strings used to set the values of the * QName structure must be symbols. The SymbolTable can be used for * this purpose. * * @param qname The qualified name structure to fill. * @param nt The type of the name (element or attribute) * * @return Returns true if a qualified name appeared immediately on * the input and was scanned, false otherwise. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. * * @see jdk8u.jaxp.org.apache.xerces.external.util.SymbolTable * @see jdk8u.jaxp.org.apache.xerces.external.util.XMLChar#isName * @see jdk8u.jaxp.org.apache.xerces.external.util.XMLChar#isNameStart */ protected boolean scanQName(QName qname, NameType nt) throws IOException { if (DEBUG_BUFFER) { System.out.print("(scanQName, "+qname+": "); print(); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true, true); } // scan qualified name int offset = fCurrentEntity.position; //making a check if if the specified character is a valid name start character //as defined by production [5] in the XML 1.0 specification. // Name ::= (Letter | '_' | ':') (NameChar)* if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { if (++fCurrentEntity.position == fCurrentEntity.count) { invokeListeners(1); fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; offset = 0; if (load(1, false, false)) { fCurrentEntity.columnNumber++; //adding into symbol table. //XXX We are trying to add single character in SymbolTable?????? String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); qname.setValues(null, name, name, null); if (DEBUG_BUFFER) { System.out.print(")scanQName, "+qname+": "); print(); System.out.println(" -> true"); } checkEntityLimit(nt, fCurrentEntity, 0, 1); return true; } } int index = -1; boolean vc = false; int length; while ( true){ //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ; char c = fCurrentEntity.ch[fCurrentEntity.position]; if(c < 127){ vc = VALID_NAMES[c]; }else{ vc = XMLChar.isName(c); } if(!vc)break; if (c == ':') { if (index != -1) { break; } index = fCurrentEntity.position; //check prefix before further read checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, index - offset); } if ((length = checkBeforeLoad(fCurrentEntity, offset, index)) > 0) { if (index != -1) { index = index - offset; } offset = 0; if (load(length, false, false)) { break; } } } length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length; if (length > 0) { String prefix = null; String localpart = null; String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); if (index != -1) { int prefixLength = index - offset; //check the result: prefix checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, prefixLength); prefix = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, prefixLength); int len = length - prefixLength - 1; //check the result: localpart checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, index + 1, len); localpart = fSymbolTable.addSymbol(fCurrentEntity.ch, index + 1, len); } else { localpart = rawname; //check the result: localpart checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length); } qname.setValues(prefix, localpart, rawname, null); if (DEBUG_BUFFER) { System.out.print(")scanQName, "+qname+": "); print(); System.out.println(" -> true"); } checkEntityLimit(nt, fCurrentEntity, offset, length); return true; } } // no qualified name found if (DEBUG_BUFFER) { System.out.print(")scanQName, "+qname+": "); print(); System.out.println(" -> false"); } return false; } // scanQName(QName):boolean /** * Checks whether the end of the entity buffer has been reached. If yes, * checks against the limit and buffer size before loading more characters. * * @param entity the current entity * @param offset the offset from which the current read was started * @param nameOffset the offset from which the current name starts * @return the length of characters scanned before the end of the buffer, * zero if there is more to be read in the buffer */ protected int checkBeforeLoad(Entity.ScannedEntity entity, int offset, int nameOffset) throws IOException { int length = 0; if (++entity.position == entity.count) { length = entity.position - offset; int nameLength = length; if (nameOffset != -1) { nameOffset = nameOffset - offset; nameLength = length - nameOffset; } else { nameOffset = offset; } //check limit before loading more data checkLimit(Limit.MAX_NAME_LIMIT, entity, nameOffset, nameLength); invokeListeners(length); if (length == entity.ch.length) { // bad luck we have to resize our buffer char[] tmp = new char[entity.fBufferSize * 2]; System.arraycopy(entity.ch, offset, tmp, 0, length); entity.ch = tmp; entity.fBufferSize *= 2; } else { System.arraycopy(entity.ch, offset, entity.ch, 0, length); } } return length; } /** * If the current entity is an Entity reference, check the accumulated size * against the limit. * * @param nt type of name (element, attribute or entity) * @param entity The current entity * @param offset The index of the first byte * @param length The length of the entity scanned */ protected void checkEntityLimit(NameType nt, ScannedEntity entity, int offset, int length) { if (entity == null || !entity.isGE) { return; } if (nt != NameType.REFERENCE) { checkLimit(Limit.GENERAL_ENTITY_SIZE_LIMIT, entity, offset, length); } if (nt == NameType.ELEMENTSTART || nt == NameType.ATTRIBUTENAME) { checkNodeCount(entity); } } /** * If the current entity is an Entity reference, counts the total nodes in * the entity and checks the accumulated value against the limit. * * @param entity The current entity */ protected void checkNodeCount(ScannedEntity entity) { if (entity != null && entity.isGE) { checkLimit(Limit.ENTITY_REPLACEMENT_LIMIT, entity, 0, 1); } } /** * Checks whether the value of the specified Limit exceeds its limit * * @param limit The Limit to be checked * @param entity The current entity * @param offset The index of the first byte * @param length The length of the entity scanned */ protected void checkLimit(Limit limit, ScannedEntity entity, int offset, int length) { fLimitAnalyzer.addValue(limit, entity.name, length); if (fSecurityManager.isOverLimit(limit, fLimitAnalyzer)) { fSecurityManager.debugPrint(fLimitAnalyzer); Object[] e = (limit == Limit.ENTITY_REPLACEMENT_LIMIT) ? new Object[]{fLimitAnalyzer.getValue(limit), fSecurityManager.getLimit(limit), fSecurityManager.getStateLiteral(limit)} : new Object[]{entity.name, fLimitAnalyzer.getValue(limit), fSecurityManager.getLimit(limit), fSecurityManager.getStateLiteral(limit)}; fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, limit.key(), e, XMLErrorReporter.SEVERITY_FATAL_ERROR); } if (fSecurityManager.isOverLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { fSecurityManager.debugPrint(fLimitAnalyzer); fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "TotalEntitySizeLimit", new Object[]{fLimitAnalyzer.getTotalValue(Limit.TOTAL_ENTITY_SIZE_LIMIT), fSecurityManager.getLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT), fSecurityManager.getStateLiteral(Limit.TOTAL_ENTITY_SIZE_LIMIT)}, XMLErrorReporter.SEVERITY_FATAL_ERROR); } } /** * CHANGED: * Scans a range of parsed character data, This function appends the character data to * the supplied buffer. *

* Note: The characters are consumed. *

* Note: This method does not guarantee to return * the longest run of parsed character data. This method may return * before markup due to reaching the end of the input buffer or any * other reason. *

* * @param content The content structure to fill. * * @return Returns the next character on the input, if known. This * value may be -1 but this does note designate * end of file. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ protected int scanContent(XMLString content) throws IOException { if (DEBUG_BUFFER) { System.out.print("(scanContent: "); print(); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true, true); } else if (fCurrentEntity.position == fCurrentEntity.count - 1) { invokeListeners(1); fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; load(1, false, false); fCurrentEntity.position = 0; } // normalize newlines int offset = fCurrentEntity.position; int c = fCurrentEntity.ch[offset]; int newlines = 0; boolean counted = false; if (c == '\n' || (c == '\r' && isExternal)) { if (DEBUG_BUFFER) { System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); print(); System.out.println(); } do { c = fCurrentEntity.ch[fCurrentEntity.position++]; if (c == '\r' && isExternal) { newlines++; fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { checkEntityLimit(null, fCurrentEntity, offset, newlines); offset = 0; fCurrentEntity.position = newlines; if (load(newlines, false, true)) { counted = true; break; } } if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { fCurrentEntity.position++; offset++; } /*** NEWLINE NORMALIZATION ***/ else { newlines++; } } else if (c == '\n') { newlines++; fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { checkEntityLimit(null, fCurrentEntity, offset, newlines); offset = 0; fCurrentEntity.position = newlines; if (load(newlines, false, true)) { counted = true; break; } } } else { fCurrentEntity.position--; break; } } while (fCurrentEntity.position < fCurrentEntity.count - 1); for (int i = offset; i < fCurrentEntity.position; i++) { fCurrentEntity.ch[i] = '\n'; } int length = fCurrentEntity.position - offset; if (fCurrentEntity.position == fCurrentEntity.count - 1) { checkEntityLimit(null, fCurrentEntity, offset, length); //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee //on buffering the data.. content.setValues(fCurrentEntity.ch, offset, length); //content.append(fCurrentEntity.ch, offset, length); if (DEBUG_BUFFER) { System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); print(); System.out.println(); } return -1; } if (DEBUG_BUFFER) { System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); print(); System.out.println(); } } while (fCurrentEntity.position < fCurrentEntity.count) { c = fCurrentEntity.ch[fCurrentEntity.position++]; if (!XMLChar.isContent(c)) { fCurrentEntity.position--; break; } } int length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length - newlines; if (!counted) { checkEntityLimit(null, fCurrentEntity, offset, length); } //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee //on buffering the data.. content.setValues(fCurrentEntity.ch, offset, length); //content.append(fCurrentEntity.ch, offset, length); // return next character if (fCurrentEntity.position != fCurrentEntity.count) { c = fCurrentEntity.ch[fCurrentEntity.position]; // REVISIT: Does this need to be updated to fix the // #x0D ^#x0A newline normalization problem? -Ac if (c == '\r' && isExternal) { c = '\n'; } } else { c = -1; } if (DEBUG_BUFFER) { System.out.print(")scanContent: "); print(); System.out.println(" -> '"+(char)c+"'"); } return c; } // scanContent(XMLString):int /** * Scans a range of attribute value data, setting the fields of the * XMLString structure, appropriately. *

* Note: The characters are consumed. *

* Note: This method does not guarantee to return * the longest run of attribute value data. This method may return * before the quote character due to reaching the end of the input * buffer or any other reason. *

* Note: The fields contained in the XMLString * structure are not guaranteed to remain valid upon subsequent calls * to the entity scanner. Therefore, the caller is responsible for * immediately using the returned character data or making a copy of * the character data. * * @param quote The quote character that signifies the end of the * attribute value data. * @param content The content structure to fill. * @param isNSURI a flag indicating whether the content is a Namespace URI * * @return Returns the next character on the input, if known. This * value may be -1 but this does note designate * end of file. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ protected int scanLiteral(int quote, XMLString content, boolean isNSURI) throws IOException { if (DEBUG_BUFFER) { System.out.print("(scanLiteral, '"+(char)quote+"': "); print(); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true, true); } else if (fCurrentEntity.position == fCurrentEntity.count - 1) { invokeListeners(1); fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; load(1, false, false); fCurrentEntity.position = 0; } // normalize newlines int offset = fCurrentEntity.position; int c = fCurrentEntity.ch[offset]; int newlines = 0; if(whiteSpaceInfoNeeded) whiteSpaceLen=0; if (c == '\n' || (c == '\r' && isExternal)) { if (DEBUG_BUFFER) { System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); print(); System.out.println(); } do { c = fCurrentEntity.ch[fCurrentEntity.position++]; if (c == '\r' && isExternal) { newlines++; fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { offset = 0; fCurrentEntity.position = newlines; if (load(newlines, false, true)) { break; } } if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { fCurrentEntity.position++; offset++; } /*** NEWLINE NORMALIZATION ***/ else { newlines++; } /***/ } else if (c == '\n') { newlines++; fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { offset = 0; fCurrentEntity.position = newlines; if (load(newlines, false, true)) { break; } } /*** NEWLINE NORMALIZATION *** * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r' * && external) { * fCurrentEntity.position++; * offset++; * } * /***/ } else { fCurrentEntity.position--; break; } } while (fCurrentEntity.position < fCurrentEntity.count - 1); int i=0; for ( i = offset; i < fCurrentEntity.position; i++) { fCurrentEntity.ch[i] = '\n'; storeWhiteSpace(i); } int length = fCurrentEntity.position - offset; if (fCurrentEntity.position == fCurrentEntity.count - 1) { content.setValues(fCurrentEntity.ch, offset, length); if (DEBUG_BUFFER) { System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); print(); System.out.println(); } return -1; } if (DEBUG_BUFFER) { System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); print(); System.out.println(); } } // scan literal value for (; fCurrentEntity.position '"+(char)c+"'"); } return c; } // scanLiteral(int,XMLString):int /** * Save whitespace information. Increase the whitespace buffer by 100 * when needed. * * For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). * * @param whiteSpacePos position of a whitespace in the scanner entity buffer */ private void storeWhiteSpace(int whiteSpacePos) { if (whiteSpaceLen >= whiteSpaceLookup.length) { int [] tmp = new int[whiteSpaceLookup.length + 100]; System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length); whiteSpaceLookup = tmp; } whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos; } //CHANGED: /** * Scans a range of character data up to the specified delimiter, * setting the fields of the XMLString structure, appropriately. *

* Note: The characters are consumed. *

* Note: This assumes that the delimiter contains at * least one character. *

* Note: This method does not guarantee to return * the longest run of character data. This method may return before * the delimiter due to reaching the end of the input buffer or any * other reason. *

* @param delimiter The string that signifies the end of the character * data to be scanned. * @param buffer The XMLStringBuffer to fill. * * @return Returns true if there is more data to scan, false otherwise. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ protected boolean scanData(String delimiter, XMLStringBuffer buffer) throws IOException { boolean done = false; int delimLen = delimiter.length(); char charAt0 = delimiter.charAt(0); do { if (DEBUG_BUFFER) { System.out.print("(scanData: "); print(); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true, false); } boolean bNextEntity = false; while ((fCurrentEntity.position > fCurrentEntity.count - delimLen) && (!bNextEntity)) { System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch, 0, fCurrentEntity.count - fCurrentEntity.position); bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false); fCurrentEntity.position = 0; fCurrentEntity.startPosition = 0; } if (fCurrentEntity.position > fCurrentEntity.count - delimLen) { // something must be wrong with the input: e.g., file ends in an unterminated comment int length = fCurrentEntity.count - fCurrentEntity.position; checkEntityLimit(NameType.COMMENT, fCurrentEntity, fCurrentEntity.position, length); buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length); fCurrentEntity.columnNumber += fCurrentEntity.count; fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition); fCurrentEntity.position = fCurrentEntity.count; fCurrentEntity.startPosition = fCurrentEntity.count; load(0, true, false); return false; } // normalize newlines int offset = fCurrentEntity.position; int c = fCurrentEntity.ch[offset]; int newlines = 0; if (c == '\n' || (c == '\r' && isExternal)) { if (DEBUG_BUFFER) { System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": "); print(); System.out.println(); } do { c = fCurrentEntity.ch[fCurrentEntity.position++]; if (c == '\r' && isExternal) { newlines++; fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { offset = 0; fCurrentEntity.position = newlines; if (load(newlines, false, true)) { break; } } if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { fCurrentEntity.position++; offset++; } /*** NEWLINE NORMALIZATION ***/ else { newlines++; } } else if (c == '\n') { newlines++; fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { offset = 0; fCurrentEntity.position = newlines; fCurrentEntity.count = newlines; if (load(newlines, false, true)) { break; } } } else { fCurrentEntity.position--; break; } } while (fCurrentEntity.position < fCurrentEntity.count - 1); for (int i = offset; i < fCurrentEntity.position; i++) { fCurrentEntity.ch[i] = '\n'; } int length = fCurrentEntity.position - offset; if (fCurrentEntity.position == fCurrentEntity.count - 1) { checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length); buffer.append(fCurrentEntity.ch, offset, length); if (DEBUG_BUFFER) { System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); print(); System.out.println(); } return true; } if (DEBUG_BUFFER) { System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": "); print(); System.out.println(); } } // iterate over buffer looking for delimiter OUTER: while (fCurrentEntity.position < fCurrentEntity.count) { c = fCurrentEntity.ch[fCurrentEntity.position++]; if (c == charAt0) { // looks like we just hit the delimiter int delimOffset = fCurrentEntity.position - 1; for (int i = 1; i < delimLen; i++) { if (fCurrentEntity.position == fCurrentEntity.count) { fCurrentEntity.position -= i; break OUTER; } c = fCurrentEntity.ch[fCurrentEntity.position++]; if (delimiter.charAt(i) != c) { fCurrentEntity.position -= i; break; } } if (fCurrentEntity.position == delimOffset + delimLen) { done = true; break; } } else if (c == '\n' || (isExternal && c == '\r')) { fCurrentEntity.position--; break; } else if (XMLChar.isInvalid(c)) { fCurrentEntity.position--; int length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length - newlines; checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length); buffer.append(fCurrentEntity.ch, offset, length); return true; } } int length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length - newlines; checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length); if (done) { length -= delimLen; } buffer.append(fCurrentEntity.ch, offset, length); // return true if string was skipped if (DEBUG_BUFFER) { System.out.print(")scanData: "); print(); System.out.println(" -> " + done); } } while (!done); return !done; } // scanData(String, XMLStringBuffer) /** * Skips a character appearing immediately on the input. *

* Note: The character is consumed only if it matches * the specified character. * * @param c The character to skip. * @param nt The type of the name (element or attribute) * * @return Returns true if the character was skipped. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ protected boolean skipChar(int c, NameType nt) throws IOException { if (DEBUG_BUFFER) { System.out.print("(skipChar, '"+(char)c+"': "); print(); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true, true); } // skip character int offset = fCurrentEntity.position; int cc = fCurrentEntity.ch[fCurrentEntity.position]; if (cc == c) { fCurrentEntity.position++; if (c == '\n') { fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; } else { fCurrentEntity.columnNumber++; } if (DEBUG_BUFFER) { System.out.print(")skipChar, '"+(char)c+"': "); print(); System.out.println(" -> true"); } checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset); return true; } else if (c == '\n' && cc == '\r' && isExternal) { // handle newlines if (fCurrentEntity.position == fCurrentEntity.count) { invokeListeners(1); fCurrentEntity.ch[0] = (char)cc; load(1, false, false); } fCurrentEntity.position++; if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { fCurrentEntity.position++; } fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (DEBUG_BUFFER) { System.out.print(")skipChar, '"+(char)c+"': "); print(); System.out.println(" -> true"); } checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset); return true; } // character was not skipped if (DEBUG_BUFFER) { System.out.print(")skipChar, '"+(char)c+"': "); print(); System.out.println(" -> false"); } return false; } // skipChar(int):boolean public boolean isSpace(char ch){ return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r'); } /** * Skips space characters appearing immediately on the input. *

* Note: The characters are consumed only if they are * space characters. * * @return Returns true if at least one space character was skipped. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. * * @see jdk8u.jaxp.org.apache.xerces.external.util.XMLChar#isSpace */ protected boolean skipSpaces() throws IOException { if (DEBUG_BUFFER) { System.out.print("(skipSpaces: "); print(); System.out.println(); } //boolean entityChanged = false; // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true, true); } //we are doing this check only in skipSpace() because it is called by //fMiscDispatcher and we want the parser to exit gracefully when document //is well-formed. //it is possible that end of document is reached and //fCurrentEntity becomes null //nothing was read so entity changed 'false' should be returned. if(fCurrentEntity == null){ return false ; } // skip spaces int c = fCurrentEntity.ch[fCurrentEntity.position]; int offset = fCurrentEntity.position - 1; if (XMLChar.isSpace(c)) { do { boolean entityChanged = false; // handle newlines if (c == '\n' || (isExternal && c == '\r')) { fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count - 1) { invokeListeners(1); fCurrentEntity.ch[0] = (char)c; entityChanged = load(1, true, false); if (!entityChanged){ // the load change the position to be 1, // need to restore it when entity not changed fCurrentEntity.position = 0; }else if(fCurrentEntity == null){ return true ; } } if (c == '\r' && isExternal) { // REVISIT: Does this need to be updated to fix the // #x0D ^#x0A newline normalization problem? -Ac if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { fCurrentEntity.position--; } } } else { fCurrentEntity.columnNumber++; } //If this is a general entity, spaces within a start element should be counted checkEntityLimit(null, fCurrentEntity, offset, fCurrentEntity.position - offset); offset = fCurrentEntity.position; // load more characters, if needed if (!entityChanged){ fCurrentEntity.position++; } if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true, true); //we are doing this check only in skipSpace() because it is called by //fMiscDispatcher and we want the parser to exit gracefully when document //is well-formed. //it is possible that end of document is reached and //fCurrentEntity becomes null //nothing was read so entity changed 'false' should be returned. if(fCurrentEntity == null){ return true ; } } } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); if (DEBUG_BUFFER) { System.out.print(")skipSpaces: "); print(); System.out.println(" -> true"); } return true; } // no spaces were found if (DEBUG_BUFFER) { System.out.print(")skipSpaces: "); print(); System.out.println(" -> false"); } return false; } // skipSpaces():boolean /** * @param length This function checks that following number of characters are available. * to the underlying buffer. * @return This function returns true if capacity asked is available. */ public boolean arrangeCapacity(int length) throws IOException{ return arrangeCapacity(length, false); } /** * @param length This function checks that following number of characters are available. * to the underlying buffer. * @param changeEntity a flag to indicate that the underlying function should change the entity * @return This function returns true if capacity asked is available. * */ public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{ //check if the capacity is availble in the current buffer //count is no. of characters in the buffer [x][m][l] //position is '0' based //System.out.println("fCurrent Entity " + fCurrentEntity); if((fCurrentEntity.count - fCurrentEntity.position) >= length) { return true; } if(DEBUG_SKIP_STRING){ System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); System.out.println("length = " + length); } boolean entityChanged = false; //load more characters -- this function shouldn't change the entity while((fCurrentEntity.count - fCurrentEntity.position) < length){ if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){ invokeListeners(0); System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position); fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position; fCurrentEntity.position = 0; } if((fCurrentEntity.count - fCurrentEntity.position) < length){ int pos = fCurrentEntity.position; invokeListeners(pos); entityChanged = load(fCurrentEntity.count, changeEntity, false); fCurrentEntity.position = pos; if(entityChanged)break; } if(DEBUG_SKIP_STRING){ System.out.println("fCurrentEntity.count = " + fCurrentEntity.count); System.out.println("fCurrentEntity.position = " + fCurrentEntity.position); System.out.println("length = " + length); } } //load changes the position.. set it back to the point where we started. //after loading check again. if((fCurrentEntity.count - fCurrentEntity.position) >= length) { return true; } else { return false; } } /** * Skips the specified string appearing immediately on the input. *

* Note: The characters are consumed only if all * the characters are skipped. * * @param s The string to skip. * * @return Returns true if the string was skipped. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ protected boolean skipString(String s) throws IOException { final int length = s.length(); //first make sure that required capacity is avaible if(arrangeCapacity(length, false)){ final int beforeSkip = fCurrentEntity.position ; int afterSkip = fCurrentEntity.position + length - 1 ; if(DEBUG_SKIP_STRING){ System.out.println("skipString,length = " + s + "," + length); System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip, length)); } //s.charAt() indexes are 0 to 'Length -1' based. int i = length - 1 ; //check from reverse while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){ if(afterSkip-- == beforeSkip){ fCurrentEntity.position = fCurrentEntity.position + length ; fCurrentEntity.columnNumber += length; if (!detectingVersion) { checkEntityLimit(null, fCurrentEntity, beforeSkip, length); } return true; } } } return false; } // skipString(String):boolean protected boolean skipString(char [] s) throws IOException { final int length = s.length; //first make sure that required capacity is avaible if(arrangeCapacity(length, false)){ int beforeSkip = fCurrentEntity.position; if(DEBUG_SKIP_STRING){ System.out.println("skipString,length = " + new String(s) + "," + length); System.out.println("skipString,length = " + new String(s) + "," + length); } for(int i=0;i XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) { length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE; } if (DEBUG_BUFFER) System.out.println(" length to try to read: "+length); int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length); if (DEBUG_BUFFER) System.out.println(" length actually read: "+count); // reset count and position boolean entityChanged = false; if (count != -1) { if (count != 0) { // record the last count fCurrentEntity.fLastCount = count; fCurrentEntity.count = count + offset; fCurrentEntity.position = offset; } } // end of this entity else { fCurrentEntity.count = offset; fCurrentEntity.position = offset; entityChanged = true; if (changeEntity) { //notify the entity manager about the end of entity fEntityManager.endEntity(); //return if the current entity becomes null if(fCurrentEntity == null){ throw END_OF_DOCUMENT_ENTITY; } // handle the trailing edges if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true, false); } } } if (DEBUG_BUFFER) { System.out.print(")load, "+offset+": "); print(); System.out.println(); } return entityChanged; } // load(int, boolean):boolean /** * Creates a reader capable of reading the given input stream in * the specified encoding. * * @param inputStream The input stream. * @param encoding The encoding name that the input stream is * encoded using. If the user has specified that * Java encoding names are allowed, then the * encoding name may be a Java encoding name; * otherwise, it is an ianaEncoding name. * @param isBigEndian For encodings (like uCS-4), whose names cannot * specify a byte order, this tells whether the order is bigEndian. null menas * unknown or not relevant. * * @return Returns a reader. */ protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) throws IOException { // normalize encoding name if (encoding == null) { encoding = "UTF-8"; } // try to use an optimized reader String ENCODING = encoding.toUpperCase(Locale.ENGLISH); if (ENCODING.equals("UTF-8")) { if (DEBUG_ENCODINGS) { System.out.println("$$$ creating UTF8Reader"); } return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); } if (ENCODING.equals("US-ASCII")) { if (DEBUG_ENCODINGS) { System.out.println("$$$ creating ASCIIReader"); } return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); } if(ENCODING.equals("ISO-10646-UCS-4")) { if(isBigEndian != null) { boolean isBE = isBigEndian.booleanValue(); if(isBE) { return new UCSReader(inputStream, UCSReader.UCS4BE); } else { return new UCSReader(inputStream, UCSReader.UCS4LE); } } else { fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "EncodingByteOrderUnsupported", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); } } if(ENCODING.equals("ISO-10646-UCS-2")) { if(isBigEndian != null) { // sould never happen with this encoding... boolean isBE = isBigEndian.booleanValue(); if(isBE) { return new UCSReader(inputStream, UCSReader.UCS2BE); } else { return new UCSReader(inputStream, UCSReader.UCS2LE); } } else { fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "EncodingByteOrderUnsupported", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); } } // check for valid name boolean validIANA = XMLChar.isValidIANAEncoding(encoding); boolean validJava = XMLChar.isValidJavaEncoding(encoding); if (!validIANA || (fAllowJavaEncodings && !validJava)) { fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "EncodingDeclInvalid", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 // because every byte is a valid ISO Latin 1 character. // It may not translate correctly but if we failed on // the encoding anyway, then we're expecting the content // of the document to be bad. This will just prevent an // invalid UTF-8 sequence to be detected. This is only // important when continue-after-fatal-error is turned // on. -Ac encoding = "ISO-8859-1"; } // try to use a Java reader String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); if (javaEncoding == null) { if(fAllowJavaEncodings) { javaEncoding = encoding; } else { fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "EncodingDeclInvalid", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); // see comment above. javaEncoding = "ISO8859_1"; } } else if (javaEncoding.equals("ASCII")) { if (DEBUG_ENCODINGS) { System.out.println("$$$ creating ASCIIReader"); } return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); } if (DEBUG_ENCODINGS) { System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); if (javaEncoding == encoding) { System.out.print(" (IANA encoding)"); } System.out.println(); } return new InputStreamReader(inputStream, javaEncoding); } // createReader(InputStream,String, Boolean): Reader /** * Returns the IANA encoding name that is auto-detected from * the bytes specified, with the endian-ness of that encoding where appropriate. * * @param b4 The first four bytes of the input. * @param count The number of bytes actually read. * @return a 2-element array: the first element, an IANA-encoding string, * the second element a Boolean which is true iff the document is big endian, false * if it's little-endian, and null if the distinction isn't relevant. */ protected Object[] getEncodingName(byte[] b4, int count) { if (count < 2) { return new Object[]{"UTF-8", null}; } // UTF-16, with BOM int b0 = b4[0] & 0xFF; int b1 = b4[1] & 0xFF; if (b0 == 0xFE && b1 == 0xFF) { // UTF-16, big-endian return new Object [] {"UTF-16BE", new Boolean(true)}; } if (b0 == 0xFF && b1 == 0xFE) { // UTF-16, little-endian return new Object [] {"UTF-16LE", new Boolean(false)}; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 3) { return new Object [] {"UTF-8", null}; } // UTF-8 with a BOM int b2 = b4[2] & 0xFF; if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { return new Object [] {"UTF-8", null}; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 4) { return new Object [] {"UTF-8", null}; } // other encodings int b3 = b4[3] & 0xFF; if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { // UCS-4, big endian (1234) return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { // UCS-4, little endian (4321) return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; } if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { // UCS-4, unusual octet order (2143) // REVISIT: What should this be? return new Object [] {"ISO-10646-UCS-4", null}; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { // UCS-4, unusual octect order (3412) // REVISIT: What should this be? return new Object [] {"ISO-10646-UCS-4", null}; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { // UTF-16, big-endian, no BOM // (or could turn out to be UCS-2... // REVISIT: What should this be? return new Object [] {"UTF-16BE", new Boolean(true)}; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { // UTF-16, little-endian, no BOM // (or could turn out to be UCS-2... return new Object [] {"UTF-16LE", new Boolean(false)}; } if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { // EBCDIC // a la xerces1, return CP037 instead of EBCDIC here return new Object [] {"CP037", null}; } // default encoding return new Object [] {"UTF-8", null}; } // getEncodingName(byte[],int):Object[] /** * xxx not removing endEntity() so that i remember that we need to implement it. * Ends an entity. * * @throws XNIException Thrown by entity handler to signal an error. */ // /** Prints the contents of the buffer. */ final void print() { if (DEBUG_BUFFER) { if (fCurrentEntity != null) { System.out.print('['); System.out.print(fCurrentEntity.count); System.out.print(' '); System.out.print(fCurrentEntity.position); if (fCurrentEntity.count > 0) { System.out.print(" \""); for (int i = 0; i < fCurrentEntity.count; i++) { if (i == fCurrentEntity.position) { System.out.print('^'); } char c = fCurrentEntity.ch[i]; switch (c) { case '\n': { System.out.print("\\n"); break; } case '\r': { System.out.print("\\r"); break; } case '\t': { System.out.print("\\t"); break; } case '\\': { System.out.print("\\\\"); break; } default: { System.out.print(c); } } } if (fCurrentEntity.position == fCurrentEntity.count) { System.out.print('^'); } System.out.print('"'); } System.out.print(']'); System.out.print(" @ "); System.out.print(fCurrentEntity.lineNumber); System.out.print(','); System.out.print(fCurrentEntity.columnNumber); } else { System.out.print("*NO CURRENT ENTITY*"); } } } /** * Registers the listener object and provides callback. * @param listener listener to which call back should be provided when scanner buffer * is being changed. */ public void registerListener(XMLBufferListener listener) { if (!listeners.contains(listener)) { listeners.add(listener); } } /** * * @param loadPos Starting position from which new data is being loaded into scanner buffer. */ public void invokeListeners(int loadPos){ for (int i=0; i * Note: The characters are consumed only if they would * match non-terminal S before end of line normalization is performed. * * @return Returns true if at least one space character was skipped. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. * * @see jdk8u.jaxp.org.apache.xerces.external.util.XMLChar#isSpace */ protected final boolean skipDeclSpaces() throws IOException { if (DEBUG_BUFFER) { System.out.print("(skipDeclSpaces: "); //XMLEntityManager.print(fCurrentEntity); System.out.println(); } // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true, false); } // skip spaces int c = fCurrentEntity.ch[fCurrentEntity.position]; if (XMLChar.isSpace(c)) { boolean external = fCurrentEntity.isExternal(); do { boolean entityChanged = false; // handle newlines if (c == '\n' || (external && c == '\r')) { fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count - 1) { fCurrentEntity.ch[0] = (char)c; entityChanged = load(1, true, false); if (!entityChanged) // the load change the position to be 1, // need to restore it when entity not changed fCurrentEntity.position = 0; } if (c == '\r' && external) { // REVISIT: Does this need to be updated to fix the // #x0D ^#x0A newline normalization problem? -Ac if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { fCurrentEntity.position--; } } /*** NEWLINE NORMALIZATION *** * else { * if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r' * && external) { * fCurrentEntity.position++; * } * } * /***/ } else { fCurrentEntity.columnNumber++; } // load more characters, if needed if (!entityChanged) fCurrentEntity.position++; if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true, false); } } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); if (DEBUG_BUFFER) { System.out.print(")skipDeclSpaces: "); // XMLEntityManager.print(fCurrentEntity); System.out.println(" -> true"); } return true; } // no spaces were found if (DEBUG_BUFFER) { System.out.print(")skipDeclSpaces: "); //XMLEntityManager.print(fCurrentEntity); System.out.println(" -> false"); } return false; } // skipDeclSpaces():boolean } // class XMLEntityScanner





© 2015 - 2024 Weber Informatics LLC | Privacy Policy