All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.enhydra.apache.xerces.readers.StreamingCharReader Maven / Gradle / Ivy

The newest version!
/*
 * The Apache Software License, Version 1.1
 *
 *
 * Copyright (c) 1999,2000 The Apache Software Foundation.  All rights 
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer. 
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:  
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written 
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.apache.org.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

package org.enhydra.apache.xerces.readers;

import java.io.Reader;
import java.util.Vector;

import org.enhydra.apache.xerces.framework.XMLErrorReporter;
import org.enhydra.apache.xerces.utils.CharDataChunk;
import org.enhydra.apache.xerces.utils.ImplementationMessages;
import org.enhydra.apache.xerces.utils.QName;
import org.enhydra.apache.xerces.utils.StringHasher;
import org.enhydra.apache.xerces.utils.StringPool;
import org.enhydra.apache.xerces.utils.XMLCharacterProperties;

/**
 * A reader class for applications that need to process input data as
 * it arrives on the stream.
 * 
 * @version $Id: StreamingCharReader.java,v 1.2 2005/01/26 08:28:44 jkjome Exp $
 */
public class StreamingCharReader extends XMLEntityReader {

    /**
     * Builds a reader over the supplied character stream, allocates the
     * first data chunk and loads the first character of input.
     *
     * @param entityHandler The entity handler.
     * @param errorReporter The error reporter.
     * @param sendCharDataAsCharArray true if char data should be reported using
     *                                char arrays instead of string handles.
     * @param reader The character stream the entity data is read from.
     * @param stringPool The string pool.
     * @throws Exception if construction or loading the first character fails.
     */
    public StreamingCharReader(XMLEntityHandler entityHandler, XMLErrorReporter errorReporter, boolean sendCharDataAsCharArray, Reader reader, StringPool stringPool) throws Exception {
        super(entityHandler, errorReporter, sendCharDataAsCharArray);
        this.fCharacterStream = reader;
        this.fStringPool = stringPool;
        // The first chunk has no predecessor, hence the null second argument.
        this.fCurrentChunk = CharDataChunk.createChunk(this.fStringPool, null);
        loadFirstChar();
    }

    /**
     * Queue an error so that it can be reported later.
     *
     * When a problem is detected in the underlying input stream while the
     * current chunk is being filled, it is recorded here instead of being
     * reported immediately.  The subclass is expected to store a character
     * with a value of zero at the failing offset; when that character is
     * later scanned as an invalid character, the matching deferred error
     * is reported.
     *
     * @param errorCode the errorCode to report
     * @param args an array of arguments needed to generate a good error message
     * @param offset the position in the reader where the error occurred
     */
    protected void deferException(int errorCode, Object[] args, int offset) {
        Vector pending = fDeferredErrors;
        if (pending == null) {
            // Created lazily: most documents never defer an error.
            pending = new Vector();
            fDeferredErrors = pending;
        }
        pending.addElement(new DeferredError(errorCode, args, offset));
    }

    /**
     * Switch to the next reader once this one has run out of input.
     *
     * The superclass performs the actual hand-off; afterwards the last
     * chunk of input data held by this reader is released and the
     * reference to it is dropped.
     *
     * @return The next reader used to continue processing the document.
     * @throws Exception if the superclass hand-off fails.
     */
    protected XMLEntityHandler.EntityReader changeReaders() throws Exception {
        final XMLEntityHandler.EntityReader successor = super.changeReaders();
        final CharDataChunk lastChunk = fCurrentChunk;
        lastChunk.releaseChunk();
        fCurrentChunk = null;
        return successor;
    }

    //
    // XMLEntityHandler.EntityReader implementation
    //
    // The first five methods of the interface are implemented
    // in the XMLEntityReader base class for us, namely
    //
    //    public int currentOffset();
    //    public int getLineNumber();
    //    public int getColumnNumber();
    //    public void setInCDSect(boolean inCDSect);
    //    public boolean getInCDSect();
    //

    /**
     * Copy a range of the characters processed by this reader onto the end
     * of a CharBuffer.  The work is delegated to the current data chunk.
     *
     * @param charBuffer The CharBuffer to append the characters to.
     * @param offset The offset within this reader where the copy should start.
     * @param length The number of characters to copy.
     */
    public void append(XMLEntityHandler.CharBuffer charBuffer, int offset, int length) {
        final CharDataChunk chunk = fCurrentChunk;
        chunk.append(charBuffer, offset, length);
    }

    /**
     * Register the characters in the given range of this reader as a string
     * in the StringPool.
     *
     * @param offset The offset within this reader where the characters start.
     * @param length The number of characters in the range.
     * @return The StringPool handle for the string; 0 when length is 0, in
     *         which case the current chunk is not consulted.
     */
    public int addString(int offset, int length) {
        return (length == 0) ? 0 : fCurrentChunk.addString(offset, length);
    }

    /**
     * Register the characters in the given range of this reader as a symbol
     * in the StringPool.
     *
     * @param offset The offset within this reader where the characters start.
     * @param length The number of characters in the range.
     * @return The StringPool handle for the symbol; 0 when length is 0, in
     *         which case the current chunk is not consulted.
     */
    public int addSymbol(int offset, int length) {
        // The third argument is a hash code; 0 is passed because none was computed here.
        return (length == 0) ? 0 : fCurrentChunk.addSymbol(offset, length, 0);
    }

    /**
     * Test whether the current character equals <code>chr</code>.
     *
     * If the current character is 0 and this reader is at end of input, the
     * question is forwarded to the reader returned by changeReaders().
     *
     * @param chr The character to compare against.
     * @param skipPastChar If true and the character matches, advance past it.
     * @return true if the current character is <code>chr</code>.
     * @throws Exception if loading input or changing readers fails.
     */
    public boolean lookingAtChar(char chr, boolean skipPastChar) throws Exception {
        final int current = fMostRecentChar;
        if (current == chr) {
            if (skipPastChar) {
                fCharacterCounter++;
                loadNextChar();
            }
            return true;
        }
        // A zero may mark the end of this reader's input rather than real data.
        if (current == 0 && atEOF(fCurrentOffset + 1)) {
            return changeReaders().lookingAtChar(chr, skipPastChar);
        }
        return false;
    }

    /**
     * Test whether the current position holds a valid character: tab,
     * linefeed, anything from 0x20 up to 0xD7FF, anything from 0xE000 to
     * 0xFFFD, or a high surrogate immediately followed by a low surrogate.
     *
     * If the current character is 0 and this reader is at end of input, the
     * question is forwarded to the reader returned by changeReaders().
     *
     * @param skipPastChar If true and the character is valid, advance past it
     *                     (past both halves of a surrogate pair).
     * @return true if a valid character is at the current position.
     * @throws Exception if loading input or changing readers fails.
     */
    public boolean lookingAtValidChar(boolean skipPastChar) throws Exception {
        final int current = fMostRecentChar;
        if (current >= 0xD800) {
            if (current > 0xFFFD) {
                return false;
            }
            if (current >= 0xDC00) {
                if (current < 0xE000) {
                    // A low surrogate with no preceding high surrogate.
                    return false;
                }
            } else {
                // High surrogate: peek at the next character, remembering
                // the position so it can be restored afterwards.
                final CharDataChunk chunkMark = fCurrentChunk;
                final int indexMark = fCurrentIndex;
                final int offsetMark = fCurrentOffset;
                final int next = loadNextChar();
                final boolean paired = (next >= 0xDC00 && next < 0xE000);
                if (!(paired && skipPastChar)) {
                    fCurrentChunk = chunkMark;
                    fCurrentIndex = indexMark;
                    fCurrentOffset = offsetMark;
                    fMostRecentData = chunkMark.toCharArray();
                    fMostRecentChar = fMostRecentData[indexMark] & 0xFFFF;
                    return paired;
                }
                // Valid pair being skipped: now positioned on the low surrogate.
            }
            if (skipPastChar) {
                fCharacterCounter++;
                loadNextChar();
            }
            return true;
        }
        if (current == 0x0A) {
            if (skipPastChar) {
                fLinefeedCounter++;
                fCharacterCounter = 1;
                loadNextChar();
            }
            return true;
        }
        if (current >= 0x20 || current == 0x09) {
            if (skipPastChar) {
                fCharacterCounter++;
                loadNextChar();
            }
            return true;
        }
        if (current == 0 && atEOF(fCurrentOffset + 1)) {
            return changeReaders().lookingAtValidChar(skipPastChar);
        }
        return false;
    }

    /**
     * Test whether the current character is a space (0x20), tab (0x09) or
     * linefeed (0x0A).
     *
     * If the current character is 0 and this reader is at end of input, the
     * question is forwarded to the reader returned by changeReaders().
     *
     * @param skipPastChar If true and a space character is found, advance
     *                     past it, updating the linefeed and character
     *                     counters accordingly.
     * @return true if the current character is one of the space characters.
     * @throws Exception if loading input or changing readers fails.
     */
    public boolean lookingAtSpace(boolean skipPastChar) throws Exception {
        switch (fMostRecentChar) {
        case 0x20:
        case 0x09:
            if (skipPastChar) {
                fCharacterCounter++;
                loadNextChar();
            }
            return true;
        case 0x0A:
            if (skipPastChar) {
                fLinefeedCounter++;
                fCharacterCounter = 1;
                loadNextChar();
            }
            return true;
        case 0: // REVISIT - should we be checking this here ?
            if (atEOF(fCurrentOffset + 1)) {
                return changeReaders().lookingAtSpace(skipPastChar);
            }
            return false;
        default:
            return false;
        }
    }

    /**
     * Advance until the current character equals <code>chr</code>, leaving
     * the reader positioned on that character.
     *
     * If a 0 character is reached at end of input, the search continues in
     * the reader returned by changeReaders().
     *
     * @param chr The character to stop at.
     * @throws Exception if loading input or changing readers fails.
     */
    public void skipToChar(char chr) throws Exception {
        //
        // REVISIT - this will skip invalid characters without reporting them.
        //
        int current = fMostRecentChar;
        while (current != chr) {
            if (current == 0) {
                if (atEOF(fCurrentOffset + 1)) {
                    changeReaders().skipToChar(chr);
                    return;
                }
                fCharacterCounter++;
            } else if (current == 0x0A) {
                fLinefeedCounter++;
                fCharacterCounter = 1;
            } else {
                fCharacterCounter++;
                if (current >= 0xD800 && current < 0xDC00) {
                    // High surrogate: a following low surrogate is skipped
                    // without being counted separately; anything else is
                    // examined again from the top of the loop.
                    current = loadNextChar();
                    final boolean lowSurrogate = (current >= 0xDC00 && current < 0xE000);
                    if (!lowSurrogate) {
                        continue;
                    }
                }
            }
            current = loadNextChar();
        }
    }

    /**
     * Advance past any run of space (0x20), tab (0x09) and linefeed (0x0A)
     * characters, updating the linefeed and character counters.
     *
     * If the run ends on a 0 character at end of input, skipping continues
     * in the reader returned by changeReaders().
     *
     * @throws Exception if loading input or changing readers fails.
     */
    public void skipPastSpaces() throws Exception {
        for (int current = fMostRecentChar; ; current = loadNextChar()) {
            if (current == 0x0A) {
                fLinefeedCounter++;
                fCharacterCounter = 1;
            } else if (current == 0x20 || current == 0x09) {
                fCharacterCounter++;
            } else {
                if (current == 0 && atEOF(fCurrentOffset + 1)) {
                    changeReaders().skipPastSpaces();
                }
                return;
            }
        }
    }

    /**
     * Advance past a name, if one starts at the current position.
     *
     * Nothing is skipped unless the current character is flagged as an
     * initial name character.  After that, characters are consumed until
     * one equals <code>fastcheck</code> or is not flagged as a name character.
     *
     * @param fastcheck A character that ends the name immediately, checked
     *                  before the character tables are consulted.
     * @throws Exception if loading input fails.
     */
    public void skipPastName(char fastcheck) throws Exception {
        int current = fMostRecentChar;
        boolean inName;
        if (current < 0x80) {
            inName = XMLCharacterProperties.fgAsciiInitialNameChar[current] != 0;
        } else {
            // The full character-flag table is initialized on first use.
            if (!fCalledCharPropInit) {
                XMLCharacterProperties.initCharFlags();
                fCalledCharPropInit = true;
            }
            inName = (XMLCharacterProperties.fgCharFlags[current] & XMLCharacterProperties.E_InitialNameCharFlag) != 0;
        }
        while (inName) {
            fCharacterCounter++;
            current = loadNextChar();
            if (current == fastcheck) {
                return;
            }
            if (current < 0x80) {
                inName = XMLCharacterProperties.fgAsciiNameChar[current] != 0;
            } else {
                if (!fCalledCharPropInit) {
                    XMLCharacterProperties.initCharFlags();
                    fCalledCharPropInit = true;
                }
                inName = (XMLCharacterProperties.fgCharFlags[current] & XMLCharacterProperties.E_NameCharFlag) != 0;
            }
        }
    }

    /**
     * Advance past a run of name characters starting at the current position.
     *
     * Unlike skipPastName, the first character only has to be flagged as a
     * name character, not as an initial name character.  Skipping stops at
     * the first character that equals <code>fastcheck</code> or is not
     * flagged as a name character.
     *
     * @param fastcheck A character that ends the token immediately, checked
     *                  before the character tables are consulted.
     * @throws Exception if loading input fails.
     */
    public void skipPastNmtoken(char fastcheck) throws Exception {
        int current = fMostRecentChar;
        while (current != fastcheck) {
            if (current < 0x80) {
                if (XMLCharacterProperties.fgAsciiNameChar[current] == 0) {
                    return;
                }
            } else {
                // The full character-flag table is initialized on first use.
                if (!fCalledCharPropInit) {
                    XMLCharacterProperties.initCharFlags();
                    fCalledCharPropInit = true;
                }
                if ((XMLCharacterProperties.fgCharFlags[current] & XMLCharacterProperties.E_NameCharFlag) == 0) {
                    return;
                }
            }
            fCharacterCounter++;
            current = loadNextChar();
        }
    }

    /**
     * Try to skip the exact character sequence <code>s</code> at the current
     * position.
     *
     * On a full match the reader is left just past the sequence and the
     * character counter grows by its length.  On a mismatch the reader is
     * put back where it started.
     *
     * @param s The characters expected next in the input; its first element
     *          is read unconditionally.
     * @return true if the whole sequence was matched and skipped.
     * @throws Exception if loading input fails.
     */
    public boolean skippedString(char[] s) throws Exception {
        if (fMostRecentChar != s[0]) {
            return false;
        }
        final int count = s.length;
        // Remember the position so a partial match can be undone.
        final CharDataChunk chunkMark = fCurrentChunk;
        final int offsetMark = fCurrentOffset;
        final int indexMark = fCurrentIndex;
        int current = loadNextChar();
        int matched = 1;
        while (matched < count && current == s[matched]) {
            current = loadNextChar();
            matched++;
        }
        if (matched < count) {
            fCurrentChunk = chunkMark;
            fCurrentIndex = indexMark;
            fCurrentOffset = offsetMark;
            fMostRecentData = chunkMark.toCharArray();
            fMostRecentChar = fMostRecentData[indexMark] & 0xFFFF;
            return false;
        }
        fCharacterCounter += count;
        return true;
    }

    /**
     * Consume the character at the current position and return it, for use
     * after the character has been found to be invalid.
     *
     * A high surrogate followed by a low surrogate is consumed as a pair and
     * returned as the combined code point.  A 0 character at end of input is
     * handed to the reader returned by changeReaders().  A 0 character whose
     * position matches a deferred error causes that error to be reported as
     * a fatal error.
     *
     * @return The character (or combined code point) that was consumed, or
     *         -1 if a deferred error was reported instead.
     * @throws Exception if loading input, changing readers or error
     *                   reporting fails.
     */
    public int scanInvalidChar() throws Exception {
        int scanned = fMostRecentChar;
        if (scanned == 0x0A) {
            fLinefeedCounter++;
            fCharacterCounter = 1;
            loadNextChar();
            return scanned;
        }
        if (scanned != 0) {
            fCharacterCounter++;
            final boolean highSurrogate = (scanned >= 0xD800 && scanned < 0xDC00);
            if (!highSurrogate) {
                loadNextChar();
                return scanned;
            }
            final int low = loadNextChar();
            if (low >= 0xDC00 && low < 0xE000) {
                scanned = ((scanned - 0xD800) << 10) + (low - 0xDC00) + 0x10000;
                loadNextChar();
            }
            // An unpaired high surrogate is returned as is; the character
            // following it has been loaded but not consumed.
            return scanned;
        }
        if (atEOF(fCurrentOffset + 1)) {
            return changeReaders().scanInvalidChar();
        }
        if (fDeferredErrors != null) {
            for (int i = 0; i < fDeferredErrors.size(); i++) {
                final DeferredError deferred = (DeferredError) fDeferredErrors.elementAt(i);
                if (deferred.offset != fCurrentIndex) {
                    continue;
                }
                fErrorReporter.reportError(fErrorReporter.getLocator(),
                                           ImplementationMessages.XERCES_IMPLEMENTATION_DOMAIN,
                                           deferred.errorCode,
                                           0,
                                           deferred.args,
                                           XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
                fDeferredErrors.removeElementAt(i);
                fCharacterCounter++;
                loadNextChar();
                return -1;
            }
        }
        fCharacterCounter++;
        loadNextChar();
        return scanned;
    }

    /**
     * Scan the digits of a character reference followed by the closing ';'.
     *
     * If the current character is 0 and this reader is at end of input, the
     * scan is forwarded to the reader returned by changeReaders().
     *
     * @param hex true to scan hexadecimal digits, false for decimal digits.
     * @return The numeric value of the reference, or one of
     *         XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR (first character
     *         is not a digit), CHARREF_RESULT_SEMICOLON_REQUIRED (digits are
     *         not followed by ';') or CHARREF_RESULT_OUT_OF_RANGE (value
     *         exceeded 0x10FFFF).
     * @throws Exception if loading input or changing readers fails.
     */
    public int scanCharRef(boolean hex) throws Exception {
        int ch = fMostRecentChar;
        if (ch == 0) {
            return atEOF(fCurrentOffset + 1)
                    ? changeReaders().scanCharRef(hex)
                    : XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
        }
        int value;
        if (hex) {
            if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0) {
                return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
            }
            value = (ch < 'A') ? ch - '0' : (ch < 'a') ? ch - ('A' - 10) : ch - ('a' - 10);
        } else {
            if (ch < '0' || ch > '9') {
                return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
            }
            value = ch - '0';
        }
        fCharacterCounter++;
        loadNextChar();
        boolean outOfRange = false;
        for (ch = fMostRecentChar; ch != 0; ch = fMostRecentChar) {
            final int digit;
            if (hex) {
                if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0) {
                    break;
                }
                digit = (ch < 'A') ? ch - '0' : (ch < 'a') ? ch - ('A' - 10) : ch - ('a' - 10);
            } else {
                if (ch < '0' || ch > '9') {
                    break;
                }
                digit = ch - '0';
            }
            fCharacterCounter++;
            loadNextChar();
            value = hex ? (value << 4) + digit : (value * 10) + digit;
            if (value > 0x10FFFF) {
                // Remember the overflow and restart from 0 so the remaining
                // digits can still be consumed without overflowing an int.
                outOfRange = true;
                value = 0;
            }
        }
        if (ch != ';') {
            return XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED;
        }
        fCharacterCounter++;
        loadNextChar();
        return outOfRange ? XMLEntityHandler.CHARREF_RESULT_OUT_OF_RANGE : value;
    }

    /**
     * Scan a string literal delimited by a pair of single or double quotes
     * and add the text between the quotes to the StringPool.
     *
     * @return The StringPool handle of the literal's content, or
     *         XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED if no opening
     *         quote is present, or STRINGLIT_RESULT_INVALID_CHAR if an
     *         invalid character is met before the closing quote.
     * @throws Exception if loading input or changing readers fails.
     */
    public int scanStringLiteral() throws Exception {
        final char quote;
        if (lookingAtChar('\'', true)) {
            quote = '\'';
        } else if (lookingAtChar('\"', true)) {
            quote = '\"';
        } else {
            return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED;
        }
        final int start = fCurrentOffset;
        for (;;) {
            if (lookingAtChar(quote, false)) {
                break;
            }
            if (!lookingAtValidChar(true)) {
                return XMLEntityHandler.STRINGLIT_RESULT_INVALID_CHAR;
            }
        }
        final int handle = addString(start, fCurrentOffset - start);
        lookingAtChar(quote, true); // move past the closing quote
        return handle;
    }

    //
    // [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
    //                   | "'" ([^<&'] | Reference)* "'"
    //
    /**
     * Scan a simple attribute value up to its closing quote and add it to
     * the StringPool.
     *
     * The scan gives up without consuming the offending character when it
     * meets a tab or linefeed, an '&amp;', a '&lt;' or an invalid character;
     * the returned code tells the caller which one it was.
     *
     * @param qchar The quote character that ends the value.
     * @param asSymbol true to add the value as a symbol, false as a string.
     * @return The StringPool handle of the value, or one of
     *         XMLEntityHandler.ATTVALUE_RESULT_COMPLEX,
     *         ATTVALUE_RESULT_LESSTHAN or ATTVALUE_RESULT_INVALID_CHAR.
     * @throws Exception if loading input or changing readers fails.
     */
    public int scanAttValue(char qchar, boolean asSymbol) throws Exception
    {
        final int start = fCurrentOffset;
        while (!lookingAtChar(qchar, false)) {
            if (lookingAtChar(' ', true)) {
                continue;
            }
            // Whitespace other than a plain space, and references, need the
            // caller's slower path.
            if (lookingAtSpace(false) || lookingAtChar('&', false)) {
                return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX;
            }
            if (lookingAtChar('<', false)) {
                return XMLEntityHandler.ATTVALUE_RESULT_LESSTHAN;
            }
            if (!lookingAtValidChar(true)) {
                return XMLEntityHandler.ATTVALUE_RESULT_INVALID_CHAR;
            }
        }
        final int span = fCurrentOffset - start;
        final int handle = asSymbol ? addSymbol(start, span) : addString(start, span);
        lookingAtChar(qchar, true); // move past the closing quote
        return handle;
    }

    //
    //  [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
    //                      | "'" ([^%&'] | PEReference | Reference)* "'"
    //
    /**
     * Scan entity value text until the closing quote, a reference, a
     * parameter-entity reference, an invalid character or end of input.
     *
     * @param qchar The closing quote character, or -1 if no quote character
     *              should end the scan.
     * @param createString If true, the text up to the closing quote is added
     *                     to the StringPool and the quote is skipped; if
     *                     false, the quote is left in place.
     * @return The StringPool handle of the scanned text (only when the
     *         closing quote was reached with <code>createString</code> set),
     *         otherwise one of XMLEntityHandler.ENTITYVALUE_RESULT_FINISHED,
     *         ENTITYVALUE_RESULT_REFERENCE, ENTITYVALUE_RESULT_PEREF,
     *         ENTITYVALUE_RESULT_INVALID_CHAR or
     *         ENTITYVALUE_RESULT_END_OF_INPUT.
     * @throws Exception if loading input or changing readers fails.
     */
    public int scanEntityValue(int qchar, boolean createString) throws Exception
    {
        final int start = fCurrentOffset;
        for (;;) {
            if (atEOF(fCurrentOffset + 1)) {
                changeReaders();
                return XMLEntityHandler.ENTITYVALUE_RESULT_END_OF_INPUT;
            }
            if (qchar != -1 && lookingAtChar((char)qchar, false)) {
                if (createString) {
                    break;
                }
                return XMLEntityHandler.ENTITYVALUE_RESULT_FINISHED;
            }
            if (lookingAtChar('&', false)) {
                return XMLEntityHandler.ENTITYVALUE_RESULT_REFERENCE;
            }
            if (lookingAtChar('%', false)) {
                return XMLEntityHandler.ENTITYVALUE_RESULT_PEREF;
            }
            if (!lookingAtValidChar(true)) {
                return XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR;
            }
        }
        final int handle = addString(start, fCurrentOffset - start);
        lookingAtChar((char)qchar, true); // move past the closing quote
        return handle;
    }

    /**
     * Scan a name at the current position and add it to the StringPool as a
     * symbol, computing its hash code while scanning.
     *
     * The name ends at the first character that equals
     * <code>fastcheck</code> or is not flagged as a name character; that
     * character is not consumed.
     *
     * @param fastcheck A character that ends the name immediately, checked
     *                  before the character tables are consulted.
     * @return The StringPool handle of the name, or -1 if the current
     *         character is not flagged as an initial name character.
     * @throws Exception if loading input fails.
     */
    public int scanName(char fastcheck) throws Exception {
        int current = fMostRecentChar;
        if (current < 0x80) {
            if (XMLCharacterProperties.fgAsciiInitialNameChar[current] == 0) {
                return -1;
            }
        } else {
            // The full character-flag table is initialized on first use.
            if (!fCalledCharPropInit) {
                XMLCharacterProperties.initCharFlags();
                fCalledCharPropInit = true;
            }
            if ((XMLCharacterProperties.fgCharFlags[current] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) {
                return -1;
            }
        }
        final int start = fCurrentOffset;
        int hash = 0;
        boolean more;
        do {
            // "current" is known to belong to the name: count it, hash it,
            // then look at the character after it.
            fCharacterCounter++;
            hash = StringHasher.hashChar(hash, current);
            current = loadNextChar();
            if (current == fastcheck) {
                more = false;
            } else if (current < 0x80) {
                more = XMLCharacterProperties.fgAsciiNameChar[current] != 0;
            } else {
                if (!fCalledCharPropInit) {
                    XMLCharacterProperties.initCharFlags();
                    fCalledCharPropInit = true;
                }
                more = (XMLCharacterProperties.fgCharFlags[current] & XMLCharacterProperties.E_NameCharFlag) != 0;
            }
        } while (more);
        hash = StringHasher.finishHash(hash);
        return fCurrentChunk.addSymbol(start, fCurrentOffset - start, hash);
    }

    /**
     * Check that the name at the current position is exactly the expected
     * name, consuming it in the process.
     *
     * On a mismatch the rest of the name characters at the current position
     * are skipped with skipPastNmtoken before false is returned.
     *
     * @param fastcheck A character that ends the name immediately, checked
     *                  before the character tables are consulted.
     * @param expectedName The characters the scanned name must equal.
     * @return true if the input matched <code>expectedName</code> and the
     *         character after it ends the name.
     * @throws Exception if loading input fails.
     */
    public boolean scanExpectedName(char fastcheck, StringPool.CharArrayRange expectedName) throws Exception {
        final char[] wanted = expectedName.chars;
        int pos = expectedName.offset;
        final int end = pos + expectedName.length;
        int current = fMostRecentChar;
        while (pos < end) {
            if (current != wanted[pos++]) {
                skipPastNmtoken(fastcheck);
                return false;
            }
            fCharacterCounter++;
            current = loadNextChar();
        }
        // Every expected character matched; the name must also end here.
        final boolean ended;
        if (current == fastcheck) {
            ended = true;
        } else if (current < 0x80) {
            ended = XMLCharacterProperties.fgAsciiNameChar[current] == 0;
        } else {
            if (!fCalledCharPropInit) {
                XMLCharacterProperties.initCharFlags();
                fCalledCharPropInit = true;
            }
            ended = (XMLCharacterProperties.fgCharFlags[current] & XMLCharacterProperties.E_NameCharFlag) == 0;
        }
        if (!ended) {
            skipPastNmtoken(fastcheck);
        }
        return ended;
    }

    /**
     * Scan a qualified name at the current position and store the resulting
     * StringPool handles in <code>qname</code>.
     *
     * If the current character cannot start a name, or is a colon, the
     * qname is cleared and nothing is consumed.  Otherwise the raw name is
     * scanned and hashed; a single colon splits it into prefix and local
     * part, provided the character after the colon can start a name.  When
     * there is no prefix, <code>qname.prefix</code> is -1 and the local part
     * is the raw name.  <code>qname.uri</code> is always set to
     * StringPool.EMPTY_STRING.
     *
     * @param fastcheck A character that ends the name immediately, checked
     *                  before the character tables are consulted.
     * @param qname The QName to fill in (or clear if no name is present).
     * @throws Exception if loading input fails.
     */
    public void scanQName(char fastcheck, QName qname) throws Exception {
        int ch = fMostRecentChar;
        // The first character must be an initial name character other than ':'.
        if (ch < 0x80) {
            if (XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0) {
                qname.clear();
                return;
            }
            if (ch == ':') {
                qname.clear();
                return;
            }
        } else {
            // The full character-flag table is initialized on first use.
            if (!fCalledCharPropInit) {
                XMLCharacterProperties.initCharFlags();
                fCalledCharPropInit = true;
            }
            if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) {
                qname.clear();
                return;
            }
        }
        int offset = fCurrentOffset;
        fCharacterCounter++;
        int hashcode = 0;
        // Offset of the colon separating prefix and local part; -1 while none has been seen.
        int prefixend = -1;
        while (true) {
            // "ch" is known to belong to the name: hash it, then look at the next character.
            hashcode = StringHasher.hashChar(hashcode, ch);
            ch = loadNextChar();
            if (fastcheck == ch)
                break;
            if (ch < 0x80) {
                if (XMLCharacterProperties.fgAsciiNameChar[ch] == 0)
                    break;
                if (ch == ':') {
                    // A second colon ends the name; it is not consumed.
                    if (prefixend != -1)
                        break;
                    prefixend = fCurrentOffset;
                    //
                    // We need to peek ahead one character.  If the next character is not a
                    // valid initial name character, or is another colon, then we cannot meet
                    // both the Prefix and LocalPart productions for the QName production,
                    // which means that there is no Prefix and we need to terminate the QName
                    // at the first colon.
                    //
                    CharDataChunk savedChunk = fCurrentChunk;
                    int savedOffset = fCurrentOffset;
                    int savedIndex = fCurrentIndex;
                    ch = loadNextChar();
                    // Rewind to the colon.  fMostRecentChar is not restored here: it is
                    // either overwritten by the next loadNextChar() call or set explicitly
                    // below before breaking out of the loop.
                    fCurrentChunk = savedChunk;
                    fCurrentOffset = savedOffset;
                    fCurrentIndex = savedIndex;
                    fMostRecentData = savedChunk.toCharArray();
                    // lpok: true if the peeked character can start the local part.
                    boolean lpok = true;
                    if (ch < 0x80) {
                        if (XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0 || ch == ':')
                            lpok = false;
                    } else {
                        if (!fCalledCharPropInit) {
                            XMLCharacterProperties.initCharFlags();
                            fCalledCharPropInit = true;
                        }
                        if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0)
                            lpok = false;
                    }
                    // Back on the colon, which is the character to hash next if the loop continues.
                    ch = ':';
                    if (!lpok) {
                        // No valid local part: drop the prefix and stop on the colon.
                        prefixend = -1;
                        fMostRecentChar = ch;
                        break;
                    }
                }
            } else {
                if (!fCalledCharPropInit) {
                    XMLCharacterProperties.initCharFlags();
                    fCalledCharPropInit = true;
                }
                if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
                    break;
            }
            fCharacterCounter++;
        }
        hashcode = StringHasher.finishHash(hashcode);
        int length = fCurrentOffset - offset;
        // The raw name reuses the hash computed above; prefix and local part go
        // through addSymbol, which passes a hash code of 0.
        qname.rawname = fCurrentChunk.addSymbol(offset, length, hashcode);
        qname.prefix = prefixend == -1 ? -1 : addSymbol(offset, prefixend - offset);
        qname.localpart = prefixend == -1 ? qname.rawname : addSymbol(prefixend + 1, fCurrentOffset - (prefixend + 1));
        qname.uri = StringPool.EMPTY_STRING;

    } // scanQName(char,QName)

    //
    // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
    //
    /**
     *
     */
    public int scanContent(QName element) throws Exception {
        if (fCallClearPreviousChunk && fCurrentChunk.clearPreviousChunk())
            fCallClearPreviousChunk = false;
        int charDataOffset = fCurrentOffset;
        int ch = fMostRecentChar;
        if (ch < 0x80) {
            switch (XMLCharacterProperties.fgAsciiWSCharData[ch]) {
            case 0:
                fCharacterCounter++;
                ch = loadNextChar();
                break;
            case 1: // '<'
                fCharacterCounter++;
                ch = loadNextChar();
                if (!fInCDSect) {
                    return recognizeMarkup(ch);
                }
                break;
            case 2: // '&'
                fCharacterCounter++;
                ch = loadNextChar();
                if (!fInCDSect) {
                    return recognizeReference(ch);
                }
                break;
            case 3: // ']'
                fCharacterCounter++;
                ch = loadNextChar();
                if (ch != ']')
                    break;
                {
                    CharDataChunk dataChunk = fCurrentChunk;
                    int index = fCurrentIndex;
                    int offset = fCurrentOffset;
                    if (loadNextChar() != '>') {
                        fCurrentChunk = dataChunk;
                        fCurrentIndex = index;
                        fCurrentOffset = offset;
                        fMostRecentData = dataChunk.toCharArray();
                        fMostRecentChar = ']';
                        break;
                    }
                }
                loadNextChar();
                fCharacterCounter += 2;
                return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
            case 4: // invalid char
                if (ch == 0 && atEOF(fCurrentOffset + 1)) {
                    changeReaders();
                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; // REVISIT - not quite...
                }
                return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
            case 5:
                do {
                    if (ch == 0x0A) {
                        fLinefeedCounter++;
                        fCharacterCounter = 1;
                    } else
                        fCharacterCounter++;
                    ch = loadNextChar();
                } while (ch == 0x20 || ch == 0x09 || ch == 0x0A);
                if (ch < 0x80) {
                    switch (XMLCharacterProperties.fgAsciiCharData[ch]) {
                    case 0:
                        fCharacterCounter++;
                        ch = loadNextChar();
                        break;
                    case 1: // '<'
                        if (!fInCDSect) {
                            callCharDataHandler(charDataOffset, fCurrentOffset, true);
                            fCharacterCounter++;
                            ch = loadNextChar();
                            return recognizeMarkup(ch);
                        }
                        fCharacterCounter++;
                        ch = loadNextChar();
                        break;
                    case 2: // '&'
                        if (!fInCDSect) {
                            callCharDataHandler(charDataOffset, fCurrentOffset, true);
                            fCharacterCounter++;
                            ch = loadNextChar();
                            return recognizeReference(ch);
                        }
                        fCharacterCounter++;
                        ch = loadNextChar();
                        break;
                    case 3: // ']'
                        int endOffset = fCurrentOffset;
                        ch = loadNextChar();
                        if (ch != ']') {
                            fCharacterCounter++;
                            break;
                        }
                        {
                            CharDataChunk dataChunk = fCurrentChunk;
                            int index = fCurrentIndex;
                            int offset = fCurrentOffset;
                            if (loadNextChar() != '>') {
                                fCurrentChunk = dataChunk;
                                fCurrentIndex = index;
                                fCurrentOffset = offset;
                                fMostRecentData = dataChunk.toCharArray();
                                fMostRecentChar = ']';
                                fCharacterCounter++;
                                break;
                            }
                        }
                        loadNextChar();
                        callCharDataHandler(charDataOffset, endOffset, true);
                        fCharacterCounter += 3;
                        return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
                    case 4: // invalid char
                        callCharDataHandler(charDataOffset, fCurrentOffset, true);
                        if (ch == 0 && atEOF(fCurrentOffset + 1)) {
                            changeReaders();
                            return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; // REVISIT - not quite...
                        }
                        return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
                    }
                } else if (!skipMultiByteCharData(ch)) {
                    callCharDataHandler(charDataOffset, fCurrentOffset, true);
                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
                }
                break;
            }
        } else if (!skipMultiByteCharData(ch)) {
            return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
        }
        ch = skipAsciiCharData();
        while (true) {
            if (ch < 0x80) {
                switch (XMLCharacterProperties.fgAsciiCharData[ch]) {
                case 0:
                    fCharacterCounter++;
                    ch = loadNextChar();
                    break;
                case 1: // '<'
                    if (!fInCDSect) {
                        callCharDataHandler(charDataOffset, fCurrentOffset, false);
                        fCharacterCounter++;
                        ch = loadNextChar();
                        return recognizeMarkup(ch);
                    }
                    fCharacterCounter++;
                    ch = loadNextChar();
                    break;
                case 2: // '&'
                    if (!fInCDSect) {
                        callCharDataHandler(charDataOffset, fCurrentOffset, false);
                        fCharacterCounter++;
                        ch = loadNextChar();
                        return recognizeReference(ch);
                    }
                    fCharacterCounter++;
                    ch = loadNextChar();
                    break;
                case 3: // ']'
                    int endOffset = fCurrentOffset;
                    ch = loadNextChar();
                    if (ch != ']') {
                        fCharacterCounter++;
                        break;
                    }
                    CharDataChunk dataChunk = fCurrentChunk;
                    int index = fCurrentIndex;
                    int offset = fCurrentOffset;
                    if (loadNextChar() != '>') {
                        fCurrentChunk = dataChunk;
                        fCurrentIndex = index;
                        fCurrentOffset = offset;
                        fMostRecentData = dataChunk.toCharArray();
                        fMostRecentChar = ']';
                        fCharacterCounter++;
                        break;
                    }
                    loadNextChar();
                    callCharDataHandler(charDataOffset, endOffset, false);
                    fCharacterCounter += 3;
                    return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
                case 4: // invalid char
                    if (ch == 0x0A) {
                        fLinefeedCounter++;
                        fCharacterCounter = 1;
                        ch = loadNextChar();
                        break;
                    }
                    callCharDataHandler(charDataOffset, fCurrentOffset, false);
                    if (ch == 0 && atEOF(fCurrentOffset + 1)) {
                        changeReaders();
                        return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; // REVISIT - not quite...
                    }
                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
                }
            } else {
                if (!skipMultiByteCharData(ch)) {
                    callCharDataHandler(charDataOffset, fCurrentOffset, false);
                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
                }
                ch = fMostRecentChar;
            }
        }
    }

    //
    // Private data members
    //
    // Characters that must follow "<![" for recognizeMarkup() to report the start of a CDATA section.
    private static final char[] cdata_string = { 'C','D','A','T','A','[' };
    // String pool handed to CharDataChunk.createChunk() when slowLoadNextChar() allocates a new chunk.
    private StringPool fStringPool = null;
    // Set to true by slowLoadNextChar() each time the reader moves on to another chunk.
    // NOTE(review): the code that consumes this flag is outside this part of the file.
    private boolean fCallClearPreviousChunk = true;
    // NOTE(review): presumably a list of DeferredError instances; it is not referenced
    // in this part of the file, so confirm against its users.
    private Vector fDeferredErrors = null;

    //
    // Private classes
    //
    /*
     * Plain record of an error: its code, its message arguments and an offset.
     * NOTE(review): presumably used to postpone error reporting until the
     * reader reaches the recorded offset; the users of this class are not
     * visible in this part of the file.
     */
    private class DeferredError {
        int errorCode;
        Object[] args;
        int offset;

        /*
         * Store the three values exactly as given (the array is not copied).
         */
        DeferredError(int errorCode, Object[] args, int offset) {
            this.errorCode = errorCode;
            this.args = args;
            this.offset = offset;
        }
    }

    //
    // Private methods
    //

    /*
     * Classify the markup that follows a less-than character which ended a
     * run of character data, and return the matching scanContent result code.
     *
     * Recognized prefixes are consumed ("?", "/", "!--", "![CDATA["); for a
     * start tag nothing further is consumed.  When the input ends in the
     * middle of a "<!..." prefix, fCharacterCounter and fCurrentOffset are
     * moved back by the number of characters consumed after the '<'.
     *
     * @param ch the character immediately after the '<'
     * @return one of the XMLEntityHandler.CONTENT_RESULT_* codes
     * @exception java.lang.Exception
     */
    private int recognizeMarkup(int ch) throws Exception {
        if (ch == 0) {
            return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
        }
        if (ch == '?') {
            fCharacterCounter++;
            loadNextChar();
            return XMLEntityHandler.CONTENT_RESULT_START_OF_PI;
        }
        if (ch == '/') {
            fCharacterCounter++;
            loadNextChar();
            return XMLEntityHandler.CONTENT_RESULT_START_OF_ETAG;
        }
        if (ch != '!') {
            return XMLEntityHandler.CONTENT_RESULT_START_OF_ELEMENT;
        }

        // "<!" seen: only "<!--" and "<![CDATA[" are recognized from here on.
        fCharacterCounter++;
        int next = loadNextChar();
        if (next == 0) {
            // Undo the one character consumed after the '<'.
            fCharacterCounter--;
            fCurrentOffset--;
            return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
        }

        if (next == '-') {
            fCharacterCounter++;
            next = loadNextChar();
            if (next == 0) {
                // Undo the two characters ("!-") consumed after the '<'.
                fCharacterCounter -= 2;
                fCurrentOffset -= 2;
                return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
            }
            if (next != '-') {
                return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
            }
            fCharacterCounter++;
            loadNextChar();
            return XMLEntityHandler.CONTENT_RESULT_START_OF_COMMENT;
        }

        if (next != '[') {
            return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
        }

        // "<![" seen: the next six characters must spell "CDATA[".
        for (int matched = 0; matched < cdata_string.length; matched++) {
            fCharacterCounter++;
            next = loadNextChar();
            if (next == 0) {
                // "!" plus the (matched + 1) characters read in this loop.
                int consumed = 2 + matched;
                fCharacterCounter -= consumed;
                fCurrentOffset -= consumed;
                return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
            }
            if (next != cdata_string[matched]) {
                return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
            }
        }
        fCharacterCounter++;
        loadNextChar();
        return XMLEntityHandler.CONTENT_RESULT_START_OF_CDSECT;
    }

    /*
     * Classify the reference that follows an ampersand which ended a run of
     * character data, and return the matching scanContent result code.
     *
     * [67] Reference ::= EntityRef | CharRef
     * [68] EntityRef ::= '&' Name ';'
     * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
     *
     * @param ch the character immediately after the '&'
     * @return one of the XMLEntityHandler.CONTENT_RESULT_* codes
     * @exception java.lang.Exception
     */
    private int recognizeReference(int ch) throws Exception {
        switch (ch) {
        case 0:
            // Input ended right after the '&'.
            return XMLEntityHandler.CONTENT_RESULT_REFERENCE_END_OF_INPUT;
        case '#':
            // Character reference: consume the '#'.
            fCharacterCounter++;
            loadNextChar();
            return XMLEntityHandler.CONTENT_RESULT_START_OF_CHARREF;
        default:
            // Entity reference: the current character is left unconsumed.
            return XMLEntityHandler.CONTENT_RESULT_START_OF_ENTITYREF;
        }
    }

    /*
     * Skip over one non-ASCII character, which may be a surrogate pair.
     *
     * Values above 0xFFFD, a lone low surrogate, and a high surrogate that is
     * not followed by a low surrogate are rejected.  In the last case the
     * reader is moved back onto the high surrogate before returning.
     *
     * @param ch the current character
     * @return true if the character was accepted and skipped, false otherwise
     * @exception java.lang.Exception
     */
    private boolean skipMultiByteCharData(int ch) throws Exception {
        if (ch >= 0xD800) {
            boolean tooLarge = ch > 0xFFFD;
            boolean loneLowSurrogate = ch >= 0xDC00 && ch < 0xE000;
            if (tooLarge || loneLowSurrogate) {
                return false;
            }
            if (ch < 0xDC00) {
                // High surrogate: remember the position so it can be restored
                // if the following character does not complete the pair.
                CharDataChunk chunkBefore = fCurrentChunk;
                int indexBefore = fCurrentIndex;
                int offsetBefore = fCurrentOffset;
                int low = loadNextChar();
                boolean completesPair = low >= 0xDC00 && low < 0xE000;
                if (!completesPair) {
                    fCurrentChunk = chunkBefore;
                    fCurrentIndex = indexBefore;
                    fCurrentOffset = offsetBefore;
                    fMostRecentData = chunkBefore.toCharArray();
                    fMostRecentChar = fMostRecentData[indexBefore] & 0xFFFF;
                    return false;
                }
            }
        }
        // Step past the character (or past the low surrogate of a pair).
        loadNextChar();
        return true;
    }

    /*
     * Skip over contiguous ASCII character data, starting at the most
     * recently loaded character and keeping the line and column counters
     * up to date.
     *
     * @return the first character that was not skipped: either a non-ASCII
     *         character (0x80 and above) or an ASCII character whose
     *         fgAsciiCharData class is non-zero and which is not a linefeed
     * @exception java.lang.Exception
     */
    private int skipAsciiCharData() throws Exception {
        for (int current = fMostRecentChar; ; current = loadNextChar()) {
            if (current >= 0x80) {
                return current;
            }
            if (XMLCharacterProperties.fgAsciiCharData[current] == 0) {
                // Ordinary character data: just advance the column.
                fCharacterCounter++;
                continue;
            }
            if (current != 0x0A) {
                return current;
            }
            // Linefeed: start a new line.
            fLinefeedCounter++;
            fCharacterCounter = 1;
        }
    }

    /*
     * Report a range of character data to the parser through the character
     * data handler.
     *
     * When fSendCharDataAsCharArray is false the range is turned into a
     * string handle via addString() and reported once (even if empty).
     * Otherwise the characters are reported straight from the chunk arrays,
     * one call per chunk touched; an empty range that lies within a single
     * chunk is not reported.
     *
     * @param offset the offset of the start of the character data
     * @param endOffset the offset of the end of the character data (exclusive)
     * @param isWhitespace true to report through processWhitespace,
     *                     false to report through processCharacters
     * @exception java.lang.Exception
     */
    private void callCharDataHandler(int offset, int endOffset, boolean isWhitespace) throws Exception {
        final int length = endOffset - offset;

        if (!fSendCharDataAsCharArray) {
            final int stringIndex = addString(offset, length);
            if (isWhitespace) {
                fCharDataHandler.processWhitespace(stringIndex);
            } else {
                fCharDataHandler.processCharacters(stringIndex);
            }
            return;
        }

        CharDataChunk chunk = fCurrentChunk.chunkFor(offset);
        final int start = offset & CharDataChunk.CHUNK_MASK;

        // Simple case: the whole range lives in one chunk.
        if (start + length <= CharDataChunk.CHUNK_SIZE) {
            if (length != 0) {
                dispatchCharArray(chunk.toCharArray(), start, length, isWhitespace);
            }
            return;
        }

        // The range spans chunks: first the tail of the starting chunk ...
        int slice = CharDataChunk.CHUNK_SIZE - start;
        dispatchCharArray(chunk.toCharArray(), start, slice, isWhitespace);
        int remaining = length - slice;

        // ... then each following chunk in turn until nothing is left.
        do {
            chunk = chunk.nextChunk();
            if (chunk == null) {
                throw new RuntimeException(new ImplementationMessages().createMessage(null, ImplementationMessages.INT_DCN, 0, null));
            }
            slice = remaining <= CharDataChunk.CHUNK_SIZE ? remaining : CharDataChunk.CHUNK_SIZE;
            dispatchCharArray(chunk.toCharArray(), 0, slice, isWhitespace);
            remaining -= slice;
        } while (remaining > 0);
    }

    /*
     * Hand one slice of a chunk's character array to the character data
     * handler, as whitespace or as ordinary characters.
     */
    private void dispatchCharArray(char[] data, int start, int count, boolean isWhitespace) throws Exception {
        if (isWhitespace) {
            fCharDataHandler.processWhitespace(data, start, count);
        } else {
            fCharDataHandler.processCharacters(data, start, count);
        }
    }

    /*
     * Advance the reader into the next chunk.  An already existing next
     * chunk is re-used; otherwise a new chunk is created and its first
     * character is fetched.
     *
     * @return The next character that will be processed.
     * @exception java.lang.Exception
     */
    private int slowLoadNextChar() throws Exception {
        fCallClearPreviousChunk = true;

        if (fCurrentChunk.nextChunk() == null) {
            // No chunk after this one yet: append a fresh one and start filling it.
            fCurrentChunk = CharDataChunk.createChunk(fStringPool, fCurrentChunk);
            fCurrentIndex = 0;
            fFillIndex = 0;
            loadFirstChar();
            return fMostRecentChar;
        }

        // A following chunk already exists: step into it and read its first character.
        fCurrentChunk = fCurrentChunk.nextChunk();
        fCurrentIndex = 0;
        fMostRecentData = fCurrentChunk.toCharArray();
        fMostRecentChar = fMostRecentData[fCurrentIndex] & 0xFFFF;
        return fMostRecentChar;
    }

    /*
     * Advance the reader by one character, switching chunks or fetching more
     * input when the current position runs past what is available.
     *
     * @return The next character that will be processed.
     * @exception java.lang.Exception
     */
    private int loadNextChar() throws Exception {
        fCurrentOffset++;
        fCurrentIndex++;
        if (fCurrentIndex == CharDataChunk.CHUNK_SIZE) {
            // Ran off the end of the current chunk.
            return slowLoadNextChar();
        }
        if (fCurrentIndex >= fFillIndex) {
            // Nothing stored at this position yet.
            return loadMoreChars();
        }
        fMostRecentChar = fMostRecentData[fCurrentIndex] & 0xFFFF;
        return fMostRecentChar;
    }

    /*
     * Read the first character of the current chunk, giving the chunk a
     * character array first if it does not have one yet.
     *
     * @exception java.lang.Exception
     */
    private void loadFirstChar() throws Exception {
        char[] chunkData = fCurrentChunk.toCharArray();
        if (chunkData == null) {
            chunkData = new char[CharDataChunk.CHUNK_SIZE];
            fCurrentChunk.setCharArray(chunkData);
        }
        fMostRecentData = chunkData;
        loadMoreChars();
    }

    // True when the last character taken from the stream was a carriage
    // return, so that a linefeed immediately following it is dropped.
    private boolean seenCR = false;
    // Second character of a two-character read that has not been delivered
    // yet, or -1 when nothing is pending.
    private int oweChar = -1;
    // Scratch buffer for reading up to two characters at a time.
    private char[] inBuffer = new char[2];

    /*
     * Fetch more characters.
     *
     * Reads up to two characters from the character stream, normalizes line
     * ends (a carriage return becomes a linefeed and a linefeed directly
     * after a carriage return is dropped), stores the first resulting
     * character at fFillIndex and keeps a second one in oweChar for the
     * next call.
     *
     * At end of input, or when reading fails with an IOException, the stream
     * is closed and set to null, a 0 is stored at fFillIndex and 0 is
     * returned.  Later calls keep returning 0: callers may rewind and then
     * advance past the end again (for example after a failed look-ahead),
     * and must not hit the stream that has already been released.
     *
     * @return the character that was loaded, or 0 at end of input
     * @exception java.lang.Exception
     */
    private int loadMoreChars() throws Exception {
        if (oweChar != -1) {
            fMostRecentData[fFillIndex] = (char)oweChar;
            fFillIndex++;
            fLength++;
            fMostRecentChar = oweChar;
            oweChar = -1;
            return fMostRecentChar;
        }
        if (fCharacterStream == null) {
            // The stream was already exhausted and released; keep reporting
            // end of input instead of dereferencing null.
            fMostRecentChar = 0;
            return 0;
        }
        int result = -1;
        try {
            while (true) {
                result = fCharacterStream.read(inBuffer, 0, 2);
                switch (result) {
                case -1:
                    break;
                case 0:
                    continue;
                case 1:
                    result = inBuffer[0];
                    if (seenCR) {
                        seenCR = false;
                        // Second half of a CR LF pair: already delivered as LF.
                        if (result == 0x0A)
                            continue;
                    }
                    if (result == 0x0D) {
                        seenCR = true;
                        result = 0x0A;
                    }
                    fMostRecentChar = (fMostRecentData[fFillIndex] = (char)result);
                    fFillIndex++;
                    fLength++;
                    return fMostRecentChar;
                case 2:
                    result = inBuffer[0];
                    boolean readchar2 = false;
                    if (seenCR) {
                        seenCR = false;
                        if (result == 0x0A) {
                            // Drop the LF of a CR LF pair and deliver the
                            // second character read instead.
                            result = inBuffer[1];
                            readchar2 = true;
                        }
                    }
                    if (result == 0x0D) {
                        seenCR = true;
                        result = 0x0A;
                    }
                    fMostRecentChar = (fMostRecentData[fFillIndex] = (char)result);
                    fFillIndex++;
                    fLength++;
                    if (!readchar2) {
                        // The second character is still unused: normalize it
                        // and keep it for the next call.
                        result = inBuffer[1];
                        if (seenCR) {
                            seenCR = false;
                            if (result == 0x0A)
                                return fMostRecentChar;
                        }
                        if (result == 0x0D) {
                            seenCR = true;
                            result = 0x0A;
                        }
                        oweChar = result;
                    }
                    return fMostRecentChar;
                }
                break;
            }
        } catch (java.io.IOException ignored) {
            // Best effort: a failed read is treated like end of input.
        }
        //
        // We have reached the end of the stream.
        //
        try {
            fCharacterStream.close();
        } catch (java.io.IOException ignored) {
            // Nothing useful can be done if closing fails.
        }
        fCharacterStream = null;
        fMostRecentChar = (fMostRecentData[fFillIndex] = 0);
        return 0;
    }

    /*
     * Would the reader be past the end of the loaded data at a given offset?
     *
     * @param offset the offset to test
     * @return true if offset is strictly greater than fLength, the number of
     *         characters stored so far
     */
    private boolean atEOF(int offset) {
        return fLength < offset;
    }

    //
    // Protected data members
    //
    // Source of characters; loadMoreChars() reads from it, then closes it and
    // sets it to null once the end of the stream is reached.
    protected Reader fCharacterStream = null;
    // Chunk the reader is currently positioned in.
    protected CharDataChunk fCurrentChunk = null;
    // Index of the current character within the current chunk's array;
    // reset to 0 whenever the reader moves to another chunk.
    protected int fCurrentIndex = 0;
    // Index in fMostRecentData where loadMoreChars() stores the next character.
    protected int fFillIndex = 0;
    // Character array of the chunk most recently loaded from.
    protected char[] fMostRecentData = null;
    // Most recently loaded character; 0 once the end of input is reached.
    protected int fMostRecentChar = 0;
    // Number of characters stored so far; atEOF() compares offsets against it.
    protected int fLength = 0;
    // NOTE(review): not referenced in this part of the file; presumably records
    // whether character-property tables were initialized. Confirm against its users.
    protected boolean fCalledCharPropInit = false;

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy