All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.xerces.impl.XML11EntityScanner Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.xerces.impl;

import java.io.EOFException;
import java.io.IOException;

import org.apache.xerces.impl.msg.XMLMessageFormatter;
import org.apache.xerces.util.XML11Char;
import org.apache.xerces.util.XMLChar;
import org.apache.xerces.util.XMLStringBuffer;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLString;

/**
 * Implements the entity scanner methods in
 * the context of XML 1.1.
 *
 * @xerces.internal
 *
 * @author Michael Glavassevich, IBM
 * @author Neil Graham, IBM
 * @version $Id: XML11EntityScanner.java 699892 2008-09-28 21:08:27Z mrglavas $
 */
public class XML11EntityScanner
    extends XMLEntityScanner {

    //
    // Constructors
    //

    /** Default constructor. */
    public XML11EntityScanner() {
        super();
    } // ()

    //
    // XMLEntityScanner methods
    //

    /**
     * Returns the next character on the input.
     * 

* Note: The character is not consumed. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ public int peekChar() throws IOException { // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } // peek at character int c = fCurrentEntity.ch[fCurrentEntity.position]; // return peeked character if (fCurrentEntity.isExternal()) { return (c != '\r' && c != 0x85 && c != 0x2028) ? c : '\n'; } else { return c; } } // peekChar():int /** * Returns the next character on the input. *

* Note: The character is consumed. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ public int scanChar() throws IOException { // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } // scan character int c = fCurrentEntity.ch[fCurrentEntity.position++]; boolean external = false; if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && (external = fCurrentEntity.isExternal()))) { fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { fCurrentEntity.ch[0] = (char)c; load(1, false); } if (c == '\r' && external) { int cc = fCurrentEntity.ch[fCurrentEntity.position++]; if (cc != '\n' && cc != 0x85) { fCurrentEntity.position--; } } c = '\n'; } // return character that was scanned fCurrentEntity.columnNumber++; return c; } // scanChar():int /** * Returns a string matching the NMTOKEN production appearing immediately * on the input as a symbol, or null if NMTOKEN Name string is present. *

* Note: The NMTOKEN characters are consumed. *

* Note: The string returned must be a symbol. The * SymbolTable can be used for this purpose. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. * * @see org.apache.xerces.util.SymbolTable * @see org.apache.xerces.util.XML11Char#isXML11Name */ public String scanNmtoken() throws IOException { // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } // scan nmtoken int offset = fCurrentEntity.position; do { char ch = fCurrentEntity.ch[fCurrentEntity.position]; if (XML11Char.isXML11Name(ch)) { if (++fCurrentEntity.position == fCurrentEntity.count) { int length = fCurrentEntity.position - offset; if (length == fCurrentEntity.ch.length) { // bad luck we have to resize our buffer char[] tmp = new char[fCurrentEntity.ch.length << 1]; System.arraycopy(fCurrentEntity.ch, offset, tmp, 0, length); fCurrentEntity.ch = tmp; } else { System.arraycopy(fCurrentEntity.ch, offset, fCurrentEntity.ch, 0, length); } offset = 0; if (load(length, false)) { break; } } } else if (XML11Char.isXML11NameHighSurrogate(ch)) { if (++fCurrentEntity.position == fCurrentEntity.count) { int length = fCurrentEntity.position - offset; if (length == fCurrentEntity.ch.length) { // bad luck we have to resize our buffer char[] tmp = new char[fCurrentEntity.ch.length << 1]; System.arraycopy(fCurrentEntity.ch, offset, tmp, 0, length); fCurrentEntity.ch = tmp; } else { System.arraycopy(fCurrentEntity.ch, offset, fCurrentEntity.ch, 0, length); } offset = 0; if (load(length, false)) { --fCurrentEntity.startPosition; --fCurrentEntity.position; break; } } char ch2 = fCurrentEntity.ch[fCurrentEntity.position]; if ( !XMLChar.isLowSurrogate(ch2) || !XML11Char.isXML11Name(XMLChar.supplemental(ch, ch2)) ) { --fCurrentEntity.position; break; } if (++fCurrentEntity.position == fCurrentEntity.count) { int length = fCurrentEntity.position - offset; if (length == fCurrentEntity.ch.length) { // bad luck we have to resize our 
buffer char[] tmp = new char[fCurrentEntity.ch.length << 1]; System.arraycopy(fCurrentEntity.ch, offset, tmp, 0, length); fCurrentEntity.ch = tmp; } else { System.arraycopy(fCurrentEntity.ch, offset, fCurrentEntity.ch, 0, length); } offset = 0; if (load(length, false)) { break; } } } else { break; } } while (true); int length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length; // return nmtoken String symbol = null; if (length > 0) { symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); } return symbol; } // scanNmtoken():String /** * Returns a string matching the Name production appearing immediately * on the input as a symbol, or null if no Name string is present. *

* Note: The Name characters are consumed. *

* Note: The string returned must be a symbol. The * SymbolTable can be used for this purpose. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. * * @see org.apache.xerces.util.SymbolTable * @see org.apache.xerces.util.XML11Char#isXML11Name * @see org.apache.xerces.util.XML11Char#isXML11NameStart */ public String scanName() throws IOException { // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } // scan name int offset = fCurrentEntity.position; char ch = fCurrentEntity.ch[offset]; if (XML11Char.isXML11NameStart(ch)) { if (++fCurrentEntity.position == fCurrentEntity.count) { fCurrentEntity.ch[0] = ch; offset = 0; if (load(1, false)) { fCurrentEntity.columnNumber++; String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); return symbol; } } } else if (XML11Char.isXML11NameHighSurrogate(ch)) { if (++fCurrentEntity.position == fCurrentEntity.count) { fCurrentEntity.ch[0] = ch; offset = 0; if (load(1, false)) { --fCurrentEntity.position; --fCurrentEntity.startPosition; return null; } } char ch2 = fCurrentEntity.ch[fCurrentEntity.position]; if ( !XMLChar.isLowSurrogate(ch2) || !XML11Char.isXML11NameStart(XMLChar.supplemental(ch, ch2)) ) { --fCurrentEntity.position; return null; } if (++fCurrentEntity.position == fCurrentEntity.count) { fCurrentEntity.ch[0] = ch; fCurrentEntity.ch[1] = ch2; offset = 0; if (load(2, false)) { fCurrentEntity.columnNumber += 2; String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 2); return symbol; } } } else { return null; } do { ch = fCurrentEntity.ch[fCurrentEntity.position]; if (XML11Char.isXML11Name(ch)) { if (++fCurrentEntity.position == fCurrentEntity.count) { int length = fCurrentEntity.position - offset; if (length == fCurrentEntity.ch.length) { // bad luck we have to resize our buffer char[] tmp = new char[fCurrentEntity.ch.length << 1]; System.arraycopy(fCurrentEntity.ch, offset, tmp, 0, length); fCurrentEntity.ch 
= tmp; } else { System.arraycopy(fCurrentEntity.ch, offset, fCurrentEntity.ch, 0, length); } offset = 0; if (load(length, false)) { break; } } } else if (XML11Char.isXML11NameHighSurrogate(ch)) { if (++fCurrentEntity.position == fCurrentEntity.count) { int length = fCurrentEntity.position - offset; if (length == fCurrentEntity.ch.length) { // bad luck we have to resize our buffer char[] tmp = new char[fCurrentEntity.ch.length << 1]; System.arraycopy(fCurrentEntity.ch, offset, tmp, 0, length); fCurrentEntity.ch = tmp; } else { System.arraycopy(fCurrentEntity.ch, offset, fCurrentEntity.ch, 0, length); } offset = 0; if (load(length, false)) { --fCurrentEntity.position; --fCurrentEntity.startPosition; break; } } char ch2 = fCurrentEntity.ch[fCurrentEntity.position]; if ( !XMLChar.isLowSurrogate(ch2) || !XML11Char.isXML11Name(XMLChar.supplemental(ch, ch2)) ) { --fCurrentEntity.position; break; } if (++fCurrentEntity.position == fCurrentEntity.count) { int length = fCurrentEntity.position - offset; if (length == fCurrentEntity.ch.length) { // bad luck we have to resize our buffer char[] tmp = new char[fCurrentEntity.ch.length << 1]; System.arraycopy(fCurrentEntity.ch, offset, tmp, 0, length); fCurrentEntity.ch = tmp; } else { System.arraycopy(fCurrentEntity.ch, offset, fCurrentEntity.ch, 0, length); } offset = 0; if (load(length, false)) { break; } } } else { break; } } while (true); int length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length; // return name String symbol = null; if (length > 0) { symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); } return symbol; } // scanName():String /** * Returns a string matching the NCName production appearing immediately * on the input as a symbol, or null if no NCName string is present. *

* Note: The NCName characters are consumed. *

* Note: The string returned must be a symbol. The * SymbolTable can be used for this purpose. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. * * @see org.apache.xerces.util.SymbolTable * @see org.apache.xerces.util.XML11Char#isXML11NCName * @see org.apache.xerces.util.XML11Char#isXML11NCNameStart */ public String scanNCName() throws IOException { // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } // scan name int offset = fCurrentEntity.position; char ch = fCurrentEntity.ch[offset]; if (XML11Char.isXML11NCNameStart(ch)) { if (++fCurrentEntity.position == fCurrentEntity.count) { fCurrentEntity.ch[0] = ch; offset = 0; if (load(1, false)) { fCurrentEntity.columnNumber++; String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); return symbol; } } } else if (XML11Char.isXML11NameHighSurrogate(ch)) { if (++fCurrentEntity.position == fCurrentEntity.count) { fCurrentEntity.ch[0] = ch; offset = 0; if (load(1, false)) { --fCurrentEntity.position; --fCurrentEntity.startPosition; return null; } } char ch2 = fCurrentEntity.ch[fCurrentEntity.position]; if ( !XMLChar.isLowSurrogate(ch2) || !XML11Char.isXML11NCNameStart(XMLChar.supplemental(ch, ch2)) ) { --fCurrentEntity.position; return null; } if (++fCurrentEntity.position == fCurrentEntity.count) { fCurrentEntity.ch[0] = ch; fCurrentEntity.ch[1] = ch2; offset = 0; if (load(2, false)) { fCurrentEntity.columnNumber += 2; String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 2); return symbol; } } } else { return null; } do { ch = fCurrentEntity.ch[fCurrentEntity.position]; if (XML11Char.isXML11NCName(ch)) { if (++fCurrentEntity.position == fCurrentEntity.count) { int length = fCurrentEntity.position - offset; if (length == fCurrentEntity.ch.length) { // bad luck we have to resize our buffer char[] tmp = new char[fCurrentEntity.ch.length << 1]; System.arraycopy(fCurrentEntity.ch, offset, tmp, 0, length); 
fCurrentEntity.ch = tmp; } else { System.arraycopy(fCurrentEntity.ch, offset, fCurrentEntity.ch, 0, length); } offset = 0; if (load(length, false)) { break; } } } else if (XML11Char.isXML11NameHighSurrogate(ch)) { if (++fCurrentEntity.position == fCurrentEntity.count) { int length = fCurrentEntity.position - offset; if (length == fCurrentEntity.ch.length) { // bad luck we have to resize our buffer char[] tmp = new char[fCurrentEntity.ch.length << 1]; System.arraycopy(fCurrentEntity.ch, offset, tmp, 0, length); fCurrentEntity.ch = tmp; } else { System.arraycopy(fCurrentEntity.ch, offset, fCurrentEntity.ch, 0, length); } offset = 0; if (load(length, false)) { --fCurrentEntity.startPosition; --fCurrentEntity.position; break; } } char ch2 = fCurrentEntity.ch[fCurrentEntity.position]; if ( !XMLChar.isLowSurrogate(ch2) || !XML11Char.isXML11NCName(XMLChar.supplemental(ch, ch2)) ) { --fCurrentEntity.position; break; } if (++fCurrentEntity.position == fCurrentEntity.count) { int length = fCurrentEntity.position - offset; if (length == fCurrentEntity.ch.length) { // bad luck we have to resize our buffer char[] tmp = new char[fCurrentEntity.ch.length << 1]; System.arraycopy(fCurrentEntity.ch, offset, tmp, 0, length); fCurrentEntity.ch = tmp; } else { System.arraycopy(fCurrentEntity.ch, offset, fCurrentEntity.ch, 0, length); } offset = 0; if (load(length, false)) { break; } } } else { break; } } while (true); int length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length; // return name String symbol = null; if (length > 0) { symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); } return symbol; } // scanNCName():String /** * Scans a qualified name from the input, setting the fields of the * QName structure appropriately. *

* Note: The qualified name characters are consumed. *

* Note: The strings used to set the values of the * QName structure must be symbols. The SymbolTable can be used for * this purpose. * * @param qname The qualified name structure to fill. * * @return Returns true if a qualified name appeared immediately on * the input and was scanned, false otherwise. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. * * @see org.apache.xerces.util.SymbolTable * @see org.apache.xerces.util.XML11Char#isXML11Name * @see org.apache.xerces.util.XML11Char#isXML11NameStart */ public boolean scanQName(QName qname) throws IOException { // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } // scan qualified name int offset = fCurrentEntity.position; char ch = fCurrentEntity.ch[offset]; if (XML11Char.isXML11NCNameStart(ch)) { if (++fCurrentEntity.position == fCurrentEntity.count) { fCurrentEntity.ch[0] = ch; offset = 0; if (load(1, false)) { fCurrentEntity.columnNumber++; String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1); qname.setValues(null, name, name, null); return true; } } } else if (XML11Char.isXML11NameHighSurrogate(ch)) { if (++fCurrentEntity.position == fCurrentEntity.count) { fCurrentEntity.ch[0] = ch; offset = 0; if (load(1, false)) { --fCurrentEntity.startPosition; --fCurrentEntity.position; return false; } } char ch2 = fCurrentEntity.ch[fCurrentEntity.position]; if ( !XMLChar.isLowSurrogate(ch2) || !XML11Char.isXML11NCNameStart(XMLChar.supplemental(ch, ch2)) ) { --fCurrentEntity.position; return false; } if (++fCurrentEntity.position == fCurrentEntity.count) { fCurrentEntity.ch[0] = ch; fCurrentEntity.ch[1] = ch2; offset = 0; if (load(2, false)) { fCurrentEntity.columnNumber += 2; String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 2); qname.setValues(null, name, name, null); return true; } } } else { return false; } int index = -1; boolean sawIncompleteSurrogatePair = false; do { ch = 
fCurrentEntity.ch[fCurrentEntity.position]; if (XML11Char.isXML11Name(ch)) { if (ch == ':') { if (index != -1) { break; } index = fCurrentEntity.position; } if (++fCurrentEntity.position == fCurrentEntity.count) { int length = fCurrentEntity.position - offset; if (length == fCurrentEntity.ch.length) { // bad luck we have to resize our buffer char[] tmp = new char[fCurrentEntity.ch.length << 1]; System.arraycopy(fCurrentEntity.ch, offset, tmp, 0, length); fCurrentEntity.ch = tmp; } else { System.arraycopy(fCurrentEntity.ch, offset, fCurrentEntity.ch, 0, length); } if (index != -1) { index = index - offset; } offset = 0; if (load(length, false)) { break; } } } else if (XML11Char.isXML11NameHighSurrogate(ch)) { if (++fCurrentEntity.position == fCurrentEntity.count) { int length = fCurrentEntity.position - offset; if (length == fCurrentEntity.ch.length) { // bad luck we have to resize our buffer char[] tmp = new char[fCurrentEntity.ch.length << 1]; System.arraycopy(fCurrentEntity.ch, offset, tmp, 0, length); fCurrentEntity.ch = tmp; } else { System.arraycopy(fCurrentEntity.ch, offset, fCurrentEntity.ch, 0, length); } if (index != -1) { index = index - offset; } offset = 0; if (load(length, false)) { sawIncompleteSurrogatePair = true; --fCurrentEntity.startPosition; --fCurrentEntity.position; break; } } char ch2 = fCurrentEntity.ch[fCurrentEntity.position]; if ( !XMLChar.isLowSurrogate(ch2) || !XML11Char.isXML11Name(XMLChar.supplemental(ch, ch2)) ) { sawIncompleteSurrogatePair = true; --fCurrentEntity.position; break; } if (++fCurrentEntity.position == fCurrentEntity.count) { int length = fCurrentEntity.position - offset; if (length == fCurrentEntity.ch.length) { // bad luck we have to resize our buffer char[] tmp = new char[fCurrentEntity.ch.length << 1]; System.arraycopy(fCurrentEntity.ch, offset, tmp, 0, length); fCurrentEntity.ch = tmp; } else { System.arraycopy(fCurrentEntity.ch, offset, fCurrentEntity.ch, 0, length); } if (index != -1) { index = index - offset; } 
offset = 0; if (load(length, false)) { break; } } } else { break; } } while (true); int length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length; if (length > 0) { String prefix = null; String localpart = null; String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); if (index != -1) { int prefixLength = index - offset; prefix = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, prefixLength); int len = length - prefixLength - 1; int startLocal = index +1; if (!XML11Char.isXML11NCNameStart(fCurrentEntity.ch[startLocal]) && (!XML11Char.isXML11NameHighSurrogate(fCurrentEntity.ch[startLocal]) || sawIncompleteSurrogatePair)){ fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "IllegalQName", null, XMLErrorReporter.SEVERITY_FATAL_ERROR); } localpart = fSymbolTable.addSymbol(fCurrentEntity.ch, index + 1, len); } else { localpart = rawname; } qname.setValues(prefix, localpart, rawname, null); return true; } return false; } // scanQName(QName):boolean /** * Scans a range of parsed character data, setting the fields of the * XMLString structure, appropriately. *

* Note: The characters are consumed. *

* Note: This method does not guarantee to return * the longest run of parsed character data. This method may return * before markup due to reaching the end of the input buffer or any * other reason. *

* Note: The fields contained in the XMLString * structure are not guaranteed to remain valid upon subsequent calls * to the entity scanner. Therefore, the caller is responsible for * immediately using the returned character data or making a copy of * the character data. * * @param content The content structure to fill. * * @return Returns the next character on the input, if known. This * value may be -1 but this does note designate * end of file. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ public int scanContent(XMLString content) throws IOException { // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } else if (fCurrentEntity.position == fCurrentEntity.count - 1) { fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; load(1, false); fCurrentEntity.position = 0; fCurrentEntity.startPosition = 0; } // normalize newlines int offset = fCurrentEntity.position; int c = fCurrentEntity.ch[offset]; int newlines = 0; boolean external = fCurrentEntity.isExternal(); if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) { do { c = fCurrentEntity.ch[fCurrentEntity.position++]; if ((c == '\r' ) && external) { newlines++; fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { offset = 0; fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition); fCurrentEntity.position = newlines; fCurrentEntity.startPosition = newlines; if (load(newlines, false)) { break; } } int cc = fCurrentEntity.ch[fCurrentEntity.position]; if (cc == '\n' || cc == 0x85) { fCurrentEntity.position++; offset++; } /*** NEWLINE NORMALIZATION ***/ else { newlines++; } } else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) { newlines++; fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { offset = 0; 
fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition); fCurrentEntity.position = newlines; fCurrentEntity.startPosition = newlines; if (load(newlines, false)) { break; } } } else { fCurrentEntity.position--; break; } } while (fCurrentEntity.position < fCurrentEntity.count - 1); for (int i = offset; i < fCurrentEntity.position; i++) { fCurrentEntity.ch[i] = '\n'; } int length = fCurrentEntity.position - offset; if (fCurrentEntity.position == fCurrentEntity.count - 1) { content.setValues(fCurrentEntity.ch, offset, length); return -1; } } // inner loop, scanning for content if (external) { while (fCurrentEntity.position < fCurrentEntity.count) { c = fCurrentEntity.ch[fCurrentEntity.position++]; if (!XML11Char.isXML11Content(c) || c == 0x85 || c == 0x2028) { fCurrentEntity.position--; break; } } } else { while (fCurrentEntity.position < fCurrentEntity.count) { c = fCurrentEntity.ch[fCurrentEntity.position++]; // In internal entities control characters are allowed to appear unescaped. if (!XML11Char.isXML11InternalEntityContent(c)) { fCurrentEntity.position--; break; } } } int length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length - newlines; content.setValues(fCurrentEntity.ch, offset, length); // return next character if (fCurrentEntity.position != fCurrentEntity.count) { c = fCurrentEntity.ch[fCurrentEntity.position]; // REVISIT: Does this need to be updated to fix the // #x0D ^#x0A newline normalization problem? -Ac if ((c == '\r' || c == 0x85 || c == 0x2028) && external) { c = '\n'; } } else { c = -1; } return c; } // scanContent(XMLString):int /** * Scans a range of attribute value data, setting the fields of the * XMLString structure, appropriately. *

* Note: The characters are consumed. *

* Note: This method does not guarantee to return * the longest run of attribute value data. This method may return * before the quote character due to reaching the end of the input * buffer or any other reason. *

* Note: The fields contained in the XMLString * structure are not guaranteed to remain valid upon subsequent calls * to the entity scanner. Therefore, the caller is responsible for * immediately using the returned character data or making a copy of * the character data. * * @param quote The quote character that signifies the end of the * attribute value data. * @param content The content structure to fill. * * @return Returns the next character on the input, if known. This * value may be -1 but this does note designate * end of file. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ public int scanLiteral(int quote, XMLString content) throws IOException { // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } else if (fCurrentEntity.position == fCurrentEntity.count - 1) { fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; load(1, false); fCurrentEntity.startPosition = 0; fCurrentEntity.position = 0; } // normalize newlines int offset = fCurrentEntity.position; int c = fCurrentEntity.ch[offset]; int newlines = 0; boolean external = fCurrentEntity.isExternal(); if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) { do { c = fCurrentEntity.ch[fCurrentEntity.position++]; if ((c == '\r' ) && external) { newlines++; fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { offset = 0; fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition); fCurrentEntity.position = newlines; fCurrentEntity.startPosition = newlines; if (load(newlines, false)) { break; } } int cc = fCurrentEntity.ch[fCurrentEntity.position]; if (cc == '\n' || cc == 0x85) { fCurrentEntity.position++; offset++; } /*** NEWLINE NORMALIZATION ***/ else { newlines++; } } else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) { newlines++; fCurrentEntity.lineNumber++; 
fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { offset = 0; fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition); fCurrentEntity.position = newlines; fCurrentEntity.startPosition = newlines; if (load(newlines, false)) { break; } } } else { fCurrentEntity.position--; break; } } while (fCurrentEntity.position < fCurrentEntity.count - 1); for (int i = offset; i < fCurrentEntity.position; i++) { fCurrentEntity.ch[i] = '\n'; } int length = fCurrentEntity.position - offset; if (fCurrentEntity.position == fCurrentEntity.count - 1) { content.setValues(fCurrentEntity.ch, offset, length); return -1; } } // scan literal value if (external) { while (fCurrentEntity.position < fCurrentEntity.count) { c = fCurrentEntity.ch[fCurrentEntity.position++]; if (c == quote || c == '%' || !XML11Char.isXML11Content(c) || c == 0x85 || c == 0x2028) { fCurrentEntity.position--; break; } } } else { while (fCurrentEntity.position < fCurrentEntity.count) { c = fCurrentEntity.ch[fCurrentEntity.position++]; // In internal entities control characters are allowed to appear unescaped. if ((c == quote && !fCurrentEntity.literal) || c == '%' || !XML11Char.isXML11InternalEntityContent(c)) { fCurrentEntity.position--; break; } } } int length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length - newlines; content.setValues(fCurrentEntity.ch, offset, length); // return next character if (fCurrentEntity.position != fCurrentEntity.count) { c = fCurrentEntity.ch[fCurrentEntity.position]; // NOTE: We don't want to accidentally signal the // end of the literal if we're expanding an // entity appearing in the literal. -Ac if (c == quote && fCurrentEntity.literal) { c = -1; } } else { c = -1; } return c; } // scanLiteral(int,XMLString):int /** * Scans a range of character data up to the specicied delimiter, * setting the fields of the XMLString structure, appropriately. *

* Note: The characters are consumed. *

* Note: This assumes that the internal buffer is * at least the same size, or bigger, than the length of the delimiter * and that the delimiter contains at least one character. *

* Note: This method does not guarantee to return * the longest run of character data. This method may return before * the delimiter due to reaching the end of the input buffer or any * other reason. *

* Note: The fields contained in the XMLString * structure are not guaranteed to remain valid upon subsequent calls * to the entity scanner. Therefore, the caller is responsible for * immediately using the returned character data or making a copy of * the character data. * * @param delimiter The string that signifies the end of the character * data to be scanned. * @param buffer The XMLStringBuffer to fill. * * @return Returns true if there is more data to scan, false otherwise. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ public boolean scanData(String delimiter, XMLStringBuffer buffer) throws IOException { boolean done = false; int delimLen = delimiter.length(); char charAt0 = delimiter.charAt(0); boolean external = fCurrentEntity.isExternal(); do { // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } boolean bNextEntity = false; while ((fCurrentEntity.position >= fCurrentEntity.count - delimLen) && (!bNextEntity)) { System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch, 0, fCurrentEntity.count - fCurrentEntity.position); bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false); fCurrentEntity.position = 0; fCurrentEntity.startPosition = 0; } if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) { // something must be wrong with the input: e.g., file ends an unterminated comment int length = fCurrentEntity.count - fCurrentEntity.position; buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length); fCurrentEntity.columnNumber += fCurrentEntity.count; fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition); fCurrentEntity.position = fCurrentEntity.count; fCurrentEntity.startPosition = fCurrentEntity.count; load(0,true); return false; } // normalize newlines int offset = fCurrentEntity.position; int c = fCurrentEntity.ch[offset]; int newlines = 0; if (c == '\n' || 
((c == '\r' || c == 0x85 || c == 0x2028) && external)) { do { c = fCurrentEntity.ch[fCurrentEntity.position++]; if ((c == '\r' ) && external) { newlines++; fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { offset = 0; fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition); fCurrentEntity.position = newlines; fCurrentEntity.startPosition = newlines; if (load(newlines, false)) { break; } } int cc = fCurrentEntity.ch[fCurrentEntity.position]; if (cc == '\n' || cc == 0x85) { fCurrentEntity.position++; offset++; } /*** NEWLINE NORMALIZATION ***/ else { newlines++; } } else if (c == '\n' || ((c == 0x85 || c == 0x2028) && external)) { newlines++; fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count) { offset = 0; fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition); fCurrentEntity.position = newlines; fCurrentEntity.startPosition = newlines; fCurrentEntity.count = newlines; if (load(newlines, false)) { break; } } } else { fCurrentEntity.position--; break; } } while (fCurrentEntity.position < fCurrentEntity.count - 1); for (int i = offset; i < fCurrentEntity.position; i++) { fCurrentEntity.ch[i] = '\n'; } int length = fCurrentEntity.position - offset; if (fCurrentEntity.position == fCurrentEntity.count - 1) { buffer.append(fCurrentEntity.ch, offset, length); return true; } } // iterate over buffer looking for delimiter if (external) { OUTER: while (fCurrentEntity.position < fCurrentEntity.count) { c = fCurrentEntity.ch[fCurrentEntity.position++]; if (c == charAt0) { // looks like we just hit the delimiter int delimOffset = fCurrentEntity.position - 1; for (int i = 1; i < delimLen; i++) { if (fCurrentEntity.position == fCurrentEntity.count) { fCurrentEntity.position -= i; break OUTER; } c = fCurrentEntity.ch[fCurrentEntity.position++]; if (delimiter.charAt(i) != c) { 
fCurrentEntity.position--; break; } } if (fCurrentEntity.position == delimOffset + delimLen) { done = true; break; } } else if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) { fCurrentEntity.position--; break; } // In external entities control characters cannot appear // as literals so do not skip over them. else if (!XML11Char.isXML11ValidLiteral(c)) { fCurrentEntity.position--; int length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length - newlines; buffer.append(fCurrentEntity.ch, offset, length); return true; } } } else { OUTER: while (fCurrentEntity.position < fCurrentEntity.count) { c = fCurrentEntity.ch[fCurrentEntity.position++]; if (c == charAt0) { // looks like we just hit the delimiter int delimOffset = fCurrentEntity.position - 1; for (int i = 1; i < delimLen; i++) { if (fCurrentEntity.position == fCurrentEntity.count) { fCurrentEntity.position -= i; break OUTER; } c = fCurrentEntity.ch[fCurrentEntity.position++]; if (delimiter.charAt(i) != c) { fCurrentEntity.position--; break; } } if (fCurrentEntity.position == delimOffset + delimLen) { done = true; break; } } else if (c == '\n') { fCurrentEntity.position--; break; } // Control characters are allowed to appear as literals // in internal entities. else if (!XML11Char.isXML11Valid(c)) { fCurrentEntity.position--; int length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length - newlines; buffer.append(fCurrentEntity.ch, offset, length); return true; } } } int length = fCurrentEntity.position - offset; fCurrentEntity.columnNumber += length - newlines; if (done) { length -= delimLen; } buffer.append(fCurrentEntity.ch, offset, length); // return true if string was skipped } while (!done); return !done; } // scanData(String,XMLString) /** * Skips a character appearing immediately on the input. *

* Note: The character is consumed only if it matches * the specified character. * * @param c The character to skip. * * @return Returns true if the character was skipped. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ public boolean skipChar(int c) throws IOException { // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } // skip character int cc = fCurrentEntity.ch[fCurrentEntity.position]; if (cc == c) { fCurrentEntity.position++; if (c == '\n') { fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; } else { fCurrentEntity.columnNumber++; } return true; } else if (c == '\n' && ((cc == 0x2028 || cc == 0x85) && fCurrentEntity.isExternal())) { fCurrentEntity.position++; fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; return true; } else if (c == '\n' && (cc == '\r' ) && fCurrentEntity.isExternal()) { // handle newlines if (fCurrentEntity.position == fCurrentEntity.count) { fCurrentEntity.ch[0] = (char)cc; load(1, false); } int ccc = fCurrentEntity.ch[++fCurrentEntity.position]; if (ccc == '\n' || ccc == 0x85) { fCurrentEntity.position++; } fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; return true; } // character was not skipped return false; } // skipChar(int):boolean /** * Skips space characters appearing immediately on the input. *

* Note: The characters are consumed only if they are * space characters. * * @return Returns true if at least one space character was skipped. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. * * @see org.apache.xerces.util.XMLChar#isSpace * @see org.apache.xerces.util.XML11Char#isXML11Space */ public boolean skipSpaces() throws IOException { // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } // skip spaces int c = fCurrentEntity.ch[fCurrentEntity.position]; // External -- Match: S + 0x85 + 0x2028, and perform end of line normalization if (fCurrentEntity.isExternal()) { if (XML11Char.isXML11Space(c)) { do { boolean entityChanged = false; // handle newlines if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) { fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count - 1) { fCurrentEntity.ch[0] = (char)c; entityChanged = load(1, true); if (!entityChanged) { // the load change the position to be 1, // need to restore it when entity not changed fCurrentEntity.startPosition = 0; fCurrentEntity.position = 0; } } if (c == '\r') { // REVISIT: Does this need to be updated to fix the // #x0D ^#x0A newline normalization problem? 
-Ac int cc = fCurrentEntity.ch[++fCurrentEntity.position]; if (cc != '\n' && cc != 0x85 ) { fCurrentEntity.position--; } } } else { fCurrentEntity.columnNumber++; } // load more characters, if needed if (!entityChanged) fCurrentEntity.position++; if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } } while (XML11Char.isXML11Space(c = fCurrentEntity.ch[fCurrentEntity.position])); return true; } } // Internal -- Match: S (only) else if (XMLChar.isSpace(c)) { do { boolean entityChanged = false; // handle newlines if (c == '\n') { fCurrentEntity.lineNumber++; fCurrentEntity.columnNumber = 1; if (fCurrentEntity.position == fCurrentEntity.count - 1) { fCurrentEntity.ch[0] = (char)c; entityChanged = load(1, true); if (!entityChanged) { // the load change the position to be 1, // need to restore it when entity not changed fCurrentEntity.startPosition = 0; fCurrentEntity.position = 0; } } } else { fCurrentEntity.columnNumber++; } // load more characters, if needed if (!entityChanged) fCurrentEntity.position++; if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); return true; } // no spaces were found return false; } // skipSpaces():boolean /** * Skips the specified string appearing immediately on the input. *

* Note: The characters are consumed only if they are * space characters. * * @param s The string to skip. * * @return Returns true if the string was skipped. * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. */ public boolean skipString(String s) throws IOException { // load more characters, if needed if (fCurrentEntity.position == fCurrentEntity.count) { load(0, true); } // skip string final int length = s.length(); for (int i = 0; i < length; i++) { char c = fCurrentEntity.ch[fCurrentEntity.position++]; if (c != s.charAt(i)) { fCurrentEntity.position -= i + 1; return false; } if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) { System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1); // REVISIT: Can a string to be skipped cross an // entity boundary? -Ac if (load(i + 1, false)) { fCurrentEntity.startPosition -= i + 1; fCurrentEntity.position -= i + 1; return false; } } } fCurrentEntity.columnNumber += length; return true; } // skipString(String):boolean } // class XML11EntityScanner





© 2015 - 2024 Weber Informatics LLC | Privacy Policy