nu.validator.checker.XmlPiChecker Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of validator Show documentation
An HTML-checking library (used by https://html5.validator.nu and the HTML5 facet of the W3C Validator)
There is a newer version: 20.7.2
/*
 * Copyright (c) 2010 Mozilla Foundation
 * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla 
 * Foundation, and Opera Software ASA.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a 
 * copy of this software and associated documentation files (the "Software"), 
 * to deal in the Software without restriction, including without limitation 
 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 * and/or sell copies of the Software, and to permit persons to whom the 
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in 
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 * DEALINGS IN THE SOFTWARE.
 */

package nu.validator.checker;

import org.xml.sax.Attributes;
import org.xml.sax.SAXParseException;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;

import org.xml.sax.helpers.AttributesImpl;

import org.relaxng.datatype.DatatypeException;

import nu.validator.datatype.Html5DatatypeLibrary;
import nu.validator.datatype.Html5DatatypeException;
import nu.validator.datatype.Charset;
import nu.validator.datatype.IriRef;
import nu.validator.datatype.MediaQuery;
import nu.validator.datatype.MimeType;

public class XmlPiChecker extends Checker implements LexicalHandler {

    // Names of the five XML predefined entities, each with its terminating
    // semicolon. Entries correspond by index to VALUES.
    private static final char[][] NAMES = { "amp;".toCharArray(),
            "lt;".toCharArray(), "gt;".toCharArray(), "quot;".toCharArray(),
            "apos;".toCharArray(), };

    // Expansions for the entries of NAMES, in the same order: & < > " '
    private static final char[][] VALUES = { { '\u0026' }, { '\u003c' },
            { '\u003e' }, { '\u0022' }, { '\'' }, };

    // NOTE(review): not referenced in the visible part of this file;
    // presumably carried over from the Tokenizer this code was adapted
    // from -- confirm before relying on it.
    private static final int DATA_AND_RCDATA_MASK = ~1;

    // State identifiers for the state machine in
    // getPseudoAttributesFromPiData().
    private static final int BEFORE_ATTRIBUTE_NAME = 0;

    private static final int ATTRIBUTE_NAME = 1;

    private static final int AFTER_ATTRIBUTE_NAME = 2;

    private static final int BEFORE_ATTRIBUTE_VALUE = 3;

    private static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED = 4;

    private static final int ATTRIBUTE_VALUE_SINGLE_QUOTED = 5;

    private static final int ATTRIBUTE_VALUE_UNQUOTED = 6;

    private static final int AFTER_ATTRIBUTE_VALUE_QUOTED = 7;

    // Entered when "&" is seen inside an attribute value.
    private static final int CONSUME_CHARACTER_REFERENCE = 8;

    // NOTE(review): the remaining states are handled outside the visible
    // part of the state machine; presumably they drive character-reference
    // parsing -- confirm.
    private static final int CONSUME_NCR = 9;

    private static final int CHARACTER_REFERENCE_LOOP = 10;

    private static final int HEX_NCR_LOOP = 11;

    private static final int DECIMAL_NRC_LOOP = 12;

    private static final int HANDLE_NCR_VALUE = 13;

    // Presumably the increment used when a buffer has to be enlarged --
    // TODO confirm against the buffer helpers.
    private static final int BUFFER_GROW_BY = 1024;

    // U+FFFD REPLACEMENT CHARACTER as a one-element array.
    private static final char[] REPLACEMENT_CHARACTER = { '\uFFFD' };

    // Presumably the offset used to derive a lead surrogate from an astral
    // code point -- TODO confirm against the NCR handling code.
    private static final int LEAD_OFFSET = (0xD800 - (0x10000 >> 10));

    // Short scratch buffer and its fill level. NOTE(review): appears to hold
    // the pseudo-attribute name (and character-reference text) while
    // tokenizing -- confirm against the strBuf helpers.
    private char[] strBuf = new char[64];

    private int strBufLen;

    // Long scratch buffer and its fill level. NOTE(review): appears to hold
    // the pseudo-attribute value while tokenizing -- confirm against the
    // longStrBuf helpers.
    private char[] longStrBuf = new char[1024];

    private int longStrBufLen;

    // Reusable one- and two-char arrays; presumably for emitting a BMP
    // character or a surrogate pair from a numeric character reference.
    private final char[] bmpChar = new char[1];

    private final char[] astralChar = new char[2];

    // NOTE(review): the fields from entCol through additional are not used
    // in the visible part of this file; presumably working state for
    // character-reference parsing -- confirm.
    private int entCol;

    private int lo;

    private int hi;

    private int candidate;

    private int strBufMark;

    private int prevValue;

    private int value;

    private boolean seenDigits;

    private char additional;

    private boolean alreadyWarnedAboutPrivateUseCharacters;

    // Pseudo-attributes collected from the instruction currently being
    // parsed; reset to null at the start of getPseudoAttributesFromPiData().
    private AttributesImpl attributes;

    // Name of the pseudo-attribute currently being parsed; also used in
    // error messages.
    private String attributeName;

    // True between startDTD() and endDTD().
    private boolean inDoctype;

    // True once startElement() has been called for the current document.
    private boolean alreadyHasElement;

    // Target of the most recent processing instruction; used in messages.
    private String piTarget = null;

    // True once an xml-stylesheet instruction whose "type" indicates XSLT
    // has been seen in the current document.
    private boolean hasXsltPi;

    /**
     * The pseudo-attribute names recognized in an "xml-stylesheet"
     * processing instruction, plus INVALID for anything else.
     */
    private enum PseudoAttrName {
        HREF, TYPE, TITLE, MEDIA, CHARSET, ALTERNATE, INVALID;

        /**
         * Maps a pseudo-attribute name to its enum constant.
         *
         * Only names that are already entirely lowercase are accepted; any
         * other input, including null or an unknown name, yields INVALID.
         *
         * @param str the pseudo-attribute name as written in the instruction
         * @return the matching constant, or INVALID
         */
        private static PseudoAttrName toCaps(String str) {
            PseudoAttrName result = INVALID;
            try {
                String lowered = str.toLowerCase();
                if (lowered.equals(str)) {
                    result = valueOf(newAsciiUpperCaseStringFromString(str));
                }
            } catch (Exception ex) {
                // Null input or a name with no matching constant.
                result = INVALID;
            }
            return result;
        }
    }

    /**
     * Creates a checker with all per-document flags cleared.
     */
    public XmlPiChecker() {
        super();
        this.alreadyHasElement = false;
        this.hasXsltPi = false;
        this.inDoctype = false;
    }

    /**
     * Records that the parser has entered the DOCTYPE declaration, so that
     * instructions seen before endDTD() are known to be inside it.
     *
     * @param name unused
     * @param publicId unused
     * @param systemId unused
     */
    public void startDTD(String name, String publicId, String systemId)
            throws SAXException {
        this.inDoctype = true;
    }

    /**
     * Records that the parser has left the DOCTYPE declaration.
     */
    public void endDTD() throws SAXException {
        this.inDoctype = false;
    }

    /**
     * LexicalHandler callback; this checker takes no action on it.
     *
     * @param name unused
     */
    public void startEntity(String name) throws SAXException {
    }

    /**
     * LexicalHandler callback; this checker takes no action on it.
     *
     * @param name unused
     */
    public void endEntity(String name) throws SAXException {
    }

    /**
     * LexicalHandler callback; this checker takes no action on it.
     */
    public void startCDATA() throws SAXException {
    }

    /**
     * LexicalHandler callback; this checker takes no action on it.
     */
    public void endCDATA() throws SAXException {
    }

    /**
     * LexicalHandler callback; this checker takes no action on it.
     *
     * @param ch unused
     * @param start unused
     * @param len unused
     */
    public void comment(char[] ch, int start, int len) throws SAXException {
    }

    /**
     * Clears the per-document flags so the checker starts each document
     * with no DOCTYPE, no element and no XSLT instruction seen.
     */
    @Override public void startDocument() throws SAXException {
        this.alreadyHasElement = false;
        this.hasXsltPi = false;
        this.inDoctype = false;
    }

    /**
     * Notes that an element has been seen; any "xml-stylesheet" instruction
     * arriving after this point is reported as misplaced.
     *
     * @param uri unused
     * @param localName unused
     * @param qName unused
     * @param atts unused
     */
    @Override public void startElement(String uri, String localName,
            String qName, Attributes atts) throws SAXException {
        this.alreadyHasElement = true;
    }

    /**
     * Remembers the instruction target and, for "xml-stylesheet"
     * instructions, checks the pseudo-attributes in the data part.
     *
     * @param target the processing instruction target
     * @param data the processing instruction data
     */
    @Override public void processingInstruction(String target, String data)
            throws SAXException {
        this.piTarget = target;
        if (!"xml-stylesheet".equals(target)) {
            // Only xml-stylesheet instructions are checked.
            return;
        }
        checkXmlStylesheetPiData(data);
    }

    /**
     * Reports a pseudo-attribute value that failed datatype validation.
     *
     * Does nothing when no error handler is installed. Otherwise a
     * DatatypeMismatchException describing the bad value is passed to the
     * handler's error() method; the exception carries a flag saying whether
     * the underlying datatype problem is only a warning.
     *
     * @param e the exception raised by the datatype check
     * @param datatypeClass the datatype class the value was checked against
     * @param attrName the pseudo-attribute name, used in the message
     * @param attrValue the offending value, used in the message
     * @throws SAXException if the error handler throws it
     * @throws ClassNotFoundException never thrown by this body; the clause
     *         is kept because existing callers catch it
     */
    private void errBadPseudoAttrDatatype(DatatypeException e,
            Class datatypeClass, String attrName, String attrValue)
            throws SAXException, ClassNotFoundException {
        if (getErrorHandler() == null) {
            return;
        }
        // Only Html5DatatypeException knows about warnings. Any other
        // DatatypeException is treated as a plain error instead of failing
        // with a ClassCastException on the downcast.
        boolean warning = (e instanceof Html5DatatypeException)
                && ((Html5DatatypeException) e).isWarning();
        DatatypeMismatchException bpe = new DatatypeMismatchException(
                "Bad value \u201c" + attrValue + "\u201d for \u201c"
                        + piTarget + "\u201d pseudo-attribute \u201c"
                        + attrName + "\u201d. "
                        + e.getMessage(),
                getDocumentLocator(), datatypeClass, warning);
        getErrorHandler().error(bpe);
    }

    /**
     * Reports that the current pseudo-attribute was written without a value.
     * The message names the instruction target and the pseudo-attribute.
     */
    private void errAttributeWithNoValue() throws SAXException {
        StringBuilder message = new StringBuilder();
        message.append("Found \u201c").append(piTarget);
        message.append("\u201d pseudo-attribute \u201c");
        message.append(attributeName);
        message.append("\u201d without a value. All pseudo-attributes in \u201c");
        message.append(piTarget);
        message.append("\u201d instructions must have values.");
        err(message.toString());
    }

    /**
     * Reports that the current pseudo-attribute has a less-than sign in its
     * value. The message names the instruction target and the
     * pseudo-attribute.
     */
    private void errAttributeValueContainsLt() throws SAXException {
        StringBuilder message = new StringBuilder();
        message.append("Found \u201c");
        message.append(piTarget);
        message.append("\u201d pseudo-attribute \u201c");
        message.append(attributeName);
        message.append("\u201d with the character \u201c<\u201d in its value. All pseudo-attribute values in \u201c");
        message.append(piTarget);
        message.append("\u201d instructions must not contain the character \u201c<\u201d.");
        err(message.toString());
    }

    /**
     * Reports a hexadecimal character reference written with an uppercase
     * "X" after the number sign.
     */
    private void errUpperCaseXinHexNcr() throws SAXException {
        final String message = "In XML documents, a hexadecimal character reference must begin with "
                + "\u201c&#x\u201d (lowercase \u201cx\u201d), not \u201c&#X\u201d (uppercase \u201cX\u201d).";
        err(message);
    }

    /**
     * Checks the data part of an "xml-stylesheet" processing instruction.
     *
     * Warns when the instruction appears inside the DOCTYPE. Reports an
     * error and stops when it appears after an element. Otherwise the data
     * is split into pseudo-attributes and each one is checked:
     * href, type, media and charset against their datatypes, alternate
     * against "yes"/"no", and any other name is rejected. Finally the
     * instruction-level rules are applied: href is required, alternate="yes"
     * needs a non-empty title, and some pseudo-attributes are ignored by
     * browsers.
     *
     * @param data the instruction data; null is treated like empty data
     * @throws SAXException if reporting a message fails
     */
    private void checkXmlStylesheetPiData(String data) throws SAXException {
        boolean hasHref = false;
        boolean hasTitle = false;
        boolean hasMedia = false;
        boolean hasCharset = false;
        boolean hasAlternate = false;
        boolean hasNonEmptyTitle = false;
        boolean alternateIsYes = false;
        boolean badDatatype = false;
        if (inDoctype) {
            warn("An \u201cxml-stylesheet\u201d instruction should not be used within a \u201cDOCTYPE\u201d declaration.");
        }
        if (alreadyHasElement) {
            err("Any \u201cxml-stylesheet\u201d instruction in a document must occur before any elements in the document. "
                    + "Suppressing any further errors for this \u201cxml-stylesheet\u201d instruction.");
            return;
        }
        // SAX permits null data for an instruction without any; treat it as
        // empty so the missing-href error below is still reported.
        if (data != null && !"".equals(data)) {
            Html5DatatypeLibrary dl = new Html5DatatypeLibrary();
            AttributesImpl patts = getPseudoAttributesFromPiData(data);
            String attrName;
            String attrValue;
            for (int i = 0; i < patts.getLength(); i++) {
                attrName = patts.getQName(i);
                attrValue = patts.getValue(i);
                switch (PseudoAttrName.toCaps(attrName)) {
                    case HREF:
                        hasHref = true;
                        if (attrValue == null) {
                            break;
                        }
                        try {
                            IriRef ir = (IriRef) dl.createDatatype("iri-ref");
                            ir.checkValid(attrValue);
                        } catch (DatatypeException e) {
                            try {
                                errBadPseudoAttrDatatype(e, IriRef.class,
                                        "href", attrValue);
                            } catch (ClassNotFoundException ignored) {
                                // Declared by the helper; nothing to do.
                            }
                        }
                        break;
                    case TYPE:
                        if (attrValue == null) {
                            break;
                        }
                        // Judge each "type" on its own: an earlier bad value
                        // must not suppress the checks for this one.
                        badDatatype = false;
                        try {
                            MimeType mt = (MimeType) dl.createDatatype("mime-type");
                            mt.checkValid(attrValue);
                            attrValue = newAsciiLowerCaseStringFromString(attrValue);
                        } catch (DatatypeException e) {
                            badDatatype = true;
                            try {
                                errBadPseudoAttrDatatype(e, MimeType.class,
                                        "type", attrValue);
                            } catch (ClassNotFoundException ignored) {
                                // Declared by the helper; nothing to do.
                            }
                        }
                        if (!badDatatype) {
                            // The plus sign is escaped so that it matches
                            // literally instead of acting as a quantifier.
                            if (attrValue.matches("application/xml(;.*)?")
                                    || attrValue.matches("text/xml(;.*)?")
                                    || attrValue.matches("application/xslt\\+xml(;.*)?")
                                    || attrValue.matches("text/xsl(;.*)?")
                                    || attrValue.matches("text/xslt(;.*)?")) {
                                if (!attrValue.matches("text/xsl(;.*)?")) {
                                    warn("For indicating XSLT, \u201ctext/xsl\u201d is the only MIME type for the "
                                            + "\u201cxml-stylesheet\u201d pseudo-attribute \u201ctype\u201d that is supported across browsers.");
                                }
                                if (hasXsltPi) {
                                    warn("Browsers do not support multiple \u201cxml-stylesheet\u201d instructions with a "
                                            + "\u201ctype\u201d value that indicates XSLT.");
                                }
                                hasXsltPi = true;
                            } else if (!attrValue.matches("^text/css(;.*)?$")) {
                                warn("\u201ctext/css\u201d and \u201ctext/xsl\u201d are the only MIME types for the "
                                        + "\u201cxml-stylesheet\u201d pseudo-attribute \u201ctype\u201d that are supported across browsers.");
                            }
                        }
                        break;
                    case TITLE:
                        hasTitle = true;
                        if (attrValue == null) {
                            break;
                        }
                        if (!"".equals(attrValue)) {
                            hasNonEmptyTitle = true;
                        }
                        break;
                    case MEDIA:
                        hasMedia = true;
                        if (attrValue == null) {
                            break;
                        }
                        try {
                            MediaQuery mq = (MediaQuery) dl.createDatatype("media-query");
                            mq.checkValid(attrValue);
                        } catch (DatatypeException e) {
                            try {
                                errBadPseudoAttrDatatype(e, MediaQuery.class,
                                        "media", attrValue);
                            } catch (ClassNotFoundException ignored) {
                                // Declared by the helper; nothing to do.
                            }
                        }
                        break;
                    case CHARSET:
                        hasCharset = true;
                        if (attrValue == null) {
                            break;
                        }
                        try {
                            Charset c = (Charset) dl.createDatatype("charset");
                            c.checkValid(attrValue);
                        } catch (DatatypeException e) {
                            try {
                                errBadPseudoAttrDatatype(e, Charset.class,
                                        "charset", attrValue);
                            } catch (ClassNotFoundException ignored) {
                                // Declared by the helper; nothing to do.
                            }
                        }
                        break;
                    case ALTERNATE:
                        hasAlternate = true;
                        if (attrValue == null) {
                            break;
                        }
                        if ("yes".equals(attrValue)) {
                            alternateIsYes = true;
                        } else if (!"no".equals(attrValue)) {
                            err("The value of the \u201cxml-stylesheet\u201d pseudo-attribute \u201calternate\u201d "
                                    + "must be either \u201cyes\u201d or \u201cno\u201d.");
                        }
                        break;
                    default:
                        err("Pseudo-attribute \u201c"
                                + attrName
                                + "\u201D not allowed in \u201cxml-stylesheet\u201d instruction.");
                        break;
                }
            }
            if (alternateIsYes && !hasNonEmptyTitle) {
                err("An \u201cxml-stylesheet\u201d instruction with an \u201calternate\u201d pseudo-attribute "
                        + "whose value is \u201cyes\u201d must also have a \u201ctitle\u201d pseudo-attribute with a non-empty value.");
            }
        }
        if (!hasHref) {
            err("\u201cxml-stylesheet\u201d instruction lacks \u201chref\u201d pseudo-attribute. "
                    + "The \u201chref\u201d pseudo-attribute is required in all \u201cxml-stylesheet\u201d instructions.");
        }
        // NOTE(review): hasXsltPi is document-wide, so this warning also
        // fires for a later non-XSLT instruction once any XSLT instruction
        // has been seen -- confirm that this is intended.
        if (hasXsltPi && (hasTitle || hasMedia || hasCharset || hasAlternate)) {
            warn("When processing \u201cxml-stylesheet\u201d instructions, browsers ignore the pseudo-attributes "
                    + "\u201ctitle\u201d, \u201cmedia\u201d, \u201ccharset\u201d, and \u201calternate\u201d.");
        } else if (hasCharset) {
            warn("Some browsers ignore the value of the \u201cxml-stylesheet\u201d pseudo-attribute \u201ccharset\u201d.");
        }
    }

    /**
     * Collect a set of attributes and values from the data part of a PI.
     * 
     * 
     * The bulk of this method and associated methods that follow it here are
     * copied from the nu.validator.htmlparser.impl.Tokenizer class, with
     * appropriate modifications.
     * 
     * 
     * @see nu.validator.htmlparser.impl.Tokenizer
     * @see nu.validator.htmlparser.impl.ErrorReportingTokenizer
     * 
     */
    private AttributesImpl getPseudoAttributesFromPiData(String buf)
            throws SAXException {

        int state = BEFORE_ATTRIBUTE_NAME;
        int returnState = BEFORE_ATTRIBUTE_NAME;
        char c = '\u0000';
        int pos = -1;
        int endPos = buf.length();
        boolean reconsume = false;
        attributes = null;
        attributeName = null;
        stateloop: for (;;) {
            switch (state) {
                case BEFORE_ATTRIBUTE_NAME:
                    beforeattributenameloop: for (;;) {
                        if (reconsume) {
                            reconsume = false;
                        } else {
                            if (++pos == endPos) {
                                break stateloop;
                            }
                            c = buf.charAt(pos);
                        }
                        /*
                         * Consume the next input character:
                         */
                        switch (c) {
                            case '\n':
                            case ' ':
                            case '\t':
                                continue;
                            case '/':
                            case '>':
                            case '\"':
                            case '\'':
                            case '<':
                            case '=':
                                /*
                                 * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
                                 * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
                                 * SIGN (=) Parse error.
                                 */
                                errBadCharBeforeAttributeNameOrNull(c);
                                /*
                                 * Treat it as per the "anything else" entry
                                 * below.
                                 */
                            default:
                                /*
                                 * Anything else Start a new attribute in the
                                 * current tag token.
                                 */
                                /*
                                 * Set that attribute's name to the current
                                 * input character,
                                 */
                                clearStrBufAndAppendCurrentC(c);
                                /*
                                 * and its value to the empty string.
                                 */
                                // Will do later.
                                /*
                                 * Switch to the attribute name state.
                                 */
                                state = ATTRIBUTE_NAME;
                                break beforeattributenameloop;
                            // continue stateloop;
                        }
                    }
                    // FALLTHRU DON'T REORDER
                case ATTRIBUTE_NAME:
                    attributenameloop: for (;;) {
                        if (++pos == endPos) {
                            attributeNameComplete();
                            addAttributeWithoutValue();
                            break stateloop;
                        }
                        c = buf.charAt(pos);
                        /*
                         * Consume the next input character:
                         */
                        switch (c) {
                            case '\n':
                            case ' ':
                            case '\t':
                                attributeNameComplete();
                                state = AFTER_ATTRIBUTE_NAME;
                                continue stateloop;
                            case '=':
                                /*
                                 * U+003D EQUALS SIGN (=) Switch to the before
                                 * attribute value state.
                                 */
                                attributeNameComplete();
                                state = BEFORE_ATTRIBUTE_VALUE;
                                break attributenameloop;
                            // continue stateloop;
                            case '\"':
                            case '\'':
                            case '<':
                                /*
                                 * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
                                 * (') U+003C LESS-THAN SIGN (<) Parse error.
                                 */
                                errQuoteOrLtInAttributeNameOrNull(c);
                                /*
                                 * Treat it as per the "anything else" entry
                                 * below.
                                 */
                            default:
                                /*
                                 * Anything else Append the current input
                                 * character to the current attribute's name.
                                 */
                                appendStrBuf(c);
                                /*
                                 * Stay in the attribute name state.
                                 */
                                continue;
                        }
                    }
                    // FALLTHRU DON'T REORDER
                case BEFORE_ATTRIBUTE_VALUE:
                    beforeattributevalueloop: for (;;) {
                        if (++pos == endPos) {
                            addAttributeWithoutValue();
                            break stateloop;
                        }
                        c = buf.charAt(pos);
                        /*
                         * Consume the next input character:
                         */
                        switch (c) {
                            case '\n':
                            case ' ':
                            case '\t':
                                continue;
                            case '"':
                                /*
                                 * U+0022 QUOTATION MARK (") Switch to the
                                 * attribute value (double-quoted) state.
                                 */
                                clearLongStrBufForNextState();
                                state = ATTRIBUTE_VALUE_DOUBLE_QUOTED;
                                break beforeattributevalueloop;
                            // continue stateloop;
                            case '&':
                                /*
                                 * U+0026 AMPERSAND (&) Switch to the attribute
                                 * value (unquoted) state and reconsume this
                                 * input character.
                                 */
                                clearLongStrBuf();
                                state = ATTRIBUTE_VALUE_UNQUOTED;
                                reconsume = true;
                                continue stateloop;
                            case '\'':
                                /*
                                 * U+0027 APOSTROPHE (') Switch to the attribute
                                 * value (single-quoted) state.
                                 */
                                clearLongStrBufForNextState();
                                state = ATTRIBUTE_VALUE_SINGLE_QUOTED;
                                continue stateloop;
                            case '<':
                            case '=':
                            case '`':
                                /*
                                 * U+003C LESS-THAN SIGN (<) U+003D EQUALS SIGN
                                 * (=) U+0060 GRAVE ACCENT (`)
                                 */
                                errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
                                /*
                                 * Treat it as per the "anything else" entry
                                 * below.
                                 */
                            default:
                                /*
                                 * Anything else Append the current input
                                 * character to the current attribute's value.
                                 */
                                clearLongStrBufAndAppendCurrentC(c);
                                /*
                                 * Switch to the attribute value (unquoted)
                                 * state.
                                 */

                                state = ATTRIBUTE_VALUE_UNQUOTED;
                                continue stateloop;
                        }
                    }
                    // FALLTHRU DON'T REORDER
                case ATTRIBUTE_VALUE_DOUBLE_QUOTED:
                    attributevaluedoublequotedloop: for (;;) {
                        if (reconsume) {
                            reconsume = false;
                        } else {
                            if (++pos == endPos) {
                                addAttributeWithoutValue();
                                break stateloop;
                            }
                            c = buf.charAt(pos);
                        }
                        /*
                         * Consume the next input character:
                         */
                        switch (c) {
                            case '"':
                                /*
                                 * U+0022 QUOTATION MARK (") Switch to the after
                                 * attribute value (quoted) state.
                                 */
                                addAttributeWithValue();

                                state = AFTER_ATTRIBUTE_VALUE_QUOTED;
                                break attributevaluedoublequotedloop;
                            // continue stateloop;
                            case '&':
                                /*
                                 * U+0026 AMPERSAND (&) Switch to the character
                                 * reference in attribute value state, with the
                                 * additional allowed character being U+0022
                                 * QUOTATION MARK (").
                                 */
                                clearStrBufAndAppendCurrentC(c);
                                returnState = state;
                                state = CONSUME_CHARACTER_REFERENCE;
                                continue stateloop;
                            case '\n':
                                appendLongStrBufLineFeed();
                                continue;
                            default:
                                /*
                                 * Anything else Append the current input
                                 * character to the current attribute's value.
                                 */
                                appendLongStrBuf(c);
                                /*
                                 * Stay in the attribute value (double-quoted)
                                 * state.
                                 */
                                continue;
                        }
                    }
                    // FALLTHRU DON'T REORDER
                case AFTER_ATTRIBUTE_VALUE_QUOTED:
                    for (;;) {
                        if (++pos == endPos) {
                            break stateloop;
                        }
                        c = buf.charAt(pos);
                        /*
                         * Consume the next input character:
                         */
                        switch (c) {
                            case '\n':
                            case ' ':
                            case '\t':
                                state = BEFORE_ATTRIBUTE_NAME;
                                continue stateloop;
                            default:
                                /*
                                 * Anything else Parse error.
                                 */
                                errNoSpaceBetweenAttributes();
                                /*
                                 * Reconsume the character in the before
                                 * attribute name state.
                                 */
                                state = BEFORE_ATTRIBUTE_NAME;
                                reconsume = true;
                                continue stateloop;
                        }
                    }
                    // FALLTHRU DON'T REORDER
                case ATTRIBUTE_VALUE_UNQUOTED:
                    errUnquotedAttributeValOrNull();
                    for (;;) {
                        if (reconsume) {
                            reconsume = false;
                        } else {
                            if (++pos == endPos) {
                                addAttributeWithValue();
                                break stateloop;
                            }
                            c = buf.charAt(pos);
                        }
                        /*
                         * Consume the next input character:
                         */
                        switch (c) {
                            case '\n':
                            case ' ':
                            case '\t':
                                addAttributeWithValue();
                                state = BEFORE_ATTRIBUTE_NAME;
                                continue stateloop;
                            case '&':
                                /*
                                 * U+0026 AMPERSAND (&) Switch to the character
                                 * reference in attribute value state, with the
                                 * additional allowed character being U+003E
                                 * GREATER-THAN SIGN (>)
                                 */
                                clearStrBufAndAppendCurrentC(c);
                                returnState = state;
                                state = CONSUME_CHARACTER_REFERENCE;
                                continue stateloop;
                            case '>':
                                /*
                                 * U+003E GREATER-THAN SIGN (>) Emit the current
                                 * tag token.
                                 */
                                // addAttributeWithValue();
                                // state = emitCurrentTagToken(false, pos);
                                // if (shouldSuspend) {
                                // break stateloop;
                                // }
                                /*
                                 * Switch to the data state.
                                 */
                                continue stateloop;
                            case '<':
                            case '\"':
                            case '\'':
                            case '=':
                            case '`':
                                /*
                                 * U+0022 QUOTATION MARK (") U+0027 APOSTROPHE
                                 * (') U+003C LESS-THAN SIGN (<) U+003D EQUALS
                                 * SIGN (=) U+0060 GRAVE ACCENT (`) Parse error.
                                 */
                                // errUnquotedAttributeValOrNull(c);
                                /*
                                 * Treat it as per the "anything else" entry
                                 * below.
                                 */
                                // fall through
                            default:
                                /*
                                 * Anything else Append the current input
                                 * character to the current attribute's value.
                                 */
                                appendLongStrBuf(c);
                                /*
                                 * Stay in the attribute value (unquoted) state.
                                 */
                                continue;
                        }
                    }
                    // XXX reorder point
                case AFTER_ATTRIBUTE_NAME:
                    for (;;) {
                        if (++pos == endPos) {
                            addAttributeWithoutValue();
                            break stateloop;
                        }
                        c = buf.charAt(pos);
                        /*
                         * Consume the next input character:
                         */
                        switch (c) {
                            case '\n':
                            case ' ':
                            case '\t':
                                continue;
                            case '=':
                                /*
                                 * U+003D EQUALS SIGN (=) Switch to the before
                                 * attribute value state.
                                 */
                                state = BEFORE_ATTRIBUTE_VALUE;
                                continue stateloop;
                            case '\"':
                            case '\'':
                            case '<':
                                errQuoteOrLtInAttributeNameOrNull(c);
                                /*
                                 * Treat it as per the "anything else" entry
                                 * below.
                                 */
                            default:
                                addAttributeWithoutValue();
                                /*
                                 * Anything else Start a new attribute in the
                                 * current tag token.
                                 */
                                /*
                                 * Set that attribute's name to the current
                                 * input character,
                                 */
                                clearStrBufAndAppendCurrentC(c);
                                /*
                                 * and its value to the empty string.
                                 */
                                // Will do later.
                                /*
                                 * Switch to the attribute name state.
                                 */
                                state = ATTRIBUTE_NAME;
                                continue stateloop;
                        }
                    }
                    // XXX reorder point
                case ATTRIBUTE_VALUE_SINGLE_QUOTED:
                    attributevaluesinglequotedloop: for (;;) {
                        if (reconsume) {
                            reconsume = false;
                        } else {
                            if (++pos == endPos) {
                                addAttributeWithoutValue();
                                break stateloop;
                            }
                            c = buf.charAt(pos);
                        }
                        /*
                         * Consume the next input character:
                         */
                        switch (c) {
                            case '\'':
                                /*
                                 * U+0027 APOSTROPHE (') Switch to the after
                                 * attribute value (quoted) state.
                                 */
                                addAttributeWithValue();
                                state = AFTER_ATTRIBUTE_VALUE_QUOTED;
                                continue stateloop;
                            case '&':
                                /*
                                 * U+0026 AMPERSAND (&) Switch to the character
                                 * reference in attribute value state, with the
                                 * additional allowed character being U+0027
                                 * APOSTROPHE (').
                                 */
                                clearStrBufAndAppendCurrentC(c);
                                returnState = state;
                                state = CONSUME_CHARACTER_REFERENCE;
                                break attributevaluesinglequotedloop;
                            // continue stateloop;
                            case '\n':
                                appendLongStrBufLineFeed();
                                continue;
                            default:
                                /*
                                 * Anything else Append the current input
                                 * character to the current attribute's value.
                                 */
                                appendLongStrBuf(c);
                                /*
                                 * Stay in the attribute value (single-quoted)
                                 * state.
                                 */
                                continue;
                        }
                    }
                    // FALLTHRU DON'T REORDER
                case CONSUME_CHARACTER_REFERENCE:
                    if (++pos == endPos) {
                        break stateloop;
                    }
                    c = buf.charAt(pos);
                    /*
                     * Unlike the definition in the spec, this state does not
                     * return a value and never requires the caller to
                     * backtrack. This state takes care of emitting characters
                     * or appending to the current attribute value. It also
                     * takes care of that in the case when consuming the
                     * character reference fails.
                     */
                    /*
                     * This section defines how to consume a character
                     * reference. This definition is used when parsing character
                     * references in text and in attributes.
                     * 
                     * The behavior depends on the identity of the next
                     * character (the one immediately after the U+0026 AMPERSAND
                     * character):
                     */
                    switch (c) {
                        case '#':
                            /*
                             * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER
                             * SIGN.
                             */
                            appendStrBuf('#');
                            state = CONSUME_NCR;
                            continue stateloop;
                        default:
                            if (c == additional) {
                                emitOrAppendStrBuf(returnState);
                                state = returnState;
                                reconsume = true;
                                continue stateloop;
                            }
                            entCol = -1;
                            lo = 0;
                            hi = (NAMES.length - 1);
                            candidate = -1;
                            strBufMark = 0;
                            state = CHARACTER_REFERENCE_LOOP;
                            reconsume = true;
                            // FALL THROUGH continue stateloop;
                    }
                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
                case CHARACTER_REFERENCE_LOOP:
                    outer: for (;;) {
                        if (reconsume) {
                            reconsume = false;
                        } else {
                            if (++pos == endPos) {
                                break stateloop;
                            }
                            c = buf.charAt(pos);
                        }
                        entCol++;
                        /*
                         * Consume the maximum number of characters possible,
                         * with the consumed characters matching one of the
                         * identifiers in the first column of the named
                         * character references table (in a case-sensitive
                         * manner).
                         */
                        hiloop: for (;;) {
                            if (hi == -1) {
                                break hiloop;
                            }
                            if (entCol == NAMES[hi].length) {
                                break hiloop;
                            }
                            if (entCol > NAMES[hi].length) {
                                break outer;
                            } else if (c < NAMES[hi][entCol]) {
                                hi--;
                            } else {
                                break hiloop;
                            }
                        }

                        loloop: for (;;) {
                            if (hi < lo) {
                                break outer;
                            }
                            if (entCol == NAMES[lo].length) {
                                candidate = lo;
                                strBufMark = strBufLen;
                                lo++;
                            } else if (entCol > NAMES[lo].length) {
                                break outer;
                            } else if (c > NAMES[lo][entCol]) {
                                lo++;
                            } else {
                                break loloop;
                            }
                        }
                        if (hi < lo) {
                            break outer;
                        }
                        appendStrBuf(c);
                        continue;
                    }

                    if (candidate == -1) {
                        /*
                         * If no match can be made, then this is a parse error.
                         */
                        errNoNamedCharacterMatch();
                        emitOrAppendStrBuf(returnState);
                        state = returnState;
                        reconsume = true;
                        continue stateloop;
                    } else {
                        char[] candidateArr = NAMES[candidate];
                        if (candidateArr[candidateArr.length - 1] != ';') {
                            /*
                             * If the last character matched is not a U+003B
                             * SEMICOLON (;), there is a parse error.
                             */
                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
                                /*
                                 * If the entity is being consumed as part of an
                                 * attribute, and the last character matched is
                                 * not a U+003B SEMICOLON (;),
                                 */
                                char ch;
                                if (strBufMark == strBufLen) {
                                    ch = c;
                                } else {
                                    // if (strBufOffset != -1) {
                                    // ch = buf[strBufOffset + strBufMark];
                                    // } else {
                                    ch = strBuf[strBufMark];
                                    // }
                                }
                                if ((ch >= '0' && ch <= '9')
                                        || (ch >= 'A' && ch <= 'Z')
                                        || (ch >= 'a' && ch <= 'z')) {
                                    /*
                                     * and the next character is in the range
                                     * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
                                     * U+0041 LATIN CAPITAL LETTER A to U+005A
                                     * LATIN CAPITAL LETTER Z, or U+0061 LATIN
                                     * SMALL LETTER A to U+007A LATIN SMALL
                                     * LETTER Z, then, for historical reasons,
                                     * all the characters that were matched
                                     * after the U+0026 AMPERSAND (&) must be
                                     * unconsumed, and nothing is returned.
                                     */
                                    errNoNamedCharacterMatch();
                                    appendStrBufToLongStrBuf();
                                    state = returnState;
                                    reconsume = true;
                                    continue stateloop;
                                }
                            }
                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
                                errUnescapedAmpersandInterpretedAsCharacterReference();
                            }
                        }

                        /*
                         * Otherwise, return a character token for the character
                         * corresponding to the entity name (as given by the
                         * second column of the named character references
                         * table).
                         */
                        char[] val = VALUES[candidate];
                        emitOrAppend(val, returnState);
                        // this is so complicated!
                        if (strBufMark < strBufLen) {
                            // if (strBufOffset != -1) {
                            // if ((returnState & (~1)) != 0) {
                            // for (int i = strBufMark; i < strBufLen; i++) {
                            // appendLongStrBuf(buf[strBufOffset + i]);
                            // }
                            // } else {
                            // tokenHandler.characters(buf, strBufOffset
                            // + strBufMark, strBufLen
                            // - strBufMark);
                            // }
                            // } else {
                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
                                for (int i = strBufMark; i < strBufLen; i++) {
                                    appendLongStrBuf(strBuf[i]);
                                }
                            }
                            // }
                        }
                        state = returnState;
                        reconsume = true;
                        continue stateloop;
                        /*
                         * If the markup contains I'm &notit; I tell you, the
                         * entity is parsed as "not", as in, I'm ¬it; I tell
                         * you. But if the markup was I'm &notin; I tell you,
                         * the entity would be parsed as "notin;", resulting in
                         * I'm ∉ I tell you.
                         */
                    }
                    // XXX reorder point
                case CONSUME_NCR:
                    if (++pos == endPos) {
                        break stateloop;
                    }
                    c = buf.charAt(pos);
                    prevValue = -1;
                    value = 0;
                    seenDigits = false;
                    /*
                     * The behavior further depends on the character after the
                     * U+0023 NUMBER SIGN:
                     */
                    switch (c) {
                        case 'x':
                            /*
                             * U+0078 LATIN SMALL LETTER X U+0058 LATIN CAPITAL
                             * LETTER X Consume the X.
                             * 
                             * Follow the steps below, but using the range of
                             * characters U+0030 DIGIT ZERO through to U+0039
                             * DIGIT NINE, U+0061 LATIN SMALL LETTER A through
                             * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN
                             * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL
                             * LETTER F (in other words, 0-9, A-F, a-f).
                             * 
                             * When it comes to interpreting the number,
                             * interpret it as a hexadecimal number.
                             */
                            appendStrBuf(c);
                            state = HEX_NCR_LOOP;
                            continue stateloop;
                        case 'X':
                            /*
                             * XML requires a lowercase 'x' for hex character
                             * refs
                             */
                            errUpperCaseXinHexNcr();
                            appendStrBuf(c);
                            state = HEX_NCR_LOOP;
                            continue stateloop;
                        default:
                            /*
                             * Anything else Follow the steps below, but using
                             * the range of characters U+0030 DIGIT ZERO through
                             * to U+0039 DIGIT NINE (i.e. just 0-9).
                             * 
                             * When it comes to interpreting the number,
                             * interpret it as a decimal number.
                             */
                            state = DECIMAL_NRC_LOOP;
                            reconsume = true;
                            // FALL THROUGH continue stateloop;
                    }
                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
                case DECIMAL_NRC_LOOP:
                    decimalloop: for (;;) {
                        if (reconsume) {
                            reconsume = false;
                        } else {
                            if (++pos == endPos) {
                                break stateloop;
                            }
                            c = buf.charAt(pos);
                        }
                        // Deal with overflow gracefully
                        if (value < prevValue) {
                            value = 0x110000; // Value above Unicode range but
                            // within int
                            // range
                        }
                        prevValue = value;
                        /*
                         * Consume as many characters as match the range of
                         * characters given above.
                         */
                        if (c >= '0' && c <= '9') {
                            seenDigits = true;
                            value *= 10;
                            value += c - '0';
                            continue;
                        } else if (c == ';') {
                            if (seenDigits) {
                                state = HANDLE_NCR_VALUE;
                                // FALL THROUGH continue stateloop;
                                break decimalloop;
                            } else {
                                errNoDigitsInNCR();
                                appendStrBuf(';');
                                emitOrAppendStrBuf(returnState);
                                state = returnState;
                                continue stateloop;
                            }
                        } else {
                            /*
                             * If no characters match the range, then don't
                             * consume any characters (and unconsume the U+0023
                             * NUMBER SIGN character and, if appropriate, the X
                             * character). This is a parse error; nothing is
                             * returned.
                             * 
                             * Otherwise, if the next character is a U+003B
                             * SEMICOLON, consume that too. If it isn't, there
                             * is a parse error.
                             */
                            if (!seenDigits) {
                                errNoDigitsInNCR();
                                emitOrAppendStrBuf(returnState);
                                state = returnState;
                                reconsume = true;
                                continue stateloop;
                            } else {
                                errCharRefLacksSemicolon();
                                state = HANDLE_NCR_VALUE;
                                reconsume = true;
                                // FALL THROUGH continue stateloop;
                                break decimalloop;
                            }
                        }
                    }
                    // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
                case HANDLE_NCR_VALUE:
                    // WARNING previous state sets reconsume
                    handleNcrValue(returnState);
                    state = returnState;
                    continue stateloop;
                    // XXX reorder point
                case HEX_NCR_LOOP:
                    for (;;) {
                        if (++pos == endPos) {
                            break stateloop;
                        }
                        c = buf.charAt(pos);
                        // Deal with overflow gracefully
                        if (value < prevValue) {
                            value = 0x110000; // Value above Unicode range but
                            // within int
                            // range
                        }
                        prevValue = value;
                        /*
                         * Consume as many characters as match the range of
                         * characters given above.
                         */
                        if (c >= '0' && c <= '9') {
                            seenDigits = true;
                            value *= 16;
                            value += c - '0';
                            continue;
                        } else if (c >= 'A' && c <= 'F') {
                            seenDigits = true;
                            value *= 16;
                            value += c - 'A' + 10;
                            continue;
                        } else if (c >= 'a' && c <= 'f') {
                            seenDigits = true;
                            value *= 16;
                            value += c - 'a' + 10;
                            continue;
                        } else if (c == ';') {
                            if (seenDigits) {
                                state = HANDLE_NCR_VALUE;
                                continue stateloop;
                            } else {
                                errNoDigitsInNCR();
                                appendStrBuf(';');
                                emitOrAppendStrBuf(returnState);
                                state = returnState;
                                continue stateloop;
                            }
                        } else {
                            /*
                             * If no characters match the range, then don't
                             * consume any characters (and unconsume the U+0023
                             * NUMBER SIGN character and, if appropriate, the X
                             * character). This is a parse error; nothing is
                             * returned.
                             * 
                             * Otherwise, if the next character is a U+003B
                             * SEMICOLON, consume that too. If it isn't, there
                             * is a parse error.
                             */
                            if (!seenDigits) {
                                errNoDigitsInNCR();
                                emitOrAppendStrBuf(returnState);
                                state = returnState;
                                reconsume = true;
                                continue stateloop;
                            } else {
                                errCharRefLacksSemicolon();
                                state = HANDLE_NCR_VALUE;
                                reconsume = true;
                                continue stateloop;
                            }
                        }
                    }
            }
        }
        return attributes;
    }

    /**
     * Copies the current content of the short buffer (the first
     * {@code strBufLen} characters of {@code strBuf}) onto the end of the
     * long buffer.
     */
    private void appendStrBufToLongStrBuf() {
        final char[] pending = strBuf;
        final int pendingLen = strBufLen;
        appendLongStrBuf(pending, 0, pendingLen);
    }

    /**
     * Appends a slice of a character array to the long buffer, enlarging the
     * long buffer first when the slice does not fit.
     *
     * @param buffer the array to copy from
     * @param offset index of the first character in {@code buffer} to copy
     * @param length number of characters to copy
     */
    private void appendLongStrBuf(char[] buffer, int offset, int length) {
        final int newLen = longStrBufLen + length;
        char[] dest = longStrBuf;
        if (dest.length < newLen) {
            // Allocate one and a half times the required size so that a run
            // of appends does not reallocate every time.
            dest = new char[newLen + (newLen >> 1)];
            System.arraycopy(longStrBuf, 0, dest, 0, longStrBuf.length);
            longStrBuf = dest;
        }
        System.arraycopy(buffer, offset, dest, longStrBufLen, length);
        longStrBufLen = newLen;
    }

    /**
     * Appends every character of {@code arr} to the long buffer.
     *
     * @param arr the characters to append
     */
    private void appendLongStrBuf(char[] arr) {
        final int count = arr.length;
        appendLongStrBuf(arr, 0, count);
    }

    /**
     * Appends a single character to the long buffer, enlarging the buffer
     * when it is full.
     *
     * @param c the character to append
     */
    private void appendLongStrBuf(char c) {
        if (longStrBufLen == longStrBuf.length) {
            // Grow by half the current size, but always by at least one slot:
            // for a capacity of 0 or 1 the "half" term is zero, so the buffer
            // would otherwise stay the same size and the store below would
            // throw ArrayIndexOutOfBoundsException.
            int newCapacity = longStrBufLen + Math.max(1, longStrBufLen >> 1);
            char[] newBuf = new char[newCapacity];
            System.arraycopy(longStrBuf, 0, newBuf, 0, longStrBuf.length);
            longStrBuf = newBuf;
        }
        longStrBuf[longStrBufLen++] = c;
    }

    /**
     * Appends a single line feed (U+000A) to the long buffer.
     */
    private void appendLongStrBufLineFeed() {
        final char lineFeed = '\n';
        appendLongStrBuf(lineFeed);
    }

    /**
     * Appends a single character to the short buffer. When the buffer is
     * full it is first replaced by a copy that is {@code BUFFER_GROW_BY}
     * characters larger.
     *
     * @param c the character to append
     */
    private void appendStrBuf(char c) {
        final int capacity = strBuf.length;
        if (capacity == strBufLen) {
            final char[] grown = new char[capacity + BUFFER_GROW_BY];
            System.arraycopy(strBuf, 0, grown, 0, capacity);
            strBuf = grown;
        }
        strBuf[strBufLen++] = c;
    }

    /**
     * Empties the long buffer. Does exactly the same as
     * {@link #clearLongStrBuf()}: the logical length is reset to zero and
     * the backing array is left as it is.
     */
    private void clearLongStrBufForNextState() {
        clearLongStrBuf();
    }

    /**
     * Empties the long buffer by resetting its logical length to zero. The
     * backing array is neither cleared nor reallocated.
     */
    private void clearLongStrBuf() {
        this.longStrBufLen = 0;
    }

    /**
     * Resets the long buffer so that it holds exactly one character.
     *
     * @param c the character that becomes the sole content of the buffer
     */
    private void clearLongStrBufAndAppendCurrentC(char c) {
        final char[] target = longStrBuf;
        target[0] = c;
        this.longStrBufLen = 1;
    }

    /**
     * Resets the short buffer so that it holds exactly one character.
     *
     * @param c the character that becomes the sole content of the buffer
     */
    private void clearStrBufAndAppendCurrentC(char c) {
        final char[] target = strBuf;
        target[0] = c;
        this.strBufLen = 1;
    }

    /**
     * Appends all of {@code val} to the long buffer when {@code returnState}
     * has a {@code DATA_AND_RCDATA_MASK} bit set; otherwise does nothing.
     * Despite the name, nothing is emitted here.
     *
     * @param val the characters to append
     * @param returnState the state the tokenizer returns to afterwards
     * @throws SAXException declared for callers; not thrown by this body
     */
    private void emitOrAppend(char[] val, int returnState) throws SAXException {
        final boolean masked = (returnState & DATA_AND_RCDATA_MASK) != 0;
        if (!masked) {
            return;
        }
        appendLongStrBuf(val);
    }

    /**
     * Appends the first character of {@code val} to the long buffer when
     * {@code returnState} has a {@code DATA_AND_RCDATA_MASK} bit set;
     * otherwise does nothing.
     *
     * @param val array whose element 0 is the character to append
     * @param returnState the state the tokenizer returns to afterwards
     * @throws SAXException declared for callers; not thrown by this body
     */
    private void emitOrAppendOne(char[] val, int returnState)
            throws SAXException {
        final boolean masked = (returnState & DATA_AND_RCDATA_MASK) != 0;
        if (!masked) {
            return;
        }
        appendLongStrBuf(val[0]);
    }

    /**
     * Appends the first two characters of {@code val}, in order, to the long
     * buffer when {@code returnState} has a {@code DATA_AND_RCDATA_MASK} bit
     * set; otherwise does nothing.
     *
     * @param val array whose elements 0 and 1 are the characters to append
     * @param returnState the state the tokenizer returns to afterwards
     * @throws SAXException declared for callers; not thrown by this body
     */
    private void emitOrAppendTwo(char[] val, int returnState)
            throws SAXException {
        final boolean masked = (returnState & DATA_AND_RCDATA_MASK) != 0;
        if (!masked) {
            return;
        }
        for (int i = 0; i < 2; i++) {
            appendLongStrBuf(val[i]);
        }
    }

    /**
     * Appends the content of the short buffer to the long buffer when
     * {@code returnState} has a {@code DATA_AND_RCDATA_MASK} bit set;
     * otherwise does nothing.
     *
     * @param returnState the state the tokenizer returns to afterwards
     * @throws SAXException declared for callers; not thrown by this body
     */
    private void emitOrAppendStrBuf(int returnState) throws SAXException {
        final boolean masked = (returnState & DATA_AND_RCDATA_MASK) != 0;
        if (!masked) {
            return;
        }
        appendStrBufToLongStrBuf();
    }

    /**
     * Builds a string from the live part of the long buffer.
     *
     * @return a new string holding the first {@code longStrBufLen}
     *         characters of {@code longStrBuf}
     */
    private String longStrBufToString() {
        final int liveLength = longStrBufLen;
        return String.valueOf(longStrBuf, 0, liveLength);
    }

    /**
     * Finishes the attribute name collected in the short buffer: stores it,
     * interned, in {@code attributeName}, creates the attribute list on
     * first use, and rejects the name if it was already used.
     *
     * <p>Per the tokenizer spec, on leaving the attribute name state the
     * complete name is compared with the attributes already on the token. If
     * an attribute with exactly the same name exists, that is a parse error
     * and the new attribute, together with any value that follows, must be
     * dropped. Dropping is signalled here by resetting
     * {@code attributeName} to {@code null}.
     *
     * @throws SAXException if reporting the duplicate-attribute error throws
     */
    private void attributeNameComplete() throws SAXException {
        final String candidate = new String(strBuf, 0, strBufLen).intern();
        attributeName = candidate;

        if (attributes == null) {
            attributes = new AttributesImpl();
        }

        final int known = attributes.getLength();
        int index = 0;
        while (index < known) {
            if (attributes.getQName(index).equals(candidate)) {
                // Duplicate: report it and drop the new attribute.
                errDuplicateAttribute();
                attributeName = null;
                return;
            }
            index++;
        }
    }

    /**
     * Adds the pending attribute, with the long buffer's content as its
     * value, to the attribute list. Does nothing when no attribute name is
     * pending.
     *
     * <p>The attribute is not added when the value contains {@code '<'}
     * (an error is reported) or when the name contains a character rejected
     * by {@code badCharInCandidateAttributeName()}.
     *
     * @throws SAXException if reporting the error throws
     */
    private void addAttributeWithValue() throws SAXException {
        if (attributeName == null) {
            return;
        }
        final String text = longStrBufToString();
        if (text.indexOf('<') != -1) {
            // NOTE(review): attributeName stays set on this path and on the
            // bad-name path below; confirm that is intended.
            errAttributeValueContainsLt();
            return;
        }
        if (badCharInCandidateAttributeName()) {
            return;
        }
        attributes.addAttribute("", "", attributeName, "", text);
        attributeName = null;
    }

    /**
     * Adds the pending attribute to the attribute list with a {@code null}
     * value and reports that it has no value. Does nothing when no attribute
     * name is pending, or when the name contains a character rejected by
     * {@code badCharInCandidateAttributeName()}.
     *
     * @throws SAXException if reporting the error throws
     */
    private void addAttributeWithoutValue() throws SAXException {
        if (attributeName == null) {
            return;
        }
        if (badCharInCandidateAttributeName()) {
            return;
        }
        // The attribute is recorded first, then the missing value is reported.
        attributes.addAttribute("", "", attributeName, "", null);
        errAttributeWithNoValue();
        attributeName = null;
    }

    /**
     * Tells whether the pending attribute name contains a character that
     * disqualifies it: {@code /}, {@code >}, {@code "}, {@code '},
     * {@code <} or {@code =}.
     *
     * @return {@code true} if {@code attributeName} contains any of those
     *         characters
     */
    private boolean badCharInCandidateAttributeName() {
        final String forbidden = "/>\"'<=";
        for (int i = 0; i < forbidden.length(); i++) {
            if (attributeName.indexOf(forbidden.charAt(i)) != -1) {
                return true;
            }
        }
        return false;
    }

    /**
     * Processes the numeric character reference whose code point has been
     * accumulated in {@code value}: reports any problem with that code point
     * and hands the resulting character(s) to the {@code emitOrAppend*}
     * helpers.
     *
     * <p>Values rejected by {@link #isLegalXmlCharValue(int)} are reported and
     * nothing is emitted for them. Every other value is classified, reported
     * and emitted exactly once.
     *
     * @param returnState passed through unchanged to the
     *        {@code emitOrAppend*} helpers
     * @throws SAXException propagated from the reporting and emit helpers
     */
    private void handleNcrValue(int returnState) throws SAXException {
        if (!isLegalXmlCharValue(value)) {
            errNcrIllegalValueForXml();
            return;
        }
        /*
         * Given the ranges accepted by isLegalXmlCharValue, the surrogate,
         * BMP non-character and out-of-range branches below are not expected
         * to be reached; they are kept as a safeguard.
         */
        if (value <= 0xFFFF) {
            /*
             * C0 control characters need no separate check here: the
             * XML-illegal ones were already rejected by isLegalXmlCharValue.
             */
            if ((value & 0xF800) == 0xD800) {
                errNcrSurrogate();
                emitOrAppendOne(REPLACEMENT_CHARACTER, returnState);
            } else {
                /*
                 * Otherwise, produce the character whose code point is that
                 * number, after reporting anything noteworthy about it.
                 */
                char ch = (char) value;
                if (value >= 0xFDD0 && value <= 0xFDEF) {
                    errNcrUnassigned();
                } else if ((value & 0xFFFE) == 0xFFFE) {
                    ch = errNcrNonCharacter(ch);
                } else if (value >= 0x007F && value <= 0x009F) {
                    errNcrControlChar();
                } else {
                    maybeWarnPrivateUse(ch);
                }
                bmpChar[0] = ch;
                emitOrAppendOne(bmpChar, returnState);
            }
        } else if (value <= 0x10FFFF) {
            maybeWarnPrivateUseAstral();
            // The last two code points of every plane are non-characters.
            if ((value & 0xFFFE) == 0xFFFE) {
                errAstralNonCharacter(value);
            }
            /*
             * Two UTF-16 code units: the second is 0xDC00 plus the low ten
             * bits; the first is derived from the remaining high bits via
             * LEAD_OFFSET (presumably yielding the lead surrogate).
             */
            astralChar[0] = (char) (LEAD_OFFSET + (value >> 10));
            astralChar[1] = (char) (0xDC00 + (value & 0x3FF));
            emitOrAppendTwo(astralChar, returnState);
        } else {
            errNcrOutOfRange();
            emitOrAppendOne(REPLACEMENT_CHARACTER, returnState);
        }
    }

    /**
     * Formats a UTF-16 code unit as {@code U+} followed by its hexadecimal
     * value, left-padded with zeros to four digits. The hex digits are those
     * produced by {@link Integer#toHexString(int)}.
     *
     * @param c the code unit to format
     * @return the {@code U+xxxx} form of {@code c}
     */
    private String toUPlusString(char c) {
        final String digits = Integer.toHexString(c);
        final int padding = 4 - digits.length();
        if (padding < 0 || padding > 3) {
            // A char always yields between one and four hex digits.
            throw new RuntimeException("Unreachable.");
        }
        final StringBuilder formatted = new StringBuilder(6);
        formatted.append("U+");
        for (int i = 0; i < padding; i++) {
            formatted.append('0');
        }
        formatted.append(digits);
        return formatted.toString();
    }

    /**
     * Tells whether a code point lies in one of the accepted ranges: tab,
     * line feed, carriage return, U+0020..U+D7FF, U+E000..U+FFFD or
     * U+10000..U+10FFFF.
     *
     * @param charval the code point to test
     * @return {@code true} if {@code charval} is in one of those ranges
     */
    private boolean isLegalXmlCharValue(int charval) {
        if (charval == 0x0009 || charval == 0x000A || charval == 0x000D) {
            return true;
        }
        if (charval < 0x0020) {
            return false;
        }
        if (charval <= 0xD7FF) {
            return true;
        }
        if (charval < 0xE000) {
            return false;
        }
        if (charval <= 0xFFFD) {
            return true;
        }
        return charval >= 0x10000 && charval <= 0x10FFFF;
    }

    /**
     * Tells whether a code unit lies in the range U+E000..U+F8FF.
     *
     * @param c the code unit to test
     * @return {@code true} if {@code c} is within that range
     */
    private boolean isPrivateUse(char c) {
        final int codeUnit = c;
        return 0xE000 <= codeUnit && codeUnit <= 0xF8FF;
    }

    /**
     * Tells whether a code point lies in U+F0000..U+FFFFD or in
     * U+100000..U+10FFFD.
     *
     * @param c the code point to test
     * @return {@code true} if {@code c} is within either range
     */
    private boolean isAstralPrivateUse(int c) {
        final boolean inPlane15 = 0xF0000 <= c && c <= 0xFFFFD;
        final boolean inPlane16 = 0x100000 <= c && c <= 0x10FFFD;
        return inPlane15 || inPlane16;
    }

    /**
     * Emits the Private Use Area warning unless
     * {@code alreadyWarnedAboutPrivateUseCharacters} is already set, and sets
     * that flag once the warning has been issued.
     *
     * @throws SAXException propagated from {@code warn}
     */
    private void warnAboutPrivateUseChar() throws SAXException {
        if (alreadyWarnedAboutPrivateUseCharacters) {
            return;
        }
        warn("Document uses the Unicode Private Use Area(s), which should not be used in publicly exchanged documents. (Charmod C073)");
        // Only recorded after warn() returned normally.
        alreadyWarnedAboutPrivateUseCharacters = true;
    }

    /**
     * Reports a character that is not acceptable where a pseudo-attribute
     * name was expected. {@code =} gets the equals-sign message, U+FFFD gets
     * a generic "not allowed" message naming {@code piTarget}, and any other
     * character gets the quote message.
     *
     * @param c the offending character
     * @throws SAXException propagated from the reporting helpers
     */
    private void errBadCharBeforeAttributeNameOrNull(char c)
            throws SAXException {
        if (c == '=') {
            errEqualsSignBeforeAttributeName();
            return;
        }
        if (c == '\uFFFD') {
            err("The character \u201C" + c + "\u201D is not allowed in \u201C"
                    + piTarget + "\u201D pseudo-attribute names.");
            return;
        }
        errQuoteBeforeAttributeName(c);
    }

    /**
     * Reports a character reference that is not terminated by a semicolon.
     *
     * @throws SAXException propagated from {@code err}
     */
    private void errCharRefLacksSemicolon() throws SAXException {
        final String message = "Character reference was not terminated by a semicolon.";
        err(message);
    }

    /**
     * Reports that the pseudo-attribute named by {@code attributeName}
     * occurs more than once; the message also names {@code piTarget}.
     *
     * @throws SAXException propagated from {@code err}
     */
    private void errDuplicateAttribute() throws SAXException {
        final String message = "Duplicate \u201C" + piTarget
                + "\u201D pseudo-attribute \u201C" + attributeName
                + "\u201D.";
        err(message);
    }

    /**
     * Reports an equals sign found where a pseudo-attribute name was
     * expected; the message names {@code piTarget}.
     *
     * @throws SAXException propagated from {@code err}
     */
    private void errEqualsSignBeforeAttributeName() throws SAXException {
        final String message = "Saw \u201C=\u201D when expecting \u201C"
                + piTarget
                + "\u201D pseudo-attribute name. Probable cause: Pseudo-attribute name missing.";
        err(message);
    }

    /**
     * Reports a suspicious character at the start of an unquoted
     * pseudo-attribute value. Only {@code =} and {@code `} produce a message
     * here; every other character, including {@code <}, is silently
     * ignored.
     *
     * @param c the character found at the start of the unquoted value
     * @throws SAXException propagated from {@code err}
     */
    private void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
            throws SAXException {
        if (c == '=') {
            err("\u201C=\u201D at the start of an unquoted \u201C"
                    + piTarget
                    + "\u201D pseudo-attribute value. Probable cause: Stray duplicate equals sign.");
        } else if (c == '`') {
            err("\u201C`\u201D at the start of an unquoted \u201C"
                    + piTarget
                    + "\u201D pseudo-attribute value. Probable cause: Using the wrong character as a quote.");
        }
        /*
         * Nothing is reported for '<' here: that case is dealt with by the
         * attribute-value error-checking code in the
         * getPseudoAttributesFromPiData method.
         */
    }

    /**
     * Reports missing whitespace between two pseudo-attributes; the message
     * names {@code piTarget}.
     *
     * @throws SAXException propagated from {@code err}
     */
    private void errNoSpaceBetweenAttributes() throws SAXException {
        final String message = "Space is required between \u201C" + piTarget
                + "\u201D pseudo-attributes.";
        err(message);
    }

    /**
     * Reports a character seen where a pseudo-attribute name was expected,
     * suggesting a missing equals sign as the probable cause.
     *
     * @param c the character that was seen
     * @throws SAXException propagated from {@code err}
     */
    private void errQuoteBeforeAttributeName(char c) throws SAXException {
        final String message = "Saw \u201C"
                + c
                + "\u201D when expecting a pseudo-attribute name. Probable cause: \u201C=\u201D missing immediately before.";
        err(message);
    }

    /**
     * Reports a quote character found inside a pseudo-attribute name.
     * Nothing is reported when the character is U+FFFD.
     *
     * @param c the character found in the name
     * @throws SAXException propagated from {@code err}
     */
    private void errQuoteOrLtInAttributeNameOrNull(char c) throws SAXException {
        if (c == '\uFFFD') {
            return;
        }
        final String message = "Quote \u201C"
                + c
                + "\u201D in pseudo-attribute name. Probable cause: Matching quote missing somewhere earlier.";
        err(message);
    }

    /**
     * Reports that the pseudo-attribute named by {@code attributeName} has
     * an unquoted value; the message names {@code piTarget} and states that
     * all pseudo-attribute values must be quoted.
     *
     * @throws SAXException propagated from {@code err}
     */
    private void errUnquotedAttributeValOrNull() throws SAXException {
        final String message = "Found unquoted value for \u201c" + piTarget
                + "\u201d pseudo-attribute \u201c" + attributeName
                + "\u201d. The value of all pseudo-attributes in \u201c"
                + piTarget + "\u201d instructions must be quoted.";
        err(message);
    }

    /**
     * Reports an ampersand that did not start a character reference and
     * advises escaping it as {@code &amp;amp;}. Does nothing when no error
     * handler is set.
     *
     * @throws SAXException propagated from the error handler
     */
    private void errNoNamedCharacterMatch() throws SAXException {
        if (getErrorHandler() == null) {
            return;
        }
        // The advice must spell out the escaped form "&amp;", not a bare "&".
        SAXParseException spe = new SAXParseException(
                "\u201C&\u201D did not start a character reference. (\u201C&\u201D probably should have been escaped as \u201C&amp;\u201D.)",
                getDocumentLocator());
        getErrorHandler().error(spe);
    }

    /**
     * Warns that the character reference held in {@code value} expands to a
     * control character. This is a warning rather than an error because
     * these control characters are legal in XML.
     *
     * @throws SAXException propagated from {@code warn}
     */
    private void errNcrControlChar() throws SAXException {
        final String codePoint = toUPlusString((char) value);
        warn("Character reference expands to a control character ("
                + codePoint + ").");
    }

    /**
     * Reports that the character reference held in {@code value} expands to
     * a character that is not legal in XML, naming the code point in the
     * message.
     *
     * @throws SAXException propagated from {@code err}
     */
    private void errNcrIllegalValueForXml() throws SAXException {
        /*
         * Values above U+FFFF (for example anything beyond U+10FFFF) do not
         * fit in a char; casting them would name an unrelated code point, so
         * they are formatted from the full int instead.
         */
        final String codePoint = (value >= 0 && value <= 0xFFFF)
                ? toUPlusString((char) value)
                : "U+" + Integer.toHexString(value);
        err("Character reference expands to a character that is not legal in XML ("
                + codePoint + ").");
    }

    /**
     * Reports a character reference that expands to a surrogate.
     *
     * @throws SAXException propagated from {@code err}
     */
    private void errNcrSurrogate() throws SAXException {
        final String message = "Character reference expands to a surrogate.";
        err(message);
    }

    /**
     * Reports a character reference that expands to a permanently
     * unassigned code point.
     *
     * @throws SAXException propagated from {@code err}
     */
    private void errNcrUnassigned() throws SAXException {
        final String message = "Character reference expands to a permanently unassigned code point.";
        err(message);
    }

    /**
     * Reports that the character reference held in {@code value} expands to
     * a non-character, then hands the given character back unchanged.
     *
     * @param ch the character to return
     * @return {@code ch}, unmodified
     * @throws SAXException propagated from {@code err}
     */
    private char errNcrNonCharacter(char ch) throws SAXException {
        final String codePoint = toUPlusString((char) value);
        err("Character reference expands to a non-character ("
                + codePoint + ").");
        return ch;
    }

    /**
     * Reports a character reference that lies outside the permissible
     * Unicode range.
     *
     * @throws SAXException propagated from {@code err}
     */
    private void errNcrOutOfRange() throws SAXException {
        final String message = "Character reference outside the permissible Unicode range.";
        err(message);
    }

    /**
     * Reports a numeric character reference without digits, quoting the
     * first {@code strBufLen} entries of {@code strBuf} as the text after
     * which the digits were expected.
     *
     * @throws SAXException propagated from {@code err}
     */
    private void errNoDigitsInNCR() throws SAXException {
        final String seenSoFar = new String(strBuf, 0, strBufLen);
        err("No digits after \u201C" + seenSoFar
                + "\u201D.");
    }

    /**
     * Reports that the text following an ampersand was interpreted as a
     * character reference and advises escaping the ampersand as
     * {@code &amp;amp;}. Does nothing when no error handler is set.
     *
     * @throws SAXException propagated from the error handler
     */
    private void errUnescapedAmpersandInterpretedAsCharacterReference()
            throws SAXException {
        if (getErrorHandler() == null) {
            return;
        }
        // The advice must spell out the escaped form "&amp;", not a bare "&".
        SAXParseException spe = new SAXParseException(
                "The string following \u201C&\u201D was interpreted as a character reference. (\u201C&\u201D probably should have been escaped as \u201C&amp;\u201D.)",
                getDocumentLocator());
        getErrorHandler().error(spe);
    }

    /**
     * Triggers the Private Use Area warning when an error handler is set and
     * {@code ch} is a private-use character according to
     * {@code isPrivateUse}.
     *
     * @param ch the character to inspect
     * @throws SAXException propagated from {@code warnAboutPrivateUseChar}
     */
    private void maybeWarnPrivateUse(char ch) throws SAXException {
        if (getErrorHandler() == null) {
            return;
        }
        if (!isPrivateUse(ch)) {
            return;
        }
        warnAboutPrivateUseChar();
    }

    /**
     * Triggers the Private Use Area warning when an error handler is set and
     * the code point held in {@code value} is private-use according to
     * {@code isAstralPrivateUse}.
     *
     * @throws SAXException propagated from {@code warnAboutPrivateUseChar}
     */
    private void maybeWarnPrivateUseAstral() throws SAXException {
        if (getErrorHandler() == null) {
            return;
        }
        if (!isAstralPrivateUse(value)) {
            return;
        }
        warnAboutPrivateUseChar();
    }

    /**
     * Reports that a character reference expands to a non-character outside
     * the Basic Multilingual Plane, naming the full code point in the
     * message.
     *
     * @param ch the code point the reference expands to
     * @throws SAXException propagated from {@code err}
     */
    private void errAstralNonCharacter(int ch) throws SAXException {
        /*
         * A code point above U+FFFF does not fit in a char; casting it would
         * drop the plane and name the wrong character, so the full int is
         * formatted. Values that do fit keep the zero-padded U+xxxx form.
         */
        final String codePoint = (ch >= 0 && ch <= 0xFFFF)
                ? toUPlusString((char) ch)
                : "U+" + Integer.toHexString(ch);
        err("Character reference expands to an astral non-character ("
                + codePoint + ").");
    }

    /**
     * Returns a copy of the given string in which the ASCII letters
     * {@code A}-{@code Z} are replaced by {@code a}-{@code z}; all other
     * characters are copied unchanged.
     *
     * @param str the string to convert, may be {@code null}
     * @return the converted copy, or {@code null} if {@code str} is
     *         {@code null}
     */
    private static String newAsciiLowerCaseStringFromString(String str) {
        if (str == null) {
            return null;
        }
        final char[] chars = str.toCharArray();
        for (int idx = 0; idx < chars.length; idx++) {
            final char current = chars[idx];
            if ('A' <= current && current <= 'Z') {
                chars[idx] = (char) (current + 0x20);
            }
        }
        return new String(chars);
    }

    /**
     * Returns a copy of the given string in which the ASCII letters
     * {@code a}-{@code z} are replaced by {@code A}-{@code Z}; all other
     * characters are copied unchanged.
     *
     * @param str the string to convert, may be {@code null}
     * @return the converted copy, or {@code null} if {@code str} is
     *         {@code null}
     */
    private static String newAsciiUpperCaseStringFromString(String str) {
        if (str == null) {
            return null;
        }
        final char[] chars = str.toCharArray();
        for (int idx = 0; idx < chars.length; idx++) {
            final char current = chars[idx];
            if ('a' <= current && current <= 'z') {
                chars[idx] = (char) (current - 0x20);
            }
        }
        return new String(chars);
    }

}