org.netbeans.lib.html.lexer.HtmlLexer Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of org-netbeans-modules-html-lexer
The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.netbeans.lib.html.lexer;

import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.netbeans.api.html.lexer.HTMLTokenId;
import org.netbeans.api.html.lexer.HtmlLexerPlugin;
import org.netbeans.api.lexer.InputAttributes;
import org.netbeans.api.lexer.LanguagePath;
import org.netbeans.api.lexer.Token;
import org.netbeans.spi.lexer.Lexer;
import org.netbeans.spi.lexer.LexerInput;
import org.netbeans.spi.lexer.LexerRestartInfo;
import org.netbeans.spi.lexer.TokenFactory;
import org.netbeans.spi.lexer.TokenPropertyProvider;

/**
 * Lexical analyzer for HTML. Based on original HTML lexer from html/editor module.
 *
 * @author Petr Nejedly
 * @author Miloslav Metelka
 * @author Jan Lahoda
 * @author Marek Fukala
 * @version 1.00
 */

public final class HtmlLexer implements Lexer {

    // Logger named after this class.
    private static final Logger LOGGER = Logger.getLogger(HtmlLexer.class.getName());
    // True only when the system property "j2ee_lexer_debug" equals "true";
    // presumably gates debug logging (its usage is not visible in this part of the file).
    private static final boolean LOG = Boolean.getBoolean("j2ee_lexer_debug"); //NOI18N

    // Local alias for the end-of-input marker returned by LexerInput.read().
    private static final int EOF = LexerInput.EOF;

    // Character source of this lexer; taken from the restart info in the constructor.
    private final LexerInput input;

    // Factory used to create the returned tokens; taken from the restart info in the constructor.
    private final TokenFactory tokenFactory;

    /**
     * Snapshot of all lexer fields that together describe where the lexer is.
     * Instances are handed out by {@code state()} and read back in the
     * constructor when the lexer is restarted.
     */
    private static final class CompoundState {
        private int lexerState;
        private int lexerSubState;
        private int lexerEmbeddingState;
        private byte customELIndex;
        private String attribute;
        private String tag;
        private String scriptType;
        private boolean quoteType;

        public CompoundState(int lexerState, int lexerSubState, int lexerEmbeddingState, String attributeName, String tagName, String scriptType, byte customELIndex, boolean quoteType) {
            this.lexerState = lexerState;
            this.lexerSubState = lexerSubState;
            this.lexerEmbeddingState = lexerEmbeddingState;
            this.customELIndex = customELIndex;
            this.quoteType = quoteType;
            this.attribute = attributeName;
            this.tag = tagName;
            this.scriptType = scriptType;
        }

        /**
         * Two states are equal when every captured field is equal.
         */
        @Override
        public boolean equals(Object obj) {
            //the class is final, so instanceof is as strict as comparing getClass()
            if (!(obj instanceof CompoundState)) {
                return false;
            }
            CompoundState that = (CompoundState) obj;
            return lexerState == that.lexerState
                    && lexerSubState == that.lexerSubState
                    && lexerEmbeddingState == that.lexerEmbeddingState
                    && customELIndex == that.customELIndex
                    && quoteType == that.quoteType
                    && sameText(attribute, that.attribute)
                    && sameText(tag, that.tag)
                    && sameText(scriptType, that.scriptType);
        }

        /** Null-safe string equality. */
        private static boolean sameText(String left, String right) {
            return left == right || (left != null && left.equals(right));
        }

        /**
         * Hash over the captured fields. The custom EL index and the quote type
         * contribute only where they are meaningful, so states outside of those
         * areas keep the hash code they would have without these two fields.
         */
        @Override
        public int hashCode() {
            int hash = mix(3, lexerState);
            hash = mix(hash, lexerSubState);
            hash = mix(hash, lexerEmbeddingState);
            hash = mix(hash, attribute == null ? 0 : attribute.hashCode());
            hash = mix(hash, tag == null ? 0 : tag.hashCode());
            hash = mix(hash, scriptType == null ? 0 : scriptType.hashCode());
            if (customELIndex > 0) {
                //only a set custom EL index takes part in the hash
                hash = mix(hash, customELIndex);
            }
            boolean inQuotedValue = lexerState == ISI_VAL_QUOT
                    || lexerState == ISI_VAL_QUOT_EL
                    || lexerState == ISI_VAL_QUOT_ESC;
            if (inQuotedValue) {
                //the quote type matters only inside a quoted attribute value
                hash = mix(hash, quoteType ? 1 : 0);
            }
            return hash;
        }

        /** One step of the multiplicative hash. */
        private static int mix(int seed, int value) {
            return 17 * seed + value;
        }

        /**
         * Compact debugging form; zero sub/embedding states and null strings are left out.
         */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("HLS(hc="); //NOI18N
            text.append(hashCode()).append(",s=").append(lexerState); //NOI18N
            if (lexerSubState > 0) {
                text.append(",ss=").append(lexerSubState); //NOI18N
            }
            if (lexerEmbeddingState > 0) {
                text.append(",es=").append(lexerEmbeddingState); //NOI18N
            }
            if (tag != null) {
                text.append(",tag=").append(tag); //NOI18N
            }
            if (attribute != null) {
                text.append(",attribute=").append(attribute); //NOI18N
            }
            if (scriptType != null) {
                text.append(",scriptType=").append(scriptType); //NOI18N
            }
            return text.append(')').toString(); //NOI18N
        }

    }

    /**
     * Per-lexer pool of already seen states; each state is stored as both key
     * and value so {@code state()} can hand out one shared instance per distinct
     * state. The type arguments are required: with a raw map the lookup in
     * {@code state()} yields {@code Object} and its assignment to a
     * {@code CompoundState} variable does not compile.
     */
    private final HashMap<CompoundState, CompoundState> STATES_CACHE = new HashMap<>();

    /**
     * Returns the current lexer state as a {@code CompoundState}.
     * Equal states are shared through {@code STATES_CACHE}, so lexing a large
     * file does not keep one state object per token.
     *
     * @return the pooled instance equal to the current state
     */
    @Override
    public Object state() {
        CompoundState snapshot = new CompoundState(lexerState, lexerSubState, lexerEmbeddingState, attribute, tag, scriptType, customELIndex, quoteType);
        Object pooled = STATES_CACHE.get(snapshot);
        if (pooled != null) {
            return pooled;
        }
        //first occurrence of this state - remember it for later calls
        STATES_CACHE.put(snapshot, snapshot);
        return snapshot;
    }

    // Names of the tags whose content is lexed as embedded script/style;
    // compared case-insensitively with the open tag name in the ISI_TAG state.
    private static final String SCRIPT = "script"; //NOI18N
    private static final String STYLE = "style"; //NOI18N

    // Attribute names recognized by isStyleAttributeName() (case-insensitive match).
    private static final String[] STYLE_ATTRS = new String[]{"style", "id", "class"}; //NOI18N

    /**
     * lexerSubState: internal state of the lexical analyzer before entering the
     * sub-analyzer of character references. It is initially set to INIT, but before
     * its first use it is overwritten with the state that originated the transition
     * to the character reference sub-analyzer.
     * lexerState: the current state of the main state machine in nextToken().
     */
    private int lexerSubState = INIT;
    private int lexerState    = INIT;

    // Name of the attribute being lexed; part of the saved lexer state.
    private String attribute;
    private String tag; //tag name of the current context tag

    /**
     * Value of the "type" attribute in SCRIPT tag
     */
    private String scriptType;

    //tag name with namespace prefix to collection of attributes which should have
    //css class embedding by default
    private Map> cssClassTagAttrMap;
    private String CSS_CLASS_MAP_PROPERTY_KEY = "cssClassTagAttrMap"; //NOI18N //semi api

    /**
     * Indicates whether we are in a script or style element: INIT for neither,
     * ISI_SCRIPT after a "script" open tag, ISI_STYLE after a "style" open tag.
     */
    private int lexerEmbeddingState = INIT;

    // 0 (INIT) means no custom EL delimiter is active; otherwise the index of the
    // matched open delimiter plus one.
    private byte customELIndex = INIT;

    /**
     * Indicates the quote type in ISI_VAL_QUOT state.
     *
     * true means double quote, false single quote.
     */
    private boolean quoteType;

    // Token property key under which an EL_CONTENT token carries the zero-based
    // index of the custom EL delimiter pair that encloses it.
    public static final String EL_CONTENT_PROVIDER_INDEX = "elci"; //NOI18N

    // Values of lexerEmbeddingState: it is set to ISI_SCRIPT / ISI_STYLE when the
    // analyzer reads a "script" / "style" open tag name.
    private static final int ISI_SCRIPT = 1;
    private static final int ISI_STYLE = 2;

    // Internal states
    // NOTE(review): the numbering below jumps (19 -> 36, 37 -> 39, 40 -> 48) and several
    // states used by the lexer (ISA_REF, ISI_XML_PI, ISI_TAG_SLASH, ISI_SCRIPT_CONTENT,
    // ISI_STYLE_CONTENT, ISP_ENDTAG_X, ISP_ENDTAG_WS, ISP_TAG_X_ERROR, ISI_VAL_QUOT_ESC)
    // are not declared in this listing - declarations appear to have been lost from
    // this copy of the file; restore them from the upstream source.
    private static final int INIT = 0;
    private static final int ISI_TEXT = 1;    // Plain text between tags
    private static final int ISI_ERROR = 2;   // Syntax error in HTML syntax
    private static final int ISA_LT = 3;      // After start of tag delimiter - "<"
    private static final int ISA_SLASH = 4;   // After ETAGO - "</"
    private static final int ISI_TAG = 8;     // Inside tag - "<[a..Z]+"
    private static final int ISP_TAG_X = 9;   // X-switch after TAG's name
    private static final int ISP_TAG_WS = 10; // In WS in TAG, after the tag name
    private static final int ISI_ARG = 11;    // Inside tag's argument (attribute name)
    private static final int ISP_ARG_X = 12;  // X-switch after ARGUMENT's name
    private static final int ISP_ARG_WS = 13; // Inside WS after argument awaiting '='
    private static final int ISP_EQ = 14;     // X-switch after '=' in TAG's ARGUMENT
    private static final int ISP_EQ_WS = 15;  // In WS after '='
    private static final int ISI_VAL = 16;    // Non-quoted value
    private static final int ISI_VAL_QUOT = 17;   // quoted value
    private static final int ISI_VAL_QUOT_EL = 18;   // in EL in quoted value
    private static final int ISA_SGML_ESCAPE = 19;  // After "<!"
    private static final int ISI_SCRIPT_CONTENT_AFTER_LT = 36; //after < in script content
    private static final int ISI_SCRIPT_CONTENT_ENDTAG = 37; //presumably after "</" in script content - original comment is garbled, confirm
    private static final int ISI_STYLE_CONTENT_AFTER_LT = 39; //after < in style content
    private static final int ISI_STYLE_CONTENT_ENDTAG = 40; //presumably after "</" in style content - original comment is garbled, confirm
    private static final int ISI_XML_PI_QM = 48; //after ? in XML PI

    private static final int ISI_EL = 49; //EL custom open delimiter: {{.....}}

    /**
     * Lower-case names of the attributes treated as JavaScript event handlers;
     * queried by isJavascriptEventHandlerName() with a lower-cased attribute name.
     */
    static final Set<String> EVENT_HANDLER_NAMES = new HashSet<>();
    static {
        // See http://www.w3.org/TR/html401/interact/scripts.html
        EVENT_HANDLER_NAMES.add("onload"); // NOI18N
        EVENT_HANDLER_NAMES.add("onunload"); // NOI18N
        EVENT_HANDLER_NAMES.add("onclick"); // NOI18N
        EVENT_HANDLER_NAMES.add("ondblclick"); // NOI18N
        EVENT_HANDLER_NAMES.add("onmousedown"); // NOI18N
        EVENT_HANDLER_NAMES.add("onmouseup"); // NOI18N
        EVENT_HANDLER_NAMES.add("onmouseover"); // NOI18N
        EVENT_HANDLER_NAMES.add("onmousemove"); // NOI18N
        EVENT_HANDLER_NAMES.add("onmouseout"); // NOI18N
        EVENT_HANDLER_NAMES.add("onfocus"); // NOI18N
        EVENT_HANDLER_NAMES.add("onblur"); // NOI18N
        EVENT_HANDLER_NAMES.add("onkeypress"); // NOI18N
        EVENT_HANDLER_NAMES.add("onkeydown"); // NOI18N
        EVENT_HANDLER_NAMES.add("onkeyup"); // NOI18N
        EVENT_HANDLER_NAMES.add("onsubmit"); // NOI18N
        EVENT_HANDLER_NAMES.add("onreset"); // NOI18N
        EVENT_HANDLER_NAMES.add("onselect"); // NOI18N
        EVENT_HANDLER_NAMES.add("onchange"); // NOI18N
        EVENT_HANDLER_NAMES.add("ondrag"); // NOI18N
        EVENT_HANDLER_NAMES.add("ondrop"); // NOI18N

        // IMPORTANT - if you add any that DON'T start with "on" here,
        // make sure you update the optimized two-character prefix check
        // in isJavascriptEventHandlerName
    }

    // Script MIME type constant; its use is not visible in this part of the file.
    private static final String SUPPORTED_SCRIPT_TYPE = "text/javascript"; //NOI18N

    //flyweight token images - fixed texts of the single-symbol tokens
    //(presumably passed to the token factory to create flyweight tokens - the
    //token(..) helper is not visible here)
    private static final String IMG_EQUAL_SIGN = "="; //NOI18N
    private static final String IMG_CLOSE_TAG_SYMBOL = ">"; //NOI18N
    private static final String IMG_CLOSE_TAG_SYMBOL2 = "/>"; //NOI18N
    private static final String IMG_OPEN_TAG_SYMBOL = "<"; //NOI18N
    // NOTE(review): the following line is truncated in this copy of the file - the string
    // literal of IMG_OPEN_TAG_SYMBOL2 runs straight into the parameter list of the
    // constructor, so the text in between (including the constructor header) is missing.
    // Restore it from the upstream source. The statements below are the constructor body:
    // they bind the input and the token factory, then either start from the initial
    // state or resume from a previously returned CompoundState.
    private static final String IMG_OPEN_TAG_SYMBOL2 = " info) {
        this.input = info.input();
        this.tokenFactory = info.tokenFactory();
        if (info.state() == null) {
            //fresh start
            this.lexerSubState = INIT;
            this.lexerState = INIT;
            this.lexerEmbeddingState = INIT;
            this.customELIndex = INIT;
            this.quoteType = false;
        } else {
            //restart - take over everything state() captured
            CompoundState cs = (CompoundState) info.state();
            lexerState = cs.lexerState;
            lexerSubState = cs.lexerSubState;
            lexerEmbeddingState = cs.lexerEmbeddingState;
            attribute = cs.attribute;
            tag = cs.tag;
            //state() stores the script type as well; without restoring it a restart
            //inside a script tag would lose the value of its "type" attribute
            scriptType = cs.scriptType;
            customELIndex = cs.customELIndex;
            quoteType = cs.quoteType;
        }

        InputAttributes inputAttributes = info.inputAttributes();
        if (inputAttributes != null) {
            //unchecked: the attribute value is stored as a plain Object
            cssClassTagAttrMap = (Map<String, Collection<String>>)inputAttributes.getValue(
                    LanguagePath.get(HTMLTokenId.language()), CSS_CLASS_MAP_PROPERTY_KEY); //NOI18N
        }
    }

    /**
     * Tells whether the character is an ASCII letter.
     *
     * @param character the character code to test
     * @return true for 'a'..'z' and 'A'..'Z', false otherwise
     */
    private boolean isAZ( int character ) {
        boolean lowerCase = 'a' <= character && character <= 'z';
        boolean upperCase = 'A' <= character && character <= 'Z';
        return lowerCase || upperCase;
    }

    /**
     * Tells whether the character may be part of a tag name.
     *
     * @param character the character code to test
     * @return true for letters, digits and the characters '-', '_', '.' and ':'
     */
    private boolean isName( int character ) {
        switch (character) {
            case '-':
            case '_':
            case '.':
            case ':':
                return true;
            default:
                return Character.isLetterOrDigit(character);
        }
    }

    /**
     * Tells whether the character may be part of an attribute name.
     *
     * @param character the character code to test
     * @return false for whitespace, '/', '&gt;', '&lt;', '=' and the NUL character,
     *         true for anything else
     */
    private boolean isAttributeName( int character ) {
        switch (character) {
            case '/':
            case '>':
            case '<':
            case '=':
            case 0:
                return false;
            default:
                return !Character.isWhitespace(character);
        }
    }

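    /**
     * Resolves whether the given char is treated as whitespace by this lexer.
     * According to the HTML 4.0 specs, the following characters are whitespace:
     * Space - '\u0020', Tab - '\u0009',
     * Formfeed - '\u000C', Zero-width space - '\u200B',
     * Carriage return - '\u000D' and Line feed - '\u000A'.
     * The implementation delegates to Character.isWhitespace(int), so the accepted
     * set is the one defined by the JDK (the zero-width space may not be part of it)
     * and it additionally accepts the '@' character - see the comment in the method.
     * CRs are included for completeness only, they should never appear in the document.
     */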

    private boolean isWS( int character ) {
        //Why is the at sign accepted as well? See the issue #149968.
        //
        //Generated virtual html code marks the places where the real document
        //contains some templating language with three at signs (@@@). Ideally
        //the lexer would recognize and ignore exactly this mark anywhere in the
        //code, but that needs either preprocessing of the text or yet more
        //states in an already complicated lexer.
        //
        //Lexing every at sign as whitespace is the simplest solution and is
        //not very harmful: the only side effect is that a single at sign is
        //lexed as whitespace and not signalled as an error in the editor.
        //
        //Known limitation: constructs where the templating language generates
        //an attribute name do not work; this is considered quite unusual.
        if (character == '@') {
            return true;
        }
        return Character.isWhitespace(character);
    }

    /**
     * Tells whether the attribute name is one of the known JavaScript event
     * handler names, ignoring case.
     *
     * @param attributeName the attribute name, may be null
     * @return true if the lower-cased name is contained in EVENT_HANDLER_NAMES
     */
    private boolean isJavascriptEventHandlerName(CharSequence attributeName) {
        if (attributeName == null || attributeName.length() <= 2) {
            return false;
        }
        //cheap prefix test first: all the known names start with "on"
        char first = attributeName.charAt(0);
        char second = attributeName.charAt(1);
        boolean startsWithOn = (first == 'o' || first == 'O')
                && (second == 'n' || second == 'N');
        if (!startsWithOn) {
            return false;
        }
        String lowerCased = attributeName.toString().toLowerCase(Locale.ENGLISH);
        return EVENT_HANDLER_NAMES.contains(lowerCased);
    }

    /**
     * Tells whether the given name equals, ignoring case, one of STYLE_ATTRS.
     *
     * @param chs the attribute name, may be null
     * @return true on a match, false otherwise or when chs is null
     */
    private boolean isStyleAttributeName(CharSequence chs) {
        if (chs == null) {
            return false;
        }
        for (String styleAttr : STYLE_ATTRS) {
            if (matchesStyleAttrIgnoringCase(chs, styleAttr)) {
                return true;
            }
        }
        return false;
    }

    /** Compares both texts character by character on their lower-case forms. */
    private static boolean matchesStyleAttrIgnoringCase(CharSequence text, String styleAttr) {
        int length = styleAttr.length();
        if (text.length() != length) {
            return false;
        }
        int index = 0;
        while (index < length
                && Character.toLowerCase(text.charAt(index)) == Character.toLowerCase(styleAttr.charAt(index))) {
            index++;
        }
        return index == length;
    }

    /**
     * Extracts the script type from the text of the "type" attribute value.
     *
     * @param attributeValue the attribute value text; for a quoted value it
     *        starts with the opening quote and may lack the closing one
     * @param quoted whether the value is quoted
     * @return the value as is when it is not quoted or empty; otherwise the
     *         value without the opening quote and without the closing quote
     *         if there is one
     */
    private CharSequence getScriptType(CharSequence attributeValue, boolean quoted) {
        int length = attributeValue.length();
        if (!quoted || length == 0) {
            //nothing to strip; an empty value must not reach charAt(length - 1)
            return attributeValue;
        }
        char lastChar = attributeValue.charAt(length - 1);
        //a single character is just the opening quote, never a closing one
        boolean hasEndQuote = length > 1 && (lastChar == '\'' || lastChar == '"');
        return attributeValue.subSequence(1, hasEndQuote ? length - 1 : length);
    }

    /**
     * Looks ahead in the input to find out whether the name of the given close
     * tag immediately followed by '&gt;' comes next. All characters read by the
     * lookahead are put back before returning.
     *
     * @param closeTagName the expected tag name
     * @return true if the upcoming name matches closeTagName and is directly
     *         followed by '&gt;'
     */
    private boolean followsCloseTag(CharSequence closeTagName) {
        final int lookaheadStart = input.readLength(); //length of the text read before the lookahead
        int consumed = 0;
        int current;
        boolean partOfName;
        do {
            current = input.read();
            consumed++;
            partOfName = current != EOF
                    && (Character.isLetter(current)
                    || Character.isDigit(current)
                    || current == '_'
                    || current == '-'
                    || current == ':'
                    || current == '.'
                    || current == '/');
        } while (partOfName);

        //the last read character (or EOF) terminated the name and is not part of it
        CharSequence upcomingName = input.readText().subSequence(lookaheadStart, lookaheadStart + consumed - 1);

        input.backup(consumed); //put the lookahead text back to the buffer

        boolean sameName = equals(closeTagName, upcomingName, true, true);
        return sameName && current == '>';
    }


    @Override
    public Token nextToken() {
        int actChar;

        main: while (true) {
            actChar = input.read();

            if (actChar == EOF) {
                if(input.readLengthEOF() == 1) {
                    return null; //just EOL is read
                } else {
                    //there is something else in the buffer except EOL
                    //we will return last token now
                    input.backup(1); //backup the EOL, we will return null in next nextToken() call
                    break;
                }
            }


            //System.out.println("HTMLSyntax: parseToken tokenOffset=" + tokenOffset + ", actChar='" + actChar + "', offset=" + offset + ", state=" + getStateName(state) +
            //      ", stopOffset=" + stopOffset + ", lastBuffer=" + lastBuffer);
            switch( lexerState ) {
                case INIT:              // DONE
                    switch( actChar ) {
                        case '<':
                            lexerState = ISA_LT;
                            continue main;
                        case '&':
                            lexerState = ISA_REF;
                            lexerSubState = ISI_TEXT;
                            continue main;
                        default:
                            lexerState = ISI_TEXT;
                            break;
                    }
                    //fall through to ISI_TEXT

                case ISI_TEXT:        // DONE
                    switch( actChar ) {
                        case '<':
                        case '&':
                            lexerState = INIT;
                            input.backup(1);
                            if(input.readLength() > 0) { //is there any text before & or < ???
                                return token(HTMLTokenId.TEXT);
                            }
                            break;
                    }

                    //custom EL support
                    delimiters: for(byte delimiterIndex = 0; delimiterIndex < customELQuery.getOpenDelimiters().length; delimiterIndex++ ) {
                        String openDelimiter = customELQuery.getOpenDelimiters()[delimiterIndex];
                        if(openDelimiter == null) {
                            continue;
                        }
                        int alreadyRead = input.readLength();
                        char read = (char)actChar; //first char is already read
                        for(int i = 0; i < openDelimiter.length(); i++) {
                            char delimChar = openDelimiter.charAt(i);
                            if(read != delimChar) {
                                //no match
                                input.backup(input.readLengthEOF() - alreadyRead); //backup text
                                continue delimiters; //and try next one
                            }
                            if((i+1) < openDelimiter.length()) {
                                //will be next loop, read char
                                read = (char)input.read();
                            }
                        }

                        //we've found an open delimiter
                        //check if the there was already something read before checking the delimiter,
                        //if so then return it and re-run this step again so then we can return
                        //clean token for the delimiter
                        if(input.readLength() > openDelimiter.length()) {
                            input.backup(openDelimiter.length());
                            return token(HTMLTokenId.TEXT);
                        } else {
                            //return the open symbol token and switch to "in el" state
                            lexerState = ISI_EL;
                            customELIndex = (byte)(delimiterIndex + 1); //0 is reserved for "no delimiter", 1 means delimiter with index 0
                            //save the provider's index in the delimiter token's property so once can recognize what should be
                            //the delimiters' content if it is empty
                            //TODO "contentMimetype" INTO API???
                            return token(HTMLTokenId.EL_OPEN_DELIMITER,
                                    new HtmlTokenPropertyProvider(EL_EXPRESSION_CONTENT_MIMETYPE_TOKEN_PROPERTY_KEY, customELQuery.getMimeTypes()[delimiterIndex]));
                        }

                    }

                    break;

                case ISI_EL:
                    delimiters: for(byte delimiterIndex = 0; delimiterIndex < customELQuery.getOpenDelimiters().length; delimiterIndex++ ) {
                        String closeDelimiter = customELQuery.getCloseDelimiters()[delimiterIndex];
                        if(closeDelimiter == null) {
                            continue;
                        }
                        int alreadyRead = input.readLength();
                        char read = (char)actChar; //first char is already read
                        for(int i = 0; i < closeDelimiter.length(); i++) {
                            char delimChar = closeDelimiter.charAt(i);
                            if(read != delimChar) {
                                //no match
                                input.backup(input.readLength() - alreadyRead); //backup text
                                continue delimiters; //and try next one
                            }
                            if((i+1) < closeDelimiter.length()) {
                                //will be next loop, read char
                                read = (char)input.read();
                            }
                        }
                        //we've found a close delimiter
                        //check if the there was already something read before checking the delimiter,
                        //if so then return it and re-run this step again so then we can return
                        //clean token for the delimiter
                        if(input.readLength() > closeDelimiter.length()) {
                            input.backup(closeDelimiter.length());
                            //save the provider's index in the token's property so we can set the corresponding embdding in HTMLTokenId.language()
                            return token(HTMLTokenId.EL_CONTENT, new HtmlTokenPropertyProvider(EL_CONTENT_PROVIDER_INDEX, (byte)(customELIndex - 1)));
                        } else {
                            //return the open symbol token and switch to "in el" state
                            lexerState = INIT;
                            customELIndex = INIT;
                            return token(HTMLTokenId.EL_CLOSE_DELIMITER);
                        }
                    }

                    break;

                case ISI_ERROR:      // DONE
                    lexerState = INIT;
                    tag = null;
                    return token(HTMLTokenId.ERROR);

                case ISA_LT:         // PENDING other transitions - e.g ' 1) { //lexer restart check, token already returned before last EOF
                            input.backup(1);
                            return token(HTMLTokenId.TAG_OPEN_SYMBOL);
                        }
                        break;
                    }
                    switch( actChar ) {
                        case '/':               // ETAGO - ':               // Empty start tag <>, RELAXED
                            lexerState = INIT;
                            return token(HTMLTokenId.TAG_CLOSE_SYMBOL);
                        case '!':
                            lexerState = ISA_SGML_ESCAPE;
                            break;
                        case '?':
                            lexerState = ISI_XML_PI;
                            break;
                        default:
                            input.backup(1);
                            lexerState = ISI_TEXT;
                            break;
                    }
                    break;

                case ISI_XML_PI:
                    if(actChar == '?') {
                        lexerState = ISI_XML_PI_QM;
                        break;
                    }
                    //else stay in XML PI
                    break;

                case ISI_XML_PI_QM:
                    if(actChar == '>') {
                        //XML PI token
                        lexerState = INIT;
                        return token(HTMLTokenId.XML_PI);
                    } else {
                        lexerState = ISI_XML_PI;
                        break;
                    }

                case ISA_SLASH:        // DONE
                    if( isAZ( actChar ) ) {   // ':               // Empty end tag , RELAXED
                            lexerState = INIT;
                            return token(HTMLTokenId.TAG_CLOSE_SYMBOL);
                        default:                // Part of text, e.g.  1) { //lexer restart check, token already returned before last EOF
                        input.backup(1);
                        return token(HTMLTokenId.TAG_CLOSE);
                    }
                    break;


                case ISP_ENDTAG_X:      // DONE
                    if( isWS( actChar ) ) {
                        lexerState = ISP_ENDTAG_WS;
                        break;
                    }
                    tag = null;
                    switch( actChar ) {
                        case '>':               // Closing of endtag, e.g. _
                            lexerState = INIT;
                            return token(HTMLTokenId.TAG_CLOSE_SYMBOL);
                        case '<':               // next tag, e.g.  1) { //lexer restart check, token already returned before last EOF
                        input.backup(1);
                        return token(HTMLTokenId.WS);
                    }
                    break;


                case ISI_TAG:        // DONE
                    if( isName( actChar ) ) break;    // Still in tag identifier, eat next char
                    lexerState = ISP_TAG_X;
                    if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                        input.backup(1);
                        //test if the tagname is SCRIPT
                        tag = input.readText().toString();
                        if(equals(SCRIPT, tag, true, true)) {
                            lexerEmbeddingState = ISI_SCRIPT;
                        }
                        if(equals(STYLE, tag, true, true)) {
                            lexerEmbeddingState = ISI_STYLE;
                        }
                        return token(HTMLTokenId.TAG_OPEN);
                    }
                    break;

                case ISP_TAG_X:     // DONE
                    if( isWS( actChar ) ) {
                        lexerState = ISP_TAG_WS;
                        break;
                    }
                    if( isAttributeName(actChar) ) {
                        lexerState = ISI_ARG;
                        break;
                    }
                    switch( actChar ) {
                        case '/':
                            lexerState = ISI_TAG_SLASH;
                            break;
                        case '>':
                            switch (lexerEmbeddingState) {
                                case INIT:
                                    lexerState = INIT;
                                    break;
                                case ISI_SCRIPT:
                                    //script w/ "text/html" content type workaround
                                    //do lex the script content as normal html code
                                    if(scriptType != null && "text/html".equalsIgnoreCase(scriptType)) { //NOI18N
                                        lexerEmbeddingState = INIT;
                                        scriptType = null;
                                        lexerState = INIT;
                                    } else {
                                        lexerState = ISI_SCRIPT_CONTENT;
                                    }
                                    break;
                                case ISI_STYLE:
                                    lexerState = ISI_STYLE_CONTENT;
                                    break;
                            }
                            tag = null;
                            return token(HTMLTokenId.TAG_CLOSE_SYMBOL);
                        case '<':
                            tag = null;
                            lexerState = INIT;
                            input.backup(1);
                            break;
                        default:
                            lexerState = ISP_TAG_X_ERROR;
                            break;
                    }
                    break;

                case ISP_TAG_X_ERROR:
                    if(isWS(actChar)) {
                        lexerState = tag == null ? INIT : ISP_TAG_X;
                        input.backup(1); //backup the WS
                        return token(HTMLTokenId.ERROR);
                    }
                    switch(actChar) {
                        case '/':
                        case '>':
                            lexerState = tag == null ? INIT : ISP_TAG_X;
                            input.backup(1); //lets reread the token again
                            return token(HTMLTokenId.ERROR);
                    }
                    //stay in error
                    break;

                case ISP_TAG_WS:        // DONE
                    if( isWS( actChar ) ) break;    // eat all WS
                    lexerState = ISP_TAG_X;
                    if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                        input.backup(1);
                        return token(HTMLTokenId.WS);
                    }

                case ISI_TAG_SLASH:
                    tag = null;
                    switch( actChar ) {
                        case '>':
                            lexerEmbeddingState = INIT; //possibly cancel 'in script' if empty tag found
                            lexerState = INIT;
                            return token(HTMLTokenId.TAG_CLOSE_SYMBOL);
                        default:
                            lexerState = tag == null ? INIT : ISP_TAG_X;
                            input.backup(1);
                            return token(HTMLTokenId.ERROR);
                    }

                case ISI_SCRIPT_CONTENT:
                    switch( actChar ) {
                        case '<' :
                            lexerState = ISI_SCRIPT_CONTENT_AFTER_LT;
                            break;
                        default:
                            break;
                    }
                    break;

                case ISI_SCRIPT_CONTENT_AFTER_LT:
                    if (actChar == '/') {
                        if (followsCloseTag(SCRIPT)) {
                            //end of script section found
                            lexerEmbeddingState = INIT;
                            lexerState = INIT;
                            tag = null;
                            String type = scriptType;
                            scriptType = null;
                            input.backup(input.readLength() > 2 ? 2 : input.readLength()); //backup the ' 0) {
                                //the script has a body
                                return token(HTMLTokenId.SCRIPT, new HtmlTokenPropertyProvider(HTMLTokenId.SCRIPT_TYPE_TOKEN_PROPERTY, type)); //NOI18N
                            } else {
                                break;
                            }
                        }
                    }
                    lexerState = ISI_SCRIPT_CONTENT;
                    break;

                case ISI_STYLE_CONTENT:
                    switch( actChar ) {
                        case '<' :
                            lexerState = ISI_STYLE_CONTENT_AFTER_LT;
                            break;
                        default:
                            break;
                    }
                    break;

                case ISI_STYLE_CONTENT_AFTER_LT:
                    if (actChar == '/') {
                        if (followsCloseTag(STYLE)) {
                            //end of script section found
                            lexerEmbeddingState = INIT;
                            lexerState = INIT;
                            tag = null;
                            input.backup(input.readLength() > 2 ? 2 : input.readLength()); //backup the ' 0) {
                                //the script has a body
                                return token(HTMLTokenId.STYLE);
                            } else {
                                break;
                            }
                        }
                    }
                    lexerState = ISI_STYLE_CONTENT;
                    break;

                case ISI_ARG:           // DONE
                    if( isAttributeName(actChar) ) break; // eat next char
                    lexerState = ISP_ARG_X;
                    if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                        input.backup(1);
                        attribute =input.readText().toString();
                        return token(HTMLTokenId.ARGUMENT);
                    }
                    break;

                case ISP_ARG_X:
                    if( isWS( actChar ) ) {
                        lexerState = ISP_ARG_WS;
                        break;
                    }
                    if( isAttributeName(actChar) ) {
                        lexerState = ISI_ARG;
                        break;
                    }
                    switch( actChar ) {
                        case '/':
                        case '>':
                            input.backup(1);
                            lexerState = ISP_TAG_X;
                            break;
                        case '<':
                            lexerState = INIT;
                            input.backup(1);
                            break;
                        case '=':
                            lexerState = ISP_EQ;
                            return token(HTMLTokenId.OPERATOR);
                        default:
                            lexerState = ISI_ERROR;
                            input.backup(1);
                            break;
                    }
                    break;

                case ISP_ARG_WS:
                    if( isWS( actChar ) ) break;    // Eat all WhiteSpace
                    lexerState = ISP_ARG_X;
                    if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                        input.backup(1);
                        return token(HTMLTokenId.WS);
                    }
                    break;

                case ISP_EQ:
                    if( isWS( actChar ) ) {
                        lexerState = ISP_EQ_WS;
                        break;
                    }
                    switch( actChar ) {
                        case '\'':
                            quoteType = false;
                            lexerState = ISI_VAL_QUOT;
                            break;
                        case '"':
                            quoteType = true;
                            lexerState = ISI_VAL_QUOT;
                            break;
                        case '/':
                        case '>':
                        case '<':
                            input.backup(1);
                            lexerState = ISP_TAG_X;
                            break;
                        default:
                            lexerState = ISI_VAL; //everything else if attribute value
                            break;
                    }
                    break;

                case ISP_EQ_WS:
                    if( isWS( actChar ) ) break;    // Consume all WS
                    lexerState = ISP_EQ;
                    if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                        input.backup(1);
                        return token(HTMLTokenId.WS);
                    }
                    break;


                case ISI_VAL:
                    if(actChar == '/') {
                        //slash in unquoted value -- may be there but not followed by >.
                        //In such case IMO the value should be closed
                        char next = (char)input.read();
                        input.backup(1); //backup the next char
                        if(next != '>') {
                            //continue lexing the value
                            break;
                        }
                    } else if(!isWS(actChar) && actChar != '>' && actChar != '<') {
                        break; //continue lexing the attribute value
                    }

                    //finish lexing the value
                    lexerState = ISP_TAG_X;
                    if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                        input.backup(1);
                        Token resolveValueToken = resolveValueToken();
                        attribute = null;
                        return resolveValueToken;
                    }

                    break;

                case ISI_VAL_QUOT:
                     //custom EL support
                    delimiters: for(byte delimiterIndex = 0; delimiterIndex < customELQuery.getOpenDelimiters().length; delimiterIndex++ ) {
                        String openDelimiter = customELQuery.getOpenDelimiters()[delimiterIndex];
                        if(openDelimiter == null) {
                            continue;
                        }
                        int alreadyRead = input.readLength();
                        char read = (char)actChar; //first char is already read
                        for(int i = 0; i < openDelimiter.length(); i++) {
                            char delimChar = openDelimiter.charAt(i);
                            if(read != delimChar) {
                                //no match
                                input.backup(input.readLength() - alreadyRead); //backup text
                                continue delimiters; //and try next one
                            }
                            if((i+1) < openDelimiter.length()) {
                                //will be next loop, read char
                                read = (char)input.read();
                            }
                        }

                        //we've found an open delimiter
                        //check if the there was already something read before checking the delimiter,
                        //if so then return it and re-run this step again so then we can return
                        //clean token for the delimiter
                        if(input.readLength() > openDelimiter.length()) {
                            input.backup(openDelimiter.length());
                            return resolveValueToken();
                        } else {
                            //return the open symbol token and switch to "in el" state
                            lexerState = ISI_VAL_QUOT_EL;
                            customELIndex = (byte)(delimiterIndex + 1); //0 is reserved for "no delimiter", 1 means delimiter with index 0
                            //save the provider's index in the delimiter token's property so once can recognize what should be
                            //the delimiters' content if it is empty
                            //TODO "contentMimetype" INTO API???
                            return token(HTMLTokenId.EL_OPEN_DELIMITER,
                                    new HtmlTokenPropertyProvider(EL_EXPRESSION_CONTENT_MIMETYPE_TOKEN_PROPERTY_KEY, customELQuery.getMimeTypes()[delimiterIndex]));
                        }

                    }

                    switch (actChar) {
                        case '\\':
                            //may be escaped quote
                            lexerState = ISI_VAL_QUOT_ESC;
                            break;

                        case '\'':
                        case '"':
                            if(actChar == '\'' && !quoteType || actChar == '"' && quoteType) {
                                //reset the 'script embedding will follow state' if the value represents a
                                //type attribute value of a script tag
                                if(equals(SCRIPT, tag, true, true) && equals("type", attribute, true, true)) { //NOI18N
                                    //inside script tag
                                    scriptType = getScriptType(input.readText(), true).toString();
                                }

                                lexerState = ISP_TAG_X;
                                Token resolveValueToken = resolveValueToken();
                                attribute = null;
                                return resolveValueToken;
                            }
                    }
                    break;  // else simply consume next char of VALUE

                case ISI_VAL_QUOT_EL:
                     delimiters: for(byte delimiterIndex = 0; delimiterIndex < customELQuery.getOpenDelimiters().length; delimiterIndex++ ) {
                        String closeDelimiter = customELQuery.getCloseDelimiters()[delimiterIndex];
                        if(closeDelimiter == null) {
                            continue;
                        }
                        int alreadyRead = input.readLength();
                        char read = (char)actChar; //first char is already read
                        for(int i = 0; i < closeDelimiter.length(); i++) {
                            char delimChar = closeDelimiter.charAt(i);
                            if(read != delimChar) {
                                //no match
                                input.backup(input.readLength() - alreadyRead); //backup text
                                continue delimiters; //and try next one
                            }
                            if((i+1) < closeDelimiter.length()) {
                                //will be next loop, read char
                                read = (char)input.read();
                            }
                        }
                        //we've found a close delimiter
                        //check if the there was already something read before checking the delimiter,
                        //if so then return it and re-run this step again so then we can return
                        //clean token for the delimiter
                        if(input.readLength() > closeDelimiter.length()) {
                            input.backup(closeDelimiter.length());
                            //save the provider's index in the token's property so we can set the corresponding embdding in HTMLTokenId.language()
                            return token(HTMLTokenId.EL_CONTENT, new HtmlTokenPropertyProvider(EL_CONTENT_PROVIDER_INDEX, (byte)(customELIndex - 1)));
                        } else {
                            //return the close symbol token and switch to "in value" state
                            lexerState = ISI_VAL_QUOT;
                            customELIndex = INIT;
                            return token(HTMLTokenId.EL_CLOSE_DELIMITER);
                        }
                    }

                    break;

                case ISI_VAL_QUOT_ESC:
                    //Just consume the escaped char.
                    //The state prevents the quoted value
                    //to be finished by an escaped quote.
                    lexerState = ISI_VAL_QUOT;
                    break;

                case ISA_SGML_ESCAPE:       // DONE
                    if( isAZ(actChar) ) {
                        lexerState = ISI_SGML_DECL;
                        break;
                    }
                    switch( actChar ) {
                        case '-':
                            lexerState = ISA_SGML_DASH;
                            break;
                        default:
                            lexerState = ISI_TEXT;
                            input.backup(1);
                            continue;
                    }
                    break;

                case ISA_SGML_DASH:       // DONE
                    switch( actChar ) {
                        case '-':
                            lexerState = ISI_HTML_COMMENT;
                            break;
                        default:
                            lexerState = ISI_TEXT;
                            input.backup(1);
                            continue;
                    }
                    break;

                case ISI_HTML_COMMENT:        // DONE
                    switch( actChar ) {
                        case '-':
                            lexerState = ISA_HTML_COMMENT_DASH;
                            break;
                            //create an HTML comment token for each line of the comment - a performance fix for #43532
                        case '\n':
                            //leave the some state - we are still in an HTML comment,
                            //we just need to create a token for each line.
                            return token(HTMLTokenId.BLOCK_COMMENT);
                    }
                    break;

                case ISA_HTML_COMMENT_DASH:
                    switch( actChar ) {
                        case '-':
                            lexerState = ISI_HTML_COMMENT_WS;
                            break;
                        default:
                            lexerState = ISI_HTML_COMMENT;
                            continue;
                    }
                    break;

                case ISI_HTML_COMMENT_WS:       // DONE
                    switch( actChar ) {
                        case '>':
                            lexerState = INIT;
                            return token(HTMLTokenId.BLOCK_COMMENT);
                        default:
                            lexerState = ISI_HTML_COMMENT;
                            input.backup(2); //backup everything except the first comma
                            break;
                    }
                    break;

                case ISI_SGML_DECL:
                    if(Character.isWhitespace(actChar)) {
                        lexerState = ISI_SGML_DECL_WS;
                        if(input.readLength() > 1) {
                            input.backup(1); //backup the whitespace
                            return token(HTMLTokenId.DECLARATION);
                        }
                        break;
                    }
                    switch( actChar ) {
                        case '>':
                            if(input.readLength() > 1) {
                                input.backup(1); //backup the '<' char
                                return token(HTMLTokenId.DECLARATION);
                            } else {
                                //just the symbol read - return it as a part of declaration
                                lexerState = INIT;
                                return token(HTMLTokenId.DECLARATION);
                            }

                    }
                    break;

                case ISI_SGML_DECL_WS:
                    if(actChar == '-') {
                            if( input.readLength() == 1 ) {
                                lexerState = ISA_SGML_DECL_DASH;
                                break;
                            } else {
                                if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                                    input.backup(1);
                                    return token(HTMLTokenId.DECLARATION);
                                }
                            }
                    } else if(!Character.isWhitespace(actChar)) {
                        lexerState = ISI_SGML_DECL;
                        input.backup(1);
                        return token(HTMLTokenId.WS);
                    }
                    break;

                case ISA_SGML_DECL_DASH:
                    if( actChar == '-' ) {
                        lexerState = ISI_SGML_COMMENT;
                        break;
                    } else {
                        lexerState = ISI_SGML_DECL;
                        input.backup(1);
                        continue;
                    }

                case ISI_SGML_COMMENT:
                    switch( actChar ) {
                        case '-':
                            lexerState = ISA_SGML_COMMENT_DASH;
                            break;
                    }
                    break;

                case ISA_SGML_COMMENT_DASH:
                    if( actChar == '-' ) {
                        lexerState = ISI_SGML_DECL;
                        return token(HTMLTokenId.SGML_COMMENT);
                    } else {
                        lexerState = ISI_SGML_COMMENT;
                        input.backup(1);
                        continue;
                    }


                case ISA_REF:
                    if( isAZ( actChar ) ) {
                        lexerState = ISI_REF_NAME;
                        break;
                    }
                    if( actChar == '#' ) {
                        lexerState = ISA_REF_HASH;
                        break;
                    }
                    lexerState = lexerSubState;
                    input.backup(1);
                    continue;

                case ISI_REF_NAME:
                    if( isName( actChar ) ) break;
                    lexerState = lexerSubState;
                    if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                        if( actChar != ';' ) {
                            input.backup(1);
                            return token(HTMLTokenId.TEXT);
                        }
                        return token(HTMLTokenId.CHARACTER);
                    }
                    break;

                case ISA_REF_HASH:
                    if( actChar >= '0' && actChar <= '9' ) {
                        lexerState = ISI_REF_DEC;
                        break;
                    }
                    if( actChar == 'x' || actChar == 'X' ) {
                        lexerState = ISA_REF_X;
                        break;
                    }
                    if( isAZ( actChar ) ) {
                        lexerState = lexerSubState;
                        return token(HTMLTokenId.ERROR);
                    }
                    lexerState = lexerSubState;
                    input.backup(1);
                    continue;

                case ISI_REF_DEC:
                    if( actChar >= '0' && actChar <= '9' ) break;
                    lexerState = lexerSubState;
                    if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                        if( actChar != ';' )
                            input.backup(1);
                        return token(HTMLTokenId.CHARACTER);
                    }
                    break;

                case ISA_REF_X:
                    if( (actChar >= '0' && actChar <= '9') ||
                            (actChar >= 'a' && actChar <= 'f') ||
                            (actChar >= 'A' && actChar <= 'F')
                            ) {
                        lexerState = ISI_REF_HEX;
                        break;
                    }
                    lexerState = lexerSubState;
                    if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                        input.backup(1);
                        return token(HTMLTokenId.ERROR);       // error on previous "&#x" sequence
                    }
                    break;

                case ISI_REF_HEX:
                    if( (actChar >= '0' && actChar <= '9') ||
                            (actChar >= 'a' && actChar <= 'f') ||
                            (actChar >= 'A' && actChar <= 'F')
                            ) break;
                    lexerState = lexerSubState;
                    if(input.readLength() > 1) { //lexer restart check, token already returned before last EOF
                        if( actChar != ';' )
                            input.backup(1);
                        return token(HTMLTokenId.CHARACTER);
                    }
                    break;
            }
        } // end of while(offset...)

        /** At this stage there's no more text in the scanned buffer.
         * Scanner first checks whether this is completely the last
         * available buffer.
         */
        switch( lexerState ) {
            case INIT:
                if (input.readLength() == 0) {
                    return null;
                }
                break;
            case ISI_TEXT:
            case ISA_LT:
            case ISA_SLASH:
            case ISA_SGML_ESCAPE:
            case ISA_SGML_DASH:
            case ISI_TAG_SLASH:
                return token(HTMLTokenId.TEXT);

            case ISI_XML_PI:
            case ISI_XML_PI_QM:
                return token(HTMLTokenId.XML_PI);

            case ISA_REF:
            case ISA_REF_HASH:
                if( lexerSubState == ISI_TEXT ) return token(HTMLTokenId.TEXT);
                else return token(HTMLTokenId.VALUE);

            case ISI_HTML_COMMENT:
            case ISA_HTML_COMMENT_DASH:
            case ISI_HTML_COMMENT_WS:
                return token(HTMLTokenId.BLOCK_COMMENT);

            case ISI_TAG:
                lexerState = ISP_TAG_X;
                //test if the tagname is SCRIPT
                if(equals(SCRIPT, input.readText(), true, true)) {
                        lexerEmbeddingState = ISI_SCRIPT;
                    }
                if(equals(STYLE, input.readText(), true, true)) {
                    lexerEmbeddingState = ISI_STYLE;
                }
                return token(HTMLTokenId.TAG_OPEN);
            case ISI_ENDTAG:
                return token(HTMLTokenId.TAG_CLOSE);

            case ISI_ARG:
                return token(HTMLTokenId.ARGUMENT);

            case ISI_ERROR:
            case ISP_TAG_X_ERROR:
                return token(HTMLTokenId.ERROR);

            case ISP_ARG_WS:
            case ISP_TAG_WS:
            case ISP_ENDTAG_WS:
            case ISP_EQ_WS:
                return token(HTMLTokenId.WS);

            case ISP_ARG_X:
            case ISP_TAG_X:
            case ISP_ENDTAG_X:
            case ISP_EQ:
                return token(HTMLTokenId.WS);

            case ISI_VAL:
            case ISI_VAL_QUOT:
            case ISI_VAL_QUOT_ESC:
                return resolveValueToken();

            case ISI_SGML_DECL:
            case ISA_SGML_DECL_DASH:
            case ISI_SGML_DECL_WS:
                return token(HTMLTokenId.DECLARATION);

            case ISI_SGML_COMMENT:
            case ISA_SGML_COMMENT_DASH:
                return token(HTMLTokenId.SGML_COMMENT);

            case ISI_REF_NAME:
            case ISI_REF_DEC:
            case ISA_REF_X:
            case ISI_REF_HEX:
                return token(HTMLTokenId.TEXT);
            case ISI_SCRIPT_CONTENT:
            case ISI_SCRIPT_CONTENT_ENDTAG:
            case ISI_SCRIPT_CONTENT_AFTER_LT:
                return token(HTMLTokenId.SCRIPT);
            case ISI_STYLE_CONTENT:
            case ISI_STYLE_CONTENT_ENDTAG:
            case ISI_STYLE_CONTENT_AFTER_LT:
                return token(HTMLTokenId.STYLE);

            case ISI_EL:
            case ISI_VAL_QUOT_EL:
                return token(HTMLTokenId.EL_CONTENT, new HtmlTokenPropertyProvider(EL_CONTENT_PROVIDER_INDEX, (byte)(customELIndex - 1)));


        }

        assert input.readLength() == 0 : "Returning null even if some chars still needs to be tokenized! " +
            "lexer state=" + lexerState + "; " +
            "lexer substate=" + lexerSubState + "; " +
            "lexer embedding state=" + lexerEmbeddingState + "; " +
            "readtext='" + input.readText() + "'";

        return null;
    }

    // Attribute names compared (ignoring case) against the current attribute in
    // createCssValueToken() to choose which css token type property gets attached.
    private static final String CLASS_ATTR_NAME = "class"; //NOI18N
    private static final String ID_ATTR_NAME = "id"; //NOI18N

    /**
     * Creates the token for an attribute value, choosing the token id and the
     * token properties from the current {@code attribute} and {@code tag}.
     * <p>
     * The rules are tried in this order; the first one that applies wins:
     * <ol>
     * <li>javascript event handler attribute - {@code VALUE_JAVASCRIPT}</li>
     * <li>style attribute name - delegated to {@link #createCssValueToken()}</li>
     * <li>tag/attribute pair registered in {@code cssClassTagAttrMap} -
     *     {@code VALUE_CSS} with the "class" property provider</li>
     * <li>embedding mimetype supplied by the lexer plugins - {@code VALUE}
     *     carrying that mimetype as a token property</li>
     * <li>otherwise a plain {@code VALUE} token</li>
     * </ol>
     *
     * @return the token for the attribute value
     */
    private Token resolveValueToken() {
        assert attribute != null;

        //javascript embedding for onclick and similar event handler attributes
        if (isJavascriptEventHandlerName(attribute)) {
            return token(HTMLTokenId.VALUE_JAVASCRIPT);
        }

        //css embedding for the attributes recognized by isStyleAttributeName()
        if (isStyleAttributeName(attribute)) {
            return createCssValueToken();
        }

        //generic css "class" embedding declared per tag
        final boolean tagAttrMapUsable = cssClassTagAttrMap != null && tag != null;
        if (tagAttrMapUsable) {
            Collection<?> classAttributes = cssClassTagAttrMap.get(tag);
            if (classAttributes != null && classAttributes.contains(attribute)) {
                //the value of this attribute gets the css "class" selector embedding
                return token(HTMLTokenId.VALUE_CSS, CLASS_TOKEN_PP);
            }
        }

        //embedding possibly requested by the lexer plugins
        String pluginMimeType = HtmlPlugins.getDefault().createAttributeEmbedding(tag, attribute);
        if (pluginMimeType == null) {
            //nothing special about this value
            return token(HTMLTokenId.VALUE);
        }

        LOGGER.log(Level.FINE, "creating html attribute value token {0} in tag {1} with embedding {2}",
                new Object[]{attribute, tag, pluginMimeType});
        TokenPropertyProvider mimeTypeProperty =
                new HtmlTokenPropertyProvider(ATTRIBUTE_VALUE_EMBEDDING_MIMETYPE_TOKEN_PROPERTY_KEY, pluginMimeType);
        return token(HTMLTokenId.VALUE, mimeTypeProperty);
    }

    /**
     * Creates a {@code VALUE_CSS} token for the current attribute value.
     * <p>
     * The token carries {@code CLASS_TOKEN_PP} when the current attribute is
     * named "class", {@code ID_TOKEN_PP} when it is named "id" (both compared
     * ignoring case) and no property provider for any other attribute.
     *
     * @return the css value token
     */
    private Token createCssValueToken() {
        final boolean classAttribute = equals(CLASS_ATTR_NAME, attribute, true, true);
        //the "id" comparison is only evaluated when the attribute is not "class"
        final TokenPropertyProvider cssTypeProperty = classAttribute
                ? CLASS_TOKEN_PP
                : (equals(ID_ATTR_NAME, attribute, true, true) ? ID_TOKEN_PP : null);

        return token(HTMLTokenId.VALUE_CSS, cssTypeProperty);
    }

    /**
     * Creates a token of the given id without any token property provider.
     *
     * @param tokenId id of the token to create
     * @return the token created by {@link #token(HTMLTokenId, TokenPropertyProvider)}
     */
    private Token token(HTMLTokenId tokenId) {
        final TokenPropertyProvider noProperties = null;
        return token(tokenId, noProperties);
    }

    /**
     * Creates a token of the given id for the text currently read from the input.
     * <p>
     * When a property provider is given, a property token spanning the read
     * length is created. Otherwise a flyweight token is requested from the
     * token factory for the equal sign operator, for the tag open/close symbols
     * and for tag and attribute names known to the {@code HtmlElements} cache;
     * anything else results in a regular token.
     * <p>
     * Each symbol case ends with an explicit {@code break}, so an unmatched
     * symbol image goes straight to the regular token instead of falling
     * through into the following cases and the tag name cache lookup.
     *
     * @param tokenId id of the token to create
     * @param tokenPropertyProvider provider of the token properties or
     *        {@code null} if the token has no properties
     * @return the created token
     */
    private Token token(HTMLTokenId tokenId, TokenPropertyProvider tokenPropertyProvider) {
        if(LOG) {
            if(input.readLength() == 0) {
                LOGGER.log(Level.INFO, "Found zero length token: "); //NOI18N
            }
            LOGGER.log(Level.INFO, "[{0}] token (''{1}''; id={2}; state={3})\n", new Object[]{this.getClass().getSimpleName(), input.readText().toString(), tokenId, state()}); //NOI18N
        }

        if(tokenPropertyProvider != null) {
            return tokenFactory.createPropertyToken(tokenId, input.readLength(), tokenPropertyProvider);
        }

        CharSequence image = input.readText();
        switch(tokenId) {
            case OPERATOR:
                return tokenFactory.getFlyweightToken(tokenId, IMG_EQUAL_SIGN);

            case TAG_CLOSE_SYMBOL:
                switch(image.charAt(0)) {
                    case '/':
                        if(input.readLength() > 1 && image.charAt(1) == '>') {
                            return tokenFactory.getFlyweightToken(tokenId, IMG_CLOSE_TAG_SYMBOL2);
                        }
                        break;
                    case '>':
                        return tokenFactory.getFlyweightToken(tokenId, IMG_CLOSE_TAG_SYMBOL);
                }
                //no flyweight for this image - create a regular token
                break;

            case TAG_OPEN_SYMBOL:
                if(image.charAt(0) == '<') {
                    if(input.readLength() <= 1) {
                        return tokenFactory.getFlyweightToken(tokenId, IMG_OPEN_TAG_SYMBOL);
                    }
                    if(image.charAt(1) == '/') {
                        return tokenFactory.getFlyweightToken(tokenId, IMG_OPEN_TAG_SYMBOL2);
                    }
                }
                //no flyweight for this image - create a regular token
                break;

            case TAG_OPEN:
            case TAG_CLOSE:
                String cachedTagName = HtmlElements.getCachedTagName(image);
                if(cachedTagName != null) {
                    assert (cachedTagName.length() <= input.readLength()) : "readlength == " + input.readLength() + "; text=" + cachedTagName + "; image=" + image;
                    return tokenFactory.getFlyweightToken(tokenId, cachedTagName);
                }
                break;

            case ARGUMENT:
                String cachedAttrName = HtmlElements.getCachedAttrName(image);
                if(cachedAttrName != null) {
                    assert (cachedAttrName.length() <= input.readLength()) : "readlength == " + input.readLength() + "; text=" + cachedAttrName + "; image=" + image;
                    return tokenFactory.getFlyweightToken(tokenId, cachedAttrName);
                }
                break;
        }

        return tokenFactory.createToken(tokenId);
    }

    /**
     * Intentionally empty: this implementation performs no cleanup when called.
     */
    @Override
    public void release() {
    }

    /**
     * Compares the content of two character sequences.
     * <p>
     * Two {@code null} references (or the very same instance) are equal, a
     * {@code null} never equals a non-null sequence, and sequences of different
     * lengths are never equal.
     *
     * @param text1 the first sequence, may be {@code null}
     * @param text2 the second sequence, may be {@code null}
     * @param ignoreCase if {@code true} the characters of {@code text2} are
     *        lowercased before the comparison; those of {@code text1} as well
     *        unless {@code optimized} is set
     * @param optimized the first sequence is already lowercase, so only one call
     *        to {@code Character.toLowerCase()} is made per compared position
     * @return {@code true} if both sequences are considered equal
     */
    private static boolean equals(CharSequence text1, CharSequence text2, boolean ignoreCase, boolean optimized) {
        if (text1 == text2) {
            return true;
        }
        if (text1 == null || text2 == null) {
            return false;
        }

        final int length = text1.length();
        if (length != text2.length()) {
            return false;
        }

        //the first sequence needs lowercasing only when it is not known to be lowercase already
        final boolean lowerCaseFirst = ignoreCase && !optimized;
        int index = 0;
        while (index < length) {
            char left = text1.charAt(index);
            char right = text2.charAt(index);
            if (lowerCaseFirst) {
                left = Character.toLowerCase(left);
            }
            if (ignoreCase) {
                right = Character.toLowerCase(right);
            }
            if (left != right) {
                return false;
            }
            index++;
        }
        return true;
    }

    /**
     * Token property provider holding exactly one key/value pair.
     */
    private static class HtmlTokenPropertyProvider implements TokenPropertyProvider {

        /** The only property key this provider answers to. */
        private final String propertyKey;
        /** The value returned for {@link #propertyKey}. */
        private final Object propertyValue;

        /**
         * @param key the property key to answer to
         * @param value the value returned for that key
         */
        HtmlTokenPropertyProvider(String key, Object value) {
            this.propertyKey = key;
            this.propertyValue = value;
        }

        /**
         * @param token the token whose property is requested (not inspected)
         * @param key the requested property key
         * @return the stored value if {@code key} equals the stored key,
         *         {@code null} otherwise
         */
        @Override
        public Object getValue(Token token, Object key) {
            return propertyKey.equals(key) ? propertyValue : null;
        }

    }

    // Shared property providers for VALUE_CSS tokens: each one reports the css token type
    // (class or id respectively) under the HTMLTokenId.VALUE_CSS_TOKEN_TYPE_PROPERTY key.
    private static final TokenPropertyProvider CLASS_TOKEN_PP = new HtmlTokenPropertyProvider(HTMLTokenId.VALUE_CSS_TOKEN_TYPE_PROPERTY, HTMLTokenId.VALUE_CSS_TOKEN_TYPE_CLASS);
    private static final TokenPropertyProvider ID_TOKEN_PP = new HtmlTokenPropertyProvider(HTMLTokenId.VALUE_CSS_TOKEN_TYPE_PROPERTY, HTMLTokenId.VALUE_CSS_TOKEN_TYPE_ID);


}