All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.fife.ui.rsyntaxtextarea.modes.HTMLTokenMaker.flex Maven / Gradle / Ivy

The newest version!
/*
 * 01/24/2005
 *
 * HTMLTokenMaker.java - Generates tokens for HTML syntax highlighting.
 * 
 * This library is distributed under a modified BSD license.  See the included
 * RSyntaxTextArea.License.txt file for details.
 */
package org.fife.ui.rsyntaxtextarea.modes;

import java.io.*;
import javax.swing.text.Segment;

import org.fife.ui.rsyntaxtextarea.*;


/**
 * Scanner for HTML 5 files.
 *
 * This implementation was created using
 * JFlex 1.4.1; however, the generated file
 * was modified for performance.  Memory allocation needs to be almost
 * completely removed to be competitive with the handwritten lexers (subclasses
 * of AbstractTokenMaker), so this class has been modified so that
 * Strings are never allocated (via yytext()), and the scanner never has to
 * worry about refilling its buffer (needlessly copying chars around).
 * We can achieve this because RText always scans exactly 1 line of tokens at a
 * time, and hands the scanner this line as an array of characters (a Segment
 * really).  Since tokens contain pointers to char arrays instead of Strings
 * holding their contents, there is no need for allocating new memory for
 * Strings.

 * <p>
 * The actual algorithm generated for scanning has, of course, not been
 * modified.
 * <p>
 * If you wish to regenerate this file yourself, keep in mind the following:
 * <ul>
 *   <li>The generated <code>HTMLTokenMaker.java</code> file will contain two
 *       definitions of both <code>zzRefill</code> and <code>yyreset</code>.
 *       You should hand-delete the second of each definition (the ones
 *       generated by the lexer), as these generated methods modify the input
 *       buffer, which we'll never have to do.</li>
 *   <li>You should also change the declaration/definition of zzBuffer to NOT
 *       be initialized.  This is a needless memory allocation for us since we
 *       will be pointing the array somewhere else anyway.</li>
 *   <li>You should NOT call <code>yylex()</code> on the generated scanner
 *       directly; rather, you should use <code>getTokenList</code> as you would
 *       with any other <code>TokenMaker</code> instance.</li>
 * </ul>
*
 * @author Robert Futrell
 * @version 0.9
 */
%%

%public
%class HTMLTokenMaker
%extends AbstractMarkupTokenMaker
%unicode
%type org.fife.ui.rsyntaxtextarea.Token


%{

    /**
     * Token type specific to HTMLTokenMaker denoting a line ending with an
     * unclosed double-quote attribute.
     */
    public static final int INTERNAL_ATTR_DOUBLE = -1;

    /**
     * Token type specific to HTMLTokenMaker denoting a line ending with an
     * unclosed single-quote attribute.
     */
    public static final int INTERNAL_ATTR_SINGLE = -2;

    /**
     * Token type specific to HTMLTokenMaker; this signals that the user has
     * ended a line with an unclosed HTML tag; thus a new line is beginning
     * still inside of the tag.
     */
    public static final int INTERNAL_INTAG = -3;

    /**
     * Token type specific to HTMLTokenMaker; this signals that the user has
     * ended a line with an unclosed <code>&lt;script&gt;</code> tag.
     */
    public static final int INTERNAL_INTAG_SCRIPT = -4;

    /**
     * Token type specifying we're in a double-quoted attribute in a
     * script tag.
     */
    public static final int INTERNAL_ATTR_DOUBLE_QUOTE_SCRIPT = -5;

    /**
     * Token type specifying we're in a single-quoted attribute in a
     * script tag.
     */
    public static final int INTERNAL_ATTR_SINGLE_QUOTE_SCRIPT = -6;

    /**
     * Token type specific to HTMLTokenMaker; this signals that the user has
     * ended a line with an unclosed <code>&lt;style&gt;</code> tag.
     */
    public static final int INTERNAL_INTAG_STYLE = -7;

    /**
     * Token type specifying we're in a double-quoted attribute in a
     * style tag.
     */
    public static final int INTERNAL_ATTR_DOUBLE_QUOTE_STYLE = -8;

    /**
     * Token type specifying we're in a single-quoted attribute in a
     * style tag.
     */
    public static final int INTERNAL_ATTR_SINGLE_QUOTE_STYLE = -9;

    /**
     * Token type specifying we're in JavaScript.
     */
    public static final int INTERNAL_IN_JS = -10;

    /**
     * Token type specifying we're in a JavaScript multiline comment.
     */
    public static final int INTERNAL_IN_JS_MLC = -11;

    /**
     * Token type specifying we're in an invalid multi-line JS string.
     */
    public static final int INTERNAL_IN_JS_STRING_INVALID = -12;

    /**
     * Token type specifying we're in a valid multi-line JS string.
     */
    public static final int INTERNAL_IN_JS_STRING_VALID = -13;

    /**
     * Token type specifying we're in an invalid multi-line JS single-quoted
     * string.
     */
    public static final int INTERNAL_IN_JS_CHAR_INVALID = -14;

    /**
     * Token type specifying we're in a valid multi-line JS single-quoted
     * string.
     */
    public static final int INTERNAL_IN_JS_CHAR_VALID = -15;

    /**
     * Internal type denoting a line ending in CSS.
     */
    public static final int INTERNAL_CSS = -16;

    /**
     * Internal type denoting a line ending in a CSS property.
     */
    public static final int INTERNAL_CSS_PROPERTY = -17;

    /**
     * Internal type denoting a line ending in a CSS property value.
     */
    public static final int INTERNAL_CSS_VALUE = -18;

    /**
     * Internal type denoting line ending in a CSS double-quote string.
     * The state to return to is embedded in the actual end token type.
     */
    public static final int INTERNAL_CSS_STRING = -(1<<11);

    /**
     * Internal type denoting line ending in a CSS single-quote string.
     * The state to return to is embedded in the actual end token type.
     */
    public static final int INTERNAL_CSS_CHAR = -(2<<11);

    /**
     * Internal type denoting line ending in a CSS multi-line comment.
     * The state to return to is embedded in the actual end token type.
     */
    public static final int INTERNAL_CSS_MLC = -(3<<11);

    /**
     * The previous CSS-related state we were in before going into a CSS
     * string, multi-line comment, etc.
     */
    private int cssPrevState;

    /**
     * Whether closing markup tags are automatically completed for HTML.
     */
    private static boolean completeCloseTags;

    /**
     * When in the JS_STRING state, whether the current string is valid.
     */
    private boolean validJSString;

    /**
     * Language state set on HTML tokens.  Must be 0.
     */
    private static final int LANG_INDEX_DEFAULT = 0;

    /**
     * Language state set on JavaScript tokens.
     */
    private static final int LANG_INDEX_JS = 1;

    /**
     * Language state set on CSS tokens.
     */
    private static final int LANG_INDEX_CSS = 2;


    /**
     * Constructor.  This must be here because JFlex does not generate a
     * no-parameter constructor.
     */
    public HTMLTokenMaker() {
        super();
    }


    /**
     * Adds the token specified to the current linked list of tokens as an
     * "end token;" that is, a zero-width token at <code>zzMarkedPos</code>.
     *
     * @param tokenType The token's type.
     */
    private void addEndToken(int tokenType) {
        addToken(zzMarkedPos,zzMarkedPos, tokenType);
    }


    /**
     * Adds the token specified to the current linked list of tokens, using
     * the inherited six-argument <code>addToken</code> overload with its last
     * argument set to <code>true</code>.
     *
     * @param start The starting offset of the token in the buffer.
     * @param end The ending offset of the token in the buffer.
     * @param tokenType The token's type.
     * @see #addToken(int, int, int)
     */
    private void addHyperlinkToken(int start, int end, int tokenType) {
        int so = start + offsetShift;
        addToken(zzBuffer, start,end, tokenType, so, true);
    }


    /**
     * Adds the text just matched (from <code>zzStartRead</code> through
     * <code>zzMarkedPos-1</code>) to the current linked list of tokens.
     *
     * @param tokenType The token's type.
     */
    private void addToken(int tokenType) {
        addToken(zzStartRead, zzMarkedPos-1, tokenType);
    }


    /**
     * Adds the token specified to the current linked list of tokens.
     *
     * @param start The starting offset of the token in the buffer.
     * @param end The ending offset of the token in the buffer.
     * @param tokenType The token's type.
     */
    private void addToken(int start, int end, int tokenType) {
        // Translate the buffer offset into a document offset.
        int so = start + offsetShift;
        addToken(zzBuffer, start,end, tokenType, so);
    }


    /**
     * Adds the token specified to the current linked list of tokens.
     *
     * @param array The character array.
     * @param start The starting offset in the array.
     * @param end The ending offset in the array.
     * @param tokenType The token's type.
     * @param startOffset The offset in the document at which this token
     *        occurs.
     */
    @Override
    public void addToken(char[] array, int start, int end, int tokenType,
            int startOffset) {
        super.addToken(array, start,end, tokenType, startOffset);
        // The next token starts where this match ended.
        zzStartRead = zzMarkedPos;
    }


    /**
     * {@inheritDoc}
     */
    @Override
    protected OccurrenceMarker createOccurrenceMarker() {
        return new HtmlOccurrenceMarker();
    }


    /**
     * Returns whether markup close tags should be completed.  You might not
     * want this to be the case, since some tags in standard HTML aren't
     * usually closed.
     *
     * @return Whether closing markup tags are completed.
     * @see #setCompleteCloseTags(boolean)
     */
    @Override
    public boolean getCompleteCloseTags() {
        return completeCloseTags;
    }


    /**
     * Returns whether curly braces denote code blocks for the given language
     * index.  This is the case for the CSS and JavaScript language indices
     * only.
     *
     * @param languageIndex The language index of a token.
     * @return Whether curly braces denote code blocks in that language.
     */
    @Override
    public boolean getCurlyBracesDenoteCodeBlocks(int languageIndex) {
        return languageIndex==LANG_INDEX_CSS || languageIndex==LANG_INDEX_JS;
    }


    /**
     * Returns the text that starts and ends a comment for the given language
     * index, always as a two-element <code>{ start, end }</code> array.
     *
     * @param languageIndex The language index of a token.
     * @return The comment start and end.  The end is <code>null</code> for
     *         JavaScript line comments.
     */
    @Override
    public String[] getLineCommentStartAndEnd(int languageIndex) {
        switch (languageIndex) {
            case LANG_INDEX_JS:
                return new String[] { "//", null };
            case LANG_INDEX_CSS:
                return new String[] { "/*", "*/" };
            default:
                // Markup: HTML comment delimiters.  Every branch returns a
                // (start, end) pair so callers can always read both slots.
                return new String[] { "<!--", "-->" };
        }
    }


    /**
     * Returns whether tokens of the given type should have "mark occurrences"
     * enabled.  Only <code>Token.MARKUP_TAG_NAME</code> tokens do.
     *
     * @param type The token type.
     * @return Whether tokens of this type should have "mark occurrences"
     *         enabled.
     */
    @Override
    public boolean getMarkOccurrencesOfTokenType(int type) {
        return type==Token.MARKUP_TAG_NAME;
    }


    /**
     * Overridden to handle newlines in JS and CSS differently than those in
     * markup.  The next line is indented only after a single-character
     * <code>{</code> or <code>(</code> token in a language where curly braces
     * denote code blocks.
     *
     * @param token The token to examine; may be <code>null</code>.
     * @return Whether the next line should be indented.
     */
    @Override
    public boolean getShouldIndentNextLineAfter(Token token) {
        int languageIndex = token==null ? 0 : token.getLanguageIndex();
        if (getCurlyBracesDenoteCodeBlocks(languageIndex)) {
            if (token!=null && token.length()==1) {
                char ch = token.charAt(0);
                return ch=='{' || ch=='(';
            }
        }
        return false;
    }


    /**
     * Returns the first token in the linked list of tokens generated
     * from <code>text</code>.  This method must be implemented by
     * subclasses so they can correctly implement syntax highlighting.
     *
     * @param text The text from which to get tokens.
     * @param initialTokenType The token type we should start with.
     * @param startOffset The offset into the document at which
     *        <code>text</code> starts.
     * @return The first <code>Token</code> in a linked list representing
     *         the syntax highlighted text.
     */
    @Override
    public Token getTokenList(Segment text, int initialTokenType, int startOffset) {

        resetTokenList();
        this.offsetShift = -text.offset + startOffset;
        cssPrevState = CSS; // Shouldn't be necessary
        int languageIndex = 0;

        // Start off in the proper state.
        int state = Token.NULL;
        switch (initialTokenType) {
            case Token.MARKUP_COMMENT:
                state = COMMENT;
                break;
            case Token.PREPROCESSOR:
                state = PI;
                break;
            case Token.VARIABLE:
                state = DTD;
                break;
            case INTERNAL_INTAG:
                state = INTAG;
                break;
            case INTERNAL_INTAG_SCRIPT:
                state = INTAG_SCRIPT;
                break;
            case INTERNAL_INTAG_STYLE:
                state = INTAG_STYLE;
                break;
            case INTERNAL_ATTR_DOUBLE:
                state = INATTR_DOUBLE;
                break;
            case INTERNAL_ATTR_SINGLE:
                state = INATTR_SINGLE;
                break;
            case INTERNAL_ATTR_DOUBLE_QUOTE_SCRIPT:
                state = INATTR_DOUBLE_SCRIPT;
                break;
            case INTERNAL_ATTR_SINGLE_QUOTE_SCRIPT:
                state = INATTR_SINGLE_SCRIPT;
                break;
            case INTERNAL_ATTR_DOUBLE_QUOTE_STYLE:
                state = INATTR_DOUBLE_STYLE;
                break;
            case INTERNAL_ATTR_SINGLE_QUOTE_STYLE:
                state = INATTR_SINGLE_STYLE;
                break;
            case INTERNAL_IN_JS:
                state = JAVASCRIPT;
                languageIndex = LANG_INDEX_JS;
                break;
            case INTERNAL_IN_JS_MLC:
                state = JS_MLC;
                languageIndex = LANG_INDEX_JS;
                break;
            case INTERNAL_IN_JS_STRING_INVALID:
                state = JS_STRING;
                validJSString = false;
                languageIndex = LANG_INDEX_JS;
                break;
            case INTERNAL_IN_JS_STRING_VALID:
                state = JS_STRING;
                validJSString = true;
                languageIndex = LANG_INDEX_JS;
                break;
            case INTERNAL_IN_JS_CHAR_INVALID:
                state = JS_CHAR;
                validJSString = false;
                languageIndex = LANG_INDEX_JS;
                break;
            case INTERNAL_IN_JS_CHAR_VALID:
                state = JS_CHAR;
                validJSString = true;
                languageIndex = LANG_INDEX_JS;
                break;
            case INTERNAL_CSS:
                state = CSS;
                languageIndex = LANG_INDEX_CSS;
                break;
            case INTERNAL_CSS_PROPERTY:
                state = CSS_PROPERTY;
                languageIndex = LANG_INDEX_CSS;
                break;
            case INTERNAL_CSS_VALUE:
                state = CSS_VALUE;
                languageIndex = LANG_INDEX_CSS;
                break;
            default:
                if (initialTokenType<-1024) {
                    // CSS string/char/comment end tokens are encoded as
                    // (base type - previous state): the low 8 bits carry the
                    // state to return to, the remaining bits the base type.
                    int main = -(-initialTokenType & 0xffffff00);
                    switch (main) {
                        default: // Should never happen
                        case INTERNAL_CSS_STRING:
                            state = CSS_STRING;
                            break;
                        case INTERNAL_CSS_CHAR:
                            state = CSS_CHAR_LITERAL;
                            break;
                        case INTERNAL_CSS_MLC:
                            state = CSS_C_STYLE_COMMENT;
                            break;
                    }
                    cssPrevState = -initialTokenType&0xff;
                    languageIndex = LANG_INDEX_CSS;
                }
                else {
                    state = Token.NULL;
                }
                break;
        }

        setLanguageIndex(languageIndex);
        start = text.offset;
        s = text;
        try {
            yyreset(zzReader);
            yybegin(state);
            return yylex();
        } catch (IOException ioe) {
            ioe.printStackTrace();
            return new TokenImpl();
        }

    }


    /**
     * Sets whether markup close tags should be completed.  You might not want
     * this to be the case, since some tags in standard HTML aren't usually
     * closed.
     *
     * @param complete Whether closing markup tags are completed.
     * @see #getCompleteCloseTags()
     */
    public static void setCompleteCloseTags(boolean complete) {
        completeCloseTags = complete;
    }


    /**
     * Refills the input buffer.  Nothing is actually read here; this only
     * reports whether the current position has reached the end of the
     * segment being scanned.
     *
     * @return <code>true</code> if EOF was reached, otherwise
     *         <code>false</code>.
     */
    private boolean zzRefill() {
        return zzCurrentPos>=s.offset+s.count;
    }


    /**
     * Resets the scanner to read from a new input stream.
     * Does not close the old reader.
     *
     * All internal variables are reset, the old input stream
     * <b>cannot</b> be reused (internal buffer is discarded and lost).
     * Lexical state is set to <code>YYINITIAL</code>.
     *
     * @param reader the new input stream
     */
    public final void yyreset(java.io.Reader reader) {
        // 's' has been updated.
        zzBuffer = s.array;
        /*
         * We replaced the line below with the two below it because zzRefill
         * no longer "refills" the buffer (since the way we do it, it's always
         * "full" the first time through, since it points to the segment's
         * array).  So, we assign zzEndRead here.
         */
        //zzStartRead = zzEndRead = s.offset;
        zzStartRead = s.offset;
        zzEndRead = zzStartRead + s.count - 1;
        zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
        zzLexicalState = YYINITIAL;
        zzReader = reader;
        zzAtBOL = true;
        zzAtEOF = false;
    }


%}

// HTML-specific stuff.
Whitespace = ([ \t\f]+)
LineTerminator = ([\n])
Identifier = ([^ \t\n<&]+)
EntityReference = ([&][^; \t]*[;]?)
InTagIdentifier = ([^ \t\n\"\'/=>]+)
// Closing tags that end embedded JavaScript/CSS.  The rules using these
// macros split the match into a 2-character delimiter ("</"), a tag name of
// 6 (script) or 5 (style) characters, and a 1-character delimiter (">").
// The tag name is matched case-insensitively, like the other HTML tag names.
EndScriptTag = ("</"[sS][cC][rR][iI][pP][tT]">")
EndStyleTag = ("</"[sS][tT][yY][lL][eE]">")

// General stuff.
Letter = [A-Za-z]
NonzeroDigit = [1-9]
Digit = ("0"|{NonzeroDigit})
HexDigit = ({Digit}|[A-Fa-f])
OctalDigit = ([0-7])
LetterOrUnderscore = ({Letter}|[_])
LetterOrUnderscoreOrDash = ({LetterOrUnderscore}|[\-])

// JavaScript stuff.
EscapedSourceCharacter = ("u"{HexDigit}{HexDigit}{HexDigit}{HexDigit})
NonSeparator = ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#"|"\\")
IdentifierStart = ({Letter}|"_"|"$")
IdentifierPart = ({IdentifierStart}|{Digit}|("\\"{EscapedSourceCharacter}))
JS_MLCBegin = "/*"
JS_MLCEnd = "*/"
JS_LineCommentBegin = "//"
JS_IntegerHelper1 = (({NonzeroDigit}{Digit}*)|"0")
JS_IntegerHelper2 = ("0"(([xX]{HexDigit}+)|({OctalDigit}*)))
JS_IntegerLiteral = ({JS_IntegerHelper1}[lL]?)
JS_HexLiteral = ({JS_IntegerHelper2}[lL]?)
JS_FloatHelper1 = ([fFdD]?)
JS_FloatHelper2 = ([eE][+-]?{Digit}+{JS_FloatHelper1})
JS_FloatLiteral1 = ({Digit}+"."({JS_FloatHelper1}|{JS_FloatHelper2}|{Digit}+({JS_FloatHelper1}|{JS_FloatHelper2})))
JS_FloatLiteral2 = ("."{Digit}+({JS_FloatHelper1}|{JS_FloatHelper2}))
JS_FloatLiteral3 = ({Digit}+{JS_FloatHelper2})
JS_FloatLiteral = ({JS_FloatLiteral1}|{JS_FloatLiteral2}|{JS_FloatLiteral3}|({Digit}+[fFdD]))
JS_ErrorNumberFormat = (({JS_IntegerLiteral}|{JS_HexLiteral}|{JS_FloatLiteral}){NonSeparator}+)
JS_Separator = ([\(\)\{\}\[\]\]])
JS_Separator2 = ([\;,.])
JS_NonAssignmentOperator = ("+"|"-"|"<="|"^"|"++"|"<"|"*"|">="|"%"|"--"|">"|"/"|"!="|"?"|">>"|"!"|"&"|"=="|":"|">>"|"~"|"||"|"&&"|">>>")
JS_AssignmentOperator = ("="|"-="|"*="|"/="|"|="|"&="|"^="|"+="|"%="|"<<="|">>="|">>>=")
JS_Operator = ({JS_NonAssignmentOperator}|{JS_AssignmentOperator})
JS_Identifier = ({IdentifierStart}{IdentifierPart}*)
JS_ErrorIdentifier = ({NonSeparator}+)
JS_Regex = ("/"([^\*\\/]|\\.)([^/\\]|\\.)*"/"[gim]*)

// CSS stuff.
CSS_SelectorPiece = (("*"|"."|{LetterOrUnderscoreOrDash})({LetterOrUnderscoreOrDash}|"."|{Digit})*)
CSS_PseudoClass = (":"("root"|"nth-child"|"nth-last-child"|"nth-of-type"|"nth-last-of-type"|"first-child"|"last-child"|"first-of-type"|"last-of-type"|"only-child"|"only-of-type"|"empty"|"link"|"visited"|"active"|"hover"|"focus"|"target"|"lang"|"enabled"|"disabled"|"checked"|":first-line"|":first-letter"|":before"|":after"|"not"))
CSS_AtKeyword = ("@"{CSS_SelectorPiece})
CSS_Id = ("#"{CSS_SelectorPiece})
CSS_Separator = ([;\(\)\[\]])
CSS_MlcStart = ({JS_MLCBegin})
CSS_MlcEnd = ({JS_MLCEnd})
CSS_Property = ([\*]?{LetterOrUnderscoreOrDash}({LetterOrUnderscoreOrDash}|{Digit})*)
CSS_ValueChar = ({LetterOrUnderscoreOrDash}|[\\/])
CSS_Value = ({CSS_ValueChar}*)
CSS_Function = ({CSS_Value}\()
CSS_Digits = ([\-]?{Digit}+([0-9\.]+)?(pt|pc|in|mm|cm|em|ex|px|ms|s|%)?)
CSS_Hex = ("#"[0-9a-fA-F]+)
CSS_Number = ({CSS_Digits}|{CSS_Hex})

// URL matching (used to find hyperlinks inside comments).
URLGenDelim = ([:\/\?#\[\]@])
URLSubDelim = ([\!\$&'\(\)\*\+,;=])
URLUnreserved = ({LetterOrUnderscore}|{Digit}|[\-\.\~])
URLCharacter = ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
URLCharacters = ({URLCharacter}*)
URLEndCharacter = ([\/\$]|{Letter}|{Digit})
URL = (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)
%state COMMENT %state PI %state DTD %state INTAG %state INTAG_CHECK_TAG_NAME %state INATTR_DOUBLE %state INATTR_SINGLE %state INTAG_SCRIPT %state INATTR_DOUBLE_SCRIPT %state INATTR_SINGLE_SCRIPT %state INTAG_STYLE %state INATTR_DOUBLE_STYLE %state INATTR_SINGLE_STYLE %state JAVASCRIPT %state JS_STRING %state JS_CHAR %state JS_MLC %state JS_EOL_COMMENT %state CSS %state CSS_PROPERTY %state CSS_VALUE %state CSS_STRING %state CSS_CHAR_LITERAL %state CSS_C_STYLE_COMMENT %% { "" { yybegin(YYINITIAL); addToken(start,zzStartRead+2, Token.MARKUP_COMMENT); } "-" {} <> { addToken(start,zzStartRead-1, Token.MARKUP_COMMENT); return firstToken; } } { [^\n\?]+ {} {LineTerminator} { addToken(start,zzStartRead-1, Token.MARKUP_PROCESSING_INSTRUCTION); return firstToken; } "?>" { yybegin(YYINITIAL); addToken(start,zzStartRead+1, Token.MARKUP_PROCESSING_INSTRUCTION); } "?" {} <> { addToken(start,zzStartRead-1, Token.MARKUP_PROCESSING_INSTRUCTION); return firstToken; } } { [^\n>]+ {} ">" { yybegin(YYINITIAL); addToken(start,zzStartRead, Token.MARKUP_DTD); } {LineTerminator} | <> { addToken(start,zzStartRead-1, Token.MARKUP_DTD); return firstToken; } } { [Aa] | [aA][bB][bB][rR] | [aA][cC][rR][oO][nN][yY][mM] | [aA][dD][dD][rR][eE][sS][sS] | [aA][pP][pP][lL][eE][tT] | [aA][rR][eE][aA] | [aA][rR][tT][iI][cC][lL][eE] | [aA][sS][iI][dD][eE] | [aA][uU][dD][iI][oO] | [bB] | [bB][aA][sS][eE] | [bB][aA][sS][eE][fF][oO][nN][tT] | [bB][dD][oO] | [bB][gG][sS][oO][uU][nN][dD] | [bB][iI][gG] | [bB][lL][iI][nN][kK] | [bB][lL][oO][cC][kK][qQ][uU][oO][tT][eE] | [bB][oO][dD][yY] | [bB][rR] | [bB][uU][tT][tT][oO][nN] | [cC][aA][nN][vV][aA][sS] | [cC][aA][pP][tT][iI][oO][nN] | [cC][eE][nN][tT][eE][rR] | [cC][iI][tT][eE] | [cC][oO][dD][eE] | [cC][oO][lL] | [cC][oO][lL][gG][rR][oO][uU][pP] | [cC][oO][mM][mM][aA][nN][dD] | [cC][oO][mM][mM][eE][nN][tT] | [dD][dD] | [dD][aA][tT][aA][gG][rR][iI][dD] | [dD][aA][tT][aA][lL][iI][sS][tT] | [dD][aA][tT][aA][tT][eE][mM][pP][lL][aA][tT][eE] | [dD][eE][lL] | 
[dD][eE][tT][aA][iI][lL][sS] | [dD][fF][nN] | [dD][iI][aA][lL][oO][gG] | [dD][iI][rR] | [dD][iI][vV] | [dD][lL] | [dD][tT] | [eE][mM] | [eE][mM][bB][eE][dD] | [eE][vV][eE][nN][tT][sS][oO][uU][rR][cC][eE] | [fF][iI][eE][lL][dD][sS][eE][tT] | [fF][iI][gG][uU][rR][eE] | [fF][oO][nN][tT] | [fF][oO][oO][tT][eE][rR] | [fF][oO][rR][mM] | [fF][rR][aA][mM][eE] | [fF][rR][aA][mM][eE][sS][eE][tT] | [hH][123456] | [hH][eE][aA][dD] | [hH][eE][aA][dD][eE][rR] | [hH][rR] | [hH][tT][mM][lL] | [iI] | [iI][fF][rR][aA][mM][eE] | [iI][lL][aA][yY][eE][rR] | [iI][mM][gG] | [iI][nN][pP][uU][tT] | [iI][nN][sS] | [iI][sS][iI][nN][dD][eE][xX] | [kK][bB][dD] | [kK][eE][yY][gG][eE][nN] | [lL][aA][bB][eE][lL] | [lL][aA][yY][eE][rR] | [lL][eE][gG][eE][nN][dD] | [lL][iI] | [lL][iI][nN][kK] | [mM][aA][pP] | [mM][aA][rR][kK] | [mM][aA][rR][qQ][uU][eE][eE] | [mM][eE][nN][uU] | [mM][eE][tT][aA] | [mM][eE][tT][eE][rR] | [mM][uU][lL][tT][iI][cC][oO][lL] | [nN][aA][vV] | [nN][eE][sS][tT] | [nN][oO][bB][rR] | [nN][oO][eE][mM][bB][eE][dD] | [nN][oO][fF][rR][aA][mM][eE][sS] | [nN][oO][lL][aA][yY][eE][rR] | [nN][oO][sS][cC][rR][iI][pP][tT] | [oO][bB][jJ][eE][cC][tT] | [oO][lL] | [oO][pP][tT][gG][rR][oO][uU][pP] | [oO][pP][tT][iI][oO][nN] | [oO][uU][tT][pP][uU][tT] | [pP] | [pP][aA][rR][aA][mM] | [pP][lL][aA][iI][nN][tT][eE][xX][tT] | [pP][rR][eE] | [pP][rR][oO][gG][rR][eE][sS][sS] | [qQ] | [rR][uU][lL][eE] | [sS] | [sS][aA][mM][pP] | [sS][cC][rR][iI][pP][tT] | [sS][eE][cC][tT][iI][oO][nN] | [sS][eE][lL][eE][cC][tT] | [sS][eE][rR][vV][eE][rR] | [sS][mM][aA][lL][lL] | [sS][oO][uU][rR][cC][eE] | [sS][pP][aA][cC][eE][rR] | [sS][pP][aA][nN] | [sS][tT][rR][iI][kK][eE] | [sS][tT][rR][oO][nN][gG] | [sS][tT][yY][lL][eE] | [sS][uU][bB] | [sS][uU][pP] | [tT][aA][bB][lL][eE] | [tT][bB][oO][dD][yY] | [tT][dD] | [tT][eE][xX][tT][aA][rR][eE][aA] | [tT][fF][oO][oO][tT] | [tT][hH] | [tT][hH][eE][aA][dD] | [tT][iI][mM][eE] | [tT][iI][tT][lL][eE] | [tT][rR] | [tT][tT] | [uU] | [uU][lL] | [vV][aA][rR] | [vV][iI][dD][eE][oO] { 
addToken(Token.MARKUP_TAG_NAME); } {InTagIdentifier} { /* A non-recognized HTML tag name */ yypushback(yylength()); yybegin(INTAG); } . { /* Shouldn't happen */ yypushback(1); yybegin(INTAG); } <> { addToken(zzMarkedPos,zzMarkedPos, INTERNAL_INTAG); return firstToken; } } { "/" { addToken(Token.MARKUP_TAG_DELIMITER); } {InTagIdentifier} { addToken(Token.MARKUP_TAG_ATTRIBUTE); } {Whitespace} { addToken(Token.WHITESPACE); } "=" { addToken(Token.OPERATOR); } "/>" { yybegin(YYINITIAL); addToken(Token.MARKUP_TAG_DELIMITER); } ">" { yybegin(YYINITIAL); addToken(Token.MARKUP_TAG_DELIMITER); } [\"] { start = zzMarkedPos-1; yybegin(INATTR_DOUBLE); } [\'] { start = zzMarkedPos-1; yybegin(INATTR_SINGLE); } <> { addToken(zzMarkedPos,zzMarkedPos, INTERNAL_INTAG); return firstToken; } } { [^\"]* {} [\"] { yybegin(INTAG); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); } <> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_DOUBLE); return firstToken; } } { [^\']* {} [\'] { yybegin(INTAG); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); } <> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_SINGLE); return firstToken; } } { {InTagIdentifier} { addToken(Token.MARKUP_TAG_ATTRIBUTE); } "/>" { addToken(Token.MARKUP_TAG_DELIMITER); yybegin(YYINITIAL); } "/" { addToken(Token.MARKUP_TAG_DELIMITER); } // Won't appear in valid HTML. 
{Whitespace} { addToken(Token.WHITESPACE); } "=" { addToken(Token.OPERATOR); } ">" { addToken(Token.MARKUP_TAG_DELIMITER); yybegin(JAVASCRIPT, LANG_INDEX_JS); } [\"] { start = zzMarkedPos-1; yybegin(INATTR_DOUBLE_SCRIPT); } [\'] { start = zzMarkedPos-1; yybegin(INATTR_SINGLE_SCRIPT); } <> { addToken(zzMarkedPos,zzMarkedPos, INTERNAL_INTAG_SCRIPT); return firstToken; } } { [^\"]* {} [\"] { yybegin(INTAG_SCRIPT); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); } <> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_DOUBLE_QUOTE_SCRIPT); return firstToken; } } { [^\']* {} [\'] { yybegin(INTAG_SCRIPT); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); } <> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_SINGLE_QUOTE_SCRIPT); return firstToken; } } { {InTagIdentifier} { addToken(Token.MARKUP_TAG_ATTRIBUTE); } "/>" { addToken(Token.MARKUP_TAG_DELIMITER); yybegin(YYINITIAL); } "/" { addToken(Token.MARKUP_TAG_DELIMITER); } // Won't appear in valid HTML. 
{Whitespace} { addToken(Token.WHITESPACE); } "=" { addToken(Token.OPERATOR); } ">" { addToken(Token.MARKUP_TAG_DELIMITER); yybegin(CSS, LANG_INDEX_CSS); } [\"] { start = zzMarkedPos-1; yybegin(INATTR_DOUBLE_STYLE); } [\'] { start = zzMarkedPos-1; yybegin(INATTR_SINGLE_STYLE); } <> { addToken(zzMarkedPos,zzMarkedPos, INTERNAL_INTAG_STYLE); return firstToken; } } { [^\"]* {} [\"] { yybegin(INTAG_STYLE); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); } <> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_DOUBLE_QUOTE_STYLE); return firstToken; } } { [^\']* {} [\'] { yybegin(INTAG_STYLE); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); } <> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_SINGLE_QUOTE_STYLE); return firstToken; } } { {EndScriptTag} { yybegin(YYINITIAL, LANG_INDEX_DEFAULT); addToken(zzStartRead,zzStartRead+1, Token.MARKUP_TAG_DELIMITER); addToken(zzMarkedPos-7,zzMarkedPos-2, Token.MARKUP_TAG_NAME); addToken(zzMarkedPos-1,zzMarkedPos-1, Token.MARKUP_TAG_DELIMITER); } // ECMA 3+ keywords. "break" | "continue" | "delete" | "else" | "for" | "function" | "if" | "in" | "new" | "this" | "typeof" | "var" | "void" | "while" | "with" { addToken(Token.RESERVED_WORD); } "return" { addToken(Token.RESERVED_WORD_2); } //JavaScript 1.6 "each" {if(JavaScriptTokenMaker.isJavaScriptCompatible("1.6")){ addToken(Token.RESERVED_WORD);} else {addToken(Token.IDENTIFIER);} } //JavaScript 1.7 "let" {if(JavaScriptTokenMaker.isJavaScriptCompatible("1.7")){ addToken(Token.RESERVED_WORD);} else {addToken(Token.IDENTIFIER);} } // Reserved (but not yet used) ECMA keywords. 
"abstract" { addToken(Token.RESERVED_WORD); } "boolean" { addToken(Token.DATA_TYPE); } "byte" { addToken(Token.DATA_TYPE); } "case" { addToken(Token.RESERVED_WORD); } "catch" { addToken(Token.RESERVED_WORD); } "char" { addToken(Token.DATA_TYPE); } "class" { addToken(Token.RESERVED_WORD); } "const" { addToken(Token.RESERVED_WORD); } "debugger" { addToken(Token.RESERVED_WORD); } "default" { addToken(Token.RESERVED_WORD); } "do" { addToken(Token.RESERVED_WORD); } "double" { addToken(Token.DATA_TYPE); } "enum" { addToken(Token.RESERVED_WORD); } "export" { addToken(Token.RESERVED_WORD); } "extends" { addToken(Token.RESERVED_WORD); } "final" { addToken(Token.RESERVED_WORD); } "finally" { addToken(Token.RESERVED_WORD); } "float" { addToken(Token.DATA_TYPE); } "goto" { addToken(Token.RESERVED_WORD); } "implements" { addToken(Token.RESERVED_WORD); } "import" { addToken(Token.RESERVED_WORD); } "instanceof" { addToken(Token.RESERVED_WORD); } "int" { addToken(Token.DATA_TYPE); } "interface" { addToken(Token.RESERVED_WORD); } "long" { addToken(Token.DATA_TYPE); } "native" { addToken(Token.RESERVED_WORD); } "package" { addToken(Token.RESERVED_WORD); } "private" { addToken(Token.RESERVED_WORD); } "protected" { addToken(Token.RESERVED_WORD); } "public" { addToken(Token.RESERVED_WORD); } "short" { addToken(Token.DATA_TYPE); } "static" { addToken(Token.RESERVED_WORD); } "super" { addToken(Token.RESERVED_WORD); } "switch" { addToken(Token.RESERVED_WORD); } "synchronized" { addToken(Token.RESERVED_WORD); } "throw" { addToken(Token.RESERVED_WORD); } "throws" { addToken(Token.RESERVED_WORD); } "transient" { addToken(Token.RESERVED_WORD); } "try" { addToken(Token.RESERVED_WORD); } "volatile" { addToken(Token.RESERVED_WORD); } "null" { addToken(Token.RESERVED_WORD); } // Literals. "false" | "true" { addToken(Token.LITERAL_BOOLEAN); } "NaN" { addToken(Token.RESERVED_WORD); } "Infinity" { addToken(Token.RESERVED_WORD); } // Functions. 
"eval" | "parseInt" | "parseFloat" | "escape" | "unescape" | "isNaN" | "isFinite" { addToken(Token.FUNCTION); } {LineTerminator} { addEndToken(INTERNAL_IN_JS); return firstToken; } {JS_Identifier} { addToken(Token.IDENTIFIER); } {Whitespace} { addToken(Token.WHITESPACE); } /* String/Character literals. */ [\'] { start = zzMarkedPos-1; validJSString = true; yybegin(JS_CHAR); } [\"] { start = zzMarkedPos-1; validJSString = true; yybegin(JS_STRING); } /* Comment literals. */ "/**/" { addToken(Token.COMMENT_MULTILINE); } {JS_MLCBegin} { start = zzMarkedPos-2; yybegin(JS_MLC); } {JS_LineCommentBegin} { start = zzMarkedPos-2; yybegin(JS_EOL_COMMENT); } /* Attempt to identify regular expressions (not foolproof) - do after comments! */ {JS_Regex} { boolean highlightedAsRegex = false; if (firstToken==null) { addToken(Token.REGEX); highlightedAsRegex = true; } else { // If this is *likely* to be a regex, based on // the previous token, highlight it as such. Token t = firstToken.getLastNonCommentNonWhitespaceToken(); if (RSyntaxUtilities.regexCanFollowInJavaScript(t)) { addToken(Token.REGEX); highlightedAsRegex = true; } } // If it doesn't *appear* to be a regex, highlight it as // individual tokens. if (!highlightedAsRegex) { int temp = zzStartRead + 1; addToken(zzStartRead, zzStartRead, Token.OPERATOR); zzStartRead = zzCurrentPos = zzMarkedPos = temp; } } /* Separators. */ {JS_Separator} { addToken(Token.SEPARATOR); } {JS_Separator2} { addToken(Token.IDENTIFIER); } /* Operators. */ {JS_Operator} { addToken(Token.OPERATOR); } /* Numbers */ {JS_IntegerLiteral} { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); } {JS_HexLiteral} { addToken(Token.LITERAL_NUMBER_HEXADECIMAL); } {JS_FloatLiteral} { addToken(Token.LITERAL_NUMBER_FLOAT); } {JS_ErrorNumberFormat} { addToken(Token.ERROR_NUMBER_FORMAT); } {JS_ErrorIdentifier} { addToken(Token.ERROR_IDENTIFIER); } /* Ended with a line not in a string or comment. 
*/
    <<EOF>>                 { addEndToken(INTERNAL_IN_JS); return firstToken; }

    /* Catch any other (unhandled) characters and flag them as bad. */
    .                       { addToken(Token.ERROR_IDENTIFIER); }

}


/*
 * Body of a double-quoted JavaScript string.  The string is emitted as a
 * single token spanning "start" (presumably set by the rule that entered
 * this state) through the closing quote.  validJSString is cleared when a
 * malformed \x or \u escape is seen, which turns the token into an error
 * token.
 */
<JS_STRING> {
    [^\n\\\"]+              {}
    \\x{HexDigit}{2}        {}
    \\x                     { /* Invalid latin-1 character \xXX */ validJSString = false; }
    \\u{HexDigit}{4}        {}
    \\u                     { /* Invalid Unicode character \\uXXXX */ validJSString = false; }
    \\.                     { /* Skip all escaped chars. */ }
    \\                      { /* Line ending in '\' => continue to next line. */
                                if (validJSString) {
                                    addToken(start,zzStartRead, Token.LITERAL_STRING_DOUBLE_QUOTE);
                                    addEndToken(INTERNAL_IN_JS_STRING_VALID);
                                }
                                else {
                                    addToken(start,zzStartRead, Token.ERROR_STRING_DOUBLE);
                                    addEndToken(INTERNAL_IN_JS_STRING_INVALID);
                                }
                                return firstToken;
                            }
    \"                      { int type = validJSString ? Token.LITERAL_STRING_DOUBLE_QUOTE : Token.ERROR_STRING_DOUBLE; addToken(start,zzStartRead, type); yybegin(JAVASCRIPT); }
    /* Unterminated string: flag everything from "start" on as an error. */
    \n |
    <<EOF>>                 { addToken(start,zzStartRead-1, Token.ERROR_STRING_DOUBLE); addEndToken(INTERNAL_IN_JS); return firstToken; }
}


/*
 * Body of a single-quoted JavaScript string.  Mirrors the double-quoted
 * state above, but emits LITERAL_CHAR / ERROR_CHAR tokens.
 */
<JS_CHAR> {
    [^\n\\\']+              {}
    \\x{HexDigit}{2}        {}
    \\x                     { /* Invalid latin-1 character \xXX */ validJSString = false; }
    \\u{HexDigit}{4}        {}
    \\u                     { /* Invalid Unicode character \\uXXXX */ validJSString = false; }
    \\.                     { /* Skip all escaped chars. */ }
    \\                      { /* Line ending in '\' => continue to next line. */
                                if (validJSString) {
                                    addToken(start,zzStartRead, Token.LITERAL_CHAR);
                                    addEndToken(INTERNAL_IN_JS_CHAR_VALID);
                                }
                                else {
                                    addToken(start,zzStartRead, Token.ERROR_CHAR);
                                    addEndToken(INTERNAL_IN_JS_CHAR_INVALID);
                                }
                                return firstToken;
                            }
    \'                      { int type = validJSString ? Token.LITERAL_CHAR : Token.ERROR_CHAR; addToken(start,zzStartRead, type); yybegin(JAVASCRIPT); }
    /* Unterminated literal: flag everything from "start" on as an error. */
    \n |
    <<EOF>>                 { addToken(start,zzStartRead-1, Token.ERROR_CHAR); addEndToken(INTERNAL_IN_JS); return firstToken; }
}


/*
 * Inside a JavaScript multi-line comment.  URLs are split out as hyperlink
 * tokens, a closing script tag ends both the comment and the script block,
 * and a comment that is still open at end of line is carried to the next
 * line via INTERNAL_IN_JS_MLC.
 */
<JS_MLC> {
    // JavaScript MLC's.  This state is essentially Java's MLC state.
    [^hwf<\n\*]+            {}
    /* The text before the link and the link itself are part of a multi-line
       comment, so both are typed COMMENT_MULTILINE (not COMMENT_EOL). */
    {URL}                   { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_MULTILINE); start = zzMarkedPos; }
    [hwf]                   {}
    {EndScriptTag}          {
                                yybegin(YYINITIAL, LANG_INDEX_DEFAULT);
                                int temp = zzStartRead;
                                addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE);
                                addToken(temp,temp+1, Token.MARKUP_TAG_DELIMITER);
                                addToken(zzMarkedPos-7,zzMarkedPos-2, Token.MARKUP_TAG_NAME);
                                addToken(zzMarkedPos-1,zzMarkedPos-1, Token.MARKUP_TAG_DELIMITER);
                            }
    "<"                     {}
    {JS_MLCEnd}             { yybegin(JAVASCRIPT); addToken(start,zzStartRead+1, Token.COMMENT_MULTILINE); }
    \*                      {}
    \n |
    <<EOF>>                 { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addEndToken(INTERNAL_IN_JS_MLC); return firstToken; }
}


/*
 * Inside a JavaScript end-of-line comment.  URLs are split out as hyperlink
 * tokens; a closing script tag ends the comment and returns to markup.
 */
<JS_EOL_COMMENT> {
    [^hwf<\n]+              {}
    {URL}                   { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_EOL); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_EOL); start = zzMarkedPos; }
    [hwf]                   {}
    {EndScriptTag}          {
                                int temp = zzStartRead;
                                addToken(start,zzStartRead-1, Token.COMMENT_EOL);
                                yybegin(YYINITIAL, LANG_INDEX_DEFAULT);
                                addToken(temp,temp+1, Token.MARKUP_TAG_DELIMITER);
                                addToken(zzMarkedPos-7,zzMarkedPos-2, Token.MARKUP_TAG_NAME);
                                addToken(zzMarkedPos-1,zzMarkedPos-1, Token.MARKUP_TAG_DELIMITER);
                            }
    "<"                     {}
    \n |
    <<EOF>>                 { addToken(start,zzStartRead-1, Token.COMMENT_EOL); addEndToken(INTERNAL_IN_JS); return firstToken; }
}


/*
 * CSS, outside of any declaration block (selectors, at-keywords, ids).
 * An opening brace switches to CSS_PROPERTY.  Strings and comments record
 * the current state in cssPrevState so they can return here when they end.
 */
<CSS> {
    {EndStyleTag}           {
                                yybegin(YYINITIAL, LANG_INDEX_DEFAULT);
                                addToken(zzStartRead,zzStartRead+1, Token.MARKUP_TAG_DELIMITER);
                                addToken(zzMarkedPos-6,zzMarkedPos-2, Token.MARKUP_TAG_NAME);
                                addToken(zzMarkedPos-1,zzMarkedPos-1, Token.MARKUP_TAG_DELIMITER);
                            }
    {CSS_SelectorPiece}     { addToken(Token.DATA_TYPE); }
    {CSS_PseudoClass}       { addToken(Token.RESERVED_WORD); }
    ":"                     { /* Unknown pseudo class */ addToken(Token.DATA_TYPE); }
    {CSS_AtKeyword}         { addToken(Token.REGEX); }
    {CSS_Id}                { addToken(Token.VARIABLE); }
    "{"                     { addToken(Token.SEPARATOR); yybegin(CSS_PROPERTY); }
    [,]                     { addToken(Token.IDENTIFIER); }
    \"                      { start = zzMarkedPos-1; cssPrevState = zzLexicalState; yybegin(CSS_STRING); }
    \'                      { start = zzMarkedPos-1; cssPrevState = zzLexicalState; yybegin(CSS_CHAR_LITERAL); }
    [+>~\^$\|=]             { addToken(Token.OPERATOR); }
    {CSS_Separator}         { addToken(Token.SEPARATOR); }
    {Whitespace}            { addToken(Token.WHITESPACE); }
    {CSS_MlcStart}          { start = zzMarkedPos-2; cssPrevState = zzLexicalState; yybegin(CSS_C_STYLE_COMMENT); }
    .                       { /*System.out.println("CSS: " + yytext());*/ addToken(Token.IDENTIFIER); }
    "\n" |
    <<EOF>>                 { addEndToken(INTERNAL_CSS); return firstToken; }
}


/*
 * Inside a CSS declaration block, expecting a property name.  A colon
 * switches to CSS_VALUE; a closing brace returns to CSS.
 */
<CSS_PROPERTY> {
    {EndStyleTag}           {
                                yybegin(YYINITIAL, LANG_INDEX_DEFAULT);
                                addToken(zzStartRead,zzStartRead+1, Token.MARKUP_TAG_DELIMITER);
                                addToken(zzMarkedPos-6,zzMarkedPos-2, Token.MARKUP_TAG_NAME);
                                addToken(zzMarkedPos-1,zzMarkedPos-1, Token.MARKUP_TAG_DELIMITER);
                            }
    {CSS_Property}          { addToken(Token.RESERVED_WORD); }
    "}"                     { addToken(Token.SEPARATOR); yybegin(CSS); }
    ":"                     { addToken(Token.OPERATOR); yybegin(CSS_VALUE); }
    {Whitespace}            { addToken(Token.WHITESPACE); }
    {CSS_MlcStart}          { start = zzMarkedPos-2; cssPrevState = zzLexicalState; yybegin(CSS_C_STYLE_COMMENT); }
    .                       { /*System.out.println("css_property: " + yytext());*/ addToken(Token.IDENTIFIER); }
    "\n" |
    <<EOF>>                 { addEndToken(INTERNAL_CSS_PROPERTY); return firstToken; }
}


/*
 * Inside a CSS declaration, after the colon.  A semicolon returns to
 * CSS_PROPERTY; a closing brace returns to CSS.
 */
<CSS_VALUE> {
    {EndStyleTag}           {
                                yybegin(YYINITIAL, LANG_INDEX_DEFAULT);
                                addToken(zzStartRead,zzStartRead+1, Token.MARKUP_TAG_DELIMITER);
                                addToken(zzMarkedPos-6,zzMarkedPos-2, Token.MARKUP_TAG_NAME);
                                addToken(zzMarkedPos-1,zzMarkedPos-1, Token.MARKUP_TAG_DELIMITER);
                            }
    {CSS_Value}             { addToken(Token.IDENTIFIER); }
    "!important"            { addToken(Token.ANNOTATION); }
    /* Split the match into the function name and its trailing one-character
       separator, then advance the scanner past the whole match by hand. */
    {CSS_Function}          {
                                int temp = zzMarkedPos - 2;
                                addToken(zzStartRead, temp, Token.FUNCTION);
                                addToken(zzMarkedPos-1, zzMarkedPos-1, Token.SEPARATOR);
                                zzStartRead = zzCurrentPos = zzMarkedPos;
                            }
    {CSS_Number}            { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
    \"                      { start = zzMarkedPos-1; cssPrevState = zzLexicalState; yybegin(CSS_STRING); }
    \'                      { start = zzMarkedPos-1; cssPrevState = zzLexicalState; yybegin(CSS_CHAR_LITERAL); }
    ")"                     { /* End of a function */ addToken(Token.SEPARATOR); }
    [;]                     { addToken(Token.OPERATOR); yybegin(CSS_PROPERTY); }
    [,\.]                   { addToken(Token.IDENTIFIER); }
    "}"                     { addToken(Token.SEPARATOR); yybegin(CSS); }
    {Whitespace}            { addToken(Token.WHITESPACE); }
    {CSS_MlcStart}          { start = zzMarkedPos-2; cssPrevState = zzLexicalState; yybegin(CSS_C_STYLE_COMMENT); }
    .                       { /*System.out.println("css_value: " + yytext());*/ addToken(Token.IDENTIFIER); }
    "\n" |
    <<EOF>>                 { addEndToken(INTERNAL_CSS_VALUE); return firstToken; }
}


/*
 * Body of a double-quoted CSS string.  On the closing quote the scanner
 * returns to whichever CSS state opened the string (cssPrevState).  At end
 * of line the end token is offset by cssPrevState, presumably so that the
 * enclosing state can be recovered when the next line is scanned.
 */
<CSS_STRING> {
    [^\n\\\"]+              {}
    \\.?                    { /* Skip escaped chars. */ }
    \"                      { addToken(start,zzStartRead, Token.LITERAL_STRING_DOUBLE_QUOTE); yybegin(cssPrevState); }
    \n |
    <<EOF>>                 { addToken(start,zzStartRead-1, Token.LITERAL_STRING_DOUBLE_QUOTE); addEndToken(INTERNAL_CSS_STRING - cssPrevState); return firstToken; }
}


/*
 * Body of a single-quoted CSS string.  Mirrors CSS_STRING, but emits
 * LITERAL_CHAR tokens.
 */
<CSS_CHAR_LITERAL> {
    [^\n\\\']+              {}
    \\.?                    { /* Skip escaped chars. */ }
    \'                      { addToken(start,zzStartRead, Token.LITERAL_CHAR); yybegin(cssPrevState); }
    \n |
    <<EOF>>                 { addToken(start,zzStartRead-1, Token.LITERAL_CHAR); addEndToken(INTERNAL_CSS_CHAR - cssPrevState); return firstToken; }
}


/*
 * Inside a CSS comment.  URLs are split out as hyperlink tokens; the end of
 * the comment returns to whichever CSS state opened it (cssPrevState).
 */
<CSS_C_STYLE_COMMENT> {
    [^hwf\n\*]+             {}
    {URL}                   { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_MULTILINE); start = zzMarkedPos; }
    [hwf]                   {}
    {CSS_MlcEnd}            { addToken(start,zzStartRead+1, Token.COMMENT_MULTILINE); yybegin(cssPrevState); }
    \*                      {}
    \n |
    <<EOF>>                 { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addEndToken(INTERNAL_CSS_MLC - cssPrevState); return firstToken; }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy