org.fife.ui.rsyntaxtextarea.modes.PHPTokenMaker.flex Maven / Gradle / Ivy
The newest version!
/* * 01/28/2009 * * PHPTokenMaker.java - Generates tokens for PHP syntax highlighting. * * This library is distributed under a modified BSD license. See the included * RSyntaxTextArea.License.txt file for details. */ package org.fife.ui.rsyntaxtextarea.modes; import java.io.*; import javax.swing.text.Segment; import org.fife.ui.rsyntaxtextarea.*; /** * Scanner for PHP files. * * This implementation was created using * JFlex 1.4.1; however, the generated file * was modified for performance. Memory allocation needs to be almost * completely removed to be competitive with the handwritten lexers (subclasses * of
file will contain two * definitions of bothAbstractTokenMaker
, so this class has been modified so that * Strings are never allocated (via yytext()), and the scanner never has to * worry about refilling its buffer (needlessly copying chars around). * We can achieve this because RText always scans exactly 1 line of tokens at a * time, and hands the scanner this line as an array of characters (a Segment * really). Since tokens contain pointers to char arrays instead of Strings * holding their contents, there is no need for allocating new memory for * Strings.* * The actual algorithm generated for scanning has, of course, not been * modified.
* * If you wish to regenerate this file yourself, keep in mind the following: *
*
- The generated PHPTokenMaker.java
zzRefill
andyyreset
. * You should hand-delete the second of each definition (the ones * generated by the lexer), as these generated methods modify the input * buffer, which we'll never have to do. *
yylex()
on the generated scanner
* directly; rather, you should use getTokenList
as you would
* with any other TokenMaker
instance.<script>
tag.
*/
private static final int INTERNAL_INTAG_SCRIPT = -4;
/**
 * Token type specifying we're in a double-quoted attribute in a
 * script tag.  <code>getTokenList</code> maps this value to the
 * <code>INATTR_DOUBLE_SCRIPT</code> lexer state.
 */
private static final int INTERNAL_ATTR_DOUBLE_QUOTE_SCRIPT = -5;
/**
 * Token type specifying we're in a single-quoted attribute in a
 * script tag.  <code>getTokenList</code> maps this value to the
 * <code>INATTR_SINGLE_SCRIPT</code> lexer state.
 */
private static final int INTERNAL_ATTR_SINGLE_QUOTE_SCRIPT = -6;
/**
 * Token type specifying that the user has
 * ended a line with an unclosed <code>&lt;style&gt;</code> tag.
 * <code>getTokenList</code> maps this value to the
 * <code>INTAG_STYLE</code> lexer state.
 */
private static final int INTERNAL_INTAG_STYLE = -7;
/**
 * Token type specifying we're in a double-quoted attribute in a
 * style tag.  <code>getTokenList</code> maps this value to the
 * <code>INATTR_DOUBLE_STYLE</code> lexer state.
 */
private static final int INTERNAL_ATTR_DOUBLE_QUOTE_STYLE = -8;
/**
 * Token type specifying we're in a single-quoted attribute in a
 * style tag.  <code>getTokenList</code> maps this value to the
 * <code>INATTR_SINGLE_STYLE</code> lexer state.
 */
private static final int INTERNAL_ATTR_SINGLE_QUOTE_STYLE = -9;
/**
 * Token type specifying we're in JavaScript.  <code>getTokenList</code>
 * maps this value to the <code>JAVASCRIPT</code> lexer state and the
 * JavaScript language index.
 */
private static final int INTERNAL_IN_JS = -10;
/**
 * Token type specifying we're in a JavaScript multiline comment
 * (restored as the <code>JS_MLC</code> lexer state).
 */
private static final int INTERNAL_IN_JS_MLC = -11;
/**
 * Token type specifying we're in an invalid multi-line JS string.
 * Restored as the <code>JS_STRING</code> lexer state with
 * <code>validJSString</code> set to <code>false</code>.
 */
private static final int INTERNAL_IN_JS_STRING_INVALID = -12;
/**
 * Token type specifying we're in a valid multi-line JS string.
 * Restored as the <code>JS_STRING</code> lexer state with
 * <code>validJSString</code> set to <code>true</code>.
 */
private static final int INTERNAL_IN_JS_STRING_VALID = -13;
/**
 * Token type specifying we're in an invalid multi-line JS single-quoted string.
 * Restored as the <code>JS_CHAR</code> lexer state with
 * <code>validJSString</code> set to <code>false</code>.
 */
private static final int INTERNAL_IN_JS_CHAR_INVALID = -14;
/**
 * Token type specifying we're in a valid multi-line JS single-quoted string.
 * Restored as the <code>JS_CHAR</code> lexer state with
 * <code>validJSString</code> set to <code>true</code>.
 */
private static final int INTERNAL_IN_JS_CHAR_VALID = -15;
/**
 * Internal type denoting a line ending in CSS (restored as the
 * <code>CSS</code> lexer state).
 */
private static final int INTERNAL_CSS = -16;
/**
 * Internal type denoting a line ending in a CSS property (restored as the
 * <code>CSS_PROPERTY</code> lexer state).
 */
private static final int INTERNAL_CSS_PROPERTY = -17;
/**
 * Internal type denoting a line ending in a CSS property value (restored
 * as the <code>CSS_VALUE</code> lexer state).
 */
private static final int INTERNAL_CSS_VALUE = -18;
/**
 * Internal type denoting line ending in a CSS double-quote string.
 * The state to return to is embedded in the actual end token type:
 * <code>getTokenList</code> reads it back from the low 8 bits of the
 * negated token type into <code>cssPrevState</code>.
 */
private static final int INTERNAL_CSS_STRING = -(1<<11);
/**
 * Internal type denoting line ending in a CSS single-quote string.
 * The state to return to is embedded in the actual end token type:
 * <code>getTokenList</code> reads it back from the low 8 bits of the
 * negated token type into <code>cssPrevState</code>.
 */
private static final int INTERNAL_CSS_CHAR = -(2<<11);
/**
 * Internal type denoting line ending in a CSS multi-line comment.
 * The state to return to is embedded in the actual end token type:
 * <code>getTokenList</code> reads it back from the low 8 bits of the
 * negated token type into <code>cssPrevState</code>.
 */
private static final int INTERNAL_CSS_MLC = -(3<<11);
/**
 * Token type specifying we're in PHP.  This particular field is public so
 * that we can hack and key off of it for code completion.
 * <p>
 * The end token actually emitted by <code>addPhpEndToken</code> is this
 * value minus <code>phpInState</code> minus
 * <code>phpInLangIndex&lt;&lt;16</code>; <code>getTokenList</code> decodes
 * those two values again from the negated token type.
 */
public static final int INTERNAL_IN_PHP = -(4<<11);
/**
 * Token type specifying we're in a PHP multiline comment (restored as the
 * <code>PHP_MLC</code> lexer state).
 */
private static final int INTERNAL_IN_PHP_MLC = -(5<<11);
/**
 * Token type specifying we're in a PHP multiline string (restored as the
 * <code>PHP_STRING</code> lexer state).
 */
private static final int INTERNAL_IN_PHP_STRING = -(6<<11);
/**
 * Token type specifying we're in a PHP multiline char (restored as the
 * <code>PHP_CHAR</code> lexer state).
 */
private static final int INTERNAL_IN_PHP_CHAR = -(7<<11);
/**
 * The previous CSS-related state we were in before going into a CSS
 * string, multi-line comment, etc.  <code>getTokenList</code> resets it to
 * <code>CSS</code> on every call and overwrites it when the initial token
 * type is one of the compound CSS end-token types.
 */
private int cssPrevState;
/**
 * Whether closing markup tags are automatically completed for PHP.
 * The field is static, so the setting is shared by every instance.
 */
private static boolean completeCloseTags;
/**
 * The state PHP was started in (YYINITIAL, INTERNAL_IN_JS, etc.).
 * Encoded into PHP end tokens by <code>addPhpEndToken</code> and decoded
 * from the low 8 bits of the negated token type in
 * <code>getTokenList</code>.
 */
private int phpInState;
/**
 * The language index we were in when PHP was started.  Recorded by
 * <code>yybegin(int, int)</code>, encoded into PHP end tokens shifted left
 * by 16 bits, and decoded again in <code>getTokenList</code>.
 */
private int phpInLangIndex;
/**
 * When in the JS_STRING state, whether the current string is valid.
 * <code>getTokenList</code> also sets this flag when restoring the
 * <code>JS_CHAR</code> state.
 */
private boolean validJSString;
/**
 * Language state set on HTML tokens.  Must be 0.
 */
private static final int LANG_INDEX_DEFAULT = 0;
/**
 * Language state set on JavaScript tokens.
 */
private static final int LANG_INDEX_JS = 1;
/**
 * Language state set on CSS tokens.
 */
private static final int LANG_INDEX_CSS = 2;
/**
 * Language state set on PHP tokens.
 */
private static final int LANG_INDEX_PHP = 3;
/**
* Constructor. This must be here because JFlex does not generate a
* no-parameter constructor.
*/
public PHPTokenMaker() {
super();
}
/**
 * Adds an "end token" of the given type to the current linked list of
 * tokens.  Both the start and the end offset handed on are
 * <code>zzMarkedPos</code>.
 *
 * @param tokenType The token's type.
 */
private void addEndToken(int tokenType) {
	final int markedPos = zzMarkedPos;
	addToken(markedPos, markedPos, tokenType);
}
/**
 * Adds a token covering part of <code>zzBuffer</code> to the current
 * linked list of tokens, passing <code>true</code> as the final argument
 * of the six-argument <code>addToken</code> overload.
 * <p>
 * NOTE(review): that overload is not declared in this part of the file;
 * presumably the flag marks the token as a hyperlink -- confirm against
 * the superclass.
 *
 * @param start The starting offset in <code>zzBuffer</code>.
 * @param end The ending offset in <code>zzBuffer</code>.
 * @param tokenType The token's type.
 * @see #addToken(int, int, int)
 */
private void addHyperlinkToken(int start, int end, int tokenType) {
	// The document offset is the buffer offset shifted by offsetShift.
	addToken(zzBuffer, start, end, tokenType, start + offsetShift, true);
}
/**
 * Adds an end token that encodes the information necessary to return
 * to the pre-PHP state and language index.  The state PHP was entered
 * from and the language index (shifted left 16 bits) are both subtracted
 * from the supplied end-token state; <code>getTokenList</code> decodes
 * them again on the next line.
 *
 * @param endTokenState The PHP-related end-token state.
 */
private void addPhpEndToken(int endTokenState) {
	final int langBits = phpInLangIndex << 16;
	final int encodedType = endTokenState - phpInState - langBits;
	addEndToken(encodedType);
}
/**
 * Adds a token of the given type spanning the text just matched, i.e.
 * from <code>zzStartRead</code> through <code>zzMarkedPos-1</code>.
 *
 * @param tokenType The token's type.
 */
private void addToken(int tokenType) {
	final int first = zzStartRead;
	final int last = zzMarkedPos - 1;
	addToken(first, last, tokenType);
}
/**
 * Adds a token covering part of <code>zzBuffer</code> to the current
 * linked list of tokens.  The token's document offset is
 * <code>start + offsetShift</code>.
 *
 * @param start The starting offset in <code>zzBuffer</code>.
 * @param end The ending offset in <code>zzBuffer</code>.
 * @param tokenType The token's type.
 */
private void addToken(int start, int end, int tokenType) {
	addToken(zzBuffer, start, end, tokenType, start + offsetShift);
}
/**
 * Adds the token specified to the current linked list of tokens by
 * delegating to the superclass, then moves <code>zzStartRead</code> up to
 * <code>zzMarkedPos</code>.
 *
 * @param array The character array.
 * @param start The starting offset in the array.
 * @param end The ending offset in the array.
 * @param tokenType The token's type.
 * @param startOffset The offset in the document at which this token
 *        occurs.
 */
@Override
public void addToken(char[] array, int start, int end, int tokenType,
		int startOffset) {
	super.addToken(array, start, end, tokenType, startOffset);
	// Whatever is added next begins where the current match ended.
	this.zzStartRead = this.zzMarkedPos;
}
/**
 * {@inheritDoc}
 * <p>
 * This implementation returns a new <code>HtmlOccurrenceMarker</code>.
 */
@Override
protected OccurrenceMarker createOccurrenceMarker() {
return new HtmlOccurrenceMarker();
}
/**
 * Returns whether markup close tags should be completed.  You might not
 * want this to be the case, since some tags in standard HTML aren't
 * usually closed.  The value comes from a static field, so it is the same
 * for every instance.
 *
 * @return Whether closing markup tags are completed.
 * @see #setCompleteCloseTags(boolean)
 */
@Override
public boolean getCompleteCloseTags() {
return completeCloseTags;
}
/**
 * Returns whether curly braces denote code blocks for the given language
 * index.  That is the case for CSS, JavaScript and PHP, and for nothing
 * else.
 *
 * @param languageIndex The language index to check.
 * @return Whether curly braces denote code blocks in that language.
 */
@Override
public boolean getCurlyBracesDenoteCodeBlocks(int languageIndex) {
	switch (languageIndex) {
		case LANG_INDEX_CSS:
		case LANG_INDEX_JS:
		case LANG_INDEX_PHP:
			return true;
		default:
			return false;
	}
}
/**
 * {@inheritDoc}
 * <p>
 * Every branch returns a two-element <code>{start, end}</code> array:
 * JavaScript and PHP use a <code>//</code> line comment with no end
 * delimiter (<code>null</code>), CSS uses its C-style block comment
 * delimiters, and any other language index is treated as markup and uses
 * the <code>&lt;!--</code> and <code>--&gt;</code> delimiters.
 *
 * @param languageIndex The language index at the line being commented.
 * @return The start and end strings used to comment out a line.
 */
@Override
public String[] getLineCommentStartAndEnd(int languageIndex) {
	switch (languageIndex) {
		case LANG_INDEX_JS:
		case LANG_INDEX_PHP:
			return new String[] { "//", null };
		case LANG_INDEX_CSS:
			return new String[] { "/*", "*/" };
		default:
			// Markup.  This branch used to return a one-element array
			// holding an empty string, which gave no comment delimiters
			// and no second element, unlike every other branch.
			return new String[] { "<!--", "-->" };
	}
}
/**
 * {@inheritDoc}
 * <p>
 * Occurrences are marked for function, variable and markup tag name
 * tokens only.
 */
@Override
public boolean getMarkOccurrencesOfTokenType(int type) {
	if (type == Token.FUNCTION) {
		return true;
	}
	if (type == Token.VARIABLE) {
		return true;
	}
	return type == Token.MARKUP_TAG_NAME;
}
/**
 * Overridden to handle newlines in JS and CSS differently than those in
 * markup.  The next line is indented only when the token belongs to a
 * language in which curly braces denote code blocks and the token is
 * exactly one character long, that character being <code>{</code> or
 * <code>(</code>.
 *
 * @param token The token to examine; may be <code>null</code>, in which
 *        case language index 0 is assumed and <code>false</code> results.
 * @return Whether the line after this token should be indented.
 */
@Override
public boolean getShouldIndentNextLineAfter(Token token) {
	final int langIndex = (token != null) ? token.getLanguageIndex() : 0;
	if (!getCurlyBracesDenoteCodeBlocks(langIndex)) {
		return false;
	}
	if (token == null || token.length() != 1) {
		return false;
	}
	final char only = token.charAt(0);
	return only == '{' || only == '(';
}
/**
 * Returns the first token in the linked list of tokens generated
 * from <code>text</code>.
 * <p>
 * The initial token type (the end-token type of the previous line) is
 * first translated into the lexer state and language index to resume in.
 * Simple types map one-to-one onto a state.  Types below -1024 are
 * compound: bits 8-15 of the negated value select the kind of state, the
 * low 8 bits carry the state to return to, and, for the PHP kinds, bits
 * 16-23 carry the language index PHP was entered from.  Anything else
 * starts in <code>YYINITIAL</code>.
 *
 * @param text The text from which to get tokens.
 * @param initialTokenType The token type we should start with.
 * @param startOffset The offset into the document at which
 *        <code>text</code> starts.
 * @return The first <code>Token</code> in a linked list representing
 *         the syntax highlighted text.
 */
public Token getTokenList(Segment text, int initialTokenType, int startOffset) {

	resetTokenList();
	this.offsetShift = -text.offset + startOffset;
	phpInState = YYINITIAL; // Shouldn't be necessary
	cssPrevState = CSS; // Shouldn't be necessary

	int langIndex = LANG_INDEX_DEFAULT;
	int lexState = Token.NULL;

	// Work out the lexer state (and language) to resume in.
	switch (initialTokenType) {

		// ----- Markup -----
		case Token.MARKUP_COMMENT:
			lexState = COMMENT;
			break;
		case Token.VARIABLE:
			lexState = DTD;
			break;
		case INTERNAL_INTAG:
			lexState = INTAG;
			break;
		case INTERNAL_INTAG_SCRIPT:
			lexState = INTAG_SCRIPT;
			break;
		case INTERNAL_INTAG_STYLE:
			lexState = INTAG_STYLE;
			break;
		case INTERNAL_ATTR_DOUBLE:
			lexState = INATTR_DOUBLE;
			break;
		case INTERNAL_ATTR_SINGLE:
			lexState = INATTR_SINGLE;
			break;
		case INTERNAL_ATTR_DOUBLE_QUOTE_SCRIPT:
			lexState = INATTR_DOUBLE_SCRIPT;
			break;
		case INTERNAL_ATTR_SINGLE_QUOTE_SCRIPT:
			lexState = INATTR_SINGLE_SCRIPT;
			break;
		case INTERNAL_ATTR_DOUBLE_QUOTE_STYLE:
			lexState = INATTR_DOUBLE_STYLE;
			break;
		case INTERNAL_ATTR_SINGLE_QUOTE_STYLE:
			lexState = INATTR_SINGLE_STYLE;
			break;

		// ----- JavaScript -----
		case INTERNAL_IN_JS:
			lexState = JAVASCRIPT;
			langIndex = LANG_INDEX_JS;
			break;
		case INTERNAL_IN_JS_MLC:
			lexState = JS_MLC;
			langIndex = LANG_INDEX_JS;
			break;
		case INTERNAL_IN_JS_STRING_INVALID:
		case INTERNAL_IN_JS_STRING_VALID:
			lexState = JS_STRING;
			langIndex = LANG_INDEX_JS;
			validJSString = initialTokenType == INTERNAL_IN_JS_STRING_VALID;
			break;
		case INTERNAL_IN_JS_CHAR_INVALID:
		case INTERNAL_IN_JS_CHAR_VALID:
			lexState = JS_CHAR;
			langIndex = LANG_INDEX_JS;
			validJSString = initialTokenType == INTERNAL_IN_JS_CHAR_VALID;
			break;

		// ----- CSS (simple states) -----
		case INTERNAL_CSS:
			lexState = CSS;
			langIndex = LANG_INDEX_CSS;
			break;
		case INTERNAL_CSS_PROPERTY:
			lexState = CSS_PROPERTY;
			langIndex = LANG_INDEX_CSS;
			break;
		case INTERNAL_CSS_VALUE:
			lexState = CSS_VALUE;
			langIndex = LANG_INDEX_CSS;
			break;

		// ----- Compound types (PHP, CSS strings/comments) or plain markup -----
		default: {
			if (initialTokenType >= -1024) {
				lexState = YYINITIAL;
				break;
			}

			// INTERNAL_IN_PHPxxx - phpInState, or INTERNAL_CSS_xxx - cssPrevState
			final int encoded = -initialTokenType;
			final int returnState = encoded & 0xff;
			final int kind = -(encoded & 0x0000ff00);

			if (kind == INTERNAL_CSS_STRING || kind == INTERNAL_CSS_CHAR
					|| kind == INTERNAL_CSS_MLC) {
				langIndex = LANG_INDEX_CSS;
				cssPrevState = returnState;
				if (kind == INTERNAL_CSS_STRING) {
					lexState = CSS_STRING;
				}
				else if (kind == INTERNAL_CSS_CHAR) {
					lexState = CSS_CHAR_LITERAL;
				}
				else {
					lexState = CSS_C_STYLE_COMMENT;
				}
			}
			else {
				// One of the PHP kinds.  An unrecognized kind should never
				// happen and is treated like INTERNAL_IN_PHP.
				langIndex = LANG_INDEX_PHP;
				phpInState = returnState;
				phpInLangIndex = (encoded & 0x00ff0000) >> 16;
				if (kind == INTERNAL_IN_PHP_MLC) {
					lexState = PHP_MLC;
				}
				else if (kind == INTERNAL_IN_PHP_STRING) {
					lexState = PHP_STRING;
				}
				else if (kind == INTERNAL_IN_PHP_CHAR) {
					lexState = PHP_CHAR;
				}
				else {
					lexState = PHP;
				}
			}
			break;
		}

	}

	setLanguageIndex(langIndex);
	start = text.offset;
	s = text;

	try {
		yyreset(zzReader);
		yybegin(lexState);
		return yylex();
	} catch (IOException ioe) {
		ioe.printStackTrace();
		return new TokenImpl();
	}

}
/**
 * Sets whether markup close tags should be completed.  You might not want
 * this to be the case, since some tags in standard HTML aren't usually
 * closed.  The setting is stored in a static field, so it applies to all
 * instances.
 *
 * @param complete Whether closing markup tags are completed.
 * @see #getCompleteCloseTags()
 */
public static void setCompleteCloseTags(boolean complete) {
	PHPTokenMaker.completeCloseTags = complete;
}
/**
 * Overridden to remember the language index we're leaving.  The current
 * language index is saved in <code>phpInLangIndex</code> before the new
 * lexical state and language index take effect.
 *
 * @param state The lexical state to enter.
 * @param languageIndex The language index to switch to.
 */
@Override
protected void yybegin(int state, int languageIndex) {
	final int leavingLangIndex = getLanguageIndex();
	phpInLangIndex = leavingLangIndex;
	yybegin(state);
	setLanguageIndex(languageIndex);
}
/**
 * Refills the input buffer.  Nothing is actually copied here: the method
 * only reports whether the current position has reached the end of the
 * segment being scanned.
 *
 * @return <code>true</code> if EOF was reached, otherwise
 *         <code>false</code>.
 */
private boolean zzRefill() {
	final int segmentEnd = s.offset + s.count;
	return zzCurrentPos >= segmentEnd;
}
/**
 * Resets the scanner to read from a new input stream.
 * Does not close the old reader.
 *
 * All internal variables are reset, the old input stream
 * cannot be reused (internal buffer is discarded and lost).
 * Lexical state is set to <code>YYINITIAL</code>.
 *
 * The scanner is pointed directly at the array of the current segment
 * <code>s</code>, which must already have been assigned by the caller.
 *
 * @param reader the new input stream
 */
public final void yyreset(Reader reader) {
	// 's' has been updated.
	final int segmentStart = s.offset;
	zzBuffer = s.array;
	/*
	 * zzRefill no longer "refills" the buffer (it is always "full" the
	 * first time through, since it points to the segment's array), so
	 * zzEndRead is assigned here rather than starting out equal to
	 * zzStartRead.
	 */
	zzStartRead = segmentStart;
	zzEndRead = segmentStart + s.count - 1;
	zzPushbackPos = segmentStart;
	zzMarkedPos = segmentStart;
	zzCurrentPos = segmentStart;
	zzLexicalState = YYINITIAL;
	zzReader = reader;
	zzAtBOL = true;
	zzAtEOF = false;
}
%}
// HTML-specific stuff.
// Whitespace: one or more spaces, tabs or form feeds.
Whitespace = ([ \t\f]+)
LineTerminator = ([\n])
// Identifier: a run of anything but space, tab, newline, '<' and '&'.
Identifier = ([^ \t\n<&]+)
// EntityReference: '&', then any characters other than ';', space and tab,
// then an optional ';'.
EntityReference = ([&][^; \t]*[;]?)
// InTagIdentifier: a run of anything but whitespace, quotes, '/', '=' and '>'.
InTagIdentifier = ([^ \t\n\"\'/=>]+)
// Closing script/style tags, matched case-insensitively.  Both must begin
// with the "</" opener; with an empty literal there they would match a bare
// "script>" or "style>" instead of the closing tag.
EndScriptTag = ("</" [sS][cC][rR][iI][pP][tT] ">")
EndStyleTag = ("</" [sS][tT][yY][lL][eE] ">")
// General stuff.
// Character-class building blocks shared by the macros that follow.
Letter = [A-Za-z]
NonzeroDigit = [1-9]
Digit = ("0"|{NonzeroDigit})
HexDigit = ({Digit}|[A-Fa-f])
OctalDigit = ([0-7])
LetterOrUnderscore = ({Letter}|"_")
LetterOrUnderscoreOrDash = ({LetterOrUnderscore}|[\-])
// JavaScript stuff.
// EscapedSourceCharacter: 'u' followed by exactly four hex digits.
EscapedSourceCharacter = ("u"{HexDigit}{HexDigit}{HexDigit}{HexDigit})
// NonSeparator: any character that is not whitespace, a separator, an
// operator character or a quote; '#' and '\' are explicitly allowed.
NonSeparator = ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#"|"\\")
IdentifierStart = ({Letter}|"_"|"$")
IdentifierPart = ({IdentifierStart}|{Digit}|("\\"{EscapedSourceCharacter}))
JS_MLCBegin = "/*"
JS_MLCEnd = "*/"
JS_LineCommentBegin = "//"
// Numeric literals.  Helper1 is a decimal integer, Helper2 is "0" followed by
// either hex digits (after x/X) or octal digits.
JS_IntegerHelper1 = (({NonzeroDigit}{Digit}*)|"0")
JS_IntegerHelper2 = ("0"(([xX]{HexDigit}+)|({OctalDigit}*)))
JS_IntegerLiteral = ({JS_IntegerHelper1}[lL]?)
JS_HexLiteral = ({JS_IntegerHelper2}[lL]?)
JS_FloatHelper1 = ([fFdD]?)
JS_FloatHelper2 = ([eE][+-]?{Digit}+{JS_FloatHelper1})
JS_FloatLiteral1 = ({Digit}+"."({JS_FloatHelper1}|{JS_FloatHelper2}|{Digit}+({JS_FloatHelper1}|{JS_FloatHelper2})))
JS_FloatLiteral2 = ("."{Digit}+({JS_FloatHelper1}|{JS_FloatHelper2}))
JS_FloatLiteral3 = ({Digit}+{JS_FloatHelper2})
JS_FloatLiteral = ({JS_FloatLiteral1}|{JS_FloatLiteral2}|{JS_FloatLiteral3}|({Digit}+[fFdD]))
// A number immediately followed by non-separator characters is malformed.
JS_ErrorNumberFormat = (({JS_IntegerLiteral}|{JS_HexLiteral}|{JS_FloatLiteral}){NonSeparator}+)
JS_Separator = ([\(\)\{\}\[\]\]])
JS_Separator2 = ([\;,.])
// The list used to contain ">>" twice and no "<<", even though "<<=" is an
// assignment operator below; the duplicate is now the left-shift operator.
JS_NonAssignmentOperator = ("+"|"-"|"<="|"^"|"++"|"<"|"*"|">="|"%"|"--"|">"|"/"|"!="|"?"|">>"|"!"|"&"|"=="|":"|"<<"|"~"|"||"|"&&"|">>>")
JS_AssignmentOperator = ("="|"-="|"*="|"/="|"|="|"&="|"^="|"+="|"%="|"<<="|">>="|">>>=")
JS_Operator = ({JS_NonAssignmentOperator}|{JS_AssignmentOperator})
JS_Identifier = ({IdentifierStart}{IdentifierPart}*)
JS_ErrorIdentifier = ({NonSeparator}+)
// Regex literal: '/', a first character that is not '*', '\' or '/' (or an
// escape), further body characters or escapes, a closing '/', then any of the
// flags g, i, m.
JS_Regex = ("/"([^\*\\/]|\\.)([^/\\]|\\.)*"/"[gim]*)
JS_BooleanLiteral = ("true"|"false")
// PHP stuff (most PHP stuff is shared with JS for simplicity)
// PHP_Start: the "<?" opener, optionally followed by "php".  The opener
// literal must not be empty; with an empty literal the macro would consist
// of nothing but an optional "php".
PHP_Start = ("<?""php"?)
LetterOrUnderscoreOrDigit = ({LetterOrUnderscore}|{Digit})
// PHP_Variable: '$', a letter or underscore, then letters, digits, underscores.
PHP_Variable = ("$"{LetterOrUnderscore}{LetterOrUnderscoreOrDigit}*)
PHP_LineCommentBegin = ("//"|[#])
PHP_BooleanLiteral = ({JS_BooleanLiteral}|"TRUE"|"FALSE")
PHP_Null = ("null"|"NULL")
// CSS stuff.
// CSS_SelectorPiece: starts with '*', '.', a letter, '_' or '-', followed by
// any number of letters, '_', '-', '.' or digits.
CSS_SelectorPiece = (("*"|"."|{LetterOrUnderscoreOrDash})({LetterOrUnderscoreOrDash}|"."|{Digit})*)
// CSS_PseudoClass: ':' followed by one of the listed names.  The entries that
// themselves begin with ':' therefore match the double-colon forms.
CSS_PseudoClass = (":"("root"|"nth-child"|"nth-last-child"|"nth-of-type"|"nth-last-of-type"|"first-child"|"last-child"|"first-of-type"|"last-of-type"|"only-child"|"only-of-type"|"empty"|"link"|"visited"|"active"|"hover"|"focus"|"target"|"lang"|"enabled"|"disabled"|"checked"|":first-line"|":first-letter"|":before"|":after"|"not"))
CSS_AtKeyword = ("@"{CSS_SelectorPiece})
CSS_Id = ("#"{CSS_SelectorPiece})
CSS_Separator = ([;\(\)\[\]])
// CSS multi-line comments reuse the JavaScript comment delimiters.
CSS_MlcStart = ({JS_MLCBegin})
CSS_MlcEnd = ({JS_MLCEnd})
// CSS_Property: optional leading '*', then a letter, '_' or '-', then letters,
// '_', '-' or digits.
CSS_Property = ([\*]?{LetterOrUnderscoreOrDash}({LetterOrUnderscoreOrDash}|{Digit})*)
CSS_ValueChar = ({LetterOrUnderscoreOrDash}|[\\/])
// CSS_Value may match the empty string, since it is zero or more value chars.
CSS_Value = ({CSS_ValueChar}*)
CSS_Function = ({CSS_Value}\()
// CSS_Digits: optional '-', digits, optional further digits/dots, optional unit.
CSS_Digits = ([\-]?{Digit}+([0-9\.]+)?(pt|pc|in|mm|cm|em|ex|px|ms|s|%)?)
CSS_Hex = ("#"[0-9a-fA-F]+)
CSS_Number = ({CSS_Digits}|{CSS_Hex})
// URL matching.  A URL starts with "http://", "https://", "ftp://", "file://"
// or "www.", optionally followed by URL characters ending in a
// URLEndCharacter ('/', '$', a letter or a digit).
URLGenDelim = ([:\/\?#\[\]@])
URLSubDelim = ([\!\$&'\(\)\*\+,;=])
URLUnreserved = ({LetterOrUnderscoreOrDigit}|[\-\.\~])
URLCharacter = ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
URLCharacters = ({URLCharacter}*)
URLEndCharacter = ([\/\$]|{Letter}|{Digit})
URL = (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)
// Lexical states.  Apart from INTAG_CHECK_TAG_NAME and JS_EOL_COMMENT, each
// state below is one that getTokenList() can start a line in.
// Markup states.
%state COMMENT
%state DTD
%state INTAG
%state INTAG_CHECK_TAG_NAME
%state INATTR_DOUBLE
%state INATTR_SINGLE
// Inside a script tag and its attributes.
%state INTAG_SCRIPT
%state INATTR_DOUBLE_SCRIPT
%state INATTR_SINGLE_SCRIPT
// Inside a style tag and its attributes.
%state INTAG_STYLE
%state INATTR_DOUBLE_STYLE
%state INATTR_SINGLE_STYLE
// JavaScript states.
%state JAVASCRIPT
%state JS_CHAR
%state JS_STRING
%state JS_MLC
%state JS_EOL_COMMENT
// PHP states.
%state PHP
%state PHP_MLC
%state PHP_STRING
%state PHP_CHAR
// CSS states.
%state CSS
%state CSS_PROPERTY
%state CSS_VALUE
%state CSS_STRING
%state CSS_CHAR_LITERAL
%state CSS_C_STYLE_COMMENT
%%