org.fife.ui.rsyntaxtextarea.modes.CPlusPlusTokenMaker.flex Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of rsyntaxtextarea Show documentation
RSyntaxTextArea is the syntax highlighting text editor for Swing applications. Features include syntax highlighting for 40+ languages, code folding, code completion, regex find and replace, macros, code templates, undo/redo, line numbering and bracket matching.
The newest version!
/*
 * 11/19/2004
 *
 * CPlusPlusTokenMaker.java - An object that can take a chunk of text and
 * return a linked list of tokens representing it in C++.
 * 
 * This library is distributed under a modified BSD license.  See the included
 * LICENSE file for details.
 */
package org.fife.ui.rsyntaxtextarea.modes;

import java.io.*;
import javax.swing.text.Segment;

import org.fife.ui.rsyntaxtextarea.*;


/**
 * A parser for the C++ programming language.<p>
 *
 * This implementation was created using
 * JFlex 1.4.1; however, the generated file
 * was modified for performance.  Memory allocation needs to be almost
 * completely removed to be competitive with the handwritten lexers (subclasses
 * of <code>AbstractTokenMaker</code>), so this class has been modified so that
 * Strings are never allocated (via <code>yytext()</code>), and the scanner
 * never has to worry about refilling its buffer (needlessly copying chars
 * around).  We can achieve this because RText always scans exactly 1 line of
 * tokens at a time, and hands the scanner this line as an array of characters
 * (a <code>Segment</code>, really).  Since tokens contain pointers to char
 * arrays instead of Strings holding their contents, there is no need for
 * allocating new memory for Strings.<p>
 *
 * The actual algorithm generated for scanning has, of course, not been
 * modified.<p>
 *
 * If you wish to regenerate this file yourself, keep in mind the following:
 * <ul>
 *   <li>The generated <code>CPlusPlusTokenMaker.java</code> file will contain
 *       two definitions of both <code>zzRefill</code> and <code>yyreset</code>.
 *       You should hand-delete the second of each definition (the ones
 *       generated by the lexer), as these generated methods modify the input
 *       buffer, which we'll never have to do.</li>
 *   <li>You should also change the declaration/definition of
 *       <code>zzBuffer</code> to NOT be initialized.  This is a needless
 *       memory allocation for us since we will be pointing the array
 *       somewhere else anyway.</li>
 *   <li>You should NOT call <code>yylex()</code> on the generated scanner
 *       directly; rather, you should use <code>getTokenList</code> as you
 *       would with any other <code>TokenMaker</code> instance.</li>
 * </ul>
 *
 * @author Robert Futrell
 * @version 0.6
 *
 */
%%

/* Generate the scanner as a public class. */
%public
/* Name of the generated scanner class. */
%class CPlusPlusTokenMaker
/* Superclass of the generated scanner. */
%extends AbstractJFlexCTokenMaker
/* Scan the full Unicode character set. */
%unicode
/* Return type of the generated scanning method (yylex). */
%type org.fife.ui.rsyntaxtextarea.Token


%{


	/**
	 * Constructor.  This must be here because JFlex does not generate a
	 * no-parameter constructor.
	 */
	public CPlusPlusTokenMaker() {
		super();
	}


	/**
	 * Adds a token covering part of the scan buffer to the current linked
	 * list of tokens, passing <code>true</code> as the trailing argument of
	 * the six-argument <code>addToken</code> overload (presumably the
	 * hyperlink flag; confirm against the superclass).
	 *
	 * @param start The index in the scan buffer of the token's first char.
	 * @param end The index in the scan buffer of the token's last char.
	 * @param tokenType The token's type.
	 * @see #addToken(int, int, int)
	 */
	private void addHyperlinkToken(int start, int end, int tokenType) {
		// The document offset is the buffer index shifted by offsetShift.
		addToken(zzBuffer, start, end, tokenType, start + offsetShift, true);
	}


	/**
	 * Adds a token spanning the text of the current match (from
	 * <code>zzStartRead</code> through the char before
	 * <code>zzMarkedPos</code>) to the current linked list of tokens.
	 *
	 * @param tokenType The token's type.
	 */
	private void addToken(int tokenType) {
		final int lastCharIndex = zzMarkedPos - 1;
		addToken(zzStartRead, lastCharIndex, tokenType);
	}


	/**
	 * Adds a token covering the given range of the scan buffer to the
	 * current linked list of tokens.
	 *
	 * @param start The index in the scan buffer of the token's first char.
	 * @param end The index in the scan buffer of the token's last char.
	 * @param tokenType The token's type.
	 */
	private void addToken(int start, int end, int tokenType) {
		// The document offset is the buffer index shifted by offsetShift.
		addToken(zzBuffer, start, end, tokenType, start + offsetShift);
	}


	/**
	 * Adds the specified token to the current linked list of tokens, then
	 * moves the scanner's start-of-token marker up to the end of the current
	 * match.
	 *
	 * @param array The character array containing the token's text.
	 * @param start The starting offset of the token in the array.
	 * @param end The ending offset of the token in the array.
	 * @param tokenType The token's type.
	 * @param startOffset The offset in the document at which this token
	 *                    occurs.
	 */
	@Override
	public void addToken(char[] array, int start, int end, int tokenType,
			int startOffset) {
		super.addToken(array, start, end, tokenType, startOffset);
		// Whatever gets tokenized next begins where this match ended.
		zzStartRead = zzMarkedPos;
	}


	/**
	 * Returns the text that starts and ends a line comment.
	 *
	 * @param languageIndex Not used by this implementation.
	 * @return A two-element array: <code>"//"</code> as the start marker and
	 *         <code>null</code> as the end marker.
	 */
	@Override
	public String[] getLineCommentStartAndEnd(int languageIndex) {
		final String[] startAndEnd = { "//", null };
		return startAndEnd;
	}


	/**
	 * Returns the first token in the linked list of tokens generated from
	 * <code>text</code>.
	 *
	 * @param text The text from which to get tokens.
	 * @param initialTokenType The token type we should start with; this
	 *        selects the lexical state the scanner begins in.
	 * @param startOffset The offset into the document at which
	 *        <code>text</code> starts.
	 * @return The first <code>Token</code> in a linked list representing
	 *         the syntax highlighted text, or an empty token if scanning
	 *         fails with an <code>IOException</code>.
	 */
	@Override
	public Token getTokenList(Segment text, int initialTokenType, int startOffset) {

		resetTokenList();
		// Difference between a document offset and an index into text.array.
		this.offsetShift = startOffset - text.offset;

		// Pick the lexical state to start in.  When the line begins inside a
		// comment, the pending comment token starts at the segment's start.
		final int state;
		if (initialTokenType == Token.COMMENT_EOL) {
			state = EOL_COMMENT;
			start = text.offset;
		}
		else if (initialTokenType == Token.COMMENT_MULTILINE) {
			state = MLC;
			start = text.offset;
		}
		else {
			state = YYINITIAL;
		}

		// yyreset() reads the segment from 's', so assign it first.
		s = text;
		try {
			yyreset(zzReader);
			yybegin(state);
			return yylex();
		} catch (IOException ioe) {
			ioe.printStackTrace();
			return new TokenImpl();
		}

	}


	/**
	 * Stand-in for the generated buffer-refill routine.  The buffer is never
	 * actually refilled; this only reports whether the current position has
	 * reached the end of the segment being scanned.
	 *
	 * @return <code>true</code> if EOF was reached, otherwise
	 *         <code>false</code>.
	 */
	private boolean zzRefill() {
		final int segmentEnd = s.offset + s.count;
		return zzCurrentPos >= segmentEnd;
	}


	/**
	 * Resets the scanner to read from the current segment (<code>s</code>),
	 * which must have been assigned before this method is called.
	 * Does not close the old reader.
	 *
	 * All internal position variables are reset to the start of the segment
	 * and the lexical state is set to <code>YYINITIAL</code>.
	 *
	 * @param reader The new input stream; it is only stored, the text that
	 *        is scanned comes from the segment's array.
	 */
	public final void yyreset(Reader reader) {
		zzReader = reader;

		// 's' has been updated; scan its backing array directly.
		zzBuffer = s.array;
		final int segmentStart = s.offset;

		/*
		 * The generated code set zzStartRead and zzEndRead both to the start
		 * of the input and relied on zzRefill to advance zzEndRead.  Since
		 * zzRefill no longer "refills" the buffer (it's always "full" the
		 * first time through, as it points to the segment's array), we
		 * assign zzEndRead here instead.
		 */
		zzStartRead = segmentStart;
		zzEndRead = segmentStart + s.count - 1;

		zzPushbackPos = segmentStart;
		zzMarkedPos = segmentStart;
		zzCurrentPos = segmentStart;

		zzLexicalState = YYINITIAL;
		zzAtEOF = false;
		zzAtBOL = true;
	}


%}

/* Basic character classes: ASCII letters, decimal, hex and octal digits. */
Letter				= [A-Za-z]
LetterOrUnderscore	= ({Letter}|"_")
Digit				= [0-9]
HexDigit			= {Digit}|[A-Fa-f]
OctalDigit			= [0-7]
/* Exponent part of a number: e or E, an optional sign, then digits. */
Exponent			= [eE][+-]?{Digit}+

/* Directive names recognized after a '#'. */
PreprocessorWord	= define|elif|else|endif|error|if|ifdef|ifndef|include|line|pragma|undef

/* The nine three-character sequences that begin with two question marks. */
Trigraph				= ("??="|"??("|"??)"|"??/"|"??'"|"??<"|"??>"|"??!"|"??-")

/* Octal escapes: a backslash followed by one, two or three octal digits */
/* (the three-digit form must start with 0-3). */
OctEscape1			= ([\\]{OctalDigit})
OctEscape2			= ([\\]{OctalDigit}{OctalDigit})
OctEscape3			= ([\\][0-3]{OctalDigit}{OctalDigit})
OctEscape				= ({OctEscape1}|{OctEscape2}|{OctEscape3})
/* Hex escape: a backslash, x or X, then exactly two hex digits. */
HexEscape				= ([\\][xX]{HexDigit}{HexDigit})

/* Any char that may appear unescaped inside a char literal. */
AnyChrChr					= ([^\'\n\\])
/* Single-character escapes accepted in char and string literals. */
Escape					= ([\\]([abfnrtv\'\"\?\\0e]))
/* An opening single quote plus exactly one character or escape, */
/* with no closing quote. */
UnclosedCharLiteral			= ([\']({Escape}|{OctEscape}|{HexEscape}|{Trigraph}|{AnyChrChr}))
/* A well-formed char literal. */
CharLiteral				= ({UnclosedCharLiteral}[\'])
/* An opening single quote followed by anything up to (not including) */
/* the next single quote or newline. */
ErrorUnclosedCharLiteral		= ([\'][^\'\n]*)
/* A closed but malformed char literal: empty, or with arbitrary content. */
ErrorCharLiteral			= (([\'][\'])|{ErrorUnclosedCharLiteral}[\'])
/* Any char that may appear unescaped inside a string literal. */
AnyStrChr					= ([^\"\n\\])
/* Question marks that do not form a trigraph: a run of '?' followed by a */
/* char that is not a trigraph terminator, backslash, '?', double quote or */
/* newline; or a single '?' followed by a trigraph terminator. */
FalseTrigraph				= (("?"(("?")*)[^\=\(\)\/\'\<\>\!\-\\\?\"\n])|("?"[\=\(\)\/\'\<\>\!\-]))
/* A well-formed, closed string literal. */
StringLiteral				= ([\"]((((("?")*)({Escape}|{OctEscape}|{HexEscape}|{Trigraph}))|{FalseTrigraph}|{AnyStrChr})*)(("?")*)[\"])
/* An opening double quote followed by escaped chars or chars other than */
/* backslash and double quote, with no closing quote. */
UnclosedStringLiteral		= ([\"]([\\].|[^\\\"])*[^\"]?)
/* A closed string literal whose content is not a valid StringLiteral. */
ErrorStringLiteral			= ({UnclosedStringLiteral}[\"])


/* Only a newline ends a line; whitespace is space, tab and form feed. */
LineTerminator		= \n
WhiteSpace		= [ \t\f]

/* Delimiters of multi-line (block) comments and of line comments. */
MLCBegin			= "/*"
MLCEnd			= "*/"
LineCommentBegin	= "//"

/* Integer suffixes: u or U optionally followed by l or L, or the reverse. */
NonFloatSuffix		= (([uU][lL]?)|([lL][uU]?))
/* Digits with an optional exponent and an optional integer suffix. */
IntegerLiteral		= ({Digit}+{Exponent}?{NonFloatSuffix}?)
/* 0x or 0X, one or more hex digits, optional integer suffix. */
HexLiteral		= ("0"[xX]{HexDigit}+{NonFloatSuffix}?)
/* Digits with a decimal point (digits required on at least one side), */
/* an optional exponent and an optional f, F, l or L suffix. */
FloatLiteral		= ((({Digit}*[\.]{Digit}+)|({Digit}+[\.]{Digit}*)){Exponent}?[fFlL]?)
/* A valid number immediately followed by more non-separator chars. */
ErrorNumberFormat	= (({IntegerLiteral}|{HexLiteral}|{FloatLiteral}){NonSeparator}+)

/* Any char that is not whitespace, a bracket, punctuation, an operator */
/* char or a quote; '#' is explicitly included. */
NonSeparator		= ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#")
/* A letter or underscore followed by letters, underscores, digits or '$'. */
Identifier		= ({LetterOrUnderscore}({LetterOrUnderscore}|{Digit}|[$])*)
/* Any other run of non-separator chars. */
ErrorIdentifier	= ({NonSeparator}+)

/* Character classes used to recognize URLs inside comments. */
URLGenDelim				= ([:\/\?#\[\]@])
URLSubDelim				= ([\!\$&'\(\)\*\+,;=])
URLUnreserved			= ({LetterOrUnderscore}|{Digit}|[\-\.\~])
URLCharacter			= ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
URLCharacters			= ({URLCharacter}*)
/* A URL must end in a slash, '$', letter or digit, so that trailing */
/* punctuation is not made part of the match. */
URLEndCharacter			= ([\/\$]|{Letter}|{Digit})
/* An http, https, ftp or file scheme followed by "://", or a leading */
/* "www.", optionally followed by the rest of the URL. */
URL						= (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)


/* Lexical state used while inside a multi-line comment. */
%state MLC
/* Lexical state used while inside a line comment. */
%state EOL_COMMENT

%%

 {

	/* Keywords */
	"auto" |
	"break" |
	"case" |
	"catch" |
	"class" |
	"const" |
	"const_cast" |
	"continue" |
	"default" |
	"delete" |
	"do" |
	"dynamic_cast" |
	"else" |
	"enum" |
	"explicit" |
	"extern" |
	"for" |
	"friend" |
	"goto" |
	"if" |
	"inline" |
	"mutable" |
	"namespace" |
	"new" |
	"operator" |
	"private" |
	"protected" |
	"public" |
	"register" |
	"reinterpret_cast" |
	"sizeof" |
	"static" |
	"static_cast" |
	"struct" |
	"switch" |
	"template" |
	"this" |
	"throw" |
	"try" |
	"typedef" |
	"typeid" |
	"typename" |
	"union" |
	"using" |
	"virtual" |
	"volatile" |
	"while"					{ addToken(Token.RESERVED_WORD); }

	"return"				{ addToken(Token.RESERVED_WORD_2); }

	/* Boolean literals. */
	"true" |
	"false" |

	/* Data types. */
	"char" |
	"div_t" |
	"double" |
	"float" |
	"int" |
	"ldiv_t" |
	"long" |
	"short" |
	"signed" |
	"size_t" |
	"unsigned" |
	"void" |
	"wchar_t"				{ addToken(Token.DATA_TYPE); }

	/* Standard functions */
	"abort" |
	"abs" |
	"acos" |
	"asctime" |
	"asin" |
	"assert" |
	"atan2" |
	"atan" |
	"atexit" |
	"atof" |
	"atoi" |
	"atol" |
	"bsearch" |
	"btowc" |
	"calloc" |
	"ceil" |
	"clearerr" |
	"clock" |
	"cosh" |
	"cos" |
	"ctime" |
	"difftime" |
	"div" |
	"errno" |
	"exit" |
	"exp" |
	"fabs" |
	"fclose" |
	"feof" |
	"ferror" |
	"fflush" |
	"fgetc" |
	"fgetpos" |
	"fgetwc" |
	"fgets" |
	"fgetws" |
	"floor" |
	"fmod" |
	"fopen" |
	"fprintf" |
	"fputc" |
	"fputs" |
	"fputwc" |
	"fputws" |
	"fread" |
	"free" |
	"freopen" |
	"frexp" |
	"fscanf" |
	"fseek" |
	"fsetpos" |
	"ftell" |
	"fwprintf" |
	"fwrite" |
	"fwscanf" |
	"getchar" |
	"getc" |
	"getenv" |
	"gets" |
	"getwc" |
	"getwchar" |
	"gmtime" |
	"isalnum" |
	"isalpha" |
	"iscntrl" |
	"isdigit" |
	"isgraph" |
	"islower" |
	"isprint" |
	"ispunct" |
	"isspace" |
	"isupper" |
	"isxdigit" |
	"labs" |
	"ldexp" |
	"ldiv" |
	"localeconv" |
	"localtime" |
	"log10" |
	"log" |
	"longjmp" |
	"malloc" |
	"mblen" |
	"mbrlen" |
	"mbrtowc" |
	"mbsinit" |
	"mbsrtowcs" |
	"mbstowcs" |
	"mbtowc" |
	"memchr" |
	"memcmp" |
	"memcpy" |
	"memmove" |
	"memset" |
	"mktime" |
	"modf" |
	"offsetof" |
	"perror" |
	"pow" |
	"printf" |
	"putchar" |
	"putc" |
	"puts" |
	"putwc" |
	"putwchar" |
	"qsort" |
	"raise" |
	"rand" |
	"realloc" |
	"remove" |
	"rename" |
	"rewind" |
	"scanf" |
	"setbuf" |
	"setjmp" |
	"setlocale" |
	"setvbuf" |
	"setvbuf" |
	"signal" |
	"sinh" |
	"sin" |
	"sprintf" |
	"sqrt" |
	"srand" |
	"sscanf" |
	"strcat" |
	"strchr" |
	"strcmp" |
	"strcmp" |
	"strcoll" |
	"strcpy" |
	"strcspn" |
	"strerror" |
	"strftime" |
	"strlen" |
	"strncat" |
	"strncmp" |
	"strncpy" |
	"strpbrk" |
	"strrchr" |
	"strspn" |
	"strstr" |
	"strtod" |
	"strtok" |
	"strtol" |
	"strtoul" |
	"strxfrm" |
	"swprintf" |
	"swscanf" |
	"system" |
	"tanh" |
	"tan" |
	"time" |
	"tmpfile" |
	"tmpnam" |
	"tolower" |
	"toupper" |
	"ungetc" |
	"ungetwc" |
	"va_arg" |
	"va_end" |
	"va_start" |
	"vfprintf" |
	"vfwprintf" |
	"vprintf" |
	"vsprintf" |
	"vswprintf" |
	"vwprintf" |
	"wcrtomb" |
	"wcscat" |
	"wcschr" |
	"wcscmp" |
	"wcscoll" |
	"wcscpy" |
	"wcscspn" |
	"wcsftime" |
	"wcslen" |
	"wcsncat" |
	"wcsncmp" |
	"wcsncpy" |
	"wcspbrk" |
	"wcsrchr" |
	"wcsrtombs" |
	"wcsspn" |
	"wcsstr" |
	"wcstod" |
	"wcstok" |
	"wcstol" |
	"wcstombs" |
	"wcstoul" |
	"wcsxfrm" |
	"wctob" |
	"wctomb" |
	"wmemchr" |
	"wmemcmp" |
	"wmemcpy" |
	"wmemmove" |
	"wmemset" |
	"wprintf" |
	"wscanf"				{ addToken(Token.FUNCTION); }

	/* Standard-defined macros. */
	"__DATE__" |
	"__TIME__" |
	"__FILE__" |
	"__LINE__" |
	"__STDC__"				{ addToken(Token.PREPROCESSOR); }

	{LineTerminator}				{ addNullToken(); return firstToken; }

	{Identifier}					{ addToken(Token.IDENTIFIER); }

	{WhiteSpace}+					{ addToken(Token.WHITESPACE); }

	/* Preprocessor directives */
	/* Special-case  for uniform appearance with "string" includes "*/
	"#"{WhiteSpace}*"include"{WhiteSpace}*"<"[A-Za-z0-9_./-]+">" {
                                                     // It's allowed, but discouraged, to have spaces after the '#'
                                                     int start = zzStartRead;
                                                     int end = start + 1;
                                                     while (Character.isWhitespace(zzBuffer[end])) {
                                                         end++;
                                                     }
                                                     end += "include".length() - 1;
                                                     addToken(start, end, TokenTypes.PREPROCESSOR);

                                                     // Arbitrary space between the #include and the header.
                                                     // Bounds check isn't necessary since our regex was matched.
                                                     start = end = end + 1;
                                                     while (Character.isWhitespace(zzBuffer[end + 1])) {
                                                         end++;
                                                     }
                                                     addToken(start, end, TokenTypes.WHITESPACE);

                                                     addToken(end + 1, zzMarkedPos - 1, TokenTypes.LITERAL_STRING_DOUBLE_QUOTE);
                                                 }
	"#"{WhiteSpace}*{PreprocessorWord}	{ addToken(Token.PREPROCESSOR); }

	/* String/Character Literals. */
	{CharLiteral}					{ addToken(Token.LITERAL_CHAR); }
	{UnclosedCharLiteral}			{ addToken(Token.ERROR_CHAR); /*addNullToken(); return firstToken;*/ }
	{ErrorUnclosedCharLiteral}		{ addToken(Token.ERROR_CHAR); addNullToken(); return firstToken; }
	{ErrorCharLiteral}				{ addToken(Token.ERROR_CHAR); }
	{StringLiteral}				{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	{UnclosedStringLiteral}			{ addToken(Token.ERROR_STRING_DOUBLE); addNullToken(); return firstToken; }
	{ErrorStringLiteral}			{ addToken(Token.ERROR_STRING_DOUBLE); }

	/* Comment Literals. */
	{MLCBegin}					{ start = zzMarkedPos-2; yybegin(MLC); }
	{LineCommentBegin}			{ start = zzMarkedPos-2; yybegin(EOL_COMMENT); }

	/* Separators. */
	"(" |
	")" |
	"[" |
	"]" |
	"{" |
	"}"							{ addToken(Token.SEPARATOR); }

	/* Operators. */
	{Trigraph} |
	"=" |
	"+" |
	"-" |
	"*" |
	"/" |
	"%" |
	"~" |
	"<" |
	">" |
	"<<" |
	">>" |
	"==" |
	"+=" |
	"-=" |
	"*=" |
	"/=" |
	"%=" |
	"&=" |
	"|=" |
	"^=" |
	">=" |
	"<=" |
	"!=" |
	">>=" |
	"<<=" |
	"^" |
	"&" |
	"&&" |
	"|" |
	"||" |
	"?" |
	":" |
	"," |
	"!" |
	"++" |
	"--" |
	"." |
	","							{ addToken(Token.OPERATOR); }

	/* Numbers */
	{IntegerLiteral}				{ addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
	{HexLiteral}					{ addToken(Token.LITERAL_NUMBER_HEXADECIMAL); }
	{FloatLiteral}					{ addToken(Token.LITERAL_NUMBER_FLOAT); }
	{ErrorNumberFormat}				{ addToken(Token.ERROR_NUMBER_FORMAT); }

	/* Some lines will end in '\' to wrap an expression. */
	"\\"							{ addToken(Token.IDENTIFIER); }

	{ErrorIdentifier}				{ addToken(Token.ERROR_IDENTIFIER); }

	/* Other punctuation, we'll highlight it as "identifiers." */
	";"							{ addToken(Token.IDENTIFIER); }

	/* Ended with a line not in a string or comment. */
	<>						{ addNullToken(); return firstToken; }

	/* Catch any other (unhandled) characters and flag them as bad. */
	.							{ addToken(Token.ERROR_IDENTIFIER); }

}

 {

	[^hwf\n\*]+			{}
	{URL}				{ int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_MULTILINE); start = zzMarkedPos; }
	[hwf]				{}

	\n					{ addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }
	{MLCEnd}			{ yybegin(YYINITIAL); addToken(start,zzStartRead+1, Token.COMMENT_MULTILINE); }
	\*					{}
	<>				{ addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }

}

 {
	[^hwf\\\n]+				{}
	{URL}					{ int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_EOL); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_EOL); start = zzMarkedPos; }
	[hwf]					{}
	\\.						{ /* Skip all escaped chars. */ }
	\\						{ /* Line ending in '\' => continue to next line. */
								addToken(start,zzStartRead, Token.COMMENT_EOL);
								return firstToken;
							}
	\n |
	<>					{ addToken(start,zzStartRead-1, Token.COMMENT_EOL); addNullToken(); return firstToken; }

}