/*
 * 12/06/2005
 *
 * PythonTokenMaker.java - Token maker for the Python programming language.
 * 
 * This library is distributed under a modified BSD license.  See the included
 * RSyntaxTextArea.License.txt file for details.
 */
package org.fife.ui.rsyntaxtextarea.modes;

import java.io.*;
import javax.swing.text.Segment;

import org.fife.ui.rsyntaxtextarea.AbstractJFlexTokenMaker;
import org.fife.ui.rsyntaxtextarea.Token;
import org.fife.ui.rsyntaxtextarea.TokenImpl;
import org.fife.ui.rsyntaxtextarea.TokenMaker;


/**
 * Scanner for the Python programming language.
 *
 * @author Robert Futrell
 * @version 0.3
 */
%%

%public
%class PythonTokenMaker
%extends AbstractJFlexTokenMaker
%unicode
%type org.fife.ui.rsyntaxtextarea.Token


%{


	/**
	 * Constructor.  This must be here because JFlex does not generate a
	 * no-parameter constructor.
	 */
	public PythonTokenMaker() {
		super();
	}


	/**
	 * Adds the token specified to the current linked list of tokens.
	 *
	 * @param tokenType The token's type.
	 */
	private void addToken(int tokenType) {
		addToken(zzStartRead, zzMarkedPos-1, tokenType);
	}


	/**
	 * Adds the token specified to the current linked list of tokens.
	 *
	 * @param start The token's starting offset in the segment.
	 * @param end The token's ending offset in the segment, inclusive.
	 * @param tokenType The token's type.
	 */
	private void addToken(int start, int end, int tokenType) {
		int so = start + offsetShift;
		addToken(zzBuffer, start,end, tokenType, so);
	}


	/**
	 * Adds the token specified to the current linked list of tokens.
	 *
	 * @param array The character array.
	 * @param start The starting offset in the array.
	 * @param end The ending offset in the array.
	 * @param tokenType The token's type.
	 * @param startOffset The offset in the document at which this token
	 *                    occurs.
	 */
	@Override
	public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
		super.addToken(array, start,end, tokenType, startOffset);
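		// Mark this text as consumed so the next call to addToken(int)
		// starts at the end of the token just added.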
		zzStartRead = zzMarkedPos;
	}


	/**
	 * {@inheritDoc}
	 */
	@Override
	public String[] getLineCommentStartAndEnd(int languageIndex) {
		return new String[] { "#", null };
	}


	/**
	 * Returns the first token in the linked list of tokens generated
	 * from <code>text</code>.  This method must be implemented by
	 * subclasses so they can correctly implement syntax highlighting.
	 *
	 * @param text The text from which to get tokens.
	 * @param initialTokenType The token type we should start with.
	 * @param startOffset The offset into the document at which
	 *        <code>text</code> starts.
	 * @return The first <code>Token</code> in a linked list representing
	 *         the syntax highlighted text.
	 */
	public Token getTokenList(Segment text, int initialTokenType, int startOffset) {

		resetTokenList();
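		// Offsets passed to addToken() are relative to the Segment; shift
		// them so the tokens we return reference document offsets instead.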
		this.offsetShift = -text.offset + startOffset;

		// Start off in the proper state.
		int state = Token.NULL;
		switch (initialTokenType) {
			case Token.LITERAL_STRING_DOUBLE_QUOTE:
				state = LONG_STRING_2;
				break;
			case Token.LITERAL_CHAR:
				state = LONG_STRING_1;
				break;
			default:
				state = Token.NULL;
		}

		s = text;
		try {
			yyreset(zzReader);
			yybegin(state);
			return yylex();
		} catch (IOException ioe) {
			ioe.printStackTrace();
			return new TokenImpl();
		}

	}


	/**
	 * Resets the scanner to read from a new input stream.
	 * Does not close the old reader.
	 *
	 * All internal variables are reset, the old input stream 
	 * cannot be reused (internal buffer is discarded and lost).
	 * Lexical state is set to YY_INITIAL.
	 *
	 * @param reader   the new input stream 
	 */
	public final void yyreset(java.io.Reader reader) throws java.io.IOException {
		// 's' has been updated.
		zzBuffer = s.array;
		/*
		 * We replaced the line below with the two below it because zzRefill
		 * no longer "refills" the buffer (since the way we do it, it's always
		 * "full" the first time through, since it points to the segment's
		 * array).  So, we assign zzEndRead here.
		 */
		//zzStartRead = zzEndRead = s.offset;
		zzStartRead = s.offset;
		zzEndRead = zzStartRead + s.count - 1;
		zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
		zzLexicalState = YYINITIAL;
		zzReader = reader;
		zzAtBOL  = true;
		zzAtEOF  = false;
	}


	/**
	 * Refills the input buffer.
	 *
	 * @return      <code>true</code> if EOF was reached, otherwise
	 *              <code>false</code>.
	 * @exception   IOException  if any I/O-Error occurs.
	 */
	private boolean zzRefill() throws java.io.IOException {
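		// The entire Segment is handed to the scanner in yyreset(), so there
		// is never anything to "refill"; simply report EOF once the current
		// position passes the end of the segment's text.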
		return zzCurrentPos>=s.offset+s.count;
	}


%}

/* This part is taken from http://www.python.org/doc/2.2.3/ref/grammar.txt */
identifier		= (({letter}|"_")({letter}|{digit}|"_")*)
letter			= ({lowercase}|{uppercase})
lowercase			= ([a-z])
uppercase			= ([A-Z])
digit			= ([0-9])
stringliteral		= ({stringprefix}?{shortstring})
stringprefix		= ("r"|"u"[rR]?|"R"|"U"[rR]?)
shortstring1		= ([\']{shortstring1item}*[\']?)
shortstring2		= ([\"]{shortstring2item}*[\"]?)
shortstring		= ({shortstring1}|{shortstring2})
shortstring1item	= ({shortstring1char}|{escapeseq})
shortstring2item	= ({shortstring2char}|{escapeseq})
shortstring1char	= ([^\\\n\'])
shortstring2char	= ([^\\\n\"])
escapeseq			= ([\\].)
longinteger		= ({integer}[lL])
integer			= ({decimalinteger}|{octinteger}|{hexinteger})
decimalinteger		= ({nonzerodigit}{digit}*|"0")
octinteger		= ("0"{octdigit}+)
hexinteger		= ("0"[xX]{hexdigit}+)
nonzerodigit		= ([1-9])
octdigit			= ([0-7])
hexdigit			= ({digit}|[a-f]|[A-F])
floatnumber		= ({pointfloat}|{exponentfloat})
pointfloat		= ({intpart}?{fraction}|{intpart}".")
exponentfloat		= (({intpart}|{pointfloat}){exponent})
intpart			= ({digit}+)
fraction			= ("."{digit}+)
exponent			= ([eE][\+\-]?{digit}+)
imagnumber		= (({floatnumber}|{intpart})[jJ])

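/* A digit followed by characters that cannot be part of a valid number
   (e.g. "123abc") is flagged as a malformed numeric literal below. */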
ErrorNumberFormat	= ({digit}{NonSeparator}+)
NonSeparator		= ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#")

LongStringStart1	= ({stringprefix}?\'\'\')
LongStringStart2	= ({stringprefix}?\"\"\")

LineTerminator		= (\n)
WhiteSpace		= ([ \t\f])

LineComment		= ("#".*)


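/*
 * Additional lexical states for Python's triple-quoted ("long") string
 * literals, which may span multiple lines.  getTokenList() re-enters these
 * states when the previous line ended inside such a string.
 */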
%state LONG_STRING_1
%state LONG_STRING_2


%%
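/*
 * Note on rule order: the keyword, type, and standard-function rules below
 * are listed before the generic {identifier} rule in the <YYINITIAL> block.
 * When two rules match text of the same length, JFlex picks the rule that
 * appears first, so these specific words are highlighted instead of falling
 * through to the identifier rule.
 */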

/* Keywords */
 "and"					{ addToken(Token.RESERVED_WORD); }
 "as"					{ addToken(Token.RESERVED_WORD); }
 "assert"				{ addToken(Token.RESERVED_WORD); }
 "break"					{ addToken(Token.RESERVED_WORD); }
 "class"					{ addToken(Token.RESERVED_WORD); }
 "continue"				{ addToken(Token.RESERVED_WORD); }
 "def"					{ addToken(Token.RESERVED_WORD); }
 "del"					{ addToken(Token.RESERVED_WORD); }
 "elif"					{ addToken(Token.RESERVED_WORD); }
 "else"					{ addToken(Token.RESERVED_WORD); }
 "except"				{ addToken(Token.RESERVED_WORD); }
 "exec"					{ addToken(Token.RESERVED_WORD); }
 "finally"				{ addToken(Token.RESERVED_WORD); }
 "for"					{ addToken(Token.RESERVED_WORD); }
 "from"					{ addToken(Token.RESERVED_WORD); }
 "global"				{ addToken(Token.RESERVED_WORD); }
 "if"					{ addToken(Token.RESERVED_WORD); }
 "import"				{ addToken(Token.RESERVED_WORD); }
 "in"					{ addToken(Token.RESERVED_WORD); }
 "is"					{ addToken(Token.RESERVED_WORD); }
 "lambda"				{ addToken(Token.RESERVED_WORD); }
 "not"					{ addToken(Token.RESERVED_WORD); }
 "or"					{ addToken(Token.RESERVED_WORD); }
 "pass"					{ addToken(Token.RESERVED_WORD); }
 "print"					{ addToken(Token.RESERVED_WORD); }
 "raise"					{ addToken(Token.RESERVED_WORD); }
 "return"				{ addToken(Token.RESERVED_WORD); }
 "try"					{ addToken(Token.RESERVED_WORD); }
 "while"					{ addToken(Token.RESERVED_WORD); }
 "yield"					{ addToken(Token.RESERVED_WORD); }

/* Data types. */
 "char"					{ addToken(Token.DATA_TYPE); }
 "double"				{ addToken(Token.DATA_TYPE); }
 "float"					{ addToken(Token.DATA_TYPE); }
 "int"					{ addToken(Token.DATA_TYPE); }
 "long"					{ addToken(Token.DATA_TYPE); }
 "short"					{ addToken(Token.DATA_TYPE); }
 "signed"				{ addToken(Token.DATA_TYPE); }
 "unsigned"				{ addToken(Token.DATA_TYPE); }
 "void"					{ addToken(Token.DATA_TYPE); }

/* Standard functions */
 "abs"					{ addToken(Token.FUNCTION); }
 "apply"					{ addToken(Token.FUNCTION); }
 "bool"					{ addToken(Token.FUNCTION); }
 "buffer"				{ addToken(Token.FUNCTION); }
 "callable"				{ addToken(Token.FUNCTION); }
 "chr"					{ addToken(Token.FUNCTION); }
 "classmethod"			{ addToken(Token.FUNCTION); }
 "cmp"					{ addToken(Token.FUNCTION); }
 "coerce"				{ addToken(Token.FUNCTION); }
 "compile"				{ addToken(Token.FUNCTION); }
 "complex"				{ addToken(Token.FUNCTION); }
 "delattr"				{ addToken(Token.FUNCTION); }
 "dict"					{ addToken(Token.FUNCTION); }
 "dir"					{ addToken(Token.FUNCTION); }
 "divmod"				{ addToken(Token.FUNCTION); }
 "enumerate"				{ addToken(Token.FUNCTION); }
 "eval"					{ addToken(Token.FUNCTION); }
 "execfile"				{ addToken(Token.FUNCTION); }
 "file"					{ addToken(Token.FUNCTION); }
 "filter"				{ addToken(Token.FUNCTION); }
 "float"					{ addToken(Token.FUNCTION); }
 "getattr"				{ addToken(Token.FUNCTION); }
 "globals"				{ addToken(Token.FUNCTION); }
 "hasattr"				{ addToken(Token.FUNCTION); }
 "hash"					{ addToken(Token.FUNCTION); }
 "hex"					{ addToken(Token.FUNCTION); }
 "id"					{ addToken(Token.FUNCTION); }
 "input"					{ addToken(Token.FUNCTION); }
 "int"					{ addToken(Token.FUNCTION); }
 "intern"				{ addToken(Token.FUNCTION); }
 "isinstance"				{ addToken(Token.FUNCTION); }
 "issubclass"				{ addToken(Token.FUNCTION); }
 "iter"					{ addToken(Token.FUNCTION); }
 "len"					{ addToken(Token.FUNCTION); }
 "list"					{ addToken(Token.FUNCTION); }
 "locals"				{ addToken(Token.FUNCTION); }
 "long"					{ addToken(Token.FUNCTION); }
 "map"					{ addToken(Token.FUNCTION); }
 "max"					{ addToken(Token.FUNCTION); }
 "min"					{ addToken(Token.FUNCTION); }
 "object"				{ addToken(Token.FUNCTION); }
 "oct"					{ addToken(Token.FUNCTION); }
 "open"					{ addToken(Token.FUNCTION); }
 "ord"					{ addToken(Token.FUNCTION); }
 "pow"					{ addToken(Token.FUNCTION); }
 "property"				{ addToken(Token.FUNCTION); }
 "range"					{ addToken(Token.FUNCTION); }
 "raw_input"				{ addToken(Token.FUNCTION); }
 "reduce"				{ addToken(Token.FUNCTION); }
 "reload"				{ addToken(Token.FUNCTION); }
 "repr"					{ addToken(Token.FUNCTION); }
 "round"					{ addToken(Token.FUNCTION); }
 "setattr"				{ addToken(Token.FUNCTION); }
 "slice"					{ addToken(Token.FUNCTION); }
 "staticmethod"			{ addToken(Token.FUNCTION); }
 "str"					{ addToken(Token.FUNCTION); }
 "sum"					{ addToken(Token.FUNCTION); }
 "super"					{ addToken(Token.FUNCTION); }
 "tuple"					{ addToken(Token.FUNCTION); }
 "type"					{ addToken(Token.FUNCTION); }
 "unichr"				{ addToken(Token.FUNCTION); }
 "unicode"				{ addToken(Token.FUNCTION); }
 "vars"					{ addToken(Token.FUNCTION); }
 "xrange"				{ addToken(Token.FUNCTION); }
 "zip"					{ addToken(Token.FUNCTION); }


<YYINITIAL> {

	{LineTerminator}				{ addNullToken(); return firstToken; }

	{identifier}					{ addToken(Token.IDENTIFIER); }

	{WhiteSpace}+					{ addToken(Token.WHITESPACE); }

	/* String/Character Literals. */
	{stringliteral}				{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	{LongStringStart1}				{ yybegin(LONG_STRING_1); addToken(Token.LITERAL_CHAR); }
	{LongStringStart2}				{ yybegin(LONG_STRING_2); addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }

	/* Comment Literals. */
	{LineComment}					{ addToken(Token.COMMENT_EOL); }

	/* Separators. */
	"("							{ addToken(Token.SEPARATOR); }
	")"							{ addToken(Token.SEPARATOR); }
	"["							{ addToken(Token.SEPARATOR); }
	"]"							{ addToken(Token.SEPARATOR); }
	"{"							{ addToken(Token.SEPARATOR); }
	"}"							{ addToken(Token.SEPARATOR); }

	/* Operators. */
	"="							{ addToken(Token.OPERATOR); }
	"+"							{ addToken(Token.OPERATOR); }
	"-"							{ addToken(Token.OPERATOR); }
	"*"							{ addToken(Token.OPERATOR); }
	"/"							{ addToken(Token.OPERATOR); }
	"%"							{ addToken(Token.OPERATOR); }
	"**"							{ addToken(Token.OPERATOR); }
	"~"							{ addToken(Token.OPERATOR); }
	"<"							{ addToken(Token.OPERATOR); }
	">"							{ addToken(Token.OPERATOR); }
	"<<"							{ addToken(Token.OPERATOR); }
	">>"							{ addToken(Token.OPERATOR); }
	"=="							{ addToken(Token.OPERATOR); }
	"+="							{ addToken(Token.OPERATOR); }
	"-="							{ addToken(Token.OPERATOR); }
	"*="							{ addToken(Token.OPERATOR); }
	"/="							{ addToken(Token.OPERATOR); }
	"%="							{ addToken(Token.OPERATOR); }
	">>="						{ addToken(Token.OPERATOR); }
	"<<="						{ addToken(Token.OPERATOR); }
	"^"							{ addToken(Token.OPERATOR); }
	"&"							{ addToken(Token.OPERATOR); }
	"&&"							{ addToken(Token.OPERATOR); }
	"|"							{ addToken(Token.OPERATOR); }
	"||"							{ addToken(Token.OPERATOR); }
	"?"							{ addToken(Token.OPERATOR); }
	":"							{ addToken(Token.OPERATOR); }
	","							{ addToken(Token.OPERATOR); }
	"!"							{ addToken(Token.OPERATOR); }
	"++"							{ addToken(Token.OPERATOR); }
	"--"							{ addToken(Token.OPERATOR); }
	"."							{ addToken(Token.OPERATOR); }
	","							{ addToken(Token.OPERATOR); }

	/* Numbers */
	{longinteger}|{integer}			{ addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
	{floatnumber}|{imagnumber}		{ addToken(Token.LITERAL_NUMBER_FLOAT); }
	{ErrorNumberFormat}				{ addToken(Token.ERROR_NUMBER_FORMAT); }

	/* Other punctuation, we'll highlight it as "identifiers." */
	"@"							{ addToken(Token.IDENTIFIER); }
	";"							{ addToken(Token.IDENTIFIER); }

	/* Ended with a line not in a string or comment. */
	<<EOF>>						{ addNullToken(); return firstToken; }

	/* Catch any other (unhandled) characters and flag them as bad. */
	.							{ addToken(Token.ERROR_IDENTIFIER); }

}

<LONG_STRING_1> {
	[^\']+						{ addToken(Token.LITERAL_CHAR); }
	"'''"						{ yybegin(YYINITIAL); addToken(Token.LITERAL_CHAR); }
	"'"							{ addToken(Token.LITERAL_CHAR); }
	<<EOF>>						{
									if (firstToken==null) {
										addToken(Token.LITERAL_CHAR); 
									}
									return firstToken;
								}
}

<LONG_STRING_2> {
	[^\"]+						{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	\"\"\"						{ yybegin(YYINITIAL); addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	\"							{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	<<EOF>>						{
									if (firstToken==null) {
										addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); 
									}
									return firstToken;
								}
}



