// org.jetbrains.kotlin.lexer.Kotlin.flex - JFlex specification of the Kotlin lexer
package org.jetbrains.kotlin.lexer;
import java.util.*;
import com.intellij.lexer.*;
import com.intellij.psi.*;
import com.intellij.psi.tree.IElementType;
import com.intellij.util.containers.Stack;
import org.jetbrains.kotlin.lexer.KotlinLexerException;
import org.jetbrains.kotlin.lexer.JetTokens;
%%
// Scan input using the full Unicode character set.
%unicode
// Name of the generated scanner class.
%class _JetLexer
// Interface the generated class implements (presumably com.intellij.lexer.FlexLexer, imported above).
%implements FlexLexer
%{
/**
 * Immutable snapshot of the scanner's lexical state and brace counter.
 * One instance is pushed by {@code pushState} and restored by {@code popState}.
 */
private static final class State {
    /** Value of the lexer's {@code lBraceCount} at the moment of the push. */
    final int lBraceCount;
    /** Lexical state (as reported by {@code yystate()}) at the moment of the push. */
    final int state;

    /**
     * @param state       lexical state to remember
     * @param lBraceCount brace counter to remember
     */
    public State(int state, int lBraceCount) {
        this.state = state;
        this.lBraceCount = lBraceCount;
    }

    /**
     * Debug representation; the brace counter is shown only when it is non-zero.
     */
    @Override
    public String toString() {
        // The ", " separator keeps the two parts from running together.
        return "yystate = " + state + (lBraceCount == 0 ? "" : ", lBraceCount = " + lBraceCount);
    }
}
// Saved (lexical state, brace count) pairs: pushState() adds one, popState() restores the top one.
// The element type is spelled out so that states.pop() yields a State without a cast.
private final Stack<State> states = new Stack<State>();
// Brace counter for the current state: reset by pushState(), adjusted by the "{" / "}" template rules.
private int lBraceCount;
// Token start offset recorded when a "/*" or "/**" comment is opened.
private int commentStart;
// Number of nested "/*" openings seen inside the comment currently being scanned.
private int commentDepth;
/**
 * Switches the scanner to {@code state}, first saving the current lexical state
 * and brace counter on {@code states}; the brace counter restarts from zero.
 *
 * @param state lexical state to enter
 */
private void pushState(int state) {
    final State saved = new State(yystate(), lBraceCount);
    states.push(saved);
    lBraceCount = 0;
    yybegin(state);
}
/**
 * Leaves the current lexical state: takes the most recently saved entry off
 * {@code states} and reinstates its brace counter and lexical state.
 */
private void popState() {
    final State saved = states.pop();
    lBraceCount = saved.lBraceCount;
    yybegin(saved.state);
}
/**
 * Maps a comment lexical state to the token type emitted for that kind of comment.
 *
 * @param state either {@code BLOCK_COMMENT} or {@code DOC_COMMENT}
 * @return the matching {@code JetTokens} comment token type
 * @throws IllegalArgumentException if {@code state} is any other value
 */
private IElementType commentStateToTokenType(int state) {
    if (state == BLOCK_COMMENT) {
        return JetTokens.BLOCK_COMMENT;
    }
    if (state == DOC_COMMENT) {
        return JetTokens.DOC_COMMENT;
    }
    throw new IllegalArgumentException("Unexpected state: " + state);
}
%}
// Exception type the generated scanner throws on an internal scan error.
%scanerror KotlinLexerException
// The generated scanning method is named advance() and returns IElementType.
%function advance
%type IElementType
// Code executed when end of input is reached; nothing needs to be done here.
%eof{
return;
%eof}
// Exclusive states: only rules explicitly qualified with the state are active in them.
%xstate STRING RAW_STRING SHORT_TEMPLATE_ENTRY BLOCK_COMMENT DOC_COMMENT
// Inclusive state: unqualified rules are active here as well.
%state LONG_TEMPLATE_ENTRY
// Basic character classes.
DIGIT=[0-9]
HEX_DIGIT=[0-9A-Fa-f]
// Space, newline, tab and form feed; '\r' is not part of this class.
WHITE_SPACE_CHAR=[\ \n\t\f]
// TODO: prohibit '$' in identifiers?
LETTER = [:letter:]|_
IDENTIFIER_PART=[:digit:]|{LETTER}
// A letter or underscore followed by any number of letters, digits or underscores.
PLAIN_IDENTIFIER={LETTER} {IDENTIFIER_PART}*
// TODO: this one MUST allow everything accepted by the runtime
// TODO: Replace backticks by one backslash in the beginning
// A non-empty run of characters other than backtick and newline, enclosed in backticks.
ESCAPED_IDENTIFIER = `[^`\n]+`
IDENTIFIER = {PLAIN_IDENTIFIER}|{ESCAPED_IDENTIFIER}
// An identifier prefixed with '$'.
FIELD_IDENTIFIER = \${IDENTIFIER}
// "//" followed by everything up to, but not including, the newline.
EOL_COMMENT="/""/"[^\n]*
// "#!" followed by everything up to, but not including, the newline.
SHEBANG_COMMENT="#!"[^\n]*
// Integer literals: decimal, hexadecimal (0x...) or binary (0b...), each with an optional L/l suffix.
INTEGER_LITERAL={DECIMAL_INTEGER_LITERAL}|{HEX_INTEGER_LITERAL}|{BIN_INTEGER_LITERAL}
// Either a lone 0, or a non-zero digit followed by further digits (no leading zeros).
DECIMAL_INTEGER_LITERAL=(0|([1-9]({DIGIT})*))({LONG_SUFFIX})?
// The digit part after 0x/0X may be empty.
HEX_INTEGER_LITERAL=0[Xx]({HEX_DIGIT})*({LONG_SUFFIX})?
// NOTE(review): any decimal digit (not only 0/1) is accepted after 0b/0B, and the digit part
// may be empty - presumably invalid forms are reported at a later stage; confirm.
BIN_INTEGER_LITERAL=0[Bb]({DIGIT})*({LONG_SUFFIX})?
LONG_SUFFIX=[Ll]
//FLOAT_LITERAL=(({FLOATING_POINT_LITERAL1})[Ff])|(({FLOATING_POINT_LITERAL2})[Ff])|(({FLOATING_POINT_LITERAL3})[Ff])|(({FLOATING_POINT_LITERAL4})[Ff])
//DOUBLE_LITERAL=(({FLOATING_POINT_LITERAL1})[Dd]?)|(({FLOATING_POINT_LITERAL2})[Dd]?)|(({FLOATING_POINT_LITERAL3})[Dd]?)|(({FLOATING_POINT_LITERAL4})[Dd])
// Floating-point literals: any of the four shapes below.
DOUBLE_LITERAL={FLOATING_POINT_LITERAL1}|{FLOATING_POINT_LITERAL2}|{FLOATING_POINT_LITERAL3}|{FLOATING_POINT_LITERAL4}
// digits "." digits [exponent] [suffix]
FLOATING_POINT_LITERAL1=({DIGIT})+"."({DIGIT})+({EXPONENT_PART})?({FLOATING_POINT_LITERAL_SUFFIX})?
// "." digits [exponent] [suffix]
FLOATING_POINT_LITERAL2="."({DIGIT})+({EXPONENT_PART})?({FLOATING_POINT_LITERAL_SUFFIX})?
// digits exponent [suffix]
FLOATING_POINT_LITERAL3=({DIGIT})+({EXPONENT_PART})({FLOATING_POINT_LITERAL_SUFFIX})?
// digits suffix
FLOATING_POINT_LITERAL4=({DIGIT})+({FLOATING_POINT_LITERAL_SUFFIX})
FLOATING_POINT_LITERAL_SUFFIX=[Ff]
// 'e' or 'E', an optional sign, then digits (possibly none).
EXPONENT_PART=[Ee]["+""-"]?({DIGIT})*
// A single quote, then any mix of escape sequences and characters other than backslash, quote
// and newline, optionally ended by a closing quote or a lone backslash; the optional ending
// means an unterminated literal is still matched by this macro.
CHARACTER_LITERAL="'"([^\\\'\n]|{ESCAPE_SEQUENCE})*("'"|\\)?
// TODO: introduce symbols (e.g. 'foo) as another way to write string literals
// Backslash followed by 'u' and four hex digits, or by any single character except newline.
ESCAPE_SEQUENCE=\\(u{HEX_DIGIT}{HEX_DIGIT}{HEX_DIGIT}{HEX_DIGIT}|[^\n])
// ANY_ESCAPE_SEQUENCE = \\[^]
// Exactly three double quotes.
THREE_QUO = (\"\"\")
// Three double quotes followed by any number of additional double quotes.
THREE_OR_MORE_QUO = ({THREE_QUO}\"*)
// A non-empty run of characters containing no backslash, double quote, newline or '$'.
REGULAR_STRING_PART=[^\\\"\n\$]+
// '$' followed by an identifier (same pattern as FIELD_IDENTIFIER).
SHORT_TEMPLATE_ENTRY=\${IDENTIFIER}
// A single '$'.
LONELY_DOLLAR=\$
// The two characters "${".
LONG_TEMPLATE_ENTRY_START=\$\{
%%
// String templates
//
// Every rule that belongs to a string-related lexical state is qualified with that state.
// STRING, RAW_STRING and SHORT_TEMPLATE_ENTRY are exclusive states, so only qualified rules
// are active inside them; without the qualifiers the \" and \n rules would shadow one another.

// Opening """ of a raw string.
{THREE_QUO}                      { pushState(RAW_STRING); return JetTokens.OPEN_QUOTE; }
// Inside a raw string, newlines, single quotes and backslashes are ordinary content.
<RAW_STRING> \n                  { return JetTokens.REGULAR_STRING_PART; }
<RAW_STRING> \"                  { return JetTokens.REGULAR_STRING_PART; }
<RAW_STRING> \\                  { return JetTokens.REGULAR_STRING_PART; }
<RAW_STRING> {THREE_OR_MORE_QUO} {
                                    int length = yytext().length();
                                    if (length <= 3) { // closing """
                                        popState();
                                        return JetTokens.CLOSING_QUOTE;
                                    }
                                    else { // some quotes at the end of a string, e.g. """ "foo""""
                                        yypushback(3); // return the closing quotes (""") to the stream
                                        return JetTokens.REGULAR_STRING_PART;
                                    }
                                 }

// Opening quote of a regular string.
\"                          { pushState(STRING); return JetTokens.OPEN_QUOTE; }
// A newline ends a regular string; it is pushed back so it is scanned again in the outer state.
<STRING> \n                 { popState(); yypushback(1); return JetTokens.DANGLING_NEWLINE; }
<STRING> \"                 { popState(); return JetTokens.CLOSING_QUOTE; }
<STRING> {ESCAPE_SEQUENCE}  { return JetTokens.ESCAPE_SEQUENCE; }

// Content and template entries are shared by both kinds of string.
<STRING, RAW_STRING> {REGULAR_STRING_PART}         { return JetTokens.REGULAR_STRING_PART; }
<STRING, RAW_STRING> {SHORT_TEMPLATE_ENTRY}        {
                                                        pushState(SHORT_TEMPLATE_ENTRY);
                                                        // Keep only the '$'; the name is scanned in SHORT_TEMPLATE_ENTRY.
                                                        yypushback(yylength() - 1);
                                                        return JetTokens.SHORT_TEMPLATE_ENTRY_START;
                                                   }
// Only *this* keyword is itself an expression valid in this position
// *null*, *true* and *false* are also keywords and expression, but it does not make sense to put them
// in a string template for it'd be easier to just type them in without a dollar
<SHORT_TEMPLATE_ENTRY> "this"          { popState(); return JetTokens.THIS_KEYWORD; }
<SHORT_TEMPLATE_ENTRY> {IDENTIFIER}    { popState(); return JetTokens.IDENTIFIER; }

<STRING, RAW_STRING> {LONELY_DOLLAR}               { return JetTokens.REGULAR_STRING_PART; }
<STRING, RAW_STRING> {LONG_TEMPLATE_ENTRY_START}   { pushState(LONG_TEMPLATE_ENTRY); return JetTokens.LONG_TEMPLATE_ENTRY_START; }

// Inside ${ ... }: count nested braces so that only the matching "}" closes the entry.
<LONG_TEMPLATE_ENTRY> "{"              { lBraceCount++; return JetTokens.LBRACE; }
<LONG_TEMPLATE_ENTRY> "}"              {
                                           if (lBraceCount == 0) {
                                             popState();
                                             return JetTokens.LONG_TEMPLATE_ENTRY_END;
                                           }
                                           lBraceCount--;
                                           return JetTokens.RBRACE;
                                       }
// (Nested) comments
//
// A comment is scanned in the exclusive BLOCK_COMMENT / DOC_COMMENT state and returned as a
// single token once its outermost "*/" (or end of input) is reached.

// Empty block comment; being longer, this match wins over the "/**" rule below.
"/**/" {
    return JetTokens.BLOCK_COMMENT;
}

"/**" {
    pushState(DOC_COMMENT);
    commentDepth = 0;
    commentStart = getTokenStart();
}

"/*" {
    pushState(BLOCK_COMMENT);
    commentDepth = 0;
    commentStart = getTokenStart();
}

<BLOCK_COMMENT, DOC_COMMENT> {
    // A nested comment opens.
    "/*" {
         commentDepth++;
    }

    // Unterminated comment: emit what has been read so far as a comment token.
    <<EOF>> {
        int state = yystate();
        popState();
        // Move the token start back to where the comment was opened.
        zzStartRead = commentStart;
        return commentStateToTokenType(state);
    }

    "*/" {
        if (commentDepth > 0) {
            // Closes a nested comment only.
            commentDepth--;
        }
        else {
             int state = yystate();
             popState();
             // Move the token start back to where the comment was opened.
             zzStartRead = commentStart;
             return commentStateToTokenType(state);
        }
    }

    // Any other comment content is consumed without producing a token.
    .|{WHITE_SPACE_CHAR} {}
}
// Mere mortals
// A run of whitespace characters becomes a single WHITE_SPACE token.
({WHITE_SPACE_CHAR})+ { return JetTokens.WHITE_SPACE; }
{EOL_COMMENT} { return JetTokens.EOL_COMMENT; }
// "#!..." is a shebang comment only when zzCurrentPos is 0; otherwise everything after the
// '#' is pushed back and a lone HASH token is returned.
// NOTE(review): presumably zzCurrentPos == 0 means the match starts at the very beginning of
// the input - confirm against the generated scanner.
{SHEBANG_COMMENT} {
if (zzCurrentPos == 0) {
return JetTokens.SHEBANG_COMMENT;
}
else {
yypushback(yylength() - 1);
return JetTokens.HASH;
}
}
// An integer directly followed by "..": the two dots are pushed back and scanned separately.
{INTEGER_LITERAL}\.\. { yypushback(2); return JetTokens.INTEGER_LITERAL; }
{INTEGER_LITERAL} { return JetTokens.INTEGER_LITERAL; }
// Floating-point literals are reported as FLOAT_LITERAL.
{DOUBLE_LITERAL} { return JetTokens.FLOAT_LITERAL; }
{CHARACTER_LITERAL} { return JetTokens.CHARACTER_LITERAL; }
// Keywords. These rules come before {IDENTIFIER}, so when both match the same text the
// keyword rule (listed first) is the one chosen.
"typealias" { return JetTokens.TYPE_ALIAS_KEYWORD ;}
"interface" { return JetTokens.INTERFACE_KEYWORD ;}
"continue" { return JetTokens.CONTINUE_KEYWORD ;}
"package" { return JetTokens.PACKAGE_KEYWORD ;}
"return" { return JetTokens.RETURN_KEYWORD ;}
"object" { return JetTokens.OBJECT_KEYWORD ;}
"while" { return JetTokens.WHILE_KEYWORD ;}
"break" { return JetTokens.BREAK_KEYWORD ;}
"class" { return JetTokens.CLASS_KEYWORD ;}
"throw" { return JetTokens.THROW_KEYWORD ;}
"false" { return JetTokens.FALSE_KEYWORD ;}
"super" { return JetTokens.SUPER_KEYWORD ;}
"when" { return JetTokens.WHEN_KEYWORD ;}
"true" { return JetTokens.TRUE_KEYWORD ;}
"this" { return JetTokens.THIS_KEYWORD ;}
"null" { return JetTokens.NULL_KEYWORD ;}
"else" { return JetTokens.ELSE_KEYWORD ;}
"This" { return JetTokens.CAPITALIZED_THIS_KEYWORD ;}
"try" { return JetTokens.TRY_KEYWORD ;}
"val" { return JetTokens.VAL_KEYWORD ;}
"var" { return JetTokens.VAR_KEYWORD ;}
"fun" { return JetTokens.FUN_KEYWORD ;}
"for" { return JetTokens.FOR_KEYWORD ;}
"is" { return JetTokens.IS_KEYWORD ;}
"in" { return JetTokens.IN_KEYWORD ;}
"if" { return JetTokens.IF_KEYWORD ;}
"do" { return JetTokens.DO_KEYWORD ;}
"as" { return JetTokens.AS_KEYWORD ;}
// '$'-prefixed identifiers, then ordinary (plain or backtick-escaped) identifiers.
{FIELD_IDENTIFIER} { return JetTokens.FIELD_IDENTIFIER; }
{IDENTIFIER} { return JetTokens.IDENTIFIER; }
// "!in" / "!is" directly followed by an identifier character (e.g. "!inside") is not the
// NOT_IN / NOT_IS operator: everything after the '!' is pushed back and EXCL is returned.
\!in{IDENTIFIER_PART} { yypushback(3); return JetTokens.EXCL; }
\!is{IDENTIFIER_PART} { yypushback(3); return JetTokens.EXCL; }
// Three-character operators.
"===" { return JetTokens.EQEQEQ ; }
"!==" { return JetTokens.EXCLEQEQEQ; }
"!in" { return JetTokens.NOT_IN; }
"!is" { return JetTokens.NOT_IS; }
"as?" { return JetTokens.AS_SAFE; }
// Two-character operators.
"++" { return JetTokens.PLUSPLUS ; }
"--" { return JetTokens.MINUSMINUS; }
"<=" { return JetTokens.LTEQ ; }
">=" { return JetTokens.GTEQ ; }
"==" { return JetTokens.EQEQ ; }
"!=" { return JetTokens.EXCLEQ ; }
"&&" { return JetTokens.ANDAND ; }
"||" { return JetTokens.OROR ; }
"*=" { return JetTokens.MULTEQ ; }
"/=" { return JetTokens.DIVEQ ; }
"%=" { return JetTokens.PERCEQ ; }
"+=" { return JetTokens.PLUSEQ ; }
"-=" { return JetTokens.MINUSEQ ; }
"->" { return JetTokens.ARROW ; }
"=>" { return JetTokens.DOUBLE_ARROW; }
".." { return JetTokens.RANGE ; }
"::" { return JetTokens.COLONCOLON; }
// Brackets, braces and parentheses.
"[" { return JetTokens.LBRACKET ; }
"]" { return JetTokens.RBRACKET ; }
"{" { return JetTokens.LBRACE ; }
"}" { return JetTokens.RBRACE ; }
"(" { return JetTokens.LPAR ; }
")" { return JetTokens.RPAR ; }
// Single-character operators and punctuation (";;" is listed with its single-character form).
"." { return JetTokens.DOT ; }
"*" { return JetTokens.MUL ; }
"+" { return JetTokens.PLUS ; }
"-" { return JetTokens.MINUS ; }
"!" { return JetTokens.EXCL ; }
"/" { return JetTokens.DIV ; }
"%" { return JetTokens.PERC ; }
"<" { return JetTokens.LT ; }
">" { return JetTokens.GT ; }
"?" { return JetTokens.QUEST ; }
":" { return JetTokens.COLON ; }
";;" { return JetTokens.DOUBLE_SEMICOLON;}
";" { return JetTokens.SEMICOLON ; }
"=" { return JetTokens.EQ ; }
"," { return JetTokens.COMMA ; }
"#" { return JetTokens.HASH ; }
"@" { return JetTokens.AT ; }
// error fallback
// Any character not matched by an earlier rule in a non-exclusive state.
.            { return TokenType.BAD_CHARACTER; }
// error fallback for exclusive states
// Unqualified rules are not active in exclusive states, so those states are listed explicitly.
<STRING, RAW_STRING, SHORT_TEMPLATE_ENTRY, BLOCK_COMMENT, DOC_COMMENT> .
             { return TokenType.BAD_CHARACTER; }