// scala.tools.nsc.ast.parser.Scanners.scala — Maven / Gradle / Ivy artifact page residue.
// (The following lines are repository-page text, wrapped in comments so the file stays valid Scala.)
// Go to download
// Show more of this group; show more artifacts with this name.
// Show all versions of scala-compiler; show documentation.
// Compiler for the SubScript extension of the Scala Programming Language.
// The newest version!
/* NSC -- new Scala compiler
* Copyright 2005-2013 LAMP/EPFL
* @author Martin Odersky
*/
package scala.tools.nsc
package ast.parser
import scala.tools.nsc.util.{ CharArrayReader, CharArrayReaderData }
import scala.reflect.internal.util._
import scala.reflect.internal.Chars._
import Tokens._
import scala.annotation.{ switch, tailrec }
import scala.collection.{ mutable, immutable }
import mutable.{ ListBuffer, ArrayBuffer }
import scala.tools.nsc.ast.parser.xml.Utility.isNameStart
import scala.language.postfixOps
/** See Parsers.scala / ParsersCommon for some explanation of ScannersCommon.
*/
/** Common scanner infrastructure shared by the regular scanner and
 *  specialized scanners. See Parsers.scala / ParsersCommon for some
 *  explanation of ScannersCommon.
 */
trait ScannersCommon {
  val global : Global
  import global._

  /** Offset into source character array */
  type Offset = Int
  type Token  = Int

  /** Read-only view of the data describing a single token. */
  trait CommonTokenData {
    def token: Token
    def name: TermName
  }

  trait ScannerCommon extends CommonTokenData {
    // things to fill in, in addition to buf, decodeUni which come from CharArrayReader
    def error               (off: Offset, msg: String): Unit
    def incompleteInputError(off: Offset, msg: String): Unit
    def deprecationWarning  (off: Offset, msg: String): Unit
  }

  /** Build a dense keyword lookup table.
   *
   *  @param keywords     (name, token) pairs for every keyword
   *  @param defaultToken token stored for name indices that are not keywords
   *  @return (low, arr) where `low` is the smallest keyword name-table start
   *          index and `arr(name.start - low)` yields the keyword token (or
   *          `defaultToken`). Callers look tokens up as
   *          `kwArray(name.start - kwOffset)` (see `finishNamed`).
   */
  def createKeywordArray(keywords: Seq[(Name, Token)], defaultToken: Token): (Token, Array[Token]) = {
    val names = keywords sortBy (_._1.start) map { case (k, v) => (k.start, v) }
    val low   = names.head._1
    val high  = names.last._1
    val arr   = Array.fill(high - low + 1)(defaultToken)
    // BUGFIX: was `arr(k + low) = v`, which only works when low == 0 and
    // overflows the array otherwise. The lookup index is `start - low`,
    // so the storage index must be `k - low`.
    names foreach { case (k, v) => arr(k - low) = v }
    (low, arr)
  }
}
trait Scanners extends ScannersCommon {
val global : Global
import global._
/** Mutable state describing one scanned token: its code, offsets into the
 *  source buffer, and (where applicable) its name, string value and numeric
 *  base. Instances are copied wholesale for lookahead/history (see
 *  ScannerData.next / prev).
 */
trait TokenData extends CommonTokenData {
/** the next token */
var token: Token = EMPTY
/** the offset of the first character of the current token */
var offset: Offset = 0
/** the offset of the character following the token preceding this one */
var lastOffset: Offset = 0
/** the name of an identifier */
var name: TermName = null
/** the string value of a literal */
var strVal: String = null
/** the base of a number */
var base: Int = 0
/** Copy all token fields from `td` into this instance; returns `this`
 *  to allow chaining (used for backup/restore of scanner state).
 */
def copyFrom(td: TokenData): this.type = {
this.token = td.token
this.offset = td.offset
this.lastOffset = td.lastOffset
this.name = td.name
this.strVal = td.strVal
this.base = td.base
this
}
}
/** An interface to most of mutable data in Scanner defined in TokenData
 * and CharArrayReader (+ next, prev fields) with copyFrom functionality
 * to backup/restore data (used by quasiquotes' lookingAhead).
 */
trait ScannerData extends TokenData with CharArrayReaderData {
/** we need one token lookahead and one token history
 */
val next: TokenData = new TokenData{}
val prev: TokenData = new TokenData{}
/** Restore this scanner's complete state (current token, lookahead,
 *  history, and character-reader position) from `sd`.
 */
def copyFrom(sd: ScannerData): this.type = {
this.next copyFrom sd.next
this.prev copyFrom sd.prev
// Restore both supertraits' state explicitly; TokenData last so the
// current-token fields win over any overlap.
super[CharArrayReaderData].copyFrom(sd)
super[TokenData].copyFrom(sd)
this
}
}
abstract class Scanner extends CharArrayReader with TokenData with ScannerData with ScannerCommon {
private def isDigit(c: Char) = java.lang.Character isDigit c
///////////////////// BEGIN OF SUBSCRIPT SECTION ///////////////////////////
// Mutable flags tracking where the scanner is inside a SubScript
// `script ...` section; they change which token sequences are recognized.
var prevWasInSubScript_script = false
var isInSubScript_script = false
var isInSubScript_header = false
var isInSubScript_nativeCode = false
var isInSubScript_val_var_init = false
// In a script body (past the header) special SubScript tokens are scanned.
def isInSubScript_body = isInSubScript_script && !isInSubScript_header
// True when scanning a plain script expression (not native code, not a
// val/var initializer, and not inside a partial script except in its
// case-script part).
def isInSubScript_expression = isInSubScript_body &&
!isInSubScript_nativeCode &&
!isInSubScript_val_var_init &&
(!isInSubScript_partialScript || isInSubScript_partialScript_caseScript)
// NOTE(review): appears unused within this file chunk — confirm before removing.
var sepRegions_SubScript_partialScript = -1
// Nesting levels for partial scripts and their case-script parts.
var level_SubScript_partialScript = 0
var level_SubScript_partialScript_caseScript = 0
def isInSubScript_partialScript = level_SubScript_partialScript > 0
def isInSubScript_partialScript_caseScript = level_SubScript_partialScript_caseScript > 0
def start_SubScript_partialScript = {level_SubScript_partialScript += 1}
def end_SubScript_partialScript = {level_SubScript_partialScript -= 1}
def start_SubScript_partialScript_caseScript = {level_SubScript_partialScript_caseScript += 1}
def end_SubScript_partialScript_caseScript = {level_SubScript_partialScript_caseScript -= 1}
// A val/var initializer is scanned as native (Scala) code.
def start_SubScript_val_var_init = {isInSubScript_val_var_init = true; isInSubScript_nativeCode = true}
def end_SubScript_val_var_init = {isInSubScript_val_var_init =false; isInSubScript_nativeCode =false}
var linePosOfScriptsSection = 0 // in a "script.." section, the line position of "script" (or any modifier before it); often something like 4
var linePosOfScriptEqualsSym = 0 // in a script definition, the line position of "=" or "+="
var scriptExpressionParenthesesNestingLevel = 0
// TBD cleanup
// lineOffset needs to be computed here since CharArrayReader sometimes increments its lineStartOffset earlier than expected
/** Is the token at `tokenOffset` positioned left of the script's "="/"+="
 *  column on its line? Used for layout-sensitive script parsing.
 */
def isLeftOfEqualsSym(tokenOffset: Offset): Boolean = {
val lineOffset = if (tokenOffset < lineStartOffset) lastLineStartOffset else lineStartOffset
//println(s"isLeftOfEqualsSym tokenOffset=${tokenOffset} lineOffset=$lineOffset tokenOffset-lineOffset=${tokenOffset-lineOffset} linePosOfScriptEqualsSym=$linePosOfScriptEqualsSym")
tokenOffset-lineOffset < linePosOfScriptEqualsSym
}
/** Is `token` a plain or backquoted identifier? */
def isIdent(token: Token) = token == IDENTIFIER || token == BACKQUOTED_IDENT
// Raw spellings of the SubScript expression operators. raw_space is the
// literal placeholder name "SPACE" (whitespace acts as an operator).
final val raw_space = "SPACE"
final val raw_semi = ";"
final val raw_bar = "|"
final val raw_bar2 = "||"
final val raw_amp = "&"
final val raw_amp2 = "&&"
final val raw_slash = "/"
final val raw_mathDot = "\u00B7"
// All infix operator names recognized in SubScript expressions.
final val subScriptInfixOperators_Set = Set(nme.raw.PLUS.toString, //SEMI_Name gets special treatment, like in Scala
raw_bar .toString,
raw_bar2 .toString,
raw_amp .toString,
raw_amp2 .toString,
raw_slash .toString,
raw_mathDot.toString,
raw_space .toString
)
final val setSubScriptUnaryPrefixOp = Set(nme.raw.MINUS.toString, nme.raw.TILDE.toString, nme.raw.BANG.toString)
final val setSubScriptPostfixOp = Set(nme.XOR.toString)
def isSubScriptUnaryPrefixOp (name : String): Boolean = setSubScriptUnaryPrefixOp(name)
def isSubScriptPostfixOp (name : String): Boolean = setSubScriptPostfixOp (name)
/** Is `name` one of the SubScript infix operator spellings? */
def isSubScriptInfixOpName(name: String): Boolean = {
var result = subScriptInfixOperators_Set(name)
//println(s"isSubScriptInfixOpName($name): $result")
result
}
// TokenData overloads: classify the token by its identifier name.
def isSubScriptUnaryPrefixOp(tokenData: TokenData): Boolean = isIdent(tokenData.token) && isSubScriptUnaryPrefixOp(tokenData.name.toString)
def isSubScriptPostfixOp (tokenData: TokenData): Boolean = isIdent(tokenData.token) && isSubScriptPostfixOp (tokenData.name.toString)
def isSubScriptOperator(name: String): Boolean = isSubScriptUnaryPrefixOp(name) ||
isSubScriptPostfixOp( name) ||
isSubScriptInfixOpName (name)
// for uniformity: a SEMI becomes a name, like other script expression operators
/** Is the given token an infix operator in a SubScript expression?
 *  SEMI and NEWLINE(S) are deliberately excluded here.
 */
def isSubScriptInfixOp(tokenData: TokenData): Boolean = {
val result = tokenData.token match {
case IDENTIFIER => isSubScriptInfixOpName(tokenData.name.toString)
case SEMI => false
case NEWLINE | NEWLINES => false // in.isSubScriptTermStarter(in.next) TBD: remove these two lines
case _ => false
}
//println(s"isSubScriptInfixOp($tokenData): $result")
result
}
/** Does `token` act as a separator/opener after which no NEWLINE should be
 *  inserted in a SubScript expression (infix op, semi, comma, colon, or any
 *  of the open-brace/paren variants)?
 */
def isSubScriptInfixOpOrSemiOrCommaOrColonOrOpenBrace(token: Token): Boolean =
token match {
case SEMI
| COMMA
| COLON
| LESS2
| LPAREN
| LPAREN_ASTERISK
| LPAREN_ASTERISK2
| LBRACE
| LBRACE_DOT
| LBRACE_DOT3
| LBRACE_QMARK
| LBRACE_EMARK
| LBRACE_ASTERISK
| LBRACE_CARET => true
// NOTE(review): `token` is an Int, so `token.toString` yields digits and
// can never match an operator name — this case is effectively always
// false. The intent was probably to consult the identifier's *name*, but
// callers pass `lastToken` whose name is no longer available here.
// TODO confirm intended behavior before changing.
case IDENTIFIER => isSubScriptInfixOpName(token.toString)
case _ => false
}
/** Can the scanner's *current* token start a term in a SubScript
 *  expression? Consulted by nextToken when deciding whether to insert a
 *  NEWLINE token. Keywords, open braces/parens, literals and
 *  non-infix identifiers qualify; unary prefix operators (-, ~, !) do too.
 */
def isSubScriptTermStarter: Boolean = {val result = token match {
case VAL
| VAR
| PRIVATE
| BACKQUOTED_IDENT
| THIS
| SUPER
| NEW
| AT
| WHILE
| DOT
| DOT2
| DOT3
| LESS2
| LPAREN
| LPAREN_PLUS_RPAREN
| LPAREN_MINUS_RPAREN
| LPAREN_PLUS_MINUS_RPAREN
| LPAREN_SEMI_RPAREN
| LPAREN_ASTERISK
| LPAREN_ASTERISK2
| LBRACE
| LBRACE_DOT
| LBRACE_DOT3
| LBRACE_QMARK
| LBRACE_EMARK
| LBRACE_ASTERISK
| LBRACE_CARET => true
// identifiers start a term unless they are infix operators
case IDENTIFIER if (!isSubScriptInfixOp(this)) => true
case CHARLIT | INTLIT | LONGLIT
| FLOATLIT | DOUBLELIT
| STRINGLIT | INTERPOLATIONID | SYMBOLLIT
| TRUE | FALSE | NULL => true // isLiteralToken
case _ => isSubScriptUnaryPrefixOp(this) // MINUS is always OK, also for -1 etc
}
//println(s"isSubScriptTermStarter(${tokenData.token}:${tokenData.name}): $result")
result
}
/** True exactly when `token` is the IF or DO keyword. */
def isIF_or_DO(token: Token): Boolean = token == IF || token == DO
///////////////////// END OF SUBSCRIPT SECTION ///////////////////////////
// Depth of nested /* */ comments currently open.
private var openComments = 0
// Consume one comment character; overridden by doc-comment-collecting scanners.
protected def putCommentChar(): Unit = nextChar()
/** Skip to the end of a // line comment (stops at EOF/CR/LF without consuming it). */
@tailrec private def skipLineComment(): Unit = ch match {
case SU | CR | LF =>
case _ => nextChar() ; skipLineComment()
}
// On '/': if followed by '*', a nested comment opens.
private def maybeOpen(): Unit = {
putCommentChar()
if (ch == '*') {
putCommentChar()
openComments += 1
}
}
// On '*': if followed by '/', a comment closes; true when the outermost one did.
private def maybeClose(): Boolean = {
putCommentChar()
(ch == '/') && {
putCommentChar()
openComments -= 1
openComments == 0
}
}
/** Skip a (possibly nested) block comment; reports an error on EOF. */
@tailrec final def skipNestedComments(): Unit = ch match {
case '/' => maybeOpen() ; skipNestedComments()
case '*' => if (!maybeClose()) skipNestedComments()
case SU => incompleteInputError("unclosed comment")
case _ => putCommentChar() ; skipNestedComments()
}
def skipDocComment(): Unit = skipNestedComments()
def skipBlockComment(): Unit = skipNestedComments()
private def skipToCommentEnd(isLineComment: Boolean): Unit = {
nextChar()
if (isLineComment) skipLineComment()
else {
openComments = 1
val isDocComment = (ch == '*') && { nextChar(); true }
if (isDocComment) {
// Check for the amazing corner case of /**/
if (ch == '/')
nextChar()
else
skipDocComment()
}
else skipBlockComment()
}
}
/** @pre ch == '/'
* Returns true if a comment was skipped.
*/
def skipComment(): Boolean = ch match {
case '/' | '*' => skipToCommentEnd(isLineComment = ch == '/') ; true
case _ => false
}
// Returns and clears any pending doc comment; no-op in this base scanner.
def flushDoc(): DocComment = null
/** To prevent doc comments attached to expressions from leaking out of scope
* onto the next documentable entity, they are discarded upon passing a right
* brace, bracket, or parenthesis.
*/
def discardDocBuffer(): Unit = ()
// True once the character reader has consumed the whole buffer.
def isAtEnd = charOffset >= buf.length
/** Resume scanning after an interruption (e.g. XML mode), restoring the
 *  last token code; errors if a lookahead token was pending.
 */
def resume(lastCode: Token) = {
token = lastCode
if (next.token != EMPTY && !reporter.hasErrors)
syntaxError("unexpected end of input: possible missing '}' in XML block")
nextToken()
}
/** A character buffer for literals
*/
val cbuf = new StringBuilder
/** append Unicode character to "cbuf" buffer
*/
protected def putChar(c: Char): Unit = {
// assert(cbuf.size < 10000, cbuf)
cbuf.append(c)
}
/** Determines whether this scanner should emit identifier deprecation warnings,
* e.g. when seeing `macro` or `then`, which are planned to become keywords in future versions of Scala.
*/
protected def emitIdentifierDeprecationWarnings = true
/** Clear buffer and set name and token.
 *  Converts `cbuf` into a TermName and, for plain identifiers, consults the
 *  keyword array (`kwArray`, indexed by `name.start - kwOffset`) to turn
 *  keywords into their dedicated tokens. SubScript-only keywords (`script`,
 *  `break`, and `then` inside a script) get special treatment.
 */
private def finishNamed(idtoken: Token = IDENTIFIER): Unit = {
name = newTermName(cbuf.toString)
cbuf.clear()
token = idtoken
if (idtoken == IDENTIFIER) {
val idx = name.start - kwOffset
if (idx >= 0 && idx < kwArray.length) {
token = kwArray(idx)
if (token == IDENTIFIER) {
if (allowIdent != name
&& nme.SCRIPTkw != name // Note: only used in SubScript; do not warn
&& nme.BREAKkw != name // Note: only used in SubScript; do not warn
)
if (name == nme.MACROkw)
syntaxError(s"$name is now a reserved word; usage as an identifier is disallowed")
// `then` is a keyword only inside SubScript scripts
else if (isInSubScript_script && name.toString=="then") token = THEN
else if (emitIdentifierDeprecationWarnings)
deprecationWarning(s"$name is now a reserved word; usage as an identifier is deprecated")
}
}
}
}
/** Clear buffer and set string */
private def setStrVal() {strVal = cbuf.toString; cbuf.clear()}
/** a stack of tokens which indicates whether line-ends can be statement separators
* also used for keeping track of nesting levels.
* We keep track of the closing symbol of a region. This can be
* RPAREN if region starts with '('
* RBRACKET if region starts with '['
* RBRACE if region starts with '{'
* ARROW if region starts with `case'
* STRINGLIT if region is a string interpolation expression starting with '${'
* (the STRINGLIT appears twice in succession on the stack iff the
* expression is a multiline string literal).
*/
var sepRegions: List[Token] = List()
// Get next token ------------------------------------------------------------
/** Are we directly in a string interpolation expression?
*/
private def inStringInterpolation = sepRegions.nonEmpty && sepRegions.head == STRINGLIT
/** Are we directly in a multiline string interpolation expression?
* @pre inStringInterpolation
*/
private def inMultiLineInterpolation = inStringInterpolation && sepRegions.tail.nonEmpty && sepRegions.tail.head == STRINGPART
/** read next token and return last offset*/
def skipToken(): Offset = {val off = offset; nextToken(); off}
/** Allow an otherwise deprecated ident here */
private var allowIdent: Name = nme.EMPTY
/** Get next token, and allow the otherwise deprecated ident `name` */
def nextTokenAllow(name: Name) = {
// Save/restore so nested calls do not clobber the allowance.
val prev = allowIdent; allowIdent = name
try {nextToken()} finally {allowIdent = prev}
}
/** Produce next token, filling TokenData fields of Scanner.
 *  Also maintains `sepRegions`, inserts synthetic NEWLINE/NEWLINES tokens,
 *  and fuses CASE+CLASS / CASE+OBJECT / SEMI+ELSE token pairs.
 */
def nextToken() {
prevWasInSubScript_script = isInSubScript_script
val lastToken = token
// Adapt sepRegions according to last token
(lastToken: @switch) match {
case LPAREN => sepRegions = RPAREN :: sepRegions
case LBRACKET => sepRegions = RBRACKET :: sepRegions
case LBRACE => sepRegions = RBRACE :: sepRegions
case CASE => sepRegions = ARROW :: sepRegions
// subscript tokens
case LBRACE_DOT => sepRegions = RBRACE_DOT :: sepRegions
case LBRACE_DOT3 => sepRegions = RBRACE_DOT3 :: sepRegions
case LBRACE_QMARK => sepRegions = RBRACE_QMARK :: sepRegions
case LBRACE_EMARK => sepRegions = RBRACE_EMARK :: sepRegions
case LBRACE_ASTERISK => sepRegions = RBRACE_ASTERISK :: sepRegions
case LBRACE_CARET => sepRegions = RBRACE_CARET :: sepRegions
case LPAREN_ASTERISK => sepRegions = RPAREN_ASTERISK :: sepRegions
case LPAREN_ASTERISK2 => sepRegions = RPAREN_ASTERISK2:: sepRegions
// On '}' pop up to and including the matching RBRACE region.
case RBRACE => while (!sepRegions.isEmpty && sepRegions.head != RBRACE)
sepRegions = sepRegions.tail
if (!sepRegions.isEmpty) sepRegions = sepRegions.tail
discardDocBuffer()
case RBRACKET | RPAREN
| RBRACE_DOT // subscript tokens
| RBRACE_DOT3
| RBRACE_QMARK
| RBRACE_EMARK
| RBRACE_ASTERISK
| RBRACE_CARET
| RPAREN_ASTERISK
| RPAREN_ASTERISK2
=> if (!sepRegions.isEmpty && sepRegions.head == lastToken) sepRegions = sepRegions.tail; discardDocBuffer()
case ARROW => if (!sepRegions.isEmpty && sepRegions.head == lastToken) sepRegions = sepRegions.tail
case STRINGLIT => if ( inMultiLineInterpolation ) sepRegions = sepRegions.tail.tail
else if ( inStringInterpolation ) sepRegions = sepRegions.tail
case _ =>
}
// Read a token or copy it from `next` tokenData
if (next.token == EMPTY) {
lastOffset = charOffset - 1
// Do not count a CR that is part of a CRLF pair.
if (lastOffset > 0 && buf(lastOffset) == '\n' && buf(lastOffset - 1) == '\r') {
lastOffset -= 1
}
if (inStringInterpolation) fetchStringPart()
else fetchToken()
//println("fetching: "+this) //////////
// On a scan error inside interpolation, pop the interpolation region(s)
// so the scanner can resynchronize.
if(token == ERROR) {
if ( inMultiLineInterpolation) sepRegions = sepRegions.tail.tail
else if (inStringInterpolation) sepRegions = sepRegions.tail
}
} else {
this copyFrom next
next.token = EMPTY
//println("Fetching: "+this) ///////////
}
/** Insert NEWLINE or NEWLINES
* if we are after a newline
* && we are within a { ... } or on toplevel (wrt sepRegions)
* && NOT isInSubScript_expression
* && the current token can start a statement and the one before can end it
*
* insert NEWLINES if we are past a blank line, NEWLINE otherwise
*
*
* If isInSubScript_expression theb the parser will deal with newline issues
*/
if (!applyBracePatch()
&& afterLineEnd()
&& ( if (isInSubScript_header) false
else if (isInSubScript_expression)
!isSubScriptInfixOpOrSemiOrCommaOrColonOrOpenBrace(lastToken)
&& (isSubScriptTermStarter || isIF_or_DO(token))
&& lastToken != NEWLINE
&& lastToken != NEWLINES
else inLastOfStat(lastToken)
&& inFirstOfStat(token)
&& ( sepRegions.isEmpty
|| sepRegions.head == RBRACE)
)
)
{
// Stash the real token in `next` and synthesize the NEWLINE(S) first.
next copyFrom this
offset = if (lineStartOffset <= offset) lineStartOffset else lastLineStartOffset
//println(s"Inserting ${token2string(if (pastBlankLine()) NEWLINES else NEWLINE)} before: ${this} lastToken=${token2string(lastToken)} isInSubScript_script: $isInSubScript_script") ///////////
token = if (pastBlankLine()) NEWLINES else NEWLINE
// uncomment to see where the NEWLINE(S)'s are inserted
// error(lastOffset, if (pastBlankLine()) "NEWLINES" else "NEWLINE")
}
// Join CASE + CLASS => CASECLASS, CASE + OBJECT => CASEOBJECT, SEMI + ELSE => ELSE
if (token == CASE) {
prev copyFrom this
val nextLastOffset = charOffset - 1
fetchToken()
def resetOffset() {
offset = prev.offset
lastOffset = prev.lastOffset
}
if (token == CLASS ) {token = CASECLASS ; resetOffset()}
else if (token == OBJECT) {token = CASEOBJECT; resetOffset()}
else {
// Not a fused pair: push the fetched token back into `next`.
lastOffset = nextLastOffset
next copyFrom this
this copyFrom prev
}
} else if (token == SEMI) {
prev copyFrom this
fetchToken()
if (token != ELSE) {
next copyFrom this
this copyFrom prev
}
}
// print("["+this+"]") ///////////
}
/** Is current token first one after a newline? */
def afterLineEnd(): Boolean =
lastOffset < lineStartOffset &&
(lineStartOffset <= offset ||
lastOffset < lastLineStartOffset && lastLineStartOffset <= offset)
/** Is there a blank line between the current token and the last one?
* @pre afterLineEnd().
* Scans the raw buffer between the two tokens for two line breaks
* separated only by whitespace.
*/
private def pastBlankLine(): Boolean = {
var idx = lastOffset
var ch = buf(idx)
val end = offset
while (idx < end) {
if (ch == LF || ch == FF) {
// Found one line break; look for a second before any non-blank char.
do {
idx += 1; ch = buf(idx)
if (ch == LF || ch == FF) {
// println("blank line found at "+lastOffset+":"+(lastOffset to idx).map(buf(_)).toList)
return true
}
if (idx == end) return false
} while (ch <= ' ')
}
idx += 1; ch = buf(idx)
}
false
}
/** read next token, filling TokenData fields of Scanner.
* Dispatches on the current character. Inside SubScript contexts many
* extra multi-character tokens (curly arrows, special braces/parens) are
* recognized before falling back to normal Scala scanning.
*/
protected final def fetchToken() {
// Helpers to advance the reader by several characters at once.
def nextChar2() = {nextChar(); nextChar()}
def nextChar3() = {nextChar(); nextChar2()}
def nextChar4() = {nextChar(); nextChar3()}
def nextChar5() = {nextChar(); nextChar4()}
def nextChar6() = {nextChar(); nextChar5()}
offset = charOffset - 1
(ch: @switch) match {
case ' ' | '\t' | CR | LF | FF => nextChar(); fetchToken()
case 'A' | 'B' | 'C' | 'D' | 'E' |
'F' | 'G' | 'H' | 'I' | 'J' |
'K' | 'L' | 'M' | 'N' | 'O' |
'P' | 'Q' | 'R' | 'S' | 'T' |
'U' | 'V' | 'W' | 'X' | 'Y' |
'Z' | '$' | '_' |
'a' | 'b' | 'c' | 'd' | 'e' |
'f' | 'g' | 'h' | 'i' | 'j' |
'k' | 'l' | 'm' | 'n' | 'o' |
'p' | 'q' | 'r' | 's' | 't' |
'u' | 'v' | 'w' | 'x' | 'y' | // scala-mode: need to understand multi-line case patterns
'z' =>
putChar(ch)
nextChar()
getIdentRest()
// An identifier immediately followed by '"' starts string interpolation.
if (ch == '"' && token == IDENTIFIER)
token = INTERPOLATIONID
// SubScript curly-arrow tokens: ~> ~~> ~~~> ~~ ~~~ ~/~> ~/~~> ~/~~ ~
case '~' => if (isInSubScript_expression) {
val lookahead = lookaheadReader; lookahead.nextChar()
if (lookahead.ch == '>') {nextChar2(); token = CURLYARROW1; return} // ~>
if (lookahead.ch == '~') {lookahead.nextChar()
if (lookahead.ch == '>') {nextChar3(); token = CURLYARROW2; return} // ~~>
if (lookahead.ch == '~') {lookahead.nextChar()
if(lookahead.ch == '>') {nextChar4(); token = CURLYARROW3; return} // ~~~>
nextChar3(); token = CURLY3; return
}
nextChar2(); token = CURLY2; return
}
else
if (lookahead.ch == '/') {lookahead.nextChar()
if (lookahead.ch == '~') {lookahead.nextChar()
if (lookahead.ch == '>') {nextChar4(); token = CURLYBROKENARROW2; return} // ~/~>
if (lookahead.ch == '~') {lookahead.nextChar()
if(lookahead.ch == '>') {nextChar5(); token = CURLYBROKENARROW3; return} // ~/~~>
else {nextChar4(); token = CURLYBROKEN3 ; return} // ~/~~
}}
}
nextChar(); token = CURLY1; return
}
getOperatorRest()
// SubScript "==>" becomes ARROW2; otherwise scan a normal operator.
case '=' => if (isInSubScript_expression || isInSubScript_partialScript) {
val lookahead = lookaheadReader; lookahead.nextChar()
if (lookahead.ch == '=') {lookahead.nextChar()
if(lookahead.ch == '>') {nextChar(); nextChar(); nextChar(); token = ARROW2; return}
}
}
getOperatorRest()
case '>' => if (isInSubScript_partialScript) {nextChar();
if (ch=='>') {nextChar(); token = GREATER2; return} else putChar('>')}
getOperatorRest()
case '<' => if (isInSubScript_expression
|| isInSubScript_val_var_init ) {nextChar(); if (ch=='<') {nextChar(); token = LESS2; return} else putChar('<')}
// is XMLSTART?
def fetchLT() = {
val last = if (charOffset >= 2) buf(charOffset - 2) else ' '
nextChar()
last match {
case ' ' | '\t' | '\n' | '{' | '(' | '>' if isNameStart(ch) || ch == '!' || ch == '?' =>
token = XMLSTART
case _ =>
// Console.println("found '<', but last is '"+in.last+"'"); // DEBUG
putChar('<')
getOperatorRest()
}
}
fetchLT
// In native code, "!}" "^}" "?}" close SubScript brace variants;
// in expressions "^" / "^^" are CARET tokens.
case '!' | '^' | '?' =>
val chOld = ch
nextChar()
if (isInSubScript_nativeCode && ch=='}') {
nextChar()
token = chOld match {
case '!' => RBRACE_EMARK
case '^' => RBRACE_CARET
case '?' => RBRACE_QMARK
}
}
else if (isInSubScript_expression && chOld=='^') {
if (ch=='^') {nextChar(); token = CARET2}
else { token = CARET }
}
else {
putChar(chOld)
getOperatorRest()
}
// "*}" "*)" "**)" SubScript closers, else an ordinary operator.
case '*' =>
val chOld = ch
nextChar()
if (isInSubScript_nativeCode && ch=='}') {nextChar(); token = RBRACE_ASTERISK}
else if ((isInSubScript_expression
||isInSubScript_val_var_init) && ch==')') {nextChar(); token = RPAREN_ASTERISK}
else if ((isInSubScript_expression
||isInSubScript_val_var_init) && ch=='*') {nextChar()
if (ch==')') {nextChar(); token = RPAREN_ASTERISK2}
else {putChar('*'); putChar('*'); getOperatorRest()}
}
else {
putChar(chOld)
getOperatorRest()
}
// SubScript "+~" family of curly tokens; else a normal operator.
case '+' => if (isInSubScript_expression) {
val lookahead = lookaheadReader; lookahead.nextChar()
if (lookahead.ch == '~') {lookahead.nextChar()
if (lookahead.ch == '~') {lookahead.nextChar()
if(lookahead.ch == '~') {nextChar4(); token = PLUS_CURLY3 ; return} // +~~~
else if(lookahead.ch == '>') {nextChar4(); token = PLUS_CURLYARROW2; return} // +~~>
else {nextChar3(); token = PLUS_CURLY2 ; return} // +~~
}
if (lookahead.ch == '/') {lookahead.nextChar()
if (lookahead.ch == '~') {lookahead.nextChar()
if (lookahead.ch == '>') {nextChar5(); token = PLUS_CURLYBROKENARROW2; return} // +~/~>
if (lookahead.ch == '~') {lookahead.nextChar()
if(lookahead.ch == '>') {nextChar6(); token = PLUS_CURLYBROKENARROW3; return} // +~/~~>
else {nextChar5(); token = PLUS_CURLYBROKEN3 ; return} // +~/~~
}
else {nextChar4(); token = PLUS_CURLYBROKEN2 ; return} // +~/~
}
else {nextChar3(); token = PLUS_CURLYBROKEN1 ; return} // +~/
}
else {nextChar2(); token = PLUS_CURLY1 ; return} // +~
}
}
putChar(ch)
nextChar()
getOperatorRest()
case '@' | '#' | '%' |
'-' |
// '~' | '>' | '<' | '=' |
':'| '&' |
'|' | '\\' =>
putChar(ch)
nextChar()
getOperatorRest()
// '/' either opens a comment or is an operator character.
case '/' =>
nextChar()
if (skipComment()) {
fetchToken()
} else {
putChar('/')
getOperatorRest()
}
case '0' =>
def fetchLeadingZero(): Unit = {
nextChar()
ch match {
case 'x' | 'X' => base = 16 ; nextChar()
case _ => base = 8 // single decimal zero, perhaps
}
}
fetchLeadingZero()
getNumber()
case '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
base = 10
getNumber()
case '`' => getBackquotedIdent()
// String literals: plain, triple-quoted, and interpolated variants.
case '\"' =>
def fetchDoubleQuote() = {
if (token == INTERPOLATIONID) {
nextRawChar()
if (ch == '\"') {
val lookahead = lookaheadReader
lookahead.nextChar()
if (lookahead.ch == '\"') {
nextRawChar() // now eat it
offset += 3
nextRawChar()
getStringPart(multiLine = true)
sepRegions = STRINGPART :: sepRegions // indicate string part
sepRegions = STRINGLIT :: sepRegions // once more to indicate multi line string part
} else {
nextChar()
token = STRINGLIT
strVal = ""
}
} else {
offset += 1
getStringPart(multiLine = false)
sepRegions = STRINGLIT :: sepRegions // indicate single line string part
}
} else {
nextChar()
if (ch == '\"') {nextChar()
if (ch == '\"') {nextRawChar(); getRawStringLit()}
else {token = STRINGLIT; strVal = ""}
}
else {getStringLit()}
}
}
fetchDoubleQuote()
// Character literal, or a quoted identifier/operator symbol.
case '\'' =>
def fetchSingleQuote() = {
nextChar()
if (isIdentifierStart(ch)) charLitOr(getIdentRest)
else if (isOperatorPart(ch)
&& (ch != '\\')) charLitOr(getOperatorRest)
else {
getLitChar()
if (ch == '\'') {nextChar(); token = CHARLIT; setStrVal()}
else {syntaxError("unclosed character literal")}
}
}
fetchSingleQuote()
// '.' may start a fraction, DOT, or SubScript DOT2/DOT3/"..}" closers.
case '.' =>
nextChar()
if ('0' <= ch && ch <= '9') {putChar('.'); getFraction()}
else { token = DOT }
if (isInSubScript_header || isInSubScript_expression || isInSubScript_val_var_init) {
if (ch == '.') {nextChar()
if (ch == '.') {nextChar(); token = DOT3}
else { token = DOT2}}
}
else if (isInSubScript_nativeCode) {
if (ch == '}') {nextChar(); token = RBRACE_DOT}
else if (ch == '.') {nextChar()
if (ch == '.') {nextChar()
if(ch == '}') {nextChar(); token = RBRACE_DOT3}
else {syntaxError("'...' unexpected")}}
else {syntaxError( "'..' unexpected")}}
}
// '{' plus SubScript brace variants {? {! {* {^ {. {...
case '{' => nextChar(); token = LBRACE
if (isInSubScript_expression || isInSubScript_val_var_init) {
(ch: @switch) match {
case '?' => nextChar(); token = LBRACE_QMARK
case '!' => nextChar(); token = LBRACE_EMARK
case '*' => nextChar(); token = LBRACE_ASTERISK
case '^' => nextChar(); token = LBRACE_CARET
case '.' => nextChar(); token = LBRACE_DOT
if (ch=='.'
&& lookaheadReader.ch == '.') {
nextChar(); nextChar(); token = LBRACE_DOT3
}
case _ =>
}
}
// '(' plus SubScript paren operands; the "// no }" comments keep
// editors' brace matching happy — do not remove them.
case '(' => nextChar(); token = LPAREN
if (isInSubScript_expression || isInSubScript_val_var_init) { // (+) (-) (+-) (;) (* (**
if (ch == '*') {nextChar(); token = LPAREN_ASTERISK // no }
if (ch == '*') {nextChar(); token = LPAREN_ASTERISK2}
}
else {
var isSpecialOperand = false
val lookahead = lookaheadReader
if (lookahead.ch == '+') {lookahead.nextChar() // no }
if (lookahead.ch == '-') {lookahead.nextChar()}
isSpecialOperand = lookahead.ch == ')'
}
else if (lookahead.ch == '-'
|| lookahead.ch == ';') {lookahead.nextChar();
isSpecialOperand = lookahead.ch == ')'
}
if (isSpecialOperand) {
if (ch == '+') {nextChar(); token = LPAREN_PLUS_RPAREN // no }
if (ch == '-') {nextChar(); token = LPAREN_PLUS_MINUS_RPAREN}
}
else if (ch == '-') {nextChar(); token = LPAREN_MINUS_RPAREN}
else if (ch == ';') {nextChar(); token = LPAREN_SEMI_RPAREN}
nextChar()
}
}
}
case ')' => nextChar(); token = RPAREN
case ';' => //if (!isInSubScript_expression)
{nextChar(); token = SEMI}
//else {putChar(ch); nextChar(); getOperatorRest()}
case ',' => nextChar(); token = COMMA
case '}' => nextChar(); token = RBRACE
case '[' => nextChar(); token = LBRACKET
case ']' => nextChar(); token = RBRACKET
case SU =>
if (isAtEnd) token = EOF
else {
syntaxError("illegal character")
nextChar()
}
// Unicode arrows, unicode identifiers, other special operator chars.
case _ =>
def fetchOther() = {
if (ch == '\u21D2') {
nextChar(); token = ARROW
} else if (ch == '\u2190') {
nextChar(); token = LARROW
} else if (Character.isUnicodeIdentifierStart(ch)) {
putChar(ch)
nextChar()
getIdentRest()
} else if (isSpecial(ch)) {
putChar(ch)
nextChar()
getOperatorRest()
} else {
syntaxError("illegal character '" + ("" + '\\' + 'u' + "%04x".format(ch.toInt)) + "'")
nextChar()
}
}
fetchOther()
}
}
/** Can token start a statement? True unless it is one of the closers,
 *  separators or infix keywords that can never begin a statement.
 */
def inFirstOfStat(token: Token) = {
  val cannotStart =
    token == EOF       || token == CATCH     || token == ELSE      || token == EXTENDS  ||
    token == FINALLY   || token == FORSOME   || token == MATCH     || token == WITH     ||
    token == YIELD     || token == COMMA     || token == SEMI      || token == NEWLINE  ||
    token == NEWLINES  || token == DOT       || token == COLON     || token == EQUALS   ||
    token == ARROW     || token == LARROW    || token == ARROW2    || token == GREATER2 ||
    token == SUBTYPE   || token == VIEWBOUND || token == SUPERTYPE || token == HASH     ||
    token == RPAREN    || token == RBRACKET  || token == RBRACE    || token == LBRACKET
  !cannotStart
}
/** Can token end a statement? True only for literals, identifiers, a few
 *  keywords, XML starts, and the closing brackets.
 */
def inLastOfStat(token: Token) = {
  val canEnd =
    token == CHARLIT   || token == INTLIT          || token == LONGLIT || token == FLOATLIT ||
    token == DOUBLELIT || token == STRINGLIT       || token == SYMBOLLIT ||
    token == IDENTIFIER|| token == BACKQUOTED_IDENT|| token == THIS    || token == NULL     ||
    token == TRUE      || token == FALSE           || token == RETURN  || token == USCORE   ||
    token == TYPE      || token == XMLSTART        || token == RPAREN  || token == RBRACKET ||
    token == RBRACE
  canEnd
}
// Identifiers ---------------------------------------------------------------
/** Scan a `backquoted` identifier; the name excludes the backquotes.
 *  Reports an error for empty (` `` `) or unclosed quoted identifiers.
 */
private def getBackquotedIdent(): Unit = {
nextChar()
getLitChars('`')
if (ch == '`') {
nextChar()
finishNamed(BACKQUOTED_IDENT)
if (name.length == 0) syntaxError("empty quoted identifier")
}
else syntaxError("unclosed quoted identifier")
}
/** Scan the remainder of an identifier that began with a letter/'_'/'$'.
 *  An '_' may switch into operator characters (getIdentOrOperatorRest).
 */
private def getIdentRest(): Unit = (ch: @switch) match {
case 'A' | 'B' | 'C' | 'D' | 'E' |
'F' | 'G' | 'H' | 'I' | 'J' |
'K' | 'L' | 'M' | 'N' | 'O' |
'P' | 'Q' | 'R' | 'S' | 'T' |
'U' | 'V' | 'W' | 'X' | 'Y' |
'Z' | '$' |
'a' | 'b' | 'c' | 'd' | 'e' |
'f' | 'g' | 'h' | 'i' | 'j' |
'k' | 'l' | 'm' | 'n' | 'o' |
'p' | 'q' | 'r' | 's' | 't' |
'u' | 'v' | 'w' | 'x' | 'y' |
'z' |
'0' | '1' | '2' | '3' | '4' |
'5' | '6' | '7' | '8' | '9'
=> putChar(ch); nextChar(); getIdentRest()
case '_' => putChar(ch); nextChar(); getIdentOrOperatorRest()
case SU => finishNamed() // strangely enough, Character.isUnicodeIdentifierPart(SU) returns true!
case _ => if (Character.isUnicodeIdentifierPart(ch)) {
putChar(ch); nextChar(); getIdentRest()
}
else finishNamed()
}
/** Scan the remainder of a symbolic operator. In SubScript headers, "=",
 *  "+=" and "?"/"??" before ':' terminate early so that prefix operators
 *  and parameter markers are tokenized separately. In SubScript
 *  expressions ';' is also an operator character.
 */
private def getOperatorRest(): Unit = {
if (isInSubScript_header) { // after seeing += and = for header definitions, stop there to allow prefix operators as in x =|| y z
cbuf.toString match {
case "=" if(ch!='>') => finishNamed(); return
case "+=" => finishNamed(); return
case "?" | "??" if(ch==':') => finishNamed(); return // allow parameters lists like (p?:Char, pp??:Char)
case _ =>
}
}
(ch: @switch) match {
case '/' =>
nextChar()
// '/' may begin a comment, which ends the operator.
if (skipComment()) finishNamed()
else { putChar('/'); getOperatorRest() }
case '~' | '!' | '@' | '#' | '%' |
'^' | '*' | '+' | '-' | '<' |
'>' | '?' | ':' | '=' | '&' |
'|' | '\\' => putChar(ch); nextChar(); getOperatorRest()
case _ => if (isSpecial(ch)
|| isInSubScript_expression && ch==';') {putChar(ch); nextChar(); getOperatorRest()}
else finishNamed()
}
}
/** After an '_' in an identifier: continue as identifier if the next char
 *  is an identifier part, as operator if it is an operator char, otherwise
 *  finish the name.
 */
private def getIdentOrOperatorRest() {
if (isIdentifierPart(ch)) getIdentRest()
else ch match {
case '~' | '!' | '@' | '#' | '%' |
'^' | '*' | '+' | '-' | '<' |
'>' | '?' | ':' | '=' | '&' |
'|' | '\\' | '/' => getOperatorRest()
case _ => if (isSpecial(ch)) getOperatorRest()
else finishNamed()
}
}
// Literals -----------------------------------------------------------------
/** Scan a single-line "..." string literal (opening quote already consumed). */
private def getStringLit() = {
getLitChars('"')
if (ch == '"') {setStrVal(); nextChar(); token = STRINGLIT}
else unclosedStringLit()
}
private def unclosedStringLit(): Unit = syntaxError("unclosed string literal")
/** Scan the body of a raw (triple-quoted) string literal until the
 *  closing triple quote; escape sequences are NOT interpreted.
 */
private def getRawStringLit(): Unit = {
if (ch == '\"') {
nextRawChar()
if (isTripleQuote()) {setStrVal(); token = STRINGLIT}
else getRawStringLit()
} else if (ch == SU) {
incompleteInputError("unclosed multi-line string literal")
} else {
putChar(ch)
nextRawChar()
getRawStringLit()
}
}
/** Scan one part of an interpolated string: either a literal chunk up to
 *  the next '$' (emitting STRINGPART and queueing the interpolated token
 *  in `next`), or the closing quote(s) (emitting STRINGLIT).
 */
@scala.annotation.tailrec private def getStringPart(multiLine: Boolean): Unit = {
// Emit the literal chunk scanned so far as STRINGPART; the token after
// '$' will be delivered via `next`.
def finishStringPart() = {
setStrVal()
token = STRINGPART
next.lastOffset = charOffset - 1
next.offset = charOffset - 1
}
if (ch == '"') {
if (multiLine) {
nextRawChar()
if (isTripleQuote()) {
setStrVal()
token = STRINGLIT
} else
getStringPart(multiLine)
} else {
nextChar()
setStrVal()
token = STRINGLIT
}
} else if (ch == '$') {
nextRawChar()
if (ch == '$') {
// "$$" is an escaped literal '$'.
putChar(ch)
nextRawChar()
getStringPart(multiLine)
} else if (ch == '{') {
finishStringPart()
nextRawChar()
next.token = LBRACE
} else if (ch == '_') {
finishStringPart()
nextRawChar()
next.token = USCORE
} else if (Character.isUnicodeIdentifierStart(ch)) {
finishStringPart()
// Scan the interpolated identifier and classify it via the keyword array.
do {
putChar(ch)
nextRawChar()
} while (ch != SU && Character.isUnicodeIdentifierPart(ch))
next.token = IDENTIFIER
next.name = newTermName(cbuf.toString)
cbuf.clear()
val idx = next.name.start - kwOffset
if (idx >= 0 && idx < kwArray.length) {
next.token = kwArray(idx)
}
} else {
syntaxError("invalid string interpolation: `$$', `$'ident or `$'BlockExpr expected")
}
} else {
// Single-line interpolations may not contain raw line breaks.
val isUnclosedLiteral = !isUnicodeEscape && (ch == SU || (!multiLine && (ch == CR || ch == LF)))
if (isUnclosedLiteral) {
if (multiLine)
incompleteInputError("unclosed multi-line string literal")
else
unclosedStringLit()
}
else {
putChar(ch)
nextRawChar()
getStringPart(multiLine)
}
}
}
/** Resume scanning inside a string interpolation region (see sepRegions). */
private def fetchStringPart() = {
offset = charOffset - 1
getStringPart(multiLine = inMultiLineInterpolation)
}
/** After one '"' has been consumed: is this the closing triple quote?
 *  More than three consecutive quotes keep the surplus as literal '"' chars.
 *  When false, the consumed quotes are pushed back into cbuf as content.
 */
private def isTripleQuote(): Boolean =
if (ch == '"') {
nextRawChar()
if (ch == '"') {
nextChar()
while (ch == '"') {
putChar('"')
nextChar()
}
true
} else {
putChar('"')
putChar('"')
false
}
} else {
putChar('"')
false
}
/** copy current character into cbuf, interpreting any escape sequences,
* and advance to next character.
*
* Octal escapes (e.g. \123) are still parsed for migration purposes but
* are reported as deprecated, or rejected when `settings.future` is set.
*/
protected def getLitChar(): Unit =
if (ch == '\\') {
nextChar()
if ('0' <= ch && ch <= '7') {
// Octal escape: up to three octal digits; the three-digit form requires
// a leading digit <= '3' so the value stays within one byte (\377).
val start = charOffset - 2
val leadch: Char = ch
var oct: Int = digit2int(ch, 8)
nextChar()
if ('0' <= ch && ch <= '7') {
oct = oct * 8 + digit2int(ch, 8)
nextChar()
if (leadch <= '3' && '0' <= ch && ch <= '7') {
oct = oct * 8 + digit2int(ch, 8)
nextChar()
}
}
// Suggest the supported equivalent escape in the diagnostic message.
val alt = if (oct == LF) "\\n" else "\\u%04x" format oct
def msg(what: String) = s"Octal escape literals are $what, use $alt instead."
if (settings.future)
syntaxError(start, msg("unsupported"))
else
deprecationWarning(start, msg("deprecated"))
putChar(oct.toChar)
} else {
// Single-character escapes; anything unrecognized is an error but the
// raw character is still kept (see invalidEscape).
ch match {
case 'b' => putChar('\b')
case 't' => putChar('\t')
case 'n' => putChar('\n')
case 'f' => putChar('\f')
case 'r' => putChar('\r')
case '\"' => putChar('\"')
case '\'' => putChar('\'')
case '\\' => putChar('\\')
case _ => invalidEscape()
}
nextChar()
}
} else {
// Not an escape: copy verbatim.
putChar(ch)
nextChar()
}
// Report a bad escape sequence, keeping the offending character in the
// buffer so scanning can continue past the error.
protected def invalidEscape(): Unit = {
syntaxError(charOffset - 1, "invalid escape character")
putChar(ch)
}
// Accumulate literal characters up to (but not including) `delimiter`;
// stops early at end of input or at a raw CR/LF/SU that is not part of a
// unicode escape.
private def getLitChars(delimiter: Char) = {
while (ch != delimiter && !isAtEnd && (ch != SU && ch != CR && ch != LF || isUnicodeEscape))
getLitChar()
}
/** read fractional part and exponent of floating point number
* if one is present.
*
* Uses one token of lookahead after 'e'/'E' so that something like `1.5e`
* followed by an identifier is not mis-consumed as an exponent. A trailing
* 'f'/'F' yields FLOATLIT; 'd'/'D' or no suffix yields DOUBLELIT.
*/
protected def getFraction(): Unit = {
token = DOUBLELIT
while ('0' <= ch && ch <= '9') {putChar(ch); nextChar()}
if (ch == 'e' || ch == 'E') {
// Only treat 'e'/'E' as an exponent marker if an (optionally signed)
// digit actually follows; otherwise leave it for the next token.
val lookahead = lookaheadReader
lookahead.nextChar()
if (lookahead.ch == '+' || lookahead.ch == '-') {lookahead.nextChar()}
if ('0' <= lookahead.ch && lookahead.ch <= '9') {
putChar(ch); nextChar()
if (ch == '+' || ch == '-') {putChar(ch); nextChar()}
while ('0' <= ch && ch <= '9') {putChar(ch); nextChar()}
}
token = DOUBLELIT
}
if (ch == 'd' || ch == 'D') {putChar(ch); nextChar(); token = DOUBLELIT}
else if (ch == 'f' || ch == 'F') {putChar(ch); nextChar(); token = FLOATLIT}
checkNoLetter()
setStrVal()
}
/** First character of the current strVal, or the NUL character ('\u0000')
* when strVal is empty.
*/
def charVal: Char = strVal.headOption.getOrElse(0.toChar)
/** Convert current strVal, base to long value.
* This is tricky because of max negative value.
*
* Conversions in base 10 and 16 are supported. As a permanent migration
* path, attempts to write base 8 literals except `0` emit a verbose error.
*
* @param negated whether the literal appears under a unary minus, which
*                allows the extra magnitude of Long.MinValue / Int.MinValue
*/
def intVal(negated: Boolean): Long = {
def malformed: Long = {
if (base == 8) syntaxError("Decimal integer literals may not have a leading zero. (Octal syntax is obsolete.)")
else syntaxError("malformed integer number")
0
}
def tooBig: Long = {
syntaxError("integer number too large")
0
}
def intConvert: Long = {
val len = strVal.length
if (len == 0) {
// An empty strVal with base == 8 is the plain literal `0`.
if (base != 8) syntaxError("missing integer number") // e.g., 0x;
0
} else {
// `divider` halves the per-digit headroom check for base 16 so the
// overflow test below cannot itself overflow a Long.
val divider = if (base == 10) 1 else 2
val limit: Long = if (token == LONGLIT) Long.MaxValue else Int.MaxValue
@tailrec def convert(value: Long, i: Int): Long =
if (i >= len) value
else {
val d = digit2int(strVal charAt i, base)
if (d < 0)
malformed
else if (value < 0 ||
limit / (base / divider) < value ||
limit - (d / divider) < value * (base / divider) &&
// allow exactly limit + 1 when the literal is negated (MinValue)
!(negated && limit == value * base - 1 + d))
tooBig
else
convert(value * base + d, i + 1)
}
val result = convert(0, 0)
// Any digits after a leading zero (base == 8) are rejected outright.
if (base == 8) malformed else if (negated) -result else result
}
}
if (token == CHARLIT && !negated) charVal.toLong else intConvert
}
/** Value of the current integer literal, assuming no enclosing unary minus. */
def intVal: Long = intVal(negated = false)
/** Convert current strVal, base to double value
*
* Parsing is delegated to the JDK; FLOATLIT values are range-checked
* against Float.MaxValue, DOUBLELIT against Double.MaxValue.
*/
def floatVal(negated: Boolean): Double = {
val limit: Double = if (token == DOUBLELIT) Double.MaxValue else Float.MaxValue
try {
val value: Double = java.lang.Double.valueOf(strVal).doubleValue()
if (value > limit)
syntaxError("floating point number too large")
if (negated) -value else value
} catch {
case _: NumberFormatException =>
syntaxError("malformed floating point number")
0.0
}
}
/** Value of the current floating point literal, without negation. */
def floatVal: Double = floatVal(negated = false)
/** Reject an identifier character (excluding control characters) trailing
* directly after a numeric literal, e.g. the `a` in `123a`.
*/
def checkNoLetter(): Unit = {
val trailingLetter = ch >= ' ' && isIdentifierPart(ch)
if (trailingLetter) syntaxError("Invalid literal number")
}
// NOTE(review): the commented block below is leftover merge-conflict residue
// ("<<<<<<< HEAD"): the pre-2.11.5 getNumber implementation, kept for reference.
// /** Read a number into strVal and set base
// */
// protected def getNumber() {
// val base1 = if (base < 10) 10 else base
// // Read 8,9's even if format is octal, produce a malformed number error afterwards.
// // At this point, we have already read the first digit, so to tell an innocent 0 apart
// // from an octal literal 0123... (which we want to disallow), we check whether there
// // are any additional digits coming after the first one we have already read.
// var notSingleZero = false
// while (digit2int(ch, base1) >= 0) {
// putChar(ch)
// nextChar()
// notSingleZero = true
// }
// token = INTLIT
// /* When we know for certain it's a number after using a touch of lookahead */
// def restOfNumber() = {putChar(ch); nextChar(); getFraction()}
// def restOfUncertainToken() = {
// def isEfd = ch match { case 'e' | 'E' | 'f' | 'F' | 'd' | 'D' => true ; case _ => false }
// def isL = ch match { case 'l' | 'L' => true ; case _ => false }
// if (base <= 10 && isEfd) getFraction()
// else {
// // Checking for base == 8 is not enough, because base = 8 is set
// // as soon as a 0 is read in `case '0'` of method fetchToken.
// if (base == 8 && notSingleZero) syntaxError("Non-zero integral values may not have a leading zero.")
// setStrVal()
// if (isL) {nextChar(); token = LONGLIT}
// else checkNoLetter()
// }
// }
// if (base > 10 || ch != '.') restOfUncertainToken()
// else {
// val lookahead = lookaheadReader
// val c = lookahead.getc()
// /* Prohibit 1. */
// if (!isDigit(c))
// return setStrVal()
// val isDefinitelyNumber = (c: @switch) match {
// /** Another digit is a giveaway. */
// case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => true
// /* Backquoted idents like 22.`foo`. */
// case '`' => return setStrVal() /** Note the early return */
// /* These letters may be part of a literal, or a method invocation on an Int.
// */
// case 'd' | 'D'
// | 'f' | 'F' => !isIdentifierPart(lookahead.getc())
// /* A little more special handling for e.g. 5e7 */
// case 'e' | 'E' => val ch = lookahead.getc()
// !isIdentifierPart(ch) || (isDigit(ch) || ch == '+' || ch == '-')
// case x => !isIdentifierStart(x)
// }
// if (isDefinitelyNumber) restOfNumber()
// else restOfUncertainToken()
// }
// =======
/** Read a number into strVal.
*
* The `base` can be 8, 10 or 16, where base 8 flags a leading zero.
* For ints, base 8 is legal only for the case of exactly one zero.
*/
protected def getNumber(): Unit = {
// consume digits of a radix
def consumeDigits(radix: Int): Unit =
while (digit2int(ch, radix) >= 0) {
putChar(ch)
nextChar()
}
// adding decimal point is always OK because `Double valueOf "0."` is OK
def restOfNonIntegralNumber(): Unit = {
putChar('.')
if (ch == '.') nextChar()
getFraction()
}
// after int: 5e7f, 42L, 42.toDouble but not 42b. Repair 0d.
def restOfNumber(): Unit = {
ch match {
case 'e' | 'E' | 'f' | 'F' |
'd' | 'D' => if (cbuf.isEmpty) putChar('0'); restOfNonIntegralNumber()
case 'l' | 'L' => token = LONGLIT ; setStrVal() ; nextChar()
case _ => token = INTLIT ; setStrVal() ; checkNoLetter()
}
}
// consume leading digits, provisionally an Int
consumeDigits(if (base == 16) 16 else 10)
// A '.' followed by a digit means a floating point literal; a '.' followed
// by anything else (e.g. `1.foo`) is left for the parser as a selection.
val detectedFloat: Boolean = base != 16 && ch == '.' && isDigit(lookaheadReader.getc)
if (detectedFloat) restOfNonIntegralNumber() else restOfNumber()
// >>>>>>> v2.11.5  (end of merged implementation; the HEAD version is the commented block above)
}
/** Parse character literal if current character is followed by \',
* or follow with given op and return a symbol literal token
*
* Invoked after the opening quote with one candidate character current;
* `op` continues scanning the identifier/operator for the SYMBOLLIT case.
*/
def charLitOr(op: () => Unit): Unit = {
putChar(ch)
nextChar()
if (ch == '\'') {
nextChar()
token = CHARLIT
setStrVal()
} else {
// No closing quote: this was the start of a symbol literal.
op()
token = SYMBOLLIT
strVal = name.toString
}
}
// Errors -----------------------------------------------------------------
/** generate an error at the given offset
*/
def syntaxError(off: Offset, msg: String) {error(off, msg); token = ERROR}
/** generate an error at the current token offset */
def syntaxError(msg: String): Unit = syntaxError(offset, msg)
/** emit a deprecation warning at the current token offset */
def deprecationWarning(msg: String): Unit = deprecationWarning(offset, msg)
/** signal an error where the input ended in the middle of a token */
def incompleteInputError(msg: String) {incompleteInputError(offset, msg); token = EOF}
/** Debug rendering of the current token, including its literal value. */
override def toString() = token match {
case IDENTIFIER
| BACKQUOTED_IDENT => "id(" + name + ")"
case CHARLIT => "char(" + intVal + ")"
case INTLIT => "int(" + intVal + ")"
case LONGLIT => "long(" + intVal + ")"
case FLOATLIT => "float(" + floatVal + ")"
case DOUBLELIT => "double(" + floatVal + ")"
case STRINGLIT => "string(" + strVal + ")"
case STRINGPART => "stringpart(" + strVal + ")"
case INTERPOLATIONID => "interpolationid(" + name + ")"
case SEMI => ";"
case NEWLINE => "'\\n'"
case NEWLINES => "'\\n\\n'"
case COMMA => ","
case _ => token2string(token)
}
// ------------- brace counting and healing ------------------------------
/** overridden in UnitScanners:
* apply brace patch if one exists for this offset
* return true if subsequent end of line handling should be suppressed.
*/
def applyBracePatch(): Boolean = false
/** overridden in UnitScanners: net balance of `token` from the paren analyzer */
def parenBalance(token: Token) = 0
/** overridden in UnitScanners: compute brace patches to rebalance the unit */
def healBraces(): List[BracePatch] = List()
/** Initialization method: read first char, then first token */
def init() {nextChar(); nextToken()}
} // end Scanner
// ------------- keyword configuration -----------------------------------
/** All keyword names mapped to their token. Note that `macro`, `then` and
* `script` are deliberately mapped to IDENTIFIER so they scan as plain
* identifiers.
*/
private val allKeywords = List[(Name, Token)](
nme.ABSTRACTkw -> ABSTRACT,
nme.CASEkw -> CASE,
nme.CATCHkw -> CATCH,
nme.CLASSkw -> CLASS,
nme.DEFkw -> DEF,
nme.DOkw -> DO,
nme.ELSEkw -> ELSE,
nme.EXTENDSkw -> EXTENDS,
nme.FALSEkw -> FALSE,
nme.FINALkw -> FINAL,
nme.FINALLYkw -> FINALLY,
nme.FORkw -> FOR,
nme.FORSOMEkw -> FORSOME,
nme.IFkw -> IF,
nme.IMPLICITkw -> IMPLICIT,
nme.IMPORTkw -> IMPORT,
nme.LAZYkw -> LAZY,
nme.MATCHkw -> MATCH,
nme.NEWkw -> NEW,
nme.NULLkw -> NULL,
nme.OBJECTkw -> OBJECT,
nme.OVERRIDEkw -> OVERRIDE,
nme.PACKAGEkw -> PACKAGE,
nme.PRIVATEkw -> PRIVATE,
nme.PROTECTEDkw -> PROTECTED,
nme.RETURNkw -> RETURN,
nme.SEALEDkw -> SEALED,
nme.SUPERkw -> SUPER,
nme.THISkw -> THIS,
nme.THROWkw -> THROW,
nme.TRAITkw -> TRAIT,
nme.TRUEkw -> TRUE,
nme.TRYkw -> TRY,
nme.TYPEkw -> TYPE,
nme.VALkw -> VAL,
nme.VARkw -> VAR,
nme.WHILEkw -> WHILE,
nme.WITHkw -> WITH,
nme.YIELDkw -> YIELD,
nme.DOTkw -> DOT,
nme.USCOREkw -> USCORE,
nme.COLONkw -> COLON,
nme.EQUALSkw -> EQUALS,
nme.ARROWkw -> ARROW,
nme.LARROWkw -> LARROW,
nme.SUBTYPEkw -> SUBTYPE,
nme.VIEWBOUNDkw -> VIEWBOUND,
nme.SUPERTYPEkw -> SUPERTYPE,
nme.HASHkw -> HASH,
nme.ATkw -> AT,
nme.MACROkw -> IDENTIFIER,
nme.THENkw -> IDENTIFIER,
nme.SCRIPTkw -> IDENTIFIER
)
// Offset subtracted from a name's `start` to index into `kwArray`;
// assigned as a side effect of building `kwArray` below.
private var kwOffset: Offset = -1
// Dense lookup table from name-start (minus kwOffset) to keyword token,
// built once from `allKeywords`; non-keywords map to IDENTIFIER.
private val kwArray: Array[Token] = {
val (offset, arr) = createKeywordArray(allKeywords, IDENTIFIER)
kwOffset = offset
arr
}
// Reverse mapping (token -> keyword name), used by `token2string`.
final val token2name = (allKeywords map (_.swap)).toMap
// Token representation ----------------------------------------------------
/** Returns the string representation of given token. */
def token2string(token: Int): String = (token: @switch) match {
case IDENTIFIER
| BACKQUOTED_IDENT => "identifier"
case CHARLIT => "character literal"
case INTLIT => "integer literal"
case LONGLIT => "long literal"
case FLOATLIT => "float literal"
case DOUBLELIT => "double literal"
case STRINGLIT
| STRINGPART
| INTERPOLATIONID => "string literal"
case SYMBOLLIT => "symbol literal"
case LPAREN => "'('"
case RPAREN => "')'"
case LBRACE => "'{'"
case RBRACE => "'}'"
case LBRACKET => "'['"
case RBRACKET => "']'"
case EOF => "eof"
case ERROR => "something"
case SEMI => "';'"
case NEWLINE => "'\\n'"
case NEWLINES => "'\\n\\n'"
case COMMA => "','"
case CASECLASS => "case class"
case CASEOBJECT => "case object"
case XMLSTART => "$XMLSTART$<"
// SubScript tokens:
case CARET => "'^'"
case CARET2 => "'^^'"
case CURLY1 => "'~'"
case CURLY2 => "'~~'"
case CURLY3 => "'~~~'"
case CURLYBROKEN1 => "'~/'"
case CURLYBROKEN2 => "~/~"
case CURLYBROKEN3 => "'~/~~'"
case CURLYARROW1 => "'~>'"
case CURLYARROW2 => "'~~>'"
case CURLYARROW3 => "'~~~>'"
case CURLYBROKENARROW2 => "'~/~>'"
case CURLYBROKENARROW3 => "'~/~~>'"
case PLUS_CURLYARROW2 => "'+~~>'"
case PLUS_CURLYBROKENARROW2 => "'+~/~>'"
case PLUS_CURLYBROKENARROW3 => "'+~/~~>'"
case PLUS_CURLY1 => "'+~'"
case PLUS_CURLY2 => "'+~~'"
case PLUS_CURLY3 => "'+~~~'"
case PLUS_CURLYBROKEN1 => "'+~/'"
case PLUS_CURLYBROKEN2 => "'+~/~'"
case PLUS_CURLYBROKEN3 => "'+~/~~'"
case IF_QMARK => "'?if'"
case LBRACE_DOT => "'{.'"
case LBRACE_DOT3 => "'{...'"
case LBRACE_QMARK => "'{?'"
case LBRACE_EMARK => "'{!'"
case LBRACE_ASTERISK => "'{*'"
case LBRACE_CARET => "'{^'"
case RBRACE_DOT => "'.}'"
case RBRACE_DOT3 => "'...}"
case RBRACE_QMARK => "'?}'"
case RBRACE_EMARK => "'!}'"
case RBRACE_ASTERISK => "'*}'"
case RBRACE_CARET => "'^}'"
case DOT2 => "'..'"
case DOT3 => "'...'"
case LESS2 => "'<<'"
case GREATER2 => "'>>'"
case ARROW2 => "'==>'"
case LPAREN_PLUS_RPAREN => "'(+)'"
case LPAREN_MINUS_RPAREN => "'(-)'"
case LPAREN_PLUS_MINUS_RPAREN => "'(+-)'"
case LPAREN_SEMI_RPAREN => "'(;)'"
case LPAREN_ASTERISK => "'(*'"
case LPAREN_ASTERISK2 => "'(**'"
case RPAREN_ASTERISK => "'*)'"
case RPAREN_ASTERISK2 => "'**)'"
case THEN => "'then'"
case _ =>
// Keywords and anything else: look up the declared name, or show the
// raw token number when no name is registered.
(token2name get token) match {
case Some(name) => "'" + name + "'"
case _ => "'<" + token + ">'"
}
}
/** Signals a scanning error by exception; thrown by SourceFileScanner's
* `error` and `incompleteInputError` instead of reporting diagnostics.
*/
class MalformedInput(val offset: Offset, val msg: String) extends Exception
/** A scanner for a given source file not necessarily attached to a compilation unit.
* Useful for looking inside source files that are not currently compiled to see what's there
*/
class SourceFileScanner(val source: SourceFile) extends Scanner {
val buf = source.content
override val decodeUni: Boolean = !settings.nouescape
// suppress warnings, throw exception on errors
def deprecationWarning (off: Offset, msg: String): Unit = ()
def error (off: Offset, msg: String): Unit = throw new MalformedInput(off, msg)
def incompleteInputError(off: Offset, msg: String): Unit = throw new MalformedInput(off, msg)
}
/** A scanner over a given compilation unit
*
* Routes diagnostics through the unit's reporter and adds brace-healing
* support driven by a ParensAnalyzer.
*/
class UnitScanner(val unit: CompilationUnit, patches: List[BracePatch]) extends SourceFileScanner(unit.source) {
def this(unit: CompilationUnit) = this(unit, List())
override def deprecationWarning(off: Offset, msg: String) = currentRun.reporting.deprecationWarning(unit.position(off), msg)
override def error (off: Offset, msg: String) = reporter.error(unit.position(off), msg)
override def incompleteInputError(off: Offset, msg: String) = currentRun.parsing.incompleteInputError(unit.position(off), msg)
private var bracePatches: List[BracePatch] = patches
lazy val parensAnalyzer = new ParensAnalyzer(unit, List())
override def parenBalance(token: Token) = parensAnalyzer.balance(token)
override def healBraces(): List[BracePatch] = {
// Re-run the paren analyzer once per missing/surplus brace, accumulating
// one patch per iteration until RBRACE balance reaches zero. Skipped when
// tabs were seen, since column arithmetic is unreliable then.
var patches: List[BracePatch] = List()
if (!parensAnalyzer.tabSeen) {
var bal = parensAnalyzer.balance(RBRACE)
while (bal < 0) {patches = new ParensAnalyzer(unit, patches).insertRBrace(); bal += 1}
while (bal > 0) {patches = new ParensAnalyzer(unit, patches).deleteRBrace(); bal -= 1}
}
patches
}
/** Insert or delete a brace, if a patch exists for this offset */
override def applyBracePatch(): Boolean = {
if (bracePatches.isEmpty || bracePatches.head.off != offset) false
else {
val patch = bracePatches.head
bracePatches = bracePatches.tail
// println("applying brace patch "+offset)//DEBUG
// Insertion synthesizes an RBRACE token here; deletion skips the token.
if (patch.inserted) {next copyFrom this; error(offset, "Missing closing brace `}' assumed here"); token = RBRACE; true}
else { error(offset, "Unmatched closing brace '}' ignored here"); fetchToken(); false}
}
}
}
/** A throwaway scan over the unit that records bracket/brace balance and
* indentation per line, from which brace-insertion/deletion patches are
* derived for brace healing.
*/
class ParensAnalyzer(unit: CompilationUnit, patches: List[BracePatch]) extends UnitScanner(unit, patches) {
// Net balance per closing token: decremented at an opener, incremented at
// the corresponding closer, so 0 means balanced.
val balance = mutable.Map(RPAREN -> 0, RBRACKET -> 0, RBRACE -> 0,
// SubScript specials:
RBRACE_DOT -> 0,
RBRACE_DOT3 -> 0,
RBRACE_QMARK -> 0,
RBRACE_EMARK -> 0,
RBRACE_ASTERISK -> 0,
RBRACE_CARET -> 0,
RPAREN_ASTERISK -> 0,
RPAREN_ASTERISK2-> 0)
/** The source code with braces and line starts annotated with [NN] showing the index */
private def markedSource = {
val code = unit.source.content
val braces = code.indices filter (idx => "{}\n" contains code(idx)) toSet;
val mapped = code.indices map (idx => if (braces(idx)) s"${code(idx)}[$idx]" else "" + code(idx))
mapped.mkString("")
}
init()
log(s"ParensAnalyzer for ${unit.source} of length ${unit.source.content.length}\n```\n$markedSource\n```")
/** The offset of the first token on this line, or next following line if blank
*/
val lineStart = new ArrayBuffer[Int]
/** The list of matching top-level brace pairs (each of which may contain nested brace pairs).
*/
val bracePairs: List[BracePair] = {
var lineCount = 1
var lastOffset = 0
var indent = 0
// Snapshot of `balance` at the last indentation reset, used to decide
// whether a new line starts a fresh indentation context.
val oldBalance = scala.collection.mutable.Map[Int, Int]()
def markBalance() = for ((k, v) <- balance) oldBalance(k) = v
markBalance()
// Scan tokens recursively, building BracePairs into bpbuf; returns the
// (offset, indent) of the RBRACE that closed the enclosing pair, or
// (-1, -1) at EOF.
def scan(bpbuf: ListBuffer[BracePair]): (Int, Int) = {
if (token != NEWLINE && token != NEWLINES) {
// Advance lineCount past any newlines between the previous and
// current token, recording each new line's first-token offset.
while (lastOffset < offset) {
if (buf(lastOffset) == LF) lineCount += 1
lastOffset += 1
}
while (lineCount > lineStart.length) {
lineStart += offset
// reset indentation unless there are new opening brackets or
// braces since last ident line and at the same time there
// are no new braces.
if (balance(RPAREN ) >= oldBalance(RPAREN ) &&
balance(RBRACKET) >= oldBalance(RBRACKET) ||
balance(RBRACE ) != oldBalance(RBRACE ) ) {
indent = column(offset)
markBalance()
}
}
}
token match {
case LPAREN => balance(RPAREN ) -= 1; nextToken(); scan(bpbuf)
case LBRACKET => balance(RBRACKET) -= 1; nextToken(); scan(bpbuf)
case RPAREN => balance(RPAREN ) += 1; nextToken(); scan(bpbuf)
case RBRACKET => balance(RBRACKET) += 1; nextToken(); scan(bpbuf)
case LBRACE => balance(RBRACE) -= 1; val lc = lineCount
val loff = offset
val lindent = indent
val bpbuf1 = new ListBuffer[BracePair]
nextToken()
val (roff, rindent) = scan(bpbuf1)
// Only multi-line pairs are recorded; single-line braces are ignored.
if (lc != lineCount)
bpbuf += BracePair(loff, lindent, roff, rindent, bpbuf1.toList)
scan(bpbuf)
case RBRACE => balance(RBRACE ) += 1; val off = offset; nextToken(); (off, indent)
case EOF => (-1, -1)
case _ => nextToken(); scan(bpbuf)
}
}
val bpbuf = new ListBuffer[BracePair]
while (token != EOF) {
// An RBRACE with no matching LBRACE closes a synthetic pair whose left
// side is unknown (-1, -1), wrapping everything scanned so far.
val (roff, rindent) = scan(bpbuf)
if (roff != -1) {
val current = BracePair(-1, -1, roff, rindent, bpbuf.toList)
bpbuf.clear()
bpbuf += current
}
}
// Logging helpers: render the brace-pair tree and line starts.
def bracePairString(bp: BracePair, indent: Int): String = {
val rangeString = {
import bp._
val lline = line(loff)
val rline = line(roff)
val tokens = List(lline, lindent, rline, rindent) map (n => if (n < 0) "??" else "" + n)
"%s:%s to %s:%s".format(tokens: _*)
}
val outer = (" " * indent) + rangeString
val inners = bp.nested map (bracePairString(_, indent + 2))
if (inners.isEmpty) outer
else inners.mkString(outer + "\n", "\n", "")
}
def bpString = bpbuf.toList map ("\n" + bracePairString(_, 0)) mkString ""
def startString = lineStart.mkString("line starts: [", ", ", "]")
log(s"\n$startString\n$bpString")
bpbuf.toList
}
// Set by `column` when a tab is encountered; UnitScanner.healBraces skips
// healing in that case because column counts become unreliable.
var tabSeen = false
// Binary search in lineStart for the line containing `offset`.
def line(offset: Offset): Int = {
def findLine(lo: Int, hi: Int): Int = {
val mid = (lo + hi) / 2
if (offset < lineStart(mid)) findLine(lo, mid - 1)
else if (mid + 1 < lineStart.length && offset >= lineStart(mid + 1)) findLine(mid + 1, hi)
else mid
}
if (offset <= 0) 0
else findLine(0, lineStart.length - 1)
}
// Number of characters between the preceding CR/LF and `offset`; each tab
// counts as one column but also sets `tabSeen`.
def column(offset: Offset): Int = {
var col = 0
var i = offset - 1
while (i >= 0 && buf(i) != CR && buf(i) != LF) {
if (buf(i) == '\t') tabSeen = true
col += 1
i -= 1
}
col
}
// Insert `patch` into `patches`, keeping the list sorted by offset.
def insertPatch(patches: List[BracePatch], patch: BracePatch): List[BracePatch] = patches match {
case List() => List(patch)
case bp :: bps => if (patch.off < bp.off) patch :: patches
else bp :: insertPatch(bps, patch)
}
// Propose a single closing-brace insertion: find a brace pair whose left
// brace is indented deeper than its right one, then patch at the first
// following line whose indentation drops to the left brace's level or less.
def insertRBrace(): List[BracePatch] = {
def insert(bps: List[BracePair]): List[BracePatch] = bps match {
case List() => patches
case (bp @ BracePair(loff, lindent, roff, rindent, nested)) :: bps1 =>
if (lindent <= rindent) insert(bps1)
else {
// println("patch inside "+bp+"/"+line(loff)+"/"+lineStart(line(loff))+"/"+lindent"/"+rindent)//DEBUG
// Prefer a patch inside the nested pairs; `ne` detects whether one
// was found (same list instance means no change).
val patches1 = insert(nested)
if (patches1 ne patches) patches1
else {
var lin = line(loff) + 1
while (lin < lineStart.length && column(lineStart(lin)) > lindent)
lin += 1
if (lin < lineStart.length) {
val patches1 = insertPatch(patches, BracePatch(lineStart(lin), inserted = true))
//println("patch for "+bp+"/"+imbalanceMeasure+"/"+new ParensAnalyzer(unit, patches1).imbalanceMeasure)
/*if (improves(patches1))*/
patches1
/*else insert(bps1)*/
// (this test did not seem to work very well in practice)
} else patches
}
}
}
insert(bracePairs)
}
// Propose a single closing-brace deletion: mirror image of insertRBrace,
// targeting a pair whose right brace is indented deeper than its left one.
def deleteRBrace(): List[BracePatch] = {
def delete(bps: List[BracePair]): List[BracePatch] = bps match {
case List() => patches
case BracePair(loff, lindent, roff, rindent, nested) :: bps1 =>
if (lindent >= rindent) delete(bps1)
else {
val patches1 = delete(nested)
if (patches1 ne patches) patches1
else insertPatch(patches, BracePatch(roff, inserted = false))
}
}
delete(bracePairs)
}
// don't emit deprecation warnings about identifiers like `macro` or `then`
// when skimming through the source file trying to heal braces
override def emitIdentifierDeprecationWarnings = false
// Errors are silenced entirely during this analysis pass.
override def error(offset: Offset, msg: String): Unit = ()
}
}