All Downloads are FREE. Search and download functionalities are using the official Maven repository.

scala.tools.nsc.ast.parser.Scanners.scala Maven / Gradle / Ivy

There is a newer version: 2.11.2
Show newest version
/* NSC -- new Scala compiler
 * Copyright 2005-2013 LAMP/EPFL
 * @author  Martin Odersky
 */
package scala.tools.nsc
package ast.parser

import scala.tools.nsc.util.{ CharArrayReader, CharArrayReaderData }
import scala.reflect.internal.util._
import scala.reflect.internal.Chars._
import Tokens._
import scala.annotation.{ switch, tailrec }
import scala.collection.{ mutable, immutable }
import mutable.{ ListBuffer, ArrayBuffer }
import scala.tools.nsc.ast.parser.xml.Utility.isNameStart
import scala.language.postfixOps

/** See Parsers.scala / ParsersCommon for some explanation of ScannersCommon.
 */
trait ScannersCommon {
  val global : Global
  import global._

  /** Offset into source character array */
  type Offset = Int

  type Token = Int

  trait CommonTokenData {
    def token: Token
    def name: TermName
  }

  trait ScannerCommon extends CommonTokenData {
    // things to fill in, in addition to buf, decodeUni which come from CharArrayReader
    def error               (off: Offset, msg: String): Unit
    def incompleteInputError(off: Offset, msg: String): Unit
    def deprecationWarning  (off: Offset, msg: String): Unit
  }

  def createKeywordArray(keywords: Seq[(Name, Token)], defaultToken: Token): (Token, Array[Token]) = {
    val names = keywords sortBy (_._1.start) map { case (k, v) => (k.start, v) }
    val low   = names.head._1
    val high  = names.last._1
    val arr   = Array.fill(high - low + 1)(defaultToken)

    names foreach { case (k, v) => arr(k + low) = v }
    (low, arr)
  }
}

trait Scanners extends ScannersCommon {
  val global : Global
  import global._

  trait TokenData extends CommonTokenData {

    /** the next token */
    var token: Token = EMPTY

    /** the offset of the first character of the current token */
    var offset: Offset = 0

    /** the offset of the character following the token preceding this one */
    var lastOffset: Offset = 0

    /** the name of an identifier */
    var name: TermName = null

    /** the string value of a literal */
    var strVal: String = null

    /** the base of a number */
    var base: Int = 0

    def copyFrom(td: TokenData): this.type = {
      this.token      = td.token
      this.offset     = td.offset
      this.lastOffset = td.lastOffset
      this.name       = td.name
      this.strVal     = td.strVal
      this.base       = td.base
      this
    }
  }

  /** An interface to most of mutable data in Scanner defined in TokenData
   *  and CharArrayReader (+ next, prev fields) with copyFrom functionality
   *  to backup/restore data (used by quasiquotes' lookingAhead).
   */
  trait ScannerData extends TokenData with CharArrayReaderData {
    /** we need one token lookahead and one token history
     */
    val next: TokenData = new TokenData{}
    val prev: TokenData = new TokenData{}

    def copyFrom(sd: ScannerData): this.type = {
      this.next copyFrom sd.next
      this.prev copyFrom sd.prev
      super[CharArrayReaderData].copyFrom(sd)
      super[TokenData].copyFrom(sd)
      this
    }
  }

  abstract class Scanner extends CharArrayReader with TokenData with ScannerData with ScannerCommon {
    private def isDigit(c: Char) = java.lang.Character isDigit c
    
    var prevWasInSubScript_script   = false
    var isInSubScript_script        = false
    var isInSubScript_header        = false
    var isInSubScript_nativeCode    = false
    def isInSubScript_expression  = isInSubScript_script && 
                                   !isInSubScript_header && 
                                   !isInSubScript_nativeCode && 
                                  (!isInSubScript_partialScript || isInSubScript_partialScript_caseScript)
    
    var sepRegions_SubScript_partialScript = -1
    var isInSubScript_partialScript            = false
    var isInSubScript_partialScript_caseScript = false
    def start_SubScript_partialScript            = {isInSubScript_partialScript            =  true}
    def   end_SubScript_partialScript            = {isInSubScript_partialScript            = false}
    def start_SubScript_partialScript_caseScript = {isInSubScript_partialScript_caseScript =  true}
    def   end_SubScript_partialScript_caseScript = {isInSubScript_partialScript_caseScript = false}

    private var openComments = 0
    protected def putCommentChar(): Unit = nextChar()

    @tailrec private def skipLineComment(): Unit = ch match {
      case SU | CR | LF =>
      case _            => nextChar() ; skipLineComment()
    }
    private def maybeOpen() {
      putCommentChar()
      if (ch == '*') {
        putCommentChar()
        openComments += 1
      }
    }
    private def maybeClose(): Boolean = {
      putCommentChar()
      (ch == '/') && {
        putCommentChar()
        openComments -= 1
        openComments == 0
      }
    }
    @tailrec final def skipNestedComments(): Unit = ch match {
      case '/' => maybeOpen() ; skipNestedComments()
      case '*' => if (!maybeClose()) skipNestedComments()
      case SU  => incompleteInputError("unclosed comment")
      case _   => putCommentChar() ; skipNestedComments()
    }
    def skipDocComment(): Unit = skipNestedComments()
    def skipBlockComment(): Unit = skipNestedComments()

    private def skipToCommentEnd(isLineComment: Boolean) {
      nextChar()
      if (isLineComment) skipLineComment()
      else {
        openComments = 1
        val isDocComment = (ch == '*') && { nextChar(); true }
        if (isDocComment) {
          // Check for the amazing corner case of /**/
          if (ch == '/')
            nextChar()
          else
            skipDocComment()
        }
        else skipBlockComment()
      }
    }

    /** @pre ch == '/'
     *  Returns true if a comment was skipped.
     */
    def skipComment(): Boolean = ch match {
      case '/' | '*' => skipToCommentEnd(isLineComment = ch == '/') ; true
      case _         => false
    }
    def flushDoc(): DocComment = null

    /** To prevent doc comments attached to expressions from leaking out of scope
     *  onto the next documentable entity, they are discarded upon passing a right
     *  brace, bracket, or parenthesis.
     */
    def discardDocBuffer(): Unit = ()

    def isAtEnd = charOffset >= buf.length

    def resume(lastCode: Token) = {
      token = lastCode
      if (next.token != EMPTY && !reporter.hasErrors)
        syntaxError("unexpected end of input: possible missing '}' in XML block")

      nextToken()
    }

    /** A character buffer for literals
     */
    val cbuf = new StringBuilder

    /** append Unicode character to "cbuf" buffer
     */
    protected def putChar(c: Char) {
//      assert(cbuf.size < 10000, cbuf)
      cbuf.append(c)
    }

    /** Determines whether this scanner should emit identifier deprecation warnings,
     *  e.g. when seeing `macro` or `then`, which are planned to become keywords in future versions of Scala.
     */
    protected def emitIdentifierDeprecationWarnings = true

    /** Clear buffer and set name and token */
    private def finishNamed(idtoken: Token = IDENTIFIER) {
      name = newTermName(cbuf.toString)
      cbuf.clear()
      token = idtoken
      if (idtoken == IDENTIFIER) {
        val idx = name.start - kwOffset
        
        if (idx >= 0 && idx < kwArray.length) {
          token = kwArray(idx) 
          if (token == IDENTIFIER) { 
            if (allowIdent  != name 
            && nme.SCRIPTkw != name  // Note: only used in SubScript; do not warn
            && nme.BREAKkw  != name  // Note: only used in SubScript; do not warn
            )
              if (name == nme.MACROkw)
                syntaxError(s"$name is now a reserved word; usage as an identifier is disallowed")
              else if (emitIdentifierDeprecationWarnings)
                deprecationWarning(s"$name is now a reserved word; usage as an identifier is deprecated")
          }
        }
        if (isInSubScript_script && name.toString=="then") {
          //println(s"token was: $token idtoken=$idtoken cbuf.toString=${cbuf.toString}")
          token = THEN
        }
      }
    }

    /** Clear buffer and set string */
    private def setStrVal() {strVal = cbuf.toString; cbuf.clear()}

    /** a stack of tokens which indicates whether line-ends can be statement separators
     *  also used for keeping track of nesting levels.
     *  We keep track of the closing symbol of a region. This can be
     *  RPAREN    if region starts with '('
     *  RBRACKET  if region starts with '['
     *  RBRACE    if region starts with '{'
     *  ARROW     if region starts with `case'
     *  STRINGLIT if region is a string interpolation expression starting with '${'
     *            (the STRINGLIT appears twice in succession on the stack iff the
     *             expression is a multiline string literal).
     */
    var sepRegions: List[Token] = List()

// Get next token ------------------------------------------------------------

    /** Are we directly in a string interpolation expression?
     */
    private def inStringInterpolation = sepRegions.nonEmpty && sepRegions.head == STRINGLIT

    /** Are we directly in a multiline string interpolation expression?
     *  @pre inStringInterpolation
     */
    private def inMultiLineInterpolation = inStringInterpolation && sepRegions.tail.nonEmpty && sepRegions.tail.head == STRINGPART

    /** read next token and return last offset*/
    def skipToken(): Offset = {val off = offset; nextToken(); off}

    /** Allow an otherwise deprecated ident here */
    private var allowIdent: Name = nme.EMPTY

    /** Get next token, and allow the otherwise deprecated ident `name`  */
    def nextTokenAllow(name: Name) = {
      val prev = allowIdent;     allowIdent = name
      try {nextToken()} finally {allowIdent = prev}
    }

    /** Produce next token, filling TokenData fields of Scanner.
     */
    def nextToken() {
      prevWasInSubScript_script = isInSubScript_script
      val lastToken = token
      
      // Adapt sepRegions according to last token
      (lastToken: @switch) match {
        case LPAREN   => sepRegions = RPAREN   :: sepRegions
        case LBRACKET => sepRegions = RBRACKET :: sepRegions
        case LBRACE   => sepRegions = RBRACE   :: sepRegions
        case CASE     => sepRegions = ARROW    :: sepRegions
        
        // subscript tokens
        case LBRACE_DOT       => sepRegions = RBRACE_DOT      :: sepRegions
        case LBRACE_DOT3      => sepRegions = RBRACE_DOT3     :: sepRegions
        case LBRACE_QMARK     => sepRegions = RBRACE_QMARK    :: sepRegions
        case LBRACE_EMARK     => sepRegions = RBRACE_EMARK    :: sepRegions
        case LBRACE_ASTERISK  => sepRegions = RBRACE_ASTERISK :: sepRegions
        case LBRACE_CARET     => sepRegions = RBRACE_CARET    :: sepRegions
        
        case LPAREN_ASTERISK  => sepRegions = RPAREN_ASTERISK :: sepRegions
        case LPAREN_ASTERISK2 => sepRegions = RPAREN_ASTERISK2:: sepRegions
       
        case RBRACE   => while   (!sepRegions.isEmpty && sepRegions.head != RBRACE)
                                   sepRegions = sepRegions.tail
          if (!sepRegions.isEmpty) sepRegions = sepRegions.tail
          discardDocBuffer()
        case RBRACKET | RPAREN 
           | RBRACE_DOT       // subscript tokens
           | RBRACE_DOT3    
           | RBRACE_QMARK   
           | RBRACE_EMARK   
           | RBRACE_ASTERISK
           | RBRACE_CARET  
           | RPAREN_ASTERISK
           | RPAREN_ASTERISK2
       
                       => if (!sepRegions.isEmpty && sepRegions.head == lastToken) sepRegions = sepRegions.tail;   discardDocBuffer()
        case ARROW     => if (!sepRegions.isEmpty && sepRegions.head == lastToken) sepRegions = sepRegions.tail
        case STRINGLIT => if (        inMultiLineInterpolation                   ) sepRegions = sepRegions.tail.tail
                     else if (           inStringInterpolation                   ) sepRegions = sepRegions.tail
        case _ =>
      }

      // Read a token or copy it from `next` tokenData
      if (next.token == EMPTY) {
        lastOffset = charOffset - 1
        if (lastOffset > 0 && buf(lastOffset) == '\n' && buf(lastOffset - 1) == '\r') {
          lastOffset -= 1
        }
        if (inStringInterpolation) fetchStringPart() 
        else                       fetchToken()

//println("fetched:  "+this)        

        if(token == ERROR) {
          if (  inMultiLineInterpolation) sepRegions = sepRegions.tail.tail
          else if (inStringInterpolation) sepRegions = sepRegions.tail
        }
      } else {
        this copyFrom next
        next.token = EMPTY
//println("Fetched:  "+this)        
      }

      /** Insert NEWLINE or NEWLINES if
       *  - we are after a newline
       *  - we are within a { ... } or on toplevel (wrt sepRegions)
       *  - NOT isInSubScript_expression and the current token can start a statement and the one before can end it
       *  insert NEWLINES if we are past a blank line, NEWLINE otherwise
       * 
       *  TBD: maybe require different handling if isInSubScript_expression
       */
      if (!applyBracePatch() && afterLineEnd() && 
          (!isInSubScript_expression && inLastOfStat(lastToken) && inFirstOfStat(token)) &&
          (sepRegions.isEmpty || sepRegions.head == RBRACE)) {
        next copyFrom this
        offset = if (lineStartOffset <= offset) lineStartOffset else lastLineStartOffset
        token  = if (pastBlankLine()) NEWLINES else NEWLINE
      }

      // Join CASE + CLASS => CASECLASS, CASE + OBJECT => CASEOBJECT, SEMI + ELSE => ELSE
      if (token == CASE) {
        prev copyFrom this
        val nextLastOffset = charOffset - 1
        fetchToken()
        def resetOffset() {
          offset     = prev.offset
          lastOffset = prev.lastOffset
        }
        if      (token == CLASS ) {token = CASECLASS ; resetOffset()} 
        else if (token == OBJECT) {token = CASEOBJECT; resetOffset()} 
        else {
          lastOffset = nextLastOffset
          next copyFrom this
          this copyFrom prev
        }
      } else if (token == SEMI) {
        prev copyFrom this
        fetchToken()
        if (token != ELSE) {
          next copyFrom this
          this copyFrom prev
        }
      }

//      print("["+this+"]")
    }

    /** Is current token first one after a newline? */
    def afterLineEnd(): Boolean =
      lastOffset < lineStartOffset &&
      (lineStartOffset <= offset ||
       lastOffset < lastLineStartOffset && lastLineStartOffset <= offset)

    /** Is there a blank line between the current token and the last one?
     *  @pre  afterLineEnd().
     */
    private def pastBlankLine(): Boolean = {
      var idx = lastOffset
      var ch = buf(idx)
      val end = offset
      while (idx < end) {
        if (ch == LF || ch == FF) {
          do {
            idx += 1; ch = buf(idx)
            if (ch == LF || ch == FF) {
//              println("blank line found at "+lastOffset+":"+(lastOffset to idx).map(buf(_)).toList)
              return true
            }
            if (idx == end) return false
          } while (ch <= ' ')
        }
        idx += 1; ch = buf(idx)
      }
      false
    }

    /** read next token, filling TokenData fields of Scanner.
     */
    protected final def fetchToken() {
      offset = charOffset - 1
      (ch: @switch) match {

        case ' ' | '\t' | CR | LF | FF => nextChar(); fetchToken()
        case 'A' | 'B' | 'C' | 'D' | 'E' |
             'F' | 'G' | 'H' | 'I' | 'J' |
             'K' | 'L' | 'M' | 'N' | 'O' |
             'P' | 'Q' | 'R' | 'S' | 'T' |
             'U' | 'V' | 'W' | 'X' | 'Y' |
             'Z' | '$' | '_' |
             'a' | 'b' | 'c' | 'd' | 'e' |
             'f' | 'g' | 'h' | 'i' | 'j' |
             'k' | 'l' | 'm' | 'n' | 'o' |
             'p' | 'q' | 'r' | 's' | 't' |
             'u' | 'v' | 'w' | 'x' | 'y' |  // scala-mode: need to understand multi-line case patterns
             'z' =>
          putChar(ch)
          nextChar()
          getIdentRest()
          if (ch == '"' && token == IDENTIFIER)
            token = INTERPOLATIONID
       case '~' => if (isInSubScript_expression) {
                      val lookahead = lookaheadReader; lookahead.nextChar()
                      if (lookahead.ch == '~') {lookahead.nextChar()
                       if(lookahead.ch == '>') {nextChar(); nextChar(); nextChar(); token = CURLYARROW2; return} 
                      }
                      else 
                       if (lookahead.ch == '/') {lookahead.nextChar()
                        if (lookahead.ch == '~') {lookahead.nextChar()
                         if(lookahead.ch == '>') {nextChar(); nextChar(); nextChar(); nextChar(); token = CURLYBROKENARROW2; return} 
                        }
                      }
                   }
                   getOperatorRest()
       case '=' => if (isInSubScript_partialScript || isInSubScript_expression) {
                      val lookahead = lookaheadReader; lookahead.nextChar()
                      if (lookahead.ch == '=') {lookahead.nextChar()
                       if(lookahead.ch == '>') {nextChar(); nextChar(); nextChar(); token = ARROW2; return} 
                      }
                   }
                   getOperatorRest()
       case '>' => if (isInSubScript_partialScript) {nextChar(); 
                                                     if (ch=='>') {nextChar(); token = GREATER2; return} else putChar('>')}
                   getOperatorRest()
       case '<' => if (isInSubScript_expression   ) {nextChar(); if (ch=='<') {nextChar(); token =    LESS2; return} else putChar('<')} 
          // is XMLSTART?
          def fetchLT() = {
            val last = if (charOffset >= 2) buf(charOffset - 2) else ' '
            nextChar()
            last match {
              case ' ' | '\t' | '\n' | '{' | '(' | '>' if isNameStart(ch) || ch == '!' || ch == '?' =>
                token = XMLSTART
              case _ =>
                // Console.println("found '<', but last is '"+in.last+"'"); // DEBUG
                putChar('<')
                getOperatorRest()
            }
          }
          fetchLT
        case '!' | '^' | '?' =>
          val chOld = ch
          nextChar()
          if (isInSubScript_nativeCode && ch=='}') {
            nextChar()
            token = chOld match {
              case '!' => RBRACE_EMARK
              case '^' => RBRACE_CARET
              case '?' => RBRACE_QMARK
            }
          }
          else {
            putChar(chOld)
            getOperatorRest()
          }
        case '*' =>
          val chOld = ch
          nextChar()
          if      (isInSubScript_nativeCode && ch=='}') {nextChar(); token = RBRACE_ASTERISK}
          else if (isInSubScript_expression && ch==')') {nextChar(); token = RPAREN_ASTERISK}
          else if (isInSubScript_expression && ch=='*') {nextChar()
                                           if (ch==')') {nextChar(); token = RPAREN_ASTERISK2}
                                           else {putChar('*'); putChar('*'); getOperatorRest()}
          }                                 
          else {
            putChar(chOld)
            getOperatorRest()
          }
        case '@' | '#' | '%' |
             '+' | '-' | 
             // '~' |  '>' | '<' | '=' |  
             ':'| '&' |
             '|' | '\\' =>
          putChar(ch)
          nextChar()
          getOperatorRest()
        case '/' =>
          nextChar()
          if (skipComment()) {
            fetchToken()
          } else {
            putChar('/')
            getOperatorRest()
          }
        case '0' =>
          def fetchZero() = {
            putChar(ch)
            nextChar()
            if (ch == 'x' || ch == 'X') {
              nextChar()
              base = 16
            } else {
              base = 8
            }
            getNumber()
          }
          fetchZero()
        case '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
          base = 10
          getNumber()
        case '`'  => getBackquotedIdent()
        case '\"' =>
          def fetchDoubleQuote() = {
            if (token == INTERPOLATIONID) {
              nextRawChar()
              if (ch == '\"') {
                val lookahead = lookaheadReader
                lookahead.nextChar()
                if (lookahead.ch == '\"') {
                  nextRawChar()                        // now eat it
                  offset += 3
                  nextRawChar()
                  getStringPart(multiLine = true)
                  sepRegions = STRINGPART :: sepRegions // indicate string part
                  sepRegions = STRINGLIT  :: sepRegions // once more to indicate multi line string part
                } else {
                  nextChar()
                  token = STRINGLIT
                  strVal = ""
                }
              } else {
                offset += 1
                getStringPart(multiLine = false)
                sepRegions = STRINGLIT :: sepRegions // indicate single line string part
              }
            } else {
              nextChar()
              if   (ch == '\"') {nextChar()
                if (ch == '\"') {nextRawChar(); getRawStringLit()}
                else {token = STRINGLIT; strVal = ""}
              } 
              else {getStringLit()}
            }
          }
          fetchDoubleQuote()
        case '\'' =>
          def fetchSingleQuote() = {
            nextChar()
            if   (isIdentifierStart(ch)) charLitOr(getIdentRest)
            else if (isOperatorPart(ch) 
                 && (ch != '\\'))        charLitOr(getOperatorRest)
            else {
              getLitChar()
              if (ch == '\'') {nextChar(); token = CHARLIT; setStrVal()}
              else {syntaxError("unclosed character literal")}
            }
          }
          fetchSingleQuote()
        case '.' =>
          nextChar()
          if ('0' <= ch && ch <= '9') {putChar('.'); getFraction()}
          else                   {            token = DOT }
          if (isInSubScript_header || isInSubScript_expression) {
                 if  (ch == '.') {nextChar()
                  if (ch == '.') {nextChar(); token = DOT3}
                  else           {            token = DOT2}}
          } 
          else if (isInSubScript_nativeCode) {
                 if  (ch == '}') {nextChar(); token = RBRACE_DOT}
            else if  (ch == '.') {nextChar()
                  if (ch == '.') {nextChar()
                   if(ch == '}') {nextChar(); token = RBRACE_DOT3}
                   else          {syntaxError("'...' unexpected")}}
                  else           {syntaxError( "'..' unexpected")}}
          } 
        case '{' => nextChar(); token = LBRACE
          if (isInSubScript_expression) {
            (ch: @switch) match {
              case '?' => nextChar(); token = LBRACE_QMARK
              case '!' => nextChar(); token = LBRACE_EMARK
              case '*' => nextChar(); token = LBRACE_ASTERISK
              case '^' => nextChar(); token = LBRACE_CARET
              case '.' => nextChar(); token = LBRACE_DOT
                          if  (ch=='.'
                          &&   lookaheadReader.ch == '.') {
                               nextChar(); nextChar(); token = LBRACE_DOT3
                          }
              case  _  =>
            }
          }
        case '(' => nextChar(); token = LPAREN
          if (isInSubScript_expression) { // (+)  (-)  (+-)  (;) (* (**
            if   (ch == '*') {nextChar(); token = LPAREN_ASTERISK // no }
              if (ch == '*') {nextChar(); token = LPAREN_ASTERISK2}
            }
            else {
              var isSpecialOperand = false            
              val lookahead = lookaheadReader
              if   (lookahead.ch == '+') {lookahead.nextChar() // no } 
                if (lookahead.ch == '-') {lookahead.nextChar()}
                isSpecialOperand = lookahead.ch == ')'
              }
              else if (lookahead.ch == '-' 
                   ||  lookahead.ch == ';') {lookahead.nextChar();
                  isSpecialOperand = lookahead.ch == ')'
              }
              if (isSpecialOperand) {
                if      (ch == '+') {nextChar(); token = LPAREN_PLUS_RPAREN // no }
                  if    (ch == '-') {nextChar(); token = LPAREN_PLUS_MINUS_RPAREN}
                }
                else if (ch == '-') {nextChar(); token = LPAREN_MINUS_RPAREN}
                else if (ch == ';') {nextChar(); token = LPAREN_SEMI_RPAREN}
                nextChar()
              }
            }
          }          
        case ')' => nextChar(); token = RPAREN
        case ';' => if (!isInSubScript_expression) {nextChar(); token = SEMI}
                    else   {putChar(ch); nextChar(); getOperatorRest()}
        case ',' => nextChar(); token = COMMA
        case '}' => nextChar(); token = RBRACE
        case '[' => nextChar(); token = LBRACKET
        case ']' => nextChar(); token = RBRACKET
        case SU =>
          if (isAtEnd) token = EOF
          else {
            syntaxError("illegal character")
            nextChar()
          }
        case _ =>
          def fetchOther() = {
            if (ch == '\u21D2') {
              nextChar(); token = ARROW
            } else if (ch == '\u2190') {
              nextChar(); token = LARROW
            } else if (Character.isUnicodeIdentifierStart(ch)) {
              putChar(ch)
              nextChar()
              getIdentRest()
            } else if (isSpecial(ch)) {
              putChar(ch)
              nextChar()
              getOperatorRest()
            } else {
              syntaxError("illegal character '" + ("" + '\\' + 'u' + "%04x".format(ch.toInt)) + "'")
              nextChar()
            }
          }
          fetchOther()
      }
    }

    /** Can token start a statement? */
    def inFirstOfStat(token: Token) = token match {
      case EOF | CATCH | ELSE | EXTENDS | FINALLY | FORSOME | MATCH | WITH | YIELD |
           COMMA | SEMI | NEWLINE | NEWLINES | DOT | COLON | EQUALS | ARROW | LARROW | ARROW2 | GREATER2 | 
           SUBTYPE | VIEWBOUND | SUPERTYPE | HASH | RPAREN | RBRACKET | RBRACE | LBRACKET =>
        false
      case _ =>
        true
    }

    /** Can token end a statement? */
    def inLastOfStat(token: Token) = token match {
      case CHARLIT | INTLIT | LONGLIT | FLOATLIT | DOUBLELIT | STRINGLIT | SYMBOLLIT |
           IDENTIFIER | BACKQUOTED_IDENT | THIS | NULL | TRUE | FALSE | RETURN | USCORE |
           TYPE | XMLSTART | RPAREN | RBRACKET | RBRACE =>
        true
      case _ =>
        false
    }

// Identifiers ---------------------------------------------------------------

    private def getBackquotedIdent() {
      nextChar()
      getLitChars('`')
      if (ch == '`') {
        nextChar()
        finishNamed(BACKQUOTED_IDENT)
        if (name.length == 0) syntaxError("empty quoted identifier")
      }
      else                                    syntaxError("unclosed quoted identifier")
    }

    private def getIdentRest(): Unit = (ch: @switch) match {
      case 'A' | 'B' | 'C' | 'D' | 'E' |
           'F' | 'G' | 'H' | 'I' | 'J' |
           'K' | 'L' | 'M' | 'N' | 'O' |
           'P' | 'Q' | 'R' | 'S' | 'T' |
           'U' | 'V' | 'W' | 'X' | 'Y' |
           'Z' | '$' |
           'a' | 'b' | 'c' | 'd' | 'e' |
           'f' | 'g' | 'h' | 'i' | 'j' |
           'k' | 'l' | 'm' | 'n' | 'o' |
           'p' | 'q' | 'r' | 's' | 't' |
           'u' | 'v' | 'w' | 'x' | 'y' |
           'z' |
           '0' | '1' | '2' | '3' | '4' |
           '5' | '6' | '7' | '8' | '9' 
               => putChar(ch); nextChar(); getIdentRest()
      case '_' => putChar(ch); nextChar(); getIdentOrOperatorRest()
      case SU  => finishNamed() // strangely enough, Character.isUnicodeIdentifierPart(SU) returns true!
      case  _  => if (Character.isUnicodeIdentifierPart(ch)) {
                   putChar(ch); nextChar(); getIdentRest()
                  }
                  else finishNamed()
    }

    private def getOperatorRest(): Unit = {
      if (isInSubScript_header) { // after seeing += and = for header definitions, stop there to allow prefix operators as in x =|| y z
        cbuf.toString match {
          case "="       if(ch!='>')  => finishNamed(); return
          case "+="                   => finishNamed(); return
          case "?" | "??" if(ch==':') => finishNamed(); return // allow parameters lists like (p?:Char, pp??:Char)
          case _                      =>
        }
      }
      (ch: @switch) match {
        case '/' =>
          nextChar()
          if (skipComment()) finishNamed()
          else { putChar('/'); getOperatorRest() }
        case '~' | '!' | '@' | '#' | '%' |
             '^' | '*' | '+' | '-' | '<' |
             '>' | '?' | ':' | '=' | '&' |
             '|' | '\\'                  =>                 putChar(ch); nextChar(); getOperatorRest()
        case _ => if (isSpecial(ch)
                  ||  isInSubScript_expression && ch==';') {putChar(ch); nextChar(); getOperatorRest()}
                  else finishNamed()
      }
    }

    private def getIdentOrOperatorRest() {
      if (isIdentifierPart(ch)) getIdentRest()
      else ch match {
        case '~' | '!' | '@' | '#' | '%' |
             '^' | '*' | '+' | '-' | '<' |
             '>' | '?' | ':' | '=' | '&' |
             '|' | '\\' | '/' =>     getOperatorRest()
        case _ => if (isSpecial(ch)) getOperatorRest()
                  else finishNamed()
      }
    }


// Literals -----------------------------------------------------------------

    private def getStringLit() = {
      getLitChars('"')
      if (ch == '"') {setStrVal(); nextChar(); token = STRINGLIT}
      else syntaxError("unclosed string literal")
    }

    private def getRawStringLit(): Unit = {
      if (ch == '\"') {
        nextRawChar()
        if (isTripleQuote()) {setStrVal(); token = STRINGLIT} 
        else getRawStringLit()
      } else if (ch == SU) {
        incompleteInputError("unclosed multi-line string literal")
      } else {
        putChar(ch)
        nextRawChar()
        getRawStringLit()
      }
    }

    @scala.annotation.tailrec private def getStringPart(multiLine: Boolean): Unit = {
      def finishStringPart() = {
        setStrVal()
        token = STRINGPART
        next.lastOffset = charOffset - 1
        next.offset = charOffset - 1
      }
      if (ch == '"') {
        if (multiLine) {
          nextRawChar()
          if (isTripleQuote()) {
            setStrVal()
            token = STRINGLIT
          } else
            getStringPart(multiLine)
        } else {
          nextChar()
          setStrVal()
          token = STRINGLIT
        }
      } else if (ch == '$') {
        nextRawChar()
        if (ch == '$') {
          putChar(ch)
          nextRawChar()
          getStringPart(multiLine)
        } else if (ch == '{') {
          finishStringPart()
          nextRawChar()
          next.token = LBRACE
        } else if (ch == '_') {
          finishStringPart()
          nextRawChar()
          next.token = USCORE
        } else if (Character.isUnicodeIdentifierStart(ch)) {
          finishStringPart()
          do {
            putChar(ch)
            nextRawChar()
          } while (ch != SU && Character.isUnicodeIdentifierPart(ch))
          next.token = IDENTIFIER
          next.name = newTermName(cbuf.toString)
          cbuf.clear()
          val idx = next.name.start - kwOffset
          if (idx >= 0 && idx < kwArray.length) {
            next.token = kwArray(idx)
          }
        } else {
          syntaxError("invalid string interpolation: `$$', `$'ident or `$'BlockExpr expected")
        }
      } else {
        val isUnclosedLiteral = !isUnicodeEscape && (ch == SU || (!multiLine && (ch == CR || ch == LF)))
        if (isUnclosedLiteral) {
          if (multiLine)
            incompleteInputError("unclosed multi-line string literal")
          else
            syntaxError("unclosed string literal")
        }
        else {
          putChar(ch)
          nextRawChar()
          getStringPart(multiLine)
        }
      }
    }

    private def fetchStringPart() = {
      offset = charOffset - 1
      getStringPart(multiLine = inMultiLineInterpolation)
    }

    private def isTripleQuote(): Boolean =
      if (ch == '"') {
        nextRawChar()
        if (ch == '"') {
          nextChar()
          while (ch == '"') {
            putChar('"')
            nextChar()
          }
          true
        } else {
          putChar('"')
          putChar('"')
          false
        }
      } else {
        putChar('"')
        false
      }

    /** copy current character into cbuf, interpreting any escape sequences,
     *  and advance to next character.
     */
    protected def getLitChar(): Unit =
      if (ch == '\\') {
        nextChar()
        if ('0' <= ch && ch <= '7') {
          val start = charOffset - 2
          val leadch: Char = ch
          var oct: Int = digit2int(ch, 8)
          nextChar()
          if ('0' <= ch && ch <= '7') {
            oct = oct * 8 + digit2int(ch, 8)
            nextChar()
            if (leadch <= '3' && '0' <= ch && ch <= '7') {
              oct = oct * 8 + digit2int(ch, 8)
              nextChar()
            }
          }
          val alt = if (oct == LF) "\\n" else "\\u%04x" format oct
          def msg(what: String) = s"Octal escape literals are $what, use $alt instead."
          if (settings.future)
            syntaxError(start, msg("unsupported"))
          else
            deprecationWarning(start, msg("deprecated"))
          putChar(oct.toChar)
        } else {
          ch match {
            case 'b'  => putChar('\b')
            case 't'  => putChar('\t')
            case 'n'  => putChar('\n')
            case 'f'  => putChar('\f')
            case 'r'  => putChar('\r')
            case '\"' => putChar('\"')
            case '\'' => putChar('\'')
            case '\\' => putChar('\\')
            case _    => invalidEscape()
          }
          nextChar()
        }
      } else  {
        putChar(ch)
        nextChar()
      }

    protected def invalidEscape(): Unit = {
      syntaxError(charOffset - 1, "invalid escape character")
      putChar(ch)
    }

    private def getLitChars(delimiter: Char) = {
      while (ch != delimiter && !isAtEnd && (ch != SU && ch != CR && ch != LF || isUnicodeEscape))
        getLitChar()
    }

    /** read fractional part and exponent of floating point number
     *  if one is present.
     */
    protected def getFraction() {
      token = DOUBLELIT
      while ('0' <= ch && ch <= '9') {putChar(ch); nextChar()}
      if (ch == 'e' || ch == 'E') {
        val lookahead = lookaheadReader
        lookahead.nextChar()
        if (lookahead.ch == '+' || lookahead.ch == '-') {lookahead.nextChar()}
        if ('0' <= lookahead.ch && lookahead.ch <= '9') {
                                          putChar(ch); nextChar()
          if    (ch == '+' || ch == '-') {putChar(ch); nextChar()}
          while ('0' <= ch && ch <= '9') {putChar(ch); nextChar()}
        }
        token = DOUBLELIT
      }
      if      (ch == 'd' || ch == 'D') {putChar(ch); nextChar(); token = DOUBLELIT} 
      else if (ch == 'f' || ch == 'F') {putChar(ch); nextChar(); token =  FLOATLIT}
      checkNoLetter()
      setStrVal()
    }

    /** Convert current strVal to char value
     */
    def charVal: Char = if (strVal.length > 0) strVal.charAt(0) else 0

    /** Convert current strVal, base to long value
     *  This is tricky because of max negative value.
     */
    def intVal(negated: Boolean): Long = {
      if (token == CHARLIT && !negated) {
        charVal.toLong
      } else {
        var value: Long = 0
        val divider = if (base == 10) 1 else 2
        val limit: Long =
          if (token == LONGLIT) Long.MaxValue else Int.MaxValue
        var i = 0
        val len = strVal.length
        while (i < len) {
          val d = digit2int(strVal charAt i, base)
          if (d < 0) {
            syntaxError("malformed integer number")
            return 0
          }
          if (value < 0 ||
              limit / (base / divider) < value ||
              limit - (d / divider) < value * (base / divider) &&
              !(negated && limit == value * base - 1 + d)) {
                syntaxError("integer number too large")
                return 0
              }
          value = value * base + d
          i += 1
        }
        if (negated) -value else value
      }
    }

    def intVal: Long = intVal(negated = false)

    /** Convert current strVal, base to double value
    */
    def floatVal(negated: Boolean): Double = {

      val limit: Double =
        if (token == DOUBLELIT) Double.MaxValue else Float.MaxValue
      try {
        val value: Double = java.lang.Double.valueOf(strVal).doubleValue()
        def isDeprecatedForm = {
          val idx = strVal indexOf '.'
          (idx == strVal.length - 1) || (
               (idx >= 0)
            && (idx + 1 < strVal.length)
            && (!Character.isDigit(strVal charAt (idx + 1)))
          )
        }
        if (value > limit)
          syntaxError("floating point number too large")
        if (isDeprecatedForm)
          syntaxError("floating point number is missing digit after dot")

        if (negated) -value else value
      } catch {
        case _: NumberFormatException =>
          syntaxError("malformed floating point number")
          0.0
      }
    }

    def floatVal: Double = floatVal(negated = false)

    def checkNoLetter() {
      if (isIdentifierPart(ch) && ch >= ' ')
        syntaxError("Invalid literal number")
    }

    /** Read a number into strVal and set base
    */
    protected def getNumber() {
      val base1 = if (base < 10) 10 else base
      // Read 8,9's even if format is octal, produce a malformed number error afterwards.
      // At this point, we have already read the first digit, so to tell an innocent 0 apart
      // from an octal literal 0123... (which we want to disallow), we check whether there
      // are any additional digits coming after the first one we have already read.
      var notSingleZero = false
      while (digit2int(ch, base1) >= 0) {
        putChar(ch)
        nextChar()
        notSingleZero = true
      }
      token = INTLIT

      /* When we know for certain it's a number after using a touch of lookahead */
      def restOfNumber() = {putChar(ch); nextChar(); getFraction()}

      def restOfUncertainToken() = {
        def isEfd = ch match { case 'e' | 'E' | 'f' | 'F' | 'd' | 'D' => true ; case _ => false }
        def isL   = ch match { case 'l' | 'L' => true ; case _ => false }

        if (base <= 10 && isEfd) getFraction()
        else {
          // Checking for base == 8 is not enough, because base = 8 is set
          // as soon as a 0 is read in `case '0'` of method fetchToken.
          if (base == 8 && notSingleZero) syntaxError("Non-zero integral values may not have a leading zero.")
          setStrVal()
          if (isL) {nextChar(); token = LONGLIT}
          else checkNoLetter()
        }
      }

      if (base > 10 || ch != '.') restOfUncertainToken()
      else {
        val lookahead = lookaheadReader
        val c = lookahead.getc()

        /* Prohibit 1. */
        if (!isDigit(c))
          return setStrVal()

        val isDefinitelyNumber = (c: @switch) match {
          /** Another digit is a giveaway. */
          case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'  => true

          /* Backquoted idents like 22.`foo`. */
          case '`'       => return setStrVal()  /** Note the early return */

          /* These letters may be part of a literal, or a method invocation on an Int.
           */
          case 'd' | 'D' 
             | 'f' | 'F' => !isIdentifierPart(lookahead.getc())

          /* A little more special handling for e.g. 5e7 */
          case 'e' | 'E' => val ch = lookahead.getc()
                            !isIdentifierPart(ch) || (isDigit(ch) || ch == '+' || ch == '-')
          case x         => !isIdentifierStart(x)
        }
        if (isDefinitelyNumber) restOfNumber()
        else restOfUncertainToken()
      }
    }

    /** Parse character literal if current character is followed by \',
     *  or follow with given op and return a symbol literal token
     */
    def charLitOr(op: () => Unit) {
      putChar(ch)
      nextChar()
      if (ch == '\'') {
        nextChar()
        token = CHARLIT
        setStrVal()
      } else {
        op()
        token = SYMBOLLIT
        strVal = name.toString
      }
    }

// Errors -----------------------------------------------------------------

    /** generate an error at the given offset
    */
    def syntaxError(off: Offset, msg: String) {error(off, msg); token = ERROR}

    /** generate an error at the current token offset
    */
    def syntaxError(msg: String): Unit = syntaxError(offset, msg)

    def deprecationWarning(msg: String): Unit = deprecationWarning(offset, msg)

    /** signal an error where the input ended in the middle of a token */
    def incompleteInputError(msg: String) {incompleteInputError(offset, msg); token = EOF}

    override def toString() = token match {
      case IDENTIFIER 
        | BACKQUOTED_IDENT =>              "id(" +     name + ")"
      case CHARLIT         =>            "char(" +   intVal + ")"
      case INTLIT          =>             "int(" +   intVal + ")"
      case LONGLIT         =>            "long(" +   intVal + ")"
      case FLOATLIT        =>           "float(" + floatVal + ")"
      case DOUBLELIT       =>          "double(" + floatVal + ")"
      case STRINGLIT       =>          "string(" +   strVal + ")"
      case STRINGPART      =>      "stringpart(" +   strVal + ")"
      case INTERPOLATIONID => "interpolationid(" +     name + ")"
      case SEMI            => ";"
      case NEWLINE         => ";"
      case NEWLINES        => ";;"
      case COMMA           => ","
      case _               => token2string(token)
    }

    // ------------- brace counting and healing ------------------------------

    /** overridden in UnitScanners:
     *  apply brace patch if one exists for this offset
     *  return true if subsequent end of line handling should be suppressed.
     */
    def applyBracePatch(): Boolean = false

    /** overridden in UnitScanners */
    def parenBalance(token: Token) = 0

    /** overridden in UnitScanners */
    def healBraces(): List[BracePatch] = List()

    /** Initialization method: read first char, then first token */
    def init() {nextChar(); nextToken()}
  } // end Scanner

  // ------------- keyword configuration -----------------------------------

  private val allKeywords = List[(Name, Token)](
    nme.ABSTRACTkw  -> ABSTRACT,
    nme.CASEkw      -> CASE,
    nme.CATCHkw     -> CATCH,
    nme.CLASSkw     -> CLASS,
    nme.DEFkw       -> DEF,
    nme.DOkw        -> DO,
    nme.ELSEkw      -> ELSE,
    nme.EXTENDSkw   -> EXTENDS,
    nme.FALSEkw     -> FALSE,
    nme.FINALkw     -> FINAL,
    nme.FINALLYkw   -> FINALLY,
    nme.FORkw       -> FOR,
    nme.FORSOMEkw   -> FORSOME,
    nme.IFkw        -> IF,
    nme.IMPLICITkw  -> IMPLICIT,
    nme.IMPORTkw    -> IMPORT,
    nme.LAZYkw      -> LAZY,
    nme.MATCHkw     -> MATCH,
    nme.NEWkw       -> NEW,
    nme.NULLkw      -> NULL,
    nme.OBJECTkw    -> OBJECT,
    nme.OVERRIDEkw  -> OVERRIDE,
    nme.PACKAGEkw   -> PACKAGE,
    nme.PRIVATEkw   -> PRIVATE,
    nme.PROTECTEDkw -> PROTECTED,
    nme.RETURNkw    -> RETURN,
    nme.SEALEDkw    -> SEALED,
    nme.SUPERkw     -> SUPER,
    nme.THISkw      -> THIS,
    nme.THROWkw     -> THROW,
    nme.TRAITkw     -> TRAIT,
    nme.TRUEkw      -> TRUE,
    nme.TRYkw       -> TRY,
    nme.TYPEkw      -> TYPE,
    nme.VALkw       -> VAL,
    nme.VARkw       -> VAR,
    nme.WHILEkw     -> WHILE,
    nme.WITHkw      -> WITH,
    nme.YIELDkw     -> YIELD,
    nme.DOTkw       -> DOT,
    nme.USCOREkw    -> USCORE,
    nme.COLONkw     -> COLON,
    nme.EQUALSkw    -> EQUALS,
    nme.ARROWkw     -> ARROW,
    nme.LARROWkw    -> LARROW,
    nme.SUBTYPEkw   -> SUBTYPE,
    nme.VIEWBOUNDkw -> VIEWBOUND,
    nme.SUPERTYPEkw -> SUPERTYPE,
    nme.HASHkw      -> HASH,
    nme.ATkw        -> AT,
    nme.MACROkw     -> IDENTIFIER,
    nme.THENkw      -> IDENTIFIER,
    nme.SCRIPTkw    -> IDENTIFIER
    )

  private var kwOffset: Offset = -1
  private val kwArray: Array[Token] = {
    val (offset, arr) = createKeywordArray(allKeywords, IDENTIFIER)
    kwOffset = offset
    arr
  }

  final val token2name = (allKeywords map (_.swap)).toMap

// Token representation ----------------------------------------------------

  /** Returns the string representation of given token. */
  def token2string(token: Int): String = (token: @switch) match {
    case IDENTIFIER
    | BACKQUOTED_IDENT => "identifier"
    case CHARLIT       => "character literal"
    case INTLIT        => "integer literal"
    case LONGLIT       => "long literal"
    case FLOATLIT      => "float literal"
    case DOUBLELIT     => "double literal"
    case STRINGLIT
       | STRINGPART 
    | INTERPOLATIONID  => "string literal"
    case SYMBOLLIT     => "symbol literal"
    case LPAREN        => "'('"
    case RPAREN        => "')'"
    case LBRACE        => "'{'"
    case RBRACE        => "'}'"
    case LBRACKET      => "'['"
    case RBRACKET      => "']'"
    case EOF           => "eof"
    case ERROR         => "something"
    case SEMI          => "';'"
    case NEWLINE       => "';'"
    case NEWLINES      => "';'"
    case COMMA         => "','"
    case CASECLASS     => "case class"
    case CASEOBJECT    => "case object"
    case XMLSTART      => "$XMLSTART$<"
      
    // SubScript tokens:  
    case CURLYARROW2              => "'~~>'"   
    case CURLYBROKENARROW2        => "'~/~>'"   
    case IF_QMARK                 => "'?if'"   
    case LBRACE_DOT               => "'{.'"   
    case LBRACE_DOT3              => "'{...'"
    case LBRACE_QMARK             => "'{?'"
    case LBRACE_EMARK             => "'{!'"
    case LBRACE_ASTERISK          => "'{*'"
    case LBRACE_CARET             => "'{^'"
    case RBRACE_DOT               => "'.}'"
    case RBRACE_DOT3              => "'...}"
    case RBRACE_QMARK             => "'?}'"
    case RBRACE_EMARK             => "'!}'"
    case RBRACE_ASTERISK          => "'*}'"
    case RBRACE_CARET             => "'^}'"
    case DOT2                     => "'..'"
    case DOT3                     => "'...'"
    case LESS2                    => "'<<'"
    case GREATER2                 => "'>>'"
    case ARROW2                   => "'==>'"
    case LPAREN_PLUS_RPAREN       => "'(+)'"
    case LPAREN_MINUS_RPAREN      => "'(-)'"
    case LPAREN_PLUS_MINUS_RPAREN => "'(+-)'"
    case LPAREN_SEMI_RPAREN       => "'(;)'"
    case LPAREN_ASTERISK          => "'(*'"
    case LPAREN_ASTERISK2         => "'(**'"
    case RPAREN_ASTERISK          => "'*)'"
    case RPAREN_ASTERISK2         => "'**)'"
    
    case THEN                     => "'then'"
      
    case _ =>
      (token2name get token) match {
        case Some(name) => "'" + name + "'"
        case _          => "'<" + token + ">'"
      }
  }

  class MalformedInput(val offset: Offset, val msg: String) extends Exception

  /** A scanner for a given source file not necessarily attached to a compilation unit.
   *  Useful for looking inside source files that aren not currently compiled to see what's there
   */
  class SourceFileScanner(val source: SourceFile) extends Scanner {
    val buf = source.content
    override val decodeUni: Boolean = !settings.nouescape

    // suppress warnings, throw exception on errors
    def deprecationWarning  (off: Offset, msg: String): Unit = ()
    def error               (off: Offset, msg: String): Unit = throw new MalformedInput(off, msg)
    def incompleteInputError(off: Offset, msg: String): Unit = throw new MalformedInput(off, msg)
  }

  /** A scanner over a given compilation unit
   */
  class UnitScanner(val unit: CompilationUnit, patches: List[BracePatch]) extends SourceFileScanner(unit.source) {
    def this(unit: CompilationUnit) = this(unit, List())

    override def deprecationWarning  (off: Offset, msg: String) = unit.deprecationWarning  (unit.position(off), msg)
    override def error               (off: Offset, msg: String) = unit.error               (unit.position(off), msg)
    override def incompleteInputError(off: Offset, msg: String) = unit.incompleteInputError(unit.position(off), msg)

    private var bracePatches: List[BracePatch] = patches

    lazy val parensAnalyzer = new ParensAnalyzer(unit, List())

    override def parenBalance(token: Token) = parensAnalyzer.balance(token)

    override def healBraces(): List[BracePatch] = {
      var patches: List[BracePatch] = List()
      if (!parensAnalyzer.tabSeen) {
        var bal = parensAnalyzer.balance(RBRACE)
        while (bal < 0) {patches = new ParensAnalyzer(unit, patches).insertRBrace(); bal += 1}
        while (bal > 0) {patches = new ParensAnalyzer(unit, patches).deleteRBrace(); bal -= 1}
      }
      patches
    }

    /** Insert or delete a brace, if a patch exists for this offset */
    override def applyBracePatch(): Boolean = {
      if (bracePatches.isEmpty || bracePatches.head.off != offset) false
      else {
        val patch = bracePatches.head
        bracePatches = bracePatches.tail
//        println("applying brace patch "+offset)//DEBUG
        if (patch.inserted) {next copyFrom this; error(offset,   "Missing closing brace `}' assumed here"); token = RBRACE; true}
        else                {                    error(offset, "Unmatched closing brace '}' ignored here");  fetchToken(); false}
      }
    }
  }

  class ParensAnalyzer(unit: CompilationUnit, patches: List[BracePatch]) extends UnitScanner(unit, patches) {
    val balance = mutable.Map(RPAREN -> 0, RBRACKET -> 0, RBRACE -> 0,
                                // SubScript specials:
								RBRACE_DOT      -> 0, 
								RBRACE_DOT3     -> 0, 
								RBRACE_QMARK    -> 0, 
								RBRACE_EMARK    -> 0, 
								RBRACE_ASTERISK -> 0, 
								RBRACE_CARET    -> 0, 
								RPAREN_ASTERISK -> 0, 
								RPAREN_ASTERISK2-> 0)

    /** The source code with braces and line starts annotated with [NN] showing the index */
    private def markedSource = {
      val code   = unit.source.content
      val braces = code.indices filter (idx => "{}\n" contains code(idx)) toSet;
      val mapped = code.indices map (idx => if (braces(idx)) s"${code(idx)}[$idx]" else "" + code(idx))
      mapped.mkString("")
    }

    init()
    log(s"ParensAnalyzer for ${unit.source} of length ${unit.source.content.length}\n```\n$markedSource\n```")

    /** The offset of the first token on this line, or next following line if blank
     */
    val lineStart = new ArrayBuffer[Int]

    /** The list of matching top-level brace pairs (each of which may contain nested brace pairs).
     */
    val bracePairs: List[BracePair] = {

      var lineCount = 1
      var lastOffset = 0
      var indent = 0
      val oldBalance = scala.collection.mutable.Map[Int, Int]()
      def markBalance() = for ((k, v) <- balance) oldBalance(k) = v
      markBalance()

      def scan(bpbuf: ListBuffer[BracePair]): (Int, Int) = {
        if (token != NEWLINE && token != NEWLINES) {
          while (lastOffset < offset) {
            if (buf(lastOffset) == LF) lineCount += 1
            lastOffset += 1
          }
          while (lineCount > lineStart.length) {
            lineStart += offset
            // reset indentation unless there are new opening brackets or
            // braces since last ident line and at the same time there
            // are no new braces.
            if (balance(RPAREN  ) >= oldBalance(RPAREN  ) &&
                balance(RBRACKET) >= oldBalance(RBRACKET) ||
                balance(RBRACE  ) != oldBalance(RBRACE  )   ) {
              indent = column(offset)
              markBalance()
            }
          }
        }

        token match {
          case LPAREN   => balance(RPAREN  ) -= 1; nextToken(); scan(bpbuf)
          case LBRACKET => balance(RBRACKET) -= 1; nextToken(); scan(bpbuf)
          case RPAREN   => balance(RPAREN  ) += 1; nextToken(); scan(bpbuf)
          case RBRACKET => balance(RBRACKET) += 1; nextToken(); scan(bpbuf)
          case LBRACE   => balance(RBRACE)   -= 1;  val lc = lineCount
										            val loff = offset
										            val lindent = indent
										            val bpbuf1 = new ListBuffer[BracePair]
										            nextToken()
										            val (roff, rindent) = scan(bpbuf1)
										            if (lc != lineCount)
										              bpbuf += BracePair(loff, lindent, roff, rindent, bpbuf1.toList)
										            scan(bpbuf)
          case RBRACE   => balance(RBRACE  ) += 1;  val off = offset; nextToken(); (off, indent)
          case EOF      => (-1, -1)
          case _        => nextToken(); scan(bpbuf)
        }
      }

      val bpbuf = new ListBuffer[BracePair]
      while (token != EOF) {
        val (roff, rindent) = scan(bpbuf)
        if (roff != -1) {
          val current = BracePair(-1, -1, roff, rindent, bpbuf.toList)
          bpbuf.clear()
          bpbuf += current
        }
      }
      def bracePairString(bp: BracePair, indent: Int): String = {
        val rangeString = {
          import bp._
          val lline = line(loff)
          val rline = line(roff)
          val tokens = List(lline, lindent, rline, rindent) map (n => if (n < 0) "??" else "" + n)
          "%s:%s to %s:%s".format(tokens: _*)
        }
        val outer  = (" " * indent) + rangeString
        val inners = bp.nested map (bracePairString(_, indent + 2))

        if (inners.isEmpty) outer
        else inners.mkString(outer + "\n", "\n", "")
      }
      def bpString    = bpbuf.toList map ("\n" + bracePairString(_, 0)) mkString ""
      def startString = lineStart.mkString("line starts: [", ", ", "]")

      log(s"\n$startString\n$bpString")
      bpbuf.toList
    }

    var tabSeen = false

    def line(offset: Offset): Int = {
      def findLine(lo: Int, hi: Int): Int = {
        val mid = (lo + hi) / 2
        if (offset < lineStart(mid)) findLine(lo, mid - 1)
        else if (mid + 1 < lineStart.length && offset >= lineStart(mid + 1)) findLine(mid + 1, hi)
        else mid
      }
      if (offset <= 0) 0
      else findLine(0, lineStart.length - 1)
    }

    def column(offset: Offset): Int = {
      var col = 0
      var i = offset - 1
      while (i >= 0 && buf(i) != CR && buf(i) != LF) {
        if (buf(i) == '\t') tabSeen = true
        col += 1
        i -= 1
      }
      col
    }

    def insertPatch(patches: List[BracePatch], patch: BracePatch): List[BracePatch] = patches match {
      case List() => List(patch)
      case bp :: bps => if (patch.off < bp.off) patch :: patches
                        else bp :: insertPatch(bps, patch)
    }

    def insertRBrace(): List[BracePatch] = {
      def insert(bps: List[BracePair]): List[BracePatch] = bps match {
        case List() => patches
        case (bp @ BracePair(loff, lindent, roff, rindent, nested)) :: bps1 =>
          if (lindent <= rindent) insert(bps1)
          else {
//           println("patch inside "+bp+"/"+line(loff)+"/"+lineStart(line(loff))+"/"+lindent"/"+rindent)//DEBUG
            val patches1 = insert(nested)
            if (patches1 ne patches) patches1
            else {
              var lin = line(loff) + 1
              while (lin < lineStart.length && column(lineStart(lin)) > lindent)
                lin += 1
              if (lin < lineStart.length) {
                val patches1 = insertPatch(patches, BracePatch(lineStart(lin), inserted = true))
                //println("patch for "+bp+"/"+imbalanceMeasure+"/"+new ParensAnalyzer(unit, patches1).imbalanceMeasure)
                /*if (improves(patches1))*/
                patches1
                /*else insert(bps1)*/
                // (this test did not seem to work very well in practice)
              } else patches
            }
          }
      }
      insert(bracePairs)
    }

    def deleteRBrace(): List[BracePatch] = {
      def delete(bps: List[BracePair]): List[BracePatch] = bps match {
        case List() => patches
        case BracePair(loff, lindent, roff, rindent, nested) :: bps1 =>
          if (lindent >= rindent) delete(bps1)
          else {
            val patches1 = delete(nested)
            if (patches1 ne patches) patches1
            else insertPatch(patches, BracePatch(roff, inserted = false))
          }
      }
      delete(bracePairs)
    }

    // don't emit deprecation warnings about identifiers like `macro` or `then`
    // when skimming through the source file trying to heal braces
    override def emitIdentifierDeprecationWarnings = false

    override def error(offset: Offset, msg: String) {}
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy