All Downloads are FREE. Search and download functionalities are using the official Maven repository.

scala.tools.nsc.ast.parser.Scanners.scala Maven / Gradle / Ivy

/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc.
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala.tools.nsc
package ast.parser

import scala.tools.nsc.settings.ScalaVersion
import scala.tools.nsc.util.{CharArrayReader, CharArrayReaderData}
import scala.reflect.internal.util._
import scala.reflect.internal.Chars._
import Tokens._
import scala.annotation.{switch, tailrec}
import scala.collection.mutable
import mutable.{ArrayBuffer, ListBuffer}
import scala.tools.nsc.ast.parser.xml.Utility.isNameStart
import java.lang.StringBuilder

import scala.tools.nsc.Reporting.WarningCategory

object Cbuf {
  final val TargetCapacity = 256

  def create(): StringBuilder = new StringBuilder(TargetCapacity)

  implicit class StringBuilderOps(val sb: StringBuilder) extends AnyVal {
    def clear(): Unit = {
      if (sb.capacity() > TargetCapacity) {
        sb.setLength(TargetCapacity)
        sb.trimToSize()
      }
      sb.setLength(0)
    }
    def toCharArray: Array[Char] = {
      val n = sb.length()
      val res = new Array[Char](n)
      sb.getChars(0, n, res, 0)
      res
    }
    def isEmpty = sb.length() == 0
    def last = sb.charAt(sb.length() - 1)
  }
}

import Cbuf.StringBuilderOps

/** See Parsers.scala / ParsersCommon for some explanation of ScannersCommon.
 */
trait ScannersCommon {
  val global : Global
  import global._

  /** Offset into source character array */
  type Offset = Int

  type Token = Int

  trait CommonTokenData {
    def token: Token
    def name: TermName
  }

  trait ScannerCommon extends CommonTokenData {
    /** Consume and discard the next token. */
    def nextToken(): Unit

    // things to fill in, in addition to buf, decodeUni which come from CharArrayReader
    def error(off: Offset, msg: String): Unit
    def incompleteInputError(off: Offset, msg: String): Unit
    def warning(off: Offset, msg: String, category: WarningCategory): Unit
    def deprecationWarning(off: Offset, msg: String, since: String): Unit

    // advance past COMMA NEWLINE RBRACE (to whichever token is the matching close bracket)
    def skipTrailingComma(right: Token): Boolean = false
  }

  // Hooks for ScaladocUnitScanner and ScaladocJavaUnitScanner
  trait DocScanner {
    protected def beginDocComment(prefix: String): Unit = {}
    protected def processCommentChar(): Unit = {}
    protected def finishDocComment(): Unit = {}

    private var lastDoc: DocComment = null
    // get last doc comment
    def flushDoc(): DocComment = try lastDoc finally lastDoc = null
    def registerDocComment(raw: String, pos: Position) = {
      lastDoc = DocComment(raw, pos)
      signalParsedDocComment(raw, pos)
    }

    /** To prevent doc comments attached to expressions from leaking out of scope
      *  onto the next documentable entity, they are discarded upon passing a right
      *  brace, bracket, or parenthesis.
      */
    def discardDocBuffer(): Unit = {}
  }

  def createKeywordArray(keywords: Seq[(Name, Token)], defaultToken: Token): (Token, Array[Token]) = {
    val names = keywords sortBy (_._1.start) map { case (k, v) => (k.start, v) }
    val low   = names.head._1
    val high  = names.last._1
    val arr   = Array.fill(high - low + 1)(defaultToken)

    names foreach { case (k, v) => arr(k + low) = v }
    (low, arr)
  }
}

trait Scanners extends ScannersCommon {
  val global : Global
  import global._

  trait TokenData extends CommonTokenData {

    /** the next token */
    var token: Token = EMPTY

    /** the offset of the first character of the current token */
    var offset: Offset = 0

    /** the offset of the character following the token preceding this one */
    var lastOffset: Offset = 0

    /** the name of an identifier */
    var name: TermName = null

    /** the string value of a literal */
    var strVal: String = null

    /** the base of a number */
    var base: Int = 0

    def copyFrom(td: TokenData): this.type = {
      this.token = td.token
      this.offset = td.offset
      this.lastOffset = td.lastOffset
      this.name = td.name
      this.strVal = td.strVal
      this.base = td.base
      this
    }
  }

  /** An interface to most of mutable data in Scanner defined in TokenData
   *  and CharArrayReader (+ next, prev fields) with copyFrom functionality
   *  to backup/restore data (used by quasiquotes' lookingAhead).
   */
  trait ScannerData extends TokenData with CharArrayReaderData {
    /** we need one token lookahead and one token history
     */
    val next: TokenData = new TokenData{}
    val prev: TokenData = new TokenData{}

    def copyFrom(sd: ScannerData): this.type = {
      this.next copyFrom sd.next
      this.prev copyFrom sd.prev
      super[CharArrayReaderData].copyFrom(sd)
      super[TokenData].copyFrom(sd)
      this
    }
  }

  abstract class Scanner extends CharArrayReader with TokenData with ScannerData with ScannerCommon with DocScanner {
    /** A switch whether operators at the start of lines can be infix operators. */
    private var allowLeadingInfixOperators = true

    private def isDigit(c: Char) = Character.isDigit(c)

    import Character.{isHighSurrogate, isLowSurrogate, isUnicodeIdentifierPart, isUnicodeIdentifierStart, isValidCodePoint, toCodePoint}

    // given char (ch) is high surrogate followed by low, codepoint passes predicate.
    // true means supplementary chars were put to buffer.
    // strict to require low surrogate (if not in string literal).
    private def isSupplementary(high: Char, test: Int => Boolean, strict: Boolean = true): Boolean =
      isHighSurrogate(high) && {
        var res = false
        nextChar()
        val low = ch
        if (isLowSurrogate(low)) {
          nextChar()
          val codepoint = toCodePoint(high, low)
          if (isValidCodePoint(codepoint) && test(codepoint)) {
            putChar(high)
            putChar(low)
            res = true
          } else
            syntaxError(f"illegal character '\\u$high%04x\\u$low%04x'")
        } else if (!strict) {
          putChar(high)
          res = true
        } else
          syntaxError(f"illegal character '\\u$high%04x' missing low surrogate")
        res
      }
    private def atSupplementary(ch: Char, f: Int => Boolean): Boolean =
      isHighSurrogate(ch) && {
        val hi = ch
        val r = lookaheadReader
        r.nextRawChar()
        val lo = r.ch
        isLowSurrogate(lo) && {
          val codepoint = toCodePoint(hi, lo)
          isValidCodePoint(codepoint) && f(codepoint)
        }
      }

    private var openComments = 0
    final protected def putCommentChar(): Unit = { processCommentChar(); nextChar() }

    @tailrec private def skipLineComment(): Unit = ch match {
      case SU | CR | LF =>
      case _            => nextChar() ; skipLineComment()
    }
    private def maybeOpen(): Unit = {
      putCommentChar()
      if (ch == '*') {
        putCommentChar()
        openComments += 1
      }
    }
    private def maybeClose(): Boolean = {
      putCommentChar()
      (ch == '/') && {
        putCommentChar()
        openComments -= 1
        openComments == 0
      }
    }
    @tailrec final def skipNestedComments(): Unit = ch match {
      case '/' => maybeOpen() ; skipNestedComments()
      case '*' => if (!maybeClose()) skipNestedComments()
      case SU  => incompleteInputError("unclosed comment")
      case _   => putCommentChar() ; skipNestedComments()
    }

    private def skipToCommentEnd(isLineComment: Boolean): Unit = {
      nextChar()
      if (isLineComment) skipLineComment()
      else {
        openComments = 1
        val isDocComment = (ch == '*') && { nextChar(); true }
        if (isDocComment) {
          // Check for the amazing corner case of /**/
          if (ch == '/')
            nextChar()
          else {
            beginDocComment("/**")
            skipNestedComments()
          }
        }
        else skipNestedComments()
      }
    }

    /** Returns true if a comment was skipped.
     *  @note Pre-condition: ch == '/'
     */
    final def skipComment(): Boolean = ch match {
      case '/' | '*' => skipToCommentEnd(isLineComment = ch == '/') ; finishDocComment(); true
      case _         => false
    }


    def isAtEnd = charOffset >= buf.length

    def resume(lastCode: Token) = {
      token = lastCode
      if (next.token != EMPTY && !reporter.hasErrors)
        syntaxError("unexpected end of input: possible missing '}' in XML block")

      nextToken()
    }

    /** A character buffer for literals
     */
    val cbuf = Cbuf.create()

    /** append Unicode character to "cbuf" buffer
     */
    protected def putChar(c: Char): Unit = cbuf.append(c)

    /** Determines whether this scanner should emit identifier deprecation warnings,
     *  e.g. when seeing `macro` or `then`, which are planned to become keywords in future versions of Scala.
     */
    protected def emitIdentifierDeprecationWarnings = true

    /** Clear buffer and set name and token */
    private def finishNamed(idtoken: Token = IDENTIFIER): Unit = {
      name = newTermName(cbuf.toCharArray)
      cbuf.clear()
      token = idtoken
      if (idtoken == IDENTIFIER) {
        val idx = name.start - kwOffset
        if (idx >= 0 && idx < kwArray.length) {
          token = kwArray(idx)
          if (token == IDENTIFIER && allowIdent != name) {
            if (name == nme.MACROkw)
              syntaxError(s"$name is now a reserved word; usage as an identifier is disallowed")
            else if (emitIdentifierDeprecationWarnings)
              deprecationWarning(s"$name is a reserved word (since 2.10.0); usage as an identifier is deprecated", "2.10.0")
          }
        }
      }
    }

    /** Clear buffer and set string */
    private def setStrVal(): Unit = {
      strVal = cbuf.toString
      cbuf.clear()
    }

    /** a stack of tokens which indicates whether line-ends can be statement separators
     *  also used for keeping track of nesting levels.
     *  We keep track of the closing symbol of a region. This can be
     *  RPAREN    if region starts with '('
     *  RBRACKET  if region starts with '['
     *  RBRACE    if region starts with '{'
     *  ARROW     if region starts with 'case'
     *  STRINGLIT if region is a string interpolation expression starting with '${'
     *            (the STRINGLIT appears twice in succession on the stack iff the
     *             expression is a multiline string literal).
     */
    var sepRegions: List[Token] = List()

// Get next token ------------------------------------------------------------

    /** Are we directly in a string interpolation expression?
     */
    private def inStringInterpolation =
      sepRegions.nonEmpty && sepRegions.head == STRINGLIT

    /** Are we directly in a multiline string interpolation expression?
     *  @pre inStringInterpolation
     */
    private def inMultiLineInterpolation =
      inStringInterpolation && sepRegions.tail.nonEmpty && sepRegions.tail.head == STRINGPART

    /** Are we in a `${ }` block? such that RBRACE exits back into multiline string. */
    private def inMultiLineInterpolatedExpression = {
      sepRegions match {
        case RBRACE :: STRINGLIT :: STRINGPART :: rest => true
        case _ => false
      }
    }

    def lookingAhead[A](body: => A): A = {
      val saved = new ScannerData {} copyFrom this
      val aLIO = allowLeadingInfixOperators
      allowLeadingInfixOperators = false
      nextToken()
      try body finally {
        this copyFrom saved
        allowLeadingInfixOperators = aLIO
      }
    }

    /** read next token and return last offset
     */
    def skipToken(): Offset = {
      val off = offset
      nextToken()
      off
    }

    // used by parser to distinguish pattern P(_*, p) from trailing comma.
    // EOF is accepted for REPL, which can't look ahead past the current line.
    def isTrailingComma(right: Token): Boolean =
      token == COMMA && lookingAhead(afterLineEnd() && token == right || token == EOF)

    override def skipTrailingComma(right: Token): Boolean =
      if (token == COMMA) {
        // SIP-27 Trailing Comma (multi-line only) support
        // If a comma is followed by a new line & then a closing paren, bracket or brace
        // then it is a trailing comma and is ignored
        val saved = new ScannerData {} copyFrom this
        fetchToken()
        (afterLineEnd() && token == right || token == EOF) || { copyFrom(saved) ; false }
      } else false

    /** Allow an otherwise deprecated ident here */
    private var allowIdent: Name = nme.EMPTY

    /** Get next token, and allow the otherwise deprecated ident `name`  */
    def nextTokenAllow(name: Name) = {
      val prev = allowIdent
      allowIdent = name
      try {
        nextToken()
      } finally {
        allowIdent = prev
      }
    }

    // Adapt sepRegions according to last token
    def adjustSepRegions(lastToken: Token): Unit = (lastToken: @switch) match {
      case LPAREN =>
        sepRegions = RPAREN :: sepRegions
      case LBRACKET =>
        sepRegions = RBRACKET :: sepRegions
      case LBRACE =>
        sepRegions = RBRACE :: sepRegions
      case CASE =>
        sepRegions = ARROW :: sepRegions
      case RBRACE =>
        while (!sepRegions.isEmpty && sepRegions.head != RBRACE)
          sepRegions = sepRegions.tail
        if (!sepRegions.isEmpty)
          sepRegions = sepRegions.tail

        discardDocBuffer()
      case RBRACKET | RPAREN =>
        if (!sepRegions.isEmpty && sepRegions.head == lastToken)
          sepRegions = sepRegions.tail

        discardDocBuffer()
      case ARROW =>
        if (!sepRegions.isEmpty && sepRegions.head == lastToken)
          sepRegions = sepRegions.tail
      case STRINGLIT =>
        if (inMultiLineInterpolation)
          sepRegions = sepRegions.tail.tail
        else if (inStringInterpolation)
          sepRegions = sepRegions.tail
      case _ =>
    }

    /** Advance beyond a case token without marking the CASE in sepRegions.
     *  This method should be called to skip beyond CASE tokens that are
     *  not part of matches, i.e. no ARROW is expected after them.
     */
    def skipCASE(): Unit = {
      assert(token == CASE, s"Internal error: skipCASE() called on non-case token $token")
      nextToken()
      sepRegions = sepRegions.tail
    }

    /** True to warn about migration change in infix syntax. */
    private val infixMigration = settings.Xmigration.value <= ScalaVersion("2.13.2")

    /** Produce next token, filling TokenData fields of Scanner.
     */
    def nextToken(): Unit = {
      val lastToken = token
      adjustSepRegions(lastToken)

      // Read a token or copy it from `next` tokenData
      if (next.token == EMPTY) {
        lastOffset = charOffset - 1
        if (lastOffset > 0 && buf(lastOffset) == '\n' && buf(lastOffset - 1) == '\r') {
          lastOffset -= 1
        }
        if (inStringInterpolation) fetchStringPart() else fetchToken()
        if (token == ERROR) {
          if (inMultiLineInterpolation)
            sepRegions = sepRegions.tail.tail
          else if (inStringInterpolation)
            sepRegions = sepRegions.tail
        }
      } else {
        this copyFrom next
        next.token = EMPTY
      }

      def isSimpleExprIntroToken(token: Token): Boolean = token match {
        case CHARLIT | INTLIT | LONGLIT | FLOATLIT | DOUBLELIT |
             STRINGLIT | INTERPOLATIONID | SYMBOLLIT | TRUE | FALSE | NULL | // literals
             IDENTIFIER | BACKQUOTED_IDENT | THIS | SUPER | NEW | USCORE |
             LPAREN | LBRACE | XMLSTART => true
        case _ => false
      }

      def insertNL(nl: Token): Unit = {
        next.copyFrom(this)
        //  todo: make offset line-end of previous line?
        offset = if (lineStartOffset <= offset) lineStartOffset else lastLineStartOffset
        token = nl
      }

      def isOperator: Boolean = token == BACKQUOTED_IDENT || token == IDENTIFIER && isOperatorPart(name.charAt(name.length - 1))

      /* A leading infix operator must be followed by a lexically suitable expression.
       * Usually any simple expr will do. However, a backquoted identifier may serve as
       * either an op or a reference. So the additional constraint is that the following
       * token can't be an assignment operator. (Dotty disallows binary ops, hence the
       * test for unary.) See run/multiLineOps.scala for 42 + `x` on 3 lines, where +
       * is not leading infix because backquoted x is non-unary op.
       */
      def followedByInfixRHS: Boolean = {
        //def isCandidateInfixRHS: Boolean = isSimpleExprIntroToken(token) && (!isOperator || nme.raw.isUnary(name) || token == BACKQUOTED_IDENT)
        def isAssignmentOperator: Boolean =
          name.endsWith('=') && !name.startsWith('=') && isOperatorPart(name.startChar) &&
          (name.length != 2 || (name.startChar match { case '!' | '<' | '>' => false case _ => true }))
        def isCandidateInfixRHS: Boolean = isSimpleExprIntroToken(token) && (!isOperator || token == BACKQUOTED_IDENT || !isAssignmentOperator)
        lookingAhead {
          isCandidateInfixRHS || token == NEWLINE && { nextToken() ; isCandidateInfixRHS }
        }
      }

      /* A leading symbolic or backquoted identifier is treated as an infix operator
       * if it is followed by at least one ' ' and a token on the same line
       * that can start an expression.
       */
      def isLeadingInfixOperator =
        allowLeadingInfixOperators &&
        isOperator &&
        (isWhitespace(ch) || ch == LF) &&
        followedByInfixRHS

      /* Insert NEWLINE or NEWLINES if
       * - we are after a newline
       * - we are within a { ... } or on toplevel (wrt sepRegions)
       * - the current token can start a statement and the one before can end it
       * insert NEWLINES if we are past a blank line, NEWLINE otherwise
       */
      if (!applyBracePatch() && afterLineEnd() && inLastOfStat(lastToken) && inFirstOfStat(token) &&
          (sepRegions.isEmpty || sepRegions.head == RBRACE)) {
        if (pastBlankLine()) insertNL(NEWLINES)
        else if (!isLeadingInfixOperator) insertNL(NEWLINE)
        else if (!currentRun.isScala3) {
          val msg = """|Line starts with an operator that in future
                       |will be taken as an infix expression continued from the previous line.
                       |To force the previous interpretation as a separate statement,
                       |add an explicit `;`, add an empty line, or remove spaces after the operator."""
          if (infixMigration) deprecationWarning(msg.stripMargin, "2.13.2")
          insertNL(NEWLINE)
        }
      }
      postProcessToken()
//      print("["+this+"]")
    } // end nextToken

    // Join CASE + CLASS => CASECLASS, CASE + OBJECT => CASEOBJECT, SEMI + ELSE => ELSE
    def postProcessToken(): Unit =
      if (token == CASE) {
        prev copyFrom this
        val nextLastOffset = charOffset - 1
        fetchToken()
        def resetOffset(): Unit = {
          offset = prev.offset
          lastOffset = prev.lastOffset
        }
        if (token == CLASS) {
          token = CASECLASS
          resetOffset()
        } else if (token == OBJECT) {
          token = CASEOBJECT
          resetOffset()
        } else {
          lastOffset = nextLastOffset
          next copyFrom this
          this copyFrom prev
        }
      } else if (token == SEMI) {
        prev copyFrom this
        fetchToken()
        if (token != ELSE) {
          next copyFrom this
          this copyFrom prev
        }
      }

    /** Is current token first one after a newline? */
    private def afterLineEnd(): Boolean =
      lastOffset < lineStartOffset &&
      (lineStartOffset <= offset ||
       lastOffset < lastLineStartOffset && lastLineStartOffset <= offset)

    /** Is there a blank line between the current token and the last one?
     *  @pre  afterLineEnd().
     */
    private def pastBlankLine(): Boolean = {
      var idx = lastOffset
      var ch = buf(idx)
      val end = offset
      while (idx < end) {
        if (ch == LF || ch == FF) {
          do {
            idx += 1; ch = buf(idx)
            if (ch == LF || ch == FF) {
//              println("blank line found at "+lastOffset+":"+(lastOffset to idx).map(buf(_)).toList)
              return true
            }
            if (idx == end) return false
          } while (ch <= ' ')
        }
        idx += 1; ch = buf(idx)
      }
      false
    }

    /** read next token, filling TokenData fields of Scanner.
     */
    @tailrec
    protected final def fetchToken(): Unit = {
      offset = charOffset - 1
      (ch: @switch) match {

        case ' ' | '\t' | CR | LF | FF =>
          nextChar()
          fetchToken()
        case 'A' | 'B' | 'C' | 'D' | 'E' |
             'F' | 'G' | 'H' | 'I' | 'J' |
             'K' | 'L' | 'M' | 'N' | 'O' |
             'P' | 'Q' | 'R' | 'S' | 'T' |
             'U' | 'V' | 'W' | 'X' | 'Y' |
             'Z' | '$' | '_' |
             'a' | 'b' | 'c' | 'd' | 'e' |
             'f' | 'g' | 'h' | 'i' | 'j' |
             'k' | 'l' | 'm' | 'n' | 'o' |
             'p' | 'q' | 'r' | 's' | 't' |
             'u' | 'v' | 'w' | 'x' | 'y' |  // scala-mode: need to understand multi-line case patterns
             'z' =>
          putChar(ch)
          nextChar()
          getIdentRest()
          if (ch == '"' && token == IDENTIFIER)
            token = INTERPOLATIONID
        case '<' => // is XMLSTART?
          def fetchLT() = {
            val last = if (charOffset >= 2) buf(charOffset - 2) else ' '
            nextChar()
            last match {
              case ' ' | '\t' | '\n' | '{' | '(' | '>' if isNameStart(ch) || ch == '!' || ch == '?' =>
                token = XMLSTART
              case _ =>
                // Console.println("found '<', but last is '"+in.last+"'"); // DEBUG
                putChar('<')
                getOperatorRest()
            }
          }
          fetchLT()
        case '~' | '!' | '@' | '#' | '%' |
             '^' | '*' | '+' | '-' | /*'<' | */
             '>' | '?' | ':' | '=' | '&' |
             '|' | '\\' =>
          putChar(ch)
          nextChar()
          getOperatorRest()
        case '/' =>
          nextChar()
          if (skipComment()) {
            fetchToken()
          } else {
            putChar('/')
            getOperatorRest()
          }
        case '0' =>
          def fetchLeadingZero(): Unit = {
            nextChar()
            ch match {
              case 'x' | 'X' => base = 16 ; nextChar()
              //case 'b' | 'B' => base = 2  ; nextChar()
              case _         => base = 10 ; putChar('0')
            }
            if (base != 10 && !isNumberSeparator(ch) && digit2int(ch, base) < 0)
              syntaxError("invalid literal number")
          }
          fetchLeadingZero()
          getNumber()
        case '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
          base = 10
          getNumber()
        case '`' =>
          getBackquotedIdent()
        case '\"' =>
          def fetchDoubleQuote() = {
            if (token == INTERPOLATIONID) {
              nextRawChar()
              if (ch == '\"') {
                val lookahead = lookaheadReader
                lookahead.nextChar()
                if (lookahead.ch == '\"') {
                  nextRawChar()                        // now eat it
                  offset += 3
                  nextRawChar()
                  getStringPart(multiLine = true)
                  sepRegions = STRINGPART :: sepRegions // indicate string part
                  sepRegions = STRINGLIT :: sepRegions // once more to indicate multi line string part
                } else {
                  nextChar()
                  token = STRINGLIT
                  strVal = ""
                }
              } else {
                offset += 1
                getStringPart(multiLine = false)
                sepRegions = STRINGLIT :: sepRegions // indicate single line string part
              }
            } else {
              nextChar()
              if (ch == '\"') {
                nextChar()
                if (ch == '\"') {
                  nextRawChar()
                  getRawStringLit()
                } else {
                  token = STRINGLIT
                  strVal = ""
                }
              } else {
                getStringLit()
              }
            }
          }
          fetchDoubleQuote()
        case '\'' =>
          def unclosedCharLit() = {
            val unclosed = "unclosed character literal"
            // advise if previous token was Symbol contiguous with the orphan single quote at offset
            val msg = {
              val maybeMistakenQuote =
                this match {
                  case sfs: SourceFileScanner =>
                    val wholeLine = sfs.source.lineToString(sfs.source.offsetToLine(offset))
                    wholeLine.count(_ == '\'') > 1
                  case _ => false
                }
              if (token == SYMBOLLIT && offset == lastOffset) s"""$unclosed (or use " for string literal "$strVal")"""
              else if (maybeMistakenQuote) s"""$unclosed (or use " not ' for string literal)"""
              else unclosed
            }
            syntaxError(msg)
          }
          def fetchSingleQuote() = {
            nextChar()
            if (isIdentifierStart(ch))
              charLitOr(() => getIdentRest())
            else if (isOperatorPart(ch) && (ch != '\\'))
              charLitOr(() => getOperatorRest())
            else if (!isAtEnd && (ch != SU && ch != CR && ch != LF)) {
              val isEmptyCharLit = (ch == '\'')
              getLitChar()
              if (ch == '\'') {
                if (isEmptyCharLit)
                  syntaxError("empty character literal (use '\\'' for single quote)")
                else {
                  nextChar()
                  if (cbuf.length != 1)
                    syntaxError("illegal codepoint in Char constant: " + cbuf.toString.map(c => f"\\u$c%04x").mkString("'", "", "'"))
                  else {
                    token = CHARLIT
                    setStrVal()
                  }
                }
              }
              else if (isEmptyCharLit)
                syntaxError("empty character literal")
              else
                unclosedCharLit()
            }
            else unclosedCharLit()
          }
          fetchSingleQuote()
        case '.' =>
          nextChar()
          if ('0' <= ch && ch <= '9') {
            putChar('.'); getFraction()
          } else {
            token = DOT
          }
        case ';' =>
          nextChar(); token = SEMI
        case ',' =>
          nextChar(); token = COMMA
        case '(' =>
          nextChar(); token = LPAREN
        case '{' =>
          nextChar(); token = LBRACE
        case ')' =>
          nextChar(); token = RPAREN
        case '}' =>
          if (inMultiLineInterpolatedExpression) nextRawChar() else nextChar()
          token = RBRACE
        case '[' =>
          nextChar(); token = LBRACKET
        case ']' =>
          nextChar(); token = RBRACKET
        case SU =>
          if (isAtEnd) token = EOF
          else {
            syntaxError("illegal character")
            nextChar()
          }
        case _ =>
          def fetchOther() = {
            if (ch == '\u21D2') {
              deprecationWarning("The unicode arrow `⇒` is deprecated, use `=>` instead. If you still wish to display it as one character, consider using a font with programming ligatures such as Fira Code.", "2.13.0")
              nextChar(); token = ARROW
            } else if (ch == '\u2190') {
              deprecationWarning("The unicode arrow `←` is deprecated, use `<-` instead. If you still wish to display it as one character, consider using a font with programming ligatures such as Fira Code.", "2.13.0")
              nextChar(); token = LARROW
            } else if (isUnicodeIdentifierStart(ch)) {
              putChar(ch)
              nextChar()
              getIdentRest()
            } else if (isSpecial(ch)) {
              putChar(ch)
              nextChar()
              getOperatorRest()
            } else if (isSupplementary(ch, isUnicodeIdentifierStart)) {
              getIdentRest()
            } else {
              syntaxError(f"illegal character '\\u$ch%04x'")
              nextChar()
            }
          }
          fetchOther()
      }
    }

    /** Can token start a statement? */
    def inFirstOfStat(token: Token) = token match {
      case EOF | CATCH | ELSE | EXTENDS | FINALLY | FORSOME | MATCH | WITH | YIELD |
           COMMA | SEMI | NEWLINE | NEWLINES | DOT | COLON | EQUALS | ARROW | LARROW |
           SUBTYPE | VIEWBOUND | SUPERTYPE | HASH | RPAREN | RBRACKET | RBRACE | LBRACKET =>
        false
      case _ =>
        true
    }

    /** Can token end a statement? */
    def inLastOfStat(token: Token) = token match {
      case CHARLIT | INTLIT | LONGLIT | FLOATLIT | DOUBLELIT | STRINGLIT | SYMBOLLIT |
           IDENTIFIER | BACKQUOTED_IDENT | THIS | NULL | TRUE | FALSE | RETURN | USCORE |
           TYPE | XMLSTART | RPAREN | RBRACKET | RBRACE =>
        true
      case _ =>
        false
    }

// Identifiers ---------------------------------------------------------------

    private def getBackquotedIdent(): Unit = {
      nextChar()
      getLitChars('`')
      if (ch == '`') {
        nextChar()
        finishNamed(BACKQUOTED_IDENT)
        if (name.length == 0) syntaxError("empty quoted identifier")
      }
      else syntaxError("unclosed quoted identifier")
    }

    @tailrec
    private def getIdentRest(): Unit = (ch: @switch) match {
      case 'A' | 'B' | 'C' | 'D' | 'E' |
           'F' | 'G' | 'H' | 'I' | 'J' |
           'K' | 'L' | 'M' | 'N' | 'O' |
           'P' | 'Q' | 'R' | 'S' | 'T' |
           'U' | 'V' | 'W' | 'X' | 'Y' |
           'Z' | '$' |
           'a' | 'b' | 'c' | 'd' | 'e' |
           'f' | 'g' | 'h' | 'i' | 'j' |
           'k' | 'l' | 'm' | 'n' | 'o' |
           'p' | 'q' | 'r' | 's' | 't' |
           'u' | 'v' | 'w' | 'x' | 'y' |
           'z' |
           '0' | '1' | '2' | '3' | '4' |
           '5' | '6' | '7' | '8' | '9' =>
        putChar(ch)
        nextChar()
        getIdentRest()
      case '_' =>
        putChar(ch)
        nextChar()
        getIdentOrOperatorRest()
      case SU => // strangely enough, Character.isUnicodeIdentifierPart(SU) returns true!
        finishNamed()
      case _ =>
        if (isUnicodeIdentifierPart(ch)) {
          putChar(ch)
          nextChar()
          getIdentRest()
        }
        else if (isSupplementary(ch, isUnicodeIdentifierPart))
          getIdentRest()
        else
          finishNamed()
    }

    @tailrec
    private def getOperatorRest(): Unit = (ch: @switch) match {
      case '~' | '!' | '@' | '#' | '%' |
           '^' | '*' | '+' | '-' | '<' |
           '>' | '?' | ':' | '=' | '&' |
           '|' | '\\' =>
        putChar(ch); nextChar(); getOperatorRest()
      case '/' =>
        nextChar()
        if (skipComment()) finishNamed()
        else { putChar('/'); getOperatorRest() }
      case _ =>
        if (isSpecial(ch)) { putChar(ch); nextChar(); getOperatorRest() }
        else finishNamed()
    }

    private def getIdentOrOperatorRest(): Unit = {
      if (isIdentifierPart(ch))
        getIdentRest()
      else ch match {
        case '~' | '!' | '@' | '#' | '%' |
             '^' | '*' | '+' | '-' | '<' |
             '>' | '?' | ':' | '=' | '&' |
             '|' | '\\' | '/' =>
          getOperatorRest()
        case _ =>
          if (isSpecial(ch)) getOperatorRest()
          else finishNamed()
      }
    }


// Literals -----------------------------------------------------------------

    private def getStringLit() = {
      getLitChars('"')
      if (ch == '"') {
        setStrVal()
        nextChar()
        token = STRINGLIT
      } else unclosedStringLit()
    }

    private def unclosedStringLit(seenEscapedQuoteInInterpolation: Boolean = false): Unit = {
      val note =
        if (seenEscapedQuoteInInterpolation) "; note that `\\\"` no longer closes single-quoted interpolated string literals since 2.13.6, you can use a triple-quoted string instead"
        else ""
      syntaxError(s"unclosed string literal$note")
    }

    private def replaceUnicodeEscapesInTriple(): Unit = 
      if(strVal != null) {
        try {
          val replaced = StringContext.processUnicode(strVal)
          if(replaced != strVal) {
            val diffPosition = replaced.zip(strVal).zipWithIndex.collectFirst{ case ((r, o), i) if r != o => i}.getOrElse(replaced.length - 1)
            deprecationWarning(offset + 3 + diffPosition, "Unicode escapes in triple quoted strings are deprecated, use the literal character instead", since="2.13.2")
          }
          strVal = replaced
        } catch {
          case ue: StringContext.InvalidUnicodeEscapeException => {
            syntaxError(offset + 3 + ue.index, ue.getMessage())
          }
        }
      }

    @tailrec private def getRawStringLit(): Unit = {
      if (ch == '\"') {
        nextRawChar()
        if (isTripleQuote()) {
          setStrVal()
          if(!currentRun.isScala3) replaceUnicodeEscapesInTriple()
          token = STRINGLIT
        } else
          getRawStringLit()
      } else if (ch == SU) {
        incompleteInputError("unclosed multi-line string literal")
      } else {
        putChar(ch)
        nextRawChar()
        getRawStringLit()
      }
    }

    // for interpolated strings
    @tailrec private def getStringPart(multiLine: Boolean, seenEscapedQuote: Boolean = false): Unit = {
      def finishStringPart() = {
        setStrVal()
        token = STRINGPART
        next.lastOffset = charOffset - 1
        next.offset = charOffset - 1
      }
      if (ch == '"') {
        if (multiLine) {
          nextRawChar()
          if (isTripleQuote()) {
            setStrVal()
            token = STRINGLIT
          } else
            getStringPart(multiLine, seenEscapedQuote)
        } else {
          nextChar()
          setStrVal()
          token = STRINGLIT
        }
      } else if (ch == '\\' && !multiLine) {
        putChar(ch)
        nextRawChar()
        val q = ch == '"'
        if (q || ch == '\\') {
          putChar(ch)
          nextRawChar()
        }
        getStringPart(multiLine, seenEscapedQuote || q)
      } else if (ch == '$') {
        @tailrec def getInterpolatedIdentRest(): Unit =
          if (ch != SU && isUnicodeIdentifierPart(ch)) {
            putChar(ch)
            nextRawChar()
            getInterpolatedIdentRest()
          } else if (atSupplementary(ch, isUnicodeIdentifierPart)) {
            putChar(ch)
            nextRawChar()
            putChar(ch)
            nextRawChar()
            getInterpolatedIdentRest()
          } else {
            next.token = IDENTIFIER
            next.name = newTermName(cbuf.toCharArray)
            cbuf.clear()
            val idx = next.name.start - kwOffset
            if (idx >= 0 && idx < kwArray.length)
              next.token = kwArray(idx)
          }
        nextRawChar()
        if (ch == '$' || ch == '"') {
          putChar(ch)
          nextRawChar()
          getStringPart(multiLine, seenEscapedQuote)
        } else if (ch == '{') {
          finishStringPart()
          nextRawChar()
          next.token = LBRACE
        } else if (ch == '_') {
          finishStringPart()
          nextRawChar()
          next.token = USCORE
        } else if (isUnicodeIdentifierStart(ch)) {
          finishStringPart()
          putChar(ch)
          nextRawChar()
          getInterpolatedIdentRest()
        } else if (atSupplementary(ch, isUnicodeIdentifierStart)) {
          finishStringPart()
          putChar(ch)
          nextRawChar()
          putChar(ch)
          nextRawChar()
          getInterpolatedIdentRest()
        } else {
          val expectations = "$$, $\", $identifier or ${expression}"
          syntaxError(s"invalid string interpolation $$$ch, expected: $expectations")
        }
      } else {
        val isUnclosedLiteral = (ch == SU || (!multiLine && (ch == CR || ch == LF)))
        if (isUnclosedLiteral)
          if (multiLine)
            incompleteInputError("unclosed multi-line string literal")
          else
            unclosedStringLit(seenEscapedQuote)
        else {
          putChar(ch)
          nextRawChar()
          getStringPart(multiLine, seenEscapedQuote)
        }
      }
    }

    private def fetchStringPart() = {
      offset = charOffset - 1
      getStringPart(multiLine = inMultiLineInterpolation)
    }

    private def isTripleQuote(): Boolean =
      if (ch == '"') {
        nextRawChar()
        if (ch == '"') {
          nextChar()
          while (ch == '"') {
            putChar('"')
            nextChar()
          }
          true
        } else {
          putChar('"')
          putChar('"')
          false
        }
      } else {
        putChar('"')
        false
      }

    /** Copy current character into cbuf, interpreting any escape sequences,
     *  and advance to next character. Surrogate pairs are consumed (see check
     *  at fetchSingleQuote), but orphan surrogate is allowed.
     */
    protected def getLitChar(): Unit =
      if (ch == '\\') {
        nextChar()
        charEscape()
      } else if (!isSupplementary(ch, _ => true, strict = false)) {
        putChar(ch)
        nextChar()
      }

    private def charEscape(): Unit = {
      var bump = true
      ch match {
        case 'b'  => putChar('\b')
        case 't'  => putChar('\t')
        case 'n'  => putChar('\n')
        case 'f'  => putChar('\f')
        case 'r'  => putChar('\r')
        case '\"' => putChar('\"')
        case '\'' => putChar('\'')
        case '\\' => putChar('\\')
        case 'u'  => bump = uEscape()
        case x if '0' <= x && x <= '7' => bump = octalEscape()
        case _    => invalidEscape()
      }
      if (bump) nextChar()
    }

    private def uEscape(): Boolean = {
      while (ch == 'u') nextChar()
      var codepoint = 0
      var digitsRead = 0
      while (digitsRead < 4) {
        if (digitsRead > 0) nextChar()
        val digit = digit2int(ch, 16)
        digitsRead += 1
        if (digit >= 0) {
          codepoint = codepoint << 4
          codepoint += digit
        }
        else {
          invalidUnicodeEscape(digitsRead)
          return false
        }
      }
      val found = codepoint.asInstanceOf[Char]
      putChar(found)
      true
    }

    private def octalEscape(): Boolean = {
      val start = charOffset - 2
      val leadch: Char = ch
      var oct: Int = digit2int(ch, 8)
      nextChar()
      if ('0' <= ch && ch <= '7') {
        oct = oct * 8 + digit2int(ch, 8)
        nextChar()
        if (leadch <= '3' && '0' <= ch && ch <= '7') {
          oct = oct * 8 + digit2int(ch, 8)
          nextChar()
        }
      }
      val alt = if (oct == LF) "\\n" else f"\\u$oct%04x"
      syntaxError(start, s"octal escape literals are unsupported: use $alt instead")
      putChar(oct.toChar)
      false
    }

    protected def invalidEscape(): Unit = {
      syntaxError(charOffset - 1, "invalid escape character")
      putChar(ch)
    }

    protected def invalidUnicodeEscape(n: Int): Unit = {
      syntaxError(charOffset - n, "invalid unicode escape")
      putChar(ch)
    }

    private def getLitChars(delimiter: Char) = {
      while (ch != delimiter && !isAtEnd && (ch != SU && ch != CR && ch != LF))
        getLitChar()
    }

    /** read fractional part and exponent of floating point number
     *  if one is present.
     */
    protected def getFraction(): Unit = {
      while ('0' <= ch && ch <= '9' || isNumberSeparator(ch)) {
        putChar(ch)
        nextChar()
      }
      checkNoTrailingSeparator()
      if (ch == 'e' || ch == 'E') {
        val lookahead = lookaheadReader
        lookahead.nextChar()
        if (lookahead.ch == '+' || lookahead.ch == '-') {
          lookahead.nextChar()
        }
        if ('0' <= lookahead.ch && lookahead.ch <= '9' || isNumberSeparator(lookahead.ch)) {
          putChar(ch)
          nextChar()
          if (ch == '+' || ch == '-') {
            putChar(ch)
            nextChar()
          }
          if (isNumberSeparator(ch))
            syntaxError(offset + cbuf.length, "illegal separator")
          while ('0' <= ch && ch <= '9' || isNumberSeparator(ch)) {
            putChar(ch)
            nextChar()
          }
          checkNoTrailingSeparator()
        }
        token = DOUBLELIT
      }
      if (ch == 'd' || ch == 'D') {
        putChar(ch)
        nextChar()
        token = DOUBLELIT
      } else if (ch == 'f' || ch == 'F') {
        putChar(ch)
        nextChar()
        token = FLOATLIT
      } else
        token = DOUBLELIT
      checkNoLetter()
      setStrVal()
    }

    /** Convert current strVal to char value.
     */
    def charVal: Char = if (!strVal.isEmpty) strVal.charAt(0) else 0

    /** Convert current strVal, base to long value.
     *  This is tricky because of max negative value.
     *
     *  Conversions in base 2, 10 and 16 are supported.
     *  Number separators are skipped on the fly.
     */
    def intVal(negated: Boolean): Long = {
      def intConvert: Long = {
        def convertIt: Long = {
          def malformed: Long = { syntaxError("malformed integer number") ; 0 }
          def tooBig: Long = { syntaxError("integer number too large") ; 0 }
          val divider     = if (base == 10) 1 else 2
          val limit: Long = if (token == LONGLIT) Long.MaxValue else Int.MaxValue
          @tailrec def convert(value: Long, i: Int): Long =
            if (i >= strVal.length) value
            else {
              val c = strVal.charAt(i)
              if (isNumberSeparator(c)) convert(value, i + 1)
              else {
                val d = digit2int(c, base)
                if (d < 0)
                  malformed
                else if (value < 0 ||
                    limit / (base / divider) < value ||
                    limit - (d / divider) < value * (base / divider) &&
                    !(negated && limit == value * base - 1 + d))
                  tooBig
                else
                  convert(value * base + d, i + 1)
              }
            }
          val result = convert(0, 0)
          if (negated) -result else result
        }
        if (strVal.isEmpty) {
          syntaxError("missing integer number") // e.g., 0x; previous error shadows this one
          0L
        } else {
          if (settings.warnOctalLiteral && base == 10 && strVal.charAt(0) == '0' && strVal.length() > 1)
            deprecationWarning("Decimal integer literals should not have a leading zero. (Octal syntax is obsolete.)", since="2.10")
          convertIt
        }
      }
      if (token == CHARLIT && !negated) charVal.toLong else intConvert
    }

    @`inline` def intVal: Long = intVal(negated = false)

    private val zeroFloat = raw"[0.]+(?:[eE][+-]?[0-9]+)?[fFdD]?".r

    /** Convert current strVal, base to float value.
     */
    def floatVal(negated: Boolean): Float = {
      val text = removeNumberSeparators(strVal)
      try {
        val value: Float = java.lang.Float.parseFloat(text)
        if (value > Float.MaxValue)
          syntaxError("floating point number too large")
        if (value == 0.0f && !zeroFloat.pattern.matcher(text).matches)
          syntaxError("floating point number too small")
        if (negated) -value else value
      } catch {
        case _: NumberFormatException =>
          syntaxError("malformed floating point number")
          0.0f
      }
    }

    @`inline` def floatVal: Float = floatVal(negated = false)

    /** Convert current strVal, base to double value.
     */
    def doubleVal(negated: Boolean): Double = {
      val text = removeNumberSeparators(strVal)
      try {
        val value: Double = java.lang.Double.parseDouble(text)
        if (value > Double.MaxValue)
          syntaxError("double precision floating point number too large")
        if (value == 0.0d && !zeroFloat.pattern.matcher(text).matches)
          syntaxError("double precision floating point number too small")
        if (negated) -value else value
      } catch {
        case _: NumberFormatException =>
          syntaxError("malformed double precision floating point number")
          0.0
      }
    }

    @`inline` def doubleVal: Double = doubleVal(negated = false)

    @`inline` def checkNoLetter(): Unit = if (isIdentifierPart(ch) && ch >= ' ') syntaxError("invalid literal number")

    @`inline` private def isNumberSeparator(c: Char): Boolean = c == '_'

    @`inline` private def removeNumberSeparators(s: String): String = if (s.indexOf('_') > 0) s.replace("_", "") else s

    @`inline` private def numberOffset = offset + (if (base == 10) 0 else 2)

    // disallow trailing numeric separator char
    def checkNoTrailingSeparator(): Unit =
      if (!cbuf.isEmpty && isNumberSeparator(cbuf.last))
        syntaxError(numberOffset + cbuf.length - 1, "illegal separator")

    /** Read a number into strVal.
     *
     *  The `base` can be 2, 10 or 16.
     */
    protected def getNumber(): Unit = {
      // consume digits of the current radix
      def consumeDigits(): Unit =
        while (isNumberSeparator(ch) || digit2int(ch, base) >= 0) {
          putChar(ch)
          nextChar()
        }
      // at dot with digit following
      def restOfNonIntegralNumber(): Unit = {
        putChar('.')
        nextChar()
        getFraction()
      }
      // 1l is an acknowledged bad practice
      def lintel(): Unit = {
        val msg = "Lowercase el for long is not recommended because it is easy to confuse with numeral 1; use uppercase L instead"
        if (ch == 'l') deprecationWarning(numberOffset + cbuf.length, msg, since="2.13.0")
      }
      // after int: 5e7f, 42L, 42.toDouble but not 42b.
      def restOfNumber(): Unit = {
        ch match {
          case 'e' | 'E' | 'f' | 'F' |
               'd' | 'D' => getFraction()
          case 'l' | 'L' => lintel() ; token = LONGLIT ; setStrVal() ; nextChar()
          case _         => token = INTLIT  ; setStrVal() ; checkNoLetter()
        }
      }

      // consume leading digits, provisionally an Int
      consumeDigits()

      checkNoTrailingSeparator()

      val detectedFloat: Boolean = base == 10 && ch == '.' && isDigit(lookaheadReader.getc())
      if (detectedFloat) restOfNonIntegralNumber() else restOfNumber()
    }

    /** Parse character literal if current character is followed by \',
     *  or follow with given op and return a symbol literal token
     */
    def charLitOr(op: () => Unit): Unit = {
      putChar(ch)
      nextChar()
      if (ch == '\'') {
        nextChar()
        token = CHARLIT
        setStrVal()
      } else {
        op()
        token = SYMBOLLIT
        strVal = name.toString
      }
    }

// Errors -----------------------------------------------------------------

    /** generate an error at the given offset */
    def syntaxError(off: Offset, msg: String): Unit = {
      error(off, msg)
      token = ERROR
    }

    /** generate an error at the current token offset */
    def syntaxError(msg: String): Unit = syntaxError(offset, msg)

    def deprecationWarning(msg: String, since: String): Unit = deprecationWarning(offset, msg, since)

    /** signal an error where the input ended in the middle of a token */
    def incompleteInputError(msg: String): Unit = {
      incompleteInputError(offset, msg)
      token = EOF
    }

    override def toString() = token match {
      case IDENTIFIER | BACKQUOTED_IDENT =>
        "id(" + name + ")"
      case CHARLIT =>
        "char(" + intVal + ")"
      case INTLIT =>
        "int(" + intVal + ")"
      case LONGLIT =>
        "long(" + intVal + ")"
      case FLOATLIT =>
        "float(" + floatVal + ")"
      case DOUBLELIT =>
        "double(" + floatVal + ")"
      case STRINGLIT =>
        "string(" + strVal + ")"
      case STRINGPART =>
        "stringpart(" + strVal + ")"
      case INTERPOLATIONID =>
        "interpolationid(" + name + ")"
      case SEMI =>
        ";"
      case NEWLINE =>
        ";"
      case NEWLINES =>
        ";;"
      case COMMA =>
        ","
      case _ =>
        token2string(token)
    }

    // ------------- brace counting and healing ------------------------------

    /** overridden in UnitScanners:
     *  apply brace patch if one exists for this offset
     *  return true if subsequent end of line handling should be suppressed.
     */
    def applyBracePatch(): Boolean = false

    /** overridden in UnitScanners */
    def parenBalance(token: Token) = 0

    /** overridden in UnitScanners */
    def healBraces(): List[BracePatch] = List()

    /** Initialization method: read first char, then first token
     */
    def init(): Unit = {
      nextChar()
      nextToken()
    }
  } // end Scanner

  // ------------- keyword configuration -----------------------------------

  private val allKeywords = List[(Name, Token)](
    nme.ABSTRACTkw  -> ABSTRACT,
    nme.CASEkw      -> CASE,
    nme.CATCHkw     -> CATCH,
    nme.CLASSkw     -> CLASS,
    nme.DEFkw       -> DEF,
    nme.DOkw        -> DO,
    nme.ELSEkw      -> ELSE,
    nme.EXTENDSkw   -> EXTENDS,
    nme.FALSEkw     -> FALSE,
    nme.FINALkw     -> FINAL,
    nme.FINALLYkw   -> FINALLY,
    nme.FORkw       -> FOR,
    nme.FORSOMEkw   -> FORSOME,
    nme.IFkw        -> IF,
    nme.IMPLICITkw  -> IMPLICIT,
    nme.IMPORTkw    -> IMPORT,
    nme.LAZYkw      -> LAZY,
    nme.MATCHkw     -> MATCH,
    nme.NEWkw       -> NEW,
    nme.NULLkw      -> NULL,
    nme.OBJECTkw    -> OBJECT,
    nme.OVERRIDEkw  -> OVERRIDE,
    nme.PACKAGEkw   -> PACKAGE,
    nme.PRIVATEkw   -> PRIVATE,
    nme.PROTECTEDkw -> PROTECTED,
    nme.RETURNkw    -> RETURN,
    nme.SEALEDkw    -> SEALED,
    nme.SUPERkw     -> SUPER,
    nme.THISkw      -> THIS,
    nme.THROWkw     -> THROW,
    nme.TRAITkw     -> TRAIT,
    nme.TRUEkw      -> TRUE,
    nme.TRYkw       -> TRY,
    nme.TYPEkw      -> TYPE,
    nme.VALkw       -> VAL,
    nme.VARkw       -> VAR,
    nme.WHILEkw     -> WHILE,
    nme.WITHkw      -> WITH,
    nme.YIELDkw     -> YIELD,
    nme.DOTkw       -> DOT,
    nme.USCOREkw    -> USCORE,
    nme.COLONkw     -> COLON,
    nme.EQUALSkw    -> EQUALS,
    nme.ARROWkw     -> ARROW,
    nme.LARROWkw    -> LARROW,
    nme.SUBTYPEkw   -> SUBTYPE,
    nme.VIEWBOUNDkw -> VIEWBOUND,
    nme.SUPERTYPEkw -> SUPERTYPE,
    nme.HASHkw      -> HASH,
    nme.ATkw        -> AT,
    nme.MACROkw     -> IDENTIFIER,
    nme.THENkw      -> IDENTIFIER)

  private var kwOffset: Offset = -1
  private val kwArray: Array[Token] = {
    val (offset, arr) = createKeywordArray(allKeywords, IDENTIFIER)
    kwOffset = offset
    arr
  }

  final val token2name = (allKeywords map (_.swap)).toMap

  final val softModifierNames = Set(nme.open, nme.infix)

  final val scala3Keywords = Set(nme.`enum`, nme.`export`, nme.`given`)

// Token representation ----------------------------------------------------

  /** Returns the string representation of given token. */
  def token2string(token: Token): String = (token: @switch) match {
    case IDENTIFIER | BACKQUOTED_IDENT => "identifier"
    case CHARLIT => "character literal"
    case INTLIT => "integer literal"
    case LONGLIT => "long literal"
    case FLOATLIT => "float literal"
    case DOUBLELIT => "double literal"
    case STRINGLIT | STRINGPART | INTERPOLATIONID => "string literal"
    case SYMBOLLIT => "symbol literal"
    case LPAREN => "'('"
    case RPAREN => "')'"
    case LBRACE => "'{'"
    case RBRACE => "'}'"
    case LBRACKET => "'['"
    case RBRACKET => "']'"
    case EOF => "eof"
    case ERROR => "something"
    case SEMI => "';'"
    case NEWLINE => "';'"
    case NEWLINES => "';'"
    case COMMA => "','"
    case CASECLASS => "case class"
    case CASEOBJECT => "case object"
    case XMLSTART => "$XMLSTART$<"
    case _ =>
      (token2name get token) match {
        case Some(name) => "'" + name + "'"
        case _          => "'<" + token + ">'"
      }
  }

  class MalformedInput(val offset: Offset, val msg: String) extends Exception

  /** A scanner for a given source file not necessarily attached to a compilation unit.
   *  Useful for looking inside source files that are not currently compiled to see what's there
   */
  class SourceFileScanner(val source: SourceFile) extends Scanner {
    val buf = source.content

    // suppress warnings, throw exception on errors
    def warning(off: Offset, msg: String, category: WarningCategory): Unit = ()
    def deprecationWarning(off: Offset, msg: String, since: String): Unit = ()
    def error(off: Offset, msg: String): Unit = throw new MalformedInput(off, msg)
    def incompleteInputError(off: Offset, msg: String): Unit = throw new MalformedInput(off, msg)
  }

  /** A scanner over a given compilation unit
   */
  class UnitScanner(val unit: CompilationUnit, patches: List[BracePatch]) extends SourceFileScanner(unit.source) {
    def this(unit: CompilationUnit) = this(unit, List())

    override def warning(off: Offset, msg: String, category: WarningCategory): Unit   = runReporting.warning(unit.position(off), msg, category, site = "")
    override def deprecationWarning(off: Offset, msg: String, since: String)          = runReporting.deprecationWarning(unit.position(off), msg, since, site = "", origin = "")
    override def error(off: Offset, msg: String)                                      = reporter.error(unit.position(off), msg)
    override def incompleteInputError(off: Offset, msg: String)                       = currentRun.parsing.incompleteInputError(unit.position(off), msg)

    private var bracePatches: List[BracePatch] = patches

    lazy val parensAnalyzer = new ParensAnalyzer(unit, List())

    override def parenBalance(token: Token) = parensAnalyzer.balance(token)

    override def healBraces(): List[BracePatch] = {
      var patches: List[BracePatch] = List()
      if (!parensAnalyzer.tabSeen) {
        var bal = parensAnalyzer.balance(RBRACE)
        while (bal < 0) {
          patches = new ParensAnalyzer(unit, patches).insertRBrace()
          bal += 1
        }
        while (bal > 0) {
          patches = new ParensAnalyzer(unit, patches).deleteRBrace()
          bal -= 1
        }
      }
      patches
    }

    /** Insert or delete a brace, if a patch exists for this offset */
    override def applyBracePatch(): Boolean = {
      if (bracePatches.isEmpty || bracePatches.head.off != offset) false
      else {
        val patch = bracePatches.head
        bracePatches = bracePatches.tail
//        println("applying brace patch "+offset)//DEBUG
        if (patch.inserted) {
          next copyFrom this
          error(offset, "Missing closing brace `}` assumed here")
          token = RBRACE
          true
        } else {
          error(offset, "Unmatched closing brace '}' ignored here")
          fetchToken()
          false
        }
      }
    }
  }

  class ParensAnalyzer(unit: CompilationUnit, patches: List[BracePatch]) extends UnitScanner(unit, patches) {
    val balance = mutable.Map(RPAREN -> 0, RBRACKET -> 0, RBRACE -> 0)

    /** The source code with braces and line starts annotated with [NN] showing the index */
    private def markedSource = {
      val code   = unit.source.content
      val braces = code.indices.filter(idx => "{}\n" contains code(idx)).toSet
      val mapped = code.indices map (idx => if (braces(idx)) s"${code(idx)}[$idx]" else "" + code(idx))
      mapped.mkString("")
    }

    init()
    log(s"ParensAnalyzer for ${unit.source} of length ${unit.source.content.length}\n```\n$markedSource\n```")

    /** The offset of the first token on this line, or next following line if blank
     */
    val lineStart = new ArrayBuffer[Int]

    /** The list of matching top-level brace pairs (each of which may contain nested brace pairs).
     */
    val bracePairs: List[BracePair] = {

      var lineCount = 1
      var lastOffset = 0
      var indent = 0
      val oldBalance = mutable.Map[Int, Int]()
      def markBalance() = for ((k, v) <- balance) oldBalance(k) = v
      markBalance()

      def scan(bpbuf: ListBuffer[BracePair]): (Int, Int) = {
        if (token != NEWLINE && token != NEWLINES) {
          while (lastOffset < offset) {
            if (buf(lastOffset) == LF) lineCount += 1
            lastOffset += 1
          }
          while (lineCount > lineStart.length) {
            lineStart += offset
            // reset indentation unless there are new opening brackets or
            // braces since last ident line and at the same time there
            // are no new braces.
            if (balance(RPAREN) >= oldBalance(RPAREN) &&
                balance(RBRACKET) >= oldBalance(RBRACKET) ||
                balance(RBRACE) != oldBalance(RBRACE)) {
              indent = column(offset)
              markBalance()
            }
          }
        }

        token match {
          case LPAREN =>
            balance(RPAREN) -= 1; nextToken(); scan(bpbuf)
          case LBRACKET =>
            balance(RBRACKET) -= 1; nextToken(); scan(bpbuf)
          case RPAREN =>
            balance(RPAREN) += 1; nextToken(); scan(bpbuf)
          case RBRACKET =>
            balance(RBRACKET) += 1; nextToken(); scan(bpbuf)
          case LBRACE =>
            balance(RBRACE) -= 1
            val lc = lineCount
            val loff = offset
            val lindent = indent
            val bpbuf1 = new ListBuffer[BracePair]
            nextToken()
            val (roff, rindent) = scan(bpbuf1)
            if (lc != lineCount)
              bpbuf += BracePair(loff, lindent, roff, rindent, bpbuf1.toList)
            scan(bpbuf)
          case RBRACE =>
            balance(RBRACE) += 1
            val off = offset; nextToken(); (off, indent)
          case EOF =>
            (-1, -1)
          case _ =>
            nextToken(); scan(bpbuf)
        }
      }

      val bpbuf = new ListBuffer[BracePair]
      while (token != EOF) {
        val (roff, rindent) = scan(bpbuf)
        if (roff != -1) {
          val current = BracePair(-1, -1, roff, rindent, bpbuf.toList)
          bpbuf.clear()
          bpbuf += current
        }
      }
      def bracePairString(bp: BracePair, indent: Int): String = {
        val rangeString = {
          import bp._
          val lline = line(loff)
          val rline = line(roff)
          val tokens = List(lline, lindent, rline, rindent) map (n => if (n < 0) "??" else "" + n)
          "%s:%s to %s:%s".format(tokens: _*)
        }
        val outer  = (" " * indent) + rangeString
        val inners = bp.nested map (bracePairString(_, indent + 2))

        if (inners.isEmpty) outer
        else inners.mkString(outer + "\n", "\n", "")
      }
      def bpString    = bpbuf.toList map ("\n" + bracePairString(_, 0)) mkString ""
      def startString = lineStart.mkString("line starts: [", ", ", "]")

      log(s"\n$startString\n$bpString")
      bpbuf.toList
    }

    var tabSeen = false

    def line(offset: Offset): Int = {
      @tailrec
      def findLine(lo: Int, hi: Int): Int = {
        val mid = (lo + hi) / 2
        if (offset < lineStart(mid)) findLine(lo, mid - 1)
        else if (mid + 1 < lineStart.length && offset >= lineStart(mid + 1)) findLine(mid + 1, hi)
        else mid
      }
      if (offset <= 0) 0
      else findLine(0, lineStart.length - 1)
    }

    def column(offset: Offset): Int = {
      var col = 0
      var i = offset - 1
      while (i >= 0 && buf(i) != CR && buf(i) != LF) {
        if (buf(i) == '\t') tabSeen = true
        col += 1
        i -= 1
      }
      col
    }

    def insertPatch(patches: List[BracePatch], patch: BracePatch): List[BracePatch] = patches match {
      case List() => List(patch)
      case bp :: bps => if (patch.off < bp.off) patch :: patches
                        else bp :: insertPatch(bps, patch)
    }

    def insertRBrace(): List[BracePatch] = {
      def insert(bps: List[BracePair]): List[BracePatch] = bps match {
        case List() => patches
        case (bp @ BracePair(loff, lindent, roff, rindent, nested)) :: bps1 =>
          if (lindent <= rindent) insert(bps1)
          else {
//           println("patch inside "+bp+"/"+line(loff)+"/"+lineStart(line(loff))+"/"+lindent"/"+rindent)//DEBUG
            val patches1 = insert(nested)
            if (patches1 ne patches) patches1
            else {
              var lin = line(loff) + 1
              while (lin < lineStart.length && column(lineStart(lin)) > lindent)
                lin += 1
              if (lin < lineStart.length) {
                val patches1 = insertPatch(patches, BracePatch(lineStart(lin), inserted = true))
                //println("patch for "+bp+"/"+imbalanceMeasure+"/"+new ParensAnalyzer(unit, patches1).imbalanceMeasure)
                /*if (improves(patches1))*/
                patches1
                /*else insert(bps1)*/
                // (this test did not seem to work very well in practice)
              } else patches
            }
          }
      }
      insert(bracePairs)
    }

    def deleteRBrace(): List[BracePatch] = {
      def delete(bps: List[BracePair]): List[BracePatch] = bps match {
        case List() => patches
        case BracePair(loff, lindent, roff, rindent, nested) :: bps1 =>
          if (lindent >= rindent) delete(bps1)
          else {
            val patches1 = delete(nested)
            if (patches1 ne patches) patches1
            else insertPatch(patches, BracePatch(roff, inserted = false))
          }
      }
      delete(bracePairs)
    }

    // don't emit deprecation warnings about identifiers like `macro` or `then`
    // when skimming through the source file trying to heal braces
    override def emitIdentifierDeprecationWarnings = false

    override def error(offset: Offset, msg: String): Unit = ()
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy