scala.meta.internal.parsers.ScannerTokens.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of parsers_2.13 Show documentation
Show all versions of parsers_2.13 Show documentation
Scalameta APIs for parsing and their baseline implementation
package scala.meta.internal.parsers
import scala.meta.Dialect
import scala.meta.classifiers._
import scala.meta.inputs.Input
import scala.meta.internal.trees._
import scala.meta.prettyprinters._
import scala.meta.tokenizers._
import scala.meta.tokens.Token
import scala.meta.tokens.Token._
import scala.meta.tokens.TokenExtensions
import scala.meta.tokens.Tokens
import scala.annotation.tailrec
final class ScannerTokens(val tokens: Tokens)(implicit dialect: Dialect) {
import ScannerTokens._
/**
 * Backward scan helper: for a positive `index`, delegates to `tokens.rskipIf` starting at
 * `index - 1` with a lower bound of 0, using `p` as the skip predicate. For `index <= 0`
 * there is nothing before it and 0 is returned.
 */
@inline
final def skipBefore(index: Int, p: Token => Boolean): Int =
  if (index > 0) tokens.rskipIf(p, index - 1, 0) else 0
/**
 * Forward scan helper: when `index` is before the last token, delegates to `tokens.skipIf`
 * starting at `index + 1` with the last token index as the upper bound, using `p` as the
 * skip predicate. Otherwise the last token index is returned.
 */
@inline
final def skipAfter(index: Int, p: Token => Boolean): Int = {
  val lastIndex = tokens.length - 1
  if (index >= lastIndex) lastIndex else tokens.skipIf(p, index + 1, lastIndex)
}
/** Backward scan from `index` with `Trivia` as the skip predicate. */
final def getPrevIndex(index: Int): Int = skipBefore(index, t => t.is[Trivia])

/** Forward scan from `index` with `Trivia` as the skip predicate. */
final def getNextIndex(index: Int): Int = skipAfter(index, t => t.is[Trivia])

/** Token found at [[getPrevIndex]]. */
final def getPrevToken(index: Int): Token = {
  val prevIndex = getPrevIndex(index)
  tokens(prevIndex)
}

/** Token found at [[getNextIndex]]. */
final def getNextToken(index: Int): Token = {
  val nextIndex = getNextIndex(index)
  tokens(nextIndex)
}

/** Backward scan from `index` skipping only `HTrivia` (presumably horizontal trivia). */
final def getStrictPrev(index: Int): Int = skipBefore(index, t => t.is[HTrivia])

/** Forward scan from `index` skipping only `HTrivia` (presumably horizontal trivia). */
final def getStrictNext(index: Int): Int = skipAfter(index, t => t.is[HTrivia])
// NOTE: Scala's parser isn't ready to accept whitespace and comment tokens,
// so we have to filter them out, because otherwise we'll get errors like `expected blah, got whitespace`
// However, in certain tricky cases some whitespace tokens (namely, newlines) do have to be emitted.
// This leads to extremely dirty and seriously crazy code.
/** Extra predicates and a debug rendering for a single token. */
implicit class XtensionTokenClass(token: Token) {

  /** True for the `class` and `object` keywords. */
  def isClassOrObject: Boolean = token.is[KwClass] || token.is[KwObject]

  /** True for `class`/`object`, and also for any identifier when the dialect allows enums. */
  def isClassOrObjectOrEnum: Boolean =
    if (isClassOrObject) true else token.is[Ident] && dialect.allowEnums

  /** Renders the token as `[SimpleClassName@end]syntax`, with `\n` characters removed. */
  def asString: String =
    s"[${token.getClass.getSimpleName}@${token.end}]${token.syntax.replace("\n", "")}"
}
// https://github.com/lampepfl/dotty/blob/4e7ab609/compiler/src/dotty/tools/dotc/parsing/Scanners.scala#L435
/**
 * Decides whether `argToken` (located at `argTokenPos`) may serve as the argument of a
 * leading infix operator: it must introduce an expression and, if it is an identifier, it
 * must either be a unary operator or not be a symbolic infix operator.
 */
def canBeLeadingInfixArg(argToken: Token, argTokenPos: Int): Boolean = {
  // evaluated only when the token already qualifies as an expression intro
  def identIsAcceptable: Boolean = argToken match {
    case id: Ident => id.value.isUnaryOp || !id.isIdentSymbolicInfixOperator
    case _ => true
  }
  isExprIntro(argToken, argTokenPos) && identIsAcceptable
}
// Soft-keyword matchers built for the current dialect; used below both as predicates
// (e.g. `soft.KwEnd(token)`) and as extractors in patterns (e.g. `soft.KwInline()`).
val soft = new SoftKeywords(dialect)
/**
 * Token predicate, usable both as a function and as a boolean extractor. Accepts identifiers,
 * `super`, `this`, `(`, `@`, `_` and unquotes unconditionally, and literals only when the
 * dialect allows literal types.
 */
object TypeIntro extends Function[Token, Boolean] {
  def apply(token: Token): Boolean = token match {
    case _: Ident | _: KwSuper | _: KwThis => true
    case _: LeftParen | _: At => true
    case _: Underscore | _: Unquote => true
    case _: Literal => dialect.allowLiteralTypes
    case _ => false
  }

  def unapply(token: Token): Boolean = apply(token)
}
/** True when the token found by [[getStrictPrev]] from `index` is classified as `AtEOLorF`. */
@inline
private def isPrecededByNL(index: Int): Boolean = {
  val before = tokens(getStrictPrev(index))
  before.is[AtEOLorF]
}

/** True when the token found by [[getStrictNext]] from `index` is classified as `AtEOLorF`. */
@inline
private def isFollowedByNL(index: Int): Boolean = {
  val after = tokens(getStrictNext(index))
  after.is[AtEOLorF]
}
/**
 * Walks backwards from `idx` while the index stays above `end`, stepping over whitespace.
 * If a comment is reached, the result is whether that comment is itself preceded by a
 * newline (see `isPrecededByNL`); any other token, or reaching `end`, yields false.
 */
@tailrec
final def isPrecededByDetachedComment(idx: Int, end: Int): Boolean =
  if (idx <= end) false
  else
    tokens(idx) match {
      case _: Comment => isPrecededByNL(idx)
      case _: Whitespace => isPrecededByDetachedComment(idx - 1, end)
      case _ => false
    }
/** True when `token` is the soft keyword `end`. */
@inline
private def isEndMarkerIdentifier(token: Token) = soft.KwEnd(token)

/** True when `token` is one of the tokens accepted after `end` in an end marker. */
private def isEndMarkerSpecifier(token: Token) = token match {
  case _: Ident => true
  case _: KwIf | _: KwWhile | _: KwFor | _: KwMatch | _: KwTry => true
  case _: KwNew | _: KwThis => true
  case _: KwGiven | _: KwVal => true
  case _ => false
}

/**
 * Checks whether `token` begins an end marker: it must be `end`, the token at `fNextIndex`
 * must be an accepted specifier, and that specifier must be followed by a newline (or EOF).
 * `fNextIndex` is by-name and is evaluated at most once, only when `token` is `end`.
 */
def isEndMarkerIntro(token: Token, fNextIndex: => Int): Boolean =
  if (!isEndMarkerIdentifier(token)) false
  else {
    val specifierPos = fNextIndex
    isEndMarkerSpecifier(tokens(specifierPos)) && isFollowedByNL(specifierPos)
  }

/** Index-based variant: the specifier is looked up with [[getStrictNext]]. */
def isEndMarkerIntro(index: Int): Boolean =
  isEndMarkerIntro(tokens(index), getStrictNext(index))
/**
 * True when `token` can start an expression. An identifier qualifies only if, at the index
 * supplied by `fIndex`, it is neither a soft modifier nor the start of an end marker.
 * `fIndex` is by-name and is evaluated only for identifiers.
 */
def isExprIntro(token: Token, fIndex: => Int): Boolean = isExprIntroImpl(token) {
  val pos = fIndex
  !(isSoftModifier(pos) || isEndMarkerIntro(pos))
}

/** Like [[isExprIntro]], but every identifier is accepted without further checks. */
def isIdentOrExprIntro(token: Token): Boolean = isExprIntroImpl(token)(isIdentOK = true)

/**
 * Shared classification: identifiers defer to `isIdentOK`, `[` depends on the dialect
 * allowing polymorphic functions, and a fixed set of tokens is always accepted.
 */
private def isExprIntroImpl(token: Token)(isIdentOK: => Boolean): Boolean = token match {
  case _: Ident => isIdentOK
  case _: Literal | _: Interpolation.Id | _: Xml.Start => true
  case _: KwDo | _: KwFor | _: KwIf | _: KwNew | _: KwReturn => true
  case _: KwSuper | _: KwThis | _: KwThrow | _: KwTry | _: KwWhile => true
  case _: LeftParen | _: LeftBrace | _: Underscore | _: Unquote => true
  case _: MacroSplice | _: MacroQuote | _: Indentation.Indent => true
  case _: LeftBracket => dialect.allowPolymorphicFunctions
  case _ => false
}
/**
 * True when the token at `index` is a soft keyword used as a modifier. The decision depends
 * on the keyword and on what follows it (see the per-keyword cases below).
 */
def isSoftModifier(index: Int): Boolean = {
  // Look-ahead shared by several keywords: the next non-trivia token starts a declaration,
  // is itself a modifier, or satisfies the keyword-specific `extra` check.
  @inline
  def followedByDclOrModifierOr(extra: Token => Boolean): Boolean = {
    val following = getNextIndex(index)
    isDclIntro(following) || isModifier(following) || extra(tokens(following))
  }

  val keyword = tokens(index).text
  keyword match {
    case soft.KwTransparent() => followedByDclOrModifierOr(t => t.isAny[KwTrait, KwClass])
    case soft.KwOpaque() => followedByDclOrModifierOr(_ => false)
    case soft.KwInline() => followedByDclOrModifierOr(matchesAfterInlineMatchMod)
    case soft.KwOpen() => isDefIntro(getNextIndex(index))
    case soft.KwInfix() => isDefIntro(getNextIndex(index))
    case soft.KwErased() => isDefIntro(getNextIndex(index))
    case soft.KwTracked() => isDefIntro(getNextIndex(index))
    case _ => false
  }
}
/**
 * True when the token at `index` is the soft keyword `inline` and the next non-trivia token
 * is accepted by `matchesAfterInlineMatchMod`.
 */
@inline
def isInlineMatchMod(index: Int): Boolean =
  if (soft.KwInline(tokens(index))) matchesAfterInlineMatchMod(getNextToken(index))
  else false

/** Tokens accepted directly after `inline` by [[isInlineMatchMod]] and [[isSoftModifier]]. */
private def matchesAfterInlineMatchMod(token: Token): Boolean = token match {
  case _: LeftParen | _: LeftBrace => true
  case _: KwNew | _: KwSuper | _: KwThis => true
  case _: Ident | _: Literal | _: Interpolation.Id | _: Xml.Start => true
  case _: MacroSplice | _: MacroQuote => true
  case _ => false
}
/**
 * True when the token at `index` can start a definition: an annotation, `case` followed by
 * class/object (or an identifier in enum-enabled dialects), a declaration intro, a modifier
 * or a template intro. Unquotes and ellipses are skipped and the check continues with the
 * next non-trivia token.
 */
@tailrec
final def isDefIntro(index: Int): Boolean = {
  val current = tokens(index)
  current match {
    case _: At => true
    case _: Unquote => isDefIntro(getNextIndex(index))
    case _: Ellipsis => isDefIntro(getNextIndex(index))
    case _: KwCase => getNextToken(index).isClassOrObjectOrEnum
    case _ => isDclIntro(index) || isModifier(index) || isTemplateIntro(index)
  }
}
/**
 * True when the token at `index` can start a template: an annotation, `class`, `object`,
 * `trait`, `case` followed by class/object (or an identifier in enum-enabled dialects), or a
 * modifier. An unquote is skipped and the check continues with the next non-trivia token.
 */
@tailrec
final def isTemplateIntro(index: Int): Boolean = {
  val current = tokens(index)
  current match {
    case _: At => true
    case _: KwClass | _: KwObject | _: KwTrait => true
    case _: Unquote => isTemplateIntro(getNextIndex(index))
    case _: KwCase => getNextToken(index).isClassOrObjectOrEnum
    case _ => isModifier(index)
  }
}
/**
 * True when the token at `index` can start a declaration: `def`, `type`, `enum`, `val`,
 * `var`, `given`, or the soft keyword `extension` (see [[isKwExtension]]). An unquote is
 * skipped and the check continues with the next non-trivia token.
 */
@tailrec
final def isDclIntro(index: Int): Boolean = {
  val current = tokens(index)
  current match {
    case _: KwDef | _: KwVal | _: KwVar => true
    case _: KwType | _: KwEnum | _: KwGiven => true
    case _: Unquote => isDclIntro(getNextIndex(index))
    case _ => isKwExtension(index)
  }
}
// Logic taken from the Scala 3 parser
/**
 * True when the token at `index` is the soft keyword `extension` and the next non-trivia
 * token is `(` or `[`.
 */
def isKwExtension(index: Int): Boolean =
  if (!soft.KwExtension(tokens(index))) false
  else
    getNextToken(index) match {
      case _: LeftParen => true
      case _: LeftBracket => true
      case _ => false
    }
/** True when the token at `index` is a hard modifier keyword or a soft modifier. */
def isModifier(index: Int): Boolean =
  if (tokens(index).is[ModifierKeyword]) true else isSoftModifier(index)
/** Recognizes the soft keywords `inline`, `erased` and `tracked`, by token or by raw text. */
object ParamsModifier {

  /** True when `text` is one of the recognized soft keywords. */
  def matches(text: String): Boolean = text match {
    case soft.KwInline() => true
    case soft.KwErased() => true
    case soft.KwTracked() => true
    case _ => false
  }

  /** Boolean extractor over a token; delegates to [[matches]] on the token's text. */
  def unapply(token: Token): Boolean = matches(token.text)
}
/** True for `private`, `protected`, `override` and the soft keyword `open`. */
def isNonlocalModifier(token: Token): Boolean = token match {
  case _: KwPrivate | _: KwProtected => true
  case _: KwOverride => true
  case soft.KwOpen() => true
  case _ => false
}
/**
 * Token predicate, usable both as a function and as a boolean extractor: true for `}`,
 * end-of-file and outdent tokens.
 */
object StatSeqEnd extends Function[Token, Boolean] {
  def apply(token: Token): Boolean = token match {
    case _: RightBrace => true
    case _: EOF => true
    case _: Indentation.Outdent => true
    case _ => false
  }

  def unapply(token: Token): Boolean = apply(token)
}
/**
 * Tells whether `token` could begin a statement. A fixed set of continuation keywords,
 * punctuation and end-of-line/file tokens never can; closing delimiters are accepted only
 * when `closeDelimOK` is set; every other token is accepted.
 */
def mightStartStat(token: Token, closeDelimOK: Boolean): Boolean = token match {
  // keywords that continue a preceding construct
  case _: KwCatch | _: KwElse | _: KwExtends | _: KwFinally => false
  case _: KwForsome | _: KwMatch | _: KwWith | _: KwYield => false
  // punctuation and operators
  case _: LeftBracket | _: Comma | _: Colon | _: Dot | _: Equals => false
  case _: Semicolon | _: Hash | _: RightArrow | _: LeftArrow => false
  case _: Subtype | _: Supertype | _: Viewbound => false
  // line/file ends
  case _: AtEOLorF => false
  case _: CloseDelim => closeDelimOK
  case _ => true
}
/** True when `token` is one of the tokens listed below as able to end a statement. */
private def canEndStat(token: Token): Boolean = token match {
  case _: Ident | _: Literal | _: Underscore => true
  case _: KwGiven | _: KwReturn | _: KwThis | _: KwType => true
  case _: Interpolation.End | _: Xml.End => true
  case _: RightParen | _: RightBracket | _: RightBrace => true
  case _: Ellipsis | _: Unquote => true
  case _ => false
}
/**
 * Token predicate, usable both as a function and as a boolean extractor: true for `;` and
 * for end-of-line tokens.
 */
object StatSep extends Function[Token, Boolean] {
  def apply(token: Token): Boolean = token match {
    case _: Semicolon => true
    case _: AtEOL => true
    case _ => false
  }

  def unapply(token: Token): Boolean = apply(token)
}
/** Boolean extractor for wildcard tokens: `_`, or `*` in dialects with star wildcard imports. */
object Wildcard {

  /** True when the dialect allows `*` wildcard imports and the token's syntax is exactly `*`. */
  def isStar(token: Token): Boolean =
    if (dialect.allowStarWildcardImport) token.syntax == "*" else false

  def unapply(token: Token): Boolean =
    if (token.is[Underscore]) true else isStar(token)
}
/**
 * Measures the indentation of the token at `tokenPosition`, provided it is the first
 * non-whitespace token on its line.
 *
 * The tokens before it are walked backwards:
 *   - horizontal whitespace adds its length to the running width;
 *   - a comment that is not multiline discards the width gathered so far and the walk
 *     continues before it;
 *   - a multiline comment ends the walk with `multilineCommentIndent` of that comment;
 *   - an end-of-line or beginning-of-file token ends the walk with the running width;
 *   - any other token means the given token does not start its line.
 *
 * @return
 *   a pair of (indentation width, index at which the walk stopped); the index is that of the
 *   end-of-line/BOF token or multiline comment, or a negative value if the walk ran past the
 *   first token. `(-1, -1)` is returned when `tokenPosition` is negative, when the token
 *   itself is whitespace, or when it does not start its line.
 */
private[parsers] def countIndentAndNewlineIndex(tokenPosition: Int): (Int, Int) = {
  @tailrec
  def walkBack(i: Int, width: Int): (Int, Int) =
    if (i < 0) (width, i)
    else
      tokens(i) match {
        case _: AtEOL | _: BOF => (width, i)
        case c: Comment =>
          if (AsMultilineComment.isMultiline(c)) (multilineCommentIndent(c), i)
          else walkBack(i - 1, 0) // width restarts before a non-multiline comment
        case ws: HSpace => walkBack(i - 1, width + ws.len)
        case _ => (-1, -1)
      }

  val notMeasurable = tokenPosition < 0 || tokens(tokenPosition).is[Whitespace]
  if (notMeasurable) (-1, -1) else walkBack(tokenPosition - 1, 0)
}
/** Indentation width only; the first component of [[countIndentAndNewlineIndex]]. */
private[parsers] def countIndent(tokenPosition: Int): Int =
  countIndentAndNewlineIndex(tokenPosition) match {
    case (indent, _) => indent
  }
/**
 * Creates a zero-length `Indentation.Indent` token located at the start offset of the token
 * at `pointPos`, sharing that token's input and dialect.
 */
private[parsers] def mkIndentToken(pointPos: Int): Token = {
  val anchor = tokens(pointPos)
  val offset = anchor.start
  new Indentation.Indent(anchor.input, anchor.dialect, offset, offset)
}
/**
 * Creates a zero-length `Indentation.Outdent` token located at the start offset of the token
 * at `pointPos`, sharing that token's input and dialect.
 */
private[parsers] def mkOutdentToken(pointPos: Int): Token = {
  val anchor = tokens(pointPos)
  val offset = anchor.start
  new Indentation.Outdent(anchor.input, anchor.dialect, offset, offset)
}
// Picks the token index at which an outdent should be anchored, examining only the tokens
// strictly between `prevPos` and `currPos`.
//  - prevPos: the scan starts right after this index; it is also the initial candidate
//  - currPos: exclusive upper bound of the scan
//  - outdent: indentation width that the indentation of intervening comments is compared to
//  - okBlank: when false, a comment indented exactly at `outdent` is stepped over only if
//    fewer than two newlines were counted before it
// Returns the chosen index; it never exceeds `currPos`.
private[parsers] def findOutdentPos(
prevPos: Int,
currPos: Int,
outdent: Int,
okBlank: Boolean
): Int = {
// i      - index currently examined
// pos    - candidate result so far
// indent - width accumulated since the last end-of-line token; -1 while the scan is not
//          known to be at the start of a line (initially, unless `prevPos` is an EOL, and
//          again after a comment has been stepped over)
// numEOL - newlines counted so far; restarts at 0 (the default) after a comment
@tailrec
def iter(i: Int, pos: Int, indent: Int, numEOL: Int = 0): Int =
// reached the bound: keep the candidate, but never return an index at or beyond currPos
if (i >= currPos) if (pos < currPos) pos else currPos - 1
else tokens(i) match {
// a line break becomes the new candidate and restarts indentation counting
case t: AtEOL => iter(i + 1, i, 0, numEOL + t.newlines)
// horizontal space counts towards indentation only while at the start of a line
case t: HSpace if indent >= 0 => iter(i + 1, pos, indent + t.len, numEOL)
case _: Whitespace => iter(i + 1, pos, indent, numEOL)
// step over a comment that does not start its line (indent < 0), or is indented deeper than
// `outdent`, or sits exactly at `outdent` with blank lines permitted or absent; the
// candidate then moves just past the comment
case _: Comment
if indent < 0 || outdent < indent || outdent == indent && (okBlank || numEOL < 2) =>
iter(i + 1, i + 1, -1)
// any other token (including a comment rejected by the guard) ends the scan
case _ => pos
}
val iterPos = 1 + prevPos
if (iterPos < currPos) iter(iterPos, prevPos, if (tokens(prevPos).is[AtEOL]) 0 else -1)
// nothing lies between the two positions: anchor at EOF itself, otherwise at prevPos
else if (tokens(currPos).is[EOF]) currPos
else prevPos
}
/**
 * True when, looking forward from just after `currentPosition`, an end-of-line token is
 * reached after stepping over nothing but trivia. False if a non-trivia token or the end
 * of the token sequence comes first.
 */
@tailrec
private[parsers] def isAheadNewLine(currentPosition: Int): Boolean = {
  val ahead = currentPosition + 1
  if (ahead >= tokens.length) false
  else {
    val candidate = tokens(ahead)
    if (candidate.is[AtEOL]) true
    else if (candidate.is[Trivia]) isAheadNewLine(ahead)
    else false
  }
}
/**
 * Returns the `TokenRef` that follows `ref`, computing it on first request and caching it in
 * `ref.next`. When `ref.nextPos` lies past the last token, the result is a terminal
 * `TokenRef` with a null token whose `next` points to itself.
 */
private[parsers] def nextToken(ref: TokenRef): TokenRef = {
  val cached = ref.next
  if (cached ne null) cached
  else {
    val pos = ref.nextPos
    val computed =
      if (pos >= tokens.length) {
        // past the end: self-linked terminal reference
        val terminal = TokenRef(Nil, null, pos, pos, pos)
        terminal.next = terminal
        terminal
      } else nextToken(ref.token, ref.pos, pos, ref.regions)
    ref.next = computed
    computed
  }
}
@tailrec
private def nextToken(
prevToken: Token,
prevPos: Int,
currPos: Int,
sepRegionsOrig: List[SepRegion]
): TokenRef = {
val prev = if (prevPos >= 0) tokens(prevPos) else null
val curr = tokens(currPos)
val currNonTrivial = !curr.is[Trivia]
val nextPos = tokens.indexWhere(!_.is[Trivia], currPos + 1)
val next = if (nextPos >= 0) tokens(nextPos) else null
lazy val (nextIndent, indentPos) = countIndentAndNewlineIndex(nextPos)
// relax requirement that close delim is on separate line
def isTrailingComma: Boolean = dialect.allowTrailingCommas && curr.is[Comma] &&
next.is[CloseDelim] // && next.pos.startLine > curr.pos.endLine
def mkIndent(pos: Int, pointPos: Int, rs: List[SepRegion], next: TokenRef = null): TokenRef =
TokenRef(rs, mkIndentToken(pointPos), pos, nextPos, pointPos, next)
def mkOutdentTo(region: SepRegionIndented, regions: List[SepRegion]) =
mkOutdentAt(region.indent, regions)
def mkOutdentAt(outdent: Int, regions: List[SepRegion]) = {
val maxPointPos = if (nextPos < 0 || currNonTrivial) currPos else nextPos
val pointPos = findOutdentPos(prevPos, maxPointPos, outdent, isIndented(regions, outdent))
val (nextPrevPos, nextCurrPos) =
if (pointPos > currPos) (currPos, pointPos) else (prevPos, currPos)
TokenRef(regions, mkOutdentToken(pointPos), nextPrevPos, nextCurrPos, pointPos)
}
@tailrec
def mkOutdentsOpt(
xs: List[SepRegion],
wasDone: Boolean = false,
head: TokenRef = null,
last: TokenRef = null,
multiEOL: Boolean = false
)(implicit f: List[SepRegion] => OutdentInfo): Either[List[SepRegion], (TokenRef, TokenRef)] =
if (!wasDone) f(xs) match {
case null => mkOutdentsOpt(xs, true, head, last, multiEOL)
case OutdentInfo(outdent, rs, done) =>
val isDone = done || (rs eq xs)
if (outdent eq null) mkOutdentsOpt(rs, isDone, head, last, multiEOL)
else {
val tr = mkOutdentTo(outdent, rs)
val newHead = if (head eq null) tr else { last.next = tr; head }
mkOutdentsOpt(rs, isDone, newHead, tr, multiEOL)
}
}
else if (head eq null) Left(xs)
else {
if (currNonTrivial) last.next = currRef(xs)
else if (multiEOL) last.next = eofRefAt(xs, last.pointPos, multiEOL = true)
Right((head, last))
}
def mkOutdentsT[A](xs: List[SepRegion], multiEOL: Boolean = false)(lt: List[SepRegion] => A)(
rt: (TokenRef, TokenRef) => A
)(implicit f: List[SepRegion] => OutdentInfo): A = mkOutdentsOpt(xs, multiEOL = multiEOL)
.fold(lt, rt.tupled)
def mkOutdents(xs: List[SepRegion], multiEOL: Boolean = false)(implicit
f: List[SepRegion] => OutdentInfo
): TokenRef = mkOutdentsT(xs, multiEOL)(currRef)((x, _) => x)
def currRef(regions: List[SepRegion]): TokenRef = currAndNextRef(regions, null)
def currAndNextRef(regions: List[SepRegion], next: TokenRef): TokenRef =
TokenRef(regions, curr, currPos, next)
def eolRefFor(rs: List[SepRegion], out: Token, eolPos: Int) =
TokenRef(rs, out, eolPos, nextPos, eolPos, null)
def eofRefAt(rs: List[SepRegion], eolPos: Int, multiEOL: Boolean): TokenRef = {
val token = tokens(eolPos)
val out =
if (!multiEOL || token.is[MultiEOL]) token
else LFLF(token.input, token.dialect, token.start, tokens(indentPos).end)
eolRefFor(rs, out, eolPos)
}
def outdentThenCurrRef(
ri: RegionIndent,
rs: List[SepRegion],
rn: Option[SepRegion] = None
): TokenRef = {
val ref = mkOutdentTo(ri, rs)
ref.next = currRef(rn.fold(rs)(_ :: rs))
ref
}
def getCaseIntro(sepRegions: List[SepRegion]) = {
def markRegions() = RegionCaseMark :: sepRegions
next match {
/* Several facts:
* - cases can be at the same level as catch/match; therefore, a non-case token
* starting a line at the same level as case can terminate the entire match
* - however, the compiler seems to allow (although produces a warning) exactly
* one line of a case body which follows `catch case` (both on one line) to be
* at the same level
*
* To handle both cases, we need to insert an indent before the first case so that it
* can be removed on the first line which matches this indent, without potentially
* removing the indent around the match statement.
*
* If case is on a different line after catch/match, we will insert an indent when
* handling newlines below; therefore, when there's no newline, we should explicitly
* insert an indent here, relative to outer region. */
case _: KwCase // case follows catch on same line
if dialect.allowSignificantIndentation && curr.pos.endLine == next.pos.startLine =>
val regions = markRegions()
val nextRegions = RegionIndent(findIndent(sepRegions)) :: regions
currAndNextRef(regions, mkIndent(currPos, nextPos, nextRegions))
case _: KwCase | _: LeftBrace => currRef(markRegions())
case _ => currRef(sepRegions)
}
}
def getTemplateInherit(sepRegions: List[SepRegion]): TokenRef = currRef(sepRegions match {
case RegionTemplateMark :: rs => RegionTemplateInherit :: rs
case _ => sepRegions
})
def isPrevEndMarker(): Boolean = prevPos > 0 && isEndMarkerIdentifier(prev) &&
isPrecededByNL(prevPos)
def getAtEof(sepRegions: List[SepRegion]) = mkOutdents(sepRegions) {
case (r: SepRegionIndented) :: rs => OutdentInfo(r, rs)
case _ :: rs => OutdentInfo(null, rs)
case _ => null
}
def nonTrivial(sepRegions: List[SepRegion]) = curr match {
case _: EOF => getAtEof(sepRegions)
case _: Comma =>
if (inParens(sepRegions)) {
def swapWithOutdents(tokenRef: TokenRef) = {
@tailrec
def iter(oref: TokenRef): TokenRef = {
val nref = nextToken(oref)
if (nref.token.is[Indentation.Outdent]) iter(nref)
else if (oref eq tokenRef) tokenRef
else {
oref.next = tokenRef
try tokenRef.next
finally tokenRef.next = nref
}
}
iter(tokenRef)
}
mkOutdentsT(sepRegions)(xs => swapWithOutdents(currRef(xs))) { (head, last) =>
last.next = swapWithOutdents(last.next)
head
} {
case (r: SepRegionIndented) :: rs => OutdentInfo(r, rs)
case (_: RegionNonDelimNonIndented) :: rs => OutdentInfo(null, rs)
case _ => null
}
} else currRef(sepRegions)
case _: KwEnum => currRef(RegionTemplateMark :: sepRegions)
case _: KwGiven if !prevToken.is[Dot] => currRef(new RegionGivenDecl(curr) :: sepRegions)
case _: KwWith => currRef(dropRegionLine(sepRegions) match {
case (_: RegionGivenDecl) :: rs => rs
case _ => sepRegions
})
case _: KwObject | _: KwClass | _: KwTrait | _: KwPackage | _: KwNew
if dialect.allowSignificantIndentation => currRef(RegionTemplateMark :: sepRegions)
case _: KwTry if !isPrevEndMarker() => currRef(RegionTry :: dropRegionLine(sepRegions))
case _: KwMatch if !isPrevEndMarker() => getCaseIntro(sepRegions)
case _: KwCatch => getCaseIntro(dropWhile(sepRegions)(_ ne RegionTry))
case _: KwCase if !next.isClassOrObject =>
def expr() = new RegionCaseExpr(countIndent(currPos))
currRef(dropRegionLine(sepRegions) match {
// `case` follows the body of a previous case
case (_: RegionCaseBody) :: rs => expr() :: rs
// head could be RegionIndent or RegionBrace
case x :: RegionCaseMark :: rs => expr() :: x :: rs
case (_: RegionDelim) :: (_: RegionFor | RegionTemplateBody) :: _ => sepRegions
// partial function
case (_: RegionBrace) :: _ => expr() :: sepRegions
// partial function in assignment or fewer braces
case (_: RegionIndent) :: _
if prevToken.is[Indentation.Indent] && prev.isAny[Equals, Colon] =>
expr() :: sepRegions
// `case` is at top-level (likely quasiquote)
case Nil if prevPos == 0 => expr() :: sepRegions
case _ => sepRegions
})
case _: KwFinally => dropRegionLine(sepRegions) match {
// covers case when finally follows catch case without a newline
// otherwise, these two regions would have been removed already
case (_: RegionCaseBody) :: (r: RegionIndent) :: rs =>
val ref = mkOutdentTo(r, rs)
ref.next = currRef(dropUntil(rs)(_ eq RegionTry))
ref
case rs => currRef(dropUntil(rs)(_ eq RegionTry))
}
case _: LeftBrace =>
val lbRegions = sepRegions match {
case (r: RegionGivenDecl) :: rs if r.kw ne prevToken => RegionTemplateBody :: rs
case RegionTemplateMark :: rs => RegionTemplateBody :: rs
case RegionTemplateInherit :: rs if !prev.is[KwExtends] => RegionTemplateBody :: rs
case _ => sepRegions
}
currRef(RegionBrace(nextIndent) :: lbRegions)
case _: RightBrace =>
// produce outdent for every indented region before RegionBrace|RegionEnum
mkOutdents(sepRegions) {
case (r: SepRegionIndented) :: rs => OutdentInfo(r, rs)
case (_: RegionBrace) :: (RegionTemplateBody | RegionCaseMark) :: rs =>
OutdentInfo(null, rs, true)
case (_: RegionBrace) :: rs => OutdentInfo(null, rs, true)
case _ :: rs => OutdentInfo(null, rs)
case _ => null
}
case _: LeftBracket => currRef(RegionBracket :: sepRegions)
case _: RightBracket => mkOutdents(sepRegions) {
case (r: SepRegionIndented) :: rs => OutdentInfo(r, rs)
case RegionBracket :: rs => OutdentInfo(null, rs, done = true)
case _ :: rs => OutdentInfo(null, rs)
case _ => null
}
case _: LeftParen => currRef(RegionParen :: sepRegions)
case _: RightParen => mkOutdents(sepRegions) {
case (r: SepRegionIndented) :: rs => OutdentInfo(r, rs)
case RegionParen :: rs => OutdentInfo(null, rs, true)
case _ :: rs => OutdentInfo(null, rs)
case _ => null
}
case _: RightArrow => currRef(sepRegions match {
case (_: RegionCaseExpr) :: rs =>
// add case region for `match {` to calculate proper indentation
// for statements in indentation dialects
val bodyIndent = rs match {
case (ri: RegionIndent) :: _ => ri.indent
case _ => nextIndent
}
new RegionCaseBody(bodyIndent, curr) :: rs
case _ => sepRegions
})
case _: KwFor if !isPrevEndMarker() => currRef(RegionFor(next) :: sepRegions)
case _: KwWhile if dialect.allowQuietSyntax && !isPrevEndMarker() =>
currRef(RegionWhile(next) :: sepRegions)
case _: KwIf if dialect.allowQuietSyntax && !isPrevEndMarker() =>
currRef(dropRegionLine(sepRegions) match {
case rs @ (_: RegionCaseExpr | _: RegionFor) :: _ => rs
case rs @ (_: RegionDelim) :: (_: RegionFor) :: _ => rs
case _ => RegionIf(next) :: sepRegions
})
case _: KwThen => dropRegionLine(sepRegions) match {
case (r: RegionIndent) :: (_: RegionIf) :: rs => outdentThenCurrRef(r, rs, Some(RegionThen))
case (_: RegionIf) :: rs => currRef(RegionThen :: rs)
case _ => currRef(RegionThen :: sepRegions)
}
case _: KwElse if dialect.allowQuietSyntax =>
dropRegionLine(sepRegions) match {
case (r: RegionIndent) :: RegionThen :: rs => outdentThenCurrRef(r, rs)
case (_: RegionControl) :: rs => currRef(rs)
case _ => currRef(sepRegions)
}
case _: KwDo | _: KwYield => dropRegionLine(sepRegions) match {
case (r: RegionIndent) :: (_: RegionControl) :: rs => outdentThenCurrRef(r, rs)
case (_: RegionControl) :: rs => currRef(rs)
case _ => currRef(sepRegions)
}
case _: KwDef | _: KwVal | _: KwVar
if dialect.allowSignificantIndentation && !isPrevEndMarker() =>
currRef(RegionDefMark :: sepRegions)
case _: Colon => sepRegions match {
case RegionDefMark :: rs => currRef(RegionDefType :: rs)
case _ => currRef(sepRegions)
}
case _: Equals => sepRegions match {
case (_: RegionDefDecl | _: RegionGivenDecl) :: rs => currRef(rs)
case _ => currRef(sepRegions)
}
case _: KwExtends => getTemplateInherit(sepRegions)
case _: Ident => curr.text match {
case soft.KwDerives() => getTemplateInherit(sepRegions)
case soft.KwExtension() if (prevToken match {
case _: BOF | _: Indentation | _: LeftBrace | _: RightArrow | StatSep() => next
.isAny[LeftParen, LeftBracket]
case _ => false
}) => currRef(RegionExtensionMark :: sepRegions)
case _ => currRef(sepRegions)
}
case _ => currRef(sepRegions)
}
def getNonTrivialRegions(regions: List[SepRegion]) = dropRegionLine(regions) match {
case RegionExtensionMark :: rs => curr match {
case _: LeftBrace => RegionTemplateBody :: rs
case _: LeftParen | _: LeftBracket => regions
case _ => rs
}
case (r: RegionControl) :: rs if r.isNotTerminatingTokenIfOptional(curr) =>
r match {
case rc: RegionControlMaybeCond if prev.is[RightParen] =>
curr match {
case _: Dot | _: KwMatch => rc.asCond() :: rs
// might continue cond or start body
case _: Ident | _: LeftBrace | _: LeftBracket | _: LeftParen | _: Underscore
if dialect.allowQuietSyntax => rc.asCondOrBody() :: rs
case _ => rc.asBody().fold(rs)(_ :: rs)
}
case RegionForMaybeParens if prev.is[RightParen] =>
curr match {
case _: LeftArrow => RegionForOther :: rs
case _ => rs
}
case r: RegionFor if r.isClosingConditionToken(prev) => rs
case _ => if (prevToken.is[AtEOL] || curr.is[CloseDelim]) rs else regions
}
case RegionParen :: RegionForMaybeParens :: rs if curr.is[LeftArrow] =>
RegionParen :: RegionForParens :: rs
case _ => regions
}
if (nextPos < 0) getAtEof(getNonTrivialRegions(sepRegionsOrig))
else if (currNonTrivial)
if (isTrailingComma) nextToken(curr, currPos, currPos + 1, sepRegionsOrig)
else nonTrivial(getNonTrivialRegions(sepRegionsOrig))
else {
@tailrec
def findFirstEOL(pos: Int): Int =
if (pos > indentPos) -1 else if (tokens(pos).is[AtEOL]) pos else findFirstEOL(pos + 1)
@tailrec
def hasBlank(pos: Int, hadEOL: Boolean = false): Boolean =
if (pos > indentPos) false
else tokens(pos) match {
case _: MultiNL => true
case _: AtEOL => hadEOL || hasBlank(pos + 1, true)
case _: Whitespace => hasBlank(pos + 1, hadEOL)
case _ => hasBlank(pos + 1)
}
val hasLF = indentPos > prevPos // includes indentPos = -1
val eolPos = if (hasLF) findFirstEOL(prevPos + 1) else -1
val multiEOL = eolPos >= 0 && hasBlank(eolPos)
def lastWhitespaceToken(rs: List[SepRegion], lineIndent: Int) = {
val addRegionLine = lineIndent >= 0 && isIndented(rs, lineIndent)
val regions = if (addRegionLine) RegionLine(lineIndent) :: rs else rs
eofRefAt(regions, eolPos, multiEOL)
}
def stripIfCanProduceLF(regions: List[SepRegion]) = {
@inline
def derives(token: Token) = soft.KwDerives(token)
@inline
def blankBraceOr(ok: => Boolean): Boolean = if (next.is[LeftBrace]) multiEOL else ok
def isEndMarker() = isEndMarkerSpecifier(prev) && {
val prevPrevPos = getStrictPrev(prevPos)
isEndMarkerIdentifier(tokens(prevPrevPos)) && isPrecededByNL(prevPrevPos)
}
@tailrec
def strip(rs: List[SepRegion]): Option[List[SepRegion]] = rs match {
// `[`, `=` and `#` are covered by CantStartStat
case RegionDefType :: xs => if (next.is[LeftParen]) None else strip(xs)
// `extends` and `with` are covered by canEndStat() and CantStartStat
case RegionTemplateMark :: xs => if (blankBraceOr(!derives(next))) strip(xs) else None
case RegionTemplateInherit :: xs =>
if (blankBraceOr(!derives(next) && !derives(prev))) strip(xs) else None
case RegionTry :: xs
if !next.isAny[KwCatch, KwFinally] && canBeLeadingInfix != LeadingInfix.Yes &&
!isIndented(xs, nextIndent) => strip(xs)
case Nil | (_: CanProduceLF) :: _ => Some(rs)
case _ => None
}
val ok = eolPos >= 0 && mightStartStat(next, closeDelimOK = true) &&
(canEndStat(prev) || isEndMarker())
if (ok) strip(regions) else None
}
def getIfCanProduceLF(regions: List[SepRegion], lineIndent: Int = -1) =
stripIfCanProduceLF(regions).map(rs => Right(lastWhitespaceToken(rs, lineIndent)))
// https://dotty.epfl.ch/docs/reference/changed-features/operators.html#syntax-change-1
def isLeadingInfix(regions: List[SepRegion]) = regions match {
case Nil | (_: CanProduceLF) :: _ => canBeLeadingInfix
case _ => LeadingInfix.No
}
lazy val canBeLeadingInfix =
if (!multiEOL && eolPos >= 0 && dialect.allowInfixOperatorAfterNL &&
next.isSymbolicInfixOperator) isLeadingInfixArg(nextPos + 1, nextIndent)
else LeadingInfix.No
def getInfixLFIfNeeded(regions: List[SepRegion]) = {
def getInfixLF(invalid: Option[String]) = Some(Right {
val lf = tokens(indentPos)
val out = InfixLF(lf.input, lf.dialect, lf.start, lf.end, invalid)
eolRefFor(sepRegionsOrig, out, indentPos)
})
isLeadingInfix(regions) match {
case LeadingInfix.Yes => getInfixLF(None)
case LeadingInfix.InvalidArg if (sepRegionsOrig match {
// see `isLeadingInfix` above: `x` is guaranteed to be CanProduceLF
case x :: _ => x.indent >= 0 && x.indent < nextIndent
case _ => false
}) => getInfixLF(Some("Invalid indented leading infix operator found"))
case _ => None
}
}
val resOpt =
if (!hasLF) None
else if (!dialect.allowSignificantIndentation) getInfixLFIfNeeded(sepRegionsOrig)
.orElse(getIfCanProduceLF(sepRegionsOrig))
else {
def noOutdent(sepRegions: List[SepRegion]) = sepRegions.find(_.isIndented)
.forall(_.indent <= nextIndent)
/**
* Outdent is needed in following cases:
* - If indentation on next line is less than current and previous token can't continue
* expr on the next line
* - At the end of `match` block even if indentation level is not changed. Example:
* ```
* x match
* case 1 =>
* case 2 =>
* // <- produce outdent
* foo()
* ```
*/
def getOutdentInfo(sepRegions: List[SepRegion]) = sepRegions match {
case (rc: RegionCaseBody) :: (r: RegionIndent) :: rs =>
if (nextIndent > r.indent) null
else if (next.is[KwFinally]) OutdentInfo(r, rs, noOutdent(rs))
else if (nextIndent < r.indent || rc.arrow.ne(prev) && (!next.is[KwCase]) ||
getNextToken(nextPos).isClassOrObject) OutdentInfo(r, dropRegionLine(nextIndent, rs))
else null
case (r: RegionIndent) :: (rs @ RegionTry :: xs) =>
if (nextIndent < r.indent || nextIndent == r.indent && next.isAny[KwCatch, KwFinally]) {
val done = noOutdent(xs)
OutdentInfo(r, if (done) rs else xs, done)
} else null
case RegionTry :: rs => if (noOutdent(rs)) null else OutdentInfo(null, rs)
case (_: RegionNonDelimNonIndented) :: rs if (prev match {
// [then] else do [catch] finally yield [match]
case _: KwThen | _: KwCatch | _: KwMatch => false
case _ => !noOutdent(rs)
}) => OutdentInfo(null, rs)
case (r: SepRegionIndented) :: _ if nextIndent >= r.indent => null // we stop here
case (r: RegionIndent) :: (rs @ (rc: RegionControl) :: xs) =>
OutdentInfo(r, if (rc.isNotTerminatingTokenIfOptional(next)) xs else rs)
case (r: RegionIndent) :: RegionTemplateBody :: rs => OutdentInfo(r, rs)
case (r: SepRegionIndented) :: rs if (prev match {
// then [else] [do] catch [finally] [yield] match
case _: KwElse | _: KwDo | _: KwFinally | _: KwYield => false
// exclude leading infix op
case _ => !isIndented(rs, nextIndent) || canBeLeadingInfix != LeadingInfix.Yes
}) => OutdentInfo(r, rs)
case _ => null
}
/**
* Indent is needed in the following cases:
* - Indentation on new line is greater and previous token can start indentation and
* token can start indentation
* - Indentation on the new line is the same and the next token is the first `case`
* clause in match. Example:
* ```
* x match // <- mk indent
* case 1 =>
* ```
*
* Notice: Indentation after `:` isn't handled here. It's produced manually on the parser
* level.
*/
def getIndentIfNeeded(sepRegions: List[SepRegion]) = {
def exceedsIndent = isIndented(sepRegions, nextIndent)
def emitIndentWith(ri: SepRegionIndented, rs: List[SepRegion], next: TokenRef = null) =
Some(Right(mkIndent(prevPos, indentPos, ri :: rs, next)))
def emitIndent(regions: List[SepRegion], next: TokenRef = null) =
emitIndentWith(RegionIndent(nextIndent), regions, next)
def emitIndentAndOutdent(regions: List[SepRegion]) =
emitIndent(regions, mkOutdentAt(nextIndent, regions))
// !next.is[RightBrace] - braces can sometimes have -1 and we can start indent on }
prev match {
case _ if nextIndent < 0 || next.is[RightBrace] => None
// if does not work with indentation in pattern matches
case _: KwIf if !sepRegions.headOption.exists(_.isInstanceOf[RegionIf]) => None
case _: KwCatch | _: KwMatch =>
// always add indent for indented `match` block
// check the previous token to avoid an infinite loop
val ok = next.is[KwCase] && sepRegions.headOption.contains(RegionCaseMark)
if (ok) emitIndent(sepRegions) else None
case _: Colon =>
/**
* Colon with NL can appear in several contexts:
* - after package/class/etc: handled with RegionColonEol
* - after variable and before its type:
* - within `def`, `val`, `var` declaration or definition: excluded with
* RegionDefMark
* - cast within an argument expression: doesn't allow newline as it can then be
* confused with a fewer-braces invocation
* - within self-type: not allowed by the compiler
* - after fewer-braces method call: will apply if not handled otherwise
*/
def couldBeFewerBraces(): Boolean = dialect.allowFewerBraces &&
!sepRegions.contains(RegionDefMark) && getPrevToken(prevPos)
.isAny[Ident, CloseDelim]
sepRegions match {
case (_: RegionTemplateDecl) :: rs =>
if (exceedsIndent) emitIndent(RegionTemplateBody :: rs)
else if (soft.KwEnd(next)) emitIndentAndOutdent(rs)
else Some(Right(lastWhitespaceToken(rs, nextIndent)))
case (_: RegionDefDecl) :: _ => None
case _ if !exceedsIndent =>
if (!couldBeFewerBraces()) None
else Some(Right(lastWhitespaceToken(sepRegions, nextIndent)))
case (_: RegionGivenDecl) :: rs => emitIndent(RegionTemplateBody :: rs)
case _ => next match {
// RefineDcl
case _: KwVal | _: KwDef | _: KwType | _: Semicolon => emitIndent(sepRegions)
// fewer braces function (although could be self-type)
case _ if couldBeFewerBraces() => emitIndent(sepRegions)
case _ => None
}
}
case _ if !exceedsIndent => None
case _: RightArrow => dropRegionLine(sepRegions) match {
case (_: RegionGivenDecl) :: _ => None
case (rc: RegionCaseBody) :: (_: RegionBrace) :: _ if rc.arrow eq prev => None
case _ if isEndMarkerIntro(nextPos) => None
case _ => emitIndent(sepRegions)
}
case _: KwTry => emitIndent(sepRegions)
case _ => dropRegionLine(sepRegions) match {
case (r: RegionFor) :: rs if r.isClosingConditionToken(prev) =>
val ko = r.isTerminatingToken(next)
if (ko) None else emitIndent(rs)
case RegionForMaybeParens :: rs if prev.is[RightParen] =>
val ko = next.is[LeftArrow] || RegionForMaybeParens.isTerminatingToken(next)
if (ko) None else emitIndent(rs)
case (x: RegionControlMaybeCond) :: rs if prev.is[RightParen] =>
val ko = next.is[Dot] || !x.isNotTerminatingTokenIfOptional(next) ||
canBeLeadingInfix == LeadingInfix.Yes
if (ko) None else emitIndent(x.asBody().fold(rs)(_ :: rs))
case (x: RegionControlMaybeCond) :: rs if x.isControlKeyword(prev) =>
emitIndent(x.asCond() :: rs)
case (_: RegionTemplateDecl) :: _ if next.is[LeftParen] => Some(Left(sepRegions)) // skip this newline
case RegionExtensionMark :: rs if prev.is[RightParen] && !next.is[LeftParen] =>
emitIndent(RegionTemplateBody :: rs)
case _ =>
val ok = prev match {
case _: KwYield | _: KwFinally | _: KwDo | _: KwFor | _: KwThen | _: KwElse |
_: KwWhile | _: KwIf | _: KwReturn | _: LeftArrow | _: ContextArrow =>
true
case _: Equals => !next.is[KwMacro]
case _: KwWith => isDefIntro(nextPos) || next.isAny[KwImport, KwExport]
case _ => false
}
if (ok) emitIndent(sepRegions) else None
}
}
}
val regionsToRes: List[SepRegion] => Option[Either[List[SepRegion], TokenRef]] =
if (prevToken.is[Indentation]) _ => None
else
mkOutdentsT(_, multiEOL)(getIndentIfNeeded)((x, _) => Some(Right(x)))(getOutdentInfo)
// Fallback that produces no token. If the head of the original region list is a
// `SepRegionIndented` whose indent is smaller than `nextIndent`, returns a `Left` holding
// the original regions with a new `RegionLine(nextIndent)` prepended; otherwise returns None.
// NOTE(review): a `Left` appears to be consumed by the `resOpt match` further down, which
// re-invokes `nextToken` with the returned regions -- confirm `resOpt` is this block's result.
def onlyWithoutLF() = sepRegionsOrig match {
case (ri: SepRegionIndented) :: _ if ri.indent < nextIndent =>
Some(Left(RegionLine(nextIndent) :: sepRegionsOrig))
case _ => None
}
// Walks the region list from its head, looking for a result for the current position.
// At every step `regionsToRes` is tried first, then `getInfixLFIfNeeded`; the head region is
// inspected only when both return None. The result is None, a `Left` carrying a replacement
// region list, or a `Right` carrying a token reference.
@tailrec
def iter(regions: List[SepRegion]): Option[Either[List[SepRegion], TokenRef]] = {
val res = regionsToRes(regions).orElse(getInfixLFIfNeeded(regions))
if (res.isEmpty) regions match {
// no regions at all and the previous token is BOF: start with a single line region
case Nil if prev.is[BOF] => Some(Left(RegionLine(nextIndent) :: Nil))
// a line region whose indent is not below `nextIndent` is dropped; retry with the rest
case (r: RegionLine) :: rs if r.indent >= nextIndent => iter(rs)
// a control region where `prev` is not its control keyword and `next` passes
// `isNotTerminatingTokenIfOptional`
case (r: RegionControl) :: rs
if !r.isControlKeyword(prev) && r.isNotTerminatingTokenIfOptional(next) =>
r match {
// previous token is `)`: a following `.` yields nothing; otherwise the region is
// converted with `asBody()` and, if that succeeds, tried via `getIfCanProduceLF`
// with `onlyWithoutLF` as the fallback
case r: RegionControlMaybeCond if prev.is[RightParen] =>
if (next.is[Dot]) None
else r.asBody() match {
case Some(x) => getIfCanProduceLF(x :: rs, nextIndent).orElse(onlyWithoutLF())
case None => iter(rs)
}
// any other control region is dropped; retry with the rest
case _ => iter(rs)
}
// everything else: `stripIfCanProduceLF` decides. If it returns the very same list
// (reference equality), a token is produced via `lastWhitespaceToken`; if it returns a
// different list, the search continues on that list; if it returns nothing, fall back
// to `onlyWithoutLF`.
case rs => stripIfCanProduceLF(rs) match {
case Some(xs) =>
if (xs eq rs) Some(Right(lastWhitespaceToken(xs, nextIndent))) else iter(xs)
case _ => onlyWithoutLF()
}
}
else res
}
iter(sepRegionsOrig)
}
resOpt match {
case Some(Right(res)) => res
case Some(Left(rs)) => nextToken(prevToken, prevPos, nextPos, rs)
case _ => nextToken(prevToken, prevPos, nextPos, sepRegionsOrig)
}
}
}
/**
 * Classifies what follows a potential leading infix operator.
 *
 * The token at `afterOpPos` must be whitespace for the check to proceed: an end-of-line or
 * horizontal space starts a forward scan, a comment yields `InvalidArg` and anything else
 * yields `No`. The scan skips whitespace and comments until it reaches another token and
 * then reports
 *   - `No` if `canBeLeadingInfixArg` rejects that token, or if a line break is met while
 *     nothing but horizontal space has been seen since the previous line break;
 *   - `InvalidArg` if the tracked indent is non-negative and smaller than `nextIndent`;
 *   - `Yes` otherwise.
 *
 * @param afterOpPos index of the token immediately following the operator
 * @param nextIndent indent that the tracked indent is compared against
 */
private def isLeadingInfixArg(afterOpPos: Int, nextIndent: Int) = {
  // No bounds check on the index: the catch-all case below ends the scan on the first
  // token that is not whitespace or a comment, and EOF is such a token.
  @tailrec
  def scan(idx: Int, column: Int, sameLine: Boolean): LeadingInfix = tokens(idx) match {
    case _: EOL =>
      // a line break restarts indent tracking, unless the line so far held only spaces
      if (!sameLine) LeadingInfix.No else scan(idx + 1, 0, false)
    case ws: HSpace =>
      // horizontal space counts towards the indent only directly after a line break
      val updated = if (sameLine) column else column + ws.len
      scan(idx + 1, updated, sameLine)
    case comment: Comment =>
      // a comment containing a line break overrides the indent with that of its last line
      val closing = multilineCommentIndent(comment)
      scan(idx + 1, if (closing >= 0) closing else column, true)
    case other =>
      if (!canBeLeadingInfixArg(other, idx)) LeadingInfix.No
      else if (column < 0 || column >= nextIndent) LeadingInfix.Yes
      else LeadingInfix.InvalidArg
  }

  val start = afterOpPos + 1
  tokens(afterOpPos) match {
    case _: EOL => scan(start, 0, false)
    // -1 marks "no line break seen yet", which disables the indent comparison
    case _: HSpace => scan(start, -1, true)
    case _: Comment => LeadingInfix.InvalidArg
    case _ => LeadingInfix.No
  }
}
}
object ScannerTokens {

  /**
   * Outcome of the leading-infix check done by `isLeadingInfixArg` in the companion class:
   * `No` (not a leading infix argument), `Yes` (it is one) and `InvalidArg` (a candidate
   * argument was found but its indent is smaller than required).
   */
  private sealed trait LeadingInfix
  private object LeadingInfix {
    object No extends LeadingInfix
    object Yes extends LeadingInfix
    object InvalidArg extends LeadingInfix
  }

  /**
   * Tokenizes `input` with the implicit `tokenize` and `dialect` and wraps the tokens.
   *
   * Calls `.get` on the tokenization result, so a failed tokenization surfaces here rather
   * than being returned to the caller as a value.
   */
  def apply(input: Input)(implicit dialect: Dialect, tokenize: Tokenize): ScannerTokens =
    new ScannerTokens(tokenize(input, dialect).get)

  /**
   * Bundles an indented region being closed with the regions that go with it.
   *
   * @param outdent the indented region
   * @param regions the associated region list
   * @param done    flag, `false` by default
   */
  private[parsers] case class OutdentInfo(
      outdent: SepRegionIndented,
      regions: List[SepRegion],
      done: Boolean = false
  )

  /**
   * Computes the indent of the last line of a comment that spans several lines.
   *
   * The comment text is scanned backwards from its end. Spaces and tabs are counted, any
   * other character resets the count, and the first `\n` or `\r` encountered ends the scan
   * with the count accumulated so far, i.e. the whitespace at the very end of the last line.
   *
   * @return the count described above, or -1 if the text contains no line break
   */
  private[parsers] def multilineCommentIndent(t: Comment): Int = {
    val str: String = t.value
    @tailrec
    def loop(idx: Int, indent: Int): Int =
      // Stop only once the index has moved past the first character. The previous
      // `idx <= 0` test never looked at index 0, so a comment whose text starts with a
      // line break was reported as single-line.
      if (idx < 0) -1
      else str.charAt(idx) match {
        case '\n' | '\r' => indent
        case ' ' | '\t' => loop(idx - 1, indent + 1)
        case _ => loop(idx - 1, 0)
      }
    loop(str.length - 1, 0)
  }

  /** Drops leading regions for as long as `f` holds; returns the remaining suffix. */
  @tailrec
  private def dropWhile(regions: List[SepRegion])(f: SepRegion => Boolean): List[SepRegion] =
    regions match {
      case head :: tail => if (f(head)) dropWhile(tail)(f) else regions
      case _ => Nil
    }

  /**
   * Drops regions up to and including the first one satisfying `f`; returns `Nil` if no
   * region satisfies it.
   */
  @tailrec
  private def dropUntil(regions: List[SepRegion])(f: SepRegion => Boolean): List[SepRegion] =
    regions match {
      case head :: tail => if (f(head)) tail else dropUntil(tail)(f)
      case _ => Nil
    }

  /** Drops every `RegionLine` at the front of the list. */
  @tailrec
  private def dropRegionLine(regions: List[SepRegion]): List[SepRegion] = regions match {
    case (_: RegionLine) :: rs => dropRegionLine(rs)
    case _ => regions
  }

  /** Drops leading `RegionLine`s whose indent is at least `indent`. */
  @tailrec
  private def dropRegionLine(indent: Int, regions: List[SepRegion]): List[SepRegion] =
    regions match {
      case (r: RegionLine) :: rs if r.indent >= indent => dropRegionLine(indent, rs)
      case _ => regions
    }

  /** Indent of the first region with a non-negative indent, or 0 if there is none. */
  private def findIndent(sepRegions: List[SepRegion]): Int = sepRegions.find(_.indent >= 0)
    .fold(0)(_.indent)

  /** True if `curIndent` is strictly greater than the indent found in `sepRegions`. */
  @inline
  private def isIndented(sepRegions: List[SepRegion], curIndent: Int): Boolean =
    findIndent(sepRegions) < curIndent

  /**
   * True if the first `RegionDelimNonIndented` in the list is `RegionParen`; false if that
   * region is something else or the list contains no such region.
   */
  @tailrec
  private def inParens(regions: List[SepRegion]): Boolean = regions match {
    case (r: RegionDelimNonIndented) :: _ => r eq RegionParen
    case _ :: rest => inParens(rest)
    case _ => false
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy