scala.meta.internal.tokenizers.XmlParser.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tokenizers_2.12 Show documentation
Show all versions of tokenizers_2.12 Show documentation
Scalameta APIs for tokenization and their baseline implementation
The newest version!
package scala.meta.internal.tokenizers
import scala.annotation.switch
import scala.meta.Dialect
import scala.meta.inputs.Input
import scala.meta.internal.fastparse
import scala.meta.internal.fastparse.all._
/**
* Copy-pasta from this lihaoyi comment:
* [[https://github.com/scalameta/fastparse/pull/1#issuecomment-244940542]]
* and adapted to more closely match scala-xml
*/
class XmlParser(Block: P0, Patterns: P0 = Fail) {
private val S = CharsWhileIn("\t\n\r ")
val XmlExpr: P0 = P( Xml.XmlContent.rep(min = 1, sep = S.?) )
val XmlPattern: P0 = P( Xml.ElemPattern )
private[this] object Xml {
val Element = P( TagHeader ~/ ("/>" | ">" ~/ Content ~/ ETag ) ) // FIXME tag must be balanced
val TagHeader = P( "<" ~ Name ~/ (S ~ Attribute).rep ~ S.? )
val ETag = P( "" ~ Name ~ S.? ~ ">" )
val Attribute = P( Name ~/ Eq ~/ AttValue )
val Eq = P( S.? ~ "=" ~ S.? )
val AttValue = P(
"\"" ~/ (CharQ | Reference).rep ~ "\"" |
"'" ~/ (CharA | Reference).rep ~ "'" |
ScalaExpr
)
val Content = P( (CharData | Reference | ScalaExpr | XmlContent).rep )
val XmlContent: P0 = P( Unparsed | CDSect | PI | Comment | Element )
val ScalaExpr = P( "{" ~ Block ~ "}" )
val Unparsed = P( UnpStart ~/ UnpData ~ UnpEnd )
val UnpStart = P( "" )
val UnpEnd = P( " " )
val UnpData = P( (!UnpEnd ~ Char).rep )
val CDSect = P( CDStart ~/ CData ~ CDEnd )
val CDStart = P( "" ~ Char).rep )
val CDEnd = P( "]]>" )
val Comment = P( "" )
val ComText = P( (!"-->" ~ Char).rep )
val PI = P( "" ~ Name ~ S.? ~ PIProcText ~ "?>" )
val PIProcText = P( (!"?>" ~ Char).rep )
val Reference = P( EntityRef | CharRef )
val EntityRef = P( "&" ~ Name ~/ ";" )
val CharRef = P( "" ~ Num ~ ";" | "" ~ HexNum ~ ";" )
val Num = P( CharIn('0' to '9').rep )
val HexNum = P( CharIn('0' to '9', 'a' to 'f', 'A' to 'F').rep )
val CharData = P( (CharB | "{{" | "}}").rep(1) )
val Char = P( AnyChar )
val Char1 = P( !("<" | "&") ~ Char )
val CharQ = P( !"\"" ~ Char1 )
val CharA = P( !"'" ~ Char1 )
val CharB = P( !("{" | "}") ~ Char1 )
val Name: P0 = P( NameStart ~ NameChar.rep ).!.filter(_.last != ':').opaque("Name").map(_ => Unit) // discard result
val NameStart = P( CharPred.raw(isNameStart) )
val NameChar = P( CharPred.raw(isNameChar) )
val ElemPattern: P0 = P( TagPHeader ~/ ("/>" | ">" ~/ ContentP ~/ ETag ) )
val TagPHeader = P( "<" ~ Name ~ S.? )
val ContentP: P0 = P( ( CharDataP | ScalaPatterns | ElemPattern ).rep )
val ScalaPatterns = P( "{" ~ Patterns ~ "}" )
val CharDataP = P( "&" ~ CharData.? | CharData ) // matches weirdness of scalac parser on xml reference.
//================================================================================
// From `scala.xml.parsing.TokenTests`
//================================================================================
/**
* {{{
* NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
* | CombiningChar | Extender
* }}}
* See [4] and Appendix B of XML 1.0 specification.
*/
def isNameChar(ch: Char) = {
import java.lang.Character._
// The constants represent groups Mc, Me, Mn, Lm, and Nd.
isNameStart(ch) || (getType(ch).toByte match {
case COMBINING_SPACING_MARK |
ENCLOSING_MARK | NON_SPACING_MARK |
MODIFIER_LETTER | DECIMAL_DIGIT_NUMBER => true
case _ => ".-:" contains ch
})
}
/**
* {{{
* NameStart ::= ( Letter | '_' )
* }}}
* where Letter means in one of the Unicode general
* categories `{ Ll, Lu, Lo, Lt, Nl }`.
*
* We do not allow a name to start with `:`.
* See [3] and Appendix B of XML 1.0 specification
*/
def isNameStart(ch: Char) = {
import java.lang.Character._
getType(ch).toByte match {
case LOWERCASE_LETTER |
UPPERCASE_LETTER | OTHER_LETTER |
TITLECASE_LETTER | LETTER_NUMBER => true
case _ => ch == '_'
}
}
}
}
/** Collects start and end positions of scala expressions inside xml literals.
*
* Doesn't really parse scala expressions, only reads until the curly brace
* balance hits 0.
*/
class ScalaExprPositionParser(dialect: Dialect) extends Parser[Unit] {
case class XmlTokenRange(from: Int, to: Int) // from is inclusive, to is exclusive
private val _splicePositions = List.newBuilder[XmlTokenRange]
def splicePositions: List[XmlTokenRange] = _splicePositions.result()
def parseRec(cfg: ParseCtx, index: Int): fastparse.core.Mutable[Unit, Char, String] = {
var curlyBraceCount = 1
val input = cfg.input
val scanner =
new LegacyScanner(Input.String(input.slice(index, input.length)), dialect)
scanner.reader.nextChar()
while (curlyBraceCount > 0) {
scanner.nextToken()
(scanner.curr.token: @switch) match {
case LegacyToken.LBRACE => curlyBraceCount += 1
case LegacyToken.RBRACE => curlyBraceCount -= 1
case LegacyToken.EOF =>
return fail(cfg.failure, index + scanner.curr.offset)
case _ =>
}
}
val nextIndex = index + scanner.curr.offset
_splicePositions += XmlTokenRange(index, nextIndex)
success(cfg.success, (), nextIndex, Set.empty, cut = false)
}
}