Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of scala3-compiler_3 Show documentation
Show all versions of scala3-compiler_3 Show documentation
package dotc
package parsing
package xml
import scala.language.unsafeNulls
import scala.collection.mutable
import scala.collection.BufferedIterator
import core.Contexts.Context
import mutable.{ Buffer, ArrayBuffer, ListBuffer }
import scala.util.control.ControlThrowable
import util.Chars.SU
import Parsers.*
import util.Spans.*
import core.*
import Constants.*
import Decorators.{em, toMessage}
import util.SourceFile
import Utility.*
// XXX/Note: many/most of the functions in here are almost direct cut and pastes
// from another file - scala.xml.parsing.MarkupParser, it looks like.
// (It was like that when I got here.) They used to be commented "[Duplicate]" but
// since approximately all of them were, I snipped it as noise. As far as I can
// tell this wasn't for any particularly good reason, but slightly different
// compiler and library parser interfaces meant it would take some setup.
// I rewrote most of these, but not as yet the library versions: so if you are
// tempted to touch any of these, please be aware of that situation and try not
// to let it get any worse. -- paulp
/** This trait ...
 *
 *  @author  Burak Emir
 *  @version 1.0
 */
object MarkupParsers {
import ast.untpd.*
/** Control-flow signal thrown by `errorNoEnd` when the closing tag of an element is missing. */
case object MissingEndTagControl extends ControlThrowable {
  // Only a prefix: the handler in `xLiteralCommon` appends the element name after this text.
  override def getMessage: String = "start tag was here: "
}
/** Control-flow signal thrown by `errorBraces` when a lone '}' shows up in XML text. */
case object ConfusedAboutBracesControl extends ControlThrowable {
  // Ends with '<' on purpose: the handler in `xLiteralCommon` appends the element name and '>'.
  override def getMessage: String = " I encountered a '}' where I didn't expect one, maybe this tag isn't closed <"
}
/** Control-flow signal thrown when the input ends (character `SU`) in the middle of XML. */
case object TruncatedXMLControl extends ControlThrowable {
  override def getMessage: String = "input ended while parsing XML"
}
/** Parser for XML markup embedded in Scala source, driven by the enclosing Scala `parser`.
 *
 *  @param parser     the Scala parser this markup parser reports errors through and escapes to
 *  @param preserveWS when true, `appendText` keeps text verbatim instead of collapsing whitespace
 */
class MarkupParser(parser: Parser, final val preserveWS: Boolean)(using Context) extends MarkupParserCommon {
import Tokens.{ LBRACE, RBRACE }
// Concrete type choices used by the members below
// (presumably these fix abstract type members of MarkupParserCommon — confirm there).
type PositionType = Span
type InputType = CharArrayReader
type ElementType = Tree
type AttributesType = mutable.Map[String, Tree]
type NamespaceType = Any // namespaces ignored
/** Attributes of the current tag. Both arguments are ignored; `xAttributes` does the parsing. */
def mkAttributes(name: String, other: NamespaceType): AttributesType = {
  xAttributes
}

/** Always `false`; end of input is detected through the `SU` character instead. */
def eof: Boolean = {
  false
}

/** Aborts parsing by throwing `TruncatedXMLControl`; `msg` is not used. */
def truncatedError(msg: String): Nothing = {
  throw TruncatedXMLControl
}
/** Handles an unexpected character: throws `TruncatedXMLControl` when the current character is
 *  `SU` (end of input), otherwise reports `msg` as a syntax error. `that` is not used.
 */
def xHandleError(that: Char, msg: String): Unit =
  ch match {
    case SU => throw TruncatedXMLControl
    case _  => reportSyntaxError(msg)
  }
var input : CharArrayReader = _

/** A buffered iterator over the reader's buffer, starting at the reader's `charOffset`. */
def lookahead(): BufferedIterator[Char] = {
  val remaining = input.buf.drop(input.charOffset)
  remaining.iterator.buffered
}
import parser.{ symbXMLBuilder => handle }
// One less than the reader's `charOffset`; used throughout as the start/end of spans.
def curOffset : Int = input.charOffset - 1
// Scratch position; assigned by `content` and `xLiteral` before a node is parsed.
var tmppos : Span = NoSpan
// NOTE(review): the right-hand side of `ch` is missing in this copy of the file. Presumably it
// reads the current character from `input` — restore it from the upstream source.
def ch: Char =
/** Advances `input` by one character, so that `ch` refers to the next character. */
def nextch(): Unit =
  input.nextChar()
/** Returns the current character, then advances the input by one character. */
protected def ch_returning_nextch: Char = {
  val result = ch
  input.nextChar()
  result
}
/** Builds the tree for a processing instruction by delegating to the parser's XML builder. */
def mkProcInstr(position: Span, name: String, text: String): ElementType = {
  // `handle` is the file's import alias for `parser.symbXMLBuilder`.
  handle.procInstr(position, name, text)
}
/** Set by `xCheckEmbeddedBlock` when the start of a Scala block was consumed; reset by `escapeToScala`. */
var xEmbeddedBlock: Boolean = false

/** (start offset, element name) of the elements currently being parsed, most recent first. */
private var debugLastStartElement: List[(Int, String)] = Nil

/** Start offset of the most recently opened element; fails if none is recorded. */
private def debugLastPos = {
  val (offset, _) = debugLastStartElement.head
  offset
}

/** Name of the most recently opened element; fails if none is recorded. */
private def debugLastElem = {
  val (_, elemName) = debugLastStartElement.head
  elemName
}
/** Reports a stray '}' in XML content and aborts by throwing `ConfusedAboutBracesControl`. */
private def errorBraces(): Nothing = {
  reportSyntaxError("in XML content, please use '}}' to express '}'")
  throw ConfusedAboutBracesControl
}
/** Reports that the closing tag of `tag` is missing and aborts by throwing `MissingEndTagControl`. */
def errorNoEnd(tag: String): Nothing = {
  reportSyntaxError("expected closing tag of " + tag)
  throw MissingEndTagControl
}
/** Checks whether the next character starts a Scala block; if yes, skips it.
 *
 *  A '{' is consumed in either case. It counts as a block start only when the character after
 *  it is not another '{'. The outcome is also stored in `xEmbeddedBlock`.
 *
 *  @return true if next character starts a scala block
 */
def xCheckEmbeddedBlock: Boolean = {
  // attention: side effect, the flag is read by xText and content
  xEmbeddedBlock = (ch == '{') && { nextch(); (ch != '{') }
  xEmbeddedBlock
}
/** parse attribute and add it to listmap
 *  [41] Attributes   ::= { S Name Eq AttValue }
 *       AttValue     ::= `'` { _ } `'`
 *                      | `"` { _ } `"`
 *                      | `{` scalablock `}`
 */
// Parses attributes for as long as the current character can start a name and collects them in
// an insertion-ordered map. A repeated name is reported as a syntax error; the later value still
// overwrites the earlier one.
def xAttributes: mutable.LinkedHashMap[String, Tree] = {
val aMap = mutable.LinkedHashMap[String, Tree]()
while (isNameStart(ch)) {
val start = curOffset
val key = xName
// NOTE(review): `delim` is not used in the lines visible here.
val delim = ch
val mid = curOffset
val value: Tree = ch match {
case '"' | '\'' =>
// quoted value: the opening quote is consumed and passed on as the expected end delimiter
val tmp = xAttributeValue(ch_returning_nextch)
try handle.parseAttribute(Span(start, curOffset, mid), tmp)
catch {
case e: RuntimeException =>
// NOTE(review): this call is cut off mid-argument in this copy — restore from upstream.
errorAndResult("error parsing attribute value", parser.errorTermTree(
// NOTE(review): no body is visible for the '{' case; per the grammar above it should parse an
// embedded Scala block — confirm against upstream.
case '{' =>
case SU =>
throw TruncatedXMLControl
case _ =>
errorAndResult("' or \" delimited attribute value or '{' scala-expr '}' expected", Literal(Constant("")))
// well-formedness constraint: unique attribute names
if (aMap contains key)
reportSyntaxError("attribute %s may only be defined once" format key)
aMap(key) = value
// NOTE(review): nothing follows this condition, and the final result expression and closing
// braces are missing as well.
if (ch != '/' && ch != '>')
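/** CDATA section: '<![CDATA[' { char } ']]>'
 *  (production reconstructed; the original text of this comment was garbled)
 *
 *  see [15]
 */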
// Reads character data up to the terminator "]]>" and builds the tree with `handle.charData`.
def xCharData: Tree = {
val start = curOffset
// NOTE(review): `start` and `mid` are read back to back, so they are equal as shown; presumably
// a call that consumes the section opener between them was lost — confirm against upstream.
val mid = curOffset
xTakeUntil(handle.charData, () => Span(start, curOffset, mid), "]]>")
// Reads raw text and builds the tree with `handle.unparsed`; called by `element` for "xml:unparsed".
def xUnparsed: Tree = {
val start = curOffset
// NOTE(review): the terminator passed here is the empty string; presumably the closing-tag
// literal was stripped from this copy — verify against upstream before relying on it.
xTakeUntil(handle.unparsed, () => Span(start, curOffset, start), "")
/** Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 *
 *  see [15]
 */
// NOTE(review): the body line below is garbled in this copy: everything after `//` is now a
// comment, and its tail (`Span(start, curOffset, start), "-->")`) looks like the end of a call
// that reads up to the "-->" terminator. Restore the body from the upstream source.
def xComment: Tree = {
val start = curOffset - 2 // Rewinding to include " Span(start, curOffset, start), "-->")
/** Appends `txt` to `ts` as a text node positioned at `span`.
 *
 *  With `preserveWS` the text is appended verbatim. Otherwise each run of whitespace is collapsed
 *  to a single space, the result is trimmed, and nothing is appended if it ends up empty.
 *
 *  @param span position given to the created text node
 *  @param ts   buffer receiving the text node
 *  @param txt  raw text as read from the input
 */
def appendText(span: Span, ts: Buffer[Tree], txt: String): Unit = {
  def append(t: String) = ts append handle.text(span, t)

  if (preserveWS) append(txt)
  else {
    val sb = new StringBuilder()

    txt foreach { c =>
      if (!isSpace(c)) sb append c
      // emit at most one space per whitespace run
      else if (sb.isEmpty || !isSpace(sb.last)) sb append ' '
    }

    val trimmed = sb.toString.trim
    if (!trimmed.isEmpty) append(trimmed)
  }
}
/** adds entity/character to ts as side-effect
 *  @precond ch == '&'
 */
// Builds one tree for a character reference ('#') or an entity reference, positioned at
// `tmppos`, and appends it to `ts`.
def content_AMP(ts: ArrayBuffer[Tree]): Unit = {
val toAppend = ch match {
case '#' => // CharacterRef
// NOTE(review): as shown, this branch ends in a `val` definition and yields no tree; the lines
// that return `theChar` appear to be missing — confirm against upstream.
val theChar = handle.text(tmppos, xCharRef)
case _ => // EntityRef
val n = xName
handle.entityRef(tmppos, n)
ts append toAppend
/** @precond ch == '{'
 *  @postcond: xEmbeddedBlock == false!
 */
def content_BRACE(p: Span, ts: ArrayBuffer[Tree]): Unit = {
  // xCheckEmbeddedBlock consumes the '{' and says whether a Scala block follows.
  val startsScalaBlock = xCheckEmbeddedBlock
  if (!startsScalaBlock) appendText(p, ts, xText)
  else ts append xEmbeddedExpr
}
/** Returns true if it encounters an end tag (without consuming it),
 *  appends trees to ts as side-effect.
 *
 *  Called after a '<' has been consumed. Dispatches on the next character: '!' introduces
 *  character data or a comment, '?' a processing instruction, anything else a child element.
 *
 *  @param ts buffer receiving the parsed node
 *  @return   true for an end tag, false after a node was appended
 */
private def content_LT(ts: ArrayBuffer[Tree]): Boolean =
  if (ch == '/') true // end tag
  else {
    val toAppend = ch match {
      case '!' => nextch() ; if (ch =='[') xCharData else xComment // CDATA or Comment
      case '?' => nextch() ; xProcInstr // PI
      case _ => element // child node
    }
    ts append toAppend
    false
  }
/** Parses element content until an end tag or the end of input and returns the parsed trees.
 *
 *  Each round either parses a pending embedded Scala block or dispatches on the current
 *  character. `tmppos` is set to the current offset before each dispatch.
 */
def content: Buffer[Tree] = {
  val ts = new ArrayBuffer[Tree]
  while (true) {
    if (xEmbeddedBlock)
      ts append xEmbeddedExpr
    else {
      tmppos = Span(curOffset)
      ch match {
        // end tag, cdata, comment, pi or child node
        case '<' => nextch() ; if (content_LT(ts)) return ts
        // either the character '{' or an embedded scala block }
        case '{' => content_BRACE(tmppos, ts) // }
        // EntityRef or CharRef
        case '&' => content_AMP(ts)
        case SU => return ts
        // text content - here xEmbeddedBlock might be true
        case _ => appendText(tmppos, ts, xText)
      }
    }
  }
  // Never reached (the loop only exits through `return`), but needed to give the body its type.
  ts
}
/** '<' element ::= xmlTag1 '>'  { xmlExpr | '{' simpleExpr '}' } ETag
 *               | xmlTag1 '/' '>'
 */
// Parses one element: reads the tag name and attributes via `xTag`, then either builds an empty
// element or parses the content and builds the element from it.
// NOTE(review): closing braces are missing throughout this copy, and no code that consumes
// "/>" or the end tag is visible — compare with upstream before editing.
def element: Tree = {
val start = curOffset
val (qname, attrMap) = xTag(())
if (ch == '/') { // empty element
handle.element(Span(start, curOffset, start), qname, attrMap, true, new ListBuffer[Tree])
else { // handle content
// "xml:unparsed" elements are handed to xUnparsed instead of being parsed as content
if (qname == "xml:unparsed")
return xUnparsed
// push this element so that error reports can name the most recently opened tag; popped below
debugLastStartElement = (start, qname) :: debugLastStartElement
val ts = content
debugLastStartElement = debugLastStartElement.tail
val span = Span(start, curOffset, start)
qname match {
// NOTE(review): the right-hand side is garbled (`=>, ts)`); presumably a builder call taking
// `span` and `ts` — restore from upstream.
case "xml:group" =>, ts)
case _ => handle.element(span, qname, attrMap, false, ts)
/** parse character data.
 *  precondition: xEmbeddedBlock == false (we are not in a scala block)
 */
// Accumulates text characters into a buffer and returns them once an embedded block, a '<' or
// a '&' is seen.
private def xText: String = {
assert(!xEmbeddedBlock, "internal error: encountered embedded block")
val buf = new StringBuilder
def done = buf.toString
while (ch != SU) {
if (ch == '}') {
// a literal '}' must be written as "}}": peek past the first one, accept it only if another follows
if (charComingAfter(nextch()) == '}') nextch()
else errorBraces()
buf append ch
// NOTE(review): nothing visible between the append above and the check below advances the
// input past the appended character; presumably a `nextch()` line was lost — confirm upstream.
// The closing braces and the result after the loop are missing as well.
if (xCheckEmbeddedBlock || ch == '<' || ch == '&')
return done
/** Some try/catch/finally logic used by xLiteral and xLiteralPattern. */
// Runs `f` and converts the parser's control exceptions into syntax errors.
// NOTE(review): several expressions in this body are truncated in this copy (marked below);
// restore them from upstream before compiling.
inline private def xLiteralCommon(f: () => Tree, ifTruncated: String => Unit): Tree = {
// NOTE(review): the left operand of `==` is missing.
assert( == Tokens.XMLSTART)
// NOTE(review): the initializer is missing.
val saved =
var output: Tree = null.asInstanceOf[Tree]
try output = f()
catch {
// NOTE(review): no handler body is visible; `ifTruncated` is not used anywhere in what is shown,
// so presumably it was called here — confirm.
case c @ TruncatedXMLControl =>
case c @ (MissingEndTagControl | ConfusedAboutBracesControl) =>
// message = the control's text + name of the most recently opened element + '>'
parser.syntaxError(em"${c.getMessage}$debugLastElem>", debugLastPos)
case _: ArrayIndexOutOfBoundsException =>
parser.syntaxError(em"missing end tag in XML literal for <$debugLastElem>", debugLastPos)
// NOTE(review): nothing follows this condition; the fallback result and closing braces are missing.
if (output == null)
/** Use a lookahead parser to run speculative body, and return the first char afterward. */
private def charComingAfter(body: => Unit): Char = {
try {
// swap in a lookahead reader while the speculative `body` runs
input = input.lookaheadReader()
// NOTE(review): the lines that run `body` and read the resulting character are missing here.
// NOTE(review): the right-hand side below is missing; presumably it restores the reader that
// was replaced above — confirm against upstream.
finally input =
/** xLiteral = element { element }
 *  @return Scala representation of this xml literal
 */
// Parses an XML literal in expression position (`handle.isPattern` is set to false). Truncated
// input is reported through `parser.incompleteInputError` (the second argument below).
def xLiteral: Tree = xLiteralCommon(
() => {
// NOTE(review): the right-hand side is missing in this copy.
input =
handle.isPattern = false
val ts = new ArrayBuffer[Tree]
val start = curOffset
tmppos = Span(curOffset) // Iuli: added this line, as it seems content_LT uses tmppos when creating trees
// NOTE(review): nothing visible adds a tree to `ts` before this point or inside the loop below,
// although the `else` branch asserts that `ts` holds exactly one tree — lines appear to be lost.
// parse more XML?
if (charComingAfter(xSpaceOpt()) == '<') {
// NOTE(review): only the continuation test of this `while { ... } do ()` loop survives; the
// statements that should precede it inside the braces are missing.
while {
charComingAfter(xSpaceOpt()) == '<'
} do ()
handle.makeXMLseq(Span(start, curOffset, start), ts)
else {
assert(ts.length == 1, "Require one tree")
// NOTE(review): the result of this branch and the closing braces are missing.
msg => parser.incompleteInputError(msg.toMessage)
/** @see xmlPattern. resynchronizes after successful parse
 *  @return this xml pattern
 */
// Parses an XML literal in pattern position. Truncated input is reported as a syntax error at
// the current offset (the second argument below).
def xLiteralPattern: Tree = xLiteralCommon(
() => {
// NOTE(review): the right-hand side is missing in this copy.
input =
// `saving` is given the current `handle.isPattern` and a setter for it; presumably it restores
// the flag once the block has run — confirm in its definition.
saving[Boolean, Tree](handle.isPattern, handle.isPattern = _) {
handle.isPattern = true
val tree = xPattern
// NOTE(review): the lines that follow the parse and return `tree` are missing, as are the
// closing braces.
msg => parser.syntaxError(msg.toMessage, curOffset)
// Hands control to the Scala parser to run `op` for an embedded block; `kind` only appears in
// the error message when the block is not terminated as expected.
// NOTE(review): most right-hand sides in this body are truncated in this copy (marked below);
// restore them from upstream before compiling.
def escapeToScala[A](op: => A, kind: String): A = {
xEmbeddedBlock = false
// NOTE(review): both arguments of `saving` are truncated.
val res = saving(, = _) {
// NOTE(review): the initializer is missing.
val lbrace =
lbrace.token = LBRACE
// NOTE(review): the left operand of `- 1` and the next two right-hand sides are missing.
lbrace.offset = - 1
lbrace.lastOffset =
lbrace.lineOffset =
// NOTE(review): the left operand of `!=` is missing; as far as visible, the check is for a
// closing brace (RBRACE) after the block.
if ( != RBRACE)
reportSyntaxError(" expected end of Scala " + kind)
/** Parses an embedded Scala block with the enclosing parser. */
def xEmbeddedExpr: Tree =
  escapeToScala(op = parser.block(), kind = "block")
/** xScalaPatterns ::= patterns
 */
def xScalaPatterns: List[Tree] =
  escapeToScala(op = parser.patterns(), kind = "pattern")
/** Reports `str` as a syntax error at `offset` through the enclosing parser. */
def reportSyntaxError(offset: Int, str: String): Unit = parser.syntaxError(str.toMessage, offset)

/** Reports `str` at the current offset, prefixed with "in XML literal: ". */
def reportSyntaxError(str: String): Unit = {
  reportSyntaxError(curOffset, "in XML literal: " + str)
}
/** '<' xPattern  ::= Name [S] { xmlPattern | '{' pattern3 '}' } ETag
 *                  | Name [S] '/' '>'
 */
// Parses an XML pattern: reads the element name, collects child patterns and text into `ts`
// unless the tag is empty, then builds the pattern with `handle.makeXMLpat`.
// NOTE(review): closing braces and several statements are missing in this copy (marked below).
def xPattern: Tree = {
// NOTE(review): declared `var` but never reassigned in the lines visible here.
var start = curOffset
val qname = xName
// push this element so that error reports can name the most recently opened tag; popped at the end
debugLastStartElement = (start, qname) :: debugLastStartElement
val ts = new ArrayBuffer[Tree]
val isEmptyTag = ch == '/'
if (isEmptyTag) nextch()
if (!isEmptyTag) {
// recurses until it hits a termination condition, then returns
def doPattern: Boolean = {
val start1 = curOffset
if (xEmbeddedBlock) ts ++= xScalaPatterns
else ch match {
case '<' => // tag
// NOTE(review): as shown, `ch` is still '<' here, so the test below is always true; presumably
// a `nextch()` before it was lost — confirm against upstream.
if (ch != '/') ts append xPattern // child
else return false // terminate
case '{' => // embedded Scala patterns
// NOTE(review): nothing visible in this loop consumes the '{'; lines appear to be missing.
while (ch == '{') {
ts ++= xScalaPatterns
assert(!xEmbeddedBlock, "problem with embedded block")
case SU =>
throw TruncatedXMLControl
case _ => // text
appendText(Span(start1, curOffset, start1), ts, xText)
// here xEmbeddedBlock might be true:
// if (xEmbeddedBlock) throw new ApplicationError("after:" + text); // assert
// NOTE(review): the `true` result of doPattern for the non-terminating cases is not visible.
while (doPattern) { } // call until false
debugLastStartElement = debugLastStartElement.tail
handle.makeXMLpat(Span(start, curOffset, start), qname, ts)
} /* class MarkupParser */
© 2015 - 2025 Weber Informatics LLC | Privacy Policy