scala.xml.parsing.MarkupParserCommon.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of spark-core Show documentation
Show all versions of spark-core Show documentation
Shaded version of Apache Spark 2.x.x for Presto
The newest version!
/* __ *\
** ________ ___ / / ___ Scala API **
** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
** /____/\___/_/ |_/____/_/ | | **
** |/ **
\* */
package scala
package xml
package parsing
import scala.io.Source
import scala.annotation.switch
import Utility.Escapes.{ pairs => unescape }
import Utility.SU
/**
* This is not a public trait - it contains common code shared
* between the library level XML parser and the compiler's.
* All members should be accessed through those.
*/
private[scala] trait MarkupParserCommon extends TokenTests {
protected def unreachable = scala.sys.error("Cannot be reached.")
// type HandleType // MarkupHandler, SymbolicXMLBuilder
type InputType // Source, CharArrayReader
type PositionType // Int, Position
type ElementType // NodeSeq, Tree
type NamespaceType // NamespaceBinding, Any
type AttributesType // (MetaData, NamespaceBinding), mutable.Map[String, Tree]
def mkAttributes(name: String, pscope: NamespaceType): AttributesType
def mkProcInstr(position: PositionType, name: String, text: String): ElementType
/**
* parse a start or empty tag.
* [40] STag ::= '<' Name { S Attribute } [S]
* [44] EmptyElemTag ::= '<' Name { S Attribute } [S]
*/
protected def xTag(pscope: NamespaceType): (String, AttributesType) = {
val name = xName
xSpaceOpt()
(name, mkAttributes(name, pscope))
}
/**
* '' ProcInstr ::= Name [S ({Char} - ({Char}'>?' {Char})]'?>'
*
* see [15]
*/
def xProcInstr: ElementType = {
val n = xName
xSpaceOpt()
xTakeUntil(mkProcInstr(_, n, _), () => tmppos, "?>")
}
/**
* attribute value, terminated by either `'` or `"`. value may not contain `<`.
* @param endCh either `'` or `"`
*/
def xAttributeValue(endCh: Char): String = {
val buf = new StringBuilder
while (ch != endCh && !eof) {
// well-formedness constraint
if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "")
else if (ch == SU) truncatedError("")
else buf append ch_returning_nextch
}
ch_returning_nextch
// @todo: normalize attribute value
buf.toString
}
def xAttributeValue(): String = {
val str = xAttributeValue(ch_returning_nextch)
// well-formedness constraint
normalizeAttributeValue(str)
}
private def takeUntilChar(it: Iterator[Char], end: Char): String = {
val buf = new StringBuilder
while (it.hasNext) it.next() match {
case `end` => return buf.toString
case ch => buf append ch
}
scala.sys.error("Expected '%s'".format(end))
}
/**
* [42] '<' xmlEndTag ::= '<' '/' Name S? '>'
*/
def xEndTag(startName: String) {
xToken('/')
if (xName != startName)
errorNoEnd(startName)
xSpaceOpt()
xToken('>')
}
/**
* actually, Name ::= (Letter | '_' | ':') (NameChar)* but starting with ':' cannot happen
* Name ::= (Letter | '_') (NameChar)*
*
* see [5] of XML 1.0 specification
*
* pre-condition: ch != ':' // assured by definition of XMLSTART token
* post-condition: name does neither start, nor end in ':'
*/
def xName: String = {
if (ch == SU)
truncatedError("")
else if (!isNameStart(ch))
return errorAndResult("name expected, but char '%s' cannot start a name" format ch, "")
val buf = new StringBuilder
do buf append ch_returning_nextch
while (isNameChar(ch))
if (buf.last == ':') {
reportSyntaxError("name cannot end in ':'")
buf.toString dropRight 1
} else buf.toString
}
private def attr_unescape(s: String) = s match {
case "lt" => "<"
case "gt" => ">"
case "amp" => "&"
case "apos" => "'"
case "quot" => "\""
case "quote" => "\""
case _ => "&" + s + ";"
}
/**
* Replaces only character references right now.
* see spec 3.3.3
*/
private def normalizeAttributeValue(attval: String): String = {
val buf = new StringBuilder
val it = attval.iterator.buffered
while (it.hasNext) buf append (it.next() match {
case ' ' | '\t' | '\n' | '\r' => " "
case '&' if it.head == '#' =>
it.next(); xCharRef(it)
case '&' => attr_unescape(takeUntilChar(it, ';'))
case c => c
})
buf.toString
}
/**
* CharRef ::= "" '0'..'9' {'0'..'9'} ";"
* | "" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
*
* see [66]
*/
def xCharRef(ch: () => Char, nextch: () => Unit): String =
Utility.parseCharRef(ch, nextch, reportSyntaxError _, truncatedError _)
def xCharRef(it: Iterator[Char]): String = {
var c = it.next()
Utility.parseCharRef(() => c, () => { c = it.next() }, reportSyntaxError _, truncatedError _)
}
def xCharRef: String = xCharRef(() => ch, () => nextch())
/** Create a lookahead reader which does not influence the input */
def lookahead(): BufferedIterator[Char]
/**
* The library and compiler parsers had the interesting distinction of
* different behavior for nextch (a function for which there are a total
* of two plausible behaviors, so we know the design space was fully
* explored.) One of them returned the value of nextch before the increment
* and one of them the new value. So to unify code we have to at least
* temporarily abstract over the nextchs.
*/
def ch: Char
def nextch(): Unit
protected def ch_returning_nextch: Char
def eof: Boolean
// def handle: HandleType
var tmppos: PositionType
def xHandleError(that: Char, msg: String): Unit
def reportSyntaxError(str: String): Unit
def reportSyntaxError(pos: Int, str: String): Unit
def truncatedError(msg: String): Nothing
def errorNoEnd(tag: String): Nothing
protected def errorAndResult[T](msg: String, x: T): T = {
reportSyntaxError(msg)
x
}
def xToken(that: Char) {
if (ch == that) nextch()
else xHandleError(that, "'%s' expected instead of '%s'".format(that, ch))
}
def xToken(that: Seq[Char]) { that foreach xToken }
/** scan [S] '=' [S]*/
def xEQ() = { xSpaceOpt(); xToken('='); xSpaceOpt() }
/** skip optional space S? */
def xSpaceOpt() = while (isSpace(ch) && !eof) nextch()
/** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
def xSpace() =
if (isSpace(ch)) { nextch(); xSpaceOpt() }
else xHandleError(ch, "whitespace expected")
/** Apply a function and return the passed value */
def returning[T](x: T)(f: T => Unit): T = { f(x); x }
/** Execute body with a variable saved and restored after execution */
def saving[A, B](getter: A, setter: A => Unit)(body: => B): B = {
val saved = getter
try body
finally setter(saved)
}
/**
* Take characters from input stream until given String "until"
* is seen. Once seen, the accumulated characters are passed
* along with the current Position to the supplied handler function.
*/
protected def xTakeUntil[T](
handler: (PositionType, String) => T,
positioner: () => PositionType,
until: String): T =
{
val sb = new StringBuilder
val head = until.head
val rest = until.tail
while (true) {
if (ch == head && peek(rest))
return handler(positioner(), sb.toString)
else if (ch == SU || eof)
truncatedError("") // throws TruncatedXMLControl in compiler
sb append ch
nextch()
}
unreachable
}
/**
* Create a non-destructive lookahead reader and see if the head
* of the input would match the given String. If yes, return true
* and drop the entire String from input; if no, return false
* and leave input unchanged.
*/
private def peek(lookingFor: String): Boolean =
(lookahead() take lookingFor.length sameElements lookingFor.iterator) && {
// drop the chars from the real reader (all lookahead + orig)
(0 to lookingFor.length) foreach (_ => nextch())
true
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy