// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/**
* Copyright (C) 2011-2012 Typesafe Inc.
*/
package org.ekrich.config.impl
import java.io.IOException
import java.io.Reader
import java.{lang => jl}
import java.{util => ju}
import scala.util.control.Breaks._
import org.ekrich.config.ConfigException
import org.ekrich.config.ConfigOrigin
import org.ekrich.config.ConfigSyntax
import scala.annotation.tailrec
object Tokenizer {
// this exception should not leave this file
// It wraps a problem Token; TokenIterator.next catches it and enqueues the
// wrapped token in place of a normally produced one.
@SerialVersionUID(1L)
private class ProblemException(val problem: Token) extends Exception
// Human-readable name for a character, used when building problem messages.
// Newline and tab are named before the generic control-character case.
private def asString(codepoint: Int): String =
  codepoint match {
    case '\n' => "newline"
    case '\t' => "tab"
    case -1   => "end of file"
    case cp if ConfigImplUtil.isC0Control(cp) =>
      "control character 0x%x".format(cp)
    case cp =>
      "%c".format(cp)
  }
/**
 * Creates a token iterator over a Reader. The reader is not closed here; the
 * caller has to close it once the returned iterator is no longer in use.
 */
def tokenize(origin: ConfigOrigin, input: Reader, flavor: ConfigSyntax) = {
  // comments are allowed for every syntax except JSON
  val allowComments = !(flavor eq ConfigSyntax.JSON)
  new Tokenizer.TokenIterator(origin, input, allowComments)
}
// Concatenates the tokenText of every remaining token, draining the iterator.
private[impl] def render(tokens: ju.Iterator[Token]) = {
  val out = new StringBuilder
  while (tokens.hasNext) {
    val token = tokens.next()
    out.append(token.tokenText)
  }
  out.toString
}
object TokenIterator {
// Accumulates whitespace seen between tokens and decides, once the next token
// is known, whether that whitespace is significant (between two simple
// values, where it becomes unquoted text) or ignorable.
class WhitespaceSaver() {
  // whitespace collected since the last call to check
  val whitespace = new jl.StringBuilder
  // true when the token most recently passed to check was a simple value,
  // i.e. the next simple value may need to be value-concatenated with it
  var lastTokenWasSimpleValue = false

  // Records one whitespace character.
  private[impl] def add(c: Int): Unit = whitespace.appendCodePoint(c)

  // Called with the token that follows the saved whitespace. Returns a token
  // for that whitespace, or null when none was saved.
  private[impl] def check(
      t: Token,
      baseOrigin: ConfigOrigin,
      lineNumber: Int
  ) =
    if (isSimpleValue(t)) {
      // the flag still describes the PREVIOUS token while the whitespace is
      // classified; only afterwards is it updated for this token
      val pending = createWhitespaceTokenFromSaver(baseOrigin, lineNumber)
      lastTokenWasSimpleValue = true
      pending
    } else {
      // clearing the flag first means the whitespace is always emitted as
      // ignored whitespace when the next token is not a simple value
      lastTokenWasSimpleValue = false
      createWhitespaceTokenFromSaver(baseOrigin, lineNumber)
    }

  // Turns the saved whitespace into a token and resets the buffer; yields
  // null when nothing was saved.
  private def createWhitespaceTokenFromSaver(
      baseOrigin: ConfigOrigin,
      lineNumber: Int
  ): Token =
    if (whitespace.length == 0) null
    else {
      val origin = lineOrigin(baseOrigin, lineNumber)
      val text = whitespace.toString
      val token =
        if (lastTokenWasSimpleValue) Tokens.newUnquotedText(origin, text)
        else Tokens.newIgnoredWhitespace(origin, text)
      whitespace.setLength(0) // reset
      token
    }
}
// Whitespace test, delegated to ConfigImplUtil.
private[impl] def isWhitespace(c: Int): Boolean = {
  ConfigImplUtil.isWhitespace(c)
}
// Like isWhitespace, but a newline never counts.
private[impl] def isWhitespaceNotNewline(c: Int): Boolean =
  if (c == '\n') false
  else ConfigImplUtil.isWhitespace(c)
// Builds a ProblemException that does not suggest quoting.
private def problem(
    origin: ConfigOrigin,
    what: String,
    message: String,
    cause: Throwable
): ProblemException = {
  val suggestQuotes = false
  problem(origin, what, message, suggestQuotes, cause)
}
// Core factory: wraps a problem token in a ProblemException. A null `what`
// or `message` is treated as an internal error.
private def problem(
    origin: ConfigOrigin,
    what: String,
    message: String,
    suggestQuotes: Boolean,
    cause: Throwable
): ProblemException =
  if (what != null && message != null)
    new Tokenizer.ProblemException(
      Tokens.newProblem(origin, what, message, suggestQuotes, cause)
    )
  else
    throw new ConfigException.BugOrBroken(
      "internal error, creating bad ProblemException"
    )
// Message-only variant: empty `what`, no quote suggestion, no cause.
private def problem(
    origin: ConfigOrigin,
    message: String
): ProblemException =
  problem(origin, "", message, false, null)
// Copy of the base origin carrying the given line number. The base origin is
// cast to SimpleConfigOrigin.
private def lineOrigin(baseOrigin: ConfigOrigin, lineNumber: Int) = {
  val simpleOrigin = baseOrigin.asInstanceOf[SimpleConfigOrigin]
  simpleOrigin.withLineNumber(lineNumber)
}
// chars JSON allows a number to start with
// (pullNextToken hands a token starting with one of these to pullNumber)
private[impl] val firstNumberChars = "0123456789-"
// chars JSON allows to be part of a number
// (pullNumber keeps consuming while the next char is one of these)
private[impl] val numberChars = "0123456789eE+-."
// chars that stop an unquoted string
// (also reported as reserved characters when one shows up where no other
// token rule applies)
private[impl] val notInUnquotedText = "$\"{}[]:=,+#`^?!@*&\\"
// A simple value is a substitution, unquoted text, or a value token.
private def isSimpleValue(t: Token) =
  Tokens.isSubstitution(t) || Tokens.isUnquotedText(t) || Tokens.isValue(t)
}
class TokenIterator(
_origin: ConfigOrigin,
val input: Reader,
val allowComments: Boolean
) extends ju.Iterator[Token] {
// base origin, cast to SimpleConfigOrigin so withLineNumber can be called
val origin = _origin.asInstanceOf[SimpleConfigOrigin]
// characters handed back via putBack; nextCharRaw drains these before
// reading from input again
val buffer = new ju.LinkedList[Integer]
// current line, starting at 1; incremented when a newline is consumed
var lineNumber = 1
// origin for the current line, recomputed whenever lineNumber changes
var lineOrigin = origin.withLineNumber(lineNumber)
// tokens produced but not yet handed out by next; seeded with START
val tokens = new ju.LinkedList[Token]
tokens.add(Tokens.START)
// whitespace accumulator used for the top-level token stream
val whitespaceSaver = new TokenIterator.WhitespaceSaver
// Lowest-level character source: returns a pushed-back character when one is
// buffered, otherwise reads the next one from the input. No whitespace or
// comment handling happens here. An IOException from the reader is rethrown
// as ConfigException.IO.
private def nextCharRaw: Int =
  if (!buffer.isEmpty) buffer.pop.intValue
  else
    try input.read()
    catch {
      case ioe: IOException =>
        throw new ConfigException.IO(
          origin,
          s"read error: ${ioe.getMessage}",
          ioe
        )
    }
// Pushes a character back so the next nextCharRaw returns it. Refuses when
// more than two characters are already buffered, since that much look-ahead
// indicates a tokenizer bug.
private def putBack(c: Int): Unit = {
  val tooMuchLookAhead = buffer.size > 2
  if (tooMuchLookAhead)
    throw new ConfigException.BugOrBroken(
      "bug: putBack() three times, undesirable look-ahead"
    )
  buffer.push(c)
}
// True when `c` begins a comment: '#', or '/' immediately followed by another
// '/'. Always false at end of input or when comments are not allowed.
private def startOfComment(c: Int) =
  if (c == -1 || !allowComments) false
  else
    c match {
      case '#' => true
      case '/' =>
        // peek at the following char and hand it straight back, so this
        // check predictably consumes nothing
        val peeked = nextCharRaw
        putBack(peeked)
        peeked == '/'
      case _ => false
    }
// Returns the next character that is not non-newline whitespace (or -1 at
// end of input). Every whitespace character skipped is handed to the saver.
private def nextCharAfterWhitespace(
    saver: TokenIterator.WhitespaceSaver
): Int = {
  @tailrec
  def skip(current: Int): Int =
    if (current != -1 && TokenIterator.isWhitespaceNotNewline(current)) {
      saver.add(current)
      skip(nextCharRaw)
    } else current
  skip(nextCharRaw)
}
// Instance-level problem factories. They all attach the current lineOrigin
// and end up in the companion's TokenIterator.problem.

// message only: empty `what`, no cause
private def problem(message: String): ProblemException =
  problem("", message, null: Throwable)
// `what` and message, no cause
private def problem(what: String, message: String): ProblemException =
  problem(what, message, null: Throwable)
// with an explicit quote suggestion, no cause
private def problem(
    what: String,
    message: String,
    suggestQuotes: Boolean
): ProblemException = {
  problem(what, message, suggestQuotes, null)
}
// with a cause
private def problem(
    what: String,
    message: String,
    cause: Throwable
): ProblemException = {
  TokenIterator.problem(lineOrigin, what, message, cause)
}
// fully specified
private def problem(
    what: String,
    message: String,
    suggestQuotes: Boolean,
    cause: Throwable
): ProblemException = {
  TokenIterator.problem(lineOrigin, what, message, suggestQuotes, cause)
}
// Reads a comment up to, but not including, the end of the line or input.
// On entry exactly ONE char has been consumed: the '#', or the first '/' of
// a double slash (the second slash is consumed here).
private def pullComment(firstChar: Int): Token = {
  val doubleSlash = firstChar == '/'
  if (doubleSlash) {
    val secondSlash = nextCharRaw
    if (secondSlash != '/')
      throw new ConfigException.BugOrBroken(
        "called pullComment but // not seen"
      )
  }
  val text = new jl.StringBuilder
  var c = nextCharRaw
  while (c != -1 && c != '\n') {
    text.appendCodePoint(c)
    c = nextCharRaw
  }
  // the terminator is not part of the comment; leave it for the caller
  putBack(c)
  if (doubleSlash) Tokens.newCommentDoubleSlash(lineOrigin, text.toString)
  else Tokens.newCommentHash(lineOrigin, text.toString)
}
// The rules here are intended to maximize convenience while
// avoiding confusion with real valid JSON. Basically anything
// that parses as JSON is treated the JSON way and otherwise
// we assume it's a string and let the parser sort it out.
//
// Reads characters until end of input, a character from notInUnquotedText,
// whitespace, or the start of a comment. Returns a boolean/null token as soon
// as the text read so far is exactly "true", "null" or "false"; otherwise
// returns an unquoted-text token and puts the terminating character back.
private def pullUnquotedText: Token = {
// capture the origin before reading anything
val origin = lineOrigin
val sb = new jl.StringBuilder
var t: Token = null
var c = nextCharRaw
// set when a true/false/null token was produced; in that case the last char
// read belongs to the token and must NOT be put back
var retToken = false
breakable {
while (true) {
if (c == -1) break // break
else if (TokenIterator.notInUnquotedText.indexOf(c) >= 0)
break // break
else if (TokenIterator.isWhitespace(c))
break // break
else if (startOfComment(c)) break // break
else sb.appendCodePoint(c)
// we parse true/false/null tokens as such no matter
// what is after them, as long as they are at the
// start of the unquoted token.
if (sb.length == 4) {
val s = sb.toString
if (s == "true") {
retToken = true
t = Tokens.newBoolean(origin, true)
break // return
} else if (s == "null") {
retToken = true
t = Tokens.newNull(origin)
break // return
}
} else if (sb.length == 5) {
val s = sb.toString
if (s == "false") {
retToken = true
t = Tokens.newBoolean(origin, false)
break // return
}
}
c = nextCharRaw
}
}
if (retToken == false) {
// put back the char that ended the unquoted text
putBack(c)
val s = sb.toString
t = Tokens.newUnquotedText(origin, s)
}
t
}
// Reads a run of number characters starting with the already-consumed
// `firstChar`. Produces a double token when the text contains '.', 'e' or
// 'E', otherwise a long token. Text that does not parse as a number becomes
// unquoted text, unless it contains a reserved character, which is a problem.
@throws[ProblemException]
private def pullNumber(firstChar: Int) = {
  val digits = new jl.StringBuilder
  digits.appendCodePoint(firstChar)
  var sawDecimalOrExponent = false
  var c = nextCharRaw
  while (c != -1 && TokenIterator.numberChars.indexOf(c) >= 0) {
    if ("eE.".indexOf(c) >= 0) sawDecimalOrExponent = true
    digits.appendCodePoint(c)
    c = nextCharRaw
  }
  // the char that stopped the loop is not part of the number
  putBack(c)
  val text = digits.toString
  try {
    if (sawDecimalOrExponent) {
      // force floating point representation
      Tokens.newDouble(lineOrigin, text.toDouble, text)
    } else {
      // toLong throws when the integer does not fit in a Long
      Tokens.newLong(lineOrigin, text.toLong, text)
    }
  } catch {
    case _: NumberFormatException =>
      // not a number after all; it is acceptable as an unquoted string
      // only if it holds no reserved characters
      text.foreach { ch =>
        val code = ch.toInt
        if (TokenIterator.notInUnquotedText.indexOf(code) >= 0) {
          val shown = asString(code)
          throw problem(
            shown,
            s"Reserved character '$shown' is not allowed outside quotes",
            true /* suggestQuotes */
          )
        }
      }
      Tokens.newUnquotedText(lineOrigin, text)
  }
}
// Consumes the character(s) that follow a backslash inside a quoted string.
// The decoded character is appended to `sb`; the escape exactly as written
// (backslash included) is appended to `sbOrig` so the token can be rendered
// back out unchanged.
//
// Throws ProblemException on end of input, on an unknown escape letter, and
// on a unicode escape that is not followed by four hex digits.
@throws[ProblemException]
private def pullEscapeSequence(
    sb: jl.StringBuilder,
    sbOrig: jl.StringBuilder
): Unit = {
  val escaped = nextCharRaw
  if (escaped == -1)
    throw problem(
      "End of input but backslash in string had nothing after it"
    )
  // This is needed so we return the unescaped escape characters back out when rendering the token
  sbOrig.appendCodePoint('\\')
  sbOrig.appendCodePoint(escaped)
  escaped match {
    case '"'  => sb.append('"')
    case '\\' => sb.append('\\')
    case '/'  => sb.append('/')
    case 'b'  => sb.append('\b')
    case 'f'  => sb.append('\f')
    case 'n'  => sb.append('\n')
    case 'r'  => sb.append('\r')
    case 't'  => sb.append('\t')
    case 'u' =>
      // read exactly four characters; they are validated only once all four
      // have been consumed, so the reported text shows the whole escape
      val a = new Array[Char](4)
      var i = 0
      while (i < 4) {
        val c = nextCharRaw
        if (c == -1)
          throw problem(
            "End of input but expecting 4 hex digits for \\uXXXX escape"
          )
        a(i) = c.toChar
        i += 1
      }
      val digits = new String(a)
      sbOrig.append(a)
      def malformed(cause: NumberFormatException): ProblemException =
        problem(
          digits,
          "Malformed hex digits after \\u escape in string: '%s'".format(
            digits
          ),
          cause
        )
      // Integer.parseInt tolerates a leading '+' or '-'. A '+' would be
      // silently accepted as part of the escape, and a '-' would produce a
      // negative code point that appendCodePoint rejects with an
      // IllegalArgumentException nobody catches. Require four hex digits.
      if (!a.forall(ch => Character.digit(ch, 16) >= 0))
        throw malformed(
          new NumberFormatException("For input string: \"" + digits + "\"")
        )
      try sb.appendCodePoint(Integer.parseInt(digits, 16))
      catch {
        case e: NumberFormatException => throw malformed(e)
      }
    case _ =>
      throw problem(
        asString(escaped),
        "backslash followed by '%s', this is not a valid escape sequence (quoted strings use JSON escaping, so use double-backslash \\\\ for literal backslash)"
          .format(asString(escaped))
      )
  }
}
// Called right after an opening triple quote. Copies characters into both
// builders until a run of three or more quotes is followed by a non-quote.
// The last three quotes of that run close the string and are removed from
// `sb` (any earlier quotes in the run stay part of the value); `sbOrig`
// keeps everything as written. The char after the closing run is put back.
@throws[ProblemException]
private def appendTripleQuotedString(
    sb: jl.StringBuilder,
    sbOrig: jl.StringBuilder
): Unit = {
  var consecutiveQuotes = 0
  var closed = false
  while (!closed) {
    val c = nextCharRaw
    if (c != '"' && consecutiveQuotes >= 3) {
      // drop the three closing quotes that were appended as ordinary chars
      sb.setLength(sb.length - 3)
      putBack(c)
      closed = true
    } else {
      if (c == '"') consecutiveQuotes += 1
      else {
        consecutiveQuotes = 0
        if (c == -1)
          throw problem(
            "End of input but triple-quoted string was still open"
          )
        if (c == '\n') {
          // keep the line number accurate
          lineNumber += 1
          lineOrigin = origin.withLineNumber(lineNumber)
        }
      }
      sb.appendCodePoint(c)
      sbOrig.appendCodePoint(c)
    }
  }
}
// Reads a quoted string whose opening quote has already been consumed. Two
// builders are kept: `sb` holds the decoded value, `sbOrig` holds the text
// exactly as written (quotes and escapes included) so it can be rendered
// back out unchanged. An empty string immediately followed by a third quote
// switches to triple-quoted mode.
@throws[ProblemException]
private def pullQuotedString: Tokens.Value = {
  val sb = new jl.StringBuilder
  val sbOrig = new jl.StringBuilder
  sbOrig.appendCodePoint('"')
  var open = true
  while (open) {
    val c = nextCharRaw
    if (c == -1)
      throw problem("End of input but string quote was still open")
    c match {
      case '\\' =>
        pullEscapeSequence(sb, sbOrig)
      case '"' =>
        sbOrig.appendCodePoint(c)
        open = false
      case _ if ConfigImplUtil.isC0Control(c) =>
        throw problem(
          asString(c),
          "JSON does not allow unescaped " + asString(c) + " in quoted strings, use a backslash escape"
        )
      case _ =>
        sb.appendCodePoint(c)
        sbOrig.appendCodePoint(c)
    }
  }
  // maybe switch to triple-quoted string, sort of hacky...
  if (sb.length == 0) {
    val third = nextCharRaw
    if (third != '"') putBack(third)
    else {
      sbOrig.appendCodePoint(third)
      appendTripleQuotedString(sb, sbOrig)
    }
  }
  Tokens.newString(lineOrigin, sb.toString, sbOrig.toString)
}
// Completes a "+=" token; the '+' has already been consumed. Anything other
// than '=' as the next char is reported as a problem.
@throws[ProblemException]
private def pullPlusEquals: Token = {
  val following = nextCharRaw
  if (following == '=') Tokens.PLUS_EQUALS
  else {
    val shown = asString(following)
    throw problem(
      shown,
      s"'+' not followed by =, '$shown' not allowed after '+'",
      true
    )
  }
}
// Reads a substitution; the leading '$' has already been consumed. Expects
// '{', an optional '?', then collects tokens (with their own whitespace
// saver) until the closing curly brace.
@throws[ProblemException]
private def pullSubstitution: Tokens.Substitution = {
  val origin = lineOrigin
  val opener = nextCharRaw
  if (opener != '{')
    throw problem(
      asString(opener),
      "'$' not followed by {, '" + asString(opener) + "' not allowed after '$'",
      true
    )
  val marker = nextCharRaw
  val optional = marker == '?'
  if (!optional) putBack(marker)
  val saver = new TokenIterator.WhitespaceSaver
  val expression = new ju.ArrayList[Token]
  // note that we avoid validating the allowed tokens inside
  // the substitution here; we even allow nested substitutions
  // in the tokenizer. The parser sorts it out.
  var t: Token = pullNextToken(saver)
  while (t ne Tokens.CLOSE_CURLY) {
    if (t eq Tokens.END)
      throw TokenIterator.problem(
        origin,
        "Substitution ${ was not closed with a }"
      )
    val whitespace = saver.check(t, origin, lineNumber)
    if (whitespace != null) expression.add(whitespace)
    expression.add(t)
    t = pullNextToken(saver)
  }
  Tokens.newSubstitution(origin, optional, expression)
}
// Produces the next token. Non-newline whitespace in front of it goes to the
// saver. Dispatch order: end of input, newline, comment, single-character
// and prefixed tokens, then numbers, reserved characters, and finally
// unquoted text.
@throws[ProblemException]
private def pullNextToken(saver: TokenIterator.WhitespaceSaver): Token = {
  val c = nextCharAfterWhitespace(saver)
  if (c == -1) Tokens.END
  else if (c == '\n') {
    // newline tokens have the just-ended line number
    val newline = Tokens.newLine(lineOrigin)
    lineNumber += 1
    lineOrigin = origin.withLineNumber(lineNumber)
    newline
  } else {
    val token: Token =
      if (startOfComment(c)) pullComment(c)
      else {
        // tokens recognizable from their first character; null means
        // "none of these"
        val direct: Token = c match {
          case '"' => pullQuotedString
          case '$' => pullSubstitution
          case ':' => Tokens.COLON
          case ',' => Tokens.COMMA
          case '=' => Tokens.EQUALS
          case '{' => Tokens.OPEN_CURLY
          case '}' => Tokens.CLOSE_CURLY
          case '[' => Tokens.OPEN_SQUARE
          case ']' => Tokens.CLOSE_SQUARE
          case '+' => pullPlusEquals
          case _   => null
        }
        if (direct != null) direct
        else if (TokenIterator.firstNumberChars.indexOf(c) >= 0)
          pullNumber(c)
        else if (TokenIterator.notInUnquotedText.indexOf(c) >= 0)
          throw problem(
            asString(c),
            s"Reserved character '${asString(c)}' is not allowed outside quotes",
            true
          )
        else {
          // pullUnquotedText wants to read the first char itself
          putBack(c)
          pullUnquotedText
        }
      }
    if (token == null)
      throw new ConfigException.BugOrBroken(
        "bug: failed to generate next token"
      )
    token
  }
}
// Pulls one token and appends it to the queue, preceded by a whitespace
// token when the saver produced one.
@throws[ProblemException]
private def queueNextToken(): Unit = {
  val pulled = pullNextToken(whitespaceSaver)
  val leading = whitespaceSaver.check(pulled, origin, lineNumber)
  Option(leading).foreach(ws => tokens.add(ws))
  tokens.add(pulled)
}
// There is a next token as long as the queue holds one.
override def hasNext: Boolean = tokens.size != 0
// Returns the head of the queue. When that empties the queue and the token
// is not END, the following token is queued right away; a ProblemException
// raised while doing so is turned into its problem token.
override def next: Token = {
  val head = tokens.remove()
  val mustRefill = tokens.isEmpty && (head ne Tokens.END)
  if (mustRefill) {
    try queueNextToken()
    catch {
      case failure: Tokenizer.ProblemException =>
        tokens.add(failure.problem)
    }
    if (tokens.isEmpty)
      throw new ConfigException.BugOrBroken(
        "bug: tokens queue should not be empty here"
      )
  }
  head
}
// Removal is not supported on a token stream.
override def remove(): Unit =
  throw new UnsupportedOperationException(
    "Does not make sense to remove items from token stream"
  )
}
}