com.rojoma.json.v3.io.JsonTokenIterator.scala Maven / Gradle / Ivy
package com.rojoma.json.v3
package io
import java.io.Reader
import scala.annotation.switch
import `-impl`.util.AbstractBufferedIterator
/** Convert a character-stream into a token-stream.
*
* This is guaranteed to read no more than necessary to ensure it has
* reached the end of a single token. For objects, arrays, and
* strings, it will read only up to (and, of course, including) the
* closing delimiter. For other types, it may read one character
* further to assure itself that it has reached the end.
*
* A `JsonTokenIterator` does many small reads; it may be a good idea
* to wrap the input `Reader` into a `BufferedReader`. If you do not
* need to read non-JSON out of the underlying `Reader` afterward, a
* [[com.rojoma.json.v3.io.BlockJsonTokenIterator]] may be faster.
*
* As extensions to standard JSON, this reader supports single-quoted
* strings and Javascript-style comments.
*
* @see [[com.rojoma.json.v3.io.BlockJsonTokenIterator]]
* @see [[com.rojoma.json.v3.io.JsonTokenGenerator]]
* @see [[com.rojoma.json.v3.io.JsonToken]]
*/
class JsonTokenIterator(reader: Reader) extends AbstractBufferedIterator[JsonToken] {
  // One character of lookahead: while `isPeeked` is true, `peeked` holds a
  // character that has been read from `reader` but not yet consumed.
  private[this] var isPeeked: Boolean = false
  private[this] var peeked: Char = _
  // The token produced by `advance()` that has not yet been handed out by
  // `next()`; null when no token is buffered.
  private[this] var nextToken: JsonToken = null
  private[this] var nextCharRow = 1 // This is the position of the next char returned from "nextChar()" or "peekChar()"
  private[this] var nextCharCol = 1
  // Buffer shared by the number, identifier and string readers; each of them
  // resets it before use.
  private[this] val scratch = new StringBuilder

  /** Describes only whether a token is currently buffered; never reads from
    * the underlying `Reader`.
    */
  override def toString: String =
    if(nextToken ne null) "non-empty iterator"
    else "possibly-empty iterator"

  /** Throws a [[JsonUnexpectedCharacter]] for `receivedChar` found at the
    * given row and column.
    */
  private def lexerError(receivedChar: Char, expected: String, row: Int, col: Int): Nothing = {
    throw new JsonUnexpectedCharacter(receivedChar, expected, Position(row, col))
  }

  /** Consumes and returns the next character, updating the row/column
    * counters (a `'\n'` starts a new row at column 1).
    *
    * Throws [[JsonLexerEOF]] (via `peekChar`) if the input is exhausted.
    */
  private def nextChar(): Char = {
    peekChar() // ensures `peeked` is populated, or throws at end of input
    isPeeked = false
    if(peeked == '\n') { nextCharRow += 1; nextCharCol = 1 }
    else nextCharCol += 1
    peeked
  }

  /** Returns the next character without consuming it.
    *
    * Throws [[JsonLexerEOF]] if the input is exhausted.
    */
  private def peekChar(): Char = {
    if(!isPeeked) {
      val newChar = reader.read()
      if(newChar == -1) throw new JsonLexerEOF(Position(nextCharRow, nextCharCol))
      peeked = newChar.toChar
      isPeeked = true
    }
    peeked
  }

  /** Returns true if no more characters are available.
    *
    * When a character is available it is stored as the lookahead character,
    * so a following `peekChar()` or `nextChar()` will not touch the reader.
    */
  private def atEOF(): Boolean = {
    if(isPeeked) {
      false
    } else {
      val newChar = reader.read()
      if(newChar == -1) {
        true
      } else {
        peeked = newChar.toChar
        isPeeked = true
        false
      }
    }
  }

  /** Consumes characters up to, but not including, the next `'\n'` (or to
    * the end of input).
    */
  private def skipToEndOfLine(): Unit = while(!atEOF() && peekChar() != '\n') nextChar()

  /** Consumes the remainder of a block comment, through the closing
    * star-slash. The opening delimiter must already have been consumed.
    *
    * Throws [[JsonLexerEOF]] if the input ends before the comment is closed.
    */
  private def skipBlockComment(): Unit = {
    var last = nextChar()
    while(last != '*' || peekChar() != '/') last = nextChar()
    nextChar() // skip final '/'
  }

  /** Consumes a line or block comment whose leading `'/'` is the next
    * character. Raises a lexer error if the second character is neither
    * `'/'` nor `'*'`.
    */
  private def skipComment(): Unit = {
    nextChar() // skip opening "/"
    // Remember where the second character sits so an error points at it.
    val row = nextCharRow
    val col = nextCharCol
    nextChar() match {
      case '/' => skipToEndOfLine()
      case '*' => skipBlockComment()
      case c => lexerError(c, "/ or *", row, col)
    }
  }

  /** Consumes any run of whitespace and comments preceding the next token. */
  @annotation.tailrec
  private def skipWhitespace(): Unit = {
    while(!atEOF() && Character.isWhitespace(peekChar())) nextChar()
    // A comment may itself be followed by more whitespace or comments.
    if(!atEOF() && peekChar() == '/') { skipComment(); skipWhitespace() }
  }

  /** Returns true if another token is available, lexing it if necessary. */
  def hasNext: Boolean = {
    if(nextToken eq null) advance()
    nextToken ne null
  }

  /** Returns the next token without consuming it.
    *
    * Throws [[NoSuchTokenException]] if there are no more tokens.
    */
  def head: JsonToken = {
    if(!hasNext) throw new NoSuchTokenException(Position(nextCharRow, nextCharCol))
    nextToken
  }

  /** Returns and consumes the next token.
    *
    * Throws [[NoSuchTokenException]] if there are no more tokens.
    */
  def next(): JsonToken = {
    val result = head
    nextToken = null
    result
  }

  /** Lexes one token into `nextToken`, or leaves it null if only whitespace
    * and comments remain before the end of input.
    */
  private def advance(): Unit = {
    skipWhitespace()
    if(atEOF()) {
      nextToken = null
    } else {
      val tokenPosition = Position(nextCharRow, nextCharCol)
      val token = (peekChar(): @switch) match {
        case '{' =>
          nextChar()
          TokenOpenBrace()(tokenPosition)
        case '}' =>
          nextChar()
          TokenCloseBrace()(tokenPosition)
        case '[' =>
          nextChar()
          TokenOpenBracket()(tokenPosition)
        case ']' =>
          nextChar()
          TokenCloseBracket()(tokenPosition)
        case ':' =>
          nextChar()
          TokenColon()(tokenPosition)
        case ',' =>
          nextChar()
          TokenComma()(tokenPosition)
        case '"' | '\'' => readString(tokenPosition)
        case '-' => readNumber(tokenPosition)
        case c =>
          if(isDigit(c)) readNumber(tokenPosition) // should I inline this into a case '0' | '1' | ... | '9' ?
          else if(Character.isUnicodeIdentifierStart(c)) readIdentifier(tokenPosition)
          else lexerError(c, "start of datum", nextCharRow, nextCharCol)
      }
      nextToken = token
    }
  }

  /** True for the ASCII digits `'0'` through `'9'` only. */
  private def isDigit(c: Char): Boolean = '0' <= c && c <= '9'

  /** Consumes one character and returns it, raising a lexer error positioned
    * at that character if it is not an ASCII digit.
    */
  private def readDigit(): Char = {
    val row = nextCharRow
    val col = nextCharCol
    val c = nextChar()
    if(!isDigit(c)) lexerError(c, "digit", row, col)
    c
  }

  /** Reads a number token starting at the current character.
    *
    * Throws [[JsonNumberOutOfRange]] if the exponent is rejected by
    * `ReaderUtils.isBigDecimalizableUnsignedExponent`.
    */
  private def readNumber(startPos: Position) = {
    // JSON numbers match (a subset of) the language generated by
    // the regular expression:
    // -?\d+(\.\d+)?([eE][+-]?\d+)?
    // In particular, JSON restricts leading zeros, but we'll match
    // the whole thing anyway.
    scratch.setLength(0)
    if(peekChar() == '-') scratch += nextChar()
    do { scratch += readDigit() } while(!atEOF() && isDigit(peekChar()))
    val hasFrac = !atEOF() && peekChar() == '.'
    if(hasFrac) {
      scratch += nextChar() // skip decimal
      do { scratch += readDigit() } while(!atEOF() && isDigit(peekChar()))
    }
    val hasExponent = !atEOF() && (peekChar() == 'e' || peekChar() == 'E')
    val n =
      if(hasExponent) {
        scratch += nextChar() // skip e/E
        if(peekChar() == '-' || peekChar() == '+') scratch += nextChar()
        else scratch += '+' // ensure there's always a sign
        val exponentDigitsStart = scratch.length
        do { scratch += readDigit() } while(!atEOF() && isDigit(peekChar()))
        // this relies on the exponent being the last thing read
        val result = scratch.toString
        if(!ReaderUtils.isBigDecimalizableUnsignedExponent(result, exponentDigitsStart)) {
          throw new JsonNumberOutOfRange(result, startPos)
        }
        result
      } else {
        scratch.toString
      }
    TokenNumber(n)(startPos)
  }

  /** Reads an identifier token: the current character followed by every
    * subsequent character accepted by `Character.isUnicodeIdentifierPart`.
    */
  private def readIdentifier(startPos: Position) = {
    scratch.setLength(0)
    scratch += nextChar()
    while(!atEOF() && Character.isUnicodeIdentifierPart(peekChar())) scratch += nextChar()
    TokenIdentifier(scratch.toString())(startPos)
  }

  /** Reads a string token. The current character is taken as the delimiter
    * and the string runs until the next unescaped occurrence of it.
    */
  private def readString(startPos: Position) = {
    scratch.setLength(0)
    val Boundary = nextChar()
    while(peekChar() != Boundary) {
      readPotentialSurrogatePair(readChar(), Boundary)
    }
    nextChar() // skip closing character
    TokenString(scratch.toString)(startPos)
  }

  /** Appends `c` to the string being built, routing surrogate code units
    * through `readSurrogatePair` so unpaired ones get replaced.
    */
  private def readPotentialSurrogatePair(c: Char, endOfString: Char): Unit = {
    if(c >= Character.MIN_SURROGATE && c <= Character.MAX_SURROGATE) {
      readSurrogatePair(c, endOfString)
    } else {
      scratch += c
    }
  }

  /** U+FFFD, appended in place of an unpaired surrogate. */
  private def badChar: Char = 0xfffd.toChar

  /** Handles a surrogate code unit `c` read from a string.
    *
    * A high surrogate followed by a low surrogate is appended as a pair.
    * Any other surrogate is replaced by U+FFFD; if the character that
    * failed to complete a pair is itself a surrogate it is processed in
    * turn, otherwise it is appended unchanged.
    */
  @annotation.tailrec
  private def readSurrogatePair(c: Char, endOfString: Char): Unit = {
    if(Character.isHighSurrogate(c)) {
      if(peekChar() == endOfString) {
        // The string ends right after the high surrogate; leave the
        // delimiter for readString to consume.
        scratch += badChar
      } else {
        val potentialSecondHalf = readChar()
        if(Character.isLowSurrogate(potentialSecondHalf)) {
          scratch += c
          scratch += potentialSecondHalf
        } else {
          scratch += badChar
          // Both bounds must be checked against the character just read;
          // otherwise ordinary characters above the surrogate range
          // (U+E000..U+FFFF) would be treated as surrogates and replaced.
          if(potentialSecondHalf >= Character.MIN_SURROGATE && potentialSecondHalf <= Character.MAX_SURROGATE) {
            readSurrogatePair(potentialSecondHalf, endOfString)
          } else {
            scratch += potentialSecondHalf
          }
        }
      }
    } else {
      // A low surrogate with no preceding high surrogate.
      scratch += badChar
    }
  }

  /** Consumes one logical character of a string body, decoding a
    * backslash escape if one is present.
    */
  private def readChar(): Char = {
    nextChar() match {
      case '\\' => readEscapedCharacter()
      case c => c
    }
  }

  /** Decodes the escape whose backslash has already been consumed. Raises a
    * lexer error positioned at the escape character if it is not recognized.
    */
  private def readEscapedCharacter(): Char = {
    val row = nextCharRow
    val col = nextCharCol
    nextChar() match {
      case '"' => '"'
      case '\'' => '\''
      case '\\' => '\\'
      case '/' => '/'
      case 'b' => '\b'
      case 'f' => '\f'
      case 'n' => '\n'
      case 'r' => '\r'
      case 't' => '\t'
      case 'u' => readUnicodeCharacter()
      case c => lexerError(c, "string escape character", row, col)
    }
  }

  /** Reads the four hex digits following a `\u` escape and combines them
    * into a single UTF-16 code unit.
    */
  private def readUnicodeCharacter(): Char = {
    // This form evaluates readHexDigit() once per name, in order.
    val h1, h2, h3, h4 = readHexDigit()
    ((h1 << 12) | (h2 << 8) | (h3 << 4) | h4).toChar
  }

  /** Consumes one character and returns its value as a hexadecimal digit
    * (0-15), raising a lexer error positioned at it if it is not one.
    */
  private def readHexDigit(): Int = {
    val row = nextCharRow
    val col = nextCharCol
    nextChar() match {
      case c if isDigit(c) => c.toInt - '0'.toInt
      case c if 'a' <= c && c <= 'f' => 10 + c.toInt - 'a'.toInt
      case c if 'A' <= c && c <= 'F' => 10 + c.toInt - 'A'.toInt
      case c => lexerError(c, "hex digit", row, col)
    }
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy