com.rojoma.json.v3.io.FusedBlockJsonReader.scala Maven / Gradle / Ivy
The newest version!
package com.rojoma.json.v3
package io
import java.io.Reader
import scala.collection.mutable
import scala.collection.immutable
import ast._
class FusedBlockJsonReader(input: Reader, fieldCache: FieldCache = IdentityFieldCache, blockSize: Int = 1024) extends JsonReader {
def this(text: String) = this(new java.io.StringReader(text))
def this(text: String, fieldCache: FieldCache) = this(new java.io.StringReader(text), fieldCache)
private [this] val block = new Array[Char](blockSize)
private [this] var pos = 0
private [this] var end = 0
private [this] var nextCharRow = 1
private [this] var nextCharCol = 1
private [this] var depth = 0
private [this] val scratch = new StringBuilder
private def lexerError(receivedChar: Char, expected: String, row: Int, col: Int): Nothing = {
throw new JsonUnexpectedCharacter(receivedChar, expected, Position(row, col))
}
private def refill(): Boolean =
input.read(block) match {
case -1 =>
false
case n =>
pos = 0
end = n
true
}
private def throwLexerEOF() =
throw new JsonLexerEOF(Position(nextCharRow, nextCharCol))
private def throwParserEOF() =
throw new JsonParserEOF(Position(nextCharRow, nextCharCol))
private def atEOF(): Boolean =
pos == end && !refill()
private def skipCharNotAtEOF() {
if(block(pos) == '\n') { nextCharRow += 1; nextCharCol = 1 }
else { nextCharCol += 1 }
pos += 1
}
// An unexpected EOF should throw a lexer exception because we have
// not read a complete token
private def peekCharLexer() = {
if(atEOF) throwLexerEOF()
block(pos)
}
// An unexpecte EOF should throw a parser exception because we are
// not inside a token, and therefore we should simulate the clean
// end of the event stream
private def peekCharParser() = {
if(atEOF) throwParserEOF()
block(pos)
}
private def peekCharNotAtEOF() = block(pos)
private def nextCharLexer() = {
val result = peekCharLexer()
skipCharNotAtEOF()
result
}
private def nextCharNotAtEOF() = {
val result = block(pos)
skipCharNotAtEOF()
result
}
private def skipToEndOfLine() = while(!atEOF() && peekCharNotAtEOF() != '\n') skipCharNotAtEOF()
private def skipBlockComment() {
var last = nextCharLexer()
while(last != '*' || peekCharLexer() != '/') last = nextCharLexer()
skipCharNotAtEOF() // skip final '/'
}
private def skipComment() {
skipCharNotAtEOF() // skip opening "/"
peekCharLexer() match {
case '/' => skipCharNotAtEOF(); skipToEndOfLine()
case '*' => skipCharNotAtEOF(); skipBlockComment()
case c => lexerError(c, "/ or *", nextCharRow, nextCharCol)
}
}
@annotation.tailrec
private def skipWhitespace() {
while(!atEOF() && Character.isWhitespace(peekCharNotAtEOF())) skipCharNotAtEOF()
if(!atEOF() && peekCharNotAtEOF() == '/') { skipComment(); skipWhitespace() }
}
def read(): JValue = readDatum("datum")
private def readDatum(expected: String): JValue = {
skipWhitespace()
peekCharParser() match {
case '{' => readObject()
case '[' => readArray()
case '"' | '\'' => readString()
case '-' => readNumber()
case 't' => readTrue()
case 'f' => readFalse()
case 'n' => readNull()
case c =>
if(isDigit(c)) readNumber()
else badToken(expected)
}
}
private def badToken(expected: String): Nothing = {
val p = Position(nextCharRow, nextCharCol)
val token = peekCharParser() match {
case '{' => TokenOpenBrace()(p)
case '}' => TokenCloseBrace()(p)
case '[' => TokenOpenBracket()(p)
case ']' => TokenCloseBracket()(p)
case ',' => TokenComma()(p)
case ':' => TokenColon()(p)
case '"' | '\'' => TokenString(readRawString())(p)
case c if isDigit(c) || c == '-' => TokenNumber(readRawNumber())(p)
case c if Character.isUnicodeIdentifierStart(c) => TokenIdentifier(readRawIdentifier())(p)
case c => lexerError(c, expected, nextCharRow, nextCharCol)
}
throw new JsonUnexpectedToken(token, expected)
}
private def readObject(): JObject = {
skipCharNotAtEOF() // skip opening '{'
skipWhitespace()
if(peekCharParser() == '}') {
skipCharNotAtEOF()
return JObject.empty
}
depth += 1
val result = new mutable.LinkedHashMap[String, JValue]
result += readMapping("field name or end of object")
readRestOfObjectBody(result)
depth -= 1
JObject(result)
}
private def readRestOfObjectBody(result: mutable.LinkedHashMap[String, JValue]) {
skipWhitespace()
while(peekCharParser() != '}') {
if(peekCharParser() != ',') badToken("comma or end of object")
skipCharNotAtEOF()
result += readMapping("field name")
skipWhitespace()
}
skipCharNotAtEOF()
}
private def readMapping(expected: String): (String, JValue) = {
val fieldName = readFieldName(expected)
skipWhitespace()
if(peekCharParser() != ':') badToken("colon")
skipCharNotAtEOF()
(fieldName, readDatum("datum"))
}
private def readFieldName(expected: String): String = {
skipWhitespace()
val fieldName = peekCharParser() match {
case '"' | '\'' => readRawString()
case c if Character.isUnicodeIdentifierStart(c) => readRawIdentifier()
case _ => badToken(expected)
}
fieldCache(fieldName, depth)
}
private def readArray(): JArray = {
skipCharNotAtEOF() // skip opening '['
skipWhitespace()
if(peekCharParser() == ']') {
skipCharNotAtEOF()
return JArray.empty
}
depth += 1
val result = new immutable.VectorBuilder[JValue]
result += readDatum("datum or end of array")
readRestOfArrayBody(result)
depth -= 1
JArray(result.result())
}
private def readRestOfArrayBody(result: immutable.VectorBuilder[JValue]) {
skipWhitespace()
while(peekCharParser() != ']') {
if(peekCharParser() != ',') badToken("comma or end of array")
skipCharNotAtEOF()
result += readDatum("datum")
skipWhitespace()
}
skipCharNotAtEOF()
}
private def readString(): JString = JString(readRawString())
private def readRawString(): String = {
scratch.setLength(0)
val Boundary = nextCharLexer()
while(peekCharLexer() != Boundary) {
readPotentialSurrogatePair(readChar(), Boundary)
}
skipCharNotAtEOF() // skip closing quote
scratch.toString
}
private def readPotentialSurrogatePair(c: Char, endOfString: Char) {
if(c >= Character.MIN_SURROGATE && c <= Character.MAX_SURROGATE) {
readSurrogatePair(c, endOfString)
} else {
scratch += c
}
}
private def badChar = 0xfffd.toChar
@annotation.tailrec
private def readSurrogatePair(c: Char, endOfString: Char) {
if(Character.isHighSurrogate(c)) {
if(peekCharLexer() == endOfString) {
scratch += badChar
} else {
val potentialSecondHalf = readChar()
if(Character.isLowSurrogate(potentialSecondHalf)) {
scratch += c
scratch += potentialSecondHalf
} else {
scratch += badChar
if(potentialSecondHalf >= Character.MIN_SURROGATE && potentialSecondHalf <= Character.MAX_SURROGATE) {
readSurrogatePair(potentialSecondHalf, endOfString)
} else {
scratch += potentialSecondHalf
}
}
}
} else {
scratch += badChar
}
}
private def readChar(): Char = {
nextCharLexer() match {
case '\\' => readEscapedCharacter()
case c => c
}
}
private def readEscapedCharacter(): Char = {
def ret(c: Char) = { skipCharNotAtEOF(); c }
peekCharLexer() match {
case '"' => ret('"')
case '\'' => ret('\'')
case '\\' => ret('\\')
case '/' => ret('/')
case 'b' => ret('\b')
case 'f' => ret('\f')
case 'n' => ret('\n')
case 'r' => ret('\r')
case 't' => ret('\t')
case 'u' => skipCharNotAtEOF(); readUnicodeCharacter()
case c => lexerError(c, "string escape character", nextCharRow, nextCharCol)
}
}
private def readUnicodeCharacter(): Char = {
val h1, h2, h3, h4 = readHexDigit()
((h1 << 12) | (h2 << 8) | (h3 << 4) | h4).toChar
}
private def isDigit(c: Char) = '0' <= c && c <= '9'
private def readHexDigit(): Int = {
peekCharLexer() match {
case c if isDigit(c) =>
skipCharNotAtEOF()
c.toInt - '0'.toInt
case c if 'a' <= c && c <= 'f' =>
skipCharNotAtEOF()
10 + c.toInt - 'a'.toInt
case c if 'A' <= c && c <= 'F' =>
skipCharNotAtEOF()
10 + c.toInt - 'A'.toInt
case c =>
lexerError(c, "hex digit", nextCharRow, nextCharCol)
}
}
private def readTrue(): JBoolean = {
expectIdentifier("true")
JBoolean.canonicalTrue
}
private def readFalse(): JBoolean = {
expectIdentifier("false")
JBoolean.canonicalFalse
}
private def readNull(): JNull = {
expectIdentifier("null")
JNull
}
private def expectIdentifier(name: String) {
val row = nextCharRow
val col = nextCharCol
val ident = readRawIdentifier()
if(ident != name) throw new JsonUnknownIdentifier(ident, Position(row, col))
}
private def readRawIdentifier(): String = {
scratch.setLength(0)
scratch += nextCharLexer()
while(!atEOF() && Character.isUnicodeIdentifierPart(peekCharNotAtEOF())) scratch += nextCharNotAtEOF()
scratch.toString()
}
private def readDigit(): Char = {
if(!isDigit(peekCharLexer())) lexerError(peekCharLexer(), "digit", nextCharRow, nextCharCol)
nextCharLexer()
}
private def readNumber(): JNumber = JNumber.unsafeFromString(readRawNumber)
private def readRawNumber(): String = {
// JSON numbers match (a subset of) the language generated by
// the regular expression:
// -?\d+(\.\d+)?([eE][+-]?\d+)?
// We'll match the whole thing, within the limits of BigDecimal
scratch.setLength(0)
val startPos = Position(nextCharRow, nextCharCol)
if(peekCharLexer() == '-') scratch += nextCharLexer()
do { scratch += readDigit() } while(!atEOF() && isDigit(peekCharNotAtEOF()))
val hasFrac = !atEOF() && peekCharNotAtEOF() == '.'
if(hasFrac) {
scratch += nextCharNotAtEOF() // skip decimal
do { scratch += readDigit() } while(!atEOF() && isDigit(peekCharNotAtEOF()))
}
val hasExponent = !atEOF() && (peekCharNotAtEOF() == 'e' || peekCharNotAtEOF() == 'E')
if(hasExponent) {
scratch += nextCharNotAtEOF() // skip e/E
if(peekCharLexer() == '-' || peekCharNotAtEOF() == '+') scratch += nextCharNotAtEOF()
else scratch += '+' // ensure there's always a sign
val exponentDigitsStart = scratch.length
do { scratch += readDigit() } while(!atEOF() && isDigit(peekCharNotAtEOF()))
// this relies on the exponent being the last thing read
val result = scratch.toString
if(!ReaderUtils.isBigDecimalizableUnsignedExponent(result, exponentDigitsStart)) {
throw new JsonNumberOutOfRange(result, startPos)
}
result
} else {
scratch.toString
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy