
// commonMain.kotlinx.serialization.json.internal.JsonReader.kt (Maven / Gradle / Ivy)
/*
* Copyright 2017-2019 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
*/
package kotlinx.serialization.json.internal
import kotlinx.serialization.json.*
import kotlinx.serialization.json.internal.EscapeCharMappings.ESCAPE_2_CHAR
import kotlin.jvm.*
import kotlin.native.concurrent.*
// Hint interpolated into error messages raised for input that is rejected unless lenient mode is enabled
// (see JsonReader.takeStringQuoted and JsonReader.takeBooleanStringUnquoted).
internal const val lenientHint = "Use 'JsonBuilder.isLenient = true' to accept non-compliant JSON"
// special strings
// Text of the literal that nextLiteral classifies as TC_NULL.
internal const val NULL = "null"
// special chars
internal const val COMMA = ','
internal const val COLON = ':'
internal const val BEGIN_OBJ = '{'
internal const val END_OBJ = '}'
internal const val BEGIN_LIST = '['
internal const val END_LIST = ']'
// Quote character that opens and closes a string literal.
internal const val STRING = '"'
// Backslash that introduces an escape sequence inside a string literal.
internal const val STRING_ESC = '\\'
// Sentinel returned by escapeToChar for an escape letter that has no mapping.
// It equals the default value of an unset CharArray element.
internal const val INVALID = 0.toChar()
// Escape letter that introduces a four-hex-digit unicode escape.
internal const val UNICODE_ESC = 'u'
// token classes
// TC_OTHER is 0, i.e. the default value of every C2TC entry that is not explicitly initialized;
// it is also the class of an unquoted literal other than "null".
internal const val TC_OTHER: Byte = 0
internal const val TC_STRING: Byte = 1
internal const val TC_STRING_ESC: Byte = 2
// Whitespace: tab (0x09), line feed (0x0a), carriage return (0x0d) and space (0x20).
internal const val TC_WS: Byte = 3
internal const val TC_COMMA: Byte = 4
internal const val TC_COLON: Byte = 5
internal const val TC_BEGIN_OBJ: Byte = 6
internal const val TC_END_OBJ: Byte = 7
internal const val TC_BEGIN_LIST: Byte = 8
internal const val TC_END_LIST: Byte = 9
// Assigned by JsonReader.nextLiteral when an unquoted literal is exactly "null".
internal const val TC_NULL: Byte = 10
// Assigned to char codes 0x00..0x20 that are not one of the whitespace characters.
internal const val TC_INVALID: Byte = 11
// Token class of the reader once the end of the input has been reached.
internal const val TC_EOF: Byte = 12
// mapping from chars to token classes
// Size of the C2TC table; chars with a code >= CTC_MAX are classified as TC_OTHER by charToTokenClass.
private const val CTC_MAX = 0x7e
// mapping from escape chars to real chars
// Size of the ESCAPE_2_CHAR table; escapeToChar returns INVALID for codes >= ESC2C_MAX.
// 0x75 is the code of 'u', so the table ends just before UNICODE_ESC.
private const val ESC2C_MAX = 0x75
/**
 * Lookup table indexed by char code that yields the token class of that character.
 *
 * Entries that are never assigned keep the array default of 0, which is [TC_OTHER].
 */
@SharedImmutable
internal val C2TC = ByteArray(CTC_MAX).also { table ->
    // Everything up to and including the space character starts out as invalid ...
    (0..0x20).forEach { code -> table.initC2TC(code, TC_INVALID) }
    // ... and the whitespace characters (tab, LF, CR, space) are then re-classified.
    for (ws in intArrayOf(0x09, 0x0a, 0x0d, 0x20)) {
        table.initC2TC(ws, TC_WS)
    }
    // Separators
    table.initC2TC(COMMA, TC_COMMA)
    table.initC2TC(COLON, TC_COLON)
    // Object and list delimiters
    table.initC2TC(BEGIN_OBJ, TC_BEGIN_OBJ)
    table.initC2TC(END_OBJ, TC_END_OBJ)
    table.initC2TC(BEGIN_LIST, TC_BEGIN_LIST)
    table.initC2TC(END_LIST, TC_END_LIST)
    // String quote and escape introducer
    table.initC2TC(STRING, TC_STRING)
    table.initC2TC(STRING_ESC, TC_STRING_ESC)
}
// object instead of @SharedImmutable because there is mutual initialization in [initC2ESC]
/**
 * Holds [ESCAPE_2_CHAR], the table that maps the letter following a backslash in a JSON string
 * (used as index, by char code) to the character that escape sequence stands for.
 *
 * Entries that are never assigned keep the default char with code 0, which equals [INVALID].
 */
internal object EscapeCharMappings {
    @JvmField
    public val ESCAPE_2_CHAR = CharArray(ESC2C_MAX)

    init {
        // No entry is registered for UNICODE_ESC: its code equals ESC2C_MAX and therefore lies
        // outside the table, and JsonReader.appendEsc handles that escape before consulting the table.
        initC2ESC('\b', 'b')
        initC2ESC('\t', 't')
        initC2ESC('\n', 'n')
        initC2ESC('\u000C', 'f')
        initC2ESC('\r', 'r')
        initC2ESC('/', '/')
        initC2ESC(STRING, STRING)
        initC2ESC(STRING_ESC, STRING_ESC)
    }

    /** Registers [c] as the character produced by the escape letter [esc]. */
    private fun initC2ESC(c: Char, esc: Char) {
        ESCAPE_2_CHAR[esc.toInt()] = c
    }
}
/** Stores the token class [cl] at index [c] of the receiver table. */
private fun ByteArray.initC2TC(c: Int, cl: Byte) = set(c, cl)

/** Same as the Int overload, using the char code of [c] as the table index. */
private fun ByteArray.initC2TC(c: Char, cl: Byte) = initC2TC(c.toInt(), cl)
/**
 * Returns the token class of [c]: the [C2TC] entry for its char code when the code is inside the
 * table, [TC_OTHER] for every character beyond it.
 */
internal fun charToTokenClass(c: Char): Byte {
    val code = c.toInt()
    return when {
        code < CTC_MAX -> C2TC[code]
        else -> TC_OTHER
    }
}
/**
 * Returns the character denoted by the escape letter whose char code is [c], looked up in
 * ESCAPE_2_CHAR, or [INVALID] when [c] lies beyond the table.
 */
internal fun escapeToChar(c: Int): Char = when {
    c < ESC2C_MAX -> ESCAPE_2_CHAR[c]
    else -> INVALID
}
/**
 * Streaming JSON reader: a pull tokenizer over the in-memory [source] string.
 *
 * The reader always describes one current token through [tokenClass]; the first token is read
 * eagerly when the reader is constructed. [nextToken] moves to the following token, and the
 * `take*` functions return the text of the current string/literal token and then advance.
 * All failures are reported through [fail].
 */
internal class JsonReader(private val source: String) {
    /** Position in [source] from which the next call to [nextToken] starts scanning. */
    @JvmField
    var currentPosition: Int = 0 // position in source

    /** Class of the current token (one of the `TC_*` constants). */
    @JvmField
    var tokenClass: Byte = TC_EOF

    /** `true` once the current token is [TC_EOF]. */
    public val isDone: Boolean get() = tokenClass == TC_EOF

    /** `true` when the current token is a list start, an object start, a literal, a string or `null`. */
    public val canBeginValue: Boolean
        get() = when (tokenClass) {
            TC_BEGIN_LIST, TC_BEGIN_OBJ, TC_OTHER, TC_STRING, TC_NULL -> true
            else -> false
        }

    // updated by nextToken: index in source at which the current token starts
    private var tokenPosition: Int = 0

    // updated by nextString/nextLiteral
    private var offset = -1 // when offset >= 0 string is in source, otherwise in buf
    private var length = 0 // length of string
    private var buf = CharArray(16) // only used for strings with escapes

    init {
        nextToken()
    }

    /**
     * Fails at the current token position unless the current token class equals [expected].
     * [errorMessage] builds the failure text and receives the current token class converted to a Char.
     */
    internal inline fun requireTokenClass(expected: Byte, errorMessage: (Char) -> String) {
        if (tokenClass != expected) fail(errorMessage(tokenClass.toChar()), tokenPosition)
    }

    /**
     * Returns the text of the current token and advances to the next one.
     * Fails unless the current token is a quoted string or an unquoted non-null literal.
     */
    fun takeString(): String {
        if (tokenClass != TC_OTHER && tokenClass != TC_STRING) fail(
            "Expected string or non-null literal", tokenPosition
        )
        return takeStringInternal()
    }

    /**
     * Returns the text of the current token without advancing, or `null` when the current token is
     * neither a quoted string nor (only if [isLenient]) an unquoted literal.
     */
    fun peekString(isLenient: Boolean): String? {
        return if (tokenClass != TC_STRING && (!isLenient || tokenClass != TC_OTHER)) null
        else takeStringInternal(advance = false)
    }

    /** Returns the text of the current quoted string and advances; fails for any other token. */
    fun takeStringQuoted(): String {
        if (tokenClass != TC_STRING) fail(
            "Expected string literal with quotes. $lenientHint",
            tokenPosition
        )
        return takeStringInternal()
    }

    /** Returns the text of the current unquoted literal and advances; fails for any other token. */
    fun takeBooleanStringUnquoted(): String {
        if (tokenClass != TC_OTHER) fail("Expected start of the unquoted boolean literal. $lenientHint", tokenPosition)
        return takeStringInternal()
    }

    /**
     * Materializes the text recorded by the last nextString/nextLiteral call: taken from [buf] when
     * [offset] is negative, otherwise cut out of [source]. Moves to the next token when [advance] is set.
     */
    private fun takeStringInternal(advance: Boolean = true): String {
        val prevStr = if (offset < 0)
            buf.concatToString(0, 0 + length) else
            source.substring(offset, offset + length)
        if (advance) nextToken()
        return prevStr
    }

    /** Appends [ch] to [buf], doubling the buffer when it is full. */
    private fun append(ch: Char) {
        if (length >= buf.size) buf = buf.copyOf(2 * buf.size)
        buf[length++] = ch
    }

    // initializes buf usage upon the first encountered escaped char
    /** Appends the characters of [source] in `[fromIndex, toIndex)` to [buf], growing it if needed. */
    private fun appendRange(source: String, fromIndex: Int, toIndex: Int) {
        val addLen = toIndex - fromIndex
        val oldLen = length
        val newLen = oldLen + addLen
        if (newLen > buf.size) buf = buf.copyOf(newLen.coerceAtLeast(2 * buf.size))
        for (i in 0 until addLen) buf[oldLen + i] = source[fromIndex + i]
        length += addLen
    }

    /**
     * Skips whitespace and reads the next token, updating [tokenClass], [tokenPosition] and
     * [currentPosition]. When only whitespace (or nothing) remains, the token class becomes [TC_EOF].
     */
    fun nextToken() {
        val source = source
        var currentPosition = currentPosition
        while (currentPosition < source.length) {
            val ch = source[currentPosition]
            when (val tc = charToTokenClass(ch)) {
                TC_WS -> currentPosition++ // skip whitespace
                TC_OTHER -> {
                    nextLiteral(source, currentPosition)
                    return
                }
                TC_STRING -> {
                    nextString(source, currentPosition)
                    return
                }
                else -> {
                    // single-character token
                    this.tokenPosition = currentPosition
                    this.tokenClass = tc
                    this.currentPosition = currentPosition + 1
                    return
                }
            }
        }
        tokenPosition = currentPosition
        tokenClass = TC_EOF
    }

    /**
     * Reads an unquoted literal starting at [startPos]: the longest run of [TC_OTHER] characters.
     * The token becomes [TC_NULL] when the run is exactly "null", [TC_OTHER] otherwise.
     */
    private fun nextLiteral(source: String, startPos: Int) {
        tokenPosition = startPos
        offset = startPos
        var currentPosition = startPos
        while (currentPosition < source.length && charToTokenClass(source[currentPosition]) == TC_OTHER) {
            currentPosition++
        }
        this.currentPosition = currentPosition
        length = currentPosition - offset
        tokenClass = if (rangeEquals(source, offset, length, NULL)) TC_NULL else TC_OTHER
    }

    /**
     * Reads a quoted string whose opening quote is at [startPosition].
     *
     * A string without escapes is recorded as a range of [source] ([offset] >= 0); as soon as an
     * escape is met the content is accumulated in [buf] instead ([offset] = -1).
     * Fails with "EOF" when the input ends before the closing quote.
     */
    private fun nextString(source: String, startPosition: Int) {
        tokenPosition = startPosition
        length = 0 // in buffer
        var pos = startPosition + 1 // skip the opening quote
        var lastPosition = pos
        while (true) {
            // Check bounds before every read: the input may end right after the opening quote
            // or right after an escape sequence.
            if (pos >= source.length) fail("EOF", pos)
            val ch = source[pos]
            if (ch == STRING) break
            if (ch == STRING_ESC) {
                // flush the plain characters seen so far, then decode the escape
                appendRange(source, lastPosition, pos)
                val newPosition = appendEsc(source, pos + 1)
                pos = newPosition
                lastPosition = newPosition
            } else {
                pos++
            }
        }
        if (lastPosition == startPosition + 1) {
            // there was no escaped chars
            offset = lastPosition
            this.length = pos - lastPosition
        } else {
            // some escaped chars were there
            appendRange(source, lastPosition, pos)
            this.offset = -1
        }
        this.currentPosition = pos + 1 // skip the closing quote
        tokenClass = TC_STRING
    }

    /**
     * Decodes the escape whose letter is at [startPosition] (the character after the backslash),
     * appends the decoded character to [buf] and returns the position following the escape sequence.
     */
    private fun appendEsc(source: String, startPosition: Int): Int {
        var currentPosition = startPosition
        require(currentPosition < source.length, currentPosition) { "Unexpected EOF after escape character" }
        val currentChar = source[currentPosition++]
        if (currentChar == UNICODE_ESC) {
            return appendHex(source, currentPosition)
        }
        val c = escapeToChar(currentChar.toInt())
        require(c != INVALID, currentPosition) { "Invalid escaped char '$currentChar'" }
        append(c)
        return currentPosition
    }

    /**
     * Decodes four hex digits starting at [startPos] into one char, appends it to [buf] and returns
     * the position following the digits.
     */
    private fun appendHex(source: String, startPos: Int): Int {
        var curPos = startPos
        append(
            ((fromHexChar(source, curPos++) shl 12) +
                (fromHexChar(source, curPos++) shl 8) +
                (fromHexChar(source, curPos++) shl 4) +
                fromHexChar(source, curPos++)).toChar()
        )
        return curPos
    }

    /**
     * Skips the value that starts at the current token: a single token when it is not the start of
     * an object or list, otherwise everything up to and including the matching closing token.
     * Fails on a mismatched closing token or when the input ends before the value is closed.
     */
    fun skipElement() {
        if (tokenClass != TC_BEGIN_OBJ && tokenClass != TC_BEGIN_LIST) {
            nextToken()
            return
        }
        // stack of the opening tokens that are still unclosed
        val tokenStack = mutableListOf<Byte>()
        do {
            when (tokenClass) {
                TC_BEGIN_LIST, TC_BEGIN_OBJ -> tokenStack.add(tokenClass)
                TC_END_LIST -> {
                    if (tokenStack.last() != TC_BEGIN_LIST) fail("found ] instead of }")
                    tokenStack.removeAt(tokenStack.size - 1)
                }
                TC_END_OBJ -> {
                    if (tokenStack.last() != TC_BEGIN_OBJ) fail("found } instead of ]")
                    tokenStack.removeAt(tokenStack.size - 1)
                }
                // nextToken() stays at EOF forever, so without this branch the loop would never end
                TC_EOF -> fail("Unexpected end of input while skipping element", tokenPosition)
            }
            nextToken()
        } while (tokenStack.isNotEmpty())
    }

    override fun toString(): String {
        return "JsonReader(source='$source', currentPosition=$currentPosition, tokenClass=$tokenClass, tokenPosition=$tokenPosition, offset=$offset)"
    }

    /** Throws a JSON decoding exception carrying [message], [position] and the whole [source]. */
    public fun fail(message: String, position: Int = currentPosition): Nothing {
        throw JsonDecodingException(position, message, source)
    }

    /** Calls [fail] with the lazily built [message] at [position] when [condition] is `false`. */
    internal inline fun require(condition: Boolean, position: Int = currentPosition, message: () -> String) {
        if (!condition) fail(message(), position)
    }

    /**
     * Returns the numeric value (0..15) of the hex digit at [currentPosition] in [source].
     * Fails when the input ends there or the character is not a hex digit.
     */
    private fun fromHexChar(source: String, currentPosition: Int): Int {
        require(currentPosition < source.length, currentPosition) { "Unexpected EOF during unicode escape" }
        return when (val curChar = source[currentPosition]) {
            in '0'..'9' -> curChar.toInt() - '0'.toInt()
            in 'a'..'f' -> curChar.toInt() - 'a'.toInt() + 10
            in 'A'..'F' -> curChar.toInt() - 'A'.toInt() + 10
            // pass the position explicitly: the default would be the reader's property, not this parameter
            else -> fail("Invalid toHexChar char '$curChar' in unicode escape", currentPosition)
        }
    }
}
/**
 * Tells whether the [length] characters of [source] starting at [start] are exactly [str].
 * A [length] different from the length of [str] yields `false` without looking at [source].
 */
private fun rangeEquals(source: String, start: Int, length: Int, str: String): Boolean {
    if (length != str.length) return false
    var idx = 0
    while (idx < length) {
        if (source[start + idx] != str[idx]) return false
        idx++
    }
    return true
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy