// Source: olon.json.JsonParser.scala (as listed on a Maven / Gradle / Ivy artifact page; the newest version).
package olon
package json
import scala.annotation.switch
/** JSON parser.
*/
object JsonParser {
import java.io._
/** Exception thrown by this parser when JSON input cannot be parsed.
  *
  * @param message
  *   description of the failure
  * @param cause
  *   the underlying exception; code in this file passes null when there is
  *   none
  */
class ParseException(message: String, cause: Exception)
extends Exception(message, cause)
/** Tokens produced one at a time by the low level pull parser
  * (Parser.nextToken).
*/
sealed abstract class Token
// Emitted when '{' is read.
case object OpenObj extends Token
// Emitted when '}' is read.
case object CloseObj extends Token
// Emitted for a quoted string read in field-name position inside an object.
case class FieldStart(name: String) extends Token
// Emitted when the end of the input is reached.
case object End extends Token
// Emitted for a quoted string that is not a field name.
case class StringVal(value: String) extends Token
// Emitted for a number that contains no '.', 'e' or 'E'.
case class IntVal(value: BigInt) extends Token
// Emitted for a number that contains '.', 'e' or 'E'.
case class DoubleVal(value: Double) extends Token
// Emitted for the literals true and false.
case class BoolVal(value: Boolean) extends Token
// Emitted for the literal null.
case object NullVal extends Token
// Emitted when '[' is read.
case object OpenArr extends Token
// Emitted when ']' is read.
case object CloseArr extends Token
/** Parses a complete JSON document held in a string.
  *
  * @param s
  *   the JSON text
  * @return
  *   the parsed JSON value
  * @throws ParseException
  *   is thrown if parsing fails
  */
def parse(s: String): JValue = {
  val source = new StringReader(s)
  parse(new Buffer(source, false))
}
/** Parses a complete JSON document read from a Reader.
  *
  * @param s
  *   the Reader supplying the JSON text
  * @param closeAutomatically
  *   true (default) if the Reader is automatically closed on EOF
  * @return
  *   the parsed JSON value
  * @throws ParseException
  *   is thrown if parsing fails
  */
def parse(s: Reader, closeAutomatically: Boolean = true): JValue = {
  val buffer = new Buffer(s, closeAutomatically)
  parse(buffer)
}
/** Parses a JSON string without throwing on failure.
  *
  * @param s
  *   the JSON text
  * @return
  *   the result of `toOpt` on the parsed value, or None if any Exception was
  *   raised while parsing
  */
def parseOpt(s: String): Option[JValue] =
  try {
    val parsed = parse(s)
    parsed.toOpt
  } catch {
    case _: Exception => None
  }
/** Parses JSON from a Reader without throwing on failure.
  *
  * @param s
  *   the Reader supplying the JSON text
  * @param closeAutomatically
  *   true (default) if the Reader is automatically closed on EOF
  * @return
  *   the result of `toOpt` on the parsed value, or None if any Exception was
  *   raised while parsing
  */
def parseOpt(s: Reader, closeAutomatically: Boolean = true): Option[JValue] =
  try {
    val parsed = parse(s, closeAutomatically)
    parsed.toOpt
  } catch {
    case _: Exception => None
  }
/** Parse in pull parsing style. Use `p.nextToken` to parse tokens one by one
  * from a string.
  *
  * @param s
  *   the JSON text
  * @param p
  *   function that drives the pull parser and produces the result
  * @return
  *   whatever `p` returns
  * @see
  *   olon.json.JsonParser.Token
  */
def parse[A](s: String, p: Parser => A): A = {
  val reader = new StringReader(s)
  parse(reader, p)
}
/** Parse in pull parsing style. Use `p.nextToken` to parse tokens one by one
  * from a stream. The Reader must be closed when parsing is stopped; the
  * buffer created here is told not to close it automatically.
  *
  * @param s
  *   the Reader supplying the JSON text
  * @param p
  *   function that drives the pull parser and produces the result
  * @return
  *   whatever `p` returns
  * @see
  *   olon.json.JsonParser.Token
  */
def parse[A](s: Reader, p: Parser => A): A = {
  val pullParser = new Parser(new Buffer(s, false))
  p(pullParser)
}
// Builds the JSON AST from an already constructed buffer. A ParseException is
// propagated unchanged; any other Exception is wrapped in a ParseException
// with the message "parsing failed". The buffer's segments are always
// released, whether parsing succeeds or fails.
private[json] def parse(buf: Buffer): JValue =
  try astParser(new Parser(buf))
  catch {
    case parseError: ParseException => throw parseError
    case other: Exception => throw new ParseException("parsing failed", other)
  } finally buf.release
// JSON hex unicode strings (\u12AF) are translated into characters through
// this array. It is indexed by ASCII code (0..127) and each entry holds the
// 4-bit value that the corresponding hex digit represents. These are combined
// when reading the unicode string. Entries for characters that are not hex
// digits are left at 0.
private final val HexChars: Array[Int] = {
  val chars = new Array[Int](128)
  var i = 0
  // '0'..'9' are worth 0..9.
  while (i < 10) {
    chars(i + '0') = i
    i += 1
  }
  i = 0
  // Only 'a'..'f' and 'A'..'F' are hex digits, worth 10..15. Looping further
  // would assign values above 15, which no longer fit in 4 bits.
  while (i < 6) {
    chars(i + 'a') = 10 + i
    chars(i + 'A') = 10 + i
    i += 1
  }
  chars
}
// The size of one hex character in bits.
private final val hexCharSize = 4 // in bits
// Decodes a JSON string body supplied as a String. The text is read through
// to its closing '"' by the buffer-based overload, so the input is expected
// to start just after the opening quote and to contain the closing quote.
private[json] def unquote(string: String): String = {
  val reader = new java.io.StringReader(string)
  unquote(new JsonParser.Buffer(reader, false))
}
// Reads a JSON string body from `buf` up to and including the closing '"' and
// returns the decoded text (without the closing quote).
//
// Fast path: while no backslash is seen the characters are simply scanned and
// the result is copied straight out of the buffer. On the first backslash the
// work is handed to `unquote0`, which assembles the result in the buffer's
// shared StringBuilder while decoding escapes.
//
// While the string is being read, hitting end of input makes the buffer throw
// a ParseException ("unexpected eof"). A \u escape that is not followed by
// four ASCII hex digits also raises a ParseException.
private def unquote(buf: JsonParser.Buffer): String = {
  // True only for the ASCII characters allowed in a \uXXXX escape. Checked
  // explicitly because HexChars only has 128 entries and holds 0 for
  // characters that are not hex digits.
  def isHexDigit(ch: Char): Boolean =
    (ch >= '0' && ch <= '9') ||
      (ch >= 'a' && ch <= 'f') ||
      (ch >= 'A' && ch <= 'F')

  // Slow path, entered right after a backslash has been consumed.
  def unquote0(buf: JsonParser.Buffer): String = {
    val builder = buf.builder
    builder.delete(0, builder.length())
    // Start as if a backslash had just been read, which is the case.
    var c = '\\'
    while (c != '"') {
      if (c == '\\') {
        // Flush the plain characters between the mark and the backslash.
        buf.substring(intoBuilder = true)
        (buf.next: @switch) match {
          case '"'  => builder.append('"')
          case '\\' => builder.append('\\')
          case '/'  => builder.append('/')
          case 'b'  => builder.append('\b')
          case 'f'  => builder.append('\f')
          case 'n'  => builder.append('\n')
          case 'r'  => builder.append('\r')
          case 't'  => builder.append('\t')
          case 'u' =>
            // Combine four hex digits, most significant first.
            var finalChar = 0
            var digitsRead = 0
            while (digitsRead < 4) {
              val hexDigit = buf.next
              if (!isHexDigit(hexDigit))
                throw new ParseException(
                  "invalid hex digit '" + hexDigit + "' in unicode escape",
                  null
                )
              finalChar =
                (finalChar << hexCharSize) | HexChars(hexDigit.toInt)
              digitsRead += 1
            }
            builder.appendCodePoint(finalChar.toChar)
          case _ =>
            // Unrecognized escape: a backslash is emitted. The character
            // after the backslash has already been consumed and, because the
            // mark is reset below, it is not emitted.
            builder.append('\\')
        }
        // Plain characters resume after the escape sequence.
        buf.mark
      }
      c = buf.next
    }
    // Flush the plain characters between the last mark and the closing quote.
    buf.substring(intoBuilder = true)
    builder.toString
  }

  buf.eofIsFailure = true
  buf.mark
  var c = buf.next
  var forcedReturn: String = null
  while (c != '"') {
    (c: @switch) match {
      case '\\' =>
        forcedReturn = unquote0(buf)
        // unquote0 has consumed the closing quote; stop scanning.
        c = '"'
      case _ =>
        c = buf.next
    }
  }
  buf.eofIsFailure = false
  if (forcedReturn == null) {
    new String(buf.substring())
  } else {
    forcedReturn
  }
}
// Converts the textual form of a number into a Double. Throws
// NumberFormatException when the text is not a valid double.
private[json] def parseDouble(s: String) = java.lang.Double.parseDouble(s)
// Intermediate objects and arrays which can be grown mutably for performance.
// These are finalized into immutable JObject and JArray.
// Holds the fields collected so far for an object that is still open.
private case class IntermediateJObject(
fields: scala.collection.mutable.ListBuffer[JField]
)
// Holds the elements collected so far for an array that is still open.
private case class IntermediateJArray(
bits: scala.collection.mutable.ListBuffer[JValue]
)
// Drives the pull parser to completion and assembles the JSON AST.
//
// A stack (`vals`) holds the containers and field names that are still open:
// IntermediateJObject / IntermediateJArray for open objects and arrays, and a
// JField with a null value for a field name whose value has not arrived yet.
// Returns the root value, or JNothing when the input produced no value.
private val astParser = (p: Parser) => {
  val vals = new ValStack(p)
  var token: Token = null
  var root: Option[JValue] = None

  // At the end of an object, if we're looking at an intermediate form of an
  // object or array, gather up all their component parts and create the final
  // object or array. `v` is the entry just popped from the stack.
  def closeBlock(v: Any): Unit = {
    def toJValue(x: Any) = x match {
      case json: JValue               => json
      case other: IntermediateJObject => JObject(other.fields.result())
      case other: IntermediateJArray  => JArray(other.bits.result())
      case _                          => p.fail("unexpected field " + x)
    }
    vals.peekOption match {
      // The closed block is the value of a pending field.
      case Some(JField(name: String, _)) =>
        vals.pop(classOf[JField])
        val obj = vals.peek(classOf[IntermediateJObject])
        obj.fields.append(JField(name, toJValue(v)))
      // The enclosing container is an object but no field name is pending
      // (e.g. `{{}}` or `{"a":}`): there is nothing valid to attach `v` to.
      case Some(_: IntermediateJObject) =>
        p.fail("expected field name before value in object")
      // The closed block is an element of the enclosing array.
      case Some(a: IntermediateJArray) => a.bits.append(toJValue(v))
      case Some(x) => p.fail("expected field, array or object but got " + x)
      // Nothing encloses the closed block: it is the root.
      case None => root = Some(toJValue(v))
    }
  }

  // Attaches a scalar value to whatever is on top of the stack, or makes it
  // the root when the stack is empty.
  def newValue(v: JValue): Unit = {
    if (!vals.isEmpty)
      vals.peekAny match {
        case JField(name, _) =>
          vals.pop(classOf[JField])
          val obj = vals.peek(classOf[IntermediateJObject])
          obj.fields += (JField(name, v))
        case a: IntermediateJArray => a.bits += v
        case other => p.fail("expected field or array but got " + other)
      }
    else {
      vals.push(v)
      root = Some(v)
    }
  }

  // Consumes tokens until End, updating the stack for each one.
  def loop(): Unit = {
    token = p.nextToken
    token match {
      case OpenObj =>
        vals.push(IntermediateJObject(scala.collection.mutable.ListBuffer()))
      case FieldStart(name) => vals.push(JField(name, null))
      case StringVal(x)     => newValue(JString(x))
      case IntVal(x)        => newValue(JInt(x))
      case DoubleVal(x)     => newValue(JDouble(x))
      case BoolVal(x)       => newValue(JBool(x))
      case NullVal          => newValue(JNull)
      case CloseObj         => closeBlock(vals.popAny)
      case OpenArr =>
        vals.push(IntermediateJArray(scala.collection.mutable.ListBuffer()))
      case CloseArr => closeBlock(vals.popAny)
      case End      =>
    }
    if (token != End) {
      loop()
    }
  }

  loop()
  root getOrElse JNothing
}
// Sentinel character returned by Buffer.next at end of input: -1 converted to
// a Char, i.e. '\uffff'.
private final val EOF: Char = (-1).toChar
// Stack of values under construction, used by the AST parser. Entries are
// untyped (Any); the typed accessors check the runtime class of the entry and
// report a parse failure through `parser` when it does not match.
private class ValStack(parser: Parser) {
  import java.util.ArrayDeque
  private val stack = new ArrayDeque[Any](32)

  // Removes and returns the top entry, or null when the stack is empty.
  def popAny = stack.poll

  // Removes the top entry and returns it as `expectedType`.
  def pop[A](expectedType: Class[A]) = convert(stack.poll, expectedType)

  def push(v: Any) = stack.addFirst(v)

  // Returns the top entry without removing it, or null when the stack is
  // empty.
  def peekAny = stack.peek

  // Returns the top entry as `expectedType` without removing it.
  def peek[A](expectedType: Class[A]) = convert(stack.peek, expectedType)

  // Replaces the top entry with `newTop`.
  def replace[A](newTop: Any) = {
    stack.pop
    stack.push(newTop)
  }

  // Checks that `x` is an instance of `expectedType` and returns it with that
  // type. A null entry (empty stack) or a mismatching class is reported as a
  // parse failure.
  private def convert[A](x: Any, expectedType: Class[A]): A = {
    if (x == null) parser.fail("expected object or array")
    try {
      // Class.cast performs a real runtime check. A plain asInstanceOf[A] is
      // erased for a type parameter and would never throw here, letting a
      // wrongly typed value escape.
      expectedType.cast(x)
    } catch {
      case cce: ClassCastException =>
        parser.fail(
          s"failure during class conversion. I got $x but needed a type of $expectedType",
          cce
        )
    }
  }

  def peekOption = if (stack.isEmpty) None else Some(stack.peek)
  def isEmpty = stack.isEmpty
}
/** Low level pull parser: turns the characters supplied by `buf` into Tokens,
  * one per call to nextToken.
  */
class Parser(buf: Buffer) {
import java.util.ArrayDeque
// Maintains our current nesting context in the form of BlockMode, which
// indicates if each context is an array or object.
private val blocks = new ArrayDeque[BlockMode](32)
// True when a quoted string read inside an object is to be treated as a field
// name; set to false after ':' and back to true once a value has been read.
private var fieldNameMode = true
/** Always throws a ParseException whose message is `msg` followed by the
  * text near the current read position.
  */
def fail(msg: String, cause: Exception = null) =
throw new ParseException(msg + "\nNear: " + buf.near, cause)
/** Parse next Token from stream.
*/
def nextToken: Token = {
// Reads a quoted string body. A ParseException is rethrown unchanged; any
// other Exception is reported as "unexpected string end".
def parseString: String =
try {
unquote(buf)
} catch {
case p: ParseException => throw p
case cause: Exception => fail("unexpected string end", cause)
}
// Reads a number whose first character has already been consumed. Yields a
// DoubleVal when the text contains '.', 'e' or 'E', otherwise an IntVal.
def parseValue() = {
var wasInt = true
var doubleVal = false
val buf = this.buf
// Back up and mark the buffer so that we can extract a substring after
// that contains the whole value.
buf.back
buf.mark
while (wasInt) {
val c = buf.next
(c: @switch) match {
case '.' | 'e' | 'E' =>
doubleVal = true
case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' |
'-' | '+' =>
// continue
case _ =>
wasInt = false
if (c != EOF) {
buf.back // don't include the last character
}
}
}
buf.forward // substring is exclusive to the last index
val value = buf.substring()
buf.back // back up so our current pointer is in the right place
(doubleVal: @switch) match {
case true =>
DoubleVal(parseDouble(new String(value)))
case false =>
IntVal(BigInt(new String(value)))
}
}
// Consume characters until one of them completes a token. Cases that do not
// return (':' and whitespace/comma) simply continue with the next character.
while (true) {
(buf.next: @switch) match {
case '{' =>
blocks.addFirst(OBJECT)
fieldNameMode = true
return OpenObj
case '}' =>
// NOTE(review): the popped block mode is not checked, so a '}' closing an
// array context is not rejected here.
blocks.poll
return CloseObj
case '"' =>
if (fieldNameMode && blocks.peek == OBJECT)
return FieldStart(parseString)
else {
fieldNameMode = true
return StringVal(parseString)
}
case 't' =>
fieldNameMode = true
if (buf.next == 'r' && buf.next == 'u' && buf.next == 'e') {
return BoolVal(true)
}
fail("expected boolean")
case 'f' =>
fieldNameMode = true
if (
buf.next == 'a' && buf.next == 'l' && buf.next == 's' && buf.next == 'e'
) {
return BoolVal(false)
}
fail("expected boolean")
case 'n' =>
fieldNameMode = true
if (buf.next == 'u' && buf.next == 'l' && buf.next == 'l') {
return NullVal
}
fail("expected null")
case ':' =>
if (blocks.peek == ARRAY) fail("Colon in an invalid position")
// The next quoted string is a value, not a field name.
fieldNameMode = false
case '[' =>
blocks.addFirst(ARRAY)
return OpenArr
case ']' =>
// NOTE(review): as with '}', the popped block mode is not checked.
fieldNameMode = true
blocks.poll
return CloseArr
case ('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' |
'-') =>
fieldNameMode = true
return parseValue()
case ' ' | '\n' | ',' | '\r' | '\t' =>
// ignore
case c =>
c match {
case `EOF` =>
// End of input: close the reader if the buffer was asked to do so.
buf.automaticClose
return End
case _ =>
fail("unknown token " + c)
}
}
}
// Not reached (the loop above only exits via return or an exception); gives
// the method a result expression of type Token.
buf.automaticClose
End
}
// The kind of container the parser is currently inside.
sealed abstract class BlockMode
case object ARRAY extends BlockMode
case object OBJECT extends BlockMode
}
/* Buffer used to parse JSON.
 * Buffer is divided to one or more segments (preallocated in segmentPool).
 *
 * Characters are read from `in` into fixed-size segments. A mark can be set
 * at the current position; `substring` later extracts everything between the
 * mark and the current position, possibly spanning several segments.
 */
private[json] final class Buffer(
    in: Reader,
    closeAutomatically: Boolean,
    segmentPool: SegmentPool = Segments
) {
  // Reused by the parser when appropriate, allows for a single builder to be
  // used throughout the parse process, and to be written to directly from the
  // substring method, so as to avoid allocating new builders when avoidable.
  private[json] final val builder = new java.lang.StringBuilder(32)
  var offset = 0 // how far into the current segment we've read data
  var curMark = -1 // position of the mark within its segment, -1 if unset
  var curMarkSegment = -1 // index of the segment holding the mark, -1 if unset
  var eofIsFailure = false // when true, `next` throws at end of input
  private val segments =
    scala.collection.mutable.ArrayBuffer(segmentPool.apply())
  private var segment: Array[Char] = segments.head.seg
  private var cur = 0 // Pointer which points current parsing location
  private var curSegmentIdx = 0 // Pointer which points current segment

  // Mark the current point so that future substring calls will extract the
  // value from this point to whatever point the buffer has advanced to.
  def mark = {
    if (curSegmentIdx > 0) {
      // Make the current segment the first one. The segments are swapped
      // rather than overwriting index 0, so the segment previously at index 0
      // stays in the list, can be reused by `read`, and is still handed back
      // to the pool by `release`.
      val current = segments(curSegmentIdx)
      segments(curSegmentIdx) = segments(0)
      segments(0) = current
      curSegmentIdx = 0
    }
    curMark = cur
    curMarkSegment = curSegmentIdx
  }

  // Move the current position one character backwards / forwards.
  def back = cur = cur - 1
  def forward = cur = cur + 1

  // Read the next character; reads new data from the reader if necessary.
  // At end of input returns EOF, or throws if eofIsFailure is set.
  def next: Char = {
    if (cur >= offset && read < 0) {
      if (eofIsFailure) throw new ParseException("unexpected eof", null)
      else EOF
    } else {
      val c = segment(cur)
      cur += 1
      c
    }
  }

  private final val emptyArray = new Array[Char](0)

  // Slices from the last marked point to the current index. If intoBuilder is
  // true, appends it to the buffer's builder and returns an empty array. If
  // false, slices it into a new array and returns that array. The character
  // just before the current index (the last one read) is not included.
  final def substring(intoBuilder: Boolean = false) = {
    if (curSegmentIdx == curMarkSegment) {
      val substringLength = cur - curMark - 1
      if (intoBuilder) {
        builder.append(segment, curMark, substringLength)
        emptyArray
      } else if (substringLength == 0) {
        emptyArray
      } else {
        val array = new Array[Char](substringLength)
        System.arraycopy(segment, curMark, array, 0, substringLength)
        array
      }
    } else { // slower path for case when string is in two or more segments
      val segmentCount = curSegmentIdx - curMarkSegment + 1
      val substringLength =
        segmentCount * segmentPool.segmentSize - curMark - (segmentPool.segmentSize - cur) - 1
      val chars =
        if (intoBuilder) {
          emptyArray
        } else {
          new Array[Char](substringLength)
        }
      var i = curMarkSegment
      var offset = 0 // write position in `chars`; shadows the field
      while (i <= curSegmentIdx) {
        val s = segments(i).seg
        val start = if (i == curMarkSegment) curMark else 0
        // For full segments `end` is one past the length so that the "- 1"
        // below yields the whole segment.
        val end = if (i == curSegmentIdx) cur else s.length + 1
        val partLen = end - start - 1
        if (intoBuilder) {
          builder.append(s, start, partLen)
        } else {
          System.arraycopy(s, start, chars, offset, partLen)
        }
        offset += partLen
        i = i + 1
      }
      // The mark is cleared after a multi-segment extraction.
      curMarkSegment = -1
      curMark = -1
      chars
    }
  }

  // Text from the current segment around the current position (starting up to
  // 20 characters before it), used in error messages.
  def near = {
    val start = (cur - 20) max 0
    val len = ((cur + 1) min segmentPool.segmentSize) - start
    new String(segment, start, len)
  }

  // Hands every segment held by this buffer back to the pool.
  def release = segments.foreach(segmentPool.release)

  // Closes the reader if this buffer was created with closeAutomatically.
  private[JsonParser] def automaticClose = if (closeAutomatically) in.close

  // Reads the next available block from the reader. Returns -1 if there's
  // nothing more to read.
  private def read = {
    if (offset >= segment.length) {
      // The current segment is full: pick the segment to fill next.
      offset = 0
      val segmentToUse =
        (curMarkSegment: @scala.annotation.switch) match {
          case -1 =>
            // No mark is active, so nothing needs preserving: reuse the first
            // segment.
            curSegmentIdx = 0
            segments(0)
          case _ =>
            // A mark is active: keep the marked data and continue in the next
            // segment, taking a new one from the pool if needed.
            curSegmentIdx += 1
            if (curSegmentIdx < segments.length) {
              segments(curSegmentIdx)
            } else {
              val segment = segmentPool.apply()
              segments.append(segment)
              segment
            }
        }
      segment = segmentToUse.seg
    }
    val length = in.read(segment, offset, segment.length - offset)
    if (length != -1) {
      cur = offset
      offset += length
      length
    } else -1
  }
}
// Source of the character segments used by Buffer.
private[json] trait SegmentPool {
// Returns a segment for the caller to use.
def apply(): Segment
// Hands a segment back to the pool.
def release(segment: Segment): Unit
// Size of a segment, as used by Buffer in its length and offset arithmetic.
def segmentSize: Int
}
// SegmentPool backed by an ArrayBlockingQueue. At most maxNumOfSegments
// recycled segments are ever created; when none is available a disposable
// segment is handed out instead, and disposable segments are dropped on
// release rather than queued.
private[json] class ArrayBlockingSegmentPool(override val segmentSize: Int)
    extends SegmentPool {
  import java.util.concurrent.ArrayBlockingQueue
  import java.util.concurrent.atomic.AtomicInteger

  private val maxNumOfSegments = 10000
  // Number of recycled segments created so far.
  private val segmentCount = new AtomicInteger(0)
  // Recycled segments currently available for reuse.
  private val segments =
    new ArrayBlockingQueue[Segment](maxNumOfSegments)

  private[json] def clear = segments.clear

  def apply(): Segment =
    acquire match {
      // Give back a disposable segment if pool is exhausted.
      case null   => DisposableSegment(new Array(segmentSize))
      case pooled => pooled
    }

  // Returns a recycled segment: a newly created one when the queue is empty
  // and the creation limit has not been reached (and the counter update
  // succeeds), otherwise whatever the queue yields, which may be null.
  private def acquire: Segment = {
    val observedCount = segmentCount.get
    val mayCreate = segments.size == 0 && observedCount < maxNumOfSegments
    if (mayCreate && segmentCount.compareAndSet(observedCount, observedCount + 1))
      RecycledSegment(new Array(segmentSize))
    else
      segments.poll
  }

  // Only recycled segments go back into the queue.
  def release(s: Segment) = s match {
    case recycled: RecycledSegment => segments.offer(recycled)
    case _                         =>
  }
}
/*
* The default pool of char arrays used by Buffer; each segment holds 1000
* chars.
*/
private object Segments extends ArrayBlockingSegmentPool(1000)
// A char array handed out by a SegmentPool.
sealed trait Segment {
// The underlying character storage.
val seg: Array[Char]
}
// A segment that ArrayBlockingSegmentPool puts back into its queue on release.
case class RecycledSegment(seg: Array[Char]) extends Segment
// A segment that ArrayBlockingSegmentPool does not keep on release.
case class DisposableSegment(seg: Array[Char]) extends Segment
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy (footer of the hosting page, not part of the source)