com.rojoma.json.v3.io.JsonTokenIterator.scala Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of rojoma-json-v3_2.11 Show documentation
rojoma-json-v3
The newest version!
package com.rojoma.json.v3
package io

import java.io.Reader

import scala.annotation.switch

import `-impl`.util.AbstractBufferedIterator

/** Convert a character-stream into a token-stream.
  *
  * This is guaranteed to read no more than necessary to ensure it has
  * reached the end of a single token.  For objects, arrays, and
  * strings, it will read only up to (and, of course, including) the
  * closing delimiter.  For other types, it may read one character
  * further to assure itself that it has reached the end.
  *
  * A `JsonTokenIterator` does many small reads; it may be a good idea
  * to wrap the input `Reader` into a `BufferedReader`.  If you do not
  * need to read non-JSON out of the underlying `Reader` afterward, a
  * [[com.rojoma.json.v3.io.BlockJsonTokenIterator]] may be faster.
  *
  * As extensions to standard JSON, this reader supports single-quoted
  * strings and Javascript-style comments.
  *
  * Lexical problems are reported by throwing: `JsonLexerEOF` when the
  * input ends in the middle of a token or comment,
  * `JsonUnexpectedCharacter` when a character cannot continue the
  * current token, and `JsonNumberOutOfRange` when a number's exponent
  * is rejected by `ReaderUtils.isBigDecimalizableUnsignedExponent`.
  *
  * @param reader the character source to tokenize
  *
  * @see [[com.rojoma.json.v3.io.BlockJsonTokenIterator]]
  * @see [[com.rojoma.json.v3.io.JsonTokenGenerator]]
  * @see [[com.rojoma.json.v3.io.JsonToken]]
  */
class JsonTokenIterator(reader: Reader) extends AbstractBufferedIterator[JsonToken] {
  // One character of lookahead.  `peeked` is only meaningful to callers
  // while `isPeeked` is true (or immediately after `nextChar()` cleared it).
  private[this] var isPeeked: Boolean = false
  private[this] var peeked: Char = _

  // The token found by the last `advance()` that `next()` has not yet
  // handed out; null when nothing is buffered.
  private[this] var nextToken: JsonToken = null

  private[this] var nextCharRow = 1 // This is the position of the next char returned from "nextChar()" or "peekChar()"
  private[this] var nextCharCol = 1

  // Shared accumulator for the text of numbers, identifiers and strings;
  // each reader resets it with `setLength(0)` before use.
  private[this] val scratch = new StringBuilder

  override def toString: String =
    if(nextToken ne null) "non-empty iterator"
    else "possibly-empty iterator"

  /** Throws a `JsonUnexpectedCharacter` for `receivedChar` at the given position. */
  private def lexerError(receivedChar: Char, expected: String, row: Int, col: Int): Nothing = {
    throw new JsonUnexpectedCharacter(receivedChar, expected, Position(row, col))
  }

  /** Consumes and returns the next character, updating the row/column
    * counters.  Throws `JsonLexerEOF` (via `peekChar`) at end of input.
    */
  private def nextChar(): Char = {
    peekChar() // ensures `peeked` holds the character to consume
    isPeeked = false
    if(peeked == '\n') { nextCharRow += 1; nextCharCol = 1 }
    else nextCharCol += 1
    peeked
  }

  /** Returns the next character without consuming it.  Throws
    * `JsonLexerEOF` if the reader is exhausted.
    */
  private def peekChar(): Char = {
    if(!isPeeked) {
      val newChar = reader.read()
      if(newChar == -1) throw new JsonLexerEOF(Position(nextCharRow, nextCharCol))
      peeked = newChar.toChar
      isPeeked = true
    }
    peeked
  }

  /** Returns true if no more characters are available.  When a
    * character is available it is stored as the lookahead, so a
    * following `peekChar()` will not hit the reader again.
    */
  private def atEOF(): Boolean =
    if(isPeeked) {
      false
    } else {
      val newChar = reader.read()
      if(newChar == -1) {
        true
      } else {
        peeked = newChar.toChar
        isPeeked = true
        false
      }
    }

  /** Consumes characters up to, but not including, the next '\n' (or to end of input). */
  private def skipToEndOfLine(): Unit = while(!atEOF() && peekChar() != '\n') nextChar()

  /** Consumes the remainder of a block comment whose opening slash-star
    * has already been read, through the closing star-slash.  Throws
    * `JsonLexerEOF` if the comment is never closed.
    */
  private def skipBlockComment(): Unit = {
    var last = nextChar()
    while(last != '*' || peekChar() != '/') last = nextChar()
    nextChar() // skip final '/'
  }

  /** Consumes a comment; the lookahead must be its opening '/'. */
  private def skipComment(): Unit = {
    nextChar() // skip opening "/"
    // Remember where the second character sits so an error points at it.
    val row = nextCharRow
    val col = nextCharCol
    nextChar() match {
      case '/' => skipToEndOfLine()
      case '*' => skipBlockComment()
      case c => lexerError(c, "/ or *", row, col)
    }
  }

  /** Consumes any run of whitespace and comments preceding the next token. */
  @annotation.tailrec
  private def skipWhitespace(): Unit = {
    while(!atEOF() && Character.isWhitespace(peekChar())) nextChar()
    // A comment may itself be followed by more whitespace/comments, so go around again.
    if(!atEOF() && peekChar() == '/') { skipComment(); skipWhitespace() }
  }

  /** True if another token is available; reads one from the input if none is buffered. */
  def hasNext: Boolean = {
    if(nextToken == null) advance()
    nextToken != null
  }

  /** The next token, without consuming it.  Throws `NoSuchTokenException` at end of input. */
  def head: JsonToken = {
    if(!hasNext) throw new NoSuchTokenException(Position(nextCharRow, nextCharCol))
    nextToken
  }

  /** Returns the next token and consumes it.  Throws `NoSuchTokenException` at end of input. */
  def next(): JsonToken = {
    val result = head
    nextToken = null
    result
  }

  /** Lexes one token into `nextToken`, or leaves it null if only
    * whitespace/comments remain before end of input.
    */
  private def advance(): Unit = {
    skipWhitespace()
    if(atEOF()) {
      nextToken = null
    } else {
      val tokenPosition = Position(nextCharRow, nextCharCol)
      nextToken = (peekChar(): @switch) match {
        case '{' =>
          nextChar()
          TokenOpenBrace()(tokenPosition)
        case '}' =>
          nextChar()
          TokenCloseBrace()(tokenPosition)
        case '[' =>
          nextChar()
          TokenOpenBracket()(tokenPosition)
        case ']' =>
          nextChar()
          TokenCloseBracket()(tokenPosition)
        case ':' =>
          nextChar()
          TokenColon()(tokenPosition)
        case ',' =>
          nextChar()
          TokenComma()(tokenPosition)
        case '"' | '\'' => readString(tokenPosition)
        case '-' => readNumber(tokenPosition)
        case c =>
          if(isDigit(c)) readNumber(tokenPosition) // should I inline this into a case '0' | '1' | ... | '9' ?
          else if(Character.isUnicodeIdentifierStart(c)) readIdentifier(tokenPosition)
          else lexerError(c, "start of datum", nextCharRow, nextCharCol)
      }
    }
  }

  private def isDigit(c: Char) = '0' <= c && c <= '9'

  /** True if `c` lies anywhere in the UTF-16 surrogate range (high or low). */
  private def isSurrogate(c: Char) = c >= Character.MIN_SURROGATE && c <= Character.MAX_SURROGATE

  /** Consumes one character, which must be an ASCII digit, and returns it. */
  private def readDigit(): Char = {
    val row = nextCharRow
    val col = nextCharCol
    val c = nextChar()
    if(!isDigit(c)) lexerError(c, "digit", row, col)
    c
  }

  /** Reads a number token starting at the lookahead character. */
  private def readNumber(startPos: Position) = {
    // JSON numbers match (a subset of) the language generated by
    // the regular expression:
    //    -?\d+(\.\d+)?([eE][+-]?\d+)?
    // In particular, JSON restricts leading zeros, but we'll match
    // the whole thing anyway.
    scratch.setLength(0)

    if(peekChar() == '-') scratch += nextChar()

    do { scratch += readDigit() } while(!atEOF() && isDigit(peekChar()))

    val hasFrac = !atEOF() && peekChar() == '.'
    if(hasFrac) {
      scratch += nextChar() // skip decimal
      do { scratch += readDigit() } while(!atEOF() && isDigit(peekChar()))
    }

    val hasExponent = !atEOF() && (peekChar() == 'e' || peekChar() == 'E')

    val n =
      if(hasExponent) {
        scratch += nextChar() // skip e/E

        if(peekChar() == '-' || peekChar() == '+') scratch += nextChar()
        else scratch += '+' // ensure there's always a sign

        val exponentDigitsStart = scratch.length
        do { scratch += readDigit() } while(!atEOF() && isDigit(peekChar()))

        // this relies on the exponent being the last thing read
        val result = scratch.toString
        if(!ReaderUtils.isBigDecimalizableUnsignedExponent(result, exponentDigitsStart)) {
          throw new JsonNumberOutOfRange(result, startPos)
        }
        result
      } else {
        scratch.toString
      }
    TokenNumber(n)(startPos)
  }

  /** Reads an identifier token: the lookahead character followed by
    * every subsequent `Character.isUnicodeIdentifierPart` character.
    */
  private def readIdentifier(startPos: Position) = {
    scratch.setLength(0)
    scratch += nextChar()
    while(!atEOF() && Character.isUnicodeIdentifierPart(peekChar())) scratch += nextChar()
    TokenIdentifier(scratch.toString())(startPos)
  }

  /** Reads a string token.  The lookahead character is the opening
    * quote (either kind); the string ends at the next unescaped
    * occurrence of that same character.
    */
  private def readString(startPos: Position) = {
    scratch.setLength(0)
    val Boundary = nextChar()
    while(peekChar() != Boundary) {
      readPotentialSurrogatePair(readChar(), Boundary)
    }
    nextChar() // skip closing character
    TokenString(scratch.toString)(startPos)
  }

  /** Appends `c` to the string being built, routing surrogates through
    * `readSurrogatePair` so that unpaired ones are replaced.
    */
  private def readPotentialSurrogatePair(c: Char, endOfString: Char): Unit = {
    if(isSurrogate(c)) {
      readSurrogatePair(c, endOfString)
    } else {
      scratch += c
    }
  }

  // U+FFFD REPLACEMENT CHARACTER, substituted for unpaired surrogates.
  private def badChar = 0xfffd.toChar

  /** Handles a surrogate `c` that has just been read from a string.
    *
    * A high surrogate followed by a low surrogate is appended as a
    * pair.  Anything else (a lone low surrogate, or a high surrogate
    * that is followed by the end of the string or by a character that
    * is not a low surrogate) is replaced by `badChar`.  In the last
    * case the following character has already been consumed, so it is
    * processed in turn: again as a potential surrogate if it is one,
    * otherwise appended as-is.
    */
  @annotation.tailrec
  private def readSurrogatePair(c: Char, endOfString: Char): Unit = {
    if(Character.isHighSurrogate(c)) {
      if(peekChar() == endOfString) {
        scratch += badChar
      } else {
        val potentialSecondHalf = readChar()
        if(Character.isLowSurrogate(potentialSecondHalf)) {
          scratch += c
          scratch += potentialSecondHalf
        } else {
          scratch += badChar
          // The range test must be on the character just read, not on `c`:
          // `c` is known to be a high surrogate here, so testing it would let
          // every char >= MIN_SURROGATE (e.g. U+E000..U+FFFF) be misclassified
          // as a surrogate and replaced.
          if(isSurrogate(potentialSecondHalf)) {
            readSurrogatePair(potentialSecondHalf, endOfString)
          } else {
            scratch += potentialSecondHalf
          }
        }
      }
    } else {
      scratch += badChar
    }
  }

  /** Reads one logical character of string content, decoding a backslash escape if present. */
  private def readChar(): Char = {
    nextChar() match {
      case '\\' => readEscapedCharacter()
      case c => c
    }
  }

  /** Decodes the escape whose leading backslash has already been consumed. */
  private def readEscapedCharacter(): Char = {
    val row = nextCharRow
    val col = nextCharCol
    nextChar() match {
      case '"' => '"'
      case '\'' => '\''
      case '\\' => '\\'
      case '/' => '/'
      case 'b' => '\b'
      case 'f' => '\f'
      case 'n' => '\n'
      case 'r' => '\r'
      case 't' => '\t'
      case 'u' => readUnicodeCharacter()
      case c => lexerError(c, "string escape character", row, col)
    }
  }

  /** Reads the four hex digits of a `\u` escape and returns the UTF-16 code unit they denote. */
  private def readUnicodeCharacter(): Char = {
    // A multi-name `val` evaluates its right-hand side once per name, in
    // order, so this consumes four successive hex digits.
    val h1, h2, h3, h4 = readHexDigit()
    ((h1 << 12) | (h2 << 8) | (h3 << 4) | h4).toChar
  }

  /** Consumes one hexadecimal digit (either case) and returns its value, 0 through 15. */
  private def readHexDigit(): Int = {
    val row = nextCharRow
    val col = nextCharCol
    nextChar() match {
      case c if isDigit(c) => c.toInt - '0'.toInt
      case c if 'a' <= c && c <= 'f' => 10 + c.toInt - 'a'.toInt
      case c if 'A' <= c && c <= 'F' => 10 + c.toInt - 'A'.toInt
      case c => lexerError(c, "hex digit", row, col)
    }
  }
}