All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.noggit.JSONParser Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 *  Copyright 2006- Yonik Seeley
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.noggit;

import java.io.IOException;
import java.io.Reader;


public class JSONParser {

  /**
   * Event indicating a JSON string value, including member names of objects
   */
  public static final int STRING = 1;
  /**
   * Event indicating a JSON number value which fits into a signed 64 bit integer
   */
  public static final int LONG = 2;
  /**
   * Event indicating a JSON number value which has a fractional part or an exponent
   * and with string length <= 23 chars not including sign.  This covers
   * all representations of normal values for Double.toString().
   */
  public static final int NUMBER = 3;
  /**
   * Event indicating a JSON number value that was not produced by toString of any
   * Java primitive numerics such as Double or Long.  It is either
   * an integer outside the range of a 64 bit signed integer, or a floating
   * point value with a string representation of more than 23 chars.
   */
  public static final int BIGNUMBER = 4;
  /**
   * Event indicating a JSON boolean
   */
  public static final int BOOLEAN = 5;
  /**
   * Event indicating a JSON null
   */
  public static final int NULL = 6;
  /**
   * Event indicating the start of a JSON object
   */
  public static final int OBJECT_START = 7;
  /**
   * Event indicating the end of a JSON object
   */
  public static final int OBJECT_END = 8;
  /**
   * Event indicating the start of a JSON array
   */
  public static final int ARRAY_START = 9;
  /**
   * Event indicating the end of a JSON array
   */
  public static final int ARRAY_END = 10;
  /**
   * Event indicating the end of input has been reached
   */
  public static final int EOF = 11;


  // ---------------------------------------------------------------------
  // Flags to control parsing behavior.  Each flag occupies its own bit so
  // several can be OR-ed together and handed to setFlags(int).
  // ---------------------------------------------------------------------

  /**
   * Intended to permit comments in the input.
   * NOTE(review): this flag is not tested anywhere in the visible code; comments
   * appear to be skipped regardless of its value - confirm.
   */
  public static final int ALLOW_COMMENTS = 1 << 0;
  /**
   * Permits strings delimited by single quotes.  When the flag is clear a
   * single-quoted string raises a {@link ParseException}.
   */
  public static final int ALLOW_SINGLE_QUOTES = 1 << 1;
  /**
   * Permits a backslash in front of any character, not only the standard
   * JSON escape characters; the escaped character is taken literally.
   */
  public static final int ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER = 1 << 2;
  /**
   * Permits object member names that are not enclosed in quotes.
   */
  public static final int ALLOW_UNQUOTED_KEYS = 1 << 3;
  /**
   * Permits string values that are not enclosed in quotes.
   */
  public static final int ALLOW_UNQUOTED_STRING_VALUES = 1 << 4;
  /**
   * ALLOW_EXTRA_COMMAS causes any number of extra commas in arrays and objects to be ignored.
   * Note that a trailing comma in [] would be [,] (hence calling the feature "trailing" commas
   * is either limiting or misleading).  Since trailing commas is fundamentally incompatible with any future
   * "fill-in-missing-values-with-null", it was decided to extend this feature to handle any
   * number of extra commas.
   */
  public static final int ALLOW_EXTRA_COMMAS = 1 << 5;
  /**
   * Permits a '{' or '[' to follow a member name without the ':' separator, and
   * to follow an array element without the ',' separator.
   */
  public static final int ALLOW_MISSING_COLON_COMMA_BEFORE_OBJECT = 1 << 6;
  /**
   * When the very first event would be a string, treats the input as an object
   * whose opening brace was omitted; end of input then closes that object.
   */
  public static final int OPTIONAL_OUTER_BRACES = 1 << 7;

  /** No leniency flags set. */
  public static final int FLAGS_STRICT = 0;
  /** The flag combination a new parser starts with. */
  public static final int FLAGS_DEFAULT = ALLOW_COMMENTS | ALLOW_SINGLE_QUOTES | ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER | ALLOW_UNQUOTED_KEYS | ALLOW_UNQUOTED_STRING_VALUES | ALLOW_EXTRA_COMMAS;

  /**
   * Unchecked exception raised by the parser when the input is not acceptable.
   * The message built by {@code err(String)} carries the offending character,
   * its position and some surrounding context.
   */
  public static class ParseException extends RuntimeException {
    /**
     * @param msg description of the parse failure
     */
    public ParseException(String msg) {
      super(msg);
    }
  }

  /**
   * Translates an event code into its symbolic name, mainly for diagnostics.
   *
   * @param e one of the event constants of this class
   * @return the constant's name, or {@code "Unknown: " + e} for any other value
   */
  public static String getEventString(int e) {
    final String name;
    switch (e) {
      case STRING:
        name = "STRING";
        break;
      case LONG:
        name = "LONG";
        break;
      case NUMBER:
        name = "NUMBER";
        break;
      case BIGNUMBER:
        name = "BIGNUMBER";
        break;
      case BOOLEAN:
        name = "BOOLEAN";
        break;
      case NULL:
        name = "NULL";
        break;
      case OBJECT_START:
        name = "OBJECT_START";
        break;
      case OBJECT_END:
        name = "OBJECT_END";
        break;
      case ARRAY_START:
        name = "ARRAY_START";
        break;
      case ARRAY_END:
        name = "ARRAY_END";
        break;
      case EOF:
        name = "EOF";
        break;
      default:
        name = "Unknown: " + e;
        break;
    }
    return name;
  }

  private static final CharArr devNull = new CharArr.NullCharArr();

  protected int flags = FLAGS_DEFAULT;

  protected final char[] buf;  // input buffer with JSON text in it
  protected int start;         // current position in the buffer
  protected int end;           // end position in the buffer (one past last valid index)
  protected final Reader in;   // optional reader to obtain data from
  protected boolean eof = false; // true if the end of the stream was reached.
  protected long gpos;          // global position = gpos + start

  protected int event;         // last event read

  protected int stringTerm;    // The terminator for the last string we read: single quote, double quote, or 0 for unterminated.

  protected boolean missingOpeningBrace = false;

  /**
   * Creates a parser that pulls its input from a reader, using an internally
   * allocated 8192-char buffer.
   *
   * @param in source of the JSON text
   */
  public JSONParser(Reader in) {
    this(in, new char[8192]);
    // 8192 matches the default buffer size of a BufferedReader so double
    // buffering of the data is avoided.
  }

  /**
   * Creates a parser that pulls its input from a reader into a caller-supplied
   * buffer.  The buffer is used directly (not copied) and is refilled from the
   * reader whenever it has been consumed.
   *
   * @param in     source of the JSON text
   * @param buffer working buffer that receives the data read from {@code in}
   */
  public JSONParser(Reader in, char[] buffer) {
    this.in = in;
    this.buf = buffer;
  }

  // idea - if someone passes us a CharArrayReader, we could
  // directly use that buffer as it's protected.

  /**
   * Creates a parser over a region of a char array.  The array is used
   * directly (not copied) and no reader is involved.
   *
   * @param data  array holding the JSON text
   * @param start index of the first character to parse
   * @param end   index one past the last character to parse
   */
  public JSONParser(char[] data, int start, int end) {
    this.in = null;
    this.buf = data;
    this.start = start;
    this.end = end;
  }

  /**
   * Creates a parser over a complete string.
   *
   * @param data the JSON text
   */
  public JSONParser(String data) {
    this(data, 0, data.length());
  }

  /**
   * Creates a parser over the characters {@code data[start..end)} of a string.
   * <p>
   * The selected range is copied into a private buffer that begins at index 0,
   * so the buffer cursor is rebased to {@code 0 .. end-start}.  (Keeping the
   * original string offsets as buffer indices would read the wrong region, or
   * run past the buffer, whenever {@code start != 0}.)  The global position is
   * seeded with {@code start} so that reported positions stay relative to the
   * original string, matching the {@code char[]} constructor.
   *
   * @param data  string holding the JSON text
   * @param start index of the first character to parse
   * @param end   index one past the last character to parse
   */
  public JSONParser(String data, int start, int end) {
    this.in = null;
    this.buf = new char[end - start];
    data.getChars(start, end, buf, 0);
    this.start = 0;            // the copy starts at the beginning of buf
    this.end = buf.length;
    this.gpos = start;         // positions are still reported relative to data
  }

  /**
   * @return the bit set of parsing flags currently in effect
   */
  public int getFlags() {
    return flags;
  }

  /**
   * Replaces the parsing flags.
   *
   * @param flags the new bit set of flags (see the {@code ALLOW_*} constants)
   * @return the flags that were in effect before this call
   */
  public int setFlags(int flags) {
    // must read the field, not the parameter that shadows it
    int oldFlags = this.flags;
    this.flags = flags;
    return oldFlags;
  }

  // temporary output buffer
  private final CharArr out = new CharArr(64);

  // We need to keep some state in order to (at a minimum) know if
  // we should skip ',' or ':'.
  private byte[] stack = new byte[16];
  private int ptr = 0;     // pointer into the stack of parser states
  private byte state = 0;  // current parser state

  // parser states stored in the stack
  private static final byte DID_OBJSTART = 1;  // '{' just read
  private static final byte DID_ARRSTART = 2;  // '[' just read
  private static final byte DID_ARRELEM = 3;   // array element just read
  private static final byte DID_MEMNAME = 4;   // object member name (map key) just read
  private static final byte DID_MEMVAL = 5;    // object member value (map val) just read

  // info about value that was just read (or is in the middle of being read)
  private int valstate;

  /**
   * Saves the current parser state on the state stack; called when a new
   * container is entered.  The stack doubles in size when it is full.
   */
  private final void push() {
    final int depth = ptr;
    if (depth >= stack.length) {
      // Doubling is generous, but nesting deeper than 32 levels (i.e. needing
      // to double more than once) is very unusual anyway.
      final byte[] grown = new byte[stack.length << 1];
      System.arraycopy(stack, 0, grown, 0, stack.length);
      stack = grown;
    }
    ptr = depth + 1;
    stack[depth] = state;
  }

  /**
   * Restores the parser state saved by the matching {@code push()}; called when
   * a container ends.
   *
   * @throws ParseException if there is no saved state to restore
   */
  private final void pop() {
    ptr--;
    if (ptr < 0) {
      throw err("Unbalanced container");
    }
    state = stack[ptr];
  }

  /**
   * Refills the buffer from the reader, if there is one, and records whether
   * the end of the input has been reached.  Without a reader the buffer is left
   * untouched and only the end-of-input flag is updated.
   *
   * @throws IOException if the reader fails
   */
  protected void fill() throws IOException {
    if (in != null) {
      gpos += end;   // everything in the old buffer now lies behind us
      start = 0;
      final int count = in.read(buf, 0, buf.length);
      end = Math.max(count, 0);   // a negative count means end of stream
    }
    if (end <= start) {
      eof = true;
    }
  }

  /**
   * Refills the buffer and insists that at least one character is available.
   *
   * @throws ParseException if no more input is available
   * @throws IOException if the reader fails
   */
  private void getMore() throws IOException {
    fill();
    final boolean exhausted = end <= start;
    if (exhausted) {
      throw err(null);
    }
  }

  /**
   * Consumes and returns the next input character, refilling the buffer when
   * it has run dry.
   *
   * @return the character, or -1 when the input is exhausted
   * @throws IOException if the reader fails
   */
  protected int getChar() throws IOException {
    if (start < end) {
      return buf[start++];
    }
    fill();
    if (start < end) {
      return buf[start++];
    }
    return -1;
  }

  /**
   * Tells whether a character counts as whitespace for this parser.  This is
   * {@link Character#isWhitespace(int)} extended by the no-break space
   * (U+00A0), which Java itself does not treat as whitespace.
   *
   * @param ch the character (code point) to classify
   * @return true if {@code ch} is whitespace
   */
  protected static final boolean isWhitespace(int ch) {
    if (Character.isWhitespace(ch)) {
      return true;
    }
    return ch == 0x00a0;
  }

  private static final long WS_MASK = (1L << ' ') | (1L << '\t') | (1L << '\r') | (1L << '\n') | (1L << '#') | (1L << '/') | (0x01); // set 1 bit so 0xA0 will be flagged as whitespace

  /**
   * Consumes input until a character is found that is neither whitespace nor
   * part of a comment, and returns it.
   * <p>
   * This is normally called while expecting one of the JSON structural
   * characters, all of which are below 64, as are the comment starters '/' and
   * '#'.  A single shift-and-mask against WS_MASK therefore serves as the fast
   * path without a range check; the shift distance wraps for larger characters,
   * so a few of them are flagged falsely and are sorted out by the slower tests
   * that follow.
   *
   * @return the first significant character, or -1 at end of input
   * @throws IOException if the reader fails
   */
  protected int getCharNWS() throws IOException {
    while (true) {
      final int c = getChar();
      final boolean flagged = ((WS_MASK >> c) & 0x01) != 0;
      if (!flagged) {
        return c;
      }
      if (c <= ' ') {
        // only reachable when one of the genuine whitespace bits matched
        continue;
      }
      if (c == '/') {
        getSlashComment();
        continue;
      }
      if (c == '#') {
        getNewlineComment();
        continue;
      }
      // falsely flagged characters end up here, as does whitespace such as 0xa0
      if (!isWhitespace(c)) {
        return c;
      }
    }
  }

  /**
   * Same as {@link #getCharNWS()}, except that the first candidate character
   * has already been read by the caller.
   *
   * @param ch the character already read
   * @return {@code ch} if it is significant, otherwise the first significant
   *         character that follows it, or -1 at end of input
   * @throws IOException if the reader fails
   */
  protected int getCharNWS(int ch) throws IOException {
    int c = ch;
    while (true) {
      // fast path: structural characters are not flagged in WS_MASK
      if (((WS_MASK >> c) & 0x01) == 0) {
        return c;
      }
      if (c > ' ') {
        if (c == '/') {
          getSlashComment();
        } else if (c == '#') {
          getNewlineComment();
        } else if (!isWhitespace(c)) {
          // falsely flagged character (bare strings, errors)
          return c;
        }
      }
      // whitespace or a comment was skipped: look at the next character
      c = getChar();
    }
  }

  /**
   * Reads the next significant character, optimised for the case that it is
   * the given expected character possibly preceded by plain spaces.
   *
   * @param expected the character the caller hopes to find
   * @return {@code expected} if found, otherwise the next character that is
   *         neither whitespace nor part of a comment (-1 at end of input)
   * @throws IOException if the reader fails
   */
  protected int getCharExpected(int expected) throws IOException {
    int c = getChar();
    while (c != expected && c == ' ') {
      c = getChar();
    }
    if (c == expected) {
      return expected;
    }
    return getCharNWS(c);
  }

  /**
   * Skips the rest of a single-line comment; the introducing '#' or '//' has
   * already been consumed.  Reading stops after the first '\n' or '\r', or at
   * end of input.  For a DOS line ending we stop on the '\r' and leave the '\n'
   * to the regular whitespace skipping.
   *
   * @throws IOException if the reader fails
   */
  protected void getNewlineComment() throws IOException {
    int c;
    do {
      c = getChar();
    } while (c != '\n' && c != '\r' && c != -1);
  }

  /**
   * Skips a comment whose leading '/' has already been consumed.  A second '/'
   * starts a single-line comment; a '*' starts a block comment that runs up to
   * the closing star-slash pair or, if that never comes, to the end of input.
   *
   * @throws ParseException if the '/' is followed by neither '/' nor '*'
   * @throws IOException if the reader fails
   */
  protected void getSlashComment() throws IOException {
    final int second = getChar();
    if (second == '/') {
      getNewlineComment();
      return;
    }
    if (second != '*') {
      throw err("Invalid comment: expected //, /*, or #");
    }

    // Remember whether the previous character was a '*' so that runs such as
    // "*******/" terminate the comment correctly.
    boolean afterStar = false;
    while (true) {
      final int c = getChar();
      if (c == -1) {
        return;
      }
      if (afterStar && c == '/') {
        return;
      }
      afterStar = (c == '*');
    }
  }


  /**
   * Tries to match the remainder of a bare word such as a literal keyword.
   * Matching starts at index 1, i.e. the first character of {@code arr} is
   * taken to have been consumed by the caller already.
   * <p>
   * When the word does not match and unquoted string values are allowed, the
   * characters matched so far are placed in {@code out} and {@code stringTerm}
   * is set to 0, so that the input can be continued as an unquoted string.
   *
   * @param arr the characters of the word to match
   * @return true if the word was matched; false if the input should be treated
   *         as an unquoted string instead
   * @throws ParseException if the word does not match and unquoted string
   *                        values are not allowed
   * @throws IOException if the reader fails
   */
  protected boolean matchBareWord(char[] arr) throws IOException {
    for (int i = 1; i < arr.length; i++) {
      int ch = getChar();
      if (ch != arr[i]) {
        if ((flags & ALLOW_UNQUOTED_STRING_VALUES) == 0) {
          throw err("Expected " + new String(arr));
        } else {
          // fall back to an unquoted string that begins with the matched prefix
          stringTerm = 0;
          out.reset();
          out.write(arr, 0, i);
          if (!eof) {
            // push back the mismatching character
            start--;
          }
          return false;
        }
      }
    }

    // if we don't allow bare strings, we don't need to check that the string actually terminates... just
    // let things fail as the parser tries to continue
    if ((flags & ALLOW_UNQUOTED_STRING_VALUES) == 0) {
      return true;
    }

    // check that the string actually terminates... for example trueX should return false
    int ch = getChar();
    if (eof) {
      return true;
    } else if (!isUnquotedStringChar(ch)) {
      // the word is properly terminated: push back the terminating character
      start--;
      return true;
    }

    // we encountered something like "trueX" when matching "true"
    stringTerm = 0;
    out.reset();
    out.unsafeWrite(arr, 0, arr.length);
    out.unsafeWrite(ch);
    return false;
  }

  /**
   * Builds (but does not throw) a {@link ParseException} describing a problem
   * at the current input position.  Unless the end of input was reached, the
   * read position is first moved back by one character so that it points at
   * the offending character.
   *
   * @param msg description of the problem, or null to use a generic one
   * @return the exception for the caller to throw
   */
  protected ParseException err(String msg) {
    // Comparing start with end cannot tell us whether EOF was hit, because the
    // illegal character may have been the last one in the buffer or in the
    // stream; that is what the "eof" flag is for.
    if (!eof && start > 0) {
      start--;  // back up one char
    }
    final boolean atEnd = start >= end;

    final StringBuilder details = new StringBuilder();
    details.append("char=");
    if (atEnd) {
      details.append("(EOF)");
    } else {
      details.append(buf[start]);
    }
    details.append(',');
    details.append("position=").append(gpos + start);
    details.append(getContext());

    String text = msg;
    if (text == null) {
      text = atEnd ? "Unexpected EOF" : "JSON Parse Error";
    }
    return new ParseException(text + ": " + details);
  }

  /**
   * Produces the context part of an error message: up to 60 buffered characters
   * ending with the current one (AFTER) and, if the buffer is not exhausted,
   * the characters that follow it (BEFORE).
   */
  private String getContext() {
    final StringBuilder sb = new StringBuilder();
    if (start >= 0) {
      final int from = Math.max(start - 60, 0);
      sb.append(" AFTER='").append(errEscape(from, start + 1)).append("'");
    }
    if (start < end) {
      sb.append(" BEFORE='").append(errEscape(start + 1, start + 40)).append("'");
    }
    return sb.toString();
  }

  /**
   * Returns the buffer contents in {@code [a, b)}, clipped to the valid part of
   * the buffer, with every run of whitespace collapsed into a single space.
   *
   * @param a index of the first character
   * @param b index one past the last character
   * @return the cleaned-up excerpt, or "" if the range is empty
   */
  private String errEscape(int a, int b) {
    final int stop = Math.min(b, end);
    if (a >= stop) {
      return "";
    }
    final String excerpt = new String(buf, a, stop - a);
    return excerpt.replaceAll("\\s+", " ");
  }


  private boolean bool; // boolean value read
  private long lval;    // long value read
  private int nstate;   // current state while reading a number
  private static final int HAS_FRACTION = 0x01;  // nstate flag, '.' already read
  private static final int HAS_EXPONENT = 0x02;  // nstate flag, '[eE][+-]?[0-9]' already read

  /**
   * Reads a number whose first digit has already been consumed, copying its
   * characters to {@code out} and recording the kind of number in
   * {@code valstate} (LONG, NUMBER or BIGNUMBER).
   * <p>
   * Returns the long read... only significant if valstate==LONG after
   * this call.  firstChar should be the first numeric digit read.
   * <p>
   * At most 23 characters are consumed here.  If the number is longer,
   * valstate is set to BIGNUMBER and the remaining characters are left in the
   * input.
   *
   * @param firstChar the first digit of the number
   * @param isNeg     true if the number was preceded by a minus sign
   * @return the value if it turned out to be a LONG; 0 when a fraction or
   *         exponent was found or the digit limit was reached
   * @throws IOException if the reader fails
   */
  private long readNumber(int firstChar, boolean isNeg) throws IOException {
    out.unsafeWrite(firstChar);   // unsafe OK since we know output is big enough
    // We build up the number in the negative plane since it's larger (by one) than
    // the positive plane.
    long v = '0' - firstChar;
    // can't overflow a long in 18 decimal digits (i.e. 17 additional after the first).
    // we also need 22 additional to handle double so we'll handle in 2 separate loops.
    int i;
    for (i = 0; i < 17; i++) {
      int ch = getChar();
      // TODO: is this switch faster as an if-then-else?
      switch (ch) {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          v = v * 10 - (ch - '0');
          out.unsafeWrite(ch);
          continue;
        case '.':
          out.unsafeWrite('.');
          // the fraction may use whatever is left of the 22 character budget
          valstate = readFrac(out, 22 - i);
          return 0;
        case 'e':
        case 'E':
          out.unsafeWrite(ch);
          nstate = 0;
          valstate = readExp(out, 22 - i);
          return 0;
        default:
          // return the number, relying on nextEvent() to return an error
          // for invalid chars following the number.
          if (ch != -1) --start;   // push back last char if not EOF

          valstate = LONG;
          // v was accumulated as a negative number
          return isNeg ? v : -v;
      }
    }

    // after this, we could overflow a long and need to do extra checking
    boolean overflow = false;
    // most negative value v may reach: -Long.MAX_VALUE for a positive number
    // because v gets negated at the end
    long maxval = isNeg ? Long.MIN_VALUE : -Long.MAX_VALUE;

    for (; i < 22; i++) {
      int ch = getChar();
      switch (ch) {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          if (v < (0x8000000000000000L / 10)) overflow = true;  // can't multiply by 10 w/o overflowing
          v *= 10;
          int digit = ch - '0';
          if (v < maxval + digit) overflow = true; // can't add digit w/o overflowing
          v -= digit;
          out.unsafeWrite(ch);
          continue;
        case '.':
          out.unsafeWrite('.');
          valstate = readFrac(out, 22 - i);
          return 0;
        case 'e':
        case 'E':
          out.unsafeWrite(ch);
          nstate = 0;
          valstate = readExp(out, 22 - i);
          return 0;
        default:
          // return the number, relying on nextEvent() to return an error
          // for invalid chars following the number.
          if (ch != -1) --start;   // push back last char if not EOF

          valstate = overflow ? BIGNUMBER : LONG;
          return isNeg ? v : -v;
      }
    }


    // digit limit reached while still in the integer part: the remaining
    // characters are left in the input
    nstate = 0;
    valstate = BIGNUMBER;
    return 0;
  }


  /**
   * Reads the digits to the right of the decimal point, which has already been
   * consumed, and continues with the exponent if one follows.
   *
   * @param arr receives the characters read
   * @param lim maximum number of characters to consume
   * @return NUMBER if the number ended within the limit, BIGNUMBER if the limit
   *         was reached first
   * @throws IOException if the reader fails
   */
  private int readFrac(CharArr arr, int lim) throws IOException {
    nstate = HAS_FRACTION;  // deliberate set instead of '|'
    for (int budget = lim - 1; budget >= 0; budget--) {
      final int c = getChar();
      if (c >= '0' && c <= '9') {
        arr.write(c);
        continue;
      }
      if (c == 'e' || c == 'E') {
        arr.write(c);
        return readExp(arr, budget);
      }
      if (c != -1) {
        start--; // back up
      }
      return NUMBER;
    }
    return BIGNUMBER;
  }


  /**
   * Reads an exponent; to be called once the 'e' or 'E' has been consumed.  An
   * optional sign and at least one digit are required.
   *
   * @param arr receives the characters read
   * @param lim maximum number of characters to consume
   * @return NUMBER if the number ended within the limit, BIGNUMBER if the limit
   *         was reached first
   * @throws ParseException if no digit follows
   * @throws IOException if the reader fails
   */
  private int readExp(CharArr arr, int lim) throws IOException {
    nstate |= HAS_EXPONENT;
    int remaining = lim - 1;
    int c = getChar();

    final boolean signed = (c == '+' || c == '-');
    if (signed) {
      arr.write(c);
      c = getChar();
      remaining--;
    }

    // make sure at least one digit is read.
    final boolean digit = (c >= '0' && c <= '9');
    if (!digit) {
      throw err("missing exponent number");
    }
    arr.write(c);

    return readExpDigits(arr, remaining);
  }

  /**
   * Reads the remaining digits of an exponent (continuation of readExp).
   *
   * @param arr receives the digits read
   * @param lim maximum number of characters to consume
   * @return NUMBER if the number ended within the limit, BIGNUMBER if the limit
   *         was reached first
   * @throws IOException if the reader fails
   */
  private int readExpDigits(CharArr arr, int lim) throws IOException {
    for (int budget = lim - 1; budget >= 0; budget--) {
      final int c = getChar();
      final boolean digit = (c >= '0' && c <= '9');
      if (!digit) {
        if (c != -1) {
          start--; // back up
        }
        return NUMBER;
      }
      arr.write(c);
    }
    return BIGNUMBER;
  }

  /**
   * Reads the rest of a number that was too long to be handled completely by
   * readNumber, resuming in the part of the number (integer digits, fraction or
   * exponent) recorded in {@code nstate}.
   *
   * @param arr receives the number; if it is not {@code out} itself, the part
   *            already collected in {@code out} is copied to it first
   * @throws IOException if the reader fails
   */
  private void continueNumber(CharArr arr) throws IOException {
    if (arr != out) {
      arr.write(out);
    }

    final boolean inExponent = (nstate & HAS_EXPONENT) != 0;
    if (inExponent) {
      readExpDigits(arr, Integer.MAX_VALUE);
      return;
    }
    if (nstate != 0) {
      readFrac(arr, Integer.MAX_VALUE);
      return;
    }

    // still in the integer part
    int c = getChar();
    while (c >= '0' && c <= '9') {
      arr.write(c);
      c = getChar();
    }

    if (c == '.') {
      arr.write(c);
      readFrac(arr, Integer.MAX_VALUE);
    } else if (c == 'e' || c == 'E') {
      arr.write(c);
      readExp(arr, Integer.MAX_VALUE);
    } else if (c != -1) {
      start--;
    }
  }


  /**
   * Converts an ASCII hexadecimal digit to its numeric value.
   *
   * @param hexdig one of 0-9, a-f or A-F
   * @return the value 0..15
   * @throws ParseException if {@code hexdig} is not a hexadecimal digit
   */
  private int hexval(int hexdig) {
    if ('0' <= hexdig && hexdig <= '9') {
      return hexdig - '0';
    }
    // Setting bit 0x20 maps 'A'..'F' onto 'a'..'f'; no other character can
    // land in that range this way.
    final int lower = hexdig | 0x20;
    if ('a' <= lower && lower <= 'f') {
      return lower - 'a' + 10;
    }
    throw err("invalid hex digit");
  }

  /**
   * Decodes one escape sequence; the backslash has already been read when this
   * is called.
   * <p>
   * Besides the standard JSON escapes (and the single quote), any other
   * character is accepted literally when
   * {@link #ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER} is set.  Reaching the end of
   * the input right after the backslash is always an error.
   *
   * @return the character the escape sequence stands for
   * @throws ParseException if the escape is invalid or the input ends
   * @throws IOException if the reader fails
   */
  private char readEscapedChar() throws IOException {
    int ch = getChar();
    switch (ch) {
      case '"':
        return '"';
      case '\'':
        return '\'';
      case '\\':
        return '\\';
      case '/':
        return '/';
      case 'n':
        return '\n';
      case 'r':
        return '\r';
      case 't':
        return '\t';
      case 'f':
        return '\f';
      case 'b':
        return '\b';
      case 'u':
        // four hex digits, most significant first
        return (char) (
            (hexval(getChar()) << 12)
                | (hexval(getChar()) << 8)
                | (hexval(getChar()) << 4)
                | (hexval(getChar())));
    }
    // getChar() signals end of input with -1.  The EOF *event* constant must not
    // be used here: its value is a legitimate character code.
    if ((flags & ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER) != 0 && ch != -1) {
      return (char) ch;
    }
    throw err("Invalid character escape");
  }

  // a dummy buffer we can use to point at other buffers
  private final CharArr tmp = new CharArr(null, 0, 0);

  /**
   * Reads the remainder of the current string value.
   * <p>
   * If the whole string is available in the input buffer and contains no
   * escapes, the returned object simply points into the input buffer;
   * otherwise the characters are collected in {@code out}.
   *
   * @return the characters of the string
   * @throws IOException if the reader fails
   */
  private CharArr readStringChars() throws IOException {
    if (stringTerm == 0) {
      // "out" will already contain the first part of the bare string, so don't reset it
      readStringBare(out);
      return out;
    }

    final char quote = (char) stringTerm;
    int pos = start;
    while (pos < end) {
      final char c = buf[pos];
      if (c == quote) {
        tmp.set(buf, start, pos);  // directly use input buffer
        start = pos + 1;           // advance past the closing quote
        return tmp;
      }
      if (c == '\\') {
        break;                     // escapes need the slow path
      }
      pos++;
    }

    out.reset();
    readStringChars2(out, pos);
    return out;
  }


  // middle is the pointer to the middle of a buffer to start scanning for a non-string
  // character ('"' or "/").  start<=middle= end) {
        arr.write(buf, start, middle - start);
        start = middle;
        getMore();
        middle = start;
      }
      int ch = buf[middle++];
      if (ch == terminator) {
        int len = middle - start - 1;
        if (len > 0) arr.write(buf, start, len);
        start = middle;
        return;
      } else if (ch == '\\') {
        int len = middle - start - 1;
        if (len > 0) arr.write(buf, start, len);
        start = middle;
        arr.write(readEscapedChar());
        middle = start;
      }
    }
  }

  /**
   * Reads the remainder of an unquoted (bare) string.  The beginning of the
   * string has already been collected in {@code out}.
   * <p>
   * The string ends at the first character that is neither an unquoted-string
   * character nor the start of a backslash escape; that character is pushed
   * back.  End of input ends the string as well.
   *
   * @param arr receives the string; if it is not {@code out} itself, the part
   *            already collected in {@code out} is appended to it first
   * @throws IOException if the reader fails
   */
  private void readStringBare(CharArr arr) throws IOException {
    if (arr != out) {
      arr.append(out);
    }

    for (; ; ) {
      int ch = getChar();
      if (isUnquotedStringChar(ch)) {
        // a backslash is never an unquoted-string character, so no escape
        // handling is needed on this path
        arr.write(ch);
      } else if (ch == '\\') {
        arr.write(readEscapedChar());
      } else {
        if (ch != -1) start--;   // push back the terminating character
        break;
      }
    }
  }


  /**
   * Sets the parser up for a string that does not start with a double quote:
   * either a single-quoted string or an unquoted one.
   *
   * @param ch     the first character of the string (the quote, or the first
   *               character of an unquoted string)
   * @param isName true if the string is a field name (as opposed to a value)
   * @throws ParseException if this kind of string is not permitted by the
   *                        current flags, the input has ended, or {@code ch}
   *                        cannot start an unquoted string
   * @throws IOException declared for callers; see the methods invoked
   */
  protected void handleNonDoubleQuoteString(int ch, boolean isName) throws IOException {
    if (ch == '\'') {
      stringTerm = ch;
      if ((flags & ALLOW_SINGLE_QUOTES) == 0) {
        throw err("Single quoted strings not allowed");
      }
      return;
    }

    final int requiredFlag = isName ? ALLOW_UNQUOTED_KEYS : ALLOW_UNQUOTED_STRING_VALUES;
    final boolean bareAllowed = (flags & requiredFlag) != 0;
    if (!bareAllowed || eof) {
      throw err(isName ? "Expected quoted string" : null);
    }

    if (!isUnquotedStringStart(ch)) {
      throw err(null);
    }

    stringTerm = 0;  // signal for unquoted string
    out.reset();
    out.unsafeWrite(ch);
  }

  /**
   * Tells whether a character may begin an unquoted string: exactly the
   * characters that may begin a Java identifier.
   */
  private static boolean isUnquotedStringStart(int ch) {
    return Character.isJavaIdentifierStart(ch);
  }

  /**
   * Tells whether a character may continue an unquoted string once we know we
   * are in one: any Java identifier part, plus '.', '-' and '/'.
   * <p>
   * A far more liberal rule (everything except whitespace, the structural
   * characters, '=' and backslash) was considered but is not in use.
   */
  private static boolean isUnquotedStringChar(int ch) {
    switch (ch) {
      case '.':
      case '-':
      case '/':
        return true;
      default:
        return Character.isJavaIdentifierPart(ch);
    }
  }


  /*** alternate implementation
   // middle is the pointer to the middle of a buffer to start scanning for a non-string
   // character ('"' or "/").  start<=middle=end) {
   getMore();
   middle=start;
   } else {
   start = middle+1;   // set buffer pointer to correct spot
   if (ch=='"') {
   valstate=0;
   return;
   } else if (ch=='\\') {
   arr.write(readEscapedChar());
   if (start>=end) getMore();
   middle=start;
   }
   }
   }
   }
   ***/


  /**
   * Returns the next event when the parser is in a neutral state, i.e. there
   * are no map separators or array element separators to read first.
   * <p>
   * For strings only the opening of the value is consumed here
   * ({@code stringTerm} and {@code valstate} are set up); numbers are read into
   * {@code out} as far as readNumber goes.
   *
   * @param ch the first character of the value; whitespace and comments in
   *           front of the value are skipped
   * @return the event code
   * @throws ParseException if the input is not acceptable at this point
   * @throws IOException if the reader fails
   */
  private int next(int ch) throws IOException {
    // TODO: try my own form of indirect jump... look up char class and index directly into handling implementation?
    for (; ; ) {
      switch (ch) {
        case ' ': // this is not the exclusive list of whitespace chars... the rest are handled in default:
        case '\t':
        case '\r':
        case '\n':
          ch = getCharNWS(); // calling getCharNWS here seems faster than letting the switch handle it
          break;
        case '"':
          stringTerm = '"';
          valstate = STRING;
          return STRING;
        case '\'':
          if ((flags & ALLOW_SINGLE_QUOTES) == 0) {
            throw err("Single quoted strings not allowed");
          }
          stringTerm = '\'';
          valstate = STRING;
          return STRING;
        case '{':
          push();
          state = DID_OBJSTART;
          return OBJECT_START;
        case '[':
          push();
          state = DID_ARRSTART;
          return ARRAY_START;
        case '0':
          out.reset();
          //special case '0'?  If next char isn't '.' val=0
          ch = getChar();
          if (ch == '.') {
            // push the '.' back and let readNumber handle "0." as usual
            start--;
            readNumber('0', false);
            return valstate;
          } else if (ch > '9' || ch < '0') {
            out.unsafeWrite('0');
            if (ch != -1) start--;
            lval = 0;
            valstate = LONG;
            return LONG;
          } else {
            throw err("Leading zeros not allowed");
          }
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          out.reset();
          lval = readNumber(ch, false);
          return valstate;
        case '-':
          out.reset();
          out.unsafeWrite('-');
          ch = getChar();
          if (ch < '0' || ch > '9') throw err("expected digit after '-'");
          lval = readNumber(ch, true);
          return valstate;
        case 't':
          // TODO: test performance of this non-branching inline version.
          // if ((('r'-getChar())|('u'-getChar())|('e'-getChar())) != 0) throw err("");
          if (matchBareWord(JSONUtil.TRUE_CHARS)) {
            bool = true;
            valstate = BOOLEAN;
            return valstate;
          } else {
            valstate = STRING;
            return STRING;
          }
        case 'f':
          if (matchBareWord(JSONUtil.FALSE_CHARS)) {
            bool = false;
            valstate = BOOLEAN;
            return valstate;
          } else {
            valstate = STRING;
            return STRING;
          }
        case 'n':
          if (matchBareWord(JSONUtil.NULL_CHARS)) {
            valstate = NULL;
            return valstate;
          } else {
            valstate = STRING;
            return STRING;
          }
        case '/':
          getSlashComment();
          ch = getChar();
          break;
        case '#':
          getNewlineComment();
          ch = getChar();
          break;
        case ']':  // This only happens with a trailing comma (or an error)
          if (state != DID_ARRELEM || (flags & ALLOW_EXTRA_COMMAS) == 0) {
            throw err("Unexpected array closer ]");
          }
          pop();
          return event = ARRAY_END;
        case '}':  // This only happens with a trailing comma (or an error)
          if (state != DID_MEMVAL || (flags & ALLOW_EXTRA_COMMAS) == 0) {
            throw err("Unexpected object closer }");
          }
          pop();
          // an object is being closed here, so report OBJECT_END (not ARRAY_END)
          return event = OBJECT_END;
        case ',': // This only happens with input like [1,]
          if ((state != DID_ARRELEM && state != DID_MEMVAL) || (flags & ALLOW_EXTRA_COMMAS) == 0) {
            throw err("Unexpected comma");
          }
          ch = getChar();
          break;
        case -1:
          if (getLevel() > 0) throw err("Premature EOF");
          return EOF;
        default:
          // Handle unusual unicode whitespace like no-break space (0xA0)
          if (isWhitespace(ch)) {
            ch = getChar();  // getCharNWS() would also work
            break;
          }
          handleNonDoubleQuoteString(ch, false);
          valstate = STRING;
          return STRING;
      }

    }
  }

  /**
   * Returns a short dump of the parser's internal position and state, for
   * debugging.
   */
  @Override
  public String toString() {
    // every field is separated by a comma (the one before "valstate" was missing)
    return "start=" + start + ",end=" + end + ",state=" + state + ",valstate=" + valstate;
  }


  /**
   * Returns the next event encountered in the JSON stream, one of
   * 
    *
  • {@link #STRING}
  • *
  • {@link #LONG}
  • *
  • {@link #NUMBER}
  • *
  • {@link #BIGNUMBER}
  • *
  • {@link #BOOLEAN}
  • *
  • {@link #NULL}
  • *
  • {@link #OBJECT_START}
  • *
  • {@link #OBJECT_END}
  • *
  • {@link #OBJECT_END}
  • *
  • {@link #ARRAY_START}
  • *
  • {@link #ARRAY_END}
  • *
  • {@link #EOF}
  • *
*/ public int nextEvent() throws IOException { if (valstate != 0) { if (valstate == STRING) { readStringChars2(devNull, start); } else if (valstate == BIGNUMBER) { continueNumber(devNull); } valstate = 0; } int ch; outer: for (; ; ) { switch (state) { case 0: event = next(getChar()); if (event == STRING && (flags & OPTIONAL_OUTER_BRACES) != 0) { if (start > 0) start--; missingOpeningBrace = true; stringTerm = 0; valstate = 0; event = next('{'); } return event; case DID_OBJSTART: ch = getCharExpected('"'); if (ch == '}') { pop(); return event = OBJECT_END; } if (ch == '"') { stringTerm = ch; } else if (ch == ',' && (flags & ALLOW_EXTRA_COMMAS) != 0) { continue outer; } else { handleNonDoubleQuoteString(ch, true); } state = DID_MEMNAME; valstate = STRING; return event = STRING; case DID_MEMNAME: ch = getCharExpected(':'); if (ch != ':') { if ((ch == '{' || ch == '[') && (flags & ALLOW_MISSING_COLON_COMMA_BEFORE_OBJECT) != 0) { start--; } else { throw err("Expected key,value separator ':'"); } } state = DID_MEMVAL; // set state first because it might be pushed... return event = next(getChar()); case DID_MEMVAL: ch = getCharExpected(','); if (ch == '}') { pop(); return event = OBJECT_END; } else if (ch != ',') { if ((flags & ALLOW_EXTRA_COMMAS) != 0 && (ch == '\'' || ch == '"' || Character.isLetter(ch))) { start--; } else if (missingOpeningBrace && ch == -1 && (flags & OPTIONAL_OUTER_BRACES) != 0) { missingOpeningBrace = false; pop(); return event = OBJECT_END; } else throw err("Expected ',' or '}'"); } ch = getCharExpected('"'); if (ch == '"') { stringTerm = ch; } else if ((ch == ',' || ch == '}') && (flags & ALLOW_EXTRA_COMMAS) != 0) { if (ch == ',') continue outer; pop(); return event = OBJECT_END; } else { handleNonDoubleQuoteString(ch, true); } state = DID_MEMNAME; valstate = STRING; return event = STRING; case DID_ARRSTART: ch = getCharNWS(); if (ch == ']') { pop(); return event = ARRAY_END; } state = DID_ARRELEM; // set state first, might be pushed... 
return event = next(ch); case DID_ARRELEM: ch = getCharExpected(','); if (ch == ',') { // state = DID_ARRELEM; // redundant return event = next(getChar()); } else if (ch == ']') { pop(); return event = ARRAY_END; } else { if ((ch == '{' || ch == '[') && (flags & ALLOW_MISSING_COLON_COMMA_BEFORE_OBJECT) != 0) { return event = next(ch); } else { throw err("Expected ',' or ']'"); } } } } // end for(;;) } public int lastEvent() { return event; } public boolean wasKey() { return state == DID_MEMNAME; } private void goTo(int what) throws IOException { if (valstate == what) { valstate = 0; return; } if (valstate == 0) { /*int ev = */ nextEvent(); // TODO if (valstate != what) { throw err("type mismatch"); } valstate = 0; } else { throw err("type mismatch"); } } /** * Returns the JSON string value, decoding any escaped characters. */ public String getString() throws IOException { return getStringChars().toString(); } /** * Returns the characters of a JSON string value, decoding any escaped characters. * The underlying buffer of the returned CharArr should *not* be * modified as it may be shared with the input buffer. * The returned CharArr will only be valid up until * the next JSONParser method is called. Any required data should be * read before that point. */ public CharArr getStringChars() throws IOException { goTo(STRING); return readStringChars(); } /** * Reads a JSON string into the output, decoding any escaped characters. */ public void getString(CharArr output) throws IOException { goTo(STRING); readStringChars2(output, start); } /** * Reads a number from the input stream and parses it as a long, only if * the value will in fact fit into a signed 64 bit integer. */ public long getLong() throws IOException { goTo(LONG); return lval; } /** * Reads a number from the input stream and parses it as a double */ public double getDouble() throws IOException { return Double.parseDouble(getNumberChars().toString()); } /** * Returns the characters of a JSON numeric value. *

The underlying buffer of the returned CharArr should *not* be * modified as it may be shared with the input buffer. *

The returned CharArr will only be valid up until * the next JSONParser method is called. Any required data should be * read before that point. */ public CharArr getNumberChars() throws IOException { int ev = 0; if (valstate == 0) ev = nextEvent(); if (valstate == LONG || valstate == NUMBER) { valstate = 0; return out; } else if (valstate == BIGNUMBER) { continueNumber(out); valstate = 0; return out; } else { throw err("Unexpected " + ev); } } /** * Reads a JSON numeric value into the output. */ public void getNumberChars(CharArr output) throws IOException { int ev = 0; if (valstate == 0) ev = nextEvent(); if (valstate == LONG || valstate == NUMBER) output.write(this.out); else if (valstate == BIGNUMBER) { continueNumber(output); } else { throw err("Unexpected " + ev); } valstate = 0; } /** * Reads a boolean value */ public boolean getBoolean() throws IOException { goTo(BOOLEAN); return bool; } /** * Reads a null value */ public void getNull() throws IOException { goTo(NULL); } /** * @return the current nesting level, the number of parent objects or arrays. */ public int getLevel() { return ptr; } public long getPosition() { return gpos + start; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy