All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.addthis.maljson.JSONTokener Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.addthis.maljson;

// Note: this class was written without inspecting the non-free org.json sourcecode.

/**
 * Parses a JSON (RFC 4627)
 * encoded string into the corresponding object. Most clients of
 * this class will use only need the {@link #JSONTokener(String) constructor}
 * and {@link #nextValue} method. Example usage: 
 * String json = "{"
 *         + "  \"query\": \"Pizza\", "
 *         + "  \"locations\": [ 94043, 90210 ] "
 *         + "}";
 *
 * JSONObject object = (JSONObject) new JSONTokener(json).nextValue();
 * String query = object.getString("query");
 * JSONArray locations = object.getJSONArray("locations");
* *

For best interoperability and performance use JSON that complies with * RFC 4627, such as that generated by {@link JSONStringer}. For legacy reasons * this parser is lenient, so a successful parse does not indicate that the * input string was valid JSON. All of the following syntax errors will be * ignored: *

    *
  • End of line comments starting with {@code //} or {@code #} and ending * with a newline character. *
  • C-style comments starting with {@code /*} and ending with * {@code *}{@code /}. Such comments may not be nested. *
  • Strings that are unquoted or {@code 'single quoted'}. *
  • Hexadecimal integers prefixed with {@code 0x} or {@code 0X}. *
  • Octal integers prefixed with {@code 0}. *
  • Array elements separated by {@code ;}. *
  • Unnecessary array separators. These are interpreted as if null was the * omitted value. *
  • Key-value pairs separated by {@code =} or {@code =>}. *
  • Key-value pairs separated by {@code ;}. *
* *

Each tokener may be used to parse a single JSON string. Instances of this * class are not thread safe. Although this class is nonfinal, it was not * designed for inheritance and should not be subclassed. In particular, * self-use by overrideable methods is not specified. See Effective Java * Item 17, "Design and Document or inheritance or else prohibit it" for further * information. */ public class JSONTokener { /** The input JSON. */ private final String in; /** * If false then throw an {@link com.addthis.maljson.JSONException} * when attempting to insert a key that already exists into the object. */ private final boolean allowDuplicates; /** * The index of the next character to be returned by {@link #next}. When * the input is exhausted, this equals the input's length. */ private int pos; /** * The current line number. Line numbers are 0-based. The line feed character * will trigger a new line. As a consequence newline systems that use only the * carriage return will not be parsed correctly. */ private int line; /** * The current column offset within the current line. Column offsets are 0-based. */ private int col; /** * @param in JSON encoded string. Null is not permitted and will yield a * tokener that throws {@code NullPointerExceptions} when methods are * called. */ public JSONTokener(String in) { this(in, true); } public JSONTokener(String in, boolean allowDuplicates) { // consume an optional byte order mark (BOM) if it exists if (in != null && in.startsWith("\ufeff")) { in = in.substring(1); } this.in = in; this.allowDuplicates = allowDuplicates; } /** * Returns the next value from the input. * * @return a {@link JSONObject}, {@link JSONArray}, String, Boolean, * Integer, Long, Double or {@link JSONObject#NULL}. * @throws JSONException if the input is malformed. */ public Object nextValue() throws JSONException { return nextValue(false); } Object nextValue(boolean returnSentinel) throws JSONException { int c = nextCleanInternal(); switch (c) { case -1: throw syntaxError("End of input"); case '{': return readObject(); case '[': return readArray(); case '\'': case '"': return nextString((char) c); default: back(); Object literal = readLiteral(); if (!returnSentinel && literal == sentinel) { // construct JSONException with stack trace information throw syntaxError("Expected literal value"); } return literal; } } private int nextCleanInternal() throws JSONException { return nextCleanConsume(true); } /** * Move forward to the next non-whitespace or comment character. * If {@code consume} is true then consume the character that is returned. */ private int nextCleanConsume(boolean consume) throws JSONException { try { for(;pos < in.length(); advance()) { int c = in.charAt(pos); switch (c) { case '\t': case ' ': case '\r': case '\n': continue; case '/': if (pos == in.length()) { return c; } char peek = peek(); switch (peek) { case '*': // skip a /* c-style comment */ int commentEnd = in.indexOf("*/", pos); if (commentEnd == -1) { throw syntaxError("Unterminated comment"); } skipToPosition(commentEnd + 1); continue; case '/': // skip a // end-of-line comment skipToEndOfLine(); continue; default: return c; } case '#': /* * Skip a # hash end-of-line comment. The JSON RFC doesn't * specify this behavior, but it's required to parse * existing documents. See http://b/2571423. */ skipToEndOfLine(); continue; default: return c; } } } finally { if (consume) { advance(); } } return -1; } /** * Advances the position until the next '\r' or '\n' character. * The caller is responsible for consuming the newline characters. */ private void skipToEndOfLine() { for (;pos < in.length(); advance()) { char c = peek(); if (c == '\r' || c == '\n') { break; } } } /** * Returns the string up to but not including {@code quote}, unescaping any * character escape sequences encountered along the way. The opening quote * should have already been read. This consumes the closing quote, but does * not include it in the returned string. * * @param quote either ' or ". * @throws NumberFormatException if any unicode escape sequences are * malformed. */ public String nextString(char quote) throws JSONException { /* * For strings that are free of escape sequences, we can just extract * the result as a substring of the input. But if we encounter an escape * sequence, we need to use a StringBuilder to compose the result. */ StringBuilder builder = null; /* the index of the first character not yet appended to the builder. */ int start = pos; while (pos < in.length()) { int c = advance(); if (c == quote) { if (builder == null) { // a new string avoids leaking memory return new String(in.substring(start, pos - 1)); } else { builder.append(in, start, pos - 1); return builder.toString(); } } if (c == '\\') { if (pos == in.length()) { back(); throw syntaxError("Unterminated escape sequence"); } if (builder == null) { builder = new StringBuilder(); } builder.append(in, start, pos - 1); builder.append(readEscapeCharacter()); start = pos; } } back(pos - start); throw syntaxError("Unterminated string"); } /** * Unescapes the character identified by the character or characters that * immediately follow a backslash. The backslash '\' should have already * been read. This supports both unicode escapes "u000A" and two-character * escapes "\n". * * @throws NumberFormatException if any unicode escape sequences are * malformed. */ private char readEscapeCharacter() throws JSONException { char escaped = advance(); switch (escaped) { case 'u': if (pos + 4 > in.length()) { throw syntaxError("Unterminated escape sequence"); } String hex = in.substring(pos, pos + 4); skipToPosition(pos + 4); return (char) Integer.parseInt(hex, 16); case 't': return '\t'; case 'b': return '\b'; case 'n': return '\n'; case 'r': return '\r'; case 'f': return '\f'; case '\'': case '"': case '\\': default: return escaped; } } /** * Constructing an exception is move expensive than throwing an expensive. * Create a static JSONException to denote the trailing comma. Construct * the actual exception with stack trace information if necessary. */ private static final JSONException sentinel = new JSONException("Expected literal value"); /** * Reads a null, boolean, numeric or unquoted string literal value. Numeric * values will be returned as an Integer, Long, or Double, in that order of * preference. */ private Object readLiteral() throws JSONException { String literal = nextToInternal("{}[]/\\:,=;# \t\f"); if (literal.length() == 0) { skipWhitespace(); if (current() == '}') { return sentinel; // trailing comma sentinel } else { throw syntaxError("Expected literal value"); } } else if ("null".equalsIgnoreCase(literal)) { return JSONObject.NULL; } else if ("true".equalsIgnoreCase(literal)) { return Boolean.TRUE; } else if ("false".equalsIgnoreCase(literal)) { return Boolean.FALSE; } /* try to parse as an integral type... */ if (literal.indexOf('.') == -1) { int base = 10; String number = literal; if (number.startsWith("0x") || number.startsWith("0X")) { number = number.substring(2); base = 16; } else if (number.startsWith("0") && number.length() > 1) { number = number.substring(1); base = 8; } try { long longValue = Long.parseLong(number, base); if (longValue <= Integer.MAX_VALUE && longValue >= Integer.MIN_VALUE) { return (int) longValue; } else { return longValue; } } catch (NumberFormatException e) { /* * This only happens for integral numbers greater than * Long.MAX_VALUE, numbers in exponential form (5e-10) and * unquoted strings. Fall through to try floating point. */ } } /* ...next try to parse as a floating point... */ try { return Double.valueOf(literal); } catch (NumberFormatException ignored) { } /* ... finally give up. We have an unquoted string */ return new String(literal); // a new string avoids leaking memory } /** * Returns the string up to but not including any of the given characters or * a newline character. This does not consume the excluded character. */ private String nextToInternal(String excluded) { int start = pos; for (; pos < in.length(); advance()) { char c = in.charAt(pos); if (c == '\r' || c == '\n' || excluded.indexOf(c) != -1) { return in.substring(start, pos); } } return in.substring(start); } /** * Reads a sequence of key/value pairs and the trailing closing brace '}' of * an object. The opening brace '{' should have already been read. */ private JSONObject readObject() throws JSONException { JSONObject result = new JSONObject().setAllowDuplicates(allowDuplicates); /* Peek to see if this is the empty object. */ int first = nextCleanInternal(); if (first == '}') { return result; } else if (first != -1) { back(); } while (true) { skipWhitespace(); LineNumberInfo keyInfo = getLineNumberInfo(); Object name = nextValue(true); if (name == sentinel) { advance(); return result; } if (!(name instanceof String)) { if (name == null) { throw syntaxError("Names cannot be null"); } else { throw syntaxError("Names must be strings, but " + name + " is of type " + name.getClass().getName()); } } /* * Expect the name/value separator to be either a colon ':', an * equals sign '=', or an arrow "=>". The last two are bogus but we * include them because that's what the original implementation did. */ int separator = nextCleanInternal(); if (separator != ':' && separator != '=') { throw syntaxError("Expected ':' after " + name); } if (pos < in.length() && in.charAt(pos) == '>') { advance(); } skipWhitespace(); LineNumberInfo valInfo = getLineNumberInfo(); Object val = nextValue(); result.put((String) name, val, keyInfo, valInfo); switch (nextCleanInternal()) { case '}': return result; case ';': case ',': continue; default: throw syntaxError("Unterminated object"); } } } /** * Reads a sequence of values and the trailing closing brace ']' of an * array. The opening brace '[' should have already been read. Note that * "[]" yields an empty array, but "[,]" returns a two-element array * equivalent to "[null,null]". */ private JSONArray readArray() throws JSONException { JSONArray result = new JSONArray(); while (true) { switch (nextCleanInternal()) { case -1: throw syntaxError("Unterminated array"); case ']': return result; case ',': case ';': continue; default: back(); } skipWhitespace(); LineNumberInfo lineInfo = getLineNumberInfo(); result.put(nextValue()); result.setLineNumber(result.length() - 1, lineInfo); switch (nextCleanInternal()) { case ']': return result; case ',': case ';': continue; default: throw syntaxError("Unterminated array"); } } } /** * Returns an exception containing the given message plus the current * position and the entire input string. */ public JSONException syntaxError(String message) { return syntaxError(message, true); } public JSONException syntaxError(String message, boolean stepBack) { if (stepBack) { back(); } return new JSONException(message, getLineNumberInfo()); } /** * Returns the current position and the entire input string. */ @Override public String toString() { // consistent with the original implementation return " at character " + pos + " of " + in; } /* * Legacy APIs. * * None of the methods below are on the critical path of parsing JSON * documents. They exist only because they were exposed by the original * implementation and may be used by some clients. */ /** * Returns true until the input has been exhausted. */ public boolean more() { return pos < in.length(); } /** * Returns the next available character, or the null character '\0' if all * input has been exhausted. The return value of this method is ambiguous * for JSON strings that contain the character '\0'. */ public char next() { return advance(); } /** * The upstream project has a suite of self use tests that measure * the number of {@link JSONTokener#next()}} invocations. Internally * invoke {@code advance()} so that these measurements are unchanged. */ private char advance() { char next = pos < in.length() ? in.charAt(pos++) : '\0'; if (next == '\n') { line++; col = 0; } else if (next != '\0') { col++; } return next; } public char peek() { return pos + 1 < in.length() ? in.charAt(pos + 1) : '\0'; } /** * Unreads the most recent character of input. If no input characters have * been read, the input is unchanged. */ public void back() { back(1); } private void back(int length) { for(int i = 0; i < length && pos > 0; i++) { pos--; if (in.charAt(pos) == '\n') { if (line > 1) { col = pos - in.lastIndexOf('\n', pos - 1); } else if (line == 1) { col = pos; } else { String msg = "Illegal state reached for input string \"" + in + "\" at position " + pos; throw new IllegalStateException(msg); } line--; } else { col--; } } } /** * Returns the current available character, or the null character '\0' if all * input has been exhausted. The return value of this method is ambiguous * for JSON strings that contain the character '\0'. */ public char current() { return pos < in.length() ? in.charAt(pos) : '\0'; } /** * Returns the next available character if it equals {@code c}. Otherwise an * exception is thrown. */ public char next(char c) throws JSONException { char result = next(); if (result != c) { JSONException exception = syntaxError("Expected " + c + " but was " + result); advance(); throw exception; } return result; } /** * Returns the next character that is not whitespace and does not belong to * a comment. If the input is exhausted before such a character can be * found, the null character '\0' is returned. The return value of this * method is ambiguous for JSON strings that contain the character '\0'. */ public char nextClean() throws JSONException { int nextCleanInt = nextCleanInternal(); return nextCleanInt == -1 ? '\0' : (char) nextCleanInt; } /** * Returns the next {@code length} characters of the input. * *

The returned string shares its backing character array with this * tokener's input string. If a reference to the returned string may be held * indefinitely, you should use {@code new String(result)} to copy it first * to avoid memory leaks. * * @throws JSONException if the remaining input is not long enough to * satisfy this request. */ public String next(int length) throws JSONException { if (pos + length > in.length()) { throw syntaxError(length + " is out of bounds", false); } String result = in.substring(pos, pos + length); skipToPosition(pos + length); return result; } /** * Returns the {@link String#trim trimmed} string holding the characters up * to but not including the first of: *

    *
  • any character in {@code excluded} *
  • a newline character '\n' *
  • a carriage return '\r' *
* *

The returned string shares its backing character array with this * tokener's input string. If a reference to the returned string may be held * indefinitely, you should use {@code new String(result)} to copy it first * to avoid memory leaks. * * @return a possibly-empty string */ public String nextTo(String excluded) { if (excluded == null) { throw new NullPointerException("excluded == null"); } return nextToInternal(excluded).trim(); } /** * Equivalent to {@code nextTo(String.valueOf(excluded))}. */ public String nextTo(char excluded) { return nextToInternal(String.valueOf(excluded)).trim(); } /** * Move the cursor past any whitespace or comment tokens. * @throws JSONException */ private void skipWhitespace() throws JSONException { nextCleanConsume(false); } /** * Advances past all input up to and including the next occurrence of * {@code thru}. If the remaining input doesn't contain {@code thru}, the * input is exhausted. */ public void skipPast(String thru) { int thruStart = in.indexOf(thru, pos); int newPos = thruStart == -1 ? in.length() : (thruStart + thru.length()); skipToPosition(newPos); } /** * Advances past all input up to but not including the next occurrence of * {@code to}. If the remaining input doesn't contain {@code to}, the input * is unchanged. */ public char skipTo(char to) { int index = in.indexOf(to, pos); if (index != -1) { skipToPosition(index); return to; } else { return '\0'; } } /** * Skip ahead to a new position. Updates the column and line information. * Precondition: pos <= newPos <= in.length(). */ public void skipToPosition(int newPos) { assert(pos <= newPos); assert(newPos <= in.length()); String[] lines = in.substring(pos, newPos).split("\n", -1); if (lines.length > 1) { line += (lines.length - 1); col = lines[lines.length - 1].length(); } else { col += lines[0].length(); } pos = newPos; } /** * Returns the integer [0..15] value for the given hex character, or -1 * for non-hex input. * * @param hex a character in the ranges [0-9], [A-F] or [a-f]. Any other * character will yield a -1 result. */ public static int dehexchar(char hex) { if (hex >= '0' && hex <= '9') { return hex - '0'; } else if (hex >= 'A' && hex <= 'F') { return hex - 'A' + 10; } else if (hex >= 'a' && hex <= 'f') { return hex - 'a' + 10; } else { return -1; } } public int getColumn() { return col; } public int getLine() { return line; } public LineNumberInfo getLineNumberInfo() { return new LineNumberInfo(line, col); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy