All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.freeutils.util.JSON Maven / Gradle / Ivy

The newest version!
/*
 *  Copyright © 2003-2024 Amichai Rothman
 *
 *  This file is part of JElementary - the Java Elementary Utilities package.
 *
 *  JElementary is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  JElementary is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with JElementary.  If not, see <https://www.gnu.org/licenses/>.
 *
 *  For additional info see https://www.freeutils.net/source/jelementary/
 */

package net.freeutils.util;

import java.io.*;
import java.lang.reflect.Array;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.function.Consumer;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

/**
 * A utility class for serializing and deserializing the JSON format,
 * as defined at json.org and RFC 8259.
 * 

* When parsing objects, name/value iteration order is preserved, and if * a name appears more than once, the last value associated with it is returned. * As the RFC recommends, for interoperability it is best not to rely on * order and not to use duplicate names. *

* In addition, the RFC requires that JSON text transmitted between systems * must be encoded as UTF-8 with no BOM. Parsers are allowed to be lenient * in accepting a BOM, which this implementation supports. */ public class JSON { /** * A wrapper for a parsed value when the subsequent * character was already read and must also be returned * for further processing. */ private static class Value { final Object value; final int nextChar; Value(Object value, int nextChar) { this.value = value; this.nextChar = nextChar; } } /** * Private constructor to avoid external instantiation. */ private JSON() {} /** * Escapes all characters within a given string that must be escaped * in a JSON string: quotation mark, reverse solidus, and the control * characters (U+0000 through U+001F). * * @param s a string * @return the escaped string */ private static String escape(String s) { if (s == null || s.isEmpty()) return s; int len = s.length(); StringBuilder escaped = null; // optimization - lazy init int j = 0; for (int i = 0; i < len; i++) { int c = s.charAt(i); // find escaped chars and their replacement value switch (c) { case '\\': break; case '\"': break; case '\b': c = 'b'; break; case '\t': c = 't'; break; case '\n': c = 'n'; break; case '\f': c = 'f'; break; case '\r': c = 'r'; break; default: if (c > 0x1F) continue; // skip non-escaped literals } // we found a character that needs escaping - // copy literals since last escape if (escaped == null) escaped = new StringBuilder(len + 8); escaped.append(s, j, i); // add special escape or unicode escape if (c > 0x1F) { escaped.append('\\').append((char)c); } else { int hex = Strings.toHex((byte)c); escaped.append("\\u00").append((char)(hex >> 16)).append((char)hex); } j = i + 1; } if (escaped == null) return s; // no escapes - return original string // copy remaining literals since last escape return escaped.append(s, j, len).toString(); } /** * Returns the JSON representation of the given data. *

* Complex nested data types are supported, including Maps, Collections, * arrays and any other object (which is represented by its bean properties via getters, * except for object whose package name starts with "java." which are represented by their * string value). *

* Note that according to the RFC, if transmitted between systems, * the JSON text must be encoded in UTF-8, without a BOM. * * @param data the data to convert * @return the JSON representation of the data */ public static String toJSON(Object... data) { if (data.length == 0) return ""; if (data.length > 1) return toJSON((Object)data); Object obj = data[0]; if (obj == null) { return "null"; } else if (obj instanceof Number) { if (Double.isFinite(((Number)obj).doubleValue())) return obj.toString(); return "null"; // Nan and Infinity are invalid JSON, serialized as null } else if (obj instanceof Boolean) { return obj.toString(); } else if (obj instanceof Collection) { return toJSON((Object)((Collection)obj).toArray()); } else if (obj instanceof Object[]) { Object[] arr = (Object[])obj; if (arr.length == 0) return "[]"; StringBuilder sb = new StringBuilder(128); sb.append('['); for (Object o : arr) sb.append(toJSON(o)).append(','); sb.setCharAt(sb.length() - 1, ']'); return sb.toString(); } else if (obj.getClass().isArray()) { int len = Array.getLength(obj); if (len == 0) return "[]"; StringBuilder sb = new StringBuilder(128); sb.append('['); for (int i = 0; i < len; i++) sb.append(toJSON(Array.get(obj, i))).append(','); sb.setCharAt(sb.length() - 1, ']'); return sb.toString(); } else if (obj instanceof Map) { Map map = (Map)obj; if (map.isEmpty()) return "{}"; StringBuilder sb = new StringBuilder(128); sb.append('{'); for (Map.Entry e : map.entrySet()) sb.append('"').append(escape(e.getKey().toString())) .append("\":").append(toJSON(e.getValue())).append(','); sb.setCharAt(sb.length() - 1, '}'); return sb.toString(); } else if (!obj.getClass().getPackage().getName().startsWith("java.")) { return toJSON(Reflect.getBeanFields(obj)); } else { String s = String.valueOf(obj); s = escape(s); return new StringBuilder(s.length() + 2).append('"').append(s).append('"').toString(); } } /** * Reads the next character to parse, * skipping all valid whitespace before it. 
* * @param in the reader to read from * @return the next character, or -1 if the end of data has been reached * @throws IOException if an error occurs */ private static int readNextChar(Reader in) throws IOException { int c; while ((c = in.read()) > -1) { switch (c) { // whitespace - ignored case 0x09: case 0x0a: case 0x0d: case 0x20: continue; default: return c; } } return -1; } /** * Reads the next expected delimiter character, * skipping all valid whitespace before it. * * @param in the reader to read from * @param delim the expected delimiter * @param delim2 an optional additional expected delimiter * (pass -1 if there is no second valid delimiter) * @return the read delimiter character * @throws IOException if an error occurs * @throws IllegalArgumentException if the next non-whitespace * character is not the expected delimiter, or if the * end of data has been reached */ private static int readDelimiter(Reader in, int delim, int delim2) throws IOException { int c = readNextChar(in); if (c < 0) throw new IllegalArgumentException("unexpected end of stream"); // the expected delimiter if (c == delim || c == delim2) return c; // unexpected character throw new IllegalArgumentException("unexpected character: " + (char)c); } /** * Reads an exact constant sequence of characters. * * @param in the reader to read from * @param value the expected constant sequence of characters * @throws IOException if an error occurs * @throws IllegalArgumentException if the read data is not * the exact given constant, or if the * end of data has been reached */ private static void readConstant(Reader in, String value) throws IOException { for (int i = 1; i < value.length(); i++) { // excluding first char int c = in.read(); if (c < 0) throw new IllegalArgumentException("unexpected end of stream"); if (c != value.charAt(i)) throw new IllegalArgumentException("unexpected character: " + (char)c); } } /** * Reads a string value. 
* * @param in the reader to read from * @return the read value * @throws IOException if an error occurs * @throws IllegalArgumentException if the string is malformed * or if the end of data has been reached */ private static String readString(Reader in) throws IOException { int c; int i = 0; boolean escaped = false; int unicode = 0; StringBuilder sb = new StringBuilder(16); while ((c = in.read()) > -1) { if (unicode != 0) { // unicode escape sequence if (c >= '0' && c <= '9') c -= '0'; else if (c >= 'a' && c <= 'f') c = c + 10 - 'a'; else if (c >= 'A' && c <= 'F') c = c + 10 - 'A'; else throw new IllegalArgumentException("invalid unicode sequence"); unicode = (unicode << 4) | c; if (++i == 4) { sb.append((char)unicode); unicode = 0; } } else if (escaped) { // escaped special character escaped = false; switch (c) { case '"': case '/': case '\\': break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'u': unicode = 1; i = 0; break; default: throw new IllegalArgumentException("invalid escape sequence"); } if (c != 'u') sb.append((char)c); } else if (c == '\\') { // start escape sequence escaped = true; } else if (c == '"') { // end of string return sb.toString(); } else if (c <= 0x1f) { // invalid control character throw new IllegalArgumentException("invalid control character (0x00-0x1f)"); } else { // valid regular character sb.append((char)c); } } throw new IllegalArgumentException("unexpected end of stream"); } /** * Reads a number value. *

* Note that numbers are the only type of value that do not have * an explicit terminator, so the character following the number * is also read. In order to continue processing it later, if the * next character is meaningful, we may return a wrapper object * that contains both the read value and the next character. * * @param in the reader to read from * @param c the first character of the number (which was already read) * @return the read value itself or a wrapper containing the * read value and the next character * @throws IOException if an error occurs * @throws IllegalArgumentException if the number is malformed * or if the end of data has been reached */ private static Object readNumber(Reader in, int c) throws IOException { // state bitmask (see json.org spec's number state machine diagram): // 0x01 start, 0x02 minus, 0x04 zero, 0x08 integer1, 0x10 integer, // 0x20 dot, 0x40 fraction, 0x80 e, 0x100 sign, 0x200 exponent int s = 1; // bit 0 (start) StringBuilder sb = new StringBuilder(16); boolean valid = true; boolean done = false; int next = -1; do { switch (c) { case '-': valid = (s & 0x81) != 0; s <<= 1; break; case '+': valid = (s & 0x80) != 0; s <<= 1; break; case '.': valid = (s & 0x1c) != 0; s = 0x20; break; case 'e': case 'E': valid = (s & 0x5c) != 0; s = 0x80; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': valid = s != 0x04; if ((s & 0x03) != 0) s = c == '0' ? 0x04 : 0x10; else if (s == 0x20) s <<= 1; else if ((s & 0x180) != 0) s = 0x200; break; case 0x09: case 0x0a: case 0x0d: case 0x20: case ',': case ']': case '}': next = c; done = true; break; default: valid = false; // junk break; } if (!valid) throw new IllegalArgumentException("invalid character in number"); if (!done) sb.append((char)c); } while (!done &&(c = in.read()) > -1); if ((s & 0x1a3) != 0) throw new IllegalArgumentException("number is cut off"); double value = Double.parseDouble(sb.toString()); return next > -1 ? 
new Value(value, next) : value; // use wrapper if we have the next character } /** * Reads an array value. * * @param in the reader to read from * @return the read value * @throws IOException if an error occurs * @throws IllegalArgumentException if the value is malformed * or if the end of data has been reached */ private static Object[] readArray(Reader in) throws IOException { List list = new ArrayList<>(); while (true) { int c = readNextChar(in); if (c == ']' && list.isEmpty()) // not after comma break; Object val = readValue(in, c); if (val instanceof Value) { int next = ((Value)val).nextChar; val = ((Value)val).value; list.add(val); if (next == ']') // value and array end break; if (next == ',') // value and comma continue; } else { list.add(val); } if (readDelimiter(in, ',', ']') == ']') break; } return list.toArray(); } /** * Reads an object value. *

* The iteration order of the name/value pairs is preserved. *

* If a name appears more than once, the last value associated with it is returned. * * @param in the reader to read from * @return the read value * @throws IOException if an error occurs * @throws IllegalArgumentException if the value is malformed * or if the end of data has been reached */ private static Object readObject(Reader in) throws IOException { Map map = new LinkedHashMap<>(); while (true) { int c = readNextChar(in); if (c == '}' && map.isEmpty()) // not after comma break; if (c != '"') throw new IllegalArgumentException("object keys must be quoted strings"); String key = readString(in); readDelimiter(in, ':', -1); Object val = readValue(in, readNextChar(in)); if (val instanceof Value) { int next = ((Value)val).nextChar; val = ((Value)val).value; map.put(key, val); if (next == '}') // value and object end break; if (next == ',') // value and comma continue; } else { map.put(key, val); } if (readDelimiter(in, ',', '}') == '}') break; } return map; } /** * Reads the next value (of any valid type). * The first character must be part of the value and not whitespace. 
* * @param in the reader to read from * @param c the first character of the value (which was already read) * @return the read value, or a wrapper containing the read value * and the next character * @throws IOException if an error occurs * @throws IllegalArgumentException if the value is malformed, * or has leading whitespace, * or if the end of data has been reached */ private static Object readValue(Reader in, int c) throws IOException { if (c < 0) throw new IllegalArgumentException("unexpected end of stream"); switch (c) { case 'n': readConstant(in, "null"); return null; case 't': readConstant(in, "true"); return true; case 'f': readConstant(in, "false"); return false; case '"': return readString(in); case '[': return readArray(in); case '{': return readObject(in); default: // number if (c >= '0' && c <= '9' || c == '-') return readNumber(in, c); // junk throw new IllegalArgumentException("invalid character"); } } /** * Parses the given data stream into a JSON value. * The value starts with the given character, * and whitespace after the value is skipped. * * @param in the reader to read from * @param c the first character of the stream (which was already read) * @return the read value, or a wrapper containing the read value * and the next character * @throws IOException if an error occurs * @throws IllegalArgumentException if the data is not valid JSON */ @SuppressWarnings("unchecked") private static T fromJSON(Reader in, int c) throws IOException { Object val = readValue(in, c); if (val instanceof Value || readNextChar(in) > -1) // there is junk after the value throw new IllegalArgumentException("unexpected character after value"); return (T)val; } /** * Parses the given data stream into a JSON value. * Valid whitespace before and after the value is ignored. 
* * @param in the reader to read from * @return the read value * @throws IOException if an error occurs * @throws IllegalArgumentException if the data is not valid JSON */ public static T fromJSON(Reader in) throws IOException { return fromJSON(in, readNextChar(in)); } /** * Parses the given string into a JSON value. * Valid whitespace before and after the value is ignored. * * @param json the JSON string * @return the read value * @throws IllegalArgumentException if the data is not valid JSON */ public static T fromJSON(String json) { try { return fromJSON(new StringReader(json)); } catch (IOException e) { throw new RuntimeException(e); // can't happen with our stream } } /** * Parses the given stream into a JSON value. * Valid whitespace before and after the value is ignored. * * @param in the data stream * @param charset the charset used to convert the stream to characters * @return the read value * @throws IOException if an error occurs * @throws IllegalArgumentException if the data is not valid JSON */ public static T fromJSON(InputStream in, String charset) throws IOException { return fromJSON(new InputStreamReader(in, charset)); } /** * Parses the given stream into a JSON value. * Valid whitespace before and after the value is ignored. *

* The data in the stream must be an encoded Unicode string. * If the stream starts with a BOM, the Unicode charset * variant is detected automatically, otherwise UTF-8 is assumed. * * @param in the data stream * @return the read value * @throws IOException if an error occurs * @throws IllegalArgumentException if the data is not valid JSON */ public static T fromJSON(InputStream in) throws IOException { int c = in.read(); // first potential BOM character String charset = "UTF-8"; // default if (c == 0xEF) { charset = in.read() == 0xBB && in.read() == 0xBF ? "UTF-8" : null; c = -1; } else if (c == 0xFE) { charset = in.read() == 0xFF ? "UTF-16BE" : null; c = -1; } else if (c == 0x00) { charset = in.read() == 0x00 && in.read() == 0xFE && in.read() == 0xFF ? "UTF-32BE" : null; c = -1; } else if (c == 0xFF) { c = in.read(); if (c == 0xFE) { // we must read an extra byte to distinguish UTF-16LE from UTF-32LE if ((c = in.read()) == 0x00) { charset = in.read() == 0x00 ? "UTF-32LE" : null; c = -1; } else { charset = "UTF-16LE"; // we already consumed one byte, so must combine it with the second byte of the 16-bit char c = c | (in.read() << 8); } } else { charset = null; } } if (charset == null) // invalid BOM throw new UnsupportedEncodingException("invalid BOM"); Reader reader = new InputStreamReader(in, charset); // if we exactly matched a BOM and didn't read any additional bytes, // we just start a new stream. Also, if we did read the next byte, // but it's whitespace, we also start a new stream since it will // skip all leading whitespace. If we do have a valid first character, // we pass it along for parsing if (c == -1 || c == 0x09 || c == 0x0a || c == 0x0d || c == 0x20) return fromJSON(reader); // a fresh start return fromJSON(reader, c); // with first character } /** * Reads JSON Lines data as a stream of objects. *

* Closing the returned Stream will close the provided reader. *

* The JSON Lines format is a UTF-8 encoded stream of characters, * where each line contains a valid JSON value, and lines are * separated by a single newline LF character (a preceding CR * character is valid since it will simply be ignored as valid * whitespace surrounding the JSON value). If the last character * of the last line is a LF it is ignored. *

* JSON Lines files can be saved with the ".jsonl" extension, * and the corresponding MIME type (not yet standardized) * would be "application/jsonl". * * @param in the JSON Lines reader * @return the stream of parsed JSON values */ public static Stream readJSONLines(Reader in) { Spliterator spliterator = new Spliterators.AbstractSpliterator( Long.MAX_VALUE, Spliterator.ORDERED) { @Override public boolean tryAdvance(Consumer action) { try { int c; // skip leading non-LF whitespace while ((c = in.read()) == 0x09 || c == 0x20 || c == 0x0d); if (c < 0) // end of data return false; if (c == 0x0a) { // empty line allowed only at the very end if (in.read() < 0) return false; throw new IllegalArgumentException("empty line"); } Object val = readValue(in, c); if (val instanceof Value) { c = ((Value)val).nextChar; val = ((Value)val).value; } else { c = 0x20; } // skip trailing non-LF whitespace while (c == 0x09 || c == 0x20 || c == 0x0d) c = in.read(); if (c != -1 && c != 0x0a) // there is junk after the value throw new IllegalArgumentException("unexpected character after value"); action.accept(val); return true; } catch (Exception e) { throw new RuntimeException(e); } } }; return StreamSupport.stream(spliterator, false) .onClose(() -> { try { in.close(); } catch (IOException e) { throw new RuntimeException(e); } }); } /** * Reads JSON Lines data as a stream of objects. *

* Closing the returned Stream will close the * provided input stream. * * @param in the JSON Lines input stream * @return the stream of parsed JSON values * @see #readJSONLines(Reader) readJSONLines */ public static Stream readJSONLines(InputStream in) { return readJSONLines(new InputStreamReader(in, StandardCharsets.UTF_8)); } /** * Writes a value as a JSON line to the given JSON Lines writer. * * @param out the JSON Lines writer * @param value the value to write * @throws IOException if an error occurs * @see #readJSONLines(Reader) readJSONLines */ public static void writeJSONLine(Writer out, Object value) throws IOException { // toJSON outputs a single line, so we just append the LF out.write(toJSON(value)); out.write(0x0a); // LF } /** * Writes values from an object stream to a JSON Lines writer. * * @param out the JSON Lines writer * @param values the values to write * @throws IOException if an error occurs * @see #readJSONLines(Reader) readJSONLines */ public static void writeJSONLines(Writer out, Stream values) throws IOException { for (Object value : (Iterable)values::iterator) writeJSONLine(out, value); } /** * Writes values from an object stream to a JSON Lines output stream. * * @param out the JSON Lines output stream * @param values the values to write * @throws IOException if an error occurs * @see #readJSONLines(Reader) readJSONLines */ public static void writeJSONLines(OutputStream out, Stream values) throws IOException { writeJSONLines(new OutputStreamWriter(out, StandardCharsets.UTF_8), values); } }