All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.modeshape.schematic.internal.document.JsonReader Maven / Gradle / Ivy

The newest version!
/*
 * ModeShape (http://www.modeshape.org)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.modeshape.schematic.internal.document;

import static org.modeshape.schematic.document.Json.ReservedField.BASE_64;
import static org.modeshape.schematic.document.Json.ReservedField.BINARY_TYPE;
import static org.modeshape.schematic.document.Json.ReservedField.CODE;
import static org.modeshape.schematic.document.Json.ReservedField.DATE;
import static org.modeshape.schematic.document.Json.ReservedField.INCREMENT;
import static org.modeshape.schematic.document.Json.ReservedField.OBJECT_ID;
import static org.modeshape.schematic.document.Json.ReservedField.REGEX_OPTIONS;
import static org.modeshape.schematic.document.Json.ReservedField.REGEX_PATTERN;
import static org.modeshape.schematic.document.Json.ReservedField.SCOPE;
import static org.modeshape.schematic.document.Json.ReservedField.TIMESTAMP;
import static org.modeshape.schematic.document.Json.ReservedField.UUID;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.nio.charset.Charset;
import java.text.CharacterIterator;
import java.text.ParseException;
import java.text.StringCharacterIterator;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import org.modeshape.schematic.Base64;
import org.modeshape.schematic.annotation.Immutable;
import org.modeshape.schematic.annotation.NotThreadSafe;
import org.modeshape.schematic.annotation.ThreadSafe;
import org.modeshape.schematic.document.Bson;
import org.modeshape.schematic.document.Document;
import org.modeshape.schematic.document.DocumentSequence;
import org.modeshape.schematic.document.Json;
import org.modeshape.schematic.document.Null;
import org.modeshape.schematic.document.ParsingException;

/**
 * A class that reads the JSON data format and constructs an in-memory BSON representation.
 * 

* This reader is capable of optionally introspecting string values to look for certain string patterns that are commonly used to * represent dates. In introspection is not done by default, but when it is used it looks for the following patterns: *

    *
  • a string literal date of the form "yyyy-MM-ddTHH:mm:ss" where * T is a literal character
  • *
  • a string literal date of the form "yyyy-MM-ddTHH:mm:ssZ" where * T and Z are literal characters
  • *
  • a string literal date of the form * "yyyy-MM-ddTHH:mm:ssGMT+00:00" where T, and * GMT are literal characters
  • *
  • a string literal date of the form "/Date(millisOrIso)/"
  • *
  • a string literal date of the form "\/Date(millisOrIso)\/"
  • *
* Note that in the date forms listed above, millisOrIso is either a long value representing the number of * milliseconds since epoch or a string literal in ISO-8601 format representing a date and time. *

*

* This reader also accepts non-string values that are function calls of the form * *

 *     new functionName(parameters)
 * 
* * where parameters consists of one or more JSON values (including nested functions). If the function call * cannot be parsed and executed, the string literal form of the function call is kept. *

* * @author Randall Hauch (C) 2011 Red Hat Inc. * @since 5.1 */ @ThreadSafe @Immutable public class JsonReader { protected static final DocumentValueFactory VALUE_FACTORY = new DefaultDocumentValueFactory(); protected static final ValueMatcher SIMPLE_VALUE_MATCHER = new SimpleValueMatcher(VALUE_FACTORY); protected static final ValueMatcher DATE_VALUE_MATCHER = new DateValueMatcher(VALUE_FACTORY); public static final boolean DEFAULT_INTROSPECT = true; /** * Read the JSON representation from supplied URL and construct the {@link Document} representation, using the * {@link Charset#defaultCharset() default character set}. * * @param url the URL to the JSON document; may not be null and must be resolvable * @return the in-memory {@link Document} representation * @throws ParsingException if there was a problem reading from the URL */ public Document read( URL url ) throws ParsingException { try { return read(url.openStream(), DEFAULT_INTROSPECT); } catch (IOException e) { throw new ParsingException(e.getMessage(), e, 0, 0); } } /** * Read the JSON representation from supplied input stream and construct the {@link Document} representation, using the * {@link Charset#defaultCharset() default character set}. * * @param stream the input stream; may not be null * @return the in-memory {@link Document} representation * @throws ParsingException if there was a problem reading from the stream */ public Document read( InputStream stream ) throws ParsingException { return read(stream, DEFAULT_INTROSPECT); } /** * Read the JSON representation from supplied input stream and construct the {@link Document} representation, using the * supplied {@link Charset character set}. * * @param stream the input stream; may not be null * @param charset the character set that should be used; may not be null * @return the in-memory {@link Document} representation * @throws ParsingException if there was a problem reading from the stream */ public Document read( InputStream stream, Charset charset ) throws ParsingException { return read(stream, charset, DEFAULT_INTROSPECT); } /** * Read the JSON representation from supplied input stream and construct the {@link Document} representation. * * @param reader the IO reader; may not be null * @return the in-memory {@link Document} representation * @throws ParsingException if there was a problem reading from the stream */ public Document read( Reader reader ) throws ParsingException { return read(reader, DEFAULT_INTROSPECT); } /** * Read the JSON representation from supplied string and construct the {@link Document} representation. * * @param json the JSON representation; may not be null * @return the in-memory {@link Document} representation * @throws ParsingException if there was a problem reading from the stream */ public Document read( String json ) throws ParsingException { return read(json, DEFAULT_INTROSPECT); } /** * Read the JSON representation from supplied input stream and construct the {@link Document} representation, using the * {@link Charset#defaultCharset() default character set}. * * @param stream the input stream; may not be null * @param introspectStringValues true if the string values should be examined for common patterns, or false otherwise * @return the in-memory {@link Document} representation * @throws ParsingException if there was a problem reading from the stream */ public Document read( InputStream stream, boolean introspectStringValues ) throws ParsingException { return read(stream, Json.UTF8, introspectStringValues); } /** * Read the JSON representation from supplied input stream and construct the {@link Document} representation, using the * supplied {@link Charset character set}. * * @param stream the input stream; may not be null * @param charset the character set that should be used; may not be null * @param introspectStringValues true if the string values should be examined for common patterns, or false otherwise * @return the in-memory {@link Document} representation * @throws ParsingException if there was a problem reading from the stream */ public Document read( InputStream stream, Charset charset, boolean introspectStringValues ) throws ParsingException { return read(new InputStreamReader(stream, charset), introspectStringValues); } /** * Read the JSON representation from supplied input stream and construct the {@link Document} representation. * * @param reader the IO reader; may not be null * @param introspectStringValues true if the string values should be examined for common patterns, or false otherwise * @return the in-memory {@link Document} representation * @throws ParsingException if there was a problem reading from the stream */ public Document read( Reader reader, boolean introspectStringValues ) throws ParsingException { // Create an object so that this reader is thread safe ... ValueMatcher matcher = introspectStringValues ? DATE_VALUE_MATCHER : SIMPLE_VALUE_MATCHER; return new Parser(new Tokenizer(reader), VALUE_FACTORY, matcher).parseDocument(); } /** * Read the JSON representation from supplied string and construct the {@link Document} representation. * * @param json the JSON representation; may not be null * @param introspectStringValues true if the string values should be examined for common patterns, or false otherwise * @return the in-memory {@link Document} representation * @throws ParsingException if there was a problem reading from the stream */ public Document read( String json, boolean introspectStringValues ) throws ParsingException { return read(new StringReader(json), introspectStringValues); } /** * Return a {@link DocumentSequence} that can be used to pull multiple documents from the stream. * * @param stream the input stream; may not be null * @return the sequence that can be used to get one or more Document instances from a single input */ public DocumentSequence readMultiple( InputStream stream ) { return readMultiple(stream, DEFAULT_INTROSPECT); } /** * Return a {@link DocumentSequence} that can be used to pull multiple documents from the stream. * * @param stream the input stream; may not be null * @param introspectStringValues true if the string values should be examined for common patterns, or false otherwise * @return the sequence that can be used to get one or more Document instances from a single input */ public DocumentSequence readMultiple( InputStream stream, boolean introspectStringValues ) { return readMultiple(new InputStreamReader(stream, Json.UTF8), introspectStringValues); } /** * Return a {@link DocumentSequence} that can be used to pull multiple documents from the stream. * * @param reader the IO reader; may not be null * @return the sequence that can be used to get one or more Document instances from a single input */ public DocumentSequence readMultiple( Reader reader ) { return readMultiple(reader, DEFAULT_INTROSPECT); } /** * Return a {@link DocumentSequence} that can be used to pull multiple documents from the stream. * * @param reader the IO reader; may not be null * @param introspectStringValues true if the string values should be examined for common patterns, or false otherwise * @return the sequence that can be used to get one or more Document instances from a single input */ public DocumentSequence readMultiple( Reader reader, boolean introspectStringValues ) { // Create an object so that this reader is thread safe ... final Tokenizer tokenizer = new Tokenizer(reader); ValueMatcher matcher = introspectStringValues ? DATE_VALUE_MATCHER : SIMPLE_VALUE_MATCHER; final Parser parser = new Parser(tokenizer, VALUE_FACTORY, matcher); return () -> { if (tokenizer.isFinished()) return null; Document doc = parser.parseDocument(false); // System.out.println(Json.writePretty(doc)); return doc; }; } /** * Parse the number represented by the supplied (unquoted) JSON field value. * * @param value the string representation of the value * @return the number, or null if the value could not be parsed */ public static Number parseNumber( String value ) { // Try to parse as a number ... char c = value.charAt(0); if ((c >= '0' && c <= '9') || c == '.' || c == '-' || c == '+') { // It's definitely a number ... if (c == '0' && value.length() > 2) { // it might be a hex number that starts with '0x' char two = value.charAt(1); if (two == 'x' || two == 'X') { try { // Parse the remainder of the hex number ... return Integer.parseInt(value.substring(2), 16); } catch (NumberFormatException e) { // Ignore and continue ... } } } // Try parsing as a double ... try { if ((value.indexOf('.') > -1) || (value.indexOf('E') > -1) || (value.indexOf('e') > -1)) { return Double.parseDouble(value); } Long longObj = new Long(value); long longValue = longObj.longValue(); int intValue = longObj.intValue(); if (longValue == intValue) { // Then it's just an integer ... return new Integer(intValue); } return longObj; } catch (NumberFormatException e) { // ignore ... } } return null; } /** * The component that parses a tokenized JSON stream. * * @author Randall Hauch (C) 2011 Red Hat Inc. * @since 5.1 */ @NotThreadSafe public static class Parser { private final DocumentValueFactory values; private final Tokenizer tokens; private final ValueMatcher valueMatcher; /** * Create a new JsonReader that uses the supplied {@link Tokenizer} instance. * * @param tokenizer the tokenizer that tokenizes the stream of JSON content; may not be null * @param values the factory for creating value objects; may not be null * @param valueMatcher the component that looks for patterns within string values to create alternative objects; may not * be null */ public Parser( Tokenizer tokenizer, DocumentValueFactory values, ValueMatcher valueMatcher ) { this.tokens = tokenizer; this.values = values; this.valueMatcher = valueMatcher; } /** * Parse the stream for the next JSON document. * * @return the document, or null if there are no more documents * @throws ParsingException if there is a problem parsing the value */ public Document parseDocument() throws ParsingException { return parseDocument(null, true); } /** * Parse the stream for the next JSON document. * * @param failIfNotValidDocument true if this method should throw an exception if the stream does not contain a valid * document, or false if null should be returned if there is no valid document on the stream * @return the document, or null if there are no more documents * @throws ParsingException if there is a problem parsing the value */ public Document parseDocument( boolean failIfNotValidDocument ) throws ParsingException { return parseDocument(null, failIfNotValidDocument); } protected BasicDocument newDocument() { return new BasicDocument(); } /** * Parse the stream for the next JSON document. * * @param hasReservedFieldNames the flag that should be set if this document contains field names that are reserved * @param failIfNotValidDocument true if this method should throw an exception if the stream does not contain a valid * document, or false if null should be returned if there is no valid document on the stream * @return the document, or null if there are no more documents * @throws ParsingException if there is a problem parsing the value */ protected Document parseDocument( AtomicBoolean hasReservedFieldNames, boolean failIfNotValidDocument ) throws ParsingException { if (tokens.nextUsefulChar() != '{') { if (failIfNotValidDocument) { throw tokens.error("JSON documents must begin with a '{' character"); } // otherwise just return ... return null; } BasicDocument doc = newDocument(); do { String fieldName = null; // Peek at the next character on the stream ... switch (tokens.peek()) { case 0: throw tokens.error("JSON documents must end with a '}' character"); case '}': tokens.next(); return doc; default: // This should be a field name, so read it ... fieldName = tokens.nextString(); break; } // Now look for any of the following delimiters: ':', "->", or "=>" tokens.nextFieldDelim(); // Now look for a value ... Object value = parseValue(); doc.put(fieldName, value); // Determine if this field is a reserved if (hasReservedFieldNames != null && isReservedFieldName(fieldName)) { hasReservedFieldNames.set(true); } // Look for the delimiter between fields ... if (tokens.nextDocumentDelim()) return doc; } while (true); } protected final boolean isReservedFieldName( String fieldName ) { return fieldName.length() != 0 && fieldName.charAt(0) == '$'; } /** * Parse the JSON array on the stream, beginning with the '[' character until the ']' character, which is consumed. * * @return the array representation; never null but possibly an empty array * @throws ParsingException if there is a problem parsing the value */ public BasicArray parseArray() throws ParsingException { if (tokens.nextUsefulChar() != '[') { throw tokens.error("JSON arrays must begin with a '[' character"); } BasicArray array = new BasicArray(); boolean expectValueSeparator = false; do { // Peek at the next character on the stream ... char c = tokens.peek(); switch (c) { case 0: throw tokens.error("JSON arrays must end with a ']' character"); case ']': tokens.next(); return array; case ',': tokens.next(); expectValueSeparator = false; break; default: if (expectValueSeparator) { throw tokens.error("Invalid character in JSON array: '" + c + "' at line " + tokens.lineNumber() + " column " + tokens.columnNumber()); } // This should be a value .. Object value = parseValue(); array.addValue(value); expectValueSeparator = true; break; } } while (true); } /** * Parse the stream for the next field value, which can be one of the following values: *
    *
  • a nested document
  • *
  • an array of values
  • *
  • a string literal, surrounded by single-quote characters
  • *
  • a string literal, surrounded by double-quote characters
  • *
  • a string literal date of the form "yyyy-MM-ddTHH:mm:ss" * where T is a literal character
  • *
  • a string literal date of the form "yyyy-MM-ddTHH:mm:ssZ" * where T and Z are literal characters
  • *
  • a string literal date of the form * "yyyy-MM-ddTHH:mm:ssGMT+00:00" where * T, and GMT are literal characters
  • *
  • a string literal date of the form "/Date(millisOrIso)/"
  • *
  • a string literal date of the form "\/Date(millisOrIso)\/"
  • *
  • a date literal of the form new Date(millisOrIso)
  • *
  • a date literal of the form Date(millisOrIso)
  • *
  • a function of the form new functionName(parameters) where parameters * consists of one or more values as parsed by this method *
* Note that in the date forms listed above, millisOrIso is either a long value representing the * number of milliseconds since epoch or a string literal in ISO-8601 format representing a date and time. * * @return the field value * @throws ParsingException if there is a problem parsing the value */ public Object parseValue() throws ParsingException { char c = tokens.peek(); switch (c) { case 0: // There's nothing left ... return null; case '{': // Nested object ... AtomicBoolean hasReservedFieldNames = new AtomicBoolean(); Document doc = parseDocument(hasReservedFieldNames, true); if (!hasReservedFieldNames.get()) { return doc; } // Convert the doc with reserved field names ... return processDocumentWithReservedFieldNames(doc); case '[': // Nested array ... return parseArray(); case '"': case '\'': String literal = tokens.nextString(); Object value = valueMatcher.parseValue(literal); return value != null ? value : literal; case 'd': case 'n': String newToken = tokens.nextWord(); // read the 'new' token if ("new".equalsIgnoreCase(newToken) || "date".equalsIgnoreCase(newToken)) { return parseFunction(); } break; } // Looks like it's a number, so try that ... String number = tokens.nextNumber(); return number != null ? parseValue(number, tokens.lineNumber(), tokens.columnNumber()) : number; } /** * Parse the value given by the supplied string located at the supplied line and column numbers. This method looks for * known constant values, then attempts to parse the value as a number, and then calls * {@link #parseUnknownValue(String, int, int)}. * * @param value the string representation of the value * @param lineNumber the line number for the beginning of the value * @param columnNumber the column number for the beginning of the value * @return the value * @throws ParsingException if there is a problem parsing the value */ public Object parseValue( String value, int lineNumber, int columnNumber ) throws ParsingException { if (value.length() == 0) return value; if ("true".equalsIgnoreCase(value)) return Boolean.TRUE; if ("false".equalsIgnoreCase(value)) return Boolean.FALSE; if ("null".equalsIgnoreCase(value)) return Null.getInstance(); // Try to parse as a number ... Number number = parseNumber(value); if (number != null) return number; return parseUnknownValue(value, lineNumber, columnNumber); } /** * Parse the number represented by the supplied value. This method is called by the {@link #parseValue(String, int, int)} * method. * * @param value the string representation of the value * @return the number, or null if the value could not be parsed */ protected Number parseNumber( String value ) { return JsonReader.parseNumber(value); } /** * Override this method if custom value types are expected. * * @param value the string representation of the value * @param lineNumber the line number at which the value starts * @param columnNumber the column number at which the value starts * @return the value * @throws ParsingException if there is a problem parsing the value */ protected Object parseUnknownValue( String value, int lineNumber, int columnNumber ) throws ParsingException { return value; } /** * Parse a function call on the stream. The 'new' keyword has already been processed. * * @return the result of the evaluation of the function * @throws ParsingException if there is a problem parsing the value */ public Object parseFunction() throws ParsingException { // Parse the function name ... int line = tokens.lineNumber(); int col = tokens.columnNumber(); String functionName = tokens.nextString(); FunctionCall function = new FunctionCall(functionName, line, col); // Read the open parenthesis .. char c = tokens.nextUsefulChar(); if (c != '(') { throw tokens.error("Expected '(' after function name \"" + functionName + "\" and at line " + tokens.lineNumber() + ", column " + tokens.columnNumber()); } // Read the parameters ... do { line = tokens.lineNumber(); col = tokens.columnNumber(); Object parameter = parseValue(); if (parameter == null) { break; } function.add(parameter, line, col); } while (true); // Now evaluate the function ... Object value = evaluateFunction(function); return value != null ? value : evaluateUnknownFunction(function); } /** * Method that is called to evaluate the supplied function. This method may be overridden by subclasses to handle custom * functions. * * @param function the function definition * @return the value that resulted from evaluating the function, or null if the function call could not be evaluated * @throws ParsingException if there is a problem parsing the value */ public Object evaluateFunction( FunctionCall function ) throws ParsingException { int numParams = function.size(); if ("date".equalsIgnoreCase(function.getFunctionName())) { if (numParams > 0) { // The parameter should be a long or a timestamp ... FunctionParameter param1 = function.get(0); Object value = param1.getValue(); if (value instanceof Long) { Long millis = (Long)value; return values.createDate(millis.longValue()); } if (value instanceof Integer) { Integer millis = (Integer)value; return values.createDate(millis.longValue()); } if (value instanceof String) { String valueStr = (String)value; try { return values.createDate(valueStr); } catch (ParseException e) { // Not a valid date ... throw tokens.error("Expecting the \"new Date(...)\" parameter to be a valid number of milliseconds or ISO date string, but found \"" + param1.getValue() + "\" at line " + param1.getLineNumber() + ", column " + param1.getColumnNumber()); } } } // Not a valid date ... throw tokens.error("The date function requires one parameter at line " + function.getLineNumber() + ", column " + function.getColumnNumber()); } return null; } /** * Method that is called when the function call described by the parameter could not be evaluated. By default, the string * representation of the function is returned. * * @param function the function definition * @return the value that resulted from evaluating the function * @throws ParsingException if there is a problem parsing the value */ protected Object evaluateUnknownFunction( FunctionCall function ) throws ParsingException { return function.toString(); } @SuppressWarnings( "deprecation" ) protected Object processDocumentWithReservedFieldNames( Document doc ) { if (doc == null) return null; Object value = null; int numFields = doc.size(); if (numFields == 0) return doc; try { if (numFields == 1) { if (!Null.matches(value = doc.get(OBJECT_ID))) { String bytesInBase16 = value.toString(); return values.createObjectId(bytesInBase16); } if (!Null.matches(value = doc.get(DATE))) { if (value instanceof Date) { return value; } String isoDate = value.toString(); try { return values.createDate(isoDate); } catch (ParseException e) { Long millis = Long.parseLong(isoDate); return values.createDate(millis); } } if (!Null.matches(value = doc.get(REGEX_PATTERN))) { String pattern = value.toString(); return values.createRegex(pattern, null); } if (!Null.matches(value = doc.get(UUID))) { return values.createUuid(value.toString()); } if (!Null.matches(value = doc.get(CODE))) { String code = value.toString(); return values.createCode(code); } } else if (numFields == 2) { if (!Null.matches(value = doc.get(TIMESTAMP))) { int time = doc.getInteger(TIMESTAMP); int inc = doc.getInteger(INCREMENT); return values.createTimestamp(time, inc); } if (!Null.matches(value = doc.get(REGEX_PATTERN))) { String pattern = value.toString(); String options = doc.getString(REGEX_OPTIONS); return values.createRegex(pattern, options); } if (!Null.matches(value = doc.get(CODE))) { String code = value.toString(); Document scope = doc.getDocument(SCOPE); return scope != null ? values.createCode(code, scope) : values.createCode(code); } if (!Null.matches(value = doc.get(BINARY_TYPE))) { char c = value.toString().charAt(0); byte type = 0x00; switch (c) { case '0': type = Bson.BinaryType.GENERAL; break; case '1': type = Bson.BinaryType.FUNCTION; break; case '2': type = Bson.BinaryType.BINARY; break; case '3': type = Bson.BinaryType.UUID; break; case '5': type = Bson.BinaryType.MD5; break; case '8': c = value.toString().charAt(1); if (c == '0') { type = Bson.BinaryType.USER_DEFINED; } break; } String data = doc.getString(BASE_64); return values.createBinary(type, Base64.decode(data)); } } } catch (Throwable e) { // ignore } return doc; } protected static class FunctionCall implements Iterable { private final String functionName; private final List parameters = new LinkedList<>(); private final int lineNumber; private final int columnNumber; public FunctionCall( String functionName, int lineNumber, int columnNumber ) { this.functionName = functionName; this.lineNumber = lineNumber; this.columnNumber = columnNumber; } public String getFunctionName() { return functionName; } public void add( Object parameter, int lineNumber, int columnNumber ) { this.parameters.add(new FunctionParameter(parameter, lineNumber, columnNumber)); } @Override public Iterator iterator() { return parameters.iterator(); } public FunctionParameter get( int index ) { return parameters.get(index); } public int size() { return parameters.size(); } public int getLineNumber() { return lineNumber; } public int getColumnNumber() { return columnNumber; } @Override public String toString() { StringBuilder sb = new StringBuilder(functionName); sb.append('('); boolean first = true; for (FunctionParameter parameter : parameters) { if (first) { first = false; } else { sb.append(','); } sb.append(parameter.getValue()); } sb.append(')'); return sb.toString(); } } @Immutable protected static class FunctionParameter { private final Object value; private final int lineNumber; private final int columnNumber; public FunctionParameter( Object value, int lineNumber, int columnNumber ) { this.value = value; this.lineNumber = lineNumber; this.columnNumber = columnNumber; } public Object getValue() { return value; } public int getLineNumber() { return lineNumber; } public int getColumnNumber() { return columnNumber; } @Override public String toString() { return Json.write(value); } } } /** * The component that matches a string value for certain patterns. If the value matches a known pattern, it return the * appropriate value object; otherwise, the supplied string value is returned. * * @author Randall Hauch (C) 2011 Red Hat Inc. * @since 5.1 */ @NotThreadSafe public static interface ValueMatcher { /** * Parse the value given by the supplied string into an appropriate value object. This method looks for specific patterns * of Date strings; if no known pattern is found, it just returns the supplied value. * * @param value the string representation of the value * @return the value */ public Object parseValue( String value ); } /** * The component that matches a string value for certain patterns. If the value matches a known pattern, it return the * appropriate value object; otherwise, the supplied string value is returned. * * @author Randall Hauch (C) 2011 Red Hat Inc. * @since 5.1 */ @NotThreadSafe public static class SimpleValueMatcher implements ValueMatcher { protected final DocumentValueFactory values; /** * Create a new matcher that uses the supplied {@link DocumentValueFactory} instance. * * @param values the factory for creating value objects; may not be null */ public SimpleValueMatcher( DocumentValueFactory values ) { this.values = values; } /** * Parse the value given by the supplied string into an appropriate value object. This method looks for specific patterns * of Date strings; if no known pattern is found, it just returns the supplied value. * * @param value the string representation of the value * @return the value */ @Override public Object parseValue( String value ) { return value; } } /** * The component that parses a tokenized JSON stream and attempts to evaluate literal values such as dates * * @author Randall Hauch (C) 2011 Red Hat Inc. * @since 5.1 */ @NotThreadSafe public static class DateValueMatcher extends SimpleValueMatcher { /** * Create a new matcher that uses the supplied {@link DocumentValueFactory} instance. * * @param values the factory for creating value objects; may not be null */ public DateValueMatcher( DocumentValueFactory values ) { super(values); } /** * Parse the value given by the supplied string into an appropriate value object. This method looks for specific patterns * of Date strings; if no known pattern is found, it just returns the supplied value. * * @param value the string representation of the value * @return the value */ @Override public Object parseValue( String value ) { if (value != null) { if (value.length() > 2) { Date date = parseDateFromLiteral(value); if (date != null) { return date; } } // Unescape escaped characters ... value = unescapeValue(value); } return value; } protected String unescapeValue( String value ) { if (value == null || value.length() == 0) return value; StringBuilder sb = new StringBuilder(value.length()); CharacterIterator iter = new StringCharacterIterator(value); for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) { switch (c) { case '\\': // The character might be an escape sequence, so output the backslash ... char next = iter.next(); switch (next) { case CharacterIterator.DONE: // This was the last character, so we're done ... sb.append(c); break; case '/': // optional case '\b': case '\f': case '\n': case '\r': case '\t': // This is an escaped sequence ... sb.append(next); break; case 'u': // This is an unicode escape sequence, so we already output one of them ... char first = iter.next(); char second = iter.next(); char third = iter.next(); char fourth = iter.next(); String string = "" + first + second + third + fourth; try { char uni = (char)Integer.parseInt(string, 16); sb.append(uni); } catch (NumberFormatException e) { // this is not a valid unicode escape sequence so just append it as is sb.append("\\u").append(string); continue; } break; default: // It's not an escape sequence that we care about. We've already written the backslash, // so just write the character ... sb.append(c); sb.append(next); } break; default: // Unicode escapes are handled above ... sb.append(c); break; } } return sb.toString(); } /** * Parse the date represented by the supplied value. This method is called by the {@link #parseValue(String)} method. This * method checks the following formats: *
    *
  • a string literal date of the form "yyyy-MM-ddTHH:mm:ss" * where T is a literal character
  • *
  • a string literal date of the form "yyyy-MM-ddTHH:mm:ssZ" * where T and Z are literal characters
  • *
  • a string literal date of the form * "yyyy-MM-ddTHH:mm:ssGMT+00:00" where * T, and GMT are literal characters
  • *
  • a string literal date of the form "/Date(millisOrIso)/"
  • *
  • a string literal date of the form "\/Date(millisOrIso)\/"
  • *
*

* Note that this method does not handle the new Date(...) or Date(...) representations, as * that's handled elsewhere. *

* * @param value the string representation of the value; never null and never empty * @return the number, or null if the value could not be parsed */ protected Date parseDateFromLiteral( String value ) { char f = value.charAt(0); if (Character.isDigit(f)) { // Try as simply an ISO-8601 formatted date ... return evaluateDate(value); } if (value.startsWith("\\/Date(") && value.endsWith(")\\/")) { String millisOrIso = value.substring(7, value.length() - 3).trim(); return evaluateDate(millisOrIso); } if (value.startsWith("/Date(") && value.endsWith(")/")) { String millisOrIso = value.substring(6, value.length() - 2).trim(); return evaluateDate(millisOrIso); } return null; } protected Date evaluateDate( String millisOrIso ) { try { return values.createDate(millisOrIso); } catch (ParseException e) { // not an ISO-8601 format ... } return null; } } /** * The component that tokenizes a stream of JSON content. * * @author Randall Hauch (C) 2011 Red Hat Inc. * @since 5.1 */ @NotThreadSafe public static class Tokenizer { private final Reader reader; private int lineNumber; private int columnNumber; private boolean finished; private boolean hasPrevious; private char previous; private StringBuilder stringBuilder = new StringBuilder(128); /** * Create a new tokenizer that uses the supplied {@link Reader Java IO Reader} instance. * * @param reader the reader for accessing the JSON content; may not be null */ public Tokenizer( Reader reader ) { this.reader = reader; } public boolean isFinished() { return finished; } protected char next() throws ParsingException { char c = 0; if (hasPrevious) { hasPrevious = false; c = previous; } else { try { int x = reader.read(); if (x <= 0) { // We've reached the end of the stream ... finished = true; c = 0; } else { c = (char)x; } } catch (IOException e) { throw error("Error reading at line " + lineNumber + ", column " + columnNumber + ": " + e.getLocalizedMessage(), e); } } // It's a valid character, but we have to advance our line & column counts ... if (previous == '\r') { ++lineNumber; columnNumber = (c == '\n' ? 0 : 1); } else if (c == '\n') { ++lineNumber; columnNumber = 0; } else { ++columnNumber; } previous = c; return c; } public String next( int characterCount ) throws ParsingException { StringBuilder sb = stringBuilder(); for (int i = 0; i != characterCount; ++i) { sb.append(next()); } return complete(sb); } protected final StringBuilder stringBuilder() { StringBuilder stringBuilder = this.stringBuilder != null ? this.stringBuilder : new StringBuilder(128); this.stringBuilder = null; stringBuilder.delete(0, stringBuilder.length()); return stringBuilder; } protected final String complete( StringBuilder sb ) { assert sb != null; assert stringBuilder == null; stringBuilder = sb; return sb.toString(); } public char nextUsefulChar() throws ParsingException { boolean withinComment = false; do { char next = next(); if (next == '/') { char afterNext = next(); if (afterNext != '/') { throw error("Invalid character '" + afterNext + "' (expected comment //)"); } withinComment = true; continue; } boolean isLineSeparator = (next == '\n') || (next == '\r'); if (isLineSeparator && withinComment) { withinComment = false; } if (next == 0 || (!withinComment && next != ' ' && next != '\t' && !isLineSeparator)) return next; } while (true); } public char peek() throws ParsingException { if (hasPrevious) { return previous; } char next = nextUsefulChar(); hasPrevious = true; previous = next; --columnNumber; return next; } /** * Read the next quoted string from the stream, where stream begins with the a single-quote or double-quote character and * the string ends with the same quote character. * * @return the next string; never null * @throws ParsingException */ public String nextString() throws ParsingException { char c = nextUsefulChar(); switch (c) { case '"': case '\'': return nextString(c); } throw error("Expecting a field name at line " + lineNumber + ", column " + columnNumber + ". Check for a missing comma."); } public String nextString( char endQuote ) throws ParsingException { StringBuilder sb = stringBuilder(); char c = 0; do { c = next(); switch (c) { case 0: case '\n': case '\r': // The string was not properly terminated ... throw error("The string was not terminated before the end of line or end of document, at line " + lineNumber + ", column " + columnNumber); case '\\': // Escape sequence ... c = next(); switch (c) { case '\'': // single quote case '"': // double quote case '\\': // reverse solidus case '/': // forward solidus break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'u': // Unicode sequence made of exactly 4 hex characters ... char[] hex = new char[4]; hex[0] = next(); hex[1] = next(); hex[2] = next(); hex[3] = next(); String code = new String(hex, 0, 4); try { c = (char)Integer.parseInt(code, 16); // hex } catch (NumberFormatException e) { // this is not a valid unicode escape sequence so just append it as is sb.append('\\').append('u').append(code); continue; } break; default: // No other characters are valid escaped sequences, so this is actually just a backslash // followed by the current character c. So append the backslash ... sb.append('\\'); // then the character ... break; } sb.append(c); break; default: // Just a regular character (or the end quote) ... if (c == endQuote) { // This is the only way to successfully exit this method! return complete(sb); } // just a regular character ... sb.append(c); } } while (true); } public void nextFieldDelim() throws ParsingException { try { switch (nextUsefulChar()) { case ':': case '=': if (peek() == '>') { next(); // consume the '>' } break; } } catch (ParsingException e) { throw error("Expecting a field delimiter (either ':', '=' or '=>') at line " + lineNumber + ", column " + columnNumber); } } /** * Consume the next document delimiter (either a ',' or a ';'), and return whether the end-of-document character (e.g., * '}') has been consumed. This will correctly handle repeated delimiters, which are technically incorrect. * * @return true if a '}' has been consumed, or false otherwise * @throws ParsingException if the document delimiter could not be read */ public boolean nextDocumentDelim() throws ParsingException { switch (nextUsefulChar()) { case ';': // handle ';' delimiters, too! case ',': switch (peek()) { case ':': case ',': // There are multiple delimiters in a row. Strictly speaking, this is invalid but we // can easily handle it anyway ... return nextDocumentDelim(); case '}': // The comma was before '}' - this is not strictly well-formed, but we'll handle it next(); return true; } return false; case '}': return true; } return false; } /** * Return a string containing the next number on the stream. * * @return the next number as a string, or null if there is no content on the stream * @throws ParsingException if the number could not be read */ public String nextNumber() throws ParsingException { char c = peek(); if (c == 0) { return null; } StringBuilder sb = stringBuilder(); while (c > ' ' && "{}[]:\"=#/\\',;".indexOf(c) <= -1) { if (c == 0) { break; } sb.append(next()); c = peek(); } return complete(sb); } /** * Return a string containing the next alpha-numeric word on the stream. * * @return the next word as a string * @throws ParsingException if the number could not be read */ public String nextWord() throws ParsingException { char c = peek(); StringBuilder sb = stringBuilder(); while (Character.isLetterOrDigit(c)) { sb.append(next()); c = peek(); } return complete(sb); } public ParsingException error( String message ) { return new ParsingException(message, lineNumber, columnNumber); } public ParsingException error( String message, Throwable t ) { return new ParsingException(message, t, lineNumber, columnNumber); } public int lineNumber() { return lineNumber; } public int columnNumber() { return columnNumber; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy