All Downloads are FREE. Search and download functionalities are using the official Maven repository.

groovy.json.JsonLexer Maven / Gradle / Ivy

There is a newer version: 3.0.22
Show newest version
/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 */
package groovy.json;

import static groovy.json.JsonTokenType.*;

import groovy.io.LineColumnReader;

import java.io.IOException;
import java.io.Reader;
import java.util.Iterator;
import java.util.regex.Pattern;

/**
 * The lexer reads JSON tokens in a streaming fashion from the underlying reader.
 *
 * @author Guillaume Laforge
 * @since 1.8.0
 */
public class JsonLexer implements Iterator {

    private static final char SPACE    = ' ';
    private static final char DOT      = '.';
    private static final char MINUS    = '-';
    private static final char PLUS     = '+';
    private static final char LOWER_E  = 'e';
    private static final char UPPER_E  = 'E';
    private static final char ZERO     = '0';
    private static final char NINE     = '9';

    private static final Pattern p = Pattern.compile("\\\\u(\\p{XDigit}{4})");

    private LineColumnReader reader;

    /**
     * Underlying reader from which to read the JSON tokens.
     * This reader is an instance of LineColumnReader,
     * to keep track of line and column positions.
     */
    public LineColumnReader getReader() {
        return reader;
    }

    private JsonToken currentToken = null;

    /**
     * Instanciates a lexer with a reader from which to read JSON tokens.
     * Under the hood, the reader is wrapped in a LineColumnReader,
     * for line and column information, unless it's already an instance of that class.
     *
     * @param reader underlying reader
     */
    public JsonLexer(Reader reader) {
        this.reader = reader instanceof LineColumnReader ? (LineColumnReader) reader : new LineColumnReader(reader);
    }

    /**
     * @return the next token from the stream
     */
    public JsonToken nextToken() {
        try {
            int firstIntRead = skipWhitespace();
            if (firstIntRead == -1) return null;

            char firstChar = (char) firstIntRead;
            JsonTokenType possibleTokenType = startingWith((char) firstIntRead);

            if (possibleTokenType == null) {
                throw new JsonException(
                        "Lexing failed on line: " + reader.getLine() + ", column: " + reader.getColumn() +
                                ", while reading '" + firstChar + "', " +
                                "no possible valid JSON value or punctuation could be recognized."
                );
            }

            reader.reset();
            long startLine = reader.getLine();
            long startColumn = reader.getColumn();

            JsonToken token = new JsonToken();
            token.setStartLine(startLine);
            token.setStartColumn(startColumn);
            token.setEndLine(startLine);
            token.setEndColumn(startColumn + 1);
            token.setType(possibleTokenType);
            token.setText("" + firstChar);

            if (possibleTokenType.ordinal() >= OPEN_CURLY.ordinal() && possibleTokenType.ordinal() <= FALSE.ordinal()) {
                return readingConstant(possibleTokenType, token);
            } else if (possibleTokenType == STRING) {
                StringBuilder currentContent = new StringBuilder("\"");
                // consume the first double quote starting the string
                reader.read();
                boolean isEscaped = false;
                for (;;) {
                    int read = reader.read();
                    if (read == -1) return null;

                    isEscaped = (!isEscaped && currentContent.charAt(currentContent.length() - 1) == '\\');

                    char charRead = (char) read;
                    currentContent.append(charRead);

                    if (charRead == '"' && !isEscaped &&
                            possibleTokenType.matching(currentContent.toString())) {
                        token.setEndLine(reader.getLine());
                        token.setEndColumn(reader.getColumn());
                        token.setText(unescape(currentContent.toString()));
                        return token;
                    }
                }
            } else if (possibleTokenType == NUMBER) {
                StringBuilder currentContent = new StringBuilder();
                for (;;) {
                    reader.mark(1);
                    int read = reader.read();
                    if (read == -1) return null;
                    char lastCharRead = (char) read;

                    if (lastCharRead >= ZERO && lastCharRead <= NINE ||
                            lastCharRead == DOT || lastCharRead == MINUS || lastCharRead == PLUS ||
                            lastCharRead == LOWER_E || lastCharRead == UPPER_E) {
                        currentContent.append(lastCharRead);
                    } else {
                        reader.reset();
                        break;
                    }
                }

                String content = currentContent.toString();
                if (possibleTokenType.matching(content)) {
                    token.setEndLine(reader.getLine());
                    token.setEndColumn(reader.getColumn());
                    token.setText(currentContent.toString());

                    return token;
                } else {
                    throwJsonException(currentContent.toString(), possibleTokenType);
                }
            }
            return null;
        } catch (IOException ioe) {
            throw new JsonException("An IO exception occurred while reading the JSON payload", ioe);
        }
    }

    private void throwJsonException(String content, JsonTokenType type) {
        throw new JsonException(
                "Lexing failed on line: " +
                reader.getLine() + ", column: " + reader.getColumn() +
                ", while reading '" + content + "', " +
                "was trying to match " + type.getLabel()
        );
    }

    /**
     * Replace unicode escape and other control characters with real characters
     *
     * @param input text
     * @return input text without the escaping
     */
    public static String unescape(String input) {
        return StringEscapeUtils.unescapeJavaScript(input);
    }

    /**
     * When a constant token type is expected, check that the expected constant is read,
     * and update the content of the token accordingly.
     *
     * @param type the token type
     * @param token the token
     * @return the token updated with end column and text updated
     */
    private JsonToken readingConstant(JsonTokenType type, JsonToken token) {
        try {
            int numCharsToRead = ((String) type.getValidator()).length();
            char[] chars = new char[numCharsToRead];
            reader.read(chars);
            String stringRead = new String(chars);

            if (stringRead.equals(type.getValidator())) {
                token.setEndColumn(token.getStartColumn() + numCharsToRead);
                token.setText(stringRead);
                return token;
            } else {
                throwJsonException(stringRead, type);
            }
        } catch (IOException ioe) {
            throw new JsonException("An IO exception occurred while reading the JSON payload", ioe);
        }
        return null;
    }

    /**
     * Skips all the whitespace characters and moves the cursor to the next non-space character.
     */
    public int skipWhitespace() {
        try {
            int readChar = 20;
            char c = SPACE;
            while (Character.isWhitespace(c)) {
                reader.mark(1);
                readChar = reader.read();
                c = (char) readChar;
            }
            reader.reset();
            return readChar;
        } catch (IOException ioe) {
            throw new JsonException("An IO exception occurred while reading the JSON payload", ioe);
        }
    }

    /**
     * Iterator method to know if another token follows,
     * or if we've reached the end of the stream.
     *
     * @return true if there are more tokens
     */
    public boolean hasNext() {
        currentToken = nextToken();
        return currentToken != null;
    }

    /**
     * Iterator method to get the next token of the stream.
     *
     * @return the next token
     */
    public JsonToken next() {
        return currentToken;
    }

    /**
     * Method not implemented.
     *
     * @throws UnsupportedOperationException
     */
    public void remove() {
        throw new UnsupportedOperationException("The method remove() is not supported on this lexer.");
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy