All downloads are free. Search and download functionality uses the official Maven repository.

org.xerial.json.JSONLexer Maven / Gradle / Ivy

The newest version!
/*--------------------------------------------------------------------------
 *  Copyright 2011 Taro L. Saito
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *--------------------------------------------------------------------------*/
//--------------------------------------
// XerialJ
//
// JSONLexer.java
// Since: 2011/05/03 9:51:43
//
//--------------------------------------
package org.xerial.json;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;

import org.xerial.core.XerialErrorCode;
import org.xerial.core.XerialException;
import org.xerial.json.impl.JSONToken;
import org.xerial.util.ArrayDeque;
import org.xerial.util.io.BufferedScanner;

/**
 * Pull-style JSON lexer. Input may be supplied as an {@link InputStream}, a
 * {@link Reader}, a {@link String}, or a pre-built {@link BufferedScanner}.
 * 
 * Call {@link #nextToken()} repeatedly until it returns <code>null</code>.
 * 
 * @author leo
 * 
 */
public class JSONLexer {
    /**
     * A lexed token: its type and, for strings and numbers, the text selected
     * from the input. Structural tokens and the literals null/true/false carry
     * a <code>null</code> text.
     */
    public static class Token {
        public final JSONToken type;
        public final CharSequence str;

        public Token(JSONToken type, CharSequence str) {
            this.type = type;
            this.str = str;
        }

        @Override
        public String toString() {
            return String.format("[%s] %s", type, str == null ? "" : str);
        }
    }

    private BufferedScanner scanner;
    // Tokens produced by the emit* methods and not yet handed to the caller
    private ArrayDeque<Token> tokenQueue = new ArrayDeque<Token>();
    // Number of line breaks consumed so far (maintained only by consume())
    private long lineCount = 0;
    // Number of characters consumed since the last line break
    private int posInLine = 0;

    public JSONLexer(InputStream in) {
        this(new BufferedScanner(in));
    }

    public JSONLexer(Reader in) {
        this(new BufferedScanner(in));
    }

    public JSONLexer(String json) {
        this(new BufferedScanner(json));
    }

    public JSONLexer(BufferedScanner scanner) {
        this.scanner = scanner;
    }

    /**
     * Closes the underlying scanner.
     * 
     * @throws IOException
     */
    public void close() throws IOException {
        scanner.close();
    }

    /**
     * Get the next token. If null is returned, no more token is available.
     * 
     * @return the next token, or null when the input is exhausted
     * @throws XerialException
     *             when the input cannot be tokenized, or reading fails
     */
    public Token nextToken() throws XerialException {
        if (!tokenQueue.isEmpty()) {
            return tokenQueue.pollFirst();
        }

        if (scanner.hasReachedEOF()) {
            return null;
        }

        // Retrieve the next token
        matchWhiteSpaces(); // skip white spaces
        // Mark the token start; emitText()/emitString() read the text
        // selected by the scanner from here
        scanner.resetMarks();
        scanner.mark();
        int c = LA(1);
        switch (c) {
            case '{':
                consume();
                emit(JSONToken.LBrace);
                break;
            case '}':
                consume();
                emit(JSONToken.RBrace);
                break;
            case '[':
                consume();
                emit(JSONToken.LBracket);
                break;
            case ']':
                consume();
                emit(JSONToken.RBracket);
                break;
            case ':':
                consume();
                emit(JSONToken.Colon);
                break;
            case ',':
                consume();
                emit(JSONToken.Comma);
                break;
            case '"':
                parseString();
                break;
            case BufferedScanner.EOF:
                return null;
            default:
                parseValue();
                break;
        }

        // Every branch above either threw or queued exactly one token
        return tokenQueue.pollFirst();
    }

    /**
     * Skips spaces, tabs, and line breaks. Line and column bookkeeping is left
     * entirely to {@link #consume()}, so that each line break is counted
     * exactly once.
     * 
     * @throws XerialException
     */
    void matchWhiteSpaces() throws XerialException {
        for (;;) {
            int c = LA(1);
            if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
                return;
            }
            consume();
        }
    }

    /**
     * Looks ahead k characters without consuming them.
     * 
     * @param k
     *            look-ahead distance passed to the scanner (1 = next character)
     * @return the value reported by the scanner for that position
     * @throws XerialException
     *             wrapping any IOException raised by the scanner
     */
    protected int LA(int k) throws XerialException {
        try {
            return scanner.LA(k);
        }
        catch (IOException e) {
            throw XerialException.convert(e);
        }
    }

    /**
     * Consumes one character and updates the line/column counters. A CR that
     * is immediately followed by LF is not counted, so CRLF counts as a single
     * line break.
     * 
     * @throws XerialException
     *             wrapping any IOException raised by the scanner
     */
    protected void consume() throws XerialException {
        try {
            int c = scanner.consume();
            switch (c) {
                case '\r':
                    if (scanner.LA(1) != '\n') {
                        lineCount++;
                        posInLine = 0;
                    }
                    break;
                case '\n':
                    lineCount++;
                    posInLine = 0;
                    break;
                default:
                    posInLine++;
                    break;
            }
        }
        catch (IOException e) {
            throw XerialException.convert(e);
        }
    }

    /**
     * Lexes a number or one of the literals null, true, false, and queues the
     * corresponding token.
     * 
     * @throws XerialException
     *             when the input at the current position is none of these
     */
    public void parseValue() throws XerialException {
        int c = LA(1);

        if (c == '-' || (c >= '0' && c <= '9')) {
            parseNumber();
        }
        else if (match(NULL)) {
            emit(JSONToken.Null);
        }
        else if (match(TRUE)) {
            emit(JSONToken.True);
        }
        else if (match(FALSE)) {
            emit(JSONToken.False);
        }
        else {
            throw error("value", LA(1));
        }
    }

    /**
     * Lexes a number: optional '-', then either a single '0' or a non-zero
     * digit followed by digits, then an optional fraction and an optional
     * exponent. Queues a Double token when a fraction or an exponent is
     * present, an Integer token otherwise.
     * 
     * @throws XerialException
     *             when the input is not a well-formed number
     */
    public void parseNumber() throws XerialException {
        int c = LA(1);

        // Negative flag: only accepted when a digit follows
        if (c == '-') {
            int next = LA(2);
            if (next < '0' || next > '9') {
                throw error("Number", c);
            }
            consume();
            c = next;
        }

        // Integer part: a leading zero stands alone
        if (c == '0') {
            consume();
        }
        else if (c >= '1' && c <= '9') {
            consume();
            matchDigit_s();
        }
        else {
            throw error("Number", c);
        }

        // Optional fraction
        boolean isDouble = false;
        if (LA(1) == '.') {
            consume();
            matchDigit_p();
            isDouble = true;
        }

        // Optional exponent
        if (matchExp()) {
            isDouble = true;
        }

        emitText(isDouble ? JSONToken.Double : JSONToken.Integer);
    }

    /** Consumes zero or more decimal digits. */
    private void matchDigit_s() throws XerialException {
        for (;;) {
            int c = LA(1);
            if (c < '0' || c > '9') {
                return;
            }
            consume();
        }
    }

    /** Consumes one or more decimal digits; fails when there is none. */
    private void matchDigit_p() throws XerialException {
        int c = LA(1);
        if (c < '0' || c > '9') {
            throw error("Digit+", c);
        }
        consume();
        matchDigit_s();
    }

    /**
     * Consumes an exponent ('e' or 'E', optional sign, one or more digits) if
     * one starts at the current position.
     * 
     * @return true when an exponent was consumed, false when the next
     *         character is not 'e'/'E' (nothing is consumed in that case)
     * @throws XerialException
     *             when the exponent marker is not followed by digits
     */
    private boolean matchExp() throws XerialException {
        int c = LA(1);
        if (c != 'e' && c != 'E') {
            return false;
        }
        consume();

        int sign = LA(1);
        if (sign == '+' || sign == '-') {
            consume();
        }
        matchDigit_p();
        return true;
    }

    private final static byte[] NULL = { 'n', 'u', 'l', 'l' };
    private final static byte[] TRUE = { 't', 'r', 'u', 'e' };
    private final static byte[] FALSE = { 'f', 'a', 'l', 's', 'e' };

    /**
     * Consumes the given text if, and only if, the upcoming characters match
     * it exactly.
     * 
     * @param text
     *            the expected characters
     * @return true when the text was matched and consumed; false when it did
     *         not match (nothing is consumed)
     * @throws XerialException
     */
    boolean match(byte[] text) throws XerialException {
        // Verify the whole literal by look-ahead before consuming anything
        for (int i = 0; i < text.length; ++i) {
            if (text[i] != LA(i + 1)) {
                return false;
            }
        }

        for (int i = 0; i < text.length; ++i) {
            consume();
        }
        return true;
    }

    /**
     * Lexes a double-quoted string, validating escape sequences, and queues a
     * String token.
     * 
     * @throws XerialException
     *             on an invalid escape sequence, or when the input ends before
     *             the closing quote
     */
    void parseString() throws XerialException {
        match('"');

        for (;;) {
            int c = LA(1);
            switch (c) {
                case '"':
                    // end of string
                    consume();
                    emitString();
                    return;
                case '\\':
                    // escape sequence
                    matchEscapeSequence();
                    break;
                case BufferedScanner.EOF:
                    // The loop only terminates on a closing quote, so an
                    // unterminated string must be rejected explicitly
                    throw error("String", c);
                default:
                    consume();
                    break;
            }
        }
    }

    /**
     * Consumes one escape sequence: a backslash followed by one of
     * <code>" \ / b f n r t</code>, or by 'u' and four hex digits.
     * 
     * @throws XerialException
     *             when the sequence is not one of the above
     */
    public void matchEscapeSequence() throws XerialException {
        match('\\');
        int c = LA(1);
        switch (c) {
            case '"':
            case '\\':
            case '/':
            case 'b':
            case 'f':
            case 'n':
            case 'r':
            case 't':
                consume();
                break;
            case 'u':
                consume();
                for (int i = 0; i < 4; ++i) {
                    matchHexDigit();
                }
                break;
            default:
                throw error("escape sequence", c);
        }
    }

    /**
     * Consumes a single hexadecimal digit (0-9, A-F, a-f).
     * 
     * @throws XerialException
     *             when the next character is not a hex digit
     */
    public void matchHexDigit() throws XerialException {
        int c = LA(1);
        boolean isHex = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
        if (!isHex) {
            throw error("hex digit", c);
        }
        consume();
    }

    /**
     * Consumes the next character, which must equal the expected one.
     * 
     * @param expected
     *            the required character
     * @throws XerialException
     *             when a different character is found; the message reports
     *             both characters as their numeric values
     */
    public void match(int expected) throws XerialException {
        int c = LA(1);
        if (c != expected) {
            throw new XerialException(XerialErrorCode.PARSE_ERROR, String.format(
                    "expected:'%s' but found '%s'", expected, c));
        }
        consume();
    }

    /**
     * Builds (does not throw) a parse error for an unexpected character. The
     * character is reported as its numeric value.
     */
    private XerialException error(String tokenType, int foundChar) {
        return new XerialException(XerialErrorCode.PARSE_ERROR, String.format(
                "<%s> invalid char '%s'", tokenType, foundChar));
    }

    /** Queues a token that carries no text. */
    void emit(JSONToken type) {
        tokenQueue.add(new Token(type, null));
    }

    /** Queues a token carrying the text currently selected by the scanner. */
    void emitText(JSONToken type) {
        tokenQueue.add(new Token(type, scanner.selectedRawString()));
    }

    /**
     * Queues a String token carrying the scanner's selection with a margin of
     * 1 -- presumably this strips the surrounding quotes; confirm against
     * BufferedScanner.selectedRawString(int).
     */
    void emitString() {
        tokenQueue.add(new Token(JSONToken.String, scanner.selectedRawString(1)));
    }

    /**
     * @return the number of line breaks consumed so far (starts at 0)
     */
    public long getLineNumber() {
        return lineCount;
    }

    /**
     * @return the number of characters consumed since the last line break
     */
    public int getPosInLine() {
        return posInLine;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy