// de.unkrig.commons.text.json.JsonScanner (Maven / Gradle / Ivy)
/*
* de.unkrig.commons - A general-purpose Java class library
*
* Copyright (c) 2013, Arno Unkrig
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
* following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
* following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
* following disclaimer in the documentation and/or other materials provided with the distribution.
* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package de.unkrig.commons.text.json;
import de.unkrig.commons.lang.protocol.Predicate;
import de.unkrig.commons.nullanalysis.Nullable;
import de.unkrig.commons.text.scanner.AbstractScanner.Token;
import de.unkrig.commons.text.scanner.ScannerUtil;
import de.unkrig.commons.text.scanner.StatefulScanner;
import de.unkrig.commons.text.scanner.StringScanner;
/**
* A JASON scanner; scans tokens as defined on json.org.
*/
public final
class JsonScanner {
private JsonScanner() {}
/** Representation of the type of a token. */
public
enum TokenType {
// SUPPRESS CHECKSTYLE JavadocVariable:11
SPACE,
CXX_COMMENT,
SINGLE_LINE_C_COMMENT,
MULTI_LINE_C_COMMENT_BEGINNING, MULTI_LINE_C_COMMENT_MIDDLE, MULTI_LINE_C_COMMENT_END,
END_OF_IGNORABLES,
OPERATOR, NUMBER, KEYWORD,
DOUBLE_QUOTE, STRING_UNICODE_ESCAPE, STRING_ESCAPE, STRING_CHARS,
}
private
enum State { IN_MULTI_LINE_C_COMMENT, IN_STRING }
/**
* Returns a Java scanner that also produces SPACE and COMMENT tokens.
*/
public static StringScanner
rawStringScanner() {
StatefulScanner scanner = new StatefulScanner(State.class);
scanner.addRule("\\s+", TokenType.SPACE);
scanner.addRule("//.*(?:\r|\r\n|\n)?", TokenType.CXX_COMMENT);
scanner.addRule("/\\*.*?\\*/", TokenType.SINGLE_LINE_C_COMMENT);
// Multi-line C-style comments require special treatment, i.e. a special scanner state
// 'IN_MULTI_LINE_C_COMMENT'.
scanner.addRule("/\\*.*", TokenType.MULTI_LINE_C_COMMENT_BEGINNING).goTo(State.IN_MULTI_LINE_C_COMMENT);
{
scanner.addRule(State.IN_MULTI_LINE_C_COMMENT, ".*?\\*/", TokenType.MULTI_LINE_C_COMMENT_END);
scanner.addRule(
State.IN_MULTI_LINE_C_COMMENT,
".*",
TokenType.MULTI_LINE_C_COMMENT_MIDDLE
).goTo(State.IN_MULTI_LINE_C_COMMENT);
}
scanner.addRule("\\p{Alpha}+", TokenType.KEYWORD);
scanner.addRule("-?[0-9.][0-9.Ee]*", TokenType.NUMBER);
// Strings require special treatment, i.e. a special scanner state 'IN_STRING'.
scanner.addRule("\"", TokenType.DOUBLE_QUOTE).goTo(State.IN_STRING);
{
scanner.addRule(
State.IN_STRING,
"\\\\u\\p{XDigit}\\p{XDigit}\\p{XDigit}\\p{XDigit}",
TokenType.STRING_UNICODE_ESCAPE
).goTo(State.IN_STRING);
scanner.addRule(State.IN_STRING, "\\\\.", TokenType.STRING_ESCAPE).goTo(State.IN_STRING);
scanner.addRule(State.IN_STRING, "[^\\p{Cntrl}\"\\\\]+", TokenType.STRING_CHARS).goTo(State.IN_STRING);
scanner.addRule(State.IN_STRING, "\"", TokenType.DOUBLE_QUOTE);
}
scanner.addRule("[\\{\\}\\[\\]:,]", TokenType.OPERATOR);
return scanner;
}
/**
* @return A scanner that swallows SPACE and COMMENT tokens
*/
public static StringScanner
stringScanner() {
return ScannerUtil.filter(JsonScanner.rawStringScanner(), new Predicate>() {
@Override public boolean
evaluate(@Nullable Token token) {
return token == null || token.type.ordinal() > TokenType.END_OF_IGNORABLES.ordinal();
}
});
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy