com.github.jlangch.venice.impl.reader.HighlightParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of venice Show documentation
Show all versions of venice Show documentation
Venice, a sandboxed Lisp implemented in Java.
The newest version!
/* __ __ _
* \ \ / /__ _ __ (_) ___ ___
* \ \/ / _ \ '_ \| |/ __/ _ \
* \ / __/ | | | | (_| __/
* \/ \___|_| |_|_|\___\___|
*
*
* Copyright 2017-2024 Venice
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jlangch.venice.impl.reader;
import static com.github.jlangch.venice.impl.reader.HighlightClass.AT;
import static com.github.jlangch.venice.impl.reader.HighlightClass.BRACE_BEGIN;
import static com.github.jlangch.venice.impl.reader.HighlightClass.BRACE_END;
import static com.github.jlangch.venice.impl.reader.HighlightClass.BRACKET_BEGIN;
import static com.github.jlangch.venice.impl.reader.HighlightClass.BRACKET_END;
import static com.github.jlangch.venice.impl.reader.HighlightClass.CONSTANT;
import static com.github.jlangch.venice.impl.reader.HighlightClass.HASH;
import static com.github.jlangch.venice.impl.reader.HighlightClass.KEYWORD;
import static com.github.jlangch.venice.impl.reader.HighlightClass.META;
import static com.github.jlangch.venice.impl.reader.HighlightClass.NUMBER;
import static com.github.jlangch.venice.impl.reader.HighlightClass.PARENTHESIS_BEGIN;
import static com.github.jlangch.venice.impl.reader.HighlightClass.PARENTHESIS_END;
import static com.github.jlangch.venice.impl.reader.HighlightClass.QUASI_QUOTE;
import static com.github.jlangch.venice.impl.reader.HighlightClass.QUOTE;
import static com.github.jlangch.venice.impl.reader.HighlightClass.STRING;
import static com.github.jlangch.venice.impl.reader.HighlightClass.SYMBOL;
import static com.github.jlangch.venice.impl.reader.HighlightClass.SYMBOL_EAR_MUFFS;
import static com.github.jlangch.venice.impl.reader.HighlightClass.SYMBOL_FUNCTION_NAME;
import static com.github.jlangch.venice.impl.reader.HighlightClass.SYMBOL_MACRO_NAME;
import static com.github.jlangch.venice.impl.reader.HighlightClass.SYMBOL_SPECIAL_FORM;
import static com.github.jlangch.venice.impl.reader.HighlightClass.UNKNOWN;
import static com.github.jlangch.venice.impl.reader.HighlightClass.UNQUOTE;
import static com.github.jlangch.venice.impl.reader.HighlightClass.UNQUOTE_SPLICING;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.github.jlangch.venice.EofException;
import com.github.jlangch.venice.impl.specialforms.SpecialForms;
public class HighlightParser {
private HighlightParser(
final String form,
final List formTokens
) {
this.form = form;
this.tokens = formTokens;
this.position = 0;
}
public static List parse(final String str) {
final List tokens = Tokenizer.tokenize(
str, "highlighter", false, false, false);
final HighlightParser hl = new HighlightParser(str, tokens);
try {
hl.process_form();
hl.finish();
return hl.items();
}
catch(EofException ex) {
// return what we've got so far
return hl.items();
}
}
private List items() {
return items;
}
private void addItem(final String token, final HighlightClass clazz) {
items.add(new HighlightItem(token, pinnedClass == null ? clazz : pinnedClass));
}
private void addItem(final char token, final HighlightClass clazz) {
addItem(String.valueOf(token), clazz);
}
private Token peek() {
while(true) {
if (position >= tokens.size()) {
throw new EofException("Unexpected EOF");
}
final Token token = tokens.get(position);
if (token.isWhitespaces()) {
addItem(token.getToken(), HighlightClass.WHITESPACES);
position++;
}
else if (token.isComment()) {
addItem(token.getToken(), HighlightClass.COMMENT);
position++;
}
else {
return token;
}
}
}
private Token next() {
final Token t = peek();
position++;
return t;
}
private void process_atom() {
final Token token = next();
final String sToken = token.getToken();
switch(Reader.getAtomType(token)) {
case NIL:
case TRUE:
case FALSE:
addItem(sToken, CONSTANT);
break;
case INTEGER:
case LONG:
case FLOAT:
case DOUBLE:
case DECIMAL:
case BIGINT:
addItem(sToken, NUMBER);
break;
case STRING:
addItem(sToken, STRING);
break;
case STRING_BLOCK:
addItem(sToken, STRING);
break;
case KEYWORD:
addItem(sToken, KEYWORD);
break;
case SYMBOL: {
final HighlightItem last = items.size() < 1 ? null : items.get(items.size()-1);
final HighlightItem secondLast = items.size() < 2 ? null : items.get(items.size()-2);
if (last != null && last.getClazz() == PARENTHESIS_BEGIN) {
if (secondLast != null && secondLast.getClazz() == QUOTE) {
// '(a 2 3)
addItem(sToken, SYMBOL);
}
else if (SpecialForms.isSpecialForm(sToken)) {
// (def a 10)
addItem(sToken, SYMBOL_SPECIAL_FORM);
}
else if (SYSTEM_VARS.contains(sToken)) {
addItem(sToken, SYMBOL_SPECIAL_FORM);
}
else if (CORE_MACROS.contains(sToken)) {
// (and true false)
addItem(sToken, SYMBOL_MACRO_NAME);
}
else if (sToken.startsWith("*") && sToken.endsWith("*")){
// (*out*)
addItem(sToken, SYMBOL_EAR_MUFFS);
}
else {
// (+ 1 2)
addItem(sToken, SYMBOL_FUNCTION_NAME);
}
}
else {
addItem(sToken, SYMBOL);
}
}
break;
case UNKNOWN:
default:
addItem(sToken, UNKNOWN);
break;
}
}
private void process_list(
final char start,
final char end
) {
final Token startTok = next();
addItem(startTok.getToken(), parenToClass(start));
Token token = peek();
while (token.charAt(0) != end) {
process_form();
token = peek();
}
final Token endTok = next();
addItem(endTok.getToken(), parenToClass(end));
}
private void process_form() {
final Token token = peek();
switch (token.charAt(0)) {
case '\'':
next();
addItem('\'', QUOTE);
process_form();
break;
case '`':
next();
addItem('`', QUASI_QUOTE);
process_form();
break;
case '~':
if (token.equals("~")) {
next();
addItem('~', UNQUOTE);
process_form();
}
else {
next();
addItem("~@", UNQUOTE_SPLICING);
process_form();
}
break;
case '^':
next();
addItem('^', META);
pinnedClass = META;
process_form();
pinnedClass = null;
break;
case '@':
next();
addItem("@", AT);
process_form();
break;
case '#':
final String sToken = token.getToken();
next();
if (sToken.length() == 1) {
addItem("#", HASH);
Token t = peek();
if (t.charAt(0) == '{') { // set literal #{1 2}
process_list('{' , '}');
}
else if (t.charAt(0) == '(') { // anonymous function literal #(> % 2)
process_list('(' , ')');
}
else if (t.getType() == TokenType.STRING) {
next();
String s = t.getToken();
if (s.startsWith("\"")) {
// regex pattern: #"[0-9]+"
addItem(s, STRING);
}
}
}
else {
addItem(sToken, UNKNOWN);
}
break;
case '(':
process_list('(' , ')');
break;
case ')':
next();
addItem(')', PARENTHESIS_END);
break;
case '[':
process_list('[' , ']');
break;
case ']':
next();
addItem(']', BRACKET_END);
break;
case '{':
process_list('{', '}');
break;
case '}':
next();
addItem('}', BRACE_END);
break;
default:
process_atom();
break;
}
}
private HighlightClass parenToClass(final char ch) {
switch(ch) {
case '(': return PARENTHESIS_BEGIN;
case ')': return PARENTHESIS_END;
case '[': return BRACKET_BEGIN;
case ']': return BRACKET_END;
case '{': return BRACE_BEGIN;
case '}': return BRACE_END;
default: throw new RuntimeException("Invalid parenthesis '" + ch + "'");
}
}
private void finish() {
while (hasUnprocessedTokens()) {
final Token tok = next();
if (tok.getType() == TokenType.COMMENT) {
addItem(tok.getToken(), HighlightClass.COMMENT);
}
else if (tok.getType() == TokenType.WHITESPACES) {
addItem(tok.getToken(), HighlightClass.WHITESPACES);
}
else {
addItem(form.substring(tok.getFileStartPos()), HighlightClass.UNPROCESSED);
break;
}
}
}
private boolean hasUnprocessedTokens() {
return position < tokens.size();
}
private static Set SYSTEM_VARS = new HashSet<>(
Arrays.asList(
"*version*",
"*newline*",
"*loaded-modules*",
"*loaded-files*",
"*ns*",
"*run-mode*",
"*ansi-term*"));
private static Set CORE_MACROS = new HashSet<>(
Arrays.asList(
"assert",
"and",
"or",
"cond",
"condp",
"case",
"when",
"when-not",
"coalesce",
"if-let",
"when-let",
"dotimes",
"while",
"doto",
"->",
"->>",
"-<>",
"as->",
"cond->",
"cond->>",
"some->",
"some->>",
"list-comp",
"doseq",
"time",
"perf",
"load-string",
"load-file",
"load-classpath-file",
"load-module",
"with-sh-dir",
"with-sh-throw",
"with-out-str",
"with-err-str",
"delay"));
private final String form;
private final List items= new ArrayList<>();
private final List tokens;
private int position;
private HighlightClass pinnedClass = null;
}