cdc.applic.expressions.parsing.OneCharTokens Maven / Gradle / Ivy
Show all versions of cdc-applic-expressions Show documentation
package cdc.applic.expressions.parsing;
import cdc.util.function.BitSetCharPredicate;
import cdc.util.function.BooleanArrayCharPredicate;
import cdc.util.function.CharFunction;
import cdc.util.function.CharPredicate;
import cdc.util.function.MultiplyShiftCharPredicate;
import cdc.util.lang.UnexpectedValueException;
/**
 * Recognition of tokens that consist of a single character.
 * <p>
 * Several alternative matchers (is this character a one-char token?) and mappers
 * (which token type does this character denote?) are declared here.
 * {@link #BEST_MATCHER} and {@link #BEST_MAPPER} designate the ones that should be used.
 * <p>
 * <b>WARNING:</b> inside an escaped text or a number, those chars should not be recognized as tokens.
 *
 * @author Damien Carbonne
 */
public final class OneCharTokens {
    /**
     * String containing characters that can be recognized as a token, without needing to look further.
     * <p>
     * If a token holds on one char, but other tokens start with this char, then this one char token is excluded.
     * This excludes:
     * <ul>
     * <li>{@code '!'} (because of {@code '!=', '!<', '!<=', ...})
     * <li>{@code '<'} (because of {@code '<=', '<:'})
     * <li>{@code '>'} (because of {@code '>='})
     * </ul>
     */
    static final String CHARS = "(){},~.=&|¬∧∨∈∉≠→↔↮⊤⊥≮≯≤≥≰≱∅";

    /**
     * Perfect hash table for {@link #CHARS}.
     * <p>
     * The slot of a character {@code c} is {@code (c * MULTIPLY) >>> SHIFT}.
     * {@code '('} is used as the filling character for unused slots;
     * this is safe because {@code '('} itself hashes to a slot that contains {@code '('}.
     * <p>
     * <b>WARNING:</b> this is computed and must match {@link #CHARS},
     * {@link #MULTIPLY} and {@link #SHIFT}.
     */
    static final String STABLE = "((((((≮≯≰≱∧∨(→↔((¬((((((((((((((&()(↮,.{|}~(((∅(∈∉(=⊤⊥((((≠(((≤≥";

    /**
     * Conversion of {@link #STABLE} as a char array.
     * <p>
     * This array must be treated as read-only.
     */
    static final char[] TABLE = STABLE.toCharArray();

    /**
     * Mapping from {@link #STABLE} slots to token types.
     * <p>
     * The entry at index {@code i} is the token type of the character stored at
     * index {@code i} of {@link #STABLE}, or {@code null} for a filler slot.
     * This array must be treated as read-only.
     * <p>
     * <b>WARNING:</b> This is computed and must match {@link #CHARS} and {@link #STABLE}.
     */
    static final TokenType[] SMAP = {
            null,
            null,
            null,
            null,
            null,
            null,
            TokenType.NOT_LESS,
            TokenType.NOT_GREATER,
            TokenType.NEITHER_LESS_NOR_EQUAL,
            TokenType.NEITHER_GREATER_NOR_EQUAL,
            TokenType.AND,
            TokenType.OR,
            null,
            TokenType.IMPL,
            TokenType.EQUIV,
            null,
            null,
            TokenType.NOT,
            null,
            null,
            null,
            null,
            null,
            null,
            null,
            null,
            null,
            null,
            null,
            null,
            null,
            null,
            TokenType.AND,
            TokenType.OPEN_PAREN,
            TokenType.CLOSE_PAREN,
            null,
            TokenType.XOR,
            TokenType.ITEMS_SEP,
            TokenType.PATH_SEP,
            TokenType.OPEN_SET,
            TokenType.OR,
            TokenType.CLOSE_SET,
            TokenType.TO,
            null,
            null,
            null,
            TokenType.EMPTY_SET,
            null,
            TokenType.IN,
            TokenType.NOT_IN,
            null,
            TokenType.EQUAL,
            TokenType.TRUE,
            TokenType.FALSE,
            null,
            null,
            null,
            null,
            TokenType.NOT_EQUAL,
            null,
            null,
            null,
            TokenType.LESS_OR_EQUAL,
            TokenType.GREATER_OR_EQUAL
    };

    /**
     * The multiplier to use for {@link #TABLE}.
     */
    static final int MULTIPLY = 56_576_049;

    /**
     * The shift to use for {@link #TABLE}.
     * <p>
     * An unsigned shift of a 32-bit product by 26 leaves 6 bits,
     * so computed slots are always in the range {@code [0, 63]}.
     */
    static final int SHIFT = 26;

    /** Utility class: not instantiable. */
    private OneCharTokens() {
    }

    /**
     * Matcher built by {@link MultiplyShiftCharPredicate} from {@link #STABLE},
     * {@link #MULTIPLY} and {@link #SHIFT}.
     */
    public static final CharPredicate MULTIPLY_SHIFT_MATCHER =
            new MultiplyShiftCharPredicate(STABLE,
                                           MULTIPLY,
                                           SHIFT);

    /**
     * Matcher that looks up {@link #TABLE} directly: a character matches when the slot
     * it hashes to contains that very character.
     */
    public static final CharPredicate MULTIPLY_SHIFT_INLINE_MATCHER =
            c -> TABLE[(c * MULTIPLY) >>> SHIFT] == c;

    /**
     * Matcher built by {@link BitSetCharPredicate} from {@link #CHARS}.
     */
    public static final CharPredicate BIT_SET_MATCHER =
            new BitSetCharPredicate(CHARS);

    /**
     * Matcher built by {@link BooleanArrayCharPredicate} from {@link #CHARS}.
     */
    public static final CharPredicate BOOLEAN_ARRAY_MATCHER =
            new BooleanArrayCharPredicate(CHARS);

    /**
     * Matcher that must be used to identify one character tokens.
     */
    public static final CharPredicate BEST_MATCHER = MULTIPLY_SHIFT_INLINE_MATCHER;

    /**
     * Mapper based on a {@code switch}.
     * <p>
     * It throws an {@link UnexpectedValueException} when the character
     * is not one of {@link #CHARS}.
     */
    public static final CharFunction<TokenType> SWITCH_MAPPER =
            c -> (switch (c) {
            case '(' -> TokenType.OPEN_PAREN;
            case ')' -> TokenType.CLOSE_PAREN;
            case '{' -> TokenType.OPEN_SET;
            case '}' -> TokenType.CLOSE_SET;
            case ',' -> TokenType.ITEMS_SEP;
            case '~' -> TokenType.TO;
            case '.' -> TokenType.PATH_SEP;
            case '=' -> TokenType.EQUAL;
            case '≠' -> TokenType.NOT_EQUAL;
            case '&', '∧' -> TokenType.AND;
            case '|', '∨' -> TokenType.OR;
            case '¬' -> TokenType.NOT;
            case '∈' -> TokenType.IN;
            case '∉' -> TokenType.NOT_IN;
            case '→' -> TokenType.IMPL;
            case '↔' -> TokenType.EQUIV;
            case '↮' -> TokenType.XOR;
            case '⊤' -> TokenType.TRUE;
            case '⊥' -> TokenType.FALSE;
            case '≮' -> TokenType.NOT_LESS;
            case '≯' -> TokenType.NOT_GREATER;
            case '≤' -> TokenType.LESS_OR_EQUAL;
            case '≥' -> TokenType.GREATER_OR_EQUAL;
            case '≰' -> TokenType.NEITHER_LESS_NOR_EQUAL;
            case '≱' -> TokenType.NEITHER_GREATER_NOR_EQUAL;
            case '∅' -> TokenType.EMPTY_SET;
            default -> throw new UnexpectedValueException("No token type associated to " + c);
            });

    /**
     * Mapper that looks up {@link #SMAP} directly, using the same slot computation as
     * {@link #MULTIPLY_SHIFT_INLINE_MATCHER}.
     * <p>
     * <b>WARNING:</b> the character is not validated. This mapper must only be applied to a
     * character accepted by {@link #BEST_MATCHER}. For any other character, and contrary to
     * {@link #SWITCH_MAPPER}, no exception is thrown: the result is either {@code null}
     * (filler slot) or the token type of an unrelated character that shares the same slot.
     */
    public static final CharFunction<TokenType> MULTIPLY_SHIFT_INLINE_MAPPER =
            c -> SMAP[(c * MULTIPLY) >>> SHIFT];

    /**
     * Mapper that must be used to map a one character token to its corresponding token type.
     * <p>
     * The character must have been accepted by {@link #BEST_MATCHER} beforehand;
     * see {@link #MULTIPLY_SHIFT_INLINE_MAPPER} for the behavior on other characters.
     */
    public static final CharFunction<TokenType> BEST_MAPPER = MULTIPLY_SHIFT_INLINE_MAPPER;
}