// io.jenetics.ext.internal.util.FormulaParser Maven / Gradle / Ivy
/*
* Java Genetic Algorithm Library (jenetics-8.1.0).
* Copyright (c) 2007-2024 Franz Wilhelmstötter
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Author:
* Franz Wilhelmstötter ([email protected])
*/
package io.jenetics.ext.internal.util;
import static java.util.Objects.requireNonNull;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.function.Supplier;
import io.jenetics.ext.internal.parser.Parser;
import io.jenetics.ext.internal.parser.ParsingException;
import io.jenetics.ext.util.TreeNode;
/**
* This class allows you to convert a sequence of tokens, which
* represents some kind of (mathematical) formula, into a tree structure. To do
* this, it is assumed that the given tokens can be categorized. The two main
* categories are structural tokens and operational tokens.
*
* Structural tokens
* Structural tokens are used to influence the hierarchy of the parsed tokens
* and are also part of function definitions. This kind of token will not be
* part of the generated tree representation.
*
* - lparen: Represents left parentheses, which starts
* sub-trees or opens function argument lists.
* - rparen: Represents right parentheses, which closes
* sub-trees or function argument lists. lparen and
* rparen must be balanced.
* - comma: Separator token for function arguments.
*
*
* Operational tokens
* Operational tokens define the actual behaviour of the created tree.
*
* - identifier: This kind of tokens usually represents variable
* names or numbers.
* - function: Function tokens represents identifiers for
* functions. Valid functions have the following form: {@code 'fun' 'lparen'
* arg ['comma' args]* 'rparen'}
* - binary operator: Binary operators are defined in infix
* order and have a precedence. Typical examples are the arithmetic
* operators '+' and '*', where the '*' have a higher precedence than '+'.
* - unary operator: Unary operators are prefix operators. A
* typical example is the arithmetic negation operator '-'. Unary
* operators have all the same precedence, which is higher than the
* precedence of all binary operators.
*
*
* This class is only responsible for the parsing step. The tokenization must
* be implemented separately. Another possible token source would be a generating
* grammar, where the output is already a list of tokens (aka sentence). The
* following example parser can be used to parse arithmetic expressions.
*
* {@snippet lang="java":
* final FormulaParser parser = FormulaParser.builder()
* // Structural tokens.
* .lparen("(")
* .rparen(")")
* .separator(",")
* // Operational tokens.
* .unaryOperators("+", "-")
* .binaryOperators(ops -> ops
* .add(11, "+", "-")
* .add(12, "*", "/")
* .add(14, "^", "**"))
* .identifiers("x", "y", "z")
* .functions("pow", "sin", "cos")
* .build();
* }
* This parser allows you to parse the following token list
* {@snippet lang="java":
* final List tokens = List.of(
* "x", "*", "x", "+", "sin", "(", "z", ")", "-", "cos", "(", "x",
* ")", "+", "y", "/", "z", "-", "pow", "(", "z", ",", "x", ")"
* );
* final Tree tree = parser.parse(tokens);
* }
* which will result in the following parsed tree:
* {@code
* "-"
* ├── "+"
* │ ├── "-"
* │ │ ├── "+"
* │ │ │ ├── "*"
* │ │ │ │ ├── "x"
* │ │ │ │ └── "x"
* │ │ │ └── "sin"
* │ │ │ └── "z"
* │ │ └── "cos"
* │ │ └── "x"
* │ └── "/"
* │ ├── "y"
* │ └── "z"
* └── "pow"
* ├── "z"
* └── "x"
* }
* Note that the generated (parsed) tree is of type {@code Tree}. To
* evaluate this tree, additional steps are necessary. If you want to
* create an executable tree, you have to use the
* {@link #parse(Iterable, TokenConverter)} function for parsing the tokens.
*
* The following code snippet shows how to create an executable AST
* from a token list. The {@code MathExpr} class in the {@code io.jenetics.prog}
* module uses a similar {@link TokenConverter}.
* {@snippet lang="java":
* final Tree, ?> tree = formula.parse(
* tokens,
* (token, type) -> switch (token) {
* case "+" -> type == TokenType.UNARY_OPERATOR ? MathOp.ID : MathOp.ADD;
* case "-" -> type == TokenType.UNARY_OPERATOR ? MathOp.NEG : MathOp.SUB;
* case "*" -> MathOp.MUL;
* case "/" -> MathOp.DIV;
* case "^", "**", "pow" -> MathOp.POW;
* case "sin" -> MathOp.SIN;
* case "cos" -> MathOp.COS;
* default -> type == TokenType.IDENTIFIER
* ? Var.of(token)
* : throw new IllegalArgumentException("Unknown token: " + token);
* }
* );
* }
*
* @param the token type used as input for the parser
*
* @implNote
* This class is immutable and thread-safe.
*
* @author Franz Wilhelmstötter
* @since 7.1
* @version 7.1
*/
public final class FormulaParser {
/**
* The token types the parser recognizes during the parsing process.
*/
public enum TokenType {
/**
* Indicates an unary operator.
*/
UNARY_OPERATOR,
/**
* Indicates a binary operator.
*/
BINARY_OPERATOR,
/**
* Indicates a function token.
*/
FUNCTION,
/**
* Indicates an identifier token.
*/
IDENTIFIER
}
/**
* Conversion function which is used for converting tokens into another
* type.
*
* @param the token type
* @param the converted value type
*/
@FunctionalInterface
public interface TokenConverter {
/**
* Convert the given {@code token} into another value. The conversion
* can use the token type, recognized during the parsing process.
*
* @param token the token value to convert
* @param type the token type, recognized during the parsing process
* @return the converted value
*/
V convert(final T token, final TokenType type);
}
private final Predicate super T> _lparen;
private final Predicate super T> _rparen;
private final Predicate super T> _separator;
private final Predicate super T> _uops;
private final Predicate super T> _identifiers;
private final Predicate super T> _functions;
// The processed binary operators.
private final Term _term;
/**
* Creates a new general expression parser object. The parser is not bound
* to a specific source and target type or concrete token types.
*
* @param lparen the token type specifying the left parentheses, '('
* @param rparen the token type specifying the right parentheses, ')'
* @param separator the token type specifying the function parameter
* separator, ','
* @param bops the list of binary operators, according its
* precedence. The first list element contains the operations with
* the lowest precedence, and the last list element contains the
* operations with the highest precedence.
* @param uops the token types representing the unary operations
* @param identifiers the token type representing identifier, like variable
* names, constants or numbers
* @param functions predicate which tests whether a given identifier value
* represents a known function name
*/
private FormulaParser(
final Predicate super T> lparen,
final Predicate super T> rparen,
final Predicate super T> separator,
final List extends Predicate super T>> bops,
final Predicate super T> uops,
final Predicate super T> identifiers,
final Predicate super T> functions
) {
_lparen = requireNonNull(lparen);
_rparen = requireNonNull(rparen);
_separator = requireNonNull(separator);
_uops = requireNonNull(uops);
_identifiers = requireNonNull(identifiers);
_functions = requireNonNull(functions);
final Term oterm = BopTerm.build(bops);
final Term fterm = new Term<>() {
@Override
TreeNode term(
final Parser parser,
final TokenConverter super T, ? extends V> mapper
) {
return function(parser, mapper);
}
};
if (oterm != null) {
oterm.append(fterm);
_term = oterm;
} else {
_term = fterm;
}
}
private TreeNode function(
final Parser parser,
final TokenConverter super T, ? extends V> mapper
) {
final var token = parser.LT(1);
if (_functions.test(token)) {
parser.consume();
final TreeNode node = TreeNode
.of(mapper.convert(token, TokenType.FUNCTION));
parser.match(_lparen);
node.attach(_term.expr(parser, mapper));
while (_separator.test(parser.LT(1))) {
parser.consume();
node.attach(_term.expr(parser, mapper));
}
parser.match(_rparen);
return node;
} else if (_lparen.test(token)) {
parser.consume();
final TreeNode node = _term.expr(parser, mapper);
parser.match(_rparen);
return node;
} else {
return unary(() -> atom(parser, mapper), parser, mapper);
}
}
private TreeNode atom(
final Parser parser,
final TokenConverter super T, ? extends V> mapper
) {
final var token = parser.LT(1);
if (_identifiers.test(token)) {
parser.consume();
return TreeNode.of(mapper.convert(token, TokenType.IDENTIFIER));
} else if (token == null) {
throw new ParsingException("Unexpected end of input.");
} else {
throw new ParsingException(
"Unexpected symbol found: %s.".formatted(parser.LT(1))
);
}
}
private TreeNode unary(
final Supplier> other,
final Parser parser,
final TokenConverter super T, ? extends V> mapper
) {
final var token = parser.LT(1);
if (_uops.test(token)) {
parser.consume();
return TreeNode
.of(mapper.convert(token, TokenType.UNARY_OPERATOR))
.attach(other.get());
} else {
return other.get();
}
}
/**
* Parses the given token sequence according {@code this} formula definition.
* If the given {@code tokens} supplier returns null, no further token is
* available.
*
* @param tokens the tokens which form the formula
* @param mapper the mapper function which maps the token type to the parse
* tree value type
* @return the parsed formula as a tree
* @throws NullPointerException if one of the arguments is {@code null}
* @throws IllegalArgumentException if the given {@code tokens} can't be
* parsed
*/
public TreeNode parse(
final Supplier extends T> tokens,
final TokenConverter super T, ? extends V> mapper
) {
requireNonNull(tokens);
requireNonNull(mapper);
return _term.expr(new Parser(tokens::get, 1), mapper);
}
/**
* Parses the given token sequence according {@code this} formula definition.
* If the given {@code tokens} supplier returns null, no further token is
* available.
*
* @param tokens the tokens which form the formula
* @return the parsed formula as a tree
* @throws NullPointerException if the arguments is {@code null}
* @throws IllegalArgumentException if the given {@code tokens} can't be
* parsed
*/
public TreeNode parse(final Supplier extends T> tokens) {
return parse(tokens, (token, type) -> token);
}
/**
* Parses the given token sequence according {@code this} formula definition.
*
* @param tokens the tokens which form the formula
* @param mapper the mapper function which maps the token type to the parse
* tree value type
* @return the parsed formula as a tree
* @throws NullPointerException if one of the arguments is {@code null}
* @throws IllegalArgumentException if the given {@code tokens} can't be
* parsed
*/
public TreeNode parse(
final Iterable extends T> tokens,
final TokenConverter super T, ? extends V> mapper
) {
final var it = tokens.iterator();
return parse(() -> it.hasNext() ? it.next() : null, mapper);
}
/**
* Parses the given token sequence according {@code this} formula definition.
*
* @param tokens the tokens which form the formula
* @return the parsed formula as a tree
* @throws NullPointerException if the arguments is {@code null}
* @throws IllegalArgumentException if the given {@code tokens} can't be
* parsed
*/
public TreeNode parse(final Iterable extends T> tokens) {
return parse(tokens, (token, type) -> token);
}
/**
* Return a new builder class for building new formula parsers.
*
* @param the token type
* @return a new formula parser builder
*/
public static Builder builder() {
return new Builder<>();
}
/* *************************************************************************
* FormulaParser helper classes
* ************************************************************************/
/**
* General term object to be parsed.
*
* @param the token value type used as input for the parser
*/
private static abstract class Term {
Term _next;
Term _last;
TreeNode op(
final TreeNode expr,
final Parser parser,
final TokenConverter super T, ? extends V> mapper
) {
return expr;
}
abstract TreeNode term(
final Parser parser,
final TokenConverter super T, ? extends V> mapper
);
TreeNode expr(
final Parser parser,
final TokenConverter super T, ? extends V> mapper
) {
return op(term(parser, mapper), parser, mapper);
}
void append(final Term term) {
if (_next == null) {
_next = term;
_last = term;
} else {
_last.append(term);
}
}
}
/**
* Represents a binary (mathematical) operation.
*
* @param the token value type used as input for the parser
*/
private static class BopTerm extends Term {
private final Predicate super T> _tokens;
BopTerm(final Predicate super T> tokens) {
_tokens = requireNonNull(tokens);
}
@Override
TreeNode op(
final TreeNode expr,
final Parser parser,
final TokenConverter super T, ? extends V> mapper
) {
var result = expr;
final var token = parser.LT(1);
if (token != null && _tokens.test(token)) {
parser.consume();
final TreeNode node = TreeNode
.of(mapper.convert(token, TokenType.BINARY_OPERATOR))
.attach(expr)
.attach(term(parser, mapper));
result = op(node, parser, mapper);
}
return result;
}
@Override
TreeNode term(
final Parser parser,
final TokenConverter super T, ? extends V> mapper
) {
return _next.op(_next.term(parser, mapper), parser, mapper);
}
/**
* Builds a linked chain of binary operations. Operations with lower
* precedence are at the beginning of the chain and operations
* with higher precedence are appended to the end of the linked
* operation term chain.
*
* @param bops the list of binary operations with a given precedence
* @param the token value type used as input for the parser
* @return the linked operation term
*/
static BopTerm build(final List extends Predicate super T>> bops) {
BopTerm start = null;
for (var tokens : bops) {
final BopTerm term = new BopTerm<>(tokens);
if (start == null) {
start = term;
} else {
start.append(term);
}
}
return start;
}
}
/* *************************************************************************
* FormulaParser builder class
* ************************************************************************/
/**
* Builder for building new {@link FormulaParser} instances.
*
* @param the token type
*/
public static final class Builder {
private Predicate super T> _lparen = token -> false;
private Predicate super T> _rparen = token -> false;
private Predicate super T> _separator = token -> false;
private List extends Predicate super T>> _bops = List.of();
private Predicate super T> _uops = token -> false;
private Predicate super T> _identifiers = token -> false;
private Predicate super T> _functions = token -> false;
private Builder() {
}
/**
* Set the predicate which defines {@code lparen} tokens. If the given
* predicate returns {@code true} for a token, it is treated as
* lparen.
*
* @param lparen the {@code lparen} token
* @return {@code this} builder, for method chaining
* @throws NullPointerException if the {@code lparen} is {@code null}
*/
public Builder lparen(final Predicate super T> lparen) {
_lparen = requireNonNull(lparen);
return this;
}
/**
* Set the prototype for the {@code lparen} token. A given
* token is treated as {@code lparen} if {@code Objects.equals(token, lparen)}
* returns {@code true}.
*
* @param lparen the {@code lparen} prototype
* @return {@code this} builder, for method chaining
*/
public Builder lparen(final T lparen) {
return lparen(token -> Objects.equals(token, lparen));
}
/**
* Set the predicate which defines {@code rparen} tokens. If the given
* predicate returns {@code true} for a token, it is treated as
* rparen.
*
* @param rparen the {@code rparen} token
* @return {@code this} builder, for method chaining
* @throws NullPointerException if the {@code rparen} is {@code null}
*/
public Builder rparen(final Predicate super T> rparen) {
_rparen = requireNonNull(rparen);
return this;
}
/**
* Set the prototype for the {@code rparen} token. A given
* token is treated as {@code rparen} if {@code Objects.equals(token, rparen)}
* returns {@code true}.
*
* @param rparen the {@code rparen} prototype
* @return {@code this} builder, for method chaining
*/
public Builder rparen(final T rparen) {
return rparen(token -> Objects.equals(token, rparen));
}
/**
* Set the predicate which defines {@code separator} tokens. If the given
* predicate returns {@code true} for a token, it is treated as
* separator.
*
* @param separator the {@code separator} token
* @return {@code this} builder, for method chaining
* @throws NullPointerException if the {@code separator} is {@code null}
*/
public Builder separator(final Predicate super T> separator) {
_separator = requireNonNull(separator);
return this;
}
/**
* Set the prototype for the {@code separator} token. A given
* token is treated as {@code separator} if {@code Objects.equals(token, separator)}
* returns {@code true}.
*
* @param separator the {@code separator} prototype
* @return {@code this} builder, for method chaining
*/
public Builder separator(final T separator) {
return separator(token -> Objects.equals(token, separator));
}
/**
* Set the predicate which defines the unary operator tokens. If the
* given predicate returns {@code true} for a token, it is treated as
* unary operator.
*
* @param ops the {@code comma} token
* @return {@code this} builder, for method chaining
* @throws NullPointerException if the {@code ops} is {@code null}
*/
public Builder unaryOperators(final Predicate super T> ops) {
_uops = requireNonNull(ops);
return this;
}
/**
* Set all unary operator tokens.
*
* @param ops the unary operator tokens
* @return {@code this} builder, for method chaining
* @throws NullPointerException if the {@code ops} is {@code null}
*/
public Builder unaryOperators(final Set extends T> ops) {
return unaryOperators(Set.copyOf(ops)::contains);
}
/**
* Set all unary operator tokens.
*
* @param ops the unary operator tokens
* @return {@code this} builder, for method chaining
* @throws NullPointerException if the {@code ops} is {@code null}
*/
@SafeVarargs
public final Builder unaryOperators(final T... ops) {
return unaryOperators(Set.of(ops));
}
/**
* Set the list of predicates which defines the binary ops. The
* predicate indexes of the list represent the precedence of the binary
* ops. {@code ops.get(0)} has the lowest precedence and
* {@code ops.get(ops.size() - 1)} has the highest precedence
*
* @param ops the predicates defining the binary operator tokens
* @return {@code this} builder, for method chaining
* @throws NullPointerException if the {@code ops} is {@code null}
*/
public Builder binaryOperators(final List extends Predicate super T>> ops) {
_bops = List.copyOf(ops);
return this;
}
/**
* Set the list of predicates which defines the binary ops. The
* predicate indexes of the list represent the precedence of the binary
* ops. {@code ops.get(0)} has the lowest precedence and
* {@code ops.get(ops.size() - 1)} has the highest precedence
*
* @param ops the predicates defining the binary operator tokens
* @return {@code this} builder, for method chaining
* @throws NullPointerException if the {@code ops} is {@code null}
*/
@SafeVarargs
public final Builder binaryOperators(final Predicate super T>... ops) {
_bops = List.of(ops);
return this;
}
/**
* Method for defining the binary operators and its precedence.
*
* @param ops the predicates defining the binary operator tokens
* @return {@code this} builder, for method chaining
*/
public Builder binaryOperators(final Consumer super Bops> ops) {
final var builder = new Bops();
ops.accept(builder);
_bops = builder.build();
return this;
}
/**
* Set the predicate which defines identifier tokens.
*
* @param identifiers the identifier predicate
* @return {@code this} builder, for method chaining
* @throws NullPointerException if the {@code identifiers} is {@code null}
*/
public Builder identifiers(final Predicate super T> identifiers) {
_identifiers = requireNonNull(identifiers);
return this;
}
/**
* Set all identifier tokens.
*
* @param identifiers the identifier tokens
* @return {@code this} builder, for method chaining
* @throws NullPointerException if the {@code identifiers} is {@code null}
*/
public Builder identifiers(final Set extends T> identifiers) {
return identifiers(Set.copyOf(identifiers)::contains);
}
/**
* Set all identifier tokens.
*
* @param identifiers the identifier tokens
* @return {@code this} builder, for method chaining
* @throws NullPointerException if the {@code identifiers} is {@code null}
*/
@SafeVarargs
public final Builder identifiers(final T... identifiers) {
return identifiers(Set.of(identifiers));
}
/**
* Set the predicate which defines function tokens.
*
* @param functions the function predicate
* @return {@code this} builder, for method chaining
* @throws NullPointerException if the {@code functions} is {@code null}
*/
public Builder functions(final Predicate super T> functions) {
_functions = requireNonNull(functions);
return this;
}
/**
* Set all functions tokens.
*
* @param functions the function tokens
* @return {@code this} builder, for method chaining
* @throws NullPointerException if the {@code functions} is {@code null}
*/
public Builder functions(final Set extends T> functions) {
return functions(Set.copyOf(functions)::contains);
}
/**
* Set all functions tokens.
*
* @param functions the function tokens
* @return {@code this} builder, for method chaining
* @throws NullPointerException if the {@code functions} is {@code null}
*/
@SafeVarargs
public final Builder functions(final T... functions) {
return functions(Set.of(functions));
}
/**
* Create a new formula parser with the defined values.
*
* @return a new formula parser
*/
public FormulaParser build() {
return new FormulaParser<>(
_lparen,
_rparen,
_separator,
_bops,
_uops,
_identifiers,
_functions
);
}
/**
* Builder class for building binary operators with its precedence.
*
* @param the token type
*/
public static final class Bops {
private final Map> _operations = new HashMap<>();
private Bops() {
}
/**
* Add a new operator predicate with its precedence.
*
* @param precedence the precedence of the operators
* @param operators the operators predicate
* @return {@code this} builder, for method chaining
*/
public Bops add(
final int precedence,
final Predicate super T> operators
) {
Predicate super T> ops = _operations.get(precedence);
if (ops != null) {
final Predicate super T> prev = ops;
ops = token -> prev.test(token) || operators.test(token);
} else {
ops = operators;
}
_operations.put(precedence, ops);
return this;
}
/**
* Add a new operator tokens with its precedence.
*
* @param precedence the precedence of the operators
* @param operators the operators
* @return {@code this} builder, for method chaining
*/
@SafeVarargs
public final Bops add(
final int precedence,
final T... operators
) {
return add(precedence, Set.of(operators)::contains);
}
private List extends Predicate super T>> build() {
return _operations.entrySet().stream()
.sorted(Entry.comparingByKey())
.map(Entry::getValue)
.toList();
}
}
}
}