All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.jenetics.ext.internal.util.FormulaParser Maven / Gradle / Ivy

The newest version!
/*
 * Java Genetic Algorithm Library (jenetics-8.1.0).
 * Copyright (c) 2007-2024 Franz Wilhelmstötter
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Author:
 *    Franz Wilhelmstötter ([email protected])
 */
package io.jenetics.ext.internal.util;

import static java.util.Objects.requireNonNull;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.function.Supplier;

import io.jenetics.ext.internal.parser.Parser;
import io.jenetics.ext.internal.parser.ParsingException;
import io.jenetics.ext.util.TreeNode;

/**
 * This class allows you to convert a sequence of tokens, which
 * represents some kind of (mathematical) formula, into a tree structure. To do
 * this, it is assumed that the given tokens can be categorized. The two main
 * categories are structural tokens and operational tokens.
 *
 * 

Structural tokens

* Structural tokens are used to influence the hierarchy of the parsed tokens * and are also part of function definitions. This kind of token will not be * part of the generated tree representation. *
    *
  1. lparen: Represents left parentheses, which starts * sub-trees or opens function argument lists.
  2. *
  3. rparen: Represents right parentheses, which closes * sub-trees or function argument lists. lparen and * rparen must be balanced.
  4. *
  5. comma: Separator token for function arguments.
  6. *
* *

Operational tokens

* Operational tokens define the actual behaviour of the created tree. *
    *
  1. identifier: This kind of tokens usually represents variable * names or numbers.
  2. *
  3. function: Function tokens represents identifiers for * functions. Valid functions have the following form: {@code 'fun' 'lparen' * arg ['comma' args]* 'rparen'}
  4. *
  5. binary operator: Binary operators are defined in infix * order and have a precedence. Typical examples are the arithmetic * operators '+' and '*', where the '*' have a higher precedence than '+'.
  6. *
  7. unary operator: Unary operators are prefix operators. A * typical example is the arithmetic negation operator '-'. Unary * operators have all the same precedence, which is higher than the * precedence of all binary operators.
  8. *
* * This class is only responsible for the parsing step. The tokenization must * be implemented separately. Another possible token source would be a generating * grammar, where the output is already a list of tokens (aka sentence). The * following example parser can be used to parse arithmetic expressions. * * {@snippet lang="java": * final FormulaParser parser = FormulaParser.builder() * // Structural tokens. * .lparen("(") * .rparen(")") * .separator(",") * // Operational tokens. * .unaryOperators("+", "-") * .binaryOperators(ops -> ops * .add(11, "+", "-") * .add(12, "*", "/") * .add(14, "^", "**")) * .identifiers("x", "y", "z") * .functions("pow", "sin", "cos") * .build(); * } * This parser allows you to parse the following token list * {@snippet lang="java": * final List tokens = List.of( * "x", "*", "x", "+", "sin", "(", "z", ")", "-", "cos", "(", "x", * ")", "+", "y", "/", "z", "-", "pow", "(", "z", ",", "x", ")" * ); * final Tree tree = parser.parse(tokens); * } * which will result in the following parsed tree: *
{@code
 * "-"
 * ├── "+"
 * │   ├── "-"
 * │   │   ├── "+"
 * │   │   │   ├── "*"
 * │   │   │   │   ├── "x"
 * │   │   │   │   └── "x"
 * │   │   │   └── "sin"
 * │   │   │       └── "z"
 * │   │   └── "cos"
 * │   │       └── "x"
 * │   └── "/"
 * │       ├── "y"
 * │       └── "z"
 * └── "pow"
 *     ├── "z"
 *     └── "x"
 * }
* Note that the generated (parsed) tree is of type {@code Tree}. To * evaluate this tree, additional steps are necessary. If you want to * create an executable tree, you have to use the * {@link #parse(Iterable, TokenConverter)} function for parsing the tokens. *

* The following code snippet shows how to create an executable AST * from a token list. The {@code MathExpr} class in the {@code io.jenetics.prog} * module uses a similar {@link TokenConverter}. * {@snippet lang="java": * final Tree, ?> tree = formula.parse( * tokens, * (token, type) -> switch (token) { * case "+" -> type == TokenType.UNARY_OPERATOR ? MathOp.ID : MathOp.ADD; * case "-" -> type == TokenType.UNARY_OPERATOR ? MathOp.NEG : MathOp.SUB; * case "*" -> MathOp.MUL; * case "/" -> MathOp.DIV; * case "^", "**", "pow" -> MathOp.POW; * case "sin" -> MathOp.SIN; * case "cos" -> MathOp.COS; * default -> type == TokenType.IDENTIFIER * ? Var.of(token) * : throw new IllegalArgumentException("Unknown token: " + token); * } * ); * } * * @param the token type used as input for the parser * * @implNote * This class is immutable and thread-safe. * * @author Franz Wilhelmstötter * @since 7.1 * @version 7.1 */ public final class FormulaParser { /** * The token types the parser recognizes during the parsing process. */ public enum TokenType { /** * Indicates an unary operator. */ UNARY_OPERATOR, /** * Indicates a binary operator. */ BINARY_OPERATOR, /** * Indicates a function token. */ FUNCTION, /** * Indicates an identifier token. */ IDENTIFIER } /** * Conversion function which is used for converting tokens into another * type. * * @param the token type * @param the converted value type */ @FunctionalInterface public interface TokenConverter { /** * Convert the given {@code token} into another value. The conversion * can use the token type, recognized during the parsing process. * * @param token the token value to convert * @param type the token type, recognized during the parsing process * @return the converted value */ V convert(final T token, final TokenType type); } private final Predicate _lparen; private final Predicate _rparen; private final Predicate _separator; private final Predicate _uops; private final Predicate _identifiers; private final Predicate _functions; // The processed binary operators. private final Term _term; /** * Creates a new general expression parser object. The parser is not bound * to a specific source and target type or concrete token types. * * @param lparen the token type specifying the left parentheses, '(' * @param rparen the token type specifying the right parentheses, ')' * @param separator the token type specifying the function parameter * separator, ',' * @param bops the list of binary operators, according its * precedence. The first list element contains the operations with * the lowest precedence, and the last list element contains the * operations with the highest precedence. * @param uops the token types representing the unary operations * @param identifiers the token type representing identifier, like variable * names, constants or numbers * @param functions predicate which tests whether a given identifier value * represents a known function name */ private FormulaParser( final Predicate lparen, final Predicate rparen, final Predicate separator, final List> bops, final Predicate uops, final Predicate identifiers, final Predicate functions ) { _lparen = requireNonNull(lparen); _rparen = requireNonNull(rparen); _separator = requireNonNull(separator); _uops = requireNonNull(uops); _identifiers = requireNonNull(identifiers); _functions = requireNonNull(functions); final Term oterm = BopTerm.build(bops); final Term fterm = new Term<>() { @Override TreeNode term( final Parser parser, final TokenConverter mapper ) { return function(parser, mapper); } }; if (oterm != null) { oterm.append(fterm); _term = oterm; } else { _term = fterm; } } private TreeNode function( final Parser parser, final TokenConverter mapper ) { final var token = parser.LT(1); if (_functions.test(token)) { parser.consume(); final TreeNode node = TreeNode .of(mapper.convert(token, TokenType.FUNCTION)); parser.match(_lparen); node.attach(_term.expr(parser, mapper)); while (_separator.test(parser.LT(1))) { parser.consume(); node.attach(_term.expr(parser, mapper)); } parser.match(_rparen); return node; } else if (_lparen.test(token)) { parser.consume(); final TreeNode node = _term.expr(parser, mapper); parser.match(_rparen); return node; } else { return unary(() -> atom(parser, mapper), parser, mapper); } } private TreeNode atom( final Parser parser, final TokenConverter mapper ) { final var token = parser.LT(1); if (_identifiers.test(token)) { parser.consume(); return TreeNode.of(mapper.convert(token, TokenType.IDENTIFIER)); } else if (token == null) { throw new ParsingException("Unexpected end of input."); } else { throw new ParsingException( "Unexpected symbol found: %s.".formatted(parser.LT(1)) ); } } private TreeNode unary( final Supplier> other, final Parser parser, final TokenConverter mapper ) { final var token = parser.LT(1); if (_uops.test(token)) { parser.consume(); return TreeNode .of(mapper.convert(token, TokenType.UNARY_OPERATOR)) .attach(other.get()); } else { return other.get(); } } /** * Parses the given token sequence according {@code this} formula definition. * If the given {@code tokens} supplier returns null, no further token is * available. * * @param tokens the tokens which form the formula * @param mapper the mapper function which maps the token type to the parse * tree value type * @return the parsed formula as a tree * @throws NullPointerException if one of the arguments is {@code null} * @throws IllegalArgumentException if the given {@code tokens} can't be * parsed */ public TreeNode parse( final Supplier tokens, final TokenConverter mapper ) { requireNonNull(tokens); requireNonNull(mapper); return _term.expr(new Parser(tokens::get, 1), mapper); } /** * Parses the given token sequence according {@code this} formula definition. * If the given {@code tokens} supplier returns null, no further token is * available. * * @param tokens the tokens which form the formula * @return the parsed formula as a tree * @throws NullPointerException if the arguments is {@code null} * @throws IllegalArgumentException if the given {@code tokens} can't be * parsed */ public TreeNode parse(final Supplier tokens) { return parse(tokens, (token, type) -> token); } /** * Parses the given token sequence according {@code this} formula definition. * * @param tokens the tokens which form the formula * @param mapper the mapper function which maps the token type to the parse * tree value type * @return the parsed formula as a tree * @throws NullPointerException if one of the arguments is {@code null} * @throws IllegalArgumentException if the given {@code tokens} can't be * parsed */ public TreeNode parse( final Iterable tokens, final TokenConverter mapper ) { final var it = tokens.iterator(); return parse(() -> it.hasNext() ? it.next() : null, mapper); } /** * Parses the given token sequence according {@code this} formula definition. * * @param tokens the tokens which form the formula * @return the parsed formula as a tree * @throws NullPointerException if the arguments is {@code null} * @throws IllegalArgumentException if the given {@code tokens} can't be * parsed */ public TreeNode parse(final Iterable tokens) { return parse(tokens, (token, type) -> token); } /** * Return a new builder class for building new formula parsers. * * @param the token type * @return a new formula parser builder */ public static Builder builder() { return new Builder<>(); } /* ************************************************************************* * FormulaParser helper classes * ************************************************************************/ /** * General term object to be parsed. * * @param the token value type used as input for the parser */ private static abstract class Term { Term _next; Term _last; TreeNode op( final TreeNode expr, final Parser parser, final TokenConverter mapper ) { return expr; } abstract TreeNode term( final Parser parser, final TokenConverter mapper ); TreeNode expr( final Parser parser, final TokenConverter mapper ) { return op(term(parser, mapper), parser, mapper); } void append(final Term term) { if (_next == null) { _next = term; _last = term; } else { _last.append(term); } } } /** * Represents a binary (mathematical) operation. * * @param the token value type used as input for the parser */ private static class BopTerm extends Term { private final Predicate _tokens; BopTerm(final Predicate tokens) { _tokens = requireNonNull(tokens); } @Override TreeNode op( final TreeNode expr, final Parser parser, final TokenConverter mapper ) { var result = expr; final var token = parser.LT(1); if (token != null && _tokens.test(token)) { parser.consume(); final TreeNode node = TreeNode .of(mapper.convert(token, TokenType.BINARY_OPERATOR)) .attach(expr) .attach(term(parser, mapper)); result = op(node, parser, mapper); } return result; } @Override TreeNode term( final Parser parser, final TokenConverter mapper ) { return _next.op(_next.term(parser, mapper), parser, mapper); } /** * Builds a linked chain of binary operations. Operations with lower * precedence are at the beginning of the chain and operations * with higher precedence are appended to the end of the linked * operation term chain. * * @param bops the list of binary operations with a given precedence * @param the token value type used as input for the parser * @return the linked operation term */ static BopTerm build(final List> bops) { BopTerm start = null; for (var tokens : bops) { final BopTerm term = new BopTerm<>(tokens); if (start == null) { start = term; } else { start.append(term); } } return start; } } /* ************************************************************************* * FormulaParser builder class * ************************************************************************/ /** * Builder for building new {@link FormulaParser} instances. * * @param the token type */ public static final class Builder { private Predicate _lparen = token -> false; private Predicate _rparen = token -> false; private Predicate _separator = token -> false; private List> _bops = List.of(); private Predicate _uops = token -> false; private Predicate _identifiers = token -> false; private Predicate _functions = token -> false; private Builder() { } /** * Set the predicate which defines {@code lparen} tokens. If the given * predicate returns {@code true} for a token, it is treated as * lparen. * * @param lparen the {@code lparen} token * @return {@code this} builder, for method chaining * @throws NullPointerException if the {@code lparen} is {@code null} */ public Builder lparen(final Predicate lparen) { _lparen = requireNonNull(lparen); return this; } /** * Set the prototype for the {@code lparen} token. A given * token is treated as {@code lparen} if {@code Objects.equals(token, lparen)} * returns {@code true}. * * @param lparen the {@code lparen} prototype * @return {@code this} builder, for method chaining */ public Builder lparen(final T lparen) { return lparen(token -> Objects.equals(token, lparen)); } /** * Set the predicate which defines {@code rparen} tokens. If the given * predicate returns {@code true} for a token, it is treated as * rparen. * * @param rparen the {@code rparen} token * @return {@code this} builder, for method chaining * @throws NullPointerException if the {@code rparen} is {@code null} */ public Builder rparen(final Predicate rparen) { _rparen = requireNonNull(rparen); return this; } /** * Set the prototype for the {@code rparen} token. A given * token is treated as {@code rparen} if {@code Objects.equals(token, rparen)} * returns {@code true}. * * @param rparen the {@code rparen} prototype * @return {@code this} builder, for method chaining */ public Builder rparen(final T rparen) { return rparen(token -> Objects.equals(token, rparen)); } /** * Set the predicate which defines {@code separator} tokens. If the given * predicate returns {@code true} for a token, it is treated as * separator. * * @param separator the {@code separator} token * @return {@code this} builder, for method chaining * @throws NullPointerException if the {@code separator} is {@code null} */ public Builder separator(final Predicate separator) { _separator = requireNonNull(separator); return this; } /** * Set the prototype for the {@code separator} token. A given * token is treated as {@code separator} if {@code Objects.equals(token, separator)} * returns {@code true}. * * @param separator the {@code separator} prototype * @return {@code this} builder, for method chaining */ public Builder separator(final T separator) { return separator(token -> Objects.equals(token, separator)); } /** * Set the predicate which defines the unary operator tokens. If the * given predicate returns {@code true} for a token, it is treated as * unary operator. * * @param ops the {@code comma} token * @return {@code this} builder, for method chaining * @throws NullPointerException if the {@code ops} is {@code null} */ public Builder unaryOperators(final Predicate ops) { _uops = requireNonNull(ops); return this; } /** * Set all unary operator tokens. * * @param ops the unary operator tokens * @return {@code this} builder, for method chaining * @throws NullPointerException if the {@code ops} is {@code null} */ public Builder unaryOperators(final Set ops) { return unaryOperators(Set.copyOf(ops)::contains); } /** * Set all unary operator tokens. * * @param ops the unary operator tokens * @return {@code this} builder, for method chaining * @throws NullPointerException if the {@code ops} is {@code null} */ @SafeVarargs public final Builder unaryOperators(final T... ops) { return unaryOperators(Set.of(ops)); } /** * Set the list of predicates which defines the binary ops. The * predicate indexes of the list represent the precedence of the binary * ops. {@code ops.get(0)} has the lowest precedence and * {@code ops.get(ops.size() - 1)} has the highest precedence * * @param ops the predicates defining the binary operator tokens * @return {@code this} builder, for method chaining * @throws NullPointerException if the {@code ops} is {@code null} */ public Builder binaryOperators(final List> ops) { _bops = List.copyOf(ops); return this; } /** * Set the list of predicates which defines the binary ops. The * predicate indexes of the list represent the precedence of the binary * ops. {@code ops.get(0)} has the lowest precedence and * {@code ops.get(ops.size() - 1)} has the highest precedence * * @param ops the predicates defining the binary operator tokens * @return {@code this} builder, for method chaining * @throws NullPointerException if the {@code ops} is {@code null} */ @SafeVarargs public final Builder binaryOperators(final Predicate... ops) { _bops = List.of(ops); return this; } /** * Method for defining the binary operators and its precedence. * * @param ops the predicates defining the binary operator tokens * @return {@code this} builder, for method chaining */ public Builder binaryOperators(final Consumer> ops) { final var builder = new Bops(); ops.accept(builder); _bops = builder.build(); return this; } /** * Set the predicate which defines identifier tokens. * * @param identifiers the identifier predicate * @return {@code this} builder, for method chaining * @throws NullPointerException if the {@code identifiers} is {@code null} */ public Builder identifiers(final Predicate identifiers) { _identifiers = requireNonNull(identifiers); return this; } /** * Set all identifier tokens. * * @param identifiers the identifier tokens * @return {@code this} builder, for method chaining * @throws NullPointerException if the {@code identifiers} is {@code null} */ public Builder identifiers(final Set identifiers) { return identifiers(Set.copyOf(identifiers)::contains); } /** * Set all identifier tokens. * * @param identifiers the identifier tokens * @return {@code this} builder, for method chaining * @throws NullPointerException if the {@code identifiers} is {@code null} */ @SafeVarargs public final Builder identifiers(final T... identifiers) { return identifiers(Set.of(identifiers)); } /** * Set the predicate which defines function tokens. * * @param functions the function predicate * @return {@code this} builder, for method chaining * @throws NullPointerException if the {@code functions} is {@code null} */ public Builder functions(final Predicate functions) { _functions = requireNonNull(functions); return this; } /** * Set all functions tokens. * * @param functions the function tokens * @return {@code this} builder, for method chaining * @throws NullPointerException if the {@code functions} is {@code null} */ public Builder functions(final Set functions) { return functions(Set.copyOf(functions)::contains); } /** * Set all functions tokens. * * @param functions the function tokens * @return {@code this} builder, for method chaining * @throws NullPointerException if the {@code functions} is {@code null} */ @SafeVarargs public final Builder functions(final T... functions) { return functions(Set.of(functions)); } /** * Create a new formula parser with the defined values. * * @return a new formula parser */ public FormulaParser build() { return new FormulaParser<>( _lparen, _rparen, _separator, _bops, _uops, _identifiers, _functions ); } /** * Builder class for building binary operators with its precedence. * * @param the token type */ public static final class Bops { private final Map> _operations = new HashMap<>(); private Bops() { } /** * Add a new operator predicate with its precedence. * * @param precedence the precedence of the operators * @param operators the operators predicate * @return {@code this} builder, for method chaining */ public Bops add( final int precedence, final Predicate operators ) { Predicate ops = _operations.get(precedence); if (ops != null) { final Predicate prev = ops; ops = token -> prev.test(token) || operators.test(token); } else { ops = operators; } _operations.put(precedence, ops); return this; } /** * Add a new operator tokens with its precedence. * * @param precedence the precedence of the operators * @param operators the operators * @return {@code this} builder, for method chaining */ @SafeVarargs public final Bops add( final int precedence, final T... operators ) { return add(precedence, Set.of(operators)::contains); } private List> build() { return _operations.entrySet().stream() .sorted(Entry.comparingByKey()) .map(Entry::getValue) .toList(); } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy