All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.jenetics.ext.grammar.Cfg Maven / Gradle / Ivy

The newest version!
/*
 * Java Genetic Algorithm Library (jenetics-8.1.0).
 * Copyright (c) 2007-2024 Franz Wilhelmstötter
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Author:
 *    Franz Wilhelmstötter ([email protected])
 */
package io.jenetics.ext.grammar;

import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.toCollection;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Represents a <em>context-free grammar</em> (CFG).
 * <p>
 * <b>Formal definition</b>
 * <p>
 * A context-free grammar {@code G} is defined by the 4-tuple
 * {@code G = (N, T, R, S)}, where
 * <ul>
 *     <li>{@code N} is a finite set; each element <code>n &isin; N</code> is
 *     called a non-terminal ({@link NonTerminal}) character or a variable.
 *     Each variable represents a different type of phrase or clause in the
 *     sentence. Variables are also sometimes called syntactic categories.
 *     Each variable defines a sub-language of the language defined by
 *     {@code G}.
 *     </li>
 *     <li>{@code T} is a finite set of terminals ({@link Terminal}) disjoint
 *     from {@code N}, which make up the actual content of the sentence. The
 *     set of terminals is the alphabet of the language defined by the grammar
 *     {@code G}.
 *     </li>
 *     <li>{@code R} is a finite relation in
 *     <code>N &times; (N &cup; T)&lowast;</code>, where the asterisk
 *     represents the Kleene star operation. The members of {@code R} are
 *     called the (rewrite) rules ({@link Rule}) or productions of the grammar.
 *     </li>
 *     <li>{@code S} is the start variable (or start symbol), used to represent
 *     the whole sentence (or program). It must be an element of {@code N}
 *     ({@link NonTerminal}).
 *     </li>
 * </ul>
 *
 * You can easily create a <em>Cfg</em> object from a given BNF grammar.
 * {@snippet lang="java":
 * final Cfg<String> grammar = Bnf.parse("""
 *     <expr> ::= <num> | <var> | '(' <expr> <op> <expr> ')'
 *     <op>   ::= + | - | * | /
 *     <var>  ::= x | y
 *     <num>  ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
 *     """
 * );
 * }
 *
 * It is also possible to create the grammar above programmatically, using
 * the static factory methods {@link #R}, {@link #E}, {@link #N} and
 * {@link #T(String)} of this class.
 * {@snippet lang="java":
 * final Cfg<String> grammar = Cfg.of(
 *     R("expr",
 *         E(N("num")),
 *         E(N("var")),
 *         E(T("("), N("expr"), N("op"), N("expr"), T(")"))
 *     ),
 *     R("op", E(T("+")), E(T("-")), E(T("*")), E(T("/"))),
 *     R("var", E(T("x")), E(T("y"))),
 *     R("num",
 *         E(T("0")), E(T("1")), E(T("2")), E(T("3")),
 *         E(T("4")), E(T("5")), E(T("6")), E(T("7")),
 *         E(T("8")), E(T("9"))
 *     )
 * );
 * }
 *
 * @see Bnf#parse(String)
 *
 * @param <T> the terminal symbol value type
 * @param nonTerminals the non-terminal symbols of {@code this} grammar
 * @param terminals the terminal symbols of {@code this} grammar
 * @param rules the production rules of {@code this} grammar
 * @param start the start symbol of {@code this} grammar
 *
 * @author Franz Wilhelmst&ouml;tter
 * @since 7.1
 * @version 7.1
 */
public record Cfg<T>(
    List<NonTerminal<T>> nonTerminals,
    List<Terminal<T>> terminals,
    List<Rule<T>> rules,
    NonTerminal<T> start
) {

    /**
     * Represents the symbols the BNF grammar consists of.
     *
     * @param <T> the terminal symbol value type
     */
    public sealed interface Symbol<T> {

        /**
         * Return the name of the symbol.
         *
         * @return the name of the symbol
         */
        String name();

    }

    /**
     * Represents the non-terminal symbols of the grammar ({@code NT}).
     *
     * @param <T> the terminal symbol value type
     * @param name the name of the non-terminal symbol
     */
    public record NonTerminal<T>(String name) implements Symbol<T> {

        /**
         * @param name the name of the non-terminal symbol
         * @throws IllegalArgumentException if the given {@code name} is
         *         empty
         * @throws NullPointerException if one of the arguments is {@code null}
         */
        public NonTerminal {
            if (name.isEmpty()) {
                throw new IllegalArgumentException(
                    "Non-terminal value must not be empty."
                );
            }
        }
    }

    /**
     * Represents a terminal symbols of the grammar ({@code T}).
     *
     * @param <T> the terminal symbol value type
     * @param name the name of the terminal symbol
     * @param value the value of the terminal symbol
     */
    public record Terminal<T>(String name, T value) implements Symbol<T> {

        /**
         * @param name the name of the terminal symbol
         * @param value the value of the terminal symbol
         * @throws IllegalArgumentException if the given terminal {@code name}
         *         is empty
         * @throws NullPointerException if the given {@code name} is
         *         {@code null}
         */
        public Terminal {
            if (name.isEmpty()) {
                throw new IllegalArgumentException(
                    "Terminal value must not be empty."
                );
            }
        }

        /**
         * Return a new terminal symbol where the name of the symbol is equal
         * to its value.
         *
         * @param name the name (and value) of the terminal symbol
         * @return a new terminal symbol with the given {@code name}
         * @throws IllegalArgumentException if the given terminal {@code name}
         *         is empty
         */
        public static Terminal<String> of(final String name) {
            return new Terminal<>(name, name);
        }

    }

    /**
     * Represents one <em>expression</em> (list of alternative symbols) a
     * production rule consists of.
     *
     * @param <T> the terminal symbol value type
     * @param symbols the list of symbols of the expression
     */
    public record Expression<T>(List<Symbol<T>> symbols) {

        /**
         * @param symbols the list of symbols of the expression
         * @throws IllegalArgumentException if the list of {@code symbols} is
         *         empty
         * @throws NullPointerException if the list, or one of its elements,
         *         is {@code null}
         */
        public Expression {
            if (symbols.isEmpty()) {
                throw new IllegalArgumentException(
                    "The list of symbols must not be empty."
                );
            }
            // Defensive, unmodifiable copy.
            symbols = List.copyOf(symbols);
        }

    }

    /**
     * Represents a production rule of the grammar ({@code R}).
     *
     * @param <T> the terminal symbol value type
     * @param start the start symbol of the rule
     * @param alternatives the list of alternative rule expressions
     */
    public record Rule<T>(NonTerminal<T> start, List<Expression<T>> alternatives) {

        /**
         * Creates a new rule object.
         *
         * @param start the start symbol of the rule
         * @param alternatives the list of alternative rule expressions
         * @throws IllegalArgumentException if the given list of
         *         {@code alternatives} is empty
         * @throws NullPointerException if one of the arguments is {@code null}
         */
        public Rule {
            requireNonNull(start);
            if (alternatives.isEmpty()) {
                throw new IllegalArgumentException(
                    "Rule alternatives must not be empty."
                );
            }
            // Defensive, unmodifiable copy.
            alternatives = List.copyOf(alternatives);
        }

    }

    /**
     * Create a new <em>context-free</em> grammar object.
     *
     * @param nonTerminals the non-terminal symbols of {@code this} grammar
     * @param terminals the terminal symbols of {@code this} grammar
     * @param rules the production rules of {@code this} grammar
     * @param start the start symbol of {@code this} grammar
     * @throws NullPointerException if one of the arguments is {@code null}
     * @throws IllegalArgumentException if the list of rules is empty, a rule
     *         is defined more than once, the start symbol points to a missing
     *         rule, the rules uses symbols not defined in the list of
     *         {@link #nonTerminals()} or {@link #terminals()}, or a terminal
     *         and a non-terminal symbol share the same name
     */
    public Cfg {
        if (rules.isEmpty()) {
            throw new IllegalArgumentException(
                "The given list of rules must not be empty."
            );
        }

        // Check the uniqueness of the rules.
        final var duplicatedRules = rules.stream()
            .collect(Collectors.groupingBy(Rule::start))
            .values().stream()
            .filter(values -> values.size() > 1)
            .map(rule -> rule.get(0).start.name)
            .toList();

        if (!duplicatedRules.isEmpty()) {
            throw new IllegalArgumentException(
                "Found duplicate rule(s), " + duplicatedRules + "."
            );
        }

        // Check if start symbol points to an existing rule.
        requireNonNull(start);
        final var startRule = rules.stream()
            .filter(r -> start.equals(r.start))
            .findFirst();
        if (startRule.isEmpty()) {
            throw new IllegalArgumentException(
                "No rule found for start symbol %s.".formatted(start)
            );
        }

        // Check that all symbols used in the given rules are also defined
        // in the list of non-terminals and terminals.
        final Set<Symbol<T>> required = rules.stream()
            .flatMap(Cfg::ruleSymbols)
            .collect(Collectors.toUnmodifiableSet());

        final Set<Symbol<T>> available = Stream
            .concat(nonTerminals.stream(), terminals.stream())
            .collect(Collectors.toUnmodifiableSet());

        final var missing = new HashSet<>(required);
        missing.removeAll(available);

        if (!missing.isEmpty()) {
            throw new IllegalArgumentException(
                "Unknown symbols defined in rules: " + missing
            );
        }

        // Check if the name of terminals and non-terminals are distinct.
        final var terminalNames = terminals.stream()
            .map(Symbol::name)
            .collect(Collectors.toSet());

        final var nonTerminalNames = nonTerminals.stream()
            .map(Symbol::name)
            .collect(Collectors.toSet());

        terminalNames.retainAll(nonTerminalNames);
        if (!terminalNames.isEmpty()) {
            throw new IllegalArgumentException(format(
                "Terminal and non-terminal symbols with same name: %s",
                terminalNames.stream().sorted().toList()
            ));
        }

        // Defensive, unmodifiable copies of the component lists.
        nonTerminals = List.copyOf(nonTerminals);
        terminals = List.copyOf(terminals);
        rules = List.copyOf(rules);
    }

    /**
     * Return the rule for the given {@code start} symbol. The rule is looked
     * up by the <em>name</em> of the start symbol.
     *
     * @param start the start symbol of the rule
     * @return the rule for the given {@code start} symbol, or an empty
     *         optional if no such rule exists
     * @throws NullPointerException if the given {@code start} symbol is
     *         {@code null}
     */
    public Optional<Rule<T>> rule(final NonTerminal<?> start) {
        requireNonNull(start);
        for (var rule : rules) {
            if (rule.start().name().equals(start.name())) {
                return Optional.of(rule);
            }
        }
        return Optional.empty();
    }

    /**
     * Maps the values of the terminal symbols from type {@code T} to type
     * {@code A}.
     *
     * @param mapper the mapper function
     * @param <A> the new value type of the terminal symbols
     * @return the mapped grammar
     * @throws NullPointerException if the given mapper is {@code null}
     */
    public <A> Cfg<A> map(
        final Function<? super Terminal<T>, ? extends A> mapper
    ) {
        requireNonNull(mapper);

        // Equal terminals are mapped only once and share one mapped instance.
        final var cache = new HashMap<Terminal<T>, Terminal<A>>();
        final Function<Terminal<T>, Terminal<A>> mapping = t -> cache
            .computeIfAbsent(t, t2 -> new Terminal<>(t2.name(), mapper.apply(t2)));

        // Non-terminal symbols carry no value of type T, so they are reused
        // via an unchecked cast.
        @SuppressWarnings("unchecked")
        final List<Rule<A>> rules = rules().stream()
            .map(rule -> new Rule<>(
                (NonTerminal<A>)rule.start(),
                rule.alternatives().stream()
                    .map(expr -> new Expression<>(
                        expr.symbols().stream()
                            .map(sym -> sym instanceof Cfg.Terminal<T> t
                                ? mapping.apply(t) : (Symbol<A>)sym)
                            .toList()
                    ))
                    .toList()
            ))
            .toList();

        return Cfg.of(rules);
    }

    /**
     * Create a grammar object with the given rules. Duplicated rules are merged
     * into one rule. The <em>start</em> symbol of the first rule is chosen as
     * the start symbol of the created CFG
     *
     * @param rules the rules the grammar consists of
     * @param <T> the terminal symbol value type
     * @return a new grammar object built from the given {@code rules}
     * @throws IllegalArgumentException if the list of rules is empty
     * @throws NullPointerException if the list of rules is {@code null}
     */
    public static <T> Cfg<T> of(final List<Rule<T>> rules) {
        if (rules.isEmpty()) {
            throw new IllegalArgumentException(
                "The list of rules must not be empty."
            );
        }

        final List<Rule<T>> normalizedRules = normalize(rules);

        final List<Symbol<T>> symbols = normalizedRules.stream()
            .flatMap(Cfg::ruleSymbols)
            .distinct()
            .toList();

        final List<NonTerminal<T>> nonTerminals = symbols.stream()
            .filter(NonTerminal.class::isInstance)
            .map(nt -> (NonTerminal<T>)nt)
            .toList();

        final List<Terminal<T>> terminals = symbols.stream()
            .filter(Terminal.class::isInstance)
            .map(nt -> (Terminal<T>)nt)
            .toList();

        return new Cfg<>(
            nonTerminals,
            terminals,
            normalizedRules.stream()
                .map(r -> rebuild(r, symbols))
                .toList(),
            (NonTerminal<T>)select(normalizedRules.get(0).start(), symbols)
        );
    }

    /**
     * Create a grammar object with the given rules. Duplicated rules are merged
     * into one rule. The <em>start</em> symbol of the first rule is chosen as
     * the start symbol of the created CFG
     *
     * @param rules the rules the grammar consists of
     * @param <T> the terminal symbol value type
     * @return a new grammar object built from the given {@code rules}
     * @throws IllegalArgumentException if the list of rules is empty
     * @throws NullPointerException if the list of rules is {@code null}
     */
    @SafeVarargs
    public static <T> Cfg<T> of(final Rule<T>... rules) {
        return Cfg.of(List.of(rules));
    }

    // Merges rules with an equal start symbol; the first-occurrence order of
    // the start symbols is kept (LinkedHashMap).
    private static <T> List<Rule<T>> normalize(final List<Rule<T>> rules) {
        final Map<NonTerminal<T>, List<Rule<T>>> grouped = rules.stream()
            .collect(groupingBy(
                Rule::start,
                LinkedHashMap::new,
                toCollection(ArrayList::new)));

        return grouped.entrySet().stream()
            .map(entry -> merge(entry.getKey(), entry.getValue()))
            .toList();
    }

    // Concatenates the alternatives of all given rules into one rule.
    private static <T> Rule<T> merge(
        final NonTerminal<T> start,
        final List<Rule<T>> rules
    ) {
        return new Rule<>(
            start,
            rules.stream()
                .flatMap(rule -> rule.alternatives().stream())
                .toList()
        );
    }

    // Streams the start symbol of the rule, followed by all symbols of all
    // of its alternatives.
    private static <T> Stream<Symbol<T>> ruleSymbols(final Rule<T> rule) {
        return Stream.concat(
            Stream.of(rule.start),
            rule.alternatives.stream()
                .flatMap(expr -> expr.symbols().stream())
        );
    }

    // Re-creates the rule, using the symbol instances of the given list.
    private static <T> Rule<T> rebuild(
        final Rule<T> rule,
        final List<Symbol<T>> symbols
    ) {
        return new Rule<>(
            (NonTerminal<T>)select(rule.start, symbols),
            rule.alternatives.stream()
                .map(e -> rebuild(e, symbols))
                .toList()
        );
    }

    // Re-creates the expression, using the symbol instances of the given list.
    private static <T> Expression<T> rebuild(
        final Expression<T> expression,
        final List<Symbol<T>> symbols
    ) {
        return new Expression<>(
            expression.symbols.stream()
                .map(s -> select(s, symbols))
                .toList()
        );
    }

    // Returns the first symbol of the list with the same name as the given
    // symbol.
    private static <T> Symbol<T> select(
        final Symbol<T> symbol,
        final List<Symbol<T>> symbols
    ) {
        for (var s : symbols) {
            if (s.name().equals(symbol.name())) {
                return s;
            }
        }
        throw new AssertionError("Symbol not found: " + symbol);
    }

    @SuppressWarnings("unchecked")
    static <T> Cfg<T> upcast(final Cfg<? extends T> seq) {
        return (Cfg<T>)seq;
    }

    /* *************************************************************************
     * Static factory methods for rule creation.
     * ************************************************************************/

    /**
     * Factory method for creating a terminal symbol with the given
     * {@code name} and {@code value}.
     *
     * @param name the name of the terminal symbol
     * @param value the value of the terminal symbol
     * @param <T> the terminal symbol value type
     * @return a new terminal symbol
     * @throws IllegalArgumentException if the given {@code name} is empty
     */
    public static <T> Terminal<T> T(final String name, final T value) {
        return new Terminal<>(name, value);
    }

    /**
     * Factory method for creating a terminal symbol with the given
     * {@code name}. The name is also used as the value of the symbol.
     *
     * @param name the name of the terminal symbol
     * @return a new terminal symbol
     * @throws IllegalArgumentException if the given {@code name} is empty
     */
    public static Terminal<String> T(final String name) {
        return new Terminal<>(name, name);
    }

    /**
     * Factory method for creating non-terminal symbols.
     *
     * @param name the name of the symbol.
     * @param <T> the terminal symbol value type
     * @return a new non-terminal symbol
     * @throws IllegalArgumentException if the given {@code name} is empty
     */
    public static <T> NonTerminal<T> N(final String name) {
        return new NonTerminal<>(name);
    }

    /**
     * Factory method for creating an expression with the given
     * {@code symbols}.
     *
     * @param symbols the list of symbols of the expression
     * @throws IllegalArgumentException if the list of {@code symbols} is
     *         empty
     * @param <T> the terminal symbol value type
     * @return a new expression
     */
    @SafeVarargs
    public static <T> Expression<T> E(final Symbol<T>... symbols) {
        return new Expression<>(List.of(symbols));
    }

    /**
     * Factory method for creating a new rule.
     *
     * @param name the name of start symbol of the rule
     * @param alternatives the list of alternative rule expressions
     * @throws IllegalArgumentException if the given list of
     *         {@code alternatives} is empty
     * @throws NullPointerException if one of the arguments is {@code null}
     * @param <T> the terminal symbol value type
     * @return a new rule
     */
    @SafeVarargs
    public static <T> Rule<T> R(
        final String name,
        final Expression<T>... alternatives
    ) {
        return new Rule<>(new NonTerminal<>(name), List.of(alternatives));
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy