All downloads are free. The search and download functionality uses the official Maven repository.

io.jenetics.ext.grammar.BnfParser Maven / Gradle / Ivy

The newest version!
/*
 * Java Genetic Algorithm Library (jenetics-8.1.0).
 * Copyright (c) 2007-2024 Franz Wilhelmstötter
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Author:
 *    Franz Wilhelmstötter ([email protected])
 */
package io.jenetics.ext.grammar;

import static java.lang.String.format;
import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.ASSIGN;
import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.BAR;
import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.GT;
import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.ID;
import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.LT;
import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.QUOTED_STRING;
import static io.jenetics.ext.grammar.BnfTokenizer.BnfTokenType.STRING;
import static io.jenetics.ext.internal.parser.Token.Type.EOF;

import java.util.ArrayList;
import java.util.List;

import io.jenetics.ext.grammar.Cfg.Expression;
import io.jenetics.ext.grammar.Cfg.NonTerminal;
import io.jenetics.ext.grammar.Cfg.Rule;
import io.jenetics.ext.grammar.Cfg.Symbol;
import io.jenetics.ext.grammar.Cfg.Terminal;
import io.jenetics.ext.internal.parser.ParsingException;
import io.jenetics.ext.internal.parser.TokenParser;

/**
 * Parser for BNF grammars.
 *
 * 
{@code
 * rulelist: rule_* EOF;
 * rule: lhs ASSIGN rhs;
 * lhs: id;
 * rhs: alternatives;
 * alternatives: alternative (BAR alternative)*;
 * alternative: element*;
 * element:  text | id;
 * text: STRING | QUOTED_STRING;
 * id: LT ruleid GT;
 * ruleid: ID;
 * }
* * The BNF object is build from the following classes. *
    *
  • {@link Symbol}: A symbol is either a {@link Terminal} or * {@link NonTerminal} symbol.
  • *
  • {@link NonTerminal}: Non-terminal symbols are parenthesised in angle * brackets; {@code }, {@code num} or {@code var}. The name must start * with a letter and contain only letters and digits: * {@code ('a'..'z'|'A'..'Z') ('a'..'z'|'A'..'Z'|'0'..'9'|'-')+}
  • *
  • {@link Terminal}: Terminal symbols are simple string values, which * can also be quoted; {@code x}, {@code 1}, {@code terminal} or * {@code 'some $special value'}
  • *
  • {@link Expression}: Consists of a list of symbols; {@code [num]}, * {@code [var]} or {@code [(, expr, op, expr, )]}
  • *
  • {@link Rule}: A rule has a name, a non-terminal start symbol, and a * list of alternative expressions; * {@code ::= [[num], [var], [(, expr, op, expr, )]]}
  • *
  • {@link Cfg}: A whole BNF grammar consists of one or more {@link Rule}s.
  • *
* * @author Franz Wilhelmstötter * @since 7.1 * @version 7.1 */ final class BnfParser extends TokenParser { NonTerminal start = null; final List> rules = new ArrayList<>(); final List> symbols = new ArrayList<>(); final List> alternatives = new ArrayList<>(); BnfParser(final BnfTokenizer tokenizer) { super(tokenizer, 4); } public Cfg parse() { rulelist(); return Cfg.of(rules); } private void rulelist() { do { rule(); } while (LA(1) != EOF.code()); } private void rule() { start = lhs(); match(ASSIGN); rhs(); rules.add(new Rule<>(start, alternatives)); start = null; alternatives.clear(); } private NonTerminal lhs() { return id(); } private void rhs() { alternatives(); } private void alternatives() { alternative(); if (!symbols.isEmpty()) { alternatives.add(new Expression<>(symbols)); symbols.clear(); } while (LA(1) == BAR.code()) { match(BAR); alternative(); if (!symbols.isEmpty()) { alternatives.add(new Expression<>(symbols)); symbols.clear(); } } } private void alternative() { do { element(); } while ( LA(4) != ASSIGN.code() && ( LA(1) == STRING.code() || LA(1) == QUOTED_STRING.code() || LA(1) == ID.code() || LA(1) == LT.code() ) ); } private void element() { if (LA(1) == STRING.code()) { symbols.add(text()); } else if (LA(1) == QUOTED_STRING.code()) { symbols.add(text()); } else if (LA(1) == ID.code()) { symbols.add(text()); } else if (LA(1) == LT.code()) { symbols.add(id()); } else { throw new ParsingException(format( "Expecting %s but found %s.", List.of(STRING, QUOTED_STRING, ID, LT), LT(1) )); } } private Terminal text() { if (LA(1) == STRING.code()) { return terminal(match(STRING).value()); } else if (LA(1) == QUOTED_STRING.code()) { return terminal(match(QUOTED_STRING).value()); } else if (LA(1) == ID.code()) { return terminal(match(ID).value()); } else { throw new ParsingException(format( "Expecting %s but found %s.", List.of(STRING, QUOTED_STRING, ID), LT(1) )); } } private static Terminal terminal(final String value) { if (value.isEmpty()) { throw new 
ParsingException("Terminal value must not be empty."); } return new Terminal<>(value, value); } private NonTerminal id() { match(LT); final var result = ruleid(); match(GT); return result; } private NonTerminal ruleid() { final var name = match(ID).value(); if (name.isEmpty()) { throw new ParsingException("Rule id must not be empty."); } return new NonTerminal<>(name); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy