jeco.core.util.bnf.BnfReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jeco-core Show documentation
Show all versions of jeco-core Show documentation
Java Evolutionary COmputation library
The newest version!
package jeco.core.util.bnf;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
/**
 * Reads a grammar written in BNF notation, either from a file or from a string,
 * and keeps the resulting list of rules.
 */
public class BnfReader {
/** Rules of the grammar; filled by {@link #readBNFString(String)}. */
protected ArrayList<Rule> rules = new ArrayList<Rule>();
/**
 * Creates a reader whose rule list is still empty; use {@code load} or
 * {@code readBNFString} to fill it.
 */
public BnfReader() {
    super();
}
/**
 * Reads a BNF grammar file into memory and parses it with
 * {@link #readBNFString(String)}.
 *
 * <p>Every line is re-terminated with the platform line separator and one
 * extra {@code "\n"} is appended, so the text handed to the parser always
 * ends with a line break.
 *
 * @param pathToBnfFile path of the grammar file
 * @return the result of {@code readBNFString}, or {@code false} if the file
 *         could not be opened, read or closed (the stack trace is printed)
 */
public boolean load(String pathToBnfFile) {
    boolean res = false;
    try {
        StringBuilder contents = new StringBuilder();
        BufferedReader br = new BufferedReader(new FileReader(new File(pathToBnfFile)));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                contents.append(line);
                // readLine strips the line terminator, so put one back
                contents.append(System.getProperty("line.separator"));
            }
        } finally {
            // Release the file handle even when readLine fails; the reader is
            // still closed before parsing starts, as before.
            br.close();
        }
        contents.append("\n");
        res = readBNFString(contents.toString());
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return res;
}
/**
 * Parses a BNF grammar held in a string and adds its rules to {@link #rules}.
 *
 * <p>The text is scanned twice by the same state machine. The first pass only
 * registers one rule per distinct left-hand side; the second pass looks each
 * left-hand side up again and appends its productions to that rule. A
 * {@code #} at the start of a line (leading blanks allowed) begins a comment,
 * a line starting with {@code |} continues the previous rule, double quotes
 * protect special characters inside terminals, and a backslash introduces an
 * escape sequence (a backslash followed by a line break joins two lines).
 * Blanks between tokens of a production are recorded as a single-space
 * terminal symbol. The end of the string is treated as a line break, so the
 * last line does not need to be terminated.
 *
 * <p>Rules already held by this reader are not cleared. After both passes
 * {@link #checkInfiniteRecursion()} is run and, if nothing failed,
 * {@code updateRuleFields()} is called.
 *
 * @param bnfString grammar text in BNF notation
 * @return {@code true} if parsing and the recursion check completed;
 *         {@code false} if any of them threw, in which case the exception
 *         message is printed to standard output
 */
public boolean readBNFString(String bnfString) {
    Rule newRule = new Rule(); // Used to create new rules for grammar
    boolean insertRule = false; // If newRule is to be inserted onto grammar
    Rule currentRule = null; // Used in pass 2 to add productions to current rule
    Production newProduction = new Production(); // Used to create new productions for grammar
    Symbol newSymbol = new Symbol(); // Used to create new symbols for grammar
    String symbolString;
    Symbol newTokenSeparator = new Symbol(); // Used to create token separators for grammar
    int bnfString_size = bnfString.length();
    char currentChar; // Current char of input
    boolean separated = false; // If there was a separator between previous token and current one
    boolean skip = false; // Skip an iteration on parser (for escaped newlines)
    boolean quoted = false; // If current char is quoted
    boolean non_terminal = false; // If current text is a non-terminal symbol
    StringBuilder currentBuffer = new StringBuilder(bnfString_size); // Text of the symbol being read
    // States of parser
    final int START = 0;
    final int START_RULE = 1;
    final int LHS_READ = 2;
    final int PRODUCTION = 3;
    final int START_OF_LINE = 4;
    int state = START; // Current state of parser
    int i;
    try {
        for (int pass = 0; pass < 2; pass++) { // Do 2 passes over the string
            i = 0;
            // "<=" so that one extra iteration feeds the simulated final newline
            while (i <= bnfString_size) {
                if (i < bnfString_size) {
                    currentChar = bnfString.charAt(i);
                } else { // Simulate presence of endl at end of grammar
                    currentChar = '\n';
                }
                if (currentChar == '\\') { // Escape sequence
                    i++;
                    if (i >= bnfString_size) { // Escape sequence as last char is invalid
                        throw new Exception("Escape sequence as last char is invalid");
                    } else {
                        if ((non_terminal) && (bnfString.charAt(i) != '\n')) {
                            // Only escaped newline allowed inside non-terminal
                            throw new Exception("Only escaped newline allowed inside non-terminal");
                        }
                    }
                    char escaped = bnfString.charAt(i);
                    switch (escaped) {
                        case '\'': // Single quote
                            currentChar = '\'';
                            break;
                        case '"': // Double quote
                            currentChar = '"';
                            break;
                        case '\\': // Backslash
                            currentChar = '\\';
                            break;
                        case '0': // Null character
                            currentChar = '\0';
                            break;
                        case 'a': // Audible bell
                            currentChar = '\007';
                            break;
                        case 'b': // Backspace
                            currentChar = '\b';
                            break;
                        case 'f': // Formfeed
                            currentChar = '\f';
                            break;
                        case 'n': // Newline
                            currentChar = '\n';
                            break;
                        case 'r': // Carriage return
                            currentChar = '\r';
                            break;
                        case 't': // Horizontal tab
                            currentChar = '\t';
                            break;
                        case 'v': // Vertical tab
                            currentChar = '\013';
                            break;
                        case '\n': // Escaped newline
                            skip = true; // Ignore newline
                            break;
                        case '\r': // Escaped DOS return
                            skip = true; // Ignore newline
                            if (bnfString.charAt(++i) != '\n') {
                                throw new Exception("No newline");
                            }
                            break;
                        default: // Normal character
                            currentChar = escaped;
                    }
                    // Escaped characters bypass the state machine and are only
                    // collected while productions are being read (second pass).
                    if ((!skip) && (pass > 0)) {
                        if (currentBuffer.length() == 0) { // Empty
                            newSymbol = new Symbol(Symbol.SYMBOL_TYPE.T_SYMBOL);
                        }
                        currentBuffer.append(currentChar);
                    }
                } else {
                    switch (state) {
                        case START:
                            if (currentChar == '\r') {
                                break; // Ignore DOS newline first char
                            }
                            if (currentChar == '#') {
                                // Comment line: advance to the end of the line. The i++ at the
                                // bottom of the loop then steps over the newline itself.
                                while (i < bnfString_size && bnfString.charAt(i) != '\n') {
                                    i++;
                                }
                                break;
                            }
                            switch (currentChar) {
                                case ' ': // Ignore whitespaces
                                case '\t': // Ignore tabs
                                case '\n': // Ignore newlines
                                    break;
                                case '<': // START OF RULE
                                    newSymbol = new Symbol(Symbol.SYMBOL_TYPE.NT_SYMBOL);
                                    currentBuffer.append(currentChar);
                                    state = START_RULE;
                                    break;
                                default: // Illegal
                                    throw new Exception("Illegal");
                            }
                            break;
                        case START_RULE: // Read the lhs Non-terminal symbol
                            if (currentChar == '\r') {
                                break; // Ignore DOS newline first char
                            }
                            switch (currentChar) {
                                case '\n': // Newlines are illegal here
                                    throw new Exception("Newlines are illigal here");
                                case '>': // Possible end of non-terminal symbol
                                    currentBuffer.append(currentChar);
                                    symbolString = currentBuffer.toString();
                                    if (pass == 0) { // First pass
                                        // Look the name up by its text: newSymbol carries no text
                                        // yet at this point, so it cannot be used for the lookup.
                                        if (findRule(symbolString) == null) { // Create new rule for symbol
                                            insertRule = true; // We will add the newRule to Grammar.Rules
                                            newRule.lhs = new Symbol(symbolString, Symbol.SYMBOL_TYPE.NT_SYMBOL);
                                        } else {
                                            insertRule = false; // Already registered: do not add it again
                                        }
                                    } else {
                                        // Second pass
                                        // Point currentRule to previously defined rule
                                        currentRule = findRule(symbolString);
                                        if (currentRule == null) {
                                            throw new Exception("Current rule is null: " + symbolString);
                                        }
                                    }
                                    currentBuffer.delete(0, currentBuffer.length()); // Reset the buffer
                                    state = LHS_READ; // lhs for this rule has been read
                                    break;
                                default: // Check for non-escaped special characters
                                    if (((currentChar == '"') || (currentChar == '|') || (currentChar == '<'))) {
                                        throw new Exception("Non escaped special character");
                                    }
                                    currentBuffer.append(currentChar);
                            }
                            break;
                        case LHS_READ: // Must read ::= token
                            if (currentChar == '\r') {
                                break; // Ignore DOS newline first char
                            }
                            switch (currentChar) {
                                case ' ': // Ignore whitespaces
                                case '\t': // Ignore tabs
                                case '\n': // Ignore newlines
                                    break;
                                case ':': // Part of ::= token
                                    currentBuffer.append(currentChar);
                                    break;
                                case '=': // Should be end of ::= token
                                    currentBuffer.append(currentChar);
                                    if (!"::=".equals(currentBuffer.toString())) { // Something other than ::= was read
                                        throw new Exception("Something other than ::= was read");
                                    }
                                    currentBuffer.delete(0, currentBuffer.length());
                                    // START OF PRODUCTION
                                    newProduction.clear();
                                    state = PRODUCTION;
                                    break;
                                default: // Illegal
                                    throw new Exception("Illigal:" + currentChar);
                            }
                            break;
                        case PRODUCTION: // Read everything until | token or \n, or EOL
                            if (currentChar == '\r') {
                                break; // Ignore DOS newline first char
                            }
                            if (pass == 0) {
                                // First pass ignores production text; only the line end matters
                                if (currentChar == '\n') {
                                    state = START_OF_LINE;
                                }
                                break;
                            }
                            switch (currentChar) {
                                case '|': // Possible end of production
                                    if (quoted) { // Normal character
                                        currentBuffer.append(currentChar);
                                        break;
                                    }
                                    // fall through: an unquoted '|' ends the production like '\n'
                                case '\n': // End of production (and possibly rule)
                                    separated = false; // Reset separator marker
                                    if ((currentBuffer.length() != 0) || (newProduction.size() == 0)) { // There is a symbol to add
                                        if (currentBuffer.length() == 0) {
                                            // No symbol exists; create terminal empty symbol
                                            newSymbol.type = Symbol.SYMBOL_TYPE.T_SYMBOL;
                                        }
                                        if (non_terminal) { // Current non-terminal symbol isn't finished
                                            throw new Exception("Current non-terminal symbol isn't finished: " + currentBuffer.toString());
                                        }
                                        // Added whether or not a rule defines the symbol
                                        appendSymbol(newProduction, newSymbol, currentBuffer.toString());
                                    }
                                    // END OF PRODUCTION
                                    // Add production to current rule
                                    currentRule.add(newProduction.clone());
                                    currentBuffer.delete(0, currentBuffer.length()); // Reset the buffer
                                    if (currentChar == '\n') {
                                        state = START_OF_LINE;
                                    } else {
                                        // START OF PRODUCTION
                                        newProduction.clear();
                                    }
                                    break;
                                case '<': // Possible start of non-terminal symbol
                                case '>': // Possible end of non-terminal symbol
                                case ' ': // Possible token separator
                                case '\t': // Possible token separator
                                    if ((quoted) || (((currentChar == ' ') || (currentChar == '\t')) && (non_terminal))) { // Spaces inside non-terminals are accepted
                                        currentBuffer.append(currentChar);
                                        if (!non_terminal) {
                                            newSymbol.type = Symbol.SYMBOL_TYPE.T_SYMBOL;
                                        }
                                        break;
                                    }
                                    if (currentChar == '>') { // This is also the end of a non-terminal symbol
                                        currentBuffer.append(currentChar);
                                        non_terminal = false;
                                    }
                                    if (currentBuffer.length() != 0) {
                                        if (non_terminal) { // Current non-terminal symbol isn't finished
                                            throw new Exception("Current non-terminal symbol isn't finished: " + currentBuffer.toString());
                                        }
                                        if ((currentChar == ' ') || (currentChar == '\t')) { // Token separator
                                            separated = true;
                                        }
                                        // Added whether or not a rule defines the symbol
                                        appendSymbol(newProduction, newSymbol, currentBuffer.toString());
                                    } else { // Empty buffer
                                        if (((currentChar == ' ') || (currentChar == '\t')) && (newProduction.size() != 0)) {
                                            // Probably a token separator after a non-terminal symbol
                                            separated = true;
                                        }
                                    }
                                    currentBuffer.delete(0, currentBuffer.length()); // Reset the buffer
                                    if (currentChar == '<') { // This is also the start of a non-terminal symbol
                                        // Special case; must create new Symbol here
                                        newSymbol.symbolString = null;
                                        newSymbol.type = Symbol.SYMBOL_TYPE.NT_SYMBOL;
                                        currentBuffer.append(currentChar);
                                        non_terminal = true; // Now reading a non-terminal symbol
                                        if (separated) { // Insert a token separator
                                            separated = false;
                                            appendTokenSeparator(newProduction, newTokenSeparator);
                                        }
                                    }
                                    break;
                                default: // Add character to current buffer
                                    if (separated) { // Insert a token separator
                                        separated = false;
                                        appendTokenSeparator(newProduction, newTokenSeparator);
                                    }
                                    if (currentChar == '"') { // Start (or end) quoted section
                                        quoted = !quoted;
                                        newSymbol.type = Symbol.SYMBOL_TYPE.T_SYMBOL;
                                        break;
                                    }
                                    if (currentBuffer.length() == 0) {
                                        newSymbol.type = Symbol.SYMBOL_TYPE.T_SYMBOL;
                                    }
                                    currentBuffer.append(currentChar);
                            }
                            break;
                        case START_OF_LINE:
                            if (currentChar == '#') {
                                // Comment line: advance to the end of the line. The i++ at the
                                // bottom of the loop then steps over the newline itself.
                                while (i < bnfString_size && bnfString.charAt(i) != '\n') {
                                    i++;
                                }
                                break;
                            }
                            if (currentChar == '\r') {
                                break; // Ignore DOS newline first char
                            }
                            switch (currentChar) {
                                case ' ': // Ignore whitespaces
                                case '\t': // Ignore tabs
                                case '\n': // Ignore newlines
                                    break;
                                case '|': // Start of new production
                                    state = PRODUCTION;
                                    if (pass == 1) {
                                        // START OF PRODUCTION
                                        newProduction.clear();
                                    }
                                    break;
                                case '<': // Start of lhs non-terminal symbol
                                    // END OF RULE
                                    if (pass == 0) {
                                        // Add current rule
                                        if (insertRule) {
                                            rules.add(newRule.clone());
                                        }
                                    }
                                    // START OF RULE
                                    newSymbol.type = Symbol.SYMBOL_TYPE.NT_SYMBOL;
                                    currentBuffer.append(currentChar);
                                    state = START_RULE;
                                    break;
                                default: // Illegal
                                    throw new Exception("Illigal:" + currentChar);
                            }
                            break;
                        default: // Impossible error, quit the program now!
                            throw new Exception("Impossible error, quit the program now!");
                    }
                }
                skip = false;
                i++;
            }
            // END OF PASS
            if (state != START_OF_LINE) { // This must be the state of the parser
                throw new Exception("START_OF_LINE must be the state of the parser");
            }
            if (pass == 0) {
                // Add current rule
                if (insertRule) {
                    this.rules.add(newRule.clone());
                }
            }
        }
        checkInfiniteRecursion();
    } catch (Exception ex) {
        System.out.println(ex.getMessage());
        return false;
    }
    updateRuleFields();
    return true;
}

/** Stores text in symbol, appends a copy of it to production, then blanks symbol for reuse. */
private static void appendSymbol(Production production, Symbol symbol, String text) {
    symbol.symbolString = text;
    production.add(symbol.clone());
    symbol.symbolString = null;
    symbol.type = null;
}

/** Appends a copy of a single-space terminal to production, using separator as the scratch object. */
private static void appendTokenSeparator(Production production, Symbol separator) {
    separator.symbolString = " ";
    separator.type = Symbol.SYMBOL_TYPE.T_SYMBOL;
    production.add(separator.clone());
}
/**
 * Looks up the rule whose left-hand side equals the given symbol.
 *
 * @param symbol symbol compared against each rule's {@code lhs} via {@code equals}
 * @return the first matching rule in {@code rules}, or {@code null} if none matches
 */
public Rule findRule(Symbol symbol) {
    Rule match = null;
    for (Rule candidate : rules) {
        if (candidate.lhs.equals(symbol)) {
            match = candidate;
            break;
        }
    }
    return match;
}
/**
 * Looks up a rule by the text of its left-hand side.
 *
 * @param symbolString text compared against {@code lhs.symbolString} of each rule
 * @return the first matching rule in {@code rules}, or {@code null} if none matches
 */
public Rule findRule(String symbolString) {
    Rule match = null;
    for (Rule candidate : rules) {
        String lhsText = candidate.lhs.symbolString;
        if (lhsText.equals(symbolString)) {
            match = candidate;
            break;
        }
    }
    return match;
}
/**
 * Runs {@code isInfinitlyRecursive} on every rule and fails on the first hit.
 *
 * @throws Exception with the offending rule's text when a rule is reported as
 *         infinitely recursive, or whatever {@code isInfinitlyRecursive} throws
 */
public void checkInfiniteRecursion() throws Exception {
    for (Rule candidate : rules) {
        boolean neverTerminates = isInfinitlyRecursive(candidate);
        if (!neverTerminates) {
            continue;
        }
        throw new Exception("Infinite recursion: " + candidate.toString());
    }
}
// NOTE(review): this copy of the file is damaged from here to the closing
// brace below. The body of isInfinitlyRecursive breaks off inside the
// argument of startsWith(...), and the statements after it appear to be the
// tail of a different method - presumably updateRuleFields(), which
// readBNFString calls but which has no header anywhere in this file.
// Restore both methods from the upstream source before relying on this part.
//
// Visible part of isInfinitlyRecursive: a work-list traversal that starts at
// startRule, moves each rule taken from rulesToVisit into visitedRules and
// inspects the non-terminal symbols of its productions.
public boolean isInfinitlyRecursive(Rule startRule) throws Exception {
LinkedList rulesToVisit = new LinkedList();
ArrayList visitedRules = new ArrayList();
Rule currentRule;
rulesToVisit.add(startRule);
while (!rulesToVisit.isEmpty()) {
currentRule = rulesToVisit.remove();
visitedRules.add(currentRule);
for (Production production : currentRule) {
for (Symbol symbol : production) {
if (symbol.type == Symbol.SYMBOL_TYPE.NT_SYMBOL
&& !symbol.symbolString.startsWith(" visitedRules = new ArrayList();
// NOTE(review): text is missing on the line above. What follows resets the
// derived fields of all rules, then recomputes for every rule its recursive
// flag, its minimum depth and the minimum depth of each of its productions.
clearRuleFields();
for (Rule rule : rules) {
visitedRules.clear();
rule.recursive = isRecursive(visitedRules, rule);
}
for (Rule rule : rules) {
visitedRules.clear();
calculateMinimumDepthRecursive(rule, visitedRules);
}
for (Rule rule : rules) {
setProductionMinimumDepth(rule);
}
}
/**
 * Resets the derived fields of every rule and of every production in it:
 * {@code recursive} becomes {@code false} and {@code minimumDepth} becomes
 * {@code Integer.MAX_VALUE >> 1}.
 */
public void clearRuleFields() {
    // NOTE(review): presumably halved so that the "+ 1" applied in
    // calculateMinimumDepthRecursive cannot overflow - confirm.
    final int unsetDepth = Integer.MAX_VALUE >> 1;
    for (Rule grammarRule : rules) {
        grammarRule.recursive = false;
        grammarRule.minimumDepth = unsetDepth;
        for (Production alternative : grammarRule) {
            alternative.minimumDepth = unsetDepth;
            alternative.recursive = false;
        }
    }
}
/**
 * Tells whether a recursive production recurses through the given recursive rule.
 *
 * @param r given rule
 * @param p given production
 * @return true only if both r and p are flagged recursive and p contains a
 *         symbol equal to the left-hand side of r; false otherwise.
 */
public boolean sameRecursion(Rule r, Production p) {
    if (!r.getRecursive() || !p.getRecursive()) {
        return false;
    }
    boolean mentionsLhs = false;
    for (Symbol part : p) {
        if (part.equals(r.lhs)) {
            mentionsLhs = true;
            break;
        }
    }
    return mentionsLhs;
}
/**
 * Tells whether a symbol is the left-hand side of the given recursive rule.
 *
 * @param r given rule
 * @param s given symbol
 * @return true only if r is flagged recursive and s equals the left-hand side
 *         of r; false otherwise.
 */
public boolean sameRecursion(Rule r, Symbol s) {
    return r.getRecursive() && s.equals(r.lhs);
}
/**
 * Searches the rules reachable from {@code currentRule} for a path that
 * leads back to it, marking rules and productions as recursive on the way.
 *
 * <p>The productions examined on each call are those of the rule most
 * recently appended to {@code visitedRules}, or of {@code currentRule} itself
 * when the list is empty (the initial call). Each defined non-terminal not
 * yet in the list is appended and explored by a recursive call with the same
 * {@code currentRule}.
 *
 * @param visitedRules rules reached so far; pass an empty list to start a
 *        search. The list is modified and entries are not removed.
 * @param currentRule rule whose recursiveness is being determined
 * @return {@code true} as soon as the rule found for {@code currentRule.lhs}
 *         is present in {@code visitedRules}; {@code false} if the search
 *         ends without that happening
 */
protected boolean isRecursive(ArrayList<Rule> visitedRules, Rule currentRule) {
    Rule ruleToExpand;
    if (visitedRules.isEmpty()) {
        ruleToExpand = currentRule;
    } else {
        ruleToExpand = visitedRules.get(visitedRules.size() - 1);
    }
    // Check if this is a recursive call to a previously visited rule
    if (visitedRules.contains(findRule(currentRule.lhs))) {
        currentRule.recursive = true;
        return true;
    }
    // Go through each production in the rule
    for (Production production : ruleToExpand) {
        for (Symbol symbol : production) {
            if (symbol.type != Symbol.SYMBOL_TYPE.NT_SYMBOL) {
                continue;
            }
            Rule definingRule = findRule(symbol);
            if (definingRule == null) {
                continue; // Undefined non-terminal: nothing to follow
            }
            if (!visitedRules.contains(definingRule)) {
                visitedRules.add(definingRule);
                if (isRecursive(visitedRules, currentRule)) {
                    production.recursive = true;
                    return true;
                }
            } else if (definingRule.recursive) {
                // Already visited: reuse the flag recorded for that rule
                production.recursive = true;
            }
        }
    }
    return false;
}
/**
 * Computes {@code minimumDepth} for {@code startRule}, for its productions
 * and, recursively, for every rule they reference.
 *
 * <p>A production's depth starts at 0, is raised to at least 1 by any symbol
 * that is not a non-terminal, and to at least (depth of the referenced rule
 * + 1) by each non-terminal that has a defining rule. The rule's depth is
 * lowered to the smallest depth among its productions; it is never raised
 * here, so it is expected to hold a large initial value (see
 * {@code clearRuleFields}).
 *
 * @param startRule rule to evaluate; skipped if already in {@code visitedRules}
 * @param visitedRules rules whose evaluation has started; modified by this
 *        method, entries are never removed
 */
public void calculateMinimumDepthRecursive(Rule startRule, ArrayList<Rule> visitedRules) {
    if (visitedRules.contains(startRule)) {
        return;
    }
    for (Production production : startRule) {
        production.minimumDepth = 0;
        for (Symbol symbol : production) {
            if (symbol.type == Symbol.SYMBOL_TYPE.NT_SYMBOL) {
                Rule currentRule = findRule(symbol);
                if (currentRule != null) {
                    // Mark startRule before descending so a cycle back to it stops
                    visitedRules.add(startRule);
                    calculateMinimumDepthRecursive(currentRule, visitedRules);
                    production.minimumDepth = Math.max(production.minimumDepth, currentRule.minimumDepth + 1);
                }
            } else {
                production.minimumDepth = Math.max(production.minimumDepth, 1);
            }
        }
        startRule.minimumDepth = Math.min(startRule.minimumDepth, production.minimumDepth);
    }
}
/**
 * Overwrites the {@code minimumDepth} of each production of {@code rule} with
 * the largest {@code minimumDepth} among the rules that define its
 * non-terminal symbols, or 0 when it references no defined non-terminal.
 *
 * @param rule rule whose productions are updated
 */
public void setProductionMinimumDepth(Rule rule) {
    for (Production alternative : rule) {
        int deepest = 0;
        for (Symbol part : alternative) {
            if (part.type != Symbol.SYMBOL_TYPE.NT_SYMBOL) {
                continue;
            }
            Rule definition = this.findRule(part);
            if (definition == null) {
                continue;
            }
            deepest = Math.max(deepest, definition.minimumDepth);
        }
        alternative.minimumDepth = deepest;
    }
}
/**
 * Returns the rule list held by this reader.
 *
 * @return the internal list itself, not a copy; changes made through it
 *         affect this reader
 */
public ArrayList<Rule> getRules() {
    return rules;
}
/**
 * Small manual check: loads a grammar and prints, for every rule, its text,
 * its left-hand side, its minimum depth and the results of the two recursion
 * checks.
 *
 * @param args optional; {@code args[0]} is the grammar file to load. Without
 *        arguments {@code test/grammar_example.bnf} is used.
 */
public static void main(String[] args) {
    String grammarPath = (args != null && args.length > 0) ? args[0] : "test/grammar_example.bnf";
    BnfReader bnfReader = new BnfReader();
    if (!bnfReader.load(grammarPath)) {
        // load already reported the cause; do not print a half-built grammar
        System.err.println("Could not load grammar: " + grammarPath);
        return;
    }
    for (Rule rule : bnfReader.rules) {
        System.out.println(rule.toString());
        System.out.println(rule.lhs.toString());
        bnfReader.setProductionMinimumDepth(rule);
        System.out.println(rule.minimumDepth);
        try {
            System.out.println(bnfReader.isRecursive(new ArrayList<Rule>(), rule));
            System.out.println(bnfReader.isInfinitlyRecursive(rule));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
}