aima.core.nlp.parsing.grammars.ProbUnrestrictedGrammar Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of aima-core Show documentation
Show all versions of aima-core Show documentation
AIMA-Java Core Algorithms from the book Artificial Intelligence a Modern Approach 3rd Ed.
The newest version!
package aima.core.nlp.parsing.grammars;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Represents the most general grammatical formalism,
 * the Unrestricted (or Recursively Enumerable) Grammar.
 * All other grammars can derive from this grammar, imposing extra
 * restrictions.
 * <p>
 * The grammar keeps its rules together with two sorted symbol lists:
 * {@link #vars} (symbols made only of uppercase characters) and
 * {@link #terminals} (symbols made only of lowercase characters).
 *
 * @author Jonathon
 *
 */
public class ProbUnrestrictedGrammar implements ProbabilisticGrammar {

    // types of grammars
    public static final int UNRESTRICTED = 0;
    public static final int CONTEXT_SENSITIVE = 1;
    public static final int CONTEXT_FREE = 2;
    public static final int REGULAR = 3;
    public static final int CNFGRAMMAR = 4;
    public static final int PROB_CONTEXT_FREE = 5;

    /**
     * Largest accepted deviation from 1.0 when summing rule probabilities.
     * Needed because sums such as ten times 0.1f are not exactly 1.0.
     */
    private static final double PROBABILITY_TOLERANCE = 1e-5;

    /** Rules currently held by the grammar. */
    public List<Rule> rules;
    /** Sorted list of the distinct variable symbols seen in the rules. */
    public List<String> vars;
    /** Sorted list of the distinct terminal symbols seen in the rules. */
    public List<String> terminals;
    /** One of the grammar type constants declared above. */
    public int type;

    /**
     * Default constructor. Creates an unrestricted grammar that has no rules,
     * variables or terminals.
     */
    public ProbUnrestrictedGrammar() {
        type = UNRESTRICTED;
        rules = new ArrayList<Rule>();
        vars = new ArrayList<String>();
        terminals = new ArrayList<String>();
    }

    /**
     * Add a number of rules at once, testing each in turn
     * for validity, and then testing the batch for probability validity.
     * <p>
     * Note: on success the supplied list instance becomes the grammar's rule
     * list (it replaces {@link #rules}, it is not appended to it). Symbols
     * already recorded in {@link #vars} and {@link #terminals} are kept and
     * the symbols of the new rules are merged into them.
     *
     * @param ruleList the rules to install
     * @return true if rules are valid and incorporated into the grammar. false, otherwise
     *         (including when {@code ruleList} is null); the grammar is left untouched then
     */
    public boolean addRules( List<Rule> ruleList ) {
        if (ruleList == null) {
            return false;
        }
        for (Rule candidate : ruleList) {
            if (!validRule(candidate)) {
                return false;
            }
        }
        if (!validateRuleProbabilities(ruleList)) {
            return false;
        }
        this.rules = ruleList;
        updateVarsAndTerminals();
        return true;
    }

    /**
     * Add a single rule to the grammar, testing it for structural validity.
     * The probabilities of the resulting rule set are not re-validated here.
     *
     * @param rule the rule to add
     * @return true if rule is incorporated. false, otherwise
     */
    // TODO: More sophisticated probability distribution management
    public boolean addRule( Rule rule ) {
        if (!validRule(rule)) {
            return false;
        }
        rules.add(rule);
        updateVarsAndTerminals(rule);
        return true;
    }

    /**
     * For a set of rules, test whether each batch of rules with the same
     * leading LHS symbol have their probabilities sum to 1.0 (within a small
     * tolerance that absorbs floating-point rounding).
     *
     * @param ruleList the rules to check
     * @return true if the probabilities are valid. false, otherwise
     *         (including when {@code ruleList} is null or holds a rule
     *         rejected by {@link #validRule(Rule)})
     */
    public boolean validateRuleProbabilities( List<Rule> ruleList ) {
        if (ruleList == null) {
            return false;
        }

        // Collect the distinct leading LHS symbols that occur in this batch.
        List<String> heads = new ArrayList<String>();
        for (Rule rule : ruleList) {
            if (!validRule(rule)) {
                // without an LHS the rule cannot be assigned to any batch
                return false;
            }
            String head = rule.lhs.get(0);
            if (!heads.contains(head)) {
                heads.add(head);
            }
        }

        // Each batch must form a probability distribution of its own.
        for (String head : heads) {
            double probTotal = 0.0;
            for (Rule rule : ruleList) {
                if (rule.lhs.get(0).equals(head)) {
                    probTotal += rule.PROB;
                }
            }
            if (Math.abs(probTotal - 1.0) > PROBABILITY_TOLERANCE) {
                return false;
            }
        }
        return true;
    }

    /**
     * Test validity of the LHS and RHS of grammar rule.
     * In unrestricted grammar, the only invalid rule type is
     * a rule with a null or empty LHS.
     *
     * @param r ( a rule )
     * @return true, if rule has valid form. false, otherwise (also for a null rule)
     */
    public boolean validRule( Rule r ) {
        return r != null && r.lhs != null && r.lhs.size() > 0;
    }

    /**
     * Whenever a new rule is added to the grammar, we want to
     * update the list of variables and terminals with any new grammar symbols.
     * If the rule list is null, both symbol lists are reset to empty lists;
     * otherwise the symbols of every rule are merged into the existing lists.
     */
    public void updateVarsAndTerminals() {
        if (rules == null) {
            vars = new ArrayList<String>();
            terminals = new ArrayList<String>();
            return;
        }
        for (Rule r : rules) {
            updateVarsAndTerminals(r); // update the variables and terminals for this rule
        }
    }

    /**
     * Update variable and terminal lists with a single rule's symbols,
     * if there are new symbols. Both lists are re-sorted afterwards.
     *
     * @param r the rule whose LHS and (if present) RHS symbols are recorded
     */
    public void updateVarsAndTerminals( Rule r ) {
        // check lhs for new terminals or variables
        recordSymbols(r.lhs);
        // for rhs we must check that this isn't a null-rule
        if (r.rhs != null) {
            recordSymbols(r.rhs);
        }
        // maintain sorted lists
        Collections.sort(vars);
        Collections.sort(terminals);
    }

    /**
     * Adds every not-yet-known symbol to the variable or terminal list,
     * depending on how it is classified.
     */
    private void recordSymbols( List<String> symbols ) {
        for (String symbol : symbols) {
            if (isVariable(symbol) && !vars.contains(symbol)) {
                vars.add(symbol);
            }
            else if (isTerminal(symbol) && !terminals.contains(symbol)) {
                terminals.add(symbol);
            }
        }
    }

    /**
     * Check if we have a variable, as they are uppercase strings.
     *
     * @param s the symbol to classify
     * @return true, if the string is non-empty and every character is
     *         uppercase. false, otherwise
     */
    public static boolean isVariable(String s) {
        if (s.isEmpty()) {
            // an empty symbol is neither a variable nor a terminal
            return false;
        }
        for (int i = 0; i < s.length(); i++) {
            if (!Character.isUpperCase(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Check if we have a terminal, as they are lowercase strings.
     *
     * @param s the symbol to classify
     * @return true, if the string is non-empty and every character is
     *         lowercase. false, otherwise
     */
    public static boolean isTerminal(String s) {
        if (s.isEmpty()) {
            // an empty symbol is neither a variable nor a terminal
            return false;
        }
        for (int i = 0; i < s.length(); i++) {
            if (!Character.isLowerCase(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Lists the variables, the terminals and then one rule per line.
     */
    @Override
    public String toString() {
        StringBuilder output = new StringBuilder();
        output.append("Variables: ");
        for (String variable : this.vars) {
            output.append(variable).append(", ");
        }
        output.append('\n');
        output.append("Terminals: ");
        for (String terminal : this.terminals) {
            output.append(terminal).append(", ");
        }
        output.append('\n');
        for (Rule rule : this.rules) {
            output.append(rule.toString()).append('\n');
        }
        return output.toString();
    }
}