
aima.core.nlp.parsing.CYK Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of aima-core Show documentation
Show all versions of aima-core Show documentation
AIMA-Java Core Algorithms from the book Artificial Intelligence a Modern Approach 3rd Ed.
package aima.core.nlp.parsing;
import java.util.ArrayList;
import java.util.List;
import aima.core.nlp.parsing.grammars.ProbCNFGrammar;
import aima.core.nlp.parsing.grammars.ProbUnrestrictedGrammar;
import aima.core.nlp.parsing.grammars.Rule;
/**
* Artificial Intelligence A Modern Approach (3rd Edition): page 894.
*
*
*
* function CYK-PARSE(words, grammar) returns P, a table of probabilities
* N <- LENGTH(words)
* M <- the number of nonterminal symbols in grammar
* P <- an array of size[M,N,N], initially all 0
* /* Insert Lexical rules for each word *\
* for i = 1 to N do
* for each rule of form( X -> wordsi[p]) do
* P[X,i,1] <- p
* /* Combine first and second parts of right-hand sides of rules, from short to long *\
* for length = 2 to N do
* for start = 1 to N - length + 1 do
* for len1 = 1 to N -1 do
* len2 <- length - len1
* for each rule of the form( X -> Y Z [p] ) do
* p[X, start, length] <- MAX(P[X, start, length],
* P[Y, start, len1] * P[Z, start + len1, len2] * p)
* return P
*
*
* Figure 23.5 The CYK algorithm for parsing. Given a sequence of words,
* it finds the most probable derivation for the whole sequence and for
* each subsequence. It returns the whole table, P, in which an entry
* P[X, start, len] is the probability of the most probable X of length
* len starting at position start. If there is no X of that size at that
* location, the probability is 0.
*
*
* @author Jonathon Belotti (thundergolfer)
*
*/
public class CYK {
public float[][][] parse( List words, ProbCNFGrammar grammar ) {
final int N = length(words);
final int M = grammar.vars.size();
float[][][] P = new float[M][N][N]; // initialised to 0.0
for( int i=0; i < N; i++ ) {
//for each rule of form( X -> wordsi[p]) do
// P[X,i,1] <- p
for( int j=0; j < grammar.rules.size(); j++ ) {
Rule r = (Rule) grammar.rules.get(j);
if( r.derives(words.get(i))) { // rule is of form X -> w, where w = words[i]
int x = grammar.vars.indexOf(r.lhs.get(0)); // get the index of rule's LHS variable
P[x][i][0] = r.PROB; // not P[X][i][1] because we use 0-based indexing
}
}
}
for( int length=2; length <= N; length++ ) {
for( int start=1; start <= N - length + 1; start++ ) {
for( int len1=1; len1 <= length -1; len1++ ) { // N.B. the book incorrectly has N-1 instead of length-1
int len2 = length - len1;
// for each rule of the form X -> Y Z, where Y,Z are variables of the grammar
for( int j=0; j < grammar.rules.size(); j++ ) {
Rule r = grammar.rules.get(j);
if( r.rhs.size() == 2 ) {
// get index of rule's variables X, Y, and Z
int x = grammar.vars.indexOf(r.lhs.get(0));
int y = grammar.vars.indexOf(r.rhs.get(0));
int z = grammar.vars.indexOf(r.rhs.get(1));
P[x][start-1][length-1] = Math.max( P[x][start-1][length-1],
P[y][start-1][len1-1] *
P[z][start+len1-1][len2-1] * r.PROB);
}
}
}
}
}
return P;
}
/**
* Simple function to make algorithm more closely resemble pseudocode
* @param ls
* @return the length of the list
*/
public int length( List ls ) {
return ls.size();
}
/**
* Print out the probability table produced by the CYK Algorithm
* @param probTable
* @param words
* @param g
*/
public void printProbTable( float[][][] probTable, List words, ProbUnrestrictedGrammar g ) {
final int N = words.size();
final int M = g.vars.size(); // num non-terminals in grammar
for( int i=0; i < M; i++ ) {
System.out.println("Table For : " + g.vars.get(i) + "(" + i + ")");
for( int j=0; j < N; j++ ) {
System.out.print(j + "| ");
for( int k=0; k < N; k++ ) {
System.out.print(probTable[i][j][k] + " | ");
}
System.out.println();
}
System.out.println();
}
}
/**
* The probability table get's us halfway there, but this method can provide the
* derivation chain that most probably derives the words provided to the parser.
* @param probTable
* @param g
* @return
*/
public ArrayList getMostProbableDerivation( float[][][] probTable, ProbUnrestrictedGrammar g ) {
// TODO
return null;
}
} // end of CYKParse()
© 2015 - 2025 Weber Informatics LLC | Privacy Policy