![JAR search and dependency download from the Maven repository](/logo.png)
edu.berkeley.nlp.PCFGLA.CoarseToFineTwoChartsParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
/**
*
*/
package edu.berkeley.nlp.PCFGLA;
import java.util.Arrays;
import java.util.List;
import edu.berkeley.nlp.discPCFG.Linearizer;
import edu.berkeley.nlp.syntax.StateSet;
import edu.berkeley.nlp.syntax.Tree;
/**
* @author petrov
*
*/
public class CoarseToFineTwoChartsParser extends CoarseToFineMaxRuleParser{
/**
 * Inside and outside scores, indexed as [start idx][end idx][state][substate] -> logProb/prob.
 * There are two charts of each kind: one before applying unaries (PreU) and one after (PostU).
 */
protected double[][][][] iScorePreU, iScorePostU;
protected double[][][][] oScorePreU, oScorePostU;
/**
 * Builds a two-chart parser by handing every argument to the superclass constructor.
 *
 * @param gr grammar, forwarded unchanged
 * @param lex lexicon, forwarded unchanged
 * @param unaryPenalty unary penalty, forwarded unchanged
 * @param endL forwarded unchanged (presumably the last level of the cascade -- confirm)
 * @param viterbi forwarded unchanged (presumably selects Viterbi parsing -- confirm)
 * @param sub forwarded unchanged
 * @param score forwarded unchanged
 * @param accurate forwarded unchanged
 */
public CoarseToFineTwoChartsParser(
    Grammar gr,
    Lexicon lex,
    double unaryPenalty,
    int endL,
    boolean viterbi,
    boolean sub,
    boolean score,
    boolean accurate) {
  // NOTE(review): the three trailing flags are hard-coded here; their meaning is
  // defined by the superclass constructor, which is not visible in this file.
  super(
      gr,
      lex,
      unaryPenalty,
      endL,
      viterbi,
      sub,
      score,
      accurate,
      false,
      false,
      true);
}
/**
 * Fills the inside charts bottom-up, from the shortest spans to the longest.
 * <p>
 * For every span the visible code first combines the post-unary scores of the two
 * children ({@code iScorePostU}) through binary rules and stores the result in the
 * pre-unary chart ({@code iScorePreU}). A second pass then writes the post-unary
 * chart ({@code iScorePostU}) for the same span. The narrow/wide extent tables are
 * adjusted after a cell has been written.
 * <p>
 * NOTE(review): the two loop headers that start with {@code for (int pState=0; pState}
 * are truncated in this copy of the file. The declarations of several variables used
 * below (for example {@code scoresToAdd}, {@code somethingChanged},
 * {@code nParentStates}, {@code narrowR}, {@code narrowL}, {@code lState},
 * {@code rState}, {@code r}) are not visible. Restore them from the canonical source.
 *
 * @param grammar grammar whose {@code numSubStates} is copied into {@code numSubStatesArray}
 * @param viterbi if true, alternatives are combined with {@code Math.max}; otherwise they are summed
 * @param logScores if true, scores are added and an empty cell is negative infinity;
 *        otherwise scores are multiplied and an empty cell is 0
 * @throws Error if {@code logScores} is requested without {@code viterbi}
 */
void doConstrainedInsideScores(Grammar grammar, boolean viterbi, boolean logScores) {
if (!viterbi && logScores) throw new Error("This would require logAdds and is slow. Exponentiate the scores instead.");
numSubStatesArray = grammar.numSubStates;
// Value that marks "no score yet" in the chosen score domain.
double initVal = (logScores) ? Double.NEGATIVE_INFINITY : 0;
//double[] oldIScores = new double[maxNSubStates];
//int smallestScale = 10, largestScale = -10;
// diff is the span width; shorter spans are completed before longer ones.
for (int diff = 1; diff <= length; diff++) {
//smallestScale = 10; largestScale = -10;
//System.out.print(diff + " ");
for (int start = 0; start < (length - diff + 1); start++) {
int end = start + diff;
// NOTE(review): the next line is truncated; the parent-state loop condition and the
// code up to the iPossibleR assignment are missing.
for (int pState=0; pState= narrowR); // can this right constituent fit next to the left constituent?
if (!iPossibleR) { continue; }
int min1 = narrowR;
int min2 = wideLExtent[end][rState];
int min = (min1 > min2 ? min1 : min2); // can this right constituent stretch far enough to reach the left constituent?
if (min > narrowL) { continue; }
int max1 = wideRExtent[start][lState];
int max2 = narrowL;
int max = (max1 < max2 ? max1 : max2); // can this left constituent stretch far enough to reach the right constituent?
if (min > max) { continue; }
// TODO switch order of loops for efficiency
double[][][] scores = r.getScores2();
int nLeftChildStates = numSubStatesArray[lState];
int nRightChildStates = numSubStatesArray[rState];
// Only split points inside [min, max] are tried.
for (int split = min; split <= max; split++) {
// A null entry means the child state is not allowed on that span.
if (allowedSubStates[start][split][lState] == null) continue;
if (allowedSubStates[split][end][rState] == null) continue;
for (int lp = 0; lp < nLeftChildStates; lp++) {
//if (iScore[start][split][lState] == null) continue;
//if (!allowedSubStates[start][split][lState][lp]) continue;
double lS = iScorePostU[start][split][lState][lp];
if (lS == initVal) continue;
for (int rp = 0; rp < nRightChildStates; rp++) {
if (scores[lp][rp]==null) continue;
double rS = iScorePostU[split][end][rState][rp];
if (rS == initVal) continue;
for (int np = 0; np < nParentStates; np++) {
if (!allowedSubStates[start][end][pState][np]) continue;
double pS = scores[lp][rp][np];
if (pS == initVal) continue;
//if (iScore[split][end][rState] == null) continue;
//if (!allowedSubStates[split][end][rState][rp]) continue;
// Rule score combined with both child scores in the active score domain.
double thisRound = (logScores) ? pS+lS+rS : pS*lS*rS;
if (viterbi) scoresToAdd[np] = Math.max(thisRound, scoresToAdd[np]);
else scoresToAdd[np] += thisRound;
somethingChanged = true;
}
}
}
//if (!somethingChanged) continue;
//boolean firstTime = false;
/* int parentScale = iScale[start][end][pState];
int currentScale = iScale[start][split][lState]+iScale[split][end][rState];
if (parentScale==currentScale) {
// already had a way to generate this state and the scales are the same
// -> nothing to do
}
else {
if (parentScale==Integer.MIN_VALUE){ // first time we can build this state
firstTime = true;
parentScale = scaleArray(scoresToAdd,currentScale);
iScale[start][end][pState] = parentScale;
//smallestScale = Math.min(smallestScale,parentScale);
//largestScale = Math.max(largestScale,parentScale);
}
else { // scale the smaller one to the base of the bigger one
int newScale = Math.max(currentScale,parentScale);
scaleArrayToScale(scoresToAdd,currentScale,newScale);
scaleArrayToScale(iScore[start][end][pState],parentScale,newScale);
iScale[start][end][pState] = newScale;
//smallestScale = Math.min(smallestScale,newScale);
//largestScale = Math.max(largestScale,newScale);
}
}*/
}
}
if (!somethingChanged) continue;
// Binary-rule results are stored in the pre-unary chart.
for (int np = 0; np < nParentStates; np++) {
if (scoresToAdd[np] > initVal) {
iScorePreU[start][end][pState][np] = scoresToAdd[np];
}
}
//iScale[start][end][pState] = currentScale;
//iScale[start][end][pState] = scaleArray(iScore[start][end][pState],iScale[start][end][pState]);
// Record that pState now covers [start, end] in the extent tables.
if (true/*firstTime*/) {
if (start > narrowLExtent[end][pState]) {
narrowLExtent[end][pState] = start;
wideLExtent[end][pState] = start;
} else {
if (start < wideLExtent[end][pState]) {
wideLExtent[end][pState] = start;
}
}
if (end < narrowRExtent[start][pState]) {
narrowRExtent[start][pState] = end;
wideRExtent[start][pState] = end;
} else {
if (end > wideRExtent[start][pState]) {
wideRExtent[start][pState] = end;
}
}
}
}
// now do the unaries
// NOTE(review): the next line is truncated; the unary loop body up to the middle of a
// commented-out scaling block is missing, so the lines down to the closing "}*/" were
// originally inside a block comment.
for (int pState=0; pState nothing to do
}
else {
if (parentScale==Integer.MIN_VALUE){ // first time we can build this state
firstTime = true;
parentScale = scaleArray(scoresToAdd,currentScale);
iScale[start][end][pState] = parentScale;
//smallestScale = Math.min(smallestScale,parentScale);
//largestScale = Math.max(largestScale,parentScale);
}
else { // scale the smaller one to the base of the bigger one
int newScale = Math.max(currentScale,parentScale);
scaleArrayToScale(scoresToAdd,currentScale,newScale);
scaleArrayToScale(iScore[start][end][pState],parentScale,newScale);
iScale[start][end][pState] = newScale;
//smallestScale = Math.min(smallestScale,newScale);
//largestScale = Math.max(largestScale,newScale);
}
}*/
// No unary contribution: the post-unary cell is a copy of the pre-unary cell.
if (!somethingChanged) {
iScorePostU[start][end][pState] = iScorePreU[start][end][pState].clone();
continue;
} else{
// Otherwise merge the unary contribution with the pre-unary score per substate.
for (int np = 0; np < nParentStates; np++) {
if (scoresToAdd[np] > initVal) {
if (viterbi) iScorePostU[start][end][pState][np] = Math.max(iScorePreU[start][end][pState][np], scoresToAdd[np]);
else iScorePostU[start][end][pState][np] = iScorePreU[start][end][pState][np] + scoresToAdd[np];
}
else iScorePostU[start][end][pState][np] = iScorePreU[start][end][pState][np];
}
}
//iScale[start][end][pState] = currentScale;
//iScale[start][end][pState] = scaleArray(iScore[start][end][pState],iScale[start][end][pState]);
// Same extent bookkeeping as after the binary rules.
if (true){
if (start > narrowLExtent[end][pState]) {
narrowLExtent[end][pState] = start;
wideLExtent[end][pState] = start;
} else {
if (start < wideLExtent[end][pState]) {
wideLExtent[end][pState] = start;
}
}
if (end < narrowRExtent[start][pState]) {
narrowRExtent[start][pState] = end;
wideRExtent[start][pState] = end;
} else {
if (end > wideRExtent[start][pState]) {
wideRExtent[start][pState] = end;
}
}
}
}
}
}
}
/**
 * Fills the outside charts top-down, from the widest span to the narrowest.
 * <p>
 * In the visible code, unary contributions are accumulated into {@code oScorePostU}
 * and binary contributions for the left and right child are accumulated into
 * {@code oScorePreU}. Accumulation uses {@code Math.max} when {@code viterbi} is set
 * and {@code +=} otherwise.
 * <p>
 * NOTE(review): several loop headers in this method are truncated in this copy of the
 * file (every line starting with {@code for (int ...=0;} followed by a broken
 * condition). The declarations of {@code scoresToAdd}, {@code min}, {@code max},
 * {@code min1}, {@code max1}, {@code lState}, {@code rState}, {@code br} and others
 * are not visible. Restore them from the canonical source.
 *
 * @param grammar grammar whose {@code numSubStates} is copied into {@code numSubStatesArray}
 * @param viterbi if true, alternatives are combined with {@code Math.max}; otherwise they are summed
 * @param logScores if true, an empty cell is negative infinity; otherwise it is 0
 */
void doConstrainedOutsideScores(Grammar grammar, boolean viterbi, boolean logScores) {
numSubStatesArray = grammar.numSubStates;
// Value that marks "no score yet" in the chosen score domain.
double initVal = (logScores) ? Double.NEGATIVE_INFINITY : 0;
// Widest spans first: the loop runs from diff == length down to 1.
for (int diff = length; diff >= 1; diff--) {
for (int start = 0; start + diff <= length; start++) {
int end = start + diff;
// do unaries
boolean somethingChanged = false;
// NOTE(review): truncated line; the unary loop body up to the middle of a
// commented-out scaling block is missing.
for (int pState=0; pState nothing to do
} else {
if (childScale==Integer.MIN_VALUE){ // first time we can build this state
firstTime = true;
childScale = scaleArray(scoresToAdd,currentScale);
oScale[start][end][cState] = childScale;
}
else { // scale the smaller one to the base of the bigger one
int newScale = Math.max(currentScale,childScale);
scaleArrayToScale(scoresToAdd,currentScale,newScale);
scaleArrayToScale(oScore[start][end][cState],childScale,newScale);
oScale[start][end][cState] = newScale;
}
}*/
if (somethingChanged){
// NOTE(review): truncated line; the substate loop condition is missing.
for (int cp=0; cp 0) oScore[start][end][cState][cp] += scoresToAdd[cp];
if (scoresToAdd[cp] > initVal){
if (viterbi) oScorePostU[start][end][cState][cp] = Math.max(oScorePostU[start][end][cState][cp], scoresToAdd[cp]);
else oScorePostU[start][end][cState][cp] += scoresToAdd[cp];
}
}
}
}
}
// copy/add the entries where the unaries where not useful
// NOTE(review): truncated line; the copy step and the start of the binary-rule loops
// are missing.
for (int cState=0; cState 2) {
int min2 = wideLExtent[end][rState];
min = (min1 > min2 ? min1 : min2);
if (max1 < min) { continue; }
int max2 = wideRExtent[start][lState];
max = (max1 < max2 ? max1 : max2);
if (max < min) { continue; }
}
double[][][] scores = br.getScores2();
int nLeftChildStates = numSubStatesArray[lState];
int nRightChildStates = numSubStatesArray[rState];
for (int split = min; split <= max; split++) {
// Skip split points where either child has no outside cell allocated.
if (oScorePreU[start][split][lState] == null) continue;
if (oScorePreU[split][end][rState] == null) continue;
//if (!allowedStates[start][split][lState]) continue;
//if (!allowedStates[split][end][rState]) continue;
// scoresToAdd collects the left child's contribution, rightScores the right child's.
double[] rightScores = new double[nRightChildStates];
Arrays.fill(scoresToAdd,initVal);
Arrays.fill(rightScores,initVal);
somethingChanged = false;
// NOTE(review): truncated line; the substate loops that fill scoresToAdd and
// rightScores are missing.
for (int lp=0; lp nothing to do
} else {
if (leftScale==Integer.MIN_VALUE){ // first time we can build this state
firstTime = true;
leftScale = scaleArray(scoresToAdd,currentScale);
oScale[start][split][lState] = leftScale;
}
else { // scale the smaller one to the base of the bigger one
int newScale = Math.max(currentScale,leftScale);
scaleArrayToScale(scoresToAdd,currentScale,newScale);
scaleArrayToScale(oScore[start][split][lState],leftScale,newScale);
oScale[start][split][lState] = newScale;
}
}*/
// NOTE(review): truncated line; loop condition and the start of the guard are missing.
for (int cp=0; cp initVal){
if (viterbi) oScorePreU[start][split][lState][cp] = Math.max(oScorePreU[start][split][lState][cp], scoresToAdd[cp]);
else oScorePreU[start][split][lState][cp] += scoresToAdd[cp];
}
}
//oScale[start][split][lState] = currentScale;
//oScale[start][split][lState] = scaleArray(oScore[start][split][lState],oScale[start][split][lState]);
//currentScale = parentScale+iScale[start][split][lState];
/*firstTime = false;
if (rightScale==currentScale) {
// already had a way to generate this state and the scales are the same
// -> nothing to do
} else {
if (rightScale==Integer.MIN_VALUE){ // first time we can build this state
firstTime = true;
rightScale = scaleArray(rightScores,currentScale);
oScale[split][end][rState] = rightScale;
}
else { // scale the smaller one to the base of the bigger one
int newScale = Math.max(currentScale,rightScale);
scaleArrayToScale(rightScores,currentScale,newScale);
scaleArrayToScale(oScore[split][end][rState],rightScale,newScale);
oScale[split][end][rState] = newScale;
}
}*/
// NOTE(review): truncated line; loop condition and the start of the guard are missing.
for (int cp=0; cp initVal){
if (viterbi) oScorePreU[split][end][rState][cp] = Math.max(oScorePreU[split][end][rState][cp], rightScores[cp]);
else oScorePreU[split][end][rState][cp] += rightScores[cp];
}
}
//oScale[split][end][rState] = currentScale;
//oScale[split][end][rState] = scaleArray(oScore[split][end][rState],oScale[split][end][rState]);
}
}
}
}
}
}
/**
 * Starts initializing the chart for the words of {@code sentence}.
 * <p>
 * NOTE(review): this block is damaged in this copy of the file. The tag loop header is
 * truncated, nothing visible advances {@code start} or uses {@code lexicon},
 * {@code noSubstates} or {@code noSmoothing}, and the element type of
 * {@code sentence} has been lost although the loop reads its elements as
 * {@code String}. The ROOT check and the extent-table setup that follow the truncated
 * line may belong to a different method (possibly the chart-allocation routine) --
 * confirm against the canonical source.
 *
 * @param sentence the words of the sentence, iterated as {@code String}
 * @param lexicon not used in the visible code
 * @param noSubstates not used in the visible code
 * @param noSmoothing not used in the visible code
 */
void initializeChart(List sentence, Lexicon lexicon,boolean noSubstates,boolean noSmoothing) {
int start = 0;
int end = start+1;
for (String word : sentence) {
end = start+1;
// NOTE(review): truncated line; the tag loop and everything up to the
// "... > 0 && start==0 && end==length" test is missing.
for (int tag=0; tag0 && start==0 && end==length ) {
if (iScorePostU[start][end][0]==null)
System.out.println("ROOT does not span the entire tree!");
}
}
}
// Fresh extent tables: one row per position 0..length, one column per state.
narrowRExtent = new int[length + 1][numStates];
wideRExtent = new int[length + 1][numStates];
narrowLExtent = new int[length + 1][numStates];
wideLExtent = new int[length + 1][numStates];
// Every entry starts at the value that any real span will improve on.
for (int loc = 0; loc <= length; loc++) {
Arrays.fill(narrowLExtent[loc], -1); // the rightmost left with state s ending at i that we can get is the beginning
Arrays.fill(wideLExtent[loc], length + 1); // the leftmost left with state s ending at i that we can get is the end
Arrays.fill(narrowRExtent[loc], length + 1); // the leftmost right with state s starting at i that we can get is the end
Arrays.fill(wideRExtent[loc], -1); // the rightmost right with state s starting at i that we can get is the beginning
}
}
/**
 * Sets every chart, pruning table and extent table referenced by this parser to
 * {@code null}.
 */
protected void clearArrays() {
  // Pre-/post-unary inside and outside charts.
  iScorePreU = null;
  iScorePostU = null;
  oScorePreU = null;
  oScorePostU = null;

  // Per-state inside/outside charts.
  viScore = null;
  voScore = null;

  // Pruning masks.
  allowedSubStates = null;
  vAllowedStates = null;

  // Narrow/wide extent tables.
  narrowRExtent = null;
  wideRExtent = null;
  narrowLExtent = null;
  wideLExtent = null;
}
/**
 * Runs the pre-parsing passes for {@code sentence}.
 * <p>
 * The visible part clears all arrays, stores the sentence length, and selects the
 * pruning threshold table ({@code accurateThresholds} or {@code fastThresholds})
 * according to the {@code accurate} flag.
 * <p>
 * NOTE(review): this copy of the file is damaged here. The level loop header is
 * truncated, the remainder of this method is missing, and the truncated line runs
 * straight into the signature of {@code getBestConstrainedParse}, whose body follows.
 * Restore both methods from the canonical source.
 *
 * @param sentence the sentence to parse; only its size is read in the visible code
 * @param tree gold tree, or {@code null}; a non-null tree sets {@code keepGoldAlive}
 * @param noSmoothing not used in the visible code
 */
public void doPreParses(List sentence, Tree tree,boolean noSmoothing){
boolean keepGoldAlive = (tree!=null); // we are given the gold tree -> make sure we don't prune it away
clearArrays();
length = (short)sentence.size();
double score = 0;
Grammar curGrammar = null;
Lexicon curLexicon = null;
// One threshold per level; which table is used depends on the accurate flag.
double[] accurateThresholds = {-8,-12,-12,-11,-12,-12,-14};
double[] fastThresholds = {-8,-9.75,-10,-9.6,-9.66,-8.01,-7.4,-10};
double[] pruningThreshold = null;
if (accurate)
pruningThreshold = accurateThresholds;
else
pruningThreshold = fastThresholds;
// NOTE(review): truncated line; the level loop and the end of doPreParses are missing.
// What follows is the body of getBestConstrainedParse(sentence, pStates): it runs the
// pre-parses, fills the inside chart with the cascade entry at index
// endLevel-startLevel+1 and, if the root score is not negative infinity, extracts the
// best tree. pStates is not used in the visible code.
for (int level=startLevel; level getBestConstrainedParse(List sentence, List[][] pStates) {
doPreParses(sentence,null,false);
// length = (short)sentence.size();
// constrainChart();
// Default result, returned unchanged when no parse is found.
bestTree = new Tree("ROOT");
double score = 0;
Grammar curGrammar = grammarCascade[endLevel-startLevel+1];
Lexicon curLexicon = lexiconCascade[endLevel-startLevel+1];
grammar = curGrammar;
lexicon = curLexicon;
double initVal = (viterbiParse) ? Double.NEGATIVE_INFINITY : 0;
int level = isBaseline ? 1 : endLevel;
createArrays(false,curGrammar.numStates,curGrammar.numSubStates,level,initVal,!isBaseline);
initializeChart(sentence,curLexicon,false,false);
doConstrainedInsideScores(curGrammar,viterbiParse,false);
// Root cell: span [0, length], state 0, substate 0.
score = iScorePostU[0][length][0][0];
if (!viterbiParse) score = Math.log(score);// + (100*iScale[0][length][0]);
logLikelihood = score;
if (score != Double.NEGATIVE_INFINITY) {
// System.out.println("\nFound a parse for sentence with length "+length+". The LL is "+score+".");
if (!viterbiParse) {
// Seed the root outside score before the outside pass.
oScorePreU[0][length][0][0] = 1.0;
doConstrainedOutsideScores(curGrammar,viterbiParse,false);
// NOTE(review): this call passes four arguments, while the
// doConstrainedMaxCScores declared in this file takes three -- confirm which
// overload is intended to run here.
doConstrainedMaxCScores(sentence,curGrammar,curLexicon,false);
}
//iScore = iScorePostU;
//oScore = oScorePostU;
if (viterbiParse) bestTree = extractBestViterbiParse(0, 0, 0, length, sentence);
else bestTree = extractBestMaxRuleParse(0, length, sentence);
}
// Drop the max-rule tables once the tree has been extracted.
maxcScore = null;
maxcSplit = null;
maxcChild = null;
maxcLeftChild = null;
maxcRightChild = null;
return bestTree;
}
/**
 * Fills the max-rule tables ({@code maxcScore}, {@code maxcSplit}, {@code maxcChild},
 * {@code maxcLeftChild}, {@code maxcRightChild}) for every span.
 * <p>
 * Assumes that inside and outside scores (sum version, not viterbi) have been computed.
 * In particular, the narrowRExtent and other arrays need not be updated.
 * <p>
 * For binary rules the visible code sums, over all substate combinations whose scores
 * reach {@code thresh2}, the product of parent outside score, rule score and both
 * child inside scores divided by the root inside score. That sum is multiplied by the
 * children's max-rule scores, and the best split and child states are kept. For unary
 * rules the same kind of sum is divided by {@code unaryPenalty} and multiplied by the
 * child's max-rule score.
 * <p>
 * NOTE(review): two loop headers are truncated in this copy of the file (the lines
 * starting {@code for (int pState=0; pState} and {@code for (int tag=0; tag}). The
 * declarations of {@code nParentStates}, {@code maxcScoreStartEnd}, {@code ur},
 * {@code r}, {@code narrowR}, {@code narrowL} and the whole width-1 (lexical) case
 * are not visible. Restore them from the canonical source.
 *
 * @param sentence not used in the visible code
 * @param grammar grammar whose {@code numSubStates} is copied into {@code numSubStatesArray}
 * @param lexicon not used in the visible code
 */
void doConstrainedMaxCScores(List sentence, Grammar grammar, SophisticatedLexicon lexicon) {
numSubStatesArray = grammar.numSubStates;
maxcScore = new double[length][length + 1][numStates];
maxcSplit = new int[length][length + 1][numStates];
maxcChild = new int[length][length + 1][numStates];
maxcLeftChild = new int[length][length + 1][numStates];
maxcRightChild = new int[length][length + 1][numStates];
double threshold = 1.0e-2;
// Despite its name this is used as a plain divisor and multiplier below, not as a log value.
double logNormalizer = iScorePostU[0][length][0][0];
// Scores below this fraction of the root inside score are ignored.
double thresh2 = threshold*logNormalizer;
for (int diff = 1; diff <= length; diff++) {
//System.out.print(diff + " ");
for (int start = 0; start < (length - diff + 1); start++) {
int end = start + diff;
// -1 marks "no split / no child chosen yet" for this span.
Arrays.fill(maxcSplit[start][end], -1);
Arrays.fill(maxcChild[start][end], -1);
Arrays.fill(maxcLeftChild[start][end], -1);
Arrays.fill(maxcRightChild[start][end], -1);
if (diff > 1) {
// diff > 1: Try binary rules
// NOTE(review): truncated line; the parent-state loop condition and the code up to
// the iPossibleR assignment are missing.
for (int pState=0; pState= narrowR); // can this right constituent fit next to the left constituent?
if (!iPossibleR) { continue; }
int min1 = narrowR;
int min2 = wideLExtent[end][rState];
int min = (min1 > min2 ? min1 : min2); // can this right constituent stretch far enough to reach the left constituent?
if (min > narrowL) { continue; }
int max1 = wideRExtent[start][lState];
int max2 = narrowL;
int max = (max1 < max2 ? max1 : max2); // can this left constituent stretch far enough to reach the right constituent?
if (min > max) { continue; }
// TODO switch order of loops for efficiency
double[][][] scores = r.getScores2();
int nLeftChildStates = numSubStatesArray[lState]; // == scores.length;
int nRightChildStates = numSubStatesArray[rState]; // == scores[0].length;
for (int split = min; split <= max; split++) {
double ruleScore = 0;
if (iScorePostU[start][split][lState] == null) continue;
if (iScorePostU[split][end][rState] == null) continue;
//if (!allowedStates[start][split][lState]) continue;
//if (!allowedStates[split][end][rState]) continue;
for (int lp = 0; lp < nLeftChildStates; lp++) {
double lIS = iScorePostU[start][split][lState][lp];
//if (lIS == 0) continue;
if (lIS < thresh2) continue;
//if (!allowedSubStates[start][split][lState][lp]) continue;
for (int rp = 0; rp < nRightChildStates; rp++) {
if (scores[lp][rp]==null) continue;
double rIS = iScorePostU[split][end][rState][rp];
//if (rIS == 0) continue;
if (rIS < thresh2) continue;
//if (!allowedSubStates[split][end][rState][rp]) continue;
for (int np = 0; np < nParentStates; np++) {
//if (!allowedSubStates[start][end][pState][np]) continue;
double pOS = oScorePostU[start][end][pState][np];
//if (pOS == 0) continue;
if (pOS < thresh2) continue;
double ruleS = scores[lp][rp][np];
if (ruleS == 0) continue;
// Contribution of this substate combination, normalized by the root inside score.
ruleScore += (pOS * ruleS * lIS * rIS) / logNormalizer;
}
}
}
double scale = 1.0;/*Math.pow(GrammarTrainer.SCALE,
oScale[start][end][pState]+iScale[start][split][lState]+
iScale[split][end][rState]-iScale[0][length][0]);*/
double leftChildScore = maxcScore[start][split][lState];
double rightChildScore = maxcScore[split][end][rState];
double gScore = ruleScore * leftChildScore * rightChildScore * scale;
// Keep the best-scoring split and child states for this parent.
if (gScore > maxcScore[start][end][pState]) {
maxcScore[start][end][pState] = gScore;
maxcSplit[start][end][pState] = split;
maxcLeftChild[start][end][pState] = lState;
maxcRightChild[start][end][pState] = rState;
}
}
}
}
} else { // diff == 1
// We treat TAG --> word exactly as if it was a unary rule, except the score of the rule is
// given by the lexicon rather than the grammar and that we allow another unary on top of it.
//for (int tag : lexicon.getAllTags()){
// NOTE(review): truncated line; the lexical case and the start of the unary loops are
// missing.
for (int tag=0; tag urules = grammar.getUnaryRulesByParent(pState);//
// for (UnaryRule ur : urules){
int cState = ur.childState;
if ((pState == cState)) continue;// && (np == cp))continue;
if (iScorePostU[start][end][cState]==null) continue;
//if (!allowedStates[start][end][cState]) continue;
//new loop over all substates
double[][] scores = ur.getScores2();
int nChildStates = numSubStatesArray[cState]; // == scores.length;
double ruleScore = 0;
for (int cp = 0; cp < nChildStates; cp++) {
// Unary rules read the pre-unary charts for both child and parent.
double cIS = iScorePreU[start][end][cState][cp];
//if (cIS == 0) continue;
if (cIS < thresh2) continue;
//if (!allowedSubStates[start][end][cState][cp]) continue;
if (scores[cp]==null) continue;
for (int np = 0; np < nParentStates; np++) {
//if (!allowedSubStates[start][end][pState][np]) continue;
double pOS = oScorePreU[start][end][pState][np];
if (pOS < thresh2) continue;
double ruleS = scores[cp][np];
if (ruleS == 0) continue;
ruleScore += (pOS * ruleS * cIS) / logNormalizer;
}
}
// log_threshold is a penalty on unaries, to control precision
double scale = 1.0;/*Math.pow(GrammarTrainer.SCALE,
oScale[start][end][pState]+iScale[start][end][cState]
-iScale[0][length][0]);*/
double childScore = maxcScore[start][end][cState];
double gScore = ruleScore / unaryPenalty * childScore * scale;
// Results go to a separate array so they do not affect other unaries on this span.
if (gScore > maxcScoreStartEnd[pState]) {
maxcScoreStartEnd[pState] = gScore;
maxcChild[start][end][pState] = cState;
}
}
}
// Publish the unary-augmented scores for this span.
maxcScore[start][end] = maxcScoreStartEnd;
}
}
}
// NOTE(review): this span is damaged in this copy of the file. It starts with the
// commented-out constrainChart() method; the end of that comment and the header of the
// following method were lost and fused into the last comment line
// ("... pState sentence, Tree tree){"). The live statements after it are the body of
// a method that takes (sentence, tree) and returns the log-likelihood. Restore the
// header from the canonical source.
// public void constrainChart(){
// viScore = new double[length][length + 1][];
// viScore[0][length] = new double[1];
// iScorePreU = new double[length][length + 1][][];
// iScorePostU = new double[length][length + 1][][];
// oScorePreU = new double[length][length + 1][][];
// oScorePostU = new double[length][length + 1][][];
// allowedSubStates = new boolean[length][length+1][][];
// allowedStates = new boolean[length][length+1][];
//
// for (int start = 0; start < length; start++) {
// for (int end = start + 1; end <= length; end++) {
// iScorePreU[start][end] = new double[numStates][];
// iScorePostU[start][end] = new double[numStates][];
// oScorePreU[start][end] = new double[numStates][];
// oScorePostU[start][end] = new double[numStates][];
// allowedStates[start][end] = new boolean[numStates];
// allowedSubStates[start][end] = new boolean[numStates][];
//
// //for (int pState=0; pState sentence, Tree tree){
// Body of the headerless method: pre-parse with the given tree, then run the inside
// and outside passes in the probability domain and return log of the root inside score.
final boolean noSmoothing = true;
doPreParses(sentence,tree,noSmoothing);
// clearArrays();
length = (short)sentence.size();
Grammar curGrammar = grammarCascade[endLevel-startLevel+1];
Lexicon curLexicon = lexiconCascade[endLevel-startLevel+1];
// Empty cells hold 0 because scores are probabilities here, not logs.
double initVal = 0;
int level = isBaseline ? 1 : endLevel;
// ensureGoldTreeSurvives(tree, level);
createArrays(isBaseline,curGrammar.numStates,curGrammar.numSubStates,level,initVal,false/*!isBaseline*/); // remove false
initializeChart(sentence,curLexicon,false,noSmoothing);
doConstrainedInsideScores(curGrammar,false,false);
logLikelihood = Math.log(iScorePostU[0][length][0][0]); // + (100*iScale[0][length][0]);
// System.out.println("Found a parse for sentence with length "+length+". The LL is "+logLikelihood+".");
// Seed the root outside score before the outside pass.
oScorePreU[0][length][0][0] = 1.0;
doConstrainedOutsideScores(curGrammar,false,false);
return logLikelihood;
}
/**
 * Computes inside and outside scores for {@code sentence} under externally supplied
 * pruning constraints and returns the sentence log-likelihood.
 * <p>
 * No pre-parse is run. The charts are cleared, the constraints are installed through
 * {@code setConstraints}, and the inside and outside passes are run in the probability
 * domain (not Viterbi, not log) with the grammar and lexicon stored at index
 * {@code endLevel - startLevel + 1} of the cascades. The result is also stored in
 * {@code logLikelihood}.
 *
 * @param sentence the words of the sentence
 * @param cons constraints handed to {@code setConstraints}
 *        (presumably indexed [start][end][state][substate] -- confirm)
 * @return the natural log of the root inside score {@code iScorePostU[0][length][0][0]}
 */
public double doConstrainedInsideOutsideScores(List<String> sentence, boolean[][][][] cons) {
  // Smoothing is always switched off on this path.
  final boolean noSmoothing = true;

  clearArrays();
  // doPreParses(sentence,null,noSmoothing);

  final int cascadeIndex = endLevel - startLevel + 1;
  final Grammar curGrammar = grammarCascade[cascadeIndex];
  final Lexicon curLexicon = lexiconCascade[cascadeIndex];
  numSubStatesArray = curGrammar.numSubStates;

  // length must be set before the constraints and the charts are sized.
  length = (short) sentence.size();
  setConstraints(cons);

  // Empty cells hold 0 because scores are probabilities here, not logs.
  final double initVal = 0;
  final int level = isBaseline ? 1 : endLevel;
  // ensureGoldTreeSurvives(tree, level);
  createArrays(
      true,
      curGrammar.numStates,
      curGrammar.numSubStates,
      level,
      initVal,
      false /*!isBaseline*/); // remove false
  initializeChart(sentence, curLexicon, false, noSmoothing);

  doConstrainedInsideScores(curGrammar, false, false);
  logLikelihood = Math.log(iScorePostU[0][length][0][0]); // + (100*iScale[0][length][0]);

  // Seed the root outside score before the outside pass.
  oScorePreU[0][length][0][0] = 1.0;
  doConstrainedOutsideScores(curGrammar, false, false);

  return logLikelihood;
}
/**
 * Prunes chart entries by comparing {@code inside + outside - sentenceProb} with
 * {@code threshold}.
 * <ul>
 * <li>{@code level < 1}: uses {@code viScore}/{@code voScore}, one value per state.
 * At level 0 the result is stored by setting {@code allowedSubStates[start][end][state]}
 * to an all-true array or to {@code null}. At negative levels only state 0 is visited,
 * spans of width 1 are skipped and the result is stored in
 * {@code vAllowedStates[start][end]}.</li>
 * <li>{@code level >= 1}: uses {@code iScorePostU}/{@code oScorePostU} per substate and
 * updates {@code allowedSubStates[start][end][state][substate]}. A state with no
 * surviving substate gets a {@code null} array.</li>
 * </ul>
 * For spans wider than one word, states with {@code grammarTags[state] == false} are
 * skipped.
 * <p>
 * NOTE(review): this copy of the file is damaged after the main loops. A block comment
 * opened there is never closed, the end of this method is missing, and the text runs
 * into the parameter list of a following method whose visible body only throws
 * {@code Error("Currently disabled")} followed by commented-out code. Restore both
 * from the canonical source.
 *
 * @param threshold a cell survives only if its posterior is strictly greater than this
 * @param numSubStatesArray number of substates per state; shadows the field of the same name
 * @param level pruning level; selects which charts are read and which masks are written
 */
protected void pruneChart(double threshold, short[] numSubStatesArray, int level){
// Counters are maintained below; their use is not visible in this copy of the file.
int totalStates = 0, previouslyPossible = 0, nowPossible = 0;
//threshold = Double.NEGATIVE_INFINITY;
double sentenceProb = (level<1) ? viScore[0][length][0] : iScorePostU[0][length][0][0];
//double sentenceScale = iScale[0][length][0];//+1.0 for oScale
if (level<1) nowPossible=totalStates=previouslyPossible=length;
// Negative levels skip spans of width 1.
int startDiff = (level<0) ? 2 : 1;
for (int diff = startDiff; diff <= length; diff++) {
for (int start = 0; start < (length - diff + 1); start++) {
int end = start + diff;
// Negative levels look at state 0 only.
int lastState = (level<0) ? 1 : numSubStatesArray.length;
for (int state = 0; state < lastState; state++) {
if (diff>1&&!grammarTags[state]) continue;
//boolean allFalse = true;
if (level==0){
// A span already rejected as a whole rejects every state on it.
if (!vAllowedStates[start][end]) {
allowedSubStates[start][end][state] = null;//
// allowedStates[start][end][state]=false;
totalStates++;
continue;
}
} else if (level>0){
// NOTE(review): with this check commented out, nothing visible here skips a state
// whose allowedSubStates entry is null before the substate loop below
// dereferences it -- confirm against the canonical source.
// if (!allowedStates[start][end][state]) {
// totalStates+=numSubStatesArray[state];
// continue;
// }
}
if (level<1){
totalStates++;
previouslyPossible++;
double iS = viScore[start][end][state];
double oS = voScore[start][end][state];
if (iS==Double.NEGATIVE_INFINITY||oS==Double.NEGATIVE_INFINITY) {
if (level==0) allowedSubStates[start][end][state] = null;//allowedStates[start][end][state] = false;
else /*level==-1*/ vAllowedStates[start][end]=false;
continue;
}
double posterior = iS + oS - sentenceProb;
if (posterior > threshold) {
// Surviving state: every substate starts out allowed.
boolean[] tmp = new boolean[numSubStatesArray[state]];
Arrays.fill(tmp, true);
if (level==0) allowedSubStates[start][end][state] = tmp;//allowedStates[start][end][state]=true;
else vAllowedStates[start][end]=true;
//spanMass[start][end]+=Math.exp(posterior);
nowPossible++;
} else {
if (level==0) allowedSubStates[start][end][state] = null;//allowedStates[start][end][state] = false;
else vAllowedStates[start][end]=false;
}
continue;
}
// level >= 1 -> iterate over substates
boolean nonePossible = true;
for (int substate = 0; substate < numSubStatesArray[state]; substate++) {
totalStates++;
// Substates pruned earlier stay pruned.
if (!allowedSubStates[start][end][state][substate]) continue;
previouslyPossible++;
// double iS = iScore[start][end][state][substate];
// double oS = oScore[start][end][state][substate];
double iS = iScorePostU[start][end][state][substate];
double oS = oScorePostU[start][end][state][substate];
if (iS==Double.NEGATIVE_INFINITY||oS==Double.NEGATIVE_INFINITY) {
allowedSubStates[start][end][state][substate] = false;
continue;
}
double posterior = iS + oS - sentenceProb;
if (posterior > threshold) {
allowedSubStates[start][end][state][substate]=true;
nowPossible++;
//spanMass[start][end]+=Math.exp(posterior);
nonePossible=false;
} else {
allowedSubStates[start][end][state][substate] = false;
}
/*if (thisScale>sentenceScale){
posterior *= Math.pow(GrammarTrainer.SCALE,thisScale-sentenceScale);
}*/
//}
//allowedStates[start][end][state][0] = !allFalse;
//int thisScale = iScale[start][end][state]+oScale[start][end][state];
/*if (sentenceScale>thisScale){
// too small anyways
allowedStates[start][end][state][0] = false;
continue;
}*/
}
// No substate survived: drop the whole state for this span.
if (nonePossible) allowedSubStates[start][end][state] = null;//allowedStates[start][end][state]=false;
}
}
}
// NOTE(review): the block comment opened on the next line is never closed in this copy
// of the file; its end, the end of pruneChart and the start of the next method's
// signature are missing.
/*
System.out.print("[");
for(int st=0; st sentence, boolean hardCounts,
int lexiconOffset) {
throw new Error("Currently disabled");
// numSubStatesArray = grammar.numSubStates;
// double tree_score = iScorePostU[0][length][0][0];
// if (tree_score==0){
// System.out.println("Training tree has zero probability - presumably underflow!");
// System.exit(-1);
// }
//
// for (int start = 0; start < length; start++) {
// final int lastState = numSubStatesArray.length;
// String word = sentence.get(start);
// for (int tag=0; tag= narrowR); // can this right constituent fit next to the left constituent?
// if (!iPossibleR) { continue; }
//
// int min1 = narrowR;
// int min2 = wideLExtent[end][rState];
// int min = (min1 > min2 ? min1 : min2); // can this right constituent stretch far enough to reach the left constituent?
// if (min > narrowL) { continue; }
//
// int max1 = wideRExtent[start][lState];
// int max2 = narrowL;
// int max = (max1 < max2 ? max1 : max2); // can this left constituent stretch far enough to reach the right constituent?
// if (min > max) { continue; }
//
// // new: loop over all substates
// double[][][] scores = r.getScores2();
// for (int split = min; split <= max; split++) {
// if (allowedSubStates[start][split][lState] == null) continue;
// if (allowedSubStates[split][end][rState] == null) continue;
// int curInd = 0;
//
// for (int lp = 0; lp < scores.length; lp++) {
// double lcIS = iScorePostU[start][split][lState][lp];
//
// for (int rp = 0; rp < scores[0].length; rp++) {
// if (scores[lp][rp]==null) continue;
// double rcIS = iScorePostU[split][end][rState][rp];
//
// for (int np = 0; np < nParentSubStates; np++) {
// curInd++;
// if (lcIS == 0) { continue; }
// if (rcIS == 0) { continue; }
// if (!allowedSubStates[start][end][pState][np]) continue;
// double pOS = oScorePostU[start][end][pState][np];
// if (pOS==0) { continue; }
//
// double rS = scores[lp][rp][np];
// if (rS==0) { continue; }
//
// double ruleCount = (hardCounts) ? 1 : (rS * lcIS / tree_score) * rcIS * pOS;
// probs[thisStartIndex + curInd-1] += ruleCount;
// }
// }
// }
// }
// }
// }
// final int lastStateU = numSubStatesArray.length;
// for (short pState=0; pState unaries = grammar.getUnaryRulesByParent(pState);
// int nParentSubStates = numSubStatesArray[pState];
// for (UnaryRule ur : unaries) {
// short cState = ur.childState;
// if ((pState == cState)) continue;// && (np == cp))continue;
// if (allowedSubStates[start][end][cState] == null) continue;
// //new loop over all substates
// double[][] scores = ur.getScores2();
// int thisStartIndex = linearizer.getLinearIndex(new UnaryRule(pState, cState));
//// if (thisStartIndex<0) continue; // a unary chain rule...
// int curInd = 0;
// for (int cp = 0; cp < scores.length; cp++) {
// if (scores[cp]==null) continue;
// double cIS = iScorePreU[start][end][cState][cp];
// for (int np = 0; np < nParentSubStates; np++) {
// curInd++;
// if (cIS == 0) { continue; }
// if (!allowedSubStates[start][end][pState][np]) continue;
// double rS = scores[cp][np];
// if (rS==0){ continue; }
//
// double pOS = oScorePreU[start][end][pState][np];
//
// double ruleCount = (hardCounts) ? 1 : (rS * cIS / tree_score) * pOS;
// probs[thisStartIndex + curInd-1] += ruleCount;
// }
// }
// }
// }
// }
// }
}
private void setConstraints(boolean[][][][] allowedSubStates2) {
allowedSubStates = new boolean[length][length+1][][];
for (int start = 0; start < length; start++) {
for (int end = start + 1; end <= length; end++) {
allowedSubStates[start][end] = new boolean[numStates][];
for (int state = 0; state
© 2015 - 2025 Weber Informatics LLC | Privacy Policy