![JAR search and dependency download from the Maven repository](/logo.png)
edu.berkeley.nlp.PCFGLA.ConstrainedHierarchicalTwoChartParser Maven / Gradle / Ivy
Show all versions of berkeleyparser Show documentation
/**
*
*/
package edu.berkeley.nlp.PCFGLA;
import java.util.Arrays;
import java.util.List;
import edu.berkeley.nlp.discPCFG.Linearizer;
import edu.berkeley.nlp.math.DoubleArrays;
import edu.berkeley.nlp.math.SloppyMath;
import edu.berkeley.nlp.syntax.StateSet;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.util.ScalingTools;
/**
* @author petrov
*
*/
public class ConstrainedHierarchicalTwoChartParser extends ConstrainedTwoChartsParser {
/** inside and outside scores; start idx, end idx, state, substate -> logProb/prob */
/** NEW: we now have two charts one before applying unaries and one after: */
protected double[][][][][] h_iScorePreU, h_iScorePostU; //[start][end][state][level][substate]
protected double[][][][][] h_oScorePreU, h_oScorePostU;
int finalLevel;
int[] substatesToCover;
public ConstrainedHierarchicalTwoChartParser(Grammar gr, Lexicon lex, SpanPredictor sp, int f) {
super(gr, lex, sp);
finalLevel = f;
substatesToCover = new int[finalLevel+1];
for (int i=0; i<=finalLevel; i++) substatesToCover[i] = (int)Math.pow(2, finalLevel-i);
}
@Override
void doConstrainedInsideScores(final boolean viterbi){
doConstrainedInsideScores(viterbi, null);
}
@Override
void doConstrainedInsideScores(final boolean viterbi, final double[][][] spanScores) {
for (int diff = 1; diff <= length; diff++) {
for (int start = 0; start < (length - diff + 1); start++) {
int end = start + diff;
for (int pState=0; pState= narrowR); // can this right constituent fit next to the left constituent?
if (!iPossibleR) { continue; }
int min1 = narrowR;
int min2 = wideLExtent[end][rState];
int min = (min1 > min2 ? min1 : min2); // can this right constituent stretch far enough to reach the left constituent?
if (min > narrowL) { continue; }
int max1 = wideRExtent[start][lState];
int max2 = narrowL;
int max = (max1 < max2 ? max1 : max2); // can this left constituent stretch far enough to reach the right constituent?
if (min > max) { continue; }
for (int split = min; split <= max; split++) {
boolean changeThisRound = false;
if (allowedSubStates[start][split][lState] == null) continue;
if (allowedSubStates[split][end][rState] == null) continue;
changeThisRound = computeInsideScore(start, split, end, r, viterbi);
if (!changeThisRound) continue;
somethingChanged = true;
//boolean firstTime = false;
int parentScale = iScale[start][end][pState];
int currentScale = iScale[start][split][lState]+iScale[split][end][rState];
currentScale = ScalingTools.scaleArray(unscaledScoresToAdd,currentScale);
if (parentScale!=currentScale) {
if (parentScale==Integer.MIN_VALUE){ // first time to build this span
iScale[start][end][pState] = currentScale;
} else {
int newScale = Math.max(currentScale,parentScale);
ScalingTools.scaleArrayToScale(unscaledScoresToAdd,currentScale,newScale);
ScalingTools.scaleArrayToScale(h_iScorePreU[start][end][pState][finalLevel],parentScale,newScale);
iScale[start][end][pState] = newScale;
}
}
for (int np = 0; np < nParentStates; np++) {
if (viterbi){
h_iScorePreU[start][end][pState][finalLevel][np] = Math.max(h_iScorePreU[start][end][pState][finalLevel][np],unscaledScoresToAdd[np]);
} else {
h_iScorePreU[start][end][pState][finalLevel][np] += unscaledScoresToAdd[np];
}
}
Arrays.fill(unscaledScoresToAdd,0);
}
}
if (somethingChanged) {
// apply span predictions
if (spanScores!=null){
double val = spanScores[start][end][stateClass[pState]];
if (val!=1){
for (int np = 0; np < nParentStates; np++){
h_iScorePreU[start][end][pState][finalLevel][np] *= val;
}
}
}
updateHierarchy(h_iScorePreU[start][end][pState]);
if (start > narrowLExtent[end][pState]) {
narrowLExtent[end][pState] = start;
wideLExtent[end][pState] = start;
} else {
if (start < wideLExtent[end][pState]) {
wideLExtent[end][pState] = start;
}
}
if (end < narrowRExtent[start][pState]) {
narrowRExtent[start][pState] = end;
wideRExtent[start][pState] = end;
} else {
if (end > wideRExtent[start][pState]) {
wideRExtent[start][pState] = end;
}
}
}
}
// now do the unaries
for (int pState=0; pState narrowLExtent[end][pState]) {
narrowLExtent[end][pState] = start;
wideLExtent[end][pState] = start;
} else {
if (start < wideLExtent[end][pState]) {
wideLExtent[end][pState] = start;
}
}
if (end < narrowRExtent[start][pState]) {
narrowRExtent[start][pState] = end;
wideRExtent[start][pState] = end;
} else {
if (end > wideRExtent[start][pState]) {
wideRExtent[start][pState] = end;
}
}
}
// in any case copy/add the scores from before and apply the spanScores
for (int np = 0; np < nParentStates; np++) {
double val = h_iScorePreU[start][end][pState][finalLevel][np];
if (val>0) {
if (viterbi){
h_iScorePostU[start][end][pState][finalLevel][np] = Math.max(h_iScorePostU[start][end][pState][finalLevel][np],val);
} else {
h_iScorePostU[start][end][pState][finalLevel][np] += val;
}
}
}
if (pState!=0) updateHierarchy(h_iScorePostU[start][end][pState]);
}
}
}
}
private final void updateHierarchy(double[][] ds) {
for (int level=finalLevel-1; level>=0; level--){
for (int i=0; i= 1; diff--) {
for (int start = 0; start + diff <= length; start++) {
int end = start + diff;
// do unaries
final int nStates = (diff==length) ? 1 : numSubStatesArray.length;
for (int pState=0; pState1 && !grammarTags[cState]) continue;
final int nChildStates = numSubStatesArray[pState];
// apply span predictions
if (spanScores!=null){
double val = spanScores[start][end][stateClass[pState]];
if (val != 1){
for (int np = 0; np < nChildStates; np++){
h_oScorePreU[start][end][pState][finalLevel][np] *= val;
}
}
if (pState!=0) updateHierarchy(h_oScorePreU[start][end][pState]);
else {
val = h_oScorePreU[start][end][0][finalLevel][0];
for (int level=finalLevel-1; level>=0; level--){
h_oScorePreU[start][end][0][level][0] = val;
}
}
}
}
for (int cState=0; cState1 && !grammarTags[cState]) continue;
final int nChildStates = numSubStatesArray[cState];
// UnaryRule[] rules = grammar.getClosedSumUnaryRulesByParent(pState);
UnaryRule[] rules = grammar.getClosedSumUnaryRulesByChild(cState);
//UnaryRule[] rules = grammar.getClosedViterbiUnaryRulesByParent(pState);
// For now:
//UnaryRule[] rules = grammar.getUnaryRulesByChild(cState).toArray(new UnaryRule[0]);
boolean somethingChanged = false;
int childScale = oScale[start][end][cState];
int scaleBeforeUnaries = childScale;
for (int r = 0; r < rules.length; r++) {
HierarchicalAdaptiveUnaryRule ur = (HierarchicalAdaptiveUnaryRule)rules[r];
int pState = ur.parentState;
if ((pState == cState)) continue;
if (allowedSubStates[start][end][pState]==null) continue;
boolean changeThisRound = computeOutsideScore(start, end, ur, viterbi);
if (!changeThisRound) continue;
somethingChanged = true;
int currentScale = oScale[start][end][pState];
currentScale = ScalingTools.scaleArray(unscaledScoresToAdd,currentScale);
if (childScale!=currentScale) {
if (childScale==Integer.MIN_VALUE){ // first time to build this span
childScale = currentScale;
} else {
int newScale = Math.max(currentScale,childScale);
ScalingTools.scaleArrayToScale(unscaledScoresToAdd,currentScale,newScale);
ScalingTools.scaleArrayToScale(h_oScorePostU[start][end][cState][finalLevel],childScale,newScale);
childScale = newScale;
}
}
for (int cp = 0; cp < nChildStates; cp++) {
if (viterbi){
h_oScorePostU[start][end][cState][finalLevel][cp] = Math.max(h_oScorePostU[start][end][cState][finalLevel][cp],unscaledScoresToAdd[cp]);
} else {
h_oScorePostU[start][end][cState][finalLevel][cp] += unscaledScoresToAdd[cp];
}
}
Arrays.fill(unscaledScoresToAdd,initVal);
}
if (somethingChanged){
int newScale = Math.max(scaleBeforeUnaries,childScale);
ScalingTools.scaleArrayToScale(h_oScorePreU[start][end][cState][finalLevel],scaleBeforeUnaries,newScale);
ScalingTools.scaleArrayToScale(h_oScorePostU[start][end][cState][finalLevel],childScale,newScale);
oScale[start][end][cState] = newScale;
if (newScale!=scaleBeforeUnaries){
updateHierarchy(h_oScorePreU[start][end][cState]);
}
}
// copy/add the entries where the unaries were not useful
for (int cp=0; cp0) {
if (viterbi){
h_oScorePostU[start][end][cState][finalLevel][cp] = Math.max(h_oScorePostU[start][end][cState][finalLevel][cp], val);
} else {
h_oScorePostU[start][end][cState][finalLevel][cp] += val;
}
}
}
if (cState!=0) updateHierarchy(h_oScorePostU[start][end][cState]);
else {
double val = h_oScorePostU[start][end][0][finalLevel][0];
for (int level=finalLevel-1; level>=0; level--){
h_oScorePostU[start][end][0][level][0] = val;
}
}
}
// do binaries
if (diff==1) continue; // there is no space for a binary
for (int pState=0; pState 2) {
int min2 = wideLExtent[end][rState];
min = (min1 > min2 ? min1 : min2);
if (max1 < min) { continue; }
int max2 = wideRExtent[start][lState];
max = (max1 < max2 ? max1 : max2);
if (max < min) { continue; }
}
int nLeftChildStates = numSubStatesArray[lState];
int nRightChildStates = numSubStatesArray[rState];
for (int split = min; split <= max; split++) {
if (allowedSubStates[start][split][lState] == null) continue;
if (allowedSubStates[split][end][rState] == null) continue;
// if (split-start>1 && !grammarTags[lState]) continue;
// if (end-split>1 && !grammarTags[rState]) continue;
boolean somethingChanged = computeOutsideScore(start, split, end, br, viterbi);
if (!somethingChanged) continue;
if (DoubleArrays.max(scoresToAdd)!=0){//oScale[start][end][pState]!=Integer.MIN_VALUE && iScale[split][end][rState]!=Integer.MIN_VALUE){
int leftScale = oScale[start][split][lState];
int currentScale = oScale[start][end][pState]+iScale[split][end][rState];
currentScale = ScalingTools.scaleArray(scoresToAdd,currentScale);
if (leftScale!=currentScale) {
if (leftScale==Integer.MIN_VALUE){ // first time to build this span
oScale[start][split][lState] = currentScale;
} else {
int newScale = Math.max(currentScale,leftScale);
ScalingTools.scaleArrayToScale(scoresToAdd,currentScale,newScale);
ScalingTools.scaleArrayToScale(h_oScorePreU[start][split][lState][finalLevel],leftScale,newScale);
oScale[start][split][lState] = newScale;
}
}
for (int cp=0; cp initVal){
if (viterbi){
h_oScorePreU[start][split][lState][finalLevel][cp] = Math.max(h_oScorePreU[start][split][lState][finalLevel][cp],scoresToAdd[cp]);
} else {
h_oScorePreU[start][split][lState][finalLevel][cp] += scoresToAdd[cp];
}
}
}
Arrays.fill(scoresToAdd, 0);
updateHierarchy(h_oScorePreU[start][split][lState]);
}
if (DoubleArrays.max(unscaledScoresToAdd)!=0){//oScale[start][end][pState]!=Integer.MIN_VALUE && iScale[start][split][lState]!=Integer.MIN_VALUE){
int rightScale = oScale[split][end][rState];
int currentScale = oScale[start][end][pState]+iScale[start][split][lState];
if (currentScale==Integer.MIN_VALUE)
System.out.println("shhaaa");
currentScale = ScalingTools.scaleArray(unscaledScoresToAdd,currentScale);
if (rightScale!=currentScale) {
if (rightScale==Integer.MIN_VALUE){ // first time to build this span
oScale[split][end][rState] = currentScale;
} else {
int newScale = Math.max(currentScale,rightScale);
ScalingTools.scaleArrayToScale(unscaledScoresToAdd,currentScale,newScale);
ScalingTools.scaleArrayToScale(h_oScorePreU[split][end][rState][finalLevel],rightScale,newScale);
oScale[split][end][rState] = newScale;
}
}
for (int cp=0; cp initVal) {
if (viterbi){
h_oScorePreU[split][end][rState][finalLevel][cp] = Math.max(h_oScorePreU[split][end][rState][finalLevel][cp],unscaledScoresToAdd[cp]);
} else {
h_oScorePreU[split][end][rState][finalLevel][cp] += unscaledScoresToAdd[cp];
}
}
}
Arrays.fill(unscaledScoresToAdd, 0);
updateHierarchy(h_oScorePreU[split][end][rState]);
}
}
}
}
}
}
}
private final boolean computeOutsideScore(int start, int split, int end, HierarchicalAdaptiveBinaryRule rule, boolean viterbi){
int pState = rule.parentState;
int lState = rule.leftChildState;
int rState = rule.rightChildState;
boolean changeThisRound = false;
for (HierarchicalAdaptiveBinaryRule.SubRule subRule : rule.subRuleList){
if (subRule==null) continue;
int level = subRule.level;
double oS = h_oScorePostU[start][end][pState][level][subRule.parent];
if (oS == 0) continue;
double lS = h_iScorePostU[start][split][lState][level][subRule.lChild];
double rS = h_iScorePostU[split][end][rState][level][subRule.rChild];
double pS = subRule.score;
double thisRoundL = pS*rS*oS;
double thisRoundR = pS*lS*oS;
if (thisRoundL!=0){
int k = substatesToCover[level]*subRule.lChild;
final int l = k+substatesToCover[level];
for (int lp=k; lp sentence, boolean noSmoothing, List posTags) {
final boolean useGoldPOS = (posTags!=null);
int start = 0;
int end = start+1;
for (StateSet word : sentence) {
end = start+1;
int goldTag = -1;
if (useGoldPOS) goldTag = tagNumberer.number(posTags.get(start));
for (short tag=0; tag1 && !grammarTags[state]) continue;
for (int level=0; level<=finalLevel; level++){
h_iScorePreU[start][end][state][level] = new double[numSubStatesArray[state]/substatesToCover[level]];
h_iScorePostU[start][end][state][level] = new double[numSubStatesArray[state]/substatesToCover[level]];
h_oScorePreU[start][end][state][level] = new double[numSubStatesArray[state]/substatesToCover[level]];
h_oScorePostU[start][end][state][level] = new double[numSubStatesArray[state]/substatesToCover[level]];
}
}
for (int level=0; level<=finalLevel; level++) {
h_oScorePreU[start][end][0][level] = new double[1];
h_oScorePostU[start][end][0][level] = new double[1];
}
}
}
narrowRExtent = new int[length + 1][numStates];
wideRExtent = new int[length + 1][numStates];
narrowLExtent = new int[length + 1][numStates];
wideLExtent = new int[length + 1][numStates];
for (int loc = 0; loc <= length; loc++) {
Arrays.fill(narrowLExtent[loc], -1); // the rightmost left with state s ending at i that we can get is the beginning
Arrays.fill(wideLExtent[loc], length + 1); // the leftmost left with state s ending at i that we can get is the end
Arrays.fill(narrowRExtent[loc], length + 1); // the leftmost right with state s starting at i that we can get is the end
Arrays.fill(wideRExtent[loc], -1); // the rightmost right with state s starting at i that we can get is the beginning
}
}
}
@Override
protected void scrubArrays() {
if (h_iScorePostU==null) return;
for (int start = 0; start < length; start++) {
for (int end = start + 1; end <= length; end++) {
for (int state=0; state1 && !grammarTags[state]) continue;
for (int level=0; level<=finalLevel; level++){
Arrays.fill(h_iScorePreU[start][end][state][level],0);
Arrays.fill(h_iScorePostU[start][end][state][level],0);
Arrays.fill(h_oScorePreU[start][end][state][level],0);
Arrays.fill(h_oScorePostU[start][end][state][level],0);
}
Arrays.fill(iScale[start][end], Integer.MIN_VALUE);
Arrays.fill(oScale[start][end], Integer.MIN_VALUE);
}
}
}
}
for (int loc = 0; loc <= length; loc++) {
Arrays.fill(narrowLExtent[loc], -1); // the rightmost left with state s ending at i that we can get is the beginning
Arrays.fill(wideLExtent[loc], length + 1); // the leftmost left with state s ending at i that we can get is the end
Arrays.fill(narrowRExtent[loc], length + 1); // the leftmost right with state s starting at i that we can get is the end
Arrays.fill(wideRExtent[loc], -1); // the rightmost right with state s starting at i that we can get is the beginning
}
}
@Override
protected double getLikelihoodAndSetRootOutsideScore() {
for (int level=0; level<=finalLevel; level++) h_oScorePreU[0][length][0][level][0] = 1.0;
oScale[0][length][0] = 0;
return Math.log(h_iScorePostU[0][length][0][finalLevel][0])+ (ScalingTools.LOGSCALE*iScale[0][length][0]);
}
@Override
void doConstrainedMaxCScores(List sentence) {
doConstrainedMaxCScores(sentence, null);
}
@Override
void doConstrainedMaxCScores(List sentence, final double[][][] spanScores) {
maxcScore = new double[length][length + 1][numStates];
maxcSplit = new int[length][length + 1][numStates];
maxcChild = new int[length][length + 1][numStates];
maxcLeftChild = new int[length][length + 1][numStates];
maxcRightChild = new int[length][length + 1][numStates];
double tree_score = h_iScorePostU[0][length][0][finalLevel][0];
int tree_scale = iScale[0][length][0];
for (int diff = 1; diff <= length; diff++) {
//System.out.print(diff + " ");
for (int start = 0; start < (length - diff + 1); start++) {
int end = start + diff;
Arrays.fill(maxcSplit[start][end], -1);
Arrays.fill(maxcChild[start][end], -1);
Arrays.fill(maxcLeftChild[start][end], -1);
Arrays.fill(maxcRightChild[start][end], -1);
if (diff > 1) {
// diff > 1: Try binary rules
for (int pState=0; pState= narrowR); // can this right constituent fit next to the left constituent?
if (!iPossibleR) { continue; }
int min1 = narrowR;
int min2 = wideLExtent[end][rState];
int min = (min1 > min2 ? min1 : min2); // can this right constituent stretch far enough to reach the left constituent?
if (min > narrowL) { continue; }
int max1 = wideRExtent[start][lState];
int max2 = narrowL;
int max = (max1 < max2 ? max1 : max2); // can this left constituent stretch far enough to reach the right constituent?
if (min > max) { continue; }
for (int split = min; split <= max; split++) {
if (allowedSubStates[start][split][lState] == null) continue;
if (allowedSubStates[split][end][rState] == null) continue;
double scalingFactor = ScalingTools.calcScaleFactor(
oScale[start][end][pState]+
iScale[start][split][lState]+
iScale[split][end][rState]-tree_scale);
if (scalingFactor==0) continue;
double ruleScore = computeRuleScore(start, split, end, r, tree_score, scalingFactor);
if (ruleScore==0) continue;
double leftChildScore = maxcScore[start][split][lState];
double rightChildScore = maxcScore[split][end][rState];
double gScore = ruleScore * leftChildScore * rightChildScore;
if (gScore > maxcScore[start][end][pState]) {
maxcScore[start][end][pState] = gScore;
maxcSplit[start][end][pState] = split;
maxcLeftChild[start][end][pState] = lState;
maxcRightChild[start][end][pState] = rState;
}
}
}
}
} else { // diff == 1
// We treat TAG --> word exactly as if it was a unary rule, except the score of the rule is
// given by the lexicon rather than the grammar and that we allow another unary on top of it.
for (short tag=0; tag urules = grammar.getUnaryRulesByParent(pState);//
// for (UnaryRule ur : urules){
int cState = ur.childState;
if ((pState == cState)) continue;// && (np == cp))continue;
if (allowedSubStates[start][end][cState]==null) continue;
double scalingFactor = ScalingTools.calcScaleFactor(
oScale[start][end][pState]+iScale[start][end][cState]-tree_scale);
if (scalingFactor==0) continue;
double ruleScore = computeRuleScore(start, end, ur, tree_score, scalingFactor, spanScore);
if (ruleScore==0) continue;
double childScore = maxcScore[start][end][cState];
double gScore = ruleScore * childScore;
if (gScore > maxcScoreStartEnd[pState]) {
maxcScoreStartEnd[pState] = gScore;
maxcChild[start][end][pState] = cState;
}
}
}
maxcScore[start][end] = maxcScoreStartEnd;
}
}
}
private final double computeRuleScore(int start, int split, int end, HierarchicalAdaptiveBinaryRule rule,
double tree_score, double scalingFactor) {
double ruleScore = 0;
int pState = rule.parentState;
int lState = rule.leftChildState;
int rState = rule.rightChildState;
for (HierarchicalAdaptiveBinaryRule.SubRule subRule : rule.subRuleList){
int level = subRule.level;
double lS = h_iScorePostU[start][split][lState][level][subRule.lChild];
if (lS == 0) continue;
double rS = h_iScorePostU[split][end][rState][level][subRule.rChild];
if (rS == 0) continue;
double pOS = h_oScorePostU[start][end][pState][level][subRule.parent];
if (pOS == 0) continue;
ruleScore += subRule.score * lS / tree_score * rS * scalingFactor * pOS;
// if (isValidExpectation(ruleCount)){
}
return ruleScore;
}
private final double computeRuleScore(int start, int end, HierarchicalAdaptiveUnaryRule rule,
double tree_score, double scalingFactor, double spanScore){
double ruleScore = 0;
int pState = rule.parentState;
int cState = rule.childState;
for (HierarchicalAdaptiveUnaryRule.SubRule subRule : rule.subRuleList){
if (subRule==null) continue;
int level = subRule.level;
double cS = h_iScorePreU[start][end][cState][level][subRule.child];
if (cS == 0) continue;
double pOS = h_oScorePreU[start][end][pState][level][subRule.parent];
if (pOS == 0) continue;
ruleScore += subRule.score * cS / tree_score * scalingFactor / spanScore * pOS;
// if (isValidExpectation(ruleCount)){
}
return ruleScore;
}
@Override
public void incrementExpectedCounts(Linearizer linearizer, double[] probs, List sentence) {
double tree_score = h_iScorePostU[0][length][0][finalLevel][0];// * h_oScorePreU[0][length][finalLevel][0][0];
int tree_scale = iScale[0][length][0];
if (ConditionalTrainer.Options.lockGrammar){
linearizer.increment(probs, sentence, getClassBracketPosteriors(), false);
return;
}
for (int start = 0; start < length; start++) {
final int lastState = numSubStatesArray.length;
StateSet currentStateSet = sentence.get(start);
for (int tag=0; tag= narrowR); // can this right constituent fit next to the left constituent?
if (!iPossibleR) { continue; }
int min1 = narrowR;
int min2 = wideLExtent[end][rState];
int min = (min1 > min2 ? min1 : min2); // can this right constituent stretch far enough to reach the left constituent?
if (min > narrowL) { continue; }
int max1 = wideRExtent[start][lState];
int max2 = narrowL;
int max = (max1 < max2 ? max1 : max2); // can this left constituent stretch far enough to reach the right constituent?
if (min > max) { continue; }
boolean foundSomething = false;
for (int split = min; split <= max; split++) {
if (allowedSubStates[start][split][lState] == null) continue;
if (allowedSubStates[split][end][rState] == null) continue;
double scalingFactor = ScalingTools.calcScaleFactor(
oScale[start][end][pState]+
iScale[start][split][lState]+
iScale[split][end][rState]-tree_scale);
if (scalingFactor==0){ continue; }
boolean tmp = computeExpectedCount(start, split, end, r, tree_score, scalingFactor);
foundSomething = foundSomething || tmp;
}
if (!foundSomething) continue; // nothing changed this round
linearizer.increment(probs, r, tmpCountsArray, false);
}
}
final int lastStateU = numSubStatesArray.length;
for (short pState=0; pState unaries = grammar.getUnaryRulesByParent(pState);
UnaryRule[] unaries = grammar.getClosedSumUnaryRulesByParent(pState);
for (UnaryRule ur : unaries) {
short cState = ur.childState;
if ((pState == cState)) continue;// && (np == cp))continue;
if (allowedSubStates[start][end][cState] == null) continue;
double scalingFactor = ScalingTools.calcScaleFactor(
oScale[start][end][pState]+iScale[start][end][cState]-tree_scale);
if (scalingFactor==0){ continue; }
boolean foundSomething = computeExpectedCount(start, end, (HierarchicalAdaptiveUnaryRule)ur, tree_score, scalingFactor);
if (!foundSomething) continue;
linearizer.increment(probs, ur, tmpCountsArray, false); //probs[thisStartIndex + curInd-1] += ruleCount;
}
}
}
}
if (spanPredictor!=null)
linearizer.increment(probs, sentence, getClassBracketPosteriors(), false);
}
private final boolean computeExpectedCount(int start, int split, int end, HierarchicalAdaptiveBinaryRule rule,
double tree_score, double scalingFactor){
int pState = rule.parentState;
int lState = rule.leftChildState;
int rState = rule.rightChildState;
boolean foundSomething = false;
int curInd = -1;
for (HierarchicalAdaptiveBinaryRule.SubRule subRule : rule.subRuleList){
if (subRule==null) continue; // shouldn't happen!
int level = subRule.level;
curInd++;
double lS = h_iScorePostU[start][split][lState][level][subRule.lChild];
if (lS == 0) continue;
double rS = h_iScorePostU[split][end][rState][level][subRule.rChild];
if (rS == 0) continue;
double pOS = h_oScorePostU[start][end][pState][level][subRule.parent];
if (pOS == 0) continue;
double ruleCount = subRule.score * lS / tree_score * rS * scalingFactor * pOS;
if (isValidExpectation(ruleCount)){
tmpCountsArray[curInd] += ruleCount;
foundSomething = true;
}// else if (ruleCount!=0)
// System.out.println("not an expected count, b: "+ruleCount+"\n"+rule.toString());
}
return foundSomething;
}
private final boolean computeExpectedCount(int start, int end, HierarchicalAdaptiveUnaryRule rule,
double tree_score, double scalingFactor){
int pState = rule.parentState;
int cState = rule.childState;
boolean foundSomething = false;
int curInd = -1;
for (HierarchicalAdaptiveUnaryRule.SubRule subRule : rule.subRuleList){
curInd++;
if (subRule==null) continue;
int level = subRule.level;
double cS = h_iScorePreU[start][end][cState][level][subRule.child];
if (cS == 0) continue;
double pOS = h_oScorePreU[start][end][pState][level][subRule.parent];
if (pOS == 0) continue;
double ruleCount = subRule.score * cS / tree_score * scalingFactor * pOS;
if (spanScores!=null){
ruleCount /= spanScores[start][end][stateClass[pState]];
}
if (isValidExpectation(ruleCount)){
tmpCountsArray[curInd] = ruleCount;
foundSomething = true;
} //else if (ruleCount!=0)
// System.out.println("not an expected count, u: "+ruleCount+"\n"+rule.toString()+"\n"+cS +" / "+ tree_score +" * "+scalingFactor +" * "+ pOS);
}
return foundSomething;
}
@Override
boolean[][][][] computeAllowedStates(double threshold) {
double tree_score = h_iScorePostU[0][length][0][finalLevel][0];
int tree_scale = iScale[0][length][0];
boolean[][][][] result = new boolean[length][length+1][][];
for (int start = 0; start < length; start++) {
for (int end = start + 1; end <= length; end++) {
result[start][end] = new boolean[numStates][];
final int lastState = numSubStatesArray.length;
for (int state = 0; state < lastState; state++) {
double spanScore = (spanScores!=null) ? spanScores[start][end][stateClass[state]] : 1;
if (allowedSubStates[start][end][state]==null) continue;
boolean atLeastOnePossible = false;
for (int substate = 0; substate < numSubStatesArray[state]; substate++) {
if (!allowedSubStates[start][end][state][substate]) continue;
double iS = h_iScorePostU[start][end][state][finalLevel][substate];
// if (iS==0) continue;
double oS = h_oScorePostU[start][end][state][finalLevel][substate];
// if (oS==0) continue;
double scalingFactor = ScalingTools.calcScaleFactor(
oScale[start][end][state]+iScale[start][end][state]-tree_scale);
if (scalingFactor==0) continue;
double tmp = Math.max(iS*h_oScorePreU[start][end][state][finalLevel][substate], h_iScorePreU[start][end][state][finalLevel][substate]*oS);
double posterior = tmp / spanScore / tree_score * scalingFactor;
if (posterior > threshold) {
if (result[start][end][state]==null) result[start][end][state] = new boolean[numSubStatesArray[state]];
result[start][end][state][substate]=true;
atLeastOnePossible = true;
}
}
if (!atLeastOnePossible) result[start][end][state]=null;
}
}
}
return result;
}
/**
* Calculate the inside scores, P(words_i,j|nonterminal_i,j) of a tree given
* the string if words it should parse to.
*
* @param tree
* @param sentence
*/
@Override
void doInsideScores(Tree tree, boolean noSmoothing, boolean debugOutput, double[][][] spanScores) {
if (grammar.isLogarithmMode() || lexicon.isLogarithmMode())
throw new Error("Grammar in logarithm mode! Cannot do inside scores!");
if (tree.isLeaf()){
return;
}
List> children = tree.getChildren();
for (Tree child : children) {
if (!child.isLeaf()) doInsideScores(child, noSmoothing, debugOutput, spanScores);
}
StateSet parent = tree.getLabel();
short pState = parent.getState();
int nParentStates = parent.numSubStates();
if (tree.isPreTerminal()) {
// Plays a role similar to initializeChart()
StateSet wordStateSet = tree.getChildren().get(0).getLabel();
double[] lexiconScores = lexicon.score(wordStateSet, pState, noSmoothing,false);
if (lexiconScores.length!=nParentStates){
System.out.println("Have more scores than substates!");// truncate the array
}
parent.setIScores(lexiconScores);
parent.scaleIScores(0);
} else {
switch (children.size()) {
case 0:
break;
case 1:
StateSet child = children.get(0).getLabel();
short cState = child.getState();
HierarchicalAdaptiveUnaryRule urule = (HierarchicalAdaptiveUnaryRule)grammar.getUnaryRule(pState,cState);
double[] iScores = new double[nParentStates];
for (HierarchicalAdaptiveUnaryRule.SubRule subRule : urule.subRuleList){
if (subRule==null) continue;
int level = subRule.level;
int i = substatesToCover[level]*subRule.child;
int j = i+substatesToCover[level];
int k = substatesToCover[level]*subRule.parent;
int l = k+substatesToCover[level];
double cS = 0;
for (int cp=i; cp tree) {
tree.getLabel().setOScore(0, 1);
tree.getLabel().setOScale(0);
}
/**
* Calculate the outside scores of a tree; that is,
* P(nonterminal_i,j|words_0,i; words_j,end). It is calculate from the inside
* scores of the tree.
*
*
* Note: when calling this, call setRootOutsideScore() first.
*
* @param tree
*/
@Override
void doOutsideScores(Tree tree, boolean unaryAbove, double[][][] spanScores) {
if (grammar.isLogarithmMode() || lexicon.isLogarithmMode())
throw new Error("Grammar in logarithm mode! Cannot do inside scores!");
if (tree.isLeaf())
return;
List> children = tree.getChildren();
StateSet parent = tree.getLabel();
short pState = parent.getState();
int nParentStates = parent.numSubStates();
// this sets the outside scores for the children
if (tree.isPreTerminal()) {
} else {
double[] parentScores = parent.getOScores();
if (spanScores!=null && !unaryAbove){
for (int i = 0; i < nParentStates; i++) {
parentScores[i] *= spanScores[parent.from][parent.to][stateClass[pState]];
}
}
switch (children.size()) {
case 0:
// Nothing to do
break;
case 1:
StateSet child = children.get(0).getLabel();
short cState = child.getState();
int nChildStates = child.numSubStates();
double[] oScores = new double[nChildStates];
HierarchicalAdaptiveUnaryRule urule = (HierarchicalAdaptiveUnaryRule)grammar.getUnaryRule(pState,cState);
for (HierarchicalAdaptiveUnaryRule.SubRule subRule : urule.subRuleList){
if (subRule==null) continue;
int level = subRule.level;
int i = substatesToCover[level]*subRule.child;
int j = i+substatesToCover[level];
int k = substatesToCover[level]*subRule.parent;
int l = k+substatesToCover[level];
if (pState==0) l = 1;
double pS = 0;
for (int np=k; np child : children) {
doOutsideScores(child, unaryAbove, spanScores);
}
}
}
@Override
public double doInsideOutsideScores(Tree tree, boolean noSmoothing, boolean debugOutput, double[][][] spanScores) {
doInsideScores(tree, noSmoothing, debugOutput, spanScores);
setRootOutsideScore(tree);
doOutsideScores(tree, false, spanScores);
return Math.log(tree.getLabel().getIScore(0)) + (ScalingTools.LOGSCALE*tree.getLabel().getIScale());
}
@Override
public void doInsideOutsideScores(Tree tree, boolean noSmoothing, boolean debugOutput) {
doInsideScores(tree, noSmoothing, debugOutput, null);
setRootOutsideScore(tree);
doOutsideScores(tree, false, null);
}
@Override
public void incrementExpectedGoldCounts(Linearizer linearizer, double[] probs, Tree tree){
if (ConditionalTrainer.Options.lockGrammar) return;
incrementExpectedGoldCounts(linearizer, probs, tree, tree.getLabel().getIScore(0), tree.getLabel().getIScale());
}
@Override
public void incrementExpectedGoldCounts(Linearizer linearizer, double[] probs, Tree tree,
double tree_score, int tree_scale) {
if (tree.isLeaf())
return;
if (tree.isPreTerminal()){
StateSet parent = tree.getLabel();
StateSet child = tree.getChildren().get(0).getLabel();
short tag = tree.getLabel().getState();
final int nSubStates = grammar.numSubStates[tag];
double scalingFactor = ScalingTools.calcScaleFactor(parent.getOScale()+parent.getIScale()-tree_scale);
for (short substate=0; substate> children = tree.getChildren();
StateSet parent = tree.getLabel();
short parentState = parent.getState();
switch (children.size()) {
case 0:
// This is a leaf (a preterminal node, if we count the words themselves),
// nothing to do
break;
case 1:
StateSet child = children.get(0).getLabel();
short childState = child.getState();
HierarchicalAdaptiveUnaryRule urule = (HierarchicalAdaptiveUnaryRule)grammar.getUnaryRule(parentState, childState);
double scalingFactor = ScalingTools.calcScaleFactor(parent.getOScale()+child.getIScale()-tree_scale);
int curInd = -1;
for (HierarchicalAdaptiveUnaryRule.SubRule subRule : urule.subRuleList){
curInd++;
if (subRule==null) continue;
int level = subRule.level;
int i = substatesToCover[level]*subRule.child;
int j = i+substatesToCover[level];
int k = substatesToCover[level]*subRule.parent;
int l = k+substatesToCover[level];
if (parentState==0) l = 1;
double pOS = 0;
for (int np=k; np child : children) {
incrementExpectedGoldCounts(linearizer, probs, child, tree_score, tree_scale);
}
}
public double[][][] getClassBracketPosteriors() {
double tree_score = h_iScorePostU[0][length][0][finalLevel][0];// * h_oScorePreU[0][length][finalLevel][0][0];
int tree_scale = iScale[0][length][0];
double[][][] result = new double[length][length+1][spanPredictor.getNClasses()];
for (int start = 0; start < length; start++) {
for (int end = start + 1; end <= length; end++) {
final int lastState = numSubStatesArray.length;
for (int state = 0; state < lastState; state++) {
final int clas = stateClass[state];
final double spanScore = spanScores[start][end][clas];
double statePosterior = 0;
if (allowedSubStates[start][end][state]==null) continue;
for (int substate = 0; substate < numSubStatesArray[state]; substate++) {
if (!allowedSubStates[start][end][state][substate]) continue;
double iS = h_iScorePostU[start][end][state][finalLevel][substate];
double oS = h_oScorePreU[start][end][state][finalLevel][substate];
double iS2= h_iScorePreU[start][end][state][finalLevel][substate];
double oS2 = h_oScorePostU[start][end][state][finalLevel][substate];
double scalingFactor = ScalingTools.calcScaleFactor(
oScale[start][end][state]+iScale[start][end][state]-tree_scale);
if (scalingFactor==0) continue;
double tmp = iS*oS;
double tmp2 = iS2 * oS2;
double posterior = Math.max(tmp,tmp2) / spanScore / tree_score * scalingFactor;
if (SloppyMath.isDangerous(posterior)) continue;
if (posterior>1.01){
System.out.println("too much posterior s:"+start+" e:"+end+" state "+state+" "+posterior+" "+spanScores[start][end]);
if (SloppyMath.isVeryDangerous(posterior)) posterior = 0;
}
result[start][end][clas] += posterior;
statePosterior += posterior;
}
if (statePosterior>1.01){
System.out.println("Too much for a single state: "+statePosterior);
for (int substate = 0; substate < numSubStatesArray[state]; substate++) {
if (!allowedSubStates[start][end][state][substate]) continue;
double iS = h_iScorePostU[start][end][state][finalLevel][substate];
double oS = h_oScorePreU[start][end][state][finalLevel][substate];
double iS2= h_iScorePreU[start][end][state][finalLevel][substate];
double oS2 = h_oScorePostU[start][end][state][finalLevel][substate];
double scalingFactor = ScalingTools.calcScaleFactor(
oScale[start][end][state]+iScale[start][end][state]-tree_scale);
if (scalingFactor==0) continue;
double tmp = iS*oS;//Math.max(iS*oS, iS2*oS2);
double tmp2 = iS2 * oS2;
double posterior = Math.max(tmp,tmp2) / spanScore / tree_score * scalingFactor;
System.out.println(posterior);
}
}
}
if (result[start][end][0]>2.01){
System.out.println("too much in the sum, start "+start+" end "+end+" "+result[start][end]);
result[start][end][0] = 0;
}
}
}
// System.out.println("length "+length);
return result;
}
}