All downloads are free. The search and download features use the official Maven repository.

edu.berkeley.nlp.PCFGLA.ConstrainedArrayParser Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
package edu.berkeley.nlp.PCFGLA;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;

import edu.berkeley.nlp.syntax.StateSet;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.util.ArrayUtil;
import edu.berkeley.nlp.util.Counter;
import edu.berkeley.nlp.util.Numberer;
import edu.berkeley.nlp.util.PriorityQueue;
import edu.berkeley.nlp.util.StringUtils;

public class ConstrainedArrayParser extends ArrayParser implements Callable{
	List[][] possibleStates;
	/** inside scores; start idx, end idx, state -> logProb */
	protected double[][][][] iScore;
	/** outside scores; start idx, end idx, state -> logProb	 */
	protected double[][][][] oScore;
	protected short[] numSubStatesArray;
	public long totalUsedUnaries;
	public long nRules, nRulesInf;
//the chart is now using scaled probabilities, NOT log-probs.
  protected int[][][] iScale;  // for each (start,end) span there is a scaling factor
  protected int[][][] oScale;  
	public Binarization binarization;

	Counter stateCounter = new Counter();
	Counter ruleCounter = new Counter();
	
	public boolean viterbi = false;
	/** number of times we restored unaries */
	public int nTimesRestoredUnaries;

	boolean noConstrains = false;

 
  protected List nextSentence;
  protected int nextSentenceID;
  int myID;
	PriorityQueue>> queue;
	/**
	 * Stores this parser's id and the queue that {@link #call()} adds its result to.
	 * NOTE(review): the type arguments of {@code PriorityQueue} appear to have been
	 * lost in this copy of the file; restore them from the original source.
	 *
	 * @param i id assigned to this parser instance
	 * @param q queue that receives the parse produced by {@link #call()}
	 */
	public void setID(int i, PriorityQueue>> q){
		myID = i;
		queue = q;
	}
  /**
   * Registers the sentence that the next {@link #call()} will parse.
   *
   * @param nextS  the sentence to parse next
   * @param nextID id of that sentence; {@link #call()} queues the result
   *               with priority {@code -nextID}
   */
  public void setNextSentence(List nextS, int nextID){
    this.nextSentenceID = nextID;
    this.nextSentence = nextS;
  }

  
  /**
   * Parses the sentence registered via setNextSentence and publishes the
   * result on the shared queue.
   * NOTE(review): the type arguments on {@code ArrayList} below appear to have
   * been lost in this copy of the file.
   *
   * @return always {@code null}; the parse is delivered through the queue
   */
  public synchronized Object call() {
		Tree parse = getBestParse(nextSentence);
		// clear the pending sentence once it has been parsed
		nextSentence = null;
		// the parse is wrapped in a single-element list before being queued
		ArrayList> result = new ArrayList>();
		result.add(parse);
		// add under the queue's monitor and wake up any threads waiting on it
		synchronized(queue) {
      // priority is the negated sentence id
      queue.add(result,-nextSentenceID);
      queue.notifyAll();
		}
		return null;
	}

	
  /**
   * Builds a new parser from this parser's grammar, lexicon and
   * substate-count array. Only those three values are passed on; other
   * fields of this instance are not copied.
   *
   * @return the newly constructed parser
   */
  public ConstrainedArrayParser newInstance(){
    return new ConstrainedArrayParser(grammar, lexicon, numSubStatesArray);
  }

  
	/**
	 * Not implemented for this parser: prints a notice to standard output
	 * and reports negative infinity.
	 *
	 * @param t ignored
	 * @return {@link Double#NEGATIVE_INFINITY}
	 */
	public double getLogLikelihood(Tree t){
		final double unsupportedResult = Double.NEGATIVE_INFINITY;
		System.out.println("Unsuported for now!");
		return unsupportedResult;
	}
	
	/**
	 * Tree sampling is not implemented in this parser; all arguments are ignored.
	 *
	 * @return always {@code null}
	 */
	public Tree[] getSampledTrees(List sentence, List[][] pStates, int n){
		final Tree[] noSamples = null;
		return noSamples;
	}
	
  /**
   * Sets the flag that tells the chart routines to consider every state
   * instead of the entries in {@code possibleStates}.
   *
   * @param noC new value of the no-constraints flag
   */
  public void setNoConstraints(boolean noC){
    noConstrains = noC;
  }
  
  /**
   * K-best parsing is not implemented in this parser; all arguments are ignored.
   * NOTE(review): the type argument of the {@code List} return type appears to
   * have been lost in this copy of the file.
   *
   * @return always {@code null}
   */
  public List> getKBestConstrainedParses(List sentence, List posTags, int k) {
  	return null;
  }
	
  /**
   * Creates a parser without supplying a grammar, a lexicon or substate
   * counts; only the superclass no-argument constructor runs.
   */
  public ConstrainedArrayParser(){
    super();
  }

  /**
   * Creates a parser for the given grammar and lexicon and resets all
   * usage statistics to zero.
   *
   * @param gr   grammar handed to the superclass
   * @param lex  lexicon handed to the superclass
   * @param nSub number of substates per state; the array is stored as-is, not copied
   */
  public ConstrainedArrayParser(Grammar gr, Lexicon lex, short[] nSub) {
    super(gr, lex);
    numSubStatesArray = nSub;
    // statistics counters all start from zero
    nRules = nRulesInf = 0;
    totalUsedUnaries = 0;
    nTimesRestoredUnaries = 0;
  }


//	public Tree getBestConstrainedParse(List sentence, List[][] pStates) {
//		length = (short)sentence.size();
//		this.possibleStates = pStates;
//		createArrays();
//		initializeChart(sentence);
//		
//		doConstrainedInsideScores();
//    //showScores(iScore, "Inside scores:");
///*		oScore[0][length][0][0] = 0;
//		doConstrainedOutsideScores();
//
//		List possibleParentSt = possibleStates[12][13];
//		for (int pState : possibleParentSt){
//		 System.out.println(pState + " " + (String) tagNumberer.object(pState) + " iScore "+ Arrays.toString(iScore[12][13][pState]) + " oScore "+ Arrays.toString(oScore[12][13][pState])); 
//	   }
//		
//*/
//		Tree bestTree = new Tree("ROOT");
//		double score = iScore[0][length][0][0];
//		if (score > Double.NEGATIVE_INFINITY) {
//			//System.out.println("\nFound a parse for sentence with length "+length+". The LL is "+score+".");
//			Tree bestStateSetTree = extractBestStateSetTree(zero, zero, zero, length, sentence);
//			tallyStatesAndRules(bestStateSetTree);
//			bestTree = restoreStateSetTreeUnaries(bestStateSetTree);
//			//bestTree = extractBestParse(0, 0, 0, length, sentence);
//			//restoreUnaries(bestTree);			
//		}
//		else {
//			System.out.println("()\nDid NOT find a parse for sentence with length "+length+".");
//		}
//		
//		
//		return bestTree;
//	}
	
	/**
	 * Builds the label used when tallying a node of a StateSet tree: the tag
	 * name of the node's state, an ampersand, and the node's first iScore
	 * entry truncated to a short.
	 */
	private String getStateString(Tree tree) {
		Object tagName = tagNumberer.object(tree.getLabel().getState());
		short firstIScore = (short) tree.getLabel().getIScore(0);
		return tagName + "&" + firstIScore;
	}
	
	/**
	 * Recursively counts how often each state and each rule occurs in a tree.
	 * Leaves and preterminals are skipped. For every other node its state
	 * string is counted, and a rule string of the form
	 * {@code parent->|child1|child2...} is counted after its children have
	 * been processed.
	 *
	 * @param bestStateSetTree the (sub)tree to tally
	 */
	private void tallyStatesAndRules(Tree bestStateSetTree) {
		boolean nothingToTally = bestStateSetTree.isLeaf() || bestStateSetTree.isPreTerminal();
		if (nothingToTally) {
			return;
		}
		String parentString = getStateString(bestStateSetTree);
		stateCounter.incrementCount(parentString,1);
		StringBuilder rule = new StringBuilder(parentString).append("->");
		for (Tree subtree : bestStateSetTree.getChildren()) {
			// descend first, then append this child to the parent's rule
			tallyStatesAndRules(subtree);
			rule.append("|").append(getStateString(subtree));
		}
		ruleCounter.incrementCount(rule.toString(),1);
	}
	
	/**
	 * Writes the collected tallies to standard output: first every state
	 * with its count under the heading "STATE TALLIES", then every rule
	 * with its count under the heading "RULE TALLIES".
	 */
	public void printStateAndRuleTallies() {
		System.out.println("STATE TALLIES");
		for (String stateKey : stateCounter.keySet()) {
			double timesSeen = stateCounter.getCount(stateKey);
			System.out.println(stateKey+" "+timesSeen);
		}

		System.out.println("RULE TALLIES");
		for (String ruleKey : ruleCounter.keySet()) {
			double timesSeen = ruleCounter.getCount(ruleKey);
			System.out.println(ruleKey+" "+timesSeen);
		}
	}

	protected void createArrays() {
		// zero out some stuff first in case we recently ran out of memory and are reallocating
		clearArrays();
		
		// allocate just the parts of iScore and oScore used (end > start, etc.)
		//    System.out.println("initializing iScore arrays with length " + length + " and numStates " + numStates);
		iScore = new double[length][length + 1][][];
		oScore = new double[length][length + 1][][];
		iScale = new int[length][length + 1][];
		oScale = new int[length][length + 1][];
		for (int start = 0; start < length; start++) { // initialize for all POS tags so that we can use the lexicon
			int end = start+1;
			iScore[start][end] = new double[numStates][];
			oScore[start][end] = new double[numStates][];
			iScale[start][end] = new int[numStates];
			oScale[start][end] = new int[numStates];
			for (int state = 0; state < numStates; state++){
				iScore[start][end][state] = new double[numSubStatesArray[state]];
				oScore[start][end][state] = new double[numSubStatesArray[state]];
				Arrays.fill(iScore[start][end][state], Float.NEGATIVE_INFINITY);
				Arrays.fill(oScore[start][end][state], Float.NEGATIVE_INFINITY);
			}
		}
		
		for (int start = 0; start < length; start++) {
			for (int end = start + 2; end <= length; end++) {
				iScore[start][end] = new double[numStates][];
				oScore[start][end] = new double[numStates][];
				iScale[start][end] = new int[numStates];
				oScale[start][end] = new int[numStates];
				List pStates = null;
				if (noConstrains){
					pStates = new ArrayList();
	        for (int i = 0; i sentence) {
		// for simplicity the lexicon will store words and tags as strings,
		// while the grammar will be using integers -> Numberer()
		int start = 0;
		int end = start+1;
		for (String word : sentence) {
			end = start+1;
			for (short tag=0; tag possibleSt = possibleStates[start][end];
			//	for (int tag : possibleSt){
				narrowRExtent[start][tag] = end;
				narrowLExtent[end][tag] = start;
				wideRExtent[start][tag] = end;
				wideLExtent[end][tag] = start;
				double[] lexiconScores = lexicon.score(word,tag,start,false,false);
				for (short n=0; n0){
						prob = -10;
						System.out.println("Should never happen! Log-Prob > 0!!!");
						System.out.println("Word "+word+" Tag "+(String)tagNumberer.object(tag)+" prob "+prob);
					}*/
					iScore[start][end][tag][n] = prob;
					/*        UnaryRule[] unaries = grammar.getClosedUnaryRulesByChild(state);
					 for (int r = 0; r < unaries.length; r++) {
					 UnaryRule ur = unaries[r];
					 int parentState = ur.parent;
					 double pS = (double) ur.score;
					 double tot = prob + pS;
					 if (tot > iScore[start][end][parentState]) {
					 iScore[start][end][parentState] = tot;
					 narrowRExtent[start][parentState] = end;
					 narrowLExtent[end][parentState] = start;
					 wideRExtent[start][parentState] = end;
					 wideLExtent[end][parentState] = start;
					 }
					 }*/
				}
			}
			start++;
		}
	}
	
	/**
	 * Overload that accepts a constraint array but does not use it; the work
	 * is delegated to {@link #getBestConstrainedParse(List, List)}.
	 *
	 * @param sentence the words to parse
	 * @param posTags  passed through to the two-argument overload
	 * @param allowedS ignored
	 * @return the result of the two-argument overload
	 */
	public Tree getBestConstrainedParse(List sentence, List posTags, boolean[][][][] allowedS){
		Tree parse = getBestConstrainedParse(sentence, posTags);
		return parse;
	}
	
	/**
	 * Parses a sentence and returns the best tree found.
	 * The no-constraints flag is forced to {@code true}, the chart arrays are
	 * (re)allocated and initialized, and the inside pass is run. If the inside
	 * score of state 0 / substate 0 over the whole sentence is above negative
	 * infinity, the best StateSet tree is extracted and converted by
	 * restoreStateSetTreeUnaries. Otherwise a message is printed and a bare
	 * "ROOT" tree is returned.
	 *
	 * @param sentence the words to parse
	 * @param posTags  not used by this method
	 * @return the best parse, or a tree consisting only of "ROOT" if none was found
	 */
	public Tree getBestConstrainedParse(List sentence, List posTags){
		length = (short)sentence.size();
		noConstrains = true;
		createArrays();
		initializeChart(sentence);
		doConstrainedInsideScores();

		Tree emptyParse = new Tree("ROOT");
		double rootScore = iScore[0][length][0][0];
		boolean noParseFound = !(rootScore > Double.NEGATIVE_INFINITY);
		if (noParseFound) {
			System.out.println("()\nDid NOT find a parse for sentence with length "+length+".");
			return emptyParse;
		}

		Tree stateSetTree = extractBestStateSetTree(zero, zero, zero, length, sentence);
		return restoreStateSetTreeUnaries(stateSetTree);
	}
	

	
	
	/** Fills in the iScore array of each category over each span,
	 *  visiting span lengths from 1 up to the sentence length.
	 */
	
	void doConstrainedInsideScores() {
		grammar.logarithmMode();
		lexicon.logarithmMode();
		for (int diff = 1; diff <= length; diff++) {
			System.out.print(diff + " ");
			for (int start = 0; start < (length - diff + 1); start++) {
				int end = start + diff;
				List possibleSt = null;
				if (noConstrains){
					possibleSt = new ArrayList();
	        for (int i = 0; i= narrowR); // can this right constituent fit next to the left constituent?
						if (!iPossibleR) { continue; }
						
						int min1 = narrowR;
						int min2 = wideLExtent[end][rState];
						int min = (min1 > min2 ? min1 : min2); // can this right constituent stretch far enough to reach the left constituent?
						if (min > narrowL) { continue; }
						
						int max1 = wideRExtent[start][lState];
						int max2 = narrowL;
						int max = (max1 < max2 ? max1 : max2); // can this left constituent stretch far enough to reach the right constituent?
						if (min > max) { continue; }
						
						// new: loop over all substates
						double[][][] scores = r.getScores2();
						int nParentSubStates = numSubStatesArray[pState];
						for (int np = 0; np < nParentSubStates; np++) {
							double oldIScore = iScore[start][end][pState][np];
							double bestIScore = oldIScore;
							for (int split = min; split <= max; split++) {
								if (iScore[start][split][lState] == null) continue;
								if (iScore[split][end][rState] == null) continue;
								
								for (int lp = 0; lp < scores.length; lp++) {
									double lS = iScore[start][split][lState][lp];
									if (lS == Double.NEGATIVE_INFINITY) continue;
									
									for (int rp = 0; rp < scores[0].length; rp++) {
										nRules++;
										double pS = Double.NEGATIVE_INFINITY;
										if (scores[lp][rp]!=null) pS = scores[lp][rp][np];
										if (pS==Double.NEGATIVE_INFINITY){
											nRulesInf++;
											continue; 
											//System.out.println("s "+start+" sp "+split+" e "+end+" pS "+pS+" rS "+rS);
										}

										double rS = iScore[split][end][rState][rp];
										if (rS == Double.NEGATIVE_INFINITY) continue;
										
										double tot = pS + lS + rS;
										if (tot >= bestIScore) { bestIScore = tot;} 
									}
								}
							}
							if (bestIScore > oldIScore) { // this way of making "parentState" is better
								// than previous
								iScore[start][end][pState][np] = bestIScore;
								if (oldIScore == Double.NEGATIVE_INFINITY) {
									if (start > narrowLExtent[end][pState]) {
										narrowLExtent[end][pState] = start;
										wideLExtent[end][pState] = start;
									} else {
										if (start < wideLExtent[end][pState]) {
											wideLExtent[end][pState] = start;
										}
									}
									if (end < narrowRExtent[start][pState]) {
										narrowRExtent[start][pState] = end;
										wideRExtent[start][pState] = end;
									} else {
										if (end > wideRExtent[start][pState]) {
											wideRExtent[start][pState] = end;
										}
									}
								}
							}
						}
					}
				}
				for (int pState : possibleSt){//int pState=0; pState<0; pState++){//
					//UnaryRule[] unaries = grammar.getUnaryRulesByParent(pState).toArray(new UnaryRule[0]);
					// it actually seems to be better to use the unaries without the closure... 
					//UnaryRule[] unaries = new UnaryRule[0];
					UnaryRule[] unaries = grammar.getClosedViterbiUnaryRulesByParent(pState);
					for (int r = 0; r < unaries.length; r++) {
						UnaryRule ur = unaries[r];
						int cState = ur.childState;
						if (iScore[start][end][cState]==null) continue;
						//if ((pState == cState)) continue;// && (np == cp))continue;
						//new loop over all substates
						double[][] scores = ur.getScores2();
						int nParentSubStates = numSubStatesArray[pState];
						for (int np = 0; np < nParentSubStates; np++) {
							double oldIScore = iScore[start][end][pState][np];
							double bestIScore = oldIScore;
							for (int cp = 0; cp < scores.length; cp++) {
								double pS = Double.NEGATIVE_INFINITY;
								if (scores[cp]!=null) pS = scores[cp][np];
								nRules++;
								if (pS==Double.NEGATIVE_INFINITY){
									nRulesInf++;
									continue;
								}
								double iS = iScore[start][end][cState][cp];
								if (iS == Double.NEGATIVE_INFINITY) continue;
								
								double tot = iS + pS;

								if (tot >= bestIScore) { bestIScore = tot; }
							}
							
							if (bestIScore > oldIScore) {
								iScore[start][end][pState][np] = bestIScore;
								if (oldIScore == Double.NEGATIVE_INFINITY) {
									if (start > narrowLExtent[end][pState]) {
										narrowLExtent[end][pState] = start;
										wideLExtent[end][pState] = start;
									} else {
										if (start < wideLExtent[end][pState]) {
											wideLExtent[end][pState] = start;
										}
									}
									if (end < narrowRExtent[start][pState]) {
										narrowRExtent[start][pState] = end;
										wideRExtent[start][pState] = end;
									} else {
										if (end > wideRExtent[start][pState]) {
											wideRExtent[start][pState] = end;
										}
									}
								}
							}
						}
					}
				}
			}
		}
	}


void doConstrainedOutsideScores() {
	grammar.logarithmMode();
	lexicon.logarithmMode();
	for (int diff = length; diff >= 1; diff--) {
		for (int start = 0; start + diff <= length; start++) {
			int end = start + diff;
			// do unaries
			//List possibleParentSt = possibleStates[start][end];
			List possibleParentSt = null;
			if (noConstrains){
				possibleParentSt = new ArrayList();
        for (int i = 0; i bestOScore) {
								bestOScore = tot; 
							}
						}
						if (bestOScore > oldOScore) {
							oScore[start][end][cState][cp] = bestOScore;
						}
					}
				}
			}
			// do binaries
			//for (int lState = 0; lState < numStates; lState++) {
			for (int pState=0; pState < numStates; pState++){
				//BinaryRule[] rules = grammar.splitRulesWithLC(lState);
				BinaryRule[] rules = grammar.splitRulesWithP(pState);
				for (int r = 0; r < rules.length; r++) {
					BinaryRule br = rules[r];
					if (oScore[start][end][br.parentState]==null) {continue;}
					
					int lState = br.leftChildState; 
					int min1 = narrowRExtent[start][lState];
					if (end < min1) { continue; }
					
					int rState = br.rightChildState;
					int max1 = narrowLExtent[end][rState];
					if (max1 < min1) { continue; }
					
					int min = min1;
					int max = max1;
					if (max - min > 2) {
						int min2 = wideLExtent[end][rState];
						min = (min1 > min2 ? min1 : min2);
						if (max1 < min) { continue; }
						int max2 = wideRExtent[start][lState];
						max = (max1 < max2 ? max1 : max2);
						if (max < min) { continue; }
					}
					
					double[][][] scores = br.getScores2();
					for (int split = min; split <= max; split++) {
						if (oScore[start][split][lState] == null) continue;
						if (oScore[split][end][rState] == null) continue;
						for (int lp=0; lp oScore[start][split][lState][lp]) {
										oScore[start][split][lState][lp] = totL;
									}
									double totR = pS + lS + oS;
									if (totR > oScore[split][end][rState][rp]) {
										oScore[split][end][rState][rp] = totR;
									}
								}
							}
						}
					}
				}
			}
/*			for (int rState = 0; rState < numStates; rState++) {
				int max1 = narrowLExtent[end][rState];
				if (max1 < start) { continue; }
				BinaryRule[] rules = grammar.splitRulesWithRC(rState);
				for (int r = 0; r < rules.length; r++) {
					BinaryRule br = rules[r];
					
					if (oScore[start][end][br.parentState]==null) {continue;}
					int lState = br.leftChildState;
					int min1 = narrowRExtent[start][lState];
					if (max1 < min1) { continue; }
					int min = min1;
					int max = max1;
					if (max - min > 2) {
						int min2 = wideLExtent[end][rState];
						min = (min1 > min2 ? min1 : min2);
						if (max1 < min) { continue; }
						int max2 = wideRExtent[start][lState];
						max = (max1 < max2 ? max1 : max2);
						if (max < min) { continue; }
					}
					
					double[][][] scores = br.getScores();
					for (int split = min; split <= max; split++) {
						if (oScore[start][split][lState] == null) continue;
						if (oScore[split][end][rState] == null) continue;
						for (int lp=0; lp oScore[start][split][lState][lp]) {
                    System.err.println("Shouldn't occur!");
                    System.exit(1);
										oScore[start][split][lState][lp] = totL;
									}
									double totR = pS + lS + oS;
									if (totR > oScore[split][end][rState][rp]) {
                    System.err.println("Shouldn't occur!");
                    System.exit(1);
										oScore[split][end][rState][rp] = totR;
									}
								}
							}
						}
					}
				}
			}*/
		}
	}
}

  public void showScores(double[][][][] scores, String title) {
    System.out.println(title);
    for (int diff = 1; diff <= length; diff++) {
      for (int start = 0; start < (length - diff + 1); start++) {
        int end = start + diff;
        System.out.print("[" + start + " " + end + "]: ");
        //List possibleSt = possibleStates[start][end];
				List possibleSt = null;
				if (noConstrains){
					possibleSt = new ArrayList();
	        for (int i = 0; i extractBestParse(int gState, int gp, int start, int end, List sentence ) {
	// find sources of inside score
	// no backtraces so we can speed up the parsing for its primary use
	double bestScore = iScore[start][end][gState][gp];
	String goalStr = (String)tagNumberer.object(gState);
	//System.out.println("Looking for "+goalStr+" from "+start+" to "+end+" with score "+ bestScore+".");
	if (end - start == 1) {
		// if the goal state is a preterminal state, then it can't transform into
		// anything but the word below it
//		if (lexicon.getAllTags().contains(gState)) {
   	if (!grammar.isGrammarTag[gState]){
			List> child = new ArrayList>();
			child.add(new Tree(sentence.get(start)));
			return new Tree(goalStr, child);
    }
		// if the goal state is not a preterminal state, then find a way to
		// transform it into one
		else {
			double veryBestScore = Double.NEGATIVE_INFINITY;
			int newIndex = -1;
			UnaryRule[] unaries = grammar.getClosedViterbiUnaryRulesByParent(gState);
			for (int r = 0; r < unaries.length; r++) {
				UnaryRule ur = unaries[r];
				int cState = ur.childState;
				double[][] scores = ur.getScores2();
				for (int cp=0; cp= veryBestScore) && (gState != cState || gp != cp)
            	&& (!grammar.isGrammarTag[ur.getChildState()])){
//							&& lexicon.getAllTags().contains(cState)) {
						veryBestScore = ruleScore;
						newIndex = cState;
					}
				}
			}
      List> child1 = new ArrayList>();
      child1.add(new Tree(sentence.get(start)));
      String goalStr1 = (String) tagNumberer.object(newIndex);
      if (goalStr1==null)
        System.out.println("goalStr1==null with newIndex=="+newIndex+" goalStr=="+goalStr);
      List> child = new ArrayList>();
      child.add(new Tree(goalStr1, child1));
      return new Tree(goalStr, child);
		}
	}
	// check binaries first
	for (int split = start + 1; split < end; split++) {
		//for (Iterator binaryI = grammar.bRuleIteratorByParent(gState, gp); binaryI.hasNext();) {
		//BinaryRule br = (BinaryRule) binaryI.next();
		BinaryRule[] parentRules = grammar.splitRulesWithP(gState);
		for (int i = 0; i < parentRules.length; i++) {
			BinaryRule br = parentRules[i];
			
			int lState = br.leftChildState;
			if (iScore[start][split][lState]==null) continue;
			
			int rState = br.rightChildState;
			if (iScore[split][end][rState]==null) continue;
			
			//new: iterate over substates
			double[][][] scores = br.getScores2();
			for (int lp=0; lp leftChildTree = extractBestParse(lState, lp, start, split, sentence);
						Tree rightChildTree = extractBestParse(rState, rp, split, end, sentence);
						List> children = new ArrayList>();
						children.add(leftChildTree);
						children.add(rightChildTree);
						Tree result = new Tree(goalStr, children);
						//System.out.println("Binary node: "+result);
						//result.setScore(score);
						return result;
					}
				}
			}
		}
	}
	// check unaries
	//for (Iterator unaryI = grammar.uRuleIteratorByParent(gState, gp); unaryI.hasNext();) {
	//UnaryRule ur = (UnaryRule) unaryI.next();
	UnaryRule[] unaries = grammar.getClosedViterbiUnaryRulesByParent(gState);
	for (int r = 0; r < unaries.length; r++) {
		UnaryRule ur = unaries[r];
		int cState = ur.childState;
		
		if (iScore[start][end][cState]==null) continue;
		
		//new: iterate over substates
		double[][] scores = ur.getScores2();
		for (int cp=0; cp childTree = extractBestParse(cState, cp, start, end, sentence);
				List> children = new ArrayList>();
				children.add(childTree);
				Tree result = new Tree(goalStr, children);
				//System.out.println("Unary node: "+result);
				//result.setScore(score);
				return result;
			}
		}
	}
	System.err.println("Warning: could not find the optimal way to build state "+goalStr+" spanning from "+ start+ " to "+end+".");
	return null;
}

/**
 * Return the single best parse.
 * Note that the returned tree may be missing intermediate nodes in
 * a unary chain because it parses with a unary-closed grammar.
 * A StateSet tree is returned, but the subState array is used in a 
 * different way:
 * it has only one entry, whose value is the substate! - dirty hack...
 */
public Tree extractBestStateSetTree(short gState, short gp, short start, short end, List sentence ) {
	// find sources of inside score
	// no backtraces so we can speed up the parsing for its primary use
	double bestScore = iScore[start][end][gState][gp];
	//Numberer tagNumberer = Numberer.getGlobalNumberer("tags");
	//System.out.println("Looking for "+(String)tagNumberer.object(gState)+" from "+start+" to "+end+" with score "+ bestScore+".");
	if (end - start == 1) {
		// if the goal state is a preterminal state, then it can't transform into
		// anything but the word below it
		if (!grammar.isGrammarTag(gState)) {
			List> child = new ArrayList>();
			StateSet node = new StateSet(zero,zero,sentence.get(start),start,end);
			child.add(new Tree(node));
			StateSet root = new StateSet(gState,one,null,start,end);
			root.allocate();
			root.setIScore(0,gp);
			return new Tree(root, child);
		}
		// if the goal state is not a preterminal state, then find a way to
		// transform it into one
		else {
			double veryBestScore = Double.NEGATIVE_INFINITY;
			short newIndex = -1;
			short newSubstate = -1;
			UnaryRule[] unaries = grammar.getClosedViterbiUnaryRulesByParent(gState);
			for (int r = 0; r < unaries.length; r++) {
				UnaryRule ur = unaries[r];
				short cState = ur.childState;
				double[][] scores = ur.getScores2();
				for (short cp=0; cp= veryBestScore) && (gState != cState || gp != cp)
							&& !grammar.isGrammarTag(cState)) {
						veryBestScore = ruleScore;
						newIndex = cState;
						newSubstate = cp;
					}
				}
			}
			List> child1 = new ArrayList>();
			StateSet node1 = new StateSet(zero,zero,sentence.get(start),start,end);
			child1.add(new Tree(node1));
			if (newIndex==-1)
        System.out.println("goalStr1==null with newIndex=="+newIndex+" goalState=="+gState);
      List> child = new ArrayList>();
			StateSet node = new StateSet(newIndex,one, null, start, end);
			node.allocate();
			node.setIScore(0,newSubstate);
			child.add(new Tree(node,child1));
			StateSet root = new StateSet(gState,one, null, start, end);
			root.allocate();
			root.setIScore(0,gp);
			//totalUsedUnaries++;
      return new Tree(root, child);
		}
	}
	// check binaries first
	double bestBScore = Double.NEGATIVE_INFINITY;
//	BinaryRule bestBRule = null;
//	short bestBLp, bestBRp;
	//TODO: fix parsing
	for (int split = start + 1; split < end; split++) {
		BinaryRule[] parentRules = grammar.splitRulesWithP(gState);
		for (short i = 0; i < parentRules.length; i++) {
			BinaryRule br = parentRules[i];
			
			short lState = br.leftChildState;
			if (iScore[start][split][lState]==null) continue;
			
			short rState = br.rightChildState;
			if (iScore[split][end][rState]==null) continue;
			
			//new: iterate over substates
			double[][][] scores = br.getScores2();
			for (short lp=0; lp bestBScore)
						bestBScore = score;
					if (matches(score, bestScore)) {
						// build binary split
						Tree leftChildTree = extractBestStateSetTree(lState, lp, start, (short)split, sentence);
						Tree rightChildTree = extractBestStateSetTree(rState, rp, (short)split, end, sentence);
						List> children = new ArrayList>();
						children.add(leftChildTree);
						children.add(rightChildTree);
						StateSet root = new StateSet(gState,one, null, start, end);
						root.allocate();
						root.setIScore(0,gp);
						Tree result = new Tree(root, children);
						//System.out.println("Binary node: "+result);
						//result.setScore(score);
						return result;
					}
				}
			}
		}
	}
	double bestUScore = Double.NEGATIVE_INFINITY;
	// check unaries
	UnaryRule[] unaries = grammar.getClosedViterbiUnaryRulesByParent(gState);
	for (short r = 0; r < unaries.length; r++) {
		UnaryRule ur = unaries[r];
		short cState = ur.childState;
		
		if (iScore[start][end][cState]==null) continue;
		
		//new: iterate over substates
		double[][] scores = ur.getScores2();
		for (short cp=0; cp bestUScore)
				bestUScore = score;
			if ((cState != ur.parentState || cp != gp) && matches(score, bestScore)) {
				// build unary
				Tree childTree = extractBestStateSetTree(cState, cp, start, end, sentence);
				List> children = new ArrayList>();
				children.add(childTree);
				StateSet root = new StateSet(gState,one, null, start, end);
				root.allocate();
				root.setIScore(0,gp);
				Tree result = new Tree(root, children);
				//System.out.println("Unary node: "+result);
				//result.setScore(score);
				totalUsedUnaries++;
				return result;
			}
		}
	}
	System.err.println("Warning: could not find the optimal way to build state "+gState+" spanning from "+ start+ " to "+end+".");
	System.err.println("The goal score was "+bestScore+", but the best we found was a binary rule giving "+bestBScore+" and a unary rule giving "+bestUScore);
	showScores(iScore,"iScores");
	return null;
}

	/**
	 * Converts a StateSet-labeled tree into a tree labeled with tag-name strings.
	 * The state set tree has nodes that are labeled with substate information;
	 * the substate information is the first element in the iScore array.
	 * <p>
	 * NOTE(review): the code that re-inserted intermediate nodes of collapsed
	 * unary chains (via grammar.getBestViterbiPath) is commented out below, so
	 * unary nodes are currently only relabeled.
	 * NOTE(review): generic type arguments (on List/ArrayList/Tree) appear to
	 * have been lost in this copy of the file.
	 *
	 * @param t a StateSet tree
	 * @return the relabeled tree, or {@code null} if {@code t} is a leaf
	 */
	protected Tree restoreStateSetTreeUnaries(Tree t) {
		//System.out.println("In restoreUnaries...");
		//System.out.println("Doing node: "+node.getLabel());
		
		if (t.isLeaf()) {	// shouldn't happen
			System.err.println("Tried to restore unary from a leaf...");
			return null;
		} else if (t.isPreTerminal()){ // preterminal unaries have already been restored
			// rebuild as tagName -> word
			List> child = new ArrayList>();
			child.add(new Tree(t.getChildren().get(0).getLabel().getWord()));
			return new Tree((String)tagNumberer.object(t.getLabel().getState()), child);
		} else if (t.getChildren().size() != 1) { // nothing to restore
			// build binary split
			// only the children at index 0 and 1 are carried over
			Tree leftChildTree = restoreStateSetTreeUnaries(t.getChildren().get(0));
			Tree rightChildTree = restoreStateSetTreeUnaries(t.getChildren().get(1));
			List> children = new ArrayList>();
			children.add(leftChildTree);
			children.add(rightChildTree);
			return new Tree((String)tagNumberer.object(t.getLabel().getState()), children);
		} // the interesting part:
		// from here on t has exactly one child and is not a preterminal
		//System.out.println("Not skipping node: "+node.getLabel());
		StateSet parent = t.getLabel();
		StateSet child = t.getChildren().get(0).getLabel();
		short pLabel = parent.getState();
		// pSubState, cLabel and cSubState are only referenced by the commented-out path code below
		short pSubState = (short)parent.getIScore(0); // dirty hack
		short cLabel = child.getState();
		short cSubState = (short)child.getIScore(0);
		
//System.out.println("P: "+(String)tagNumberer.object(pLabel)+" C: "+(String)tagNumberer.object(cLabel));
		List> goodChild = new ArrayList>(); 
		goodChild.add(restoreStateSetTreeUnaries(t.getChildren().get(0))); 
		// do we need a check here? if we can check whether the rule was
		// in the original grammar, then we wouldnt need the getBestPath call.
		// but getBestPath should be able to take care of that...
	//	if (grammar.getUnaryScore(new UnaryRule(pLabel,cLabel))[0][0] != 0){ continue; }// means the rule was already in grammar
		
		//System.out.println("Got path: "+path);
		//if (path.size()==1) return goodChild;
		Tree result = new Tree((String)tagNumberer.object(pLabel),goodChild);
		// working is never reassigned while the loop below is commented out, so it is the same object as result
		Tree working = result;
//		List path = grammar.getBestViterbiPath(pLabel,pSubState, cLabel,cSubState);
//		if (path.size()>2) {
//			nTimesRestoredUnaries++;
//		}
//		for (int pos=1; pos < path.size() - 1; pos++) {
//			int interState = path.get(pos)[0];
//			Tree intermediate = new Tree((String) tagNumberer.object(interState), working.getChildren());
//			List> children = new ArrayList>();
//			children.add(intermediate);
//			working.setChildren(children);
//			working = intermediate;
//		}
		return working;
	}
	
	/**
	 * Returns the inside-score chart as produced by {@code ArrayUtil.clone}.
	 *
	 * @return the result of cloning {@code iScore}
	 */
	public double[][][][] getInsideScores() {
		double[][][][] insideCopy = ArrayUtil.clone(iScore);
		return insideCopy;
	}
	
	/**
	 * Returns the outside-score chart as produced by {@code ArrayUtil.clone}.
	 *
	 * @return the result of cloning {@code oScore}
	 */
	public double[][][][] getOutsideScores() {
		double[][][][] outsideCopy = ArrayUtil.clone(oScore);
		return outsideCopy;
	}

	/**
	 * Prints three lines of statistics to standard output: the number of
	 * unary productions used, the number of restored unaries, and how many
	 * of the visited rules had a score of negative infinity.
	 */
	public void printUnaryStats(){
		String usedUnariesLine = " Used a total of "+totalUsedUnaries+" unary productions.";
		String restoredLine = " restored unaries "+nTimesRestoredUnaries;
		String infiniteRulesLine = " Out of "+nRules+" rules "+nRulesInf+" had probability=-Inf.";

		System.out.println(usedUnariesLine);
		System.out.println(restoredLine);
		System.out.println(infiniteRulesLine);
	}
	
	/**
	 * Constraint projection is not available in this parser; the call only
	 * writes a notice to standard error.
	 *
	 * @param allowed             ignored
	 * @param allSubstatesAllowed ignored
	 */
	public void projectConstraints(boolean[][][][] allowed, boolean allSubstatesAllowed){
		final String notice = "Not supported!\nThis parser cannot project constraints!";
		System.err.println(notice);
	}
	
	/**
	 * Gives access to the per-state substate counts.
	 *
	 * @return the numSubStatesArray field itself (no copy is made)
	 */
	public short[] getNumSubStatesArray() {
		return this.numSubStatesArray;
	}
	
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy