All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.berkeley.nlp.PCFGLA.PosteriorMerger Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
/**
 * 
 */
package edu.berkeley.nlp.PCFGLA;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.zip.GZIPInputStream;

import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.util.ArrayUtil;
import edu.berkeley.nlp.util.Numberer;
import edu.berkeley.nlp.util.ScalingTools;


public class PosteriorMerger{

	/**
	 * Command-line options for {@link PosteriorMerger}. An instance is produced in
	 * {@code main} by an {@code OptionParser} constructed from this class; each
	 * field is bound to the flag named in its {@code @Option} annotation.
	 */
	public static class Options {

		/** Value of {@code -grammarFiles}; the only option marked as required. */
		@Option(name = "-grammarFiles", required = true, usage = "Input Files for Grammars.")
		public String grammarFiles;

		/** Value of {@code -inputFile}; per the usage text, STDIN is read when it is not given. */
		@Option(name = "-inputFile", usage = "Read input from this file instead of reading it from STDIN.")
		public String inputFile;

		/** Value of {@code -outputFile}; per the usage text, STDOUT is used when it is not given. */
		@Option(name = "-outputFile", usage = "Store output in this file instead of printing it to STDOUT.")
		public String outputFile;

		/**
		 * Value of {@code -nGrammars}. There is no explicit default, so the field
		 * stays at 0 unless the option parser assigns it; {@code main} uses it
		 * directly as the length of the grammar/lexicon arrays.
		 */
		@Option(name = "-nGrammars", usage = "Number of Grammars")
		public int nGrammars;

		/** Value of {@code -maxLength}; defaults to 200. */
		@Option(name = "-maxLength", usage = "Maximum sentence length (Default = 200).")
		public int maxLength = 200;
	}

	// Chart tables shared through static state. They are re-allocated for every
	// sentence as [length][length + 1][numStates] by the scoring code below, and
	// maxcScore is initialised to Double.NEGATIVE_INFINITY there.
	// Because they are static and mutable, this class is not safe to use from
	// several threads at once.
	static double[][][] maxcScore;  // start, end, state --> logProb
	static int[][][] maxcSplit;  // start, end, state -> split position
	static int[][][] maxcChild;  // start, end, state -> unary child (if any)
	static int[][][] maxcLeftChild;  // start, end, state -> left child
	static int[][][] maxcRightChild;  // start, end, state -> right child


	/**
	 * Command-line entry point. Parses {@link Options}, allocates one
	 * grammar/lexicon/substate slot per {@code -nGrammars}, then reads the input
	 * line by line, splits each line on single spaces into a sentence and writes
	 * either the un-annotated best max-rule parse or the placeholder
	 * {@code (())} for a skipped sentence.
	 *
	 * NOTE(review): several lines of this method appear to have lost text
	 * (everything between a '<' and a following '>' seems to be missing), so the
	 * grammar-loading loop, the reader/writer setup and the per-grammar posterior
	 * handling are not visible here. The comments below describe only what can
	 * still be read.
	 *
	 * @param args command-line arguments, handed to the option parser
	 */
	public static void main(String[] args) {
		OptionParser optParser = new OptionParser(Options.class);
		Options opts = (Options) optParser.parse(args, true);
		// provide feedback on command-line arguments
		System.err.println("Calling with " + optParser.getPassedInOptions());


		String inFileName = opts.grammarFiles;
		if (inFileName==null) {
			throw new Error("Did not provide a grammar.");
		}

		// One slot per grammar; opts.nGrammars is used as given (0 if never set).
		short[][] numSubstates = new short[opts.nGrammars][];
		Grammar[] grammars = new Grammar[opts.nGrammars];
		Lexicon[] lexicons = new Lexicon[opts.nGrammars];
		// NOTE(review): the next line is truncated -- the loop condition, the loop
		// body and the code up to the declaration of 'posteriors' are missing.
		for (int gr=0; gr[] posteriors = null;
			while ((line = inputData.readLine()) != null) {
				// Tokens are separated by exactly one space character.
				List sentence = Arrays.asList(line.split(" "));
				// (Re)load posteriors when none are held yet or the current batch is used up.
				if (posteriors == null || lineIndex == posteriors[0].size()){
					posteriors = new ArrayList[nGrammars];
					// NOTE(review): truncated line -- the per-grammar loading loop and the
					// start of the sentence-length check against opts.maxLength are missing.
					for (int gr=0; gr opts.maxLength){
					//    			lineIndex++;
					// Skipped sentences still produce one output line so that input and
					// output stay aligned line by line.
					outputData.write("(())\n");
					continue;
				}

				// Per-grammar inside/outside scores and their scale factors.
				List iScores = new ArrayList(nGrammars);
				List oScores = new ArrayList(nGrammars);
				List iScales = new ArrayList(nGrammars);
				List oScales = new ArrayList(nGrammars);
				boolean[][][] allowedStates = null;

				boolean skip = false;
				// NOTE(review): truncated line -- the per-grammar loop body and the call
				// that fills the max-rule chart are missing before 'parsedTree'.
				for (int gr=0; gr parsedTree = parser.extractBestMaxRuleParse(0, sentence.size(), sentence);
				parsedTree = TreeAnnotations.unAnnotateTree(parsedTree, false);
				outputData.write(parsedTree+"\n");
				// Flush after every sentence so partial results are visible immediately.
				outputData.flush();
			}



			outputData.flush();
			outputData.close();
		}catch (Exception ex) {
			// Failures are only reported on stderr; the exit status below stays 0.
			ex.printStackTrace();
		}
		System.exit(0);
	}



	/**
	 * Combines two allowed-state masks. When {@code allowedStates} is
	 * {@code null}, {@code allowedStates2} is returned unchanged.
	 *
	 * NOTE(review): the rest of this method and the header of the following
	 * method appear to have been lost (text between a '<' and a following '>'
	 * seems to be missing), so the two definitions run into each other below.
	 */
	private static boolean[][][] mergeAllowedStates(boolean[][][] allowedStates,
			boolean[][][] allowedStates2) {
		if (allowedStates==null) return allowedStates2;
		// NOTE(review): truncated line -- the merge loop ends here and the parameter
		// list that follows belongs to a second method whose name is not visible.
		// That method fills the static maxc* chart tables for one sentence from the
		// per-grammar inside/outside scores.
		for (int i=0; i sentence, 
			List iScores, List oScores, 
			List iScales, List oScales,
			boolean[][][] allowedStates, 
			Grammar[] grammars, Lexicon[] lexicons,
			short[][] numSubstates, boolean scale) {

		int length = sentence.size();
		int nGrammars = numSubstates.length;
		// The number of states and the grammar-tag flags are taken from the first grammar only.
		int numStates = numSubstates[0].length;
		boolean[] grammarTags = grammars[0].isGrammarTag;
		Numberer tagNumberer = Numberer.getGlobalNumberer("tags");

		// Fresh chart tables for this sentence, indexed [start][end][state].
		maxcScore = new double[length][length + 1][numStates];
		maxcSplit = new int[length][length + 1][numStates];
		maxcChild      = new int[length][length + 1][numStates];
		maxcLeftChild  = new int[length][length + 1][numStates];
		maxcRightChild = new int[length][length + 1][numStates];
		ArrayUtil.fill(maxcScore, Double.NEGATIVE_INFINITY);
		
		double[] logNormalizer = new double[nGrammars];
		// NOTE(review): truncated line -- the normalizer computation and the span
		// loops over start/end are missing before the 'diff' test.
		for (int i=0; i 1) {
					// diff > 1: Try binary rules
					// NOTE(review): truncated line -- the loops over parent state, binary
					// rules, split points and grammars, plus the rule lookup, are missing;
					// only the tail of a message about a missing rule remains.
					for (short pState=0; pState "+(String)tagNumberer.object(lState)
												+" "+(String)tagNumberer.object(rState)+" in grammar "+gr);
										continue;
									}
									double[][][] scores = rule.getScores2();
									int nParentStates = numSubstates[gr][pState]; // == scores[0][0].length;
									int nLeftChildStates = numSubstates[gr][lState]; // == scores.length;
									int nRightChildStates = numSubstates[gr][rState]; // == scores[0].length;

									// Sum the rule contribution over all substate triples;
									// any zero factor makes the product zero and is skipped.
									for (int lp = 0; lp < nLeftChildStates; lp++) {
										double lIS = iScores.get(gr)[start][split][lState][lp];
										if (lIS == 0) continue;

										for (int rp = 0; rp < nRightChildStates; rp++) {
											if (scores[lp][rp]==null) continue;
											double rIS = iScores.get(gr)[split][end][rState][rp];
											if (rIS == 0) continue;
											for (int np = 0; np < nParentStates; np++) {
												double pOS = oScores.get(gr)[start][end][pState][np];
												if (pOS == 0) continue;

												double ruleS = scores[lp][rp][np];
												if (ruleS == 0) continue;
												// NOTE(review): logNormalizer is used as a plain divisor here;
												// whether it holds a log-domain value cannot be told from the
												// visible code.
												ruleScore += (pOS * ruleS * lIS * rIS) / logNormalizer[gr];
											}
										}
									}
//									if (ruleScore==0) continue;
									// With the guard above disabled, a zero ruleScore adds
									// Math.log(0) = -Infinity to gScore.
									gScore += Math.log(ruleScore);
								}	


								// Keep the best-scoring (split, left child, right child) for this parent.
								if (gScore > scoreToBeat) {
									scoreToBeat = gScore;
									maxcScore[start][end][pState] = gScore;
									maxcSplit[start][end][pState] = split;
									maxcLeftChild[start][end][pState] = lState;
									maxcRightChild[start][end][pState] = rState;
								}
							}
						} 
					}
				} else { // diff == 1
					// We treat TAG --> word exactly as if it was a unary rule, except the score of the rule is
					// given by the lexicon rather than the grammar and that we allow another unary on top of it.
					//for (int tag : lexicon.getAllTags()){
					// NOTE(review): truncated line -- the lexical scoring, the close of this
					// branch and the start of the unary-rule loops are missing; only the tail
					// of a message about a missing rule remains.
					for (int tag=0; tag "+(String)tagNumberer.object(cState)+" in grammar "+gr);
								continue;
							}
							double[][] scores = rule.getScores2();

							int nChildStates = numSubstates[gr][cState]; // == scores.length;
							int nParentStates = numSubstates[gr][pState]; // == scores[0].length;

							for (int cp = 0; cp < nChildStates; cp++) {
								double cIS = iScores.get(gr)[start][end][cState][cp];
								if (cIS == 0) continue;

								if (scores[cp]==null) continue;
								for (int np = 0; np < nParentStates; np++) {
									double pOS = oScores.get(gr)[start][end][pState][np];
									// NOTE(review): the binary case above skips on 'pOS == 0'; this
									// test uses '< 0' instead -- confirm whether that is intended.
									if (pOS < 0) continue;

									double ruleS = scores[cp][np];
									if (ruleS == 0) continue;
									ruleScore += (pOS * ruleS * cIS) / logNormalizer[gr];
								}
							}
//							if (ruleScore==0) continue;
							gScore += Math.log(ruleScore);
						}

						// Record the unary child only when it improves the parent's score.
						if (gScore > maxcScoreStartEnd[pState]) {
							maxcScoreStartEnd[pState] = gScore;
							maxcChild[start][end][pState] = cState;
						}
					}
				}
				// Store the per-state scores for this span in the chart.
				maxcScore[start][end] = maxcScoreStartEnd;
			}
		}
	}


	/**
	 * Reads a gzip-compressed, Java-serialized {@code List} from the given file.
	 *
	 * <p>The underlying file handle is released on every path, including when
	 * the file is not valid gzip data or deserialization fails part-way.
	 *
	 * <p>NOTE(review): this uses Java native deserialization; only call it on
	 * files from a trusted source.
	 *
	 * @param fileName path of the gzip-compressed serialized file
	 * @return the deserialized list, or {@code null} when the file cannot be read
	 *         or a class in the stream cannot be resolved (a short message is
	 *         printed to standard output in both cases)
	 */
	public static List loadPosteriors(String fileName) {
		FileInputStream fis = null;
		ObjectInputStream in = null;
		try {
			fis = new FileInputStream(fileName); // Load from file
			// Compressed stream of serialized objects
			in = new ObjectInputStream(new GZIPInputStream(fis));
			return (List)in.readObject(); // Read the mix of grammars
		} catch (IOException e) {
			System.out.println("IOException\n"+e);
			return null;
		} catch (ClassNotFoundException e) {
			System.out.println("Class not found!");
			return null;
		} finally {
			// Closing the outermost stream also closes the ones it wraps. If the
			// wrappers were never built, fall back to the raw file stream.
			try {
				if (in != null) {
					in.close();
				} else if (fis != null) {
					fis.close();
				}
			} catch (IOException ignored) {
				// Nothing useful can be done if closing an input stream fails.
			}
		}
	}


}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy