
edu.berkeley.nlp.discPCFG.DefaultLinearizer Maven / Gradle / Ivy


The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
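
Before the source itself, here is a minimal usage sketch (not part of the artifact) of how a DefaultLinearizer is typically driven during discriminative training: the current rule scores are flattened into one weight vector, an optimizer proposes new values, and the vector is written back into the grammar. The wrapper class and method below are illustrative, and grammar, lexicon and spanPredictor are assumed to be initialized elsewhere; only the DefaultLinearizer calls come from the source shown further down.

import java.util.Arrays;

import edu.berkeley.nlp.PCFGLA.Grammar;
import edu.berkeley.nlp.PCFGLA.SimpleLexicon;
import edu.berkeley.nlp.PCFGLA.SpanPredictor;
import edu.berkeley.nlp.discPCFG.DefaultLinearizer;

public class LinearizerRoundTrip {
	// Flattens the grammar weights, lets a caller-supplied step modify them, and
	// writes them back. grammar, lexicon and spanPredictor are assumed to be
	// already initialized elsewhere (e.g. by the PCFGLA training pipeline).
	public static double[] roundTrip(Grammar grammar, SimpleLexicon lexicon, SpanPredictor spanPredictor) {
		DefaultLinearizer linearizer = new DefaultLinearizer(grammar, lexicon, spanPredictor);
		// Current grammar rule scores as one flat, log-domain weight vector.
		double[] weights = linearizer.getLinearizedGrammar(false);
		// An optimizer would update the vector here; this sketch copies it unchanged.
		double[] proposed = Arrays.copyOf(weights, weights.length);
		// Write the proposed weights back into the grammar's rule score tables;
		// non-finite or huge values are skipped and reported by the linearizer.
		linearizer.delinearizeGrammar(proposed);
		return proposed;
	}
}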

/**
 * 
 */
package edu.berkeley.nlp.discPCFG;

import java.io.Serializable;
import java.util.Arrays;
import java.util.List;

import edu.berkeley.nlp.PCFGLA.BinaryRule;
import edu.berkeley.nlp.PCFGLA.ConditionalTrainer;
import edu.berkeley.nlp.PCFGLA.Grammar;
import edu.berkeley.nlp.PCFGLA.Rule;
import edu.berkeley.nlp.PCFGLA.SimpleLexicon;
import edu.berkeley.nlp.PCFGLA.SpanPredictor;
import edu.berkeley.nlp.PCFGLA.UnaryRule;
import edu.berkeley.nlp.syntax.StateSet;
import edu.berkeley.nlp.math.DoubleArrays;
import edu.berkeley.nlp.math.SloppyMath;
import edu.berkeley.nlp.util.ArrayUtil;
import edu.berkeley.nlp.util.Indexer;

/**
 * Maps the grammar, lexicon and span-predictor parameters to and from a single
 * flat weight vector, as used for discriminative (conditional) training of the
 * Berkeley parser.
 * 
 * @author petrov
 */
public class DefaultLinearizer implements Linearizer, Serializable {
	Grammar grammar;
	SimpleLexicon lexicon;
	SpanPredictor spanPredictor;
	
  int[][] linearIndex;
  int nGrammarWeights, nLexiconWeights, nSpanWeights;
  int nWords, startSpanWeights;
  int nSubstates;
  int nClasses;
  
  int startIndexPrevious, startIndexNext;
  int startIndexFirst, startIndexLast;
  int startIndexBeginPair, startIndexEndPair;
  int startIndexPunctuation;

  double[] lastProbs;
  
  private static final long serialVersionUID = 2L;

  public DefaultLinearizer() {
	}

	/**
	 * @param grammar
	 * @param lexicon
	 * @param sp the span predictor whose weights are linearized alongside the grammar and lexicon
	 */
	public DefaultLinearizer(Grammar grammar, SimpleLexicon lexicon, SpanPredictor sp) {
		this.grammar = grammar;
		this.lexicon = lexicon;
		this.spanPredictor = sp;
		this.nSubstates = (int)ArrayUtil.max(grammar.numSubStates);
		init();
	}
	
	protected void init() {
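		// One pass in update mode initializes the weight counts, the rule identifiers
		// and the start indices used by the (de)linearization methods below.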
		double[] tmp = null;
		if (!ConditionalTrainer.Options.lockGrammar){
			tmp = getLinearizedGrammar(true);
			tmp = getLinearizedLexicon(true);
		}
		tmp = getLinearizedSpanPredictor(true);
	}

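	/**
	 * Writes a flat weight vector back into the span predictor's score tables:
	 * each proposed log-weight (starting at startSpanWeights) is exponentiated and
	 * stored, while non-finite or very large ("dangerous") values are skipped so
	 * the previous score is kept. The inner loop bounds and guards below are a
	 * reconstruction; the original lines were garbled in this copy of the source.
	 */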
	public void delinearizeSpanPredictor(double [] probs) {
		if (spanPredictor==null) return;
		int ind = startSpanWeights, nDangerous = 0;

		if (spanPredictor.useFirstAndLast){
			double[][] tmp = spanPredictor.firstWordScore;
			for (int i=0; i<tmp.length; i++){
				for (int c=0; c<nClasses; c++){
					double val = probs[ind++];
					// skip non-finite or very large proposed weights ("dangerous" values)
					if (Double.isNaN(val) || Double.isInfinite(val) || val>300) {
						nDangerous++;
						continue;
					}
					val = Math.exp(val);
					tmp[i][c] = val;
				}
			}
	
			tmp = spanPredictor.lastWordScore;
			for (int i=0; i<tmp.length; i++){
				for (int c=0; c<nClasses; c++){
					double val = probs[ind++];
					if (Double.isNaN(val) || Double.isInfinite(val) || val>300) {
						nDangerous++;
						continue;
					}
					val = Math.exp(val);
					tmp[i][c] = val;
				}
			}
		}
		
		if (spanPredictor.usePreviousAndNext){
			double[][] tmp = spanPredictor.previousWordScore;
			for (int i=0; i<tmp.length; i++){
				for (int c=0; c<nClasses; c++){
					double val = probs[ind++];
					if (Double.isNaN(val) || Double.isInfinite(val) || val>300) {
						nDangerous++;
						continue;
					}
					val = Math.exp(val);
					tmp[i][c] = val;
				}
			}
	
			tmp = spanPredictor.nextWordScore;
			for (int i=0; i<tmp.length; i++){
				for (int c=0; c<nClasses; c++){
					double val = probs[ind++];
					if (Double.isNaN(val) || Double.isInfinite(val) || val>300) {
						nDangerous++;
						continue;
					}
					val = Math.exp(val);
					tmp[i][c] = val;
				}
			}
		}

		if (spanPredictor.useBeginAndEndPairs){
			double[][] tmp = spanPredictor.beginPairScore;
			for (int i=0; i<tmp.length; i++){
				for (int c=0; c<nClasses; c++){
					double val = probs[ind++];
					if (Double.isNaN(val) || Double.isInfinite(val) || val>300) {
						nDangerous++;
						continue;
					}
					val = Math.exp(val);
					tmp[i][c] = val;
				}
			}
	
			tmp = spanPredictor.endPairScore;
			for (int i=0; i<tmp.length; i++){
				for (int c=0; c<nClasses; c++){
					double val = probs[ind++];
					if (Double.isNaN(val) || Double.isInfinite(val) || val>300) {
						nDangerous++;
						continue;
					}
					val = Math.exp(val);
					tmp[i][c] = val;
				}
			}
		}

		if (spanPredictor.usePunctuation){
			double[][] tmp = spanPredictor.punctuationScores;
			for (int i=0; i<tmp.length; i++){
				for (int c=0; c<nClasses; c++){
					double val = probs[ind++];
					if (Double.isNaN(val) || Double.isInfinite(val) || val>300) {
						nDangerous++;
						continue;
					}
					val = Math.exp(val);
					tmp[i][c] = val;
				}
			}
		}
		
		if(nDangerous>0)
			System.out.println("Ignored "+nDangerous+" proposed span feature weights since they were dangerous.");
		
	}
	
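	/**
	 * Writes a flat weight vector back into the grammar's binary and unary rule
	 * score tables: each proposed log-weight is exponentiated and stored, while
	 * non-finite or very large ("dangerous") values are skipped so the old score
	 * is kept. Afterwards the closed unary-rule arrays are rebuilt. The inner
	 * loops below are a reconstruction; the original lines were garbled in this
	 * copy of the source.
	 */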
	public void delinearizeGrammar(double [] probs) {
		int nDangerous = 0;
		for (BinaryRule bRule : grammar.binaryRuleMap.keySet()){
			int ind = bRule.identifier;//startIndex[ruleIndexer.indexOf(bRule)];
			double[][][] scores = bRule.getScores2();
			for (int j=0; j<scores.length; j++){
				for (int k=0; k<scores[j].length; k++){
					if (scores[j][k]==null) continue;
					for (int l=0; l<scores[j][k].length; l++){
						double val = probs[ind++];
						if (Double.isNaN(val) || Double.isInfinite(val) || val>300) {
							nDangerous++;
							continue;
						}
						scores[j][k][l] = Math.exp(val);
					}
				}
			}
		}
		if (nDangerous>0) System.out.println("Left "+nDangerous+" binary rule weights unchanged since the proposed weight was dangerous.");

		nDangerous = 0;
		for (UnaryRule uRule : grammar.unaryRuleMap.keySet()){
			int ind = uRule.identifier;//startIndex[ruleIndexer.indexOf(uRule)];
			if (uRule.childState==uRule.parentState) continue;
			double[][] scores = uRule.getScores2();
			for (int j=0; j<scores.length; j++){
				if (scores[j]==null) continue;
				for (int k=0; k<scores[j].length; k++){
					double val = probs[ind++];
					if (Double.isNaN(val) || Double.isInfinite(val) || val>300) {
						nDangerous++;
						continue;
					}
					scores[j][k] = Math.exp(val);
				}
			}
		}
		if (nDangerous>0) System.out.println("Left "+nDangerous+" unary rule weights unchanged since the proposed weight was dangerous.");

		
		grammar.closedSumRulesWithParent = grammar.closedViterbiRulesWithParent = grammar.unaryRulesWithParent;
		grammar.closedSumRulesWithChild = grammar.closedViterbiRulesWithChild = grammar.unaryRulesWithC;
//		computePairsOfUnaries();
		grammar.clearUnaryIntermediates();
		grammar.makeCRArrays();
//		return grammar;
	}

	
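	/**
	 * Returns the grammar's rule scores as one flat, log-domain weight vector.
	 * With update=true the rule identifiers (each rule's start offset in the
	 * vector) and the total number of grammar weights are recomputed first.
	 * Parts of the method below were lost or garbled when this page was
	 * extracted; the surviving fragments are kept and the gaps are marked.
	 */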
	public double[] getLinearizedGrammar(boolean update) {
		if (update){
//			int nRules = grammar.binaryRuleMap.size() + grammar.unaryRuleMap.size();
			
			nGrammarWeights = 0;
			for (BinaryRule bRule : grammar.binaryRuleMap.keySet()){
//				ruleIndexer.add(bRule);
				if (!grammar.isGrammarTag[bRule.parentState]){ System.out.println("Incorrect grammar tag"); }
				bRule.identifier = nGrammarWeights;
//				ruleIndexer.indexOf(bRule);
//				startIndex[bRule.identifier] = ;
				double[][][] scores = bRule.getScores2();
				for (int j=0; j<scores.length; j++){
					for (int k=0; k<scores[j].length; k++){
						if (scores[j][k]==null) continue;
						nGrammarWeights += scores[j][k].length;
					}
				}
			}
			// ... (the matching pass that assigns identifiers to the unary rules was
			//      lost when this page was extracted) ...
		}
		// ... (the code that fills and returns the flat grammar weight vector was
		//      also lost) ...
	}

	/**
	 * Writes a flat weight vector back into the lexicon's score tables, mirroring
	 * delinearizeGrammar. (The signature and most of the body were lost when this
	 * page was extracted; the signature below is assumed, and only the final
	 * report survives from the body.)
	 */
	public void delinearizeLexicon(double[] probs) {
		int nDangerous = 0;
		// ...
		if (nDangerous>0) System.out.println("Left "+nDangerous+" lexicon weights unchanged since the proposed weight was dangerous.");
//		return lexicon;
  }
  
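  // Convenience overload: linearizes the lexicon without recomputing the weight count.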
  public double[] getLinearizedLexicon(){
  	return getLinearizedLexicon(false);
  }
  
  public double[] getLinearizedLexicon(boolean update){
  	if (update) {
  		nLexiconWeights = 0;
  		// ... (loops over the lexicon tags and adds their weight counts to
  		//      nLexiconWeights; the loop body was lost when this page was extracted) ...
  	}
  	// ... (the code that fills and returns the flat lexicon weight vector, the
  	//      getLinearizedSpanPredictor method, and the grammar/lexicon
  	//      count-increment methods were lost when this page was extracted) ...
  }

	/**
	 * Accumulates per-span quantities for one sentence into a flat vector that is
	 * indexed like the span-feature weights. (The method name and its first
	 * parameter were lost when this page was extracted and are assumed here.)
	 */
	public void increment(double[] counts, List<StateSet> sentence, double[][][] weights, boolean isGold) {
		int length = sentence.size();
  	int firstIndex, lastIndex;
  	int previousIndex=-1, nextIndex=-1;
  	
  	if (spanPredictor.usePunctuation){
  		int[][] punctSignatures = spanPredictor.getPunctuationSignatures(sentence);
  		for (int start = 0; start <= length-spanPredictor.minSpanLength; start++) {
  			for (int end = start + spanPredictor.minSpanLength; end <= length; end++) {
  				int sig = punctSignatures[start][end];
  				if (sig==-1) continue;
  				sig *= nClasses;
  				for (int c=0; c<nClasses; c++){
  					// ... (per-class accumulation at offset startIndexPunctuation + sig + c;
  					//      the exact statement was lost when this page was extracted) ...
  				}
  			}
  		}
  	}

		// ... (the begin-pair feature block was lost when this page was extracted; the
		//      surviving fragments show it tested "if (beginI>=0){", then did
		//      "beginI += startIndexBeginPair;" before looping over the classes) ...

		// first/last-word features (this loop header is reconstructed; the original
		// line was garbled when this page was extracted)
		for (int end = length; end >= spanPredictor.minSpanLength; end--) {
			StateSet stateSet = sentence.get(end-1);
			lastIndex = (stateSet.sigIndex<0) ? stateSet.wordIndex : stateSet.sigIndex;
			if (spanPredictor.useOnlyWords) lastIndex = stateSet.wordIndex;
			
			double[] total = new double[spanPredictor.getNClasses()];
			for (int start = 0; start <= end-spanPredictor.minSpanLength; start++) {
				for (int c=0; c<nClasses; c++){
					// ... (per-class accumulation of weights[start][end][c] into total[c];
					//      the exact statement was lost when this page was extracted) ...
				}
			}
			// ... (the remainder of the method and of the class was lost when this page
			//      was extracted; the surviving fragments show an end-pair block that
			//      tested "if (endI>=0){", did "endI += startIndexEndPair;" and then
			//      looped over the classes, mirroring the begin-pair block above) ...
		}
	}
}



