
edu.berkeley.nlp.PCFGLA.HierarchicalCombinedLexicon Maven / Gradle / Ivy


The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
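As a quick illustration of the PCFG idea mentioned above (not taken from the parser's own code): a PCFG attaches a probability to each rewrite rule, and a parse tree's probability is the product of the probabilities of the rules it uses. The grammar, rule names, and numbers below are invented for illustration only.

import java.util.HashMap;
import java.util.Map;

// Toy PCFG: a parse tree's probability is the product of its rule probabilities.
// The grammar and probabilities are made up for illustration.
public class PcfgToy {
    public static void main(String[] args) {
        Map<String, Double> ruleProb = new HashMap<>();
        ruleProb.put("S -> NP VP",    1.0);
        ruleProb.put("NP -> the dog", 0.5);
        ruleProb.put("VP -> barks",   0.25);

        // Probability of the tree (S (NP the dog) (VP barks))
        double p = ruleProb.get("S -> NP VP")
                 * ruleProb.get("NP -> the dog")
                 * ruleProb.get("VP -> barks");
        System.out.println("P(tree) = " + p);  // 0.125
    }
}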

///**
// * 
// */
//package edu.berkeley.nlp.PCFGLA;
//
//import java.util.Arrays;
//import java.util.List;
//
//import edu.berkeley.nlp.PCFGLA.SimpleLexicon.IntegerIndexer;
//import edu.berkeley.nlp.PCFGLA.smoothing.Smoother;
//import edu.berkeley.nlp.syntax.StateSet;
//import edu.berkeley.nlp.syntax.Tree;
//import fig.basic.Indexer;
//
///**
// * Each word's tagging probability is the sum of the (word, tag) score and the
// * (signature, tag) score; words seen more than knownWordCount times in training
// * keep only their (word, tag) score.
// *
// * @author petrov
// */
//public class HierarchicalCombinedLexicon extends HierarchicalLexicon{
//	private static final long serialVersionUID = 1L;
//	protected int knownWordCount;
//	/**
//	 * @param numSubStates
//	 * @param knownWordCount
//	 */
//	public HierarchicalCombinedLexicon(short[] numSubStates, int knownWordCount) {
//		super(numSubStates, 0);
//		this.knownWordCount = knownWordCount;
//	}
//	
//	public HierarchicalCombinedLexicon(short[] numSubStates, int smoothingCutoff, double[] smoothParam, 
//			Smoother smoother, StateSetTreeList trainTrees, int knownWordCount) {
//  	this(numSubStates, knownWordCount);
//  	init(trainTrees);
//  }
//
//
//  /**
//	 * @param previousLexicon
//	 */
//	public HierarchicalCombinedLexicon(SimpleLexicon previousLexicon, int knownWordCount) {
//		super(previousLexicon);
//		this.knownWordCount = knownWordCount;
//	}
//	
//	public HierarchicalCombinedLexicon newInstance() {
//		return new HierarchicalCombinedLexicon(this.numSubStates,this.knownWordCount);
//	}
//
////	public double[] score(String word, short tag, int loc, boolean noSmoothing, boolean isSignature) {
////		int globalWordIndex = wordIndexer.indexOf(word);
////		int globalSigIndex = wordIndexer.indexOf(getSignature(word, loc));
////		return score(globalWordIndex, globalSigIndex, tag, loc, noSmoothing, isSignature);
////	}
//	
//
//	
//	public double[] score(StateSet stateSet, short tag, boolean noSmoothing, boolean isSignature) {
////		String sig = getSignature(stateSet.getWord(), stateSet.from);
////		if (stateSet.sigIndex != wordIndexer.indexOf(sig));
////			System.out.println("problem, signatures dont match!");
//		if (stateSet.wordIndex == -2) {
//			String word = stateSet.getWord();
//			stateSet.wordIndex = (short)wordIndexer.indexOf(word);
//			stateSet.sigIndex = (short)wordIndexer.indexOf(getSignature(word,stateSet.from));
//		}
//		return score(stateSet.wordIndex, stateSet.sigIndex, tag, stateSet.from, noSmoothing, isSignature);
//	}
//	
//
//	public double[] score(int globalWordIndex, int globalSigIndex, short tag, int loc, boolean noSmoothing, boolean isSignature) {
//		double[] res = new double[numSubStates[tag]];
//		if (globalWordIndex!=-1) {
//			int tagSpecificWordIndex = tagWordIndexer[tag].indexOf(globalWordIndex);
//			if (tagSpecificWordIndex!=-1){
//				for (int i=0; i<numSubStates[tag]; i++){
//					res[i] = scores[tag][i][tagSpecificWordIndex];
//				}
//			}
//		}
//		if (globalWordIndex>=0 && wordCounter[globalWordIndex]>knownWordCount) return res;
//		if (globalSigIndex!=-1) {
//			int tagSpecificWordIndex = tagWordIndexer[tag].indexOf(globalSigIndex);
//			if (tagSpecificWordIndex!=-1){
//				for (int i=0; i<numSubStates[tag]; i++){
//					res[i] += scores[tag][i][tagSpecificWordIndex];
//				}
//			}
//		}
//		return res;
//	}
//
//	// [method signature missing from this listing]
//		if (globalWordIndex>=0 && wordCounter[globalWordIndex]>knownWordCount) return null;
//  	double[] res = new double[numSubStates[tag]];
//		if (globalSigIndex!=-1) {
//			int tagSpecificWordIndex = tagWordIndexer[tag].indexOf(globalSigIndex);
//			if (tagSpecificWordIndex!=-1){
//				for (int i=0; i<numSubStates[tag]; i++){
//					res[i] = scores[tag][i][tagSpecificWordIndex];
//				}
//			}
//		}
//		return res;
//	}
//
//	public void labelTrees(StateSetTreeList trainTrees){
//  	for (Tree<StateSet> tree : trainTrees){
//  		List<StateSet> words = tree.getYield();
//  		List<StateSet> tags = tree.getPreTerminalYield();
//  		int ind = 0;
//  		for (StateSet word : words){
//  			word.wordIndex = (short)wordIndexer.indexOf(word.getWord());
//  			short tag = tags.get(ind).getState();
////				if (wordIsAmbiguous[word.wordIndex]) {
//					String sig = getSignature(word.getWord(), ind);
//					wordIndexer.add(sig);
//	  			word.sigIndex = (short)wordIndexer.indexOf(sig);
//					tagWordIndexer[tag].add(wordIndexer.indexOf(sig));
////				}
////				else { word.sigIndex = -1; }
//				ind++;
//  		}  		
//  	}
//
//	}
//	
//  public void init(StateSetTreeList trainTrees){
//  	for (Tree<StateSet> tree : trainTrees){
//  		List<StateSet> words = tree.getYield();
//  		List<StateSet> tags = tree.getPreTerminalYield();
//  		int ind = 0;
//  		for (StateSet word : words){
//				String sig = word.getWord();
//				wordIndexer.add(sig);
//				tagWordIndexer[tags.get(ind).getState()].add(wordIndexer.indexOf(sig));
//  			word.wordIndex = (short)wordIndexer.indexOf(sig);
//				ind++;
//  		}
//  	}
//  	wordCounter = new int[wordIndexer.size()];
//  	tagWordIndexer = new IntegerIndexer[numStates];
//  	for (int tag=0; tag<numStates; tag++){
//  		tagWordIndexer[tag] = new IntegerIndexer(wordIndexer.size());
//  	}
////  	for (Tree<StateSet> tree : trainTrees){
////  		List<StateSet> words = tree.getYield();
////  		List<StateSet> tags = tree.getPreTerminalYield();
////  		int ind = 0;
////  		for (StateSet word : words){
////  			short tag = tags.get(ind).getState();
////				ind++;
////				if (firstTag[word.wordIndex]==0) firstTag[word.wordIndex] = tag;
////				else if (firstTag[word.wordIndex] != tag) {
//////					wordIsAmbiguous[word.wordIndex] = true;
////				}
////  		}
////  	}
//  	labelTrees(trainTrees);
//  	expectedCounts = new double[numStates][][];
//  	scores = new double[numStates][][];
//  	for (int tag=0; tag<numStates; tag++){
//  		expectedCounts[tag] = new double[numSubStates[tag]][tagWordIndexer[tag].size()];
//  		scores[tag] = new double[numSubStates[tag]][tagWordIndexer[tag].size()];
//  	}
//  }
//}
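The combination rule described in the class javadoc above, adding the (word, tag) and (signature, tag) scores for rare words but falling back to the (word, tag) score alone once a word has been seen more than knownWordCount times, can be sketched in isolation. The class below is a minimal, self-contained illustration; CombinedLexiconSketch, combinedScore, putWord, putSignature, and the last-three-characters signature function are hypothetical names invented for this sketch and are not part of the Berkeley parser API.

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

// Hypothetical sketch of the word+signature combination rule; not Berkeley parser code.
public class CombinedLexiconSketch {
    private final Map<String, double[]> wordScores = new HashMap<>(); // per-substate (word, tag) scores
    private final Map<String, double[]> sigScores  = new HashMap<>(); // per-substate (signature, tag) scores
    private final Map<String, Integer>  wordCounts = new HashMap<>(); // how often each word was seen in training
    private final int knownWordCount;  // words seen more often than this rely on the word score alone
    private final int numSubStates;

    public CombinedLexiconSketch(int numSubStates, int knownWordCount) {
        this.numSubStates = numSubStates;
        this.knownWordCount = knownWordCount;
    }

    // Crude stand-in for the parser's getSignature(): the word's last three characters.
    private static String signature(String word) {
        return word.length() <= 3 ? word : word.substring(word.length() - 3);
    }

    // score(word) = wordScore, plus signatureScore when the word is rare or unknown
    public double[] combinedScore(String word) {
        double[] res = new double[numSubStates];
        double[] w = wordScores.get(word);
        if (w != null) for (int i = 0; i < numSubStates; i++) res[i] = w[i];
        // Frequent ("known") words keep the word score only.
        if (wordCounts.getOrDefault(word, 0) > knownWordCount) return res;
        double[] s = sigScores.get(signature(word));
        if (s != null) for (int i = 0; i < numSubStates; i++) res[i] += s[i];
        return res;
    }

    // toy training setters for the demo below
    public void putWord(String word, int count, double... scores) { wordScores.put(word, scores); wordCounts.put(word, count); }
    public void putSignature(String sig, double... scores) { sigScores.put(sig, scores); }

    public static void main(String[] args) {
        CombinedLexiconSketch lex = new CombinedLexiconSketch(2, 10);
        lex.putWord("running", 3,  0.25, 0.125);   // rare word: word and signature scores are summed
        lex.putWord("walking", 50, 0.5,  0.25);    // frequent word: word score alone
        lex.putSignature("ing", 0.0625, 0.03125);
        System.out.println(Arrays.toString(lex.combinedScore("running"))); // [0.3125, 0.15625]
        System.out.println(Arrays.toString(lex.combinedScore("walking"))); // [0.5, 0.25]
    }
}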



