All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.berkeley.nlp.PCFGLA.StateSetTreeList Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
/**
 * 
 */
package edu.berkeley.nlp.PCFGLA;

import java.util.AbstractCollection;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.syntax.StateSet;
import edu.berkeley.nlp.util.Numberer;

/**
 * Essentially equivalent to a {@code List<Tree<StateSet>>}, but each {@code Tree<StateSet>} is
 * re-built every time from the corresponding {@code Tree<String>}. This saves a lot of memory at
 * the expense of some time.
 * Most of the code is contained in the inner class StateSetTreeListIterator.
 *
 * Beware of the behavior of hasNext(), which deallocates the current tree (the last one returned
 * by next()). This is PRESUMABLY when the current tree is no longer needed, but be careful.
 * 
 * @author Romain Thibaux
 */
public class StateSetTreeList extends AbstractCollection> {
  List> trees;
  static short zero = 0, one = 1;
  /*
   * Allocate the inside and outside score arrays for the whole tree
   */
  void allocate(Tree tree) {
    tree.getLabel().allocate();
    for (Tree child : tree.getChildren()) {
      allocate(child);
    }
  }
  
  /*
   * Deallocate the inside and outside score arrays for the whole tree
   */
  void deallocate(Tree tree) {
    tree.getLabel().deallocate();
    for (Tree child : tree.getChildren()) {
      deallocate(child);
    }
  }
  
  /*
   * create a deep copy of this object
   */
  public StateSetTreeList copy(){
  	StateSetTreeList copy = new StateSetTreeList();
  	for (Tree tree : trees){
  		copy.add(copyTree(tree));
  	}
  	return copy;
  }
  
  
  /**
	 * @param tree
	 * @return
	 */
	private Tree copyTree(Tree tree) {
  	ArrayList> newChildren = new ArrayList>(tree.getChildren().size());
  	for (Tree child : tree.getChildren()) {
  		newChildren.add(copyTree(child));
  	}
  	return new Tree(tree.getLabel().copy(), newChildren);
	}


	public class StateSetTreeListIterator implements Iterator> {
    Iterator> stringTreeListIterator;
    Tree currentTree;
    
    public StateSetTreeListIterator() {
      stringTreeListIterator = trees.iterator();
      currentTree = null;
    }

    public boolean hasNext() {
      // A somewhat crappy API, the tree is deallocated when hasNext() is called,
      // which is PRESUMABLY when the current tree is no longer needed.
      if (currentTree != null) {
        deallocate(currentTree);
      }
      return stringTreeListIterator.hasNext();
    }

    public Tree next() {
      currentTree = stringTreeListIterator.next();
      //allocate(currentTree);
      return currentTree;
    }
    
    public void remove() {
      stringTreeListIterator.remove();
    }
  }

  /**
	 * 
	 * @param trees
	 * @param numStates
	 * @param allSplitTheSame
	 *          This should be true only if all states are being split the same
	 *          number of times. This number is taken from numStates[0].
	 * @param tagNumberer
	 * @param dontSplitTags
	 */
  public StateSetTreeList(List> trees, short[] numStates, boolean allSplitTheSame, Numberer tagNumberer) {
    this.trees = new ArrayList>();
    for (Tree tree : trees) {
      this.trees.add(stringTreeToStatesetTree(tree, numStates, allSplitTheSame, tagNumberer));
      tree = null;
    }
  }
  
  public StateSetTreeList(StateSetTreeList treeList, short[] numStates, boolean constant) {
  	this.trees = new ArrayList>();
  	for (Tree tree : treeList.trees) {
  		this.trees.add(resizeStateSetTree(tree,numStates,constant));
  	}
  }

  public StateSetTreeList() {
  	this.trees = new ArrayList>();
  }
  
  public boolean add(Tree tree){
  	return trees.add(tree);
  }
  
  public Tree get(int i){
  	return trees.get(i);
  }

  public int size() {
    return trees.size();
  }

  public boolean isEmpty() {
    return trees.isEmpty();
  }

  /* 
   * An iterator over the StateSet trees (which are re-built on the fly)
   */
  public Iterator> iterator() {
    return new StateSetTreeListIterator();
  }

  /**
   * Convert a single Tree[String] to Tree[StateSet]
   * 
   * @param tree
   * @param numStates
   * @param tagNumberer
   * @return
   */
  public static Tree stringTreeToStatesetTree (Tree tree, short[] numStates, boolean allSplitTheSame, Numberer tagNumberer){
    Tree result = stringTreeToStatesetTree(tree,numStates,allSplitTheSame,tagNumberer,false,0,tree.getYield().size());
    // set the positions properly:
  	List words = result.getYield();
  	//for all words in sentence
  	for (short position = 0; position < words.size(); position++) {
  		words.get(position).from = position;
  		words.get(position).to = (short)(position + 1);
  	}
  	return result;
  }
    
  private static Tree stringTreeToStatesetTree (Tree tree, short[] numStates, boolean allSplitTheSame, Numberer tagNumberer, boolean splitRoot, int from, int to){
    if (tree.isLeaf()) {
      StateSet newState = new StateSet(zero, one, tree.getLabel().intern(),(short)from,(short)to);
      return new Tree(newState);
    }
    short label = (short)tagNumberer.number(tree.getLabel());
    if (label<0) label =0;
//    System.out.println(label + " " +tree.getLabel());
    if (label>=numStates.length){
//    	System.err.println("Have never seen this state before: "+tree.getLabel());
//      StateSet newState = new StateSet(zero, one, tree.getLabel().intern(),(short)from,(short)to);
//      return new Tree(newState);
    }
    short nodeNumStates = (allSplitTheSame||numStates.length<=label) ? numStates[0] : numStates[label];
    if (!splitRoot) nodeNumStates = 1;
    StateSet newState = new StateSet(label, nodeNumStates, null, (short)from , (short)to);
    Tree newTree = new Tree(newState);
    List> newChildren = new ArrayList>(); 
    for (Tree child : tree.getChildren()) {
    	short length = (short) child.getYield().size(); 
      Tree newChild = stringTreeToStatesetTree(child, numStates, allSplitTheSame, tagNumberer, true, from, from+length);
      from += length;
      newChildren.add(newChild);
    }
    newTree.setChildren(newChildren);
    return newTree;
  }
  
  private static Tree resizeStateSetTree (Tree tree, short[] numStates, boolean constant) {
  	if (tree.isLeaf()) {
  		return tree;
  	}
  	short state = tree.getLabel().getState();
    short newNumStates = constant ? numStates[0] : numStates[state];
  	StateSet newState = new StateSet(tree.getLabel(), newNumStates);
  	Tree newTree = new Tree(newState);
  	List> newChildren = new ArrayList>();
  	for (Tree child : tree.getChildren()) {
  		newChildren.add(resizeStateSetTree(child, numStates, constant));
  	}
  	newTree.setChildren(newChildren);
  	return newTree;
  }

	/**
	 * @param trainTrees
	 * @param tagNumberer
	 */
	public static void initializeTagNumberer(List> trees, Numberer tagNumberer) {
    short[] nSub = new short[2];
    nSub[0] = 1;
    nSub[1] = 1;
    for (Tree tree : trees) {
      Tree tmp = stringTreeToStatesetTree(tree, nSub, true, tagNumberer);
    }
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy