![JAR search and dependency download from the Maven repository](/logo.png)
edu.berkeley.nlp.PCFGLA.StateSetTreeList Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
/**
*
*/
package edu.berkeley.nlp.PCFGLA;
import java.util.AbstractCollection;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.syntax.StateSet;
import edu.berkeley.nlp.util.Numberer;
/**
* Essentially equivalent to a List>, but each Tree is re-built every time
* from the corresponding Tree. This saves a lot of memory at the expense of some time.
* Most of the code is contained in the subclass StringTreeListIterator.
*
* Beware of the behavior of hasNext(), which deallocates the current tree (the last one returned
* by next()). This is PRESUMABLY when the current tree is no longer needed, but be careful.
*
* @author Romain Thibaux
*/
public class StateSetTreeList extends AbstractCollection> {
List> trees;
static short zero = 0, one = 1;
/*
* Allocate the inside and outside score arrays for the whole tree
*/
void allocate(Tree tree) {
tree.getLabel().allocate();
for (Tree child : tree.getChildren()) {
allocate(child);
}
}
/*
* Deallocate the inside and outside score arrays for the whole tree
*/
void deallocate(Tree tree) {
tree.getLabel().deallocate();
for (Tree child : tree.getChildren()) {
deallocate(child);
}
}
/*
* create a deep copy of this object
*/
public StateSetTreeList copy(){
StateSetTreeList copy = new StateSetTreeList();
for (Tree tree : trees){
copy.add(copyTree(tree));
}
return copy;
}
/**
* @param tree
* @return
*/
private Tree copyTree(Tree tree) {
ArrayList> newChildren = new ArrayList>(tree.getChildren().size());
for (Tree child : tree.getChildren()) {
newChildren.add(copyTree(child));
}
return new Tree(tree.getLabel().copy(), newChildren);
}
public class StateSetTreeListIterator implements Iterator> {
Iterator> stringTreeListIterator;
Tree currentTree;
public StateSetTreeListIterator() {
stringTreeListIterator = trees.iterator();
currentTree = null;
}
public boolean hasNext() {
// A somewhat crappy API, the tree is deallocated when hasNext() is called,
// which is PRESUMABLY when the current tree is no longer needed.
if (currentTree != null) {
deallocate(currentTree);
}
return stringTreeListIterator.hasNext();
}
public Tree next() {
currentTree = stringTreeListIterator.next();
//allocate(currentTree);
return currentTree;
}
public void remove() {
stringTreeListIterator.remove();
}
}
/**
*
* @param trees
* @param numStates
* @param allSplitTheSame
* This should be true only if all states are being split the same
* number of times. This number is taken from numStates[0].
* @param tagNumberer
* @param dontSplitTags
*/
public StateSetTreeList(List> trees, short[] numStates, boolean allSplitTheSame, Numberer tagNumberer) {
this.trees = new ArrayList>();
for (Tree tree : trees) {
this.trees.add(stringTreeToStatesetTree(tree, numStates, allSplitTheSame, tagNumberer));
tree = null;
}
}
public StateSetTreeList(StateSetTreeList treeList, short[] numStates, boolean constant) {
this.trees = new ArrayList>();
for (Tree tree : treeList.trees) {
this.trees.add(resizeStateSetTree(tree,numStates,constant));
}
}
public StateSetTreeList() {
this.trees = new ArrayList>();
}
public boolean add(Tree tree){
return trees.add(tree);
}
public Tree get(int i){
return trees.get(i);
}
public int size() {
return trees.size();
}
public boolean isEmpty() {
return trees.isEmpty();
}
/*
* An iterator over the StateSet trees (which are re-built on the fly)
*/
public Iterator> iterator() {
return new StateSetTreeListIterator();
}
/**
* Convert a single Tree[String] to Tree[StateSet]
*
* @param tree
* @param numStates
* @param tagNumberer
* @return
*/
public static Tree stringTreeToStatesetTree (Tree tree, short[] numStates, boolean allSplitTheSame, Numberer tagNumberer){
Tree result = stringTreeToStatesetTree(tree,numStates,allSplitTheSame,tagNumberer,false,0,tree.getYield().size());
// set the positions properly:
List words = result.getYield();
//for all words in sentence
for (short position = 0; position < words.size(); position++) {
words.get(position).from = position;
words.get(position).to = (short)(position + 1);
}
return result;
}
private static Tree stringTreeToStatesetTree (Tree tree, short[] numStates, boolean allSplitTheSame, Numberer tagNumberer, boolean splitRoot, int from, int to){
if (tree.isLeaf()) {
StateSet newState = new StateSet(zero, one, tree.getLabel().intern(),(short)from,(short)to);
return new Tree(newState);
}
short label = (short)tagNumberer.number(tree.getLabel());
if (label<0) label =0;
// System.out.println(label + " " +tree.getLabel());
if (label>=numStates.length){
// System.err.println("Have never seen this state before: "+tree.getLabel());
// StateSet newState = new StateSet(zero, one, tree.getLabel().intern(),(short)from,(short)to);
// return new Tree(newState);
}
short nodeNumStates = (allSplitTheSame||numStates.length<=label) ? numStates[0] : numStates[label];
if (!splitRoot) nodeNumStates = 1;
StateSet newState = new StateSet(label, nodeNumStates, null, (short)from , (short)to);
Tree newTree = new Tree(newState);
List> newChildren = new ArrayList>();
for (Tree child : tree.getChildren()) {
short length = (short) child.getYield().size();
Tree newChild = stringTreeToStatesetTree(child, numStates, allSplitTheSame, tagNumberer, true, from, from+length);
from += length;
newChildren.add(newChild);
}
newTree.setChildren(newChildren);
return newTree;
}
private static Tree resizeStateSetTree (Tree tree, short[] numStates, boolean constant) {
if (tree.isLeaf()) {
return tree;
}
short state = tree.getLabel().getState();
short newNumStates = constant ? numStates[0] : numStates[state];
StateSet newState = new StateSet(tree.getLabel(), newNumStates);
Tree newTree = new Tree(newState);
List> newChildren = new ArrayList>();
for (Tree child : tree.getChildren()) {
newChildren.add(resizeStateSetTree(child, numStates, constant));
}
newTree.setChildren(newChildren);
return newTree;
}
/**
* @param trainTrees
* @param tagNumberer
*/
public static void initializeTagNumberer(List> trees, Numberer tagNumberer) {
short[] nSub = new short[2];
nSub[0] = 1;
nSub[1] = 1;
for (Tree tree : trees) {
Tree tmp = stringTreeToStatesetTree(tree, nSub, true, tagNumberer);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy