All downloads are free. Search and download functionality uses the official Maven repository.

edu.berkeley.nlp.syntax.RichLabel Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
package edu.berkeley.nlp.syntax;

import edu.berkeley.nlp.ling.HeadFinder;
import edu.berkeley.nlp.ling.CollinsHeadFinder;
import edu.berkeley.nlp.util.Pair;

import java.util.List;
import java.util.ArrayList;
import java.io.StringReader;

/**
 * Created by IntelliJ IDEA.
 * User: aria42
 * Date: Oct 25, 2008
 * Time: 4:04:53 PM
 */
public class RichLabel {
	private String headWord;
	private String headTag;
	private int start;
	private int stop;
	private int headIndex;
	private String label;
	private Tree origNode;

	public int getSpanSize() {
		return stop-start;	
	}

	public int getHeadIndex() {
		return headIndex;
	}

	public void setHeadIndex(int headIndex) {
		this.headIndex = headIndex;
	}

	public String getHeadWord() {
		return headWord;
	}

	public void setHeadWord(String headWord) {
		this.headWord = headWord;
	}

	public String getHeadTag() {
		return headTag;
	}

	public void setHeadTag(String headTag) {
		this.headTag = headTag;
	}

	public int getStart() {
		return start;
	}

	public void setStart(int start) {
		this.start = start;
	}

	public int getStop() {
		return stop;
	}

	public void setStop(int stop) {
		this.stop = stop;
	}

	public String getLabel() {
		return label;
	}

	public void setLabel(String label) {
		this.label = label;
	}

	public Tree getOriginalNode() {
		return origNode;
	}

	public void setOriginalNode(Tree origNode) {
		this.origNode = origNode;
	}

	@Override
	public String toString() {
		return String.format("%s(%s[%d]-%s)[%d,%d]",label,headWord,headIndex,headTag,start,stop);	
	}

  private static final CollinsHeadFinder cf = new CollinsHeadFinder();

  public static Tree getRichTree(Tree tree) {
    return getRichTree(tree, cf);    
  }


	public static Tree getRichTree(Tree tree, HeadFinder headFinder) {
		return buildRecursive(tree,headFinder, 0);
	}

	private static Pair getHeadWordTag(Tree tree, HeadFinder headFinder) {
		if (tree.isPreTerminal()) {
			Tree term = tree.getChildren().get(0);
			return Pair.newPair(term.getLabel(),tree.getLabel());
		}
		if (tree.isLeaf()) {
			return Pair.newPair(tree.getLabel(),null);
		}
		Tree head = headFinder.determineHead(tree);
		return getHeadWordTag(head,headFinder);
	}

	private static Tree buildRecursive(Tree tree, HeadFinder headFinder, int start) {
		RichLabel label = new RichLabel();
		label.setStart(start);
		label.setStop(start + tree.getYield().size());
		label.setLabel(tree.getLabel());
		label.setOriginalNode(tree);
		Pair headWordTagPair = getHeadWordTag(tree,headFinder);		
		label.setHeadWord(headWordTagPair.getFirst());
		label.setHeadTag(headWordTagPair.getSecond());
		int offset = start;
		List> richChildren = new ArrayList>();
		for (Tree child : tree.getChildren()) {
			Tree richChild = buildRecursive(child,headFinder,offset);
			richChildren.add(richChild);
			offset += child.getYield().size();
		}
		// Head Index
		if (tree.isPhrasal()) {
			Tree headChild = headFinder.determineHead(tree);
			for (Tree child: richChildren) {
			   if (child.getLabel().origNode == headChild) {
				   label.setHeadIndex(child.getLabel().getHeadIndex());
			   }
			}
		}   else {
			label.setHeadIndex(label.start);
		}
		return new Tree(label,richChildren);
	}

	public static void main(String[] args) {
		String tStr = "((S (NP (DT The) (NN man)) (VP (VBD ran) (PP (IN down) (NP (DT the) (NNS stairs))))))";
		Tree t = new Trees.PennTreeReader(new StringReader(tStr)).next();
		System.out.println("Rich Tree: " + getRichTree(t, new CollinsHeadFinder()));
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy