All downloads are free. The search and download functionality uses the official Maven repository.

edu.berkeley.nlp.conll.PTBtoDep Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
/**
 * 
 */
package edu.berkeley.nlp.conll;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

import edu.berkeley.nlp.PCFGLA.Option;
import edu.berkeley.nlp.PCFGLA.OptionParser;
import edu.berkeley.nlp.PCFGLA.GrammarTrainer.Options;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.syntax.Trees.PennTreeReader;

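/**
 * Command-line tool that reads trees with {@link PennTreeReader} from the file
 * given by {@code -in} and prints tab-separated head-index lines to standard output.
 *
 * @author petrov
 */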
public class PTBtoDep {

	/**
	 * Command-line options of this tool. {@code main} builds an
	 * {@code OptionParser} from this class and casts the parse result to it.
	 * <p>
	 * Note: inside {@code PTBtoDep} this nested class shadows the imported
	 * {@code GrammarTrainer.Options}.
	 */
	public static class Options {

		// Path of the file the trees are read from (flag "-in", marked required).
		// Presumably assigned by OptionParser.parse via the @Option annotation -- confirm.
		@Option(name = "-in", required = true, usage = "Input File for Trees (Required)")
		public String inFileName;

	}
	
	/**
	 * Command-line entry point: reads every tree from the file named by the
	 * {@code -in} option (decoded as UTF-8) and passes it to
	 * {@code printDependencies}.
	 * <p>
	 * A tree whose label is {@code ROOT} is replaced by its first child before
	 * printing. Any exception is reported with a stack trace on standard error
	 * and ends processing; the input reader is closed in all cases.
	 *
	 * @param args command-line arguments, parsed by {@code OptionParser} against {@link Options}
	 */
	public static void main(String[] args) {
		OptionParser optParser = new OptionParser(Options.class);
		Options opts = (Options) optParser.parse(args, true);

		String fileName = opts.inFileName;
		InputStreamReader reader = null;
		try {
			reader = new InputStreamReader(new FileInputStream(fileName), Charset.forName("UTF-8"));
			PennTreeReader treeReader = new PennTreeReader(reader);
			while (treeReader.hasNext()) {
				Tree<String> rootedTree = treeReader.next();
				// Unwrap a top-level ROOT node so printing starts at its first child.
				if (rootedTree.getLabel().equals("ROOT"))
					rootedTree = rootedTree.getChildren().get(0);
				printDependencies(rootedTree);
			}
		} catch (Exception ex) {
			ex.printStackTrace();
		} finally {
			// Release the file handle even when reading or printing fails.
			if (reader != null) {
				try {
					reader.close();
				} catch (Exception closeEx) {
					closeEx.printStackTrace();
				}
			}
		}
	}

	/**
	 * Prints the head-index lines for one tree to standard output.
	 * <p>
	 * A tree whose yield has at most one element produces the single line
	 * {@code 0\t_\t_\t_}. Otherwise the head position of the top node is
	 * computed with {@code findHead} and the four-argument overload is called
	 * with {@code previousWords = 0} and {@code parentOfParent = 0}.
	 * <p>
	 * NOTE(review): in this copy of the file the end of this method and the
	 * beginning of the four-argument overload's signature are missing (see the
	 * {@code while} line below); the code does not compile as it stands.
	 *
	 * @param tree the tree to print
	 */
	private static void printDependencies(Tree tree) {
//		System.out.println(tree);
		// Zero or one word: there is nothing to attach, so emit a single head-0 line.
		if (tree.getYield().size()<=1) 	{
			System.out.println(0+"\t_\t_\t_");
			return;
		}
		int thisHead = findHead(tree);
		int nWordsFound = printDependencies(tree, thisHead, 0, 0);
		int nWords = tree.getYield().size();
		// NOTE(review): the next line is truncated in this copy. The loop condition
		// comparing nWords with (presumably) nWordsFound, the loop body, the end of
		// this method and the start of the next signature -- apparently
		// "printDependencies(<tree type> tree, int parent, int previousWords,
		// int parentOfParent)" returning int -- are missing. Restore them from the
		// upstream source; do not guess.
		while (nWords tree, int parent, int previousWords, int parentOfParent) {
		// Walk the children left to right. previousWords counts the words already
		// passed (0-based), while parent is a 1-based word index, hence the
		// "parent-1" comparison below.
		for (Tree child : tree.getChildren()){
			if (previousWords==parent-1){ // we are at the parent of this (sub)tree
				// The head of this subtree attaches to the head of the enclosing subtree.
				System.out.println(parentOfParent+"\t_\t_\t_");
//						(previousWords+1) + 
//						"\t" + child.getChildren().get(0).getLabel() + 
//						"\t" + child.getLabel()+
//						"\t" + parentOfParent);
				// NOTE(review): a child spanning more than one word is only reported
				// on stderr; the counter below still advances by a single word.
				if (child.getYield().size()>1) 
					System.err.println(child);
				previousWords++;
			} else if (child.isPreTerminal()){
				// Any other pre-terminal child attaches to this subtree's head.
				System.out.println(parent+"\t_\t_\t_");
//						(previousWords+1) + 
//						"\t" + child.getChildren().get(0).getLabel() + 
//						"\t" + child.getLabel()+
//						"\t" + parent);
				previousWords++;
			} else {
				// findHead is relative to the child, so shift it by the words already
				// passed; the current head becomes the child's parentOfParent.
				int thisHead =  previousWords + findHead(child);
				// The recursive return value is ignored; the counter is advanced
				// by the child's full yield instead.
				printDependencies(child, thisHead, previousWords, parent);
				previousWords += child.getYield().size();
			}
		}
		return previousWords;

	}
	/**
	 * Finds the 1-based position, within the yield of {@code tree}, of its head word.
	 * <p>
	 * The head is the last direct pre-terminal child whose label equals the
	 * label of {@code tree} with a trailing {@code *} removed. Children that do
	 * not match advance the position by the size of their yield.
	 *
	 * @param tree the subtree whose head is wanted; its label is expected to end in {@code *}
	 * @return the 1-based offset of the head within this subtree's yield, or
	 *         {@code -1} when no direct pre-terminal child matches
	 */
	private static int findHead(Tree<String> tree) {
		String headLabel = tree.getLabel();
		// Only strip a real head marker: blindly dropping the last character
		// throws on an empty label and would turn e.g. "TOP" into "TO", which
		// could then wrongly match a "TO" pre-terminal.
		if (headLabel.endsWith("*")) {
			headLabel = headLabel.substring(0, headLabel.length() - 1);
		}
		int headIndex = -2; // sentinel: becomes -1 after the +1 below when no head is found
		int previousWords = 0;
		for (Tree<String> child : tree.getChildren()) {
			if (child.isPreTerminal() && child.getLabel().equals(headLabel)) {
				// found a potential head; a later match overwrites an earlier one
				headIndex = previousWords;
				previousWords++;
			} else {
				previousWords += child.getYield().size();
			}
		}
		return headIndex + 1; // +1 since indices start with 1
	}

	
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy