All downloads are free. The search and download functionality uses the official Maven repository.

com.googlecode.clearnlp.experiment.ExtractVerbPredicates Maven / Gradle / Ivy

package com.googlecode.clearnlp.experiment;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

import com.googlecode.clearnlp.dependency.DEPArc;
import com.googlecode.clearnlp.dependency.DEPLib;
import com.googlecode.clearnlp.dependency.DEPNode;
import com.googlecode.clearnlp.dependency.DEPTree;
import com.googlecode.clearnlp.reader.SRLReader;
import com.googlecode.clearnlp.util.UTFile;
import com.googlecode.clearnlp.util.UTInput;
import com.googlecode.clearnlp.util.UTOutput;

public class ExtractVerbPredicates
{
	/**
	 * Compiled regular expression {@code VB.*}: the literal "VB" followed by zero or more
	 * characters (excluding line terminators).
	 * NOTE(review): presumably applied to part-of-speech tags to recognise verbs; the use
	 * site is not visible in this part of the file -- confirm.
	 */
	final Pattern P_VERBS = Pattern.compile("VB.*");
	
	/**
	 * Reads every file listed by {@code UTFile.getSortedFileList(inputDir)}, and writes each
	 * tree for which {@link #strip(DEPTree)} returns {@code true} to {@code outputFile}.
	 * Each written tree is emitted as {@code tree.toStringSRL()} followed by a blank line.
	 * The name of every input file is echoed to standard output as it is opened.
	 *
	 * @param inputDir   directory whose (sorted) file list is processed.
	 * @param outputFile path of the file receiving the retained trees.
	 */
	public ExtractVerbPredicates(String inputDir, String outputFile)
	{
		PrintStream fout = UTOutput.createPrintBufferedFileStream(outputFile);
		
		// try/finally so the output stream is flushed and closed even if reading
		// or stripping fails part-way through.
		try
		{
			SRLReader reader = new SRLReader(0, 1, 3, 5, 6, 8, 10, 12);
			String[] inputFiles = UTFile.getSortedFileList(inputDir);
			DEPTree tree;
			
			for (String inputFile : inputFiles)
			{
				reader.open(UTInput.createBufferedFileReader(inputFile));
				
				// Close the current input even when a tree fails to parse or strip.
				try
				{
					System.out.println(inputFile);
					
					while ((tree = reader.next()) != null)
					{
						if (strip(tree))
							fout.println(tree.toStringSRL()+"\n");
					}
				}
				finally
				{
					reader.close();
				}
			}
		}
		finally
		{
			fout.close();
		}
	}
	
	public boolean strip(DEPTree tree)
	{
		Set preds = getPredicates(tree);
		if (preds.isEmpty())	return false;
		
		int i, size = tree.size();
		List remove;
		DEPNode node;
		
		for (i=1; i();
			
			for (DEPArc arc : node.getSHeads())
			{
				if (!preds.contains(arc.getNode()))
					remove.add(arc);
			}
			
			node.removeSHeads(remove);
		}
		
		return true;
	}
	
	public Set getPredicates(DEPTree tree)
	{
		Set preds = new HashSet();
		int i, size = tree.size();
		DEPNode node;
		
		for (i=1; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy