![JAR search and dependency download from the Maven repository](/logo.png)
edu.berkeley.nlp.PCFGLA.ConditionalPipeline Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
/**
*
*/
package edu.berkeley.nlp.PCFGLA;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import edu.berkeley.nlp.PCFGLA.ConditionalTrainer.Options;
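/**
* Command-line driver that chains the training steps of the conditional
* parser: it trains a baseline generative grammar, computes pruning
* constraints with it, trains an unsplit conditional grammar and then runs
* six split iterations, each followed by a new constraint computation.
* Steps whose output file already exists are skipped unless the
* <code>dontLoad</code> option is set.
*
* @author petrov
*
*/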
public class ConditionalPipeline {
/**
* When true, main passes the "-initializeZero" flag to ConditionalTrainer
* when it trains the unsplit conditional grammar (split_0) and no
* initialization directory was given on the command line.
*/
public static boolean initializeWithZero = true;
/**
 * Runs the whole training pipeline and terminates the JVM with exit code 0.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>create the output directory (<code>opts.outDir</code>);</li>
 *   <li>unless <code>opts.initializeDir</code> is set: train the baseline
 *       generative grammar <code>base_gen.gr</code> and compute the split_0
 *       constraints for the training, dev and final sections with it;</li>
 *   <li>train the unsplit conditional grammar <code>split_0.gr</code>;</li>
 *   <li>for split = 1..6: train <code>split_N.gr</code> and, unless
 *       <code>opts.initializeDir</code> is set, compute the split_N
 *       constraints for all three sections.</li>
 * </ol>
 * A step is skipped when its output file already exists and
 * <code>opts.dontLoad</code> is false.
 *
 * @param args command-line arguments, parsed as ConditionalTrainer.Options
 *             and forwarded (with step-specific additions) to every stage
 */
public static void main(String[] args) {
    OptionParser optParser = new OptionParser(ConditionalTrainer.Options.class);
    Options opts = (Options) optParser.parse(args, true);

    final int numSplits = 6;
    String dirName = opts.outDir;
    String baseName = "split_";

    // mkdir() also returns false when the directory is already there, which
    // is the normal situation when a previous run is being resumed.
    File directory = new File(dirName);
    if (!directory.isDirectory() && !directory.mkdir()) {
        System.out.println("Failed to make directory.");
    }

    // first train an x-bar generative grammar
    String baselineGrammar = dirName + "/" + "base_gen.gr";
    List<String> baselineArgsList = new ArrayList<String>(Arrays.asList(
            "-path", opts.path,
            "-trfr", "" + opts.trainingFractionToKeep,
            "-treebank", opts.treebank + "",
            "-out", baselineGrammar,
            "-baseline",
            "-maxL", opts.maxL + "",
            "-b", opts.binarization + ""));
    // each preprocessing flag is forwarded only when its own option is set
    if (opts.markUnaryParents) {
        baselineArgsList.add("-markUnaryParents");
    }
    if (opts.filterStupidFrickinWHNP) {
        baselineArgsList.add("-filterStupidFrickinWHNP");
    }
    if (opts.collapseUnaries) {
        baselineArgsList.add("-collapseUnaries");
    }
    String[] baselineArgs = baselineArgsList.toArray(new String[0]);

    File nextFile = new File(baselineGrammar);
    if (opts.initializeDir == null) {
        if (!nextFile.exists() || opts.dontLoad) {
            ConditionalTrainer.main(baselineArgs);
        } else {
            announceSkip(nextFile);
        }

        // now compute constraints with x-bar generative grammar
        String initialPrefix = dirName + "/" + baseName + "0";
        nextFile = new File(initialPrefix + "-0.data");
        if (nextFile.exists() && !opts.dontLoad) {
            announceSkip(nextFile);
        } else {
            computeAllConstraints(args, null, initialPrefix, baselineGrammar);
        }
    }

    // then train an x-bar generative grammar with the simple lexicon
    nextFile = new File(dirName + "/" + baseName + "0.gr");
    List<String> baselineCondList = new ArrayList<String>();
    baselineCondList.add("-out");
    baselineCondList.add(nextFile.toString());
    baselineCondList.add("-cons");
    if (opts.initializeDir != null) {
        baselineCondList.add(opts.initializeDir + "/" + baseName + "0");
        baselineCondList.add("-in");
        baselineCondList.add(opts.initializeDir + "/" + baseName + "0.gr");
    } else {
        baselineCondList.add(dirName + "/" + baseName + "0");
        // add the flag only when requested instead of passing an empty argument
        if (initializeWithZero) {
            baselineCondList.add("-initializeZero");
        }
    }
    baselineCondList.addAll(Arrays.asList("-doNOTprojectConstraints", "-noSplit", "-doConditional"));
    String[] baselineCondArgs = addOptions(args, baselineCondList.toArray(new String[0]));

    if (!nextFile.exists() || opts.dontLoad) {
        ConditionalTrainer.main(baselineCondArgs);
        if (opts.testAll) {
            testAllGrammars(opts, dirName + "/" + baseName + "0_dev-0.data", baseName + "0.gr", true);
        }
    } else {
        announceSkip(nextFile);
    }

    // loop:
    for (int split = 1; split <= numSplits; split++) {
        System.out.println("\n\nIn " + split + ". Split-Iteration.");
        String previousGrammar = dirName + "/" + baseName + (split - 1);
        String currentGrammar = dirName + "/" + baseName + split;

        // split grammar and train it
        nextFile = new File(currentGrammar + ".gr");
        String[] trainArgs;
        if (opts.initializeDir == null) {
            trainArgs = addOptions(args, new String[] {
                    "-in", previousGrammar + ".gr",
                    "-doConditional",
                    "-cons", previousGrammar,
                    "-out", nextFile.toString()});
        } else {
            // start from the already split grammar of the initialization
            // directory, hence -noSplit
            trainArgs = addOptions(args, new String[] {
                    "-in", opts.initializeDir + "/" + baseName + split + ".gr",
                    "-doConditional", "-noSplit",
                    "-cons", opts.initializeDir + "/" + baseName + (split - 1),
                    "-out", nextFile.toString()});
        }

        if (!nextFile.exists() || opts.dontLoad) {
            ConditionalTrainer.main(trainArgs);
            if (opts.testAll) {
                testAllGrammars(opts, previousGrammar + "_dev-0.data", baseName + split + ".gr", false);
            }
        } else {
            announceSkip(nextFile);
        }

        // compute constraints with new grammar
        if (opts.initializeDir == null) {
            nextFile = new File(currentGrammar + "-0.data");
            if (nextFile.exists() && !opts.dontLoad) {
                announceSkip(nextFile);
            } else {
                // includes the final/test section, as the split_0 step does
                computeAllConstraints(args, previousGrammar, currentGrammar, currentGrammar + ".gr");
            }
        }
    }
    System.exit(0);
}

/** Reports that a pipeline step is skipped because its output file is already present. */
private static void announceSkip(File existing) {
    System.out.println("Skipping this step since " + existing.toString() + " already exists.");
}

/** Runs ParserConstrainer for the training section and then for the "dev" and "final" sections. */
private static void computeAllConstraints(String[] args, String previousPrefix, String outPrefix, String grammarFile) {
    constrainSection(args, previousPrefix, outPrefix, grammarFile, "", null);
    constrainSection(args, previousPrefix, outPrefix, grammarFile, "_dev", "dev");
    constrainSection(args, previousPrefix, outPrefix, grammarFile, "_test", "final");
}

/**
 * Runs ParserConstrainer once; previousPrefix == null omits "-cons", and
 * section == null omits "-section"/"-nChunks" (the training-section call).
 */
private static void constrainSection(String[] args, String previousPrefix, String outPrefix,
        String grammarFile, String suffix, String section) {
    List<String> extra = new ArrayList<String>();
    if (previousPrefix != null) {
        extra.add("-cons");
        extra.add(previousPrefix + suffix);
    }
    extra.addAll(Arrays.asList("-out", outPrefix + suffix, "-in", grammarFile));
    if (section != null) {
        extra.addAll(Arrays.asList("-section", section, "-nChunks", "1"));
    }
    extra.add("-outputLog");
    extra.add(outPrefix + suffix + ".cons.log");
    ParserConstrainer.main(addOptions(args, extra.toArray(new String[0])));
}

/** Calls GrammarTester with "-testAll" on the given grammar file name, using the given dev constraint file. */
private static void testAllGrammars(Options opts, String devConstraintFile, String grammarFileName,
        boolean skipConstraintProjection) {
    System.out.println("Testing all grammars to determine which one was the best and should be split next");
    List<String> testArgs = new ArrayList<String>();
    if (skipConstraintProjection) {
        testArgs.add("-doNOTprojectConstraints");
    }
    testArgs.addAll(Arrays.asList(
            "-cons", devConstraintFile,
            "-testAll",
            "-path", opts.path,
            "-in", grammarFileName,
            "-filePath", opts.outDir,
            "-treebank", opts.treebank + "",
            "-maxL", opts.maxL + "",
            "-parser", "plain",
            "-nProcess", opts.nProcess + ""));
    GrammarTester.main(testArgs.toArray(new String[0]));
}
private static String[] addOptions(String[] a, String[] b) {
String[] res = new String[a.length+b.length];
for (int i=0; i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy