edu.stanford.nlp.parser.charniak.CharniakParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-corenlp Show documentation
Show all versions of stanford-corenlp Show documentation
Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.
package edu.stanford.nlp.parser.charniak;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.*;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
/**
* Runs charniak parser using command line
*
* @author Angel Chang
*/
public class CharniakParser {
private final static Logger logger = Logger.getLogger(CharniakParser.class.getName());
private static final String CHARNIAK_DIR = "/u/nlp/packages/bllip-parser/";
// note: this is actually the parser+reranker (will use 2 CPUs)
private static final String CHARNIAK_BIN = "./parse-50best.sh";
private final CharniakScoredParsesReaderWriter scoredParsesReaderWriter = new CharniakScoredParsesReaderWriter();
private String dir = CHARNIAK_DIR;
private String parserExecutable = CHARNIAK_BIN;
/** Do not parse sentences larger than this sentence length */
private int maxSentenceLength = 400;
private int beamSize = 0;
public CharniakParser() {}
public CharniakParser(String dir, String parserExecutable) {
this.parserExecutable = parserExecutable;
this.dir = dir;
}
public int getBeamSize() {
return beamSize;
}
public void setBeamSize(int beamSize) {
this.beamSize = beamSize;
}
public int getMaxSentenceLength() {
return maxSentenceLength;
}
public void setMaxSentenceLength(int maxSentenceLength) {
this.maxSentenceLength = maxSentenceLength;
}
public Tree getBestParse(List sentence)
{
ScoredObject scoredParse = getBestScoredParse(sentence);
return (scoredParse != null)? scoredParse.object():null;
}
public ScoredObject getBestScoredParse(List sentence)
{
List> kBestParses = getKBestParses(sentence, 1);
if (kBestParses != null) {
return kBestParses.get(0);
}
return null;
}
public List> getKBestParses(List sentence, int k)
{
return getKBestParses(sentence, k, true);
}
public List> getKBestParses(List sentence, int k, boolean deleteTempFiles)
{
try {
File inFile = File.createTempFile("charniak.", ".in");
if (deleteTempFiles) inFile.deleteOnExit();
File outFile = File.createTempFile("charniak.", ".out");
if (deleteTempFiles) outFile.deleteOnExit();
File errFile = File.createTempFile("charniak.", ".err");
if (deleteTempFiles) errFile.deleteOnExit();
printSentence(sentence, inFile.getAbsolutePath());
runCharniak(k, inFile.getAbsolutePath(), outFile.getAbsolutePath(), errFile.getAbsolutePath());
Iterable>> iter = scoredParsesReaderWriter.readScoredTrees(outFile.getAbsolutePath());
if (deleteTempFiles) {
inFile.delete();
outFile.delete();
errFile.delete();
}
return iter.iterator().next();
} catch (IOException ex) {
throw new RuntimeException(ex);
}
}
public Iterable>> getKBestParses(Iterable> sentences, int k)
{
return getKBestParses(sentences, k, true);
}
public Iterable>> getKBestParses(Iterable> sentences, int k, boolean deleteTempFiles)
{
try {
File inFile = File.createTempFile("charniak.", ".in");
if (deleteTempFiles) inFile.deleteOnExit();
File outFile = File.createTempFile("charniak.", ".out");
if (deleteTempFiles) outFile.deleteOnExit();
File errFile = File.createTempFile("charniak.", ".err");
if (deleteTempFiles) errFile.deleteOnExit();
printSentences(sentences, inFile.getAbsolutePath());
runCharniak(k, inFile.getAbsolutePath(), outFile.getAbsolutePath(), errFile.getAbsolutePath());
Iterable>> iter = scoredParsesReaderWriter.readScoredTrees(outFile.getAbsolutePath());
if (deleteTempFiles) {
inFile.delete();
outFile.delete();
errFile.delete();
}
return new IterableIterator<>(iter.iterator());
} catch (IOException ex) {
throw new RuntimeException(ex);
}
}
public void printSentence(List sentence, String filename)
{
List> sentences = new ArrayList<>();
sentences.add(sentence);
printSentences(sentences, filename);
}
public void printSentences(Iterable> sentences, String filename)
{
try {
PrintWriter pw = IOUtils.getPrintWriter(filename);
for (List sentence:sentences) {
pw.print(" "); // Note: Use to identify sentences
String sentString = SentenceUtils.listToString(sentence);
if (sentence.size() > maxSentenceLength) {
logger.warning("Sentence length=" + sentence.size() +
" is longer than maximum set length " + maxSentenceLength);
logger.warning("Long Sentence: " + sentString);
}
pw.print(sentString);
pw.println(" ");
}
pw.close();
} catch (IOException ex) {
throw new RuntimeException(ex);
}
}
public void runCharniak(int n, String infile, String outfile, String errfile)
{
try {
if (n == 1) n++; // Charniak does not output score if n = 1?
List args = new ArrayList<>();
args.add(parserExecutable);
args.add(infile);
ProcessBuilder process = new ProcessBuilder(args);
process.directory(new File(this.dir));
PrintWriter out = IOUtils.getPrintWriter(outfile);
PrintWriter err = IOUtils.getPrintWriter(errfile);
SystemUtils.run(process, out, err);
out.close();
err.close();
} catch (IOException ex) {
throw new RuntimeException(ex);
}
}
}