All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.stanford.nlp.parser.charniak.CharniakParser Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.parser.charniak;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.*;

import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;

/**
 * Runs charniak parser using command line
 *
 * @author Angel Chang
 */
public class CharniakParser {
  private final static Logger logger = Logger.getLogger(CharniakParser.class.getName());

  private static final String CHARNIAK_DIR = "/u/nlp/packages/bllip-parser/";
  // note: this is actually the parser+reranker (will use 2 CPUs)
  private static final String CHARNIAK_BIN = "./parse-50best.sh";

  private final CharniakScoredParsesReaderWriter scoredParsesReaderWriter = new CharniakScoredParsesReaderWriter();

  private String dir = CHARNIAK_DIR;
  private String parserExecutable = CHARNIAK_BIN;

  /** Do not parse sentences larger than this sentence length */
  private int maxSentenceLength = 400;
  private int beamSize = 0;

  public CharniakParser() {}

  public CharniakParser(String dir, String parserExecutable) {
    this.parserExecutable = parserExecutable;
    this.dir = dir;
  }

  public int getBeamSize() {
    return beamSize;
  }

  public void setBeamSize(int beamSize) {
    this.beamSize = beamSize;
  }

  public int getMaxSentenceLength() {
    return maxSentenceLength;
  }

  public void setMaxSentenceLength(int maxSentenceLength) {
    this.maxSentenceLength = maxSentenceLength;
  }

  public Tree getBestParse(List sentence)
  {
    ScoredObject scoredParse = getBestScoredParse(sentence);
    return (scoredParse != null)? scoredParse.object():null;
  }

  public ScoredObject getBestScoredParse(List sentence)
  {
    List> kBestParses = getKBestParses(sentence, 1);
    if (kBestParses != null) {
      return kBestParses.get(0);
    }
    return null;
  }

  public List> getKBestParses(List sentence, int k)
  {
    return getKBestParses(sentence, k, true);
  }

  public List> getKBestParses(List sentence, int k, boolean deleteTempFiles)
  {
    try {
      File inFile = File.createTempFile("charniak.", ".in");
      if (deleteTempFiles) inFile.deleteOnExit();
      File outFile = File.createTempFile("charniak.", ".out");
      if (deleteTempFiles) outFile.deleteOnExit();
      File errFile = File.createTempFile("charniak.", ".err");
      if (deleteTempFiles) errFile.deleteOnExit();
      printSentence(sentence, inFile.getAbsolutePath());
      runCharniak(k, inFile.getAbsolutePath(), outFile.getAbsolutePath(), errFile.getAbsolutePath());
      Iterable>> iter = scoredParsesReaderWriter.readScoredTrees(outFile.getAbsolutePath());
      if (deleteTempFiles) {
        inFile.delete();
        outFile.delete();
        errFile.delete();
      }
      return iter.iterator().next();
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    }
  }

  public Iterable>> getKBestParses(Iterable> sentences, int k)
  {
    return getKBestParses(sentences, k, true);
  }

  public Iterable>> getKBestParses(Iterable> sentences, int k, boolean deleteTempFiles)
  {
    try {
      File inFile = File.createTempFile("charniak.", ".in");
      if (deleteTempFiles) inFile.deleteOnExit();
      File outFile = File.createTempFile("charniak.", ".out");
      if (deleteTempFiles) outFile.deleteOnExit();
      File errFile = File.createTempFile("charniak.", ".err");
      if (deleteTempFiles) errFile.deleteOnExit();
      printSentences(sentences, inFile.getAbsolutePath());
      runCharniak(k, inFile.getAbsolutePath(), outFile.getAbsolutePath(), errFile.getAbsolutePath());
      Iterable>> iter = scoredParsesReaderWriter.readScoredTrees(outFile.getAbsolutePath());
      if (deleteTempFiles) {
        inFile.delete();
        outFile.delete();
        errFile.delete();
      }
      return new IterableIterator<>(iter.iterator());
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    }
  }

  public void printSentence(List sentence, String filename)
  {
    List> sentences = new ArrayList<>();
    sentences.add(sentence);
    printSentences(sentences, filename);
  }

  public void printSentences(Iterable> sentences, String filename)
  {
    try {
      PrintWriter pw = IOUtils.getPrintWriter(filename);
      for (List sentence:sentences) {
        pw.print(" ");   // Note: Use  to identify sentences
        String sentString = SentenceUtils.listToString(sentence);
        if (sentence.size() > maxSentenceLength) {
          logger.warning("Sentence length=" + sentence.size() +
                  " is longer than maximum set length " + maxSentenceLength);
          logger.warning("Long Sentence: " + sentString);
        }
        pw.print(sentString);
        pw.println(" ");
      }
      pw.close();
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    }
  }

  public void runCharniak(int n, String infile, String outfile, String errfile)
  {
    try {
      if (n == 1) n++;  // Charniak does not output score if n = 1?

      List args = new ArrayList<>();
      args.add(parserExecutable);
      args.add(infile);
      ProcessBuilder process = new ProcessBuilder(args);
      process.directory(new File(this.dir));
      PrintWriter out = IOUtils.getPrintWriter(outfile);
      PrintWriter err = IOUtils.getPrintWriter(errfile);
      SystemUtils.run(process, out, err);
      out.close();
      err.close();
    } catch (IOException ex) {
      throw new RuntimeException(ex);
    }
  }




}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy