edu.stanford.nlp.parser.ui.ParserPanel Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
There is a newer version: 3.9.2
Show newest version
// StanfordLexicalizedParser -- a probabilistic lexicalized NL CFG parser
// Copyright (c) 2002, 2003, 2004, 2005 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//
// For more information, bug reports, fixes, contact:
//    Christopher Manning
//    Dept of Computer Science, Gates 4A
//    Stanford CA 94305-9040
//    USA
//    parser-support@lists.stanford.edu
//    http://nlp.stanford.edu/downloads/lex-parser.shtml

package edu.stanford.nlp.parser.ui;

import edu.stanford.nlp.io.ui.OpenPageDialog;
import edu.stanford.nlp.ling.*;
import edu.stanford.nlp.parser.common.ParserQuery;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.process.*;
import edu.stanford.nlp.swing.FontDetector;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.international.pennchinese.ChineseTreebankLanguagePack;
import edu.stanford.nlp.ui.JarFileChooser;

import javax.swing.*;
import javax.swing.text.SimpleAttributeSet;
import javax.swing.text.StyleConstants;
import java.awt.*;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import javax.swing.event.AncestorEvent;
import javax.swing.event.AncestorListener;
import java.awt.event.MouseAdapter;
import java.awt.event.MouseEvent;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;


/**
 * Provides a simple GUI Panel for Parsing.  Allows a user to load a parser
 * created using lexparser.LexicalizedParser, load a text data file or type
 * in text, parse sentences within the input text, and view the resultant
 * parse tree.
 *
 * @author Huy Nguyen ([email protected])
 */
public class ParserPanel extends JPanel {

  /**
   * Serialization version identifier for this panel class.
   */
  private static final long serialVersionUID = -2118491857333662471L;
  // constants for language specification (kind of input text)
  public static final int UNTOKENIZED_ENGLISH = 0;
  public static final int TOKENIZED_CHINESE = 1;
  public static final int UNTOKENIZED_CHINESE = 2;

  // language pack supplying the tokenizer factory and encoding; note it is
  // static, yet it is (re)assigned by the instance constructor
  private static TreebankLanguagePack tlp;
  // character encoding; overwritten in the constructor with tlp.getEncoding()
  private String encoding = "UTF-8";

  // one second in milliseconds (delay of the progress timer)
  private static final int ONE_SECOND = 1000;
  // parser takes approximately a minute to load
  // (presumably expressed in seconds -- confirm against the progress code)
  private static final int PARSER_LOAD_TIME = 60;
  // parser takes 5-60 seconds to parse a sentence
  // (presumably expressed in seconds -- confirm against the progress code)
  private static final int PARSE_TIME = 30;

  // constants for finding nearest sentence boundary (direction argument of
  // nearestDelimiter)
  private static final int SEEK_FORWARD = 1;
  private static final int SEEK_BACK = -1;

  // file chooser shared with pageDialog, plus related chooser helpers
  private final JFileChooser jfc;
  private final JFileChooserLocation jfcLocation;
  // chooser constructed with the pattern ".*\\.ser\\.gz"
  private final JarFileChooser chooseJarParser;
  // dialog used by loadFile() to pick a file or page to open
  private OpenPageDialog pageDialog;

  // for highlighting: character styles for normal and highlighted text, and
  // the bounds of the currently highlighted sentence within the text pane
  private SimpleAttributeSet normalStyle, highlightStyle;
  private int startIndex, endIndex;

  private TreeJPanel treePanel;
  // the loaded parser; parse() does nothing while this is null
  private LexicalizedParser parser;

  // worker threads to handle long operations
  private LoadParserThread lpThread;
  private ParseThread parseThread;

  // to monitor progress of long operations
  private javax.swing.Timer timer;
  //private ProgressMonitor progressMonitor;
  private int count; // progress count
  // use glass pane to block input to components other than progressMonitor
  private Component glassPane;

  /** Whether to scroll one sentence forward after parsing. */
  private boolean scrollWhenDone;

  /**
   * Builds the panel: lays out the Swing components, selects the default
   * (Penn Treebank) language pack together with its encoding and font,
   * prepares the file selection dialogs, and sets up the progress timer and
   * the text styles used for sentence highlighting.
   */
  public ParserPanel() {
    // must come first: everything below touches components created here
    initComponents();

    // default language settings; setFont() reads tlp
    tlp = new PennTreebankLanguagePack();
    encoding = tlp.getEncoding();
    setFont();

    // file selection dialogs share one chooser rooted at the working directory
    jfc = new JFileChooser(System.getProperty("user.dir"));
    jfcLocation = new JFileChooserLocation(jfc);
    pageDialog = new OpenPageDialog(new Frame(), true);
    pageDialog.setFileChooser(jfc);

    // timer with a one-second delay, handled by TimerListener
    timer = new javax.swing.Timer(ONE_SECOND, new TimerListener());

    // styles for painting and clearing the sentence highlight
    normalStyle = new SimpleAttributeSet();
    StyleConstants.setBackground(normalStyle, textPane.getBackground());
    highlightStyle = new SimpleAttributeSet();
    StyleConstants.setBackground(highlightStyle, Color.yellow);

    this.chooseJarParser = new JarFileChooser(".*\\.ser\\.gz", this);
  }

  /**
   * Moves the highlight to the sentence before the currently highlighted
   * one and puts the caret at the start of the new highlight.
   */
  public void scrollBack() {
    final int justBeforeCurrent = startIndex - 1;
    highlightSentence(justBeforeCurrent);
    // highlightSentence updated startIndex; move the caret there so the
    // text pane scrolls to the highlight location
    textPane.setCaretPosition(startIndex);
  }

  /**
   * Moves the highlight to the sentence after the currently highlighted
   * one and puts the caret at the start of the new highlight.
   */
  public void scrollForward() {
    final int justAfterCurrent = endIndex + 1;
    highlightSentence(justAfterCurrent);
    // highlightSentence updated startIndex; move the caret there so the
    // text pane scrolls to the highlight location
    textPane.setCaretPosition(startIndex);
  }

  /**
   * Applies the given character attributes to the region of the text pane
   * running from {@code start} through {@code end} (inclusive).  Does
   * nothing unless {@code start} is strictly less than {@code end}.
   *
   * @param start index of the first character to restyle
   * @param end index of the last character to restyle
   * @param style attributes to merge into the region (existing attributes
   *              are not replaced wholesale)
   */
  private void highlightText(int start, int end, SimpleAttributeSet style) {
    if (start >= end) {
      return;
    }
    final int length = end - start + 1;
    textPane.getStyledDocument().setCharacterAttributes(start, length, style, false);
  }

  /**
   * Highlights the sentence around {@code start}.  Delegates to the
   * two-argument overload, passing -1 as the end position so that the
   * right boundary is derived from {@code start} alone.
   *
   * @param start position in the text pane inside the sentence of interest
   */
  private void highlightSentence(int start) {
    final int noExplicitEnd = -1;
    highlightSentence(start, noExplicitEnd);
  }

  /**
   * Finds the sentence delimited by the closest sentence delimiter preceding
   * start and closest period following end.  If end is less than start
   * (or -1), sets right boundary as closest period following start.
   * Actually starts search for preceding sentence delimiter at (start-1)
   */
  private void highlightSentence(int start, int end) {
    // clears highlight.  paints over entire document because the document may have changed
    highlightText(0, textPane.getText().length(), normalStyle);

    // if start<1 set startIndex to 0, otherwise set to index following closest preceding period
    startIndex = (start < 1) ? 0 : nearestDelimiter(textPane.getText(), start, SEEK_BACK) + 1;

    // if endseekDir
   * is SEEK_FORWARD, finds the nearest delimiter after start.  Else, if it is
   * SEEK_BACK, finds the nearest delimiter before start.
   */
  private int nearestDelimiter(String text, int start, int seekDir) {
    // Splits text into sentences with the language pack's tokenizer and
    // locates the sentence interval containing start.  For SEEK_BACK the
    // result is the index just before that interval begins; for
    // SEEK_FORWARD it is the index just before the next interval begins.
    // Returns -1 when no interval matches (e.g. text has no sentences).
    // Throws IllegalArgumentException for any other seekDir, and
    // ClassCastException if the tokens carry no character offsets.
    if (seekDir != SEEK_BACK && seekDir != SEEK_FORWARD) {
      throw new IllegalArgumentException("Unknown seek direction " +
                                         seekDir);
    }

    DocumentPreprocessor processor = new DocumentPreprocessor(new StringReader(text));
    TokenizerFactory<? extends HasWord> tf = tlp.getTokenizerFactory();
    processor.setTokenizerFactory(tf);

    // Start offset of every non-empty sentence; the first one is pinned to 0
    // so that any leading text belongs to the first interval.
    List<Integer> boundaries = new ArrayList<Integer>();
    for (List<HasWord> sentence : processor) {
      if (sentence.isEmpty()) {
        continue;
      }
      HasWord firstToken = sentence.get(0);
      if (!(firstToken instanceof HasOffset)) {
        throw new ClassCastException("Expected HasOffsets from the " +
                                     "DocumentPreprocessor");
      }
      if (boundaries.isEmpty()) {
        boundaries.add(0);
      } else {
        boundaries.add(((HasOffset) firstToken).beginPosition());
      }
    }
    // closing sentinel so the last sentence also forms an interval
    boundaries.add(text.length());

    for (int i = 0; i < boundaries.size() - 1; ++i) {
      int intervalStart = boundaries.get(i);
      int intervalEnd = boundaries.get(i + 1);
      if (intervalStart <= start && start < intervalEnd) {
        return ((seekDir == SEEK_BACK) ? intervalStart : intervalEnd) - 1;
      }
    }

    // The cursor position at the end is actually one past the text length.
    // We might as well highlight the last interval in that case.
    if (boundaries.size() >= 2 && start >= text.length()) {
      int fromEnd = (seekDir == SEEK_BACK) ? 2 : 1;
      return boundaries.get(boundaries.size() - fromEnd) - 1;
    }
    return -1;
  }

  /**
   * Highlights the sentence(s) spanned by the user's current selection in
   * the text pane (e.g. made by dragging the mouse).
   */
  private void highlightSelectedSentence() {
    final int selectionStart = textPane.getSelectionStart();
    final int selectionEnd = textPane.getSelectionEnd();
    highlightSentence(selectionStart, selectionEnd);
  }

  /**
   * Highlights the sentence containing the caret, i.e. the one currently
   * being edited.
   */
  private void highlightEditedSentence() {
    final int caret = textPane.getCaretPosition();
    highlightSentence(caret);
  }

  /**
   * Shows the given message in this panel's status label.
   *
   * @param status the text to display
   */
  public void setStatus(String status) {
    statusLabel.setText(status);
  }

  /**
   * Sets the font of the text pane and the tree panel to suit the current
   * language pack: a Chinese-capable font for the Chinese treebank language
   * pack, otherwise a 14pt plain sans-serif font.
   */
  private void setFont() {
    if (tlp instanceof ChineseTreebankLanguagePack) {
      setChineseFont();
      return;
    }
    // Font.SANS_SERIF ("SansSerif") is the logical font name.  The spelling
    // "Sans Serif" is not a logical name, so it is looked up as a physical
    // font and falls back to Dialog when no such font is installed.
    Font font = new Font(Font.SANS_SERIF, Font.PLAIN, 14);
    textPane.setFont(font);
    treePanel.setFont(font);
  }

  /**
   * Picks a font for Chinese text and applies it to the text pane and the
   * tree panel.  Preference order: the first font reported by
   * {@code FontDetector.supportedFonts(FontDetector.CHINESE)}, then
   * "Watanabe Mincho" if {@code FontDetector} reports it, and finally the
   * logical sans-serif font.  All variants are plain, 14pt.
   */
  private void setChineseFont() {
    java.util.List<Font> fonts = FontDetector.supportedFonts(FontDetector.CHINESE);
    final Font font;
    boolean detected = !fonts.isEmpty();
    if (detected) {
      font = new Font(fonts.get(0).getName(), Font.PLAIN, 14);
    } else if (FontDetector.hasFont("Watanabe Mincho")) {
      font = new Font("Watanabe Mincho", Font.PLAIN, 14);
    } else {
      // Font.SANS_SERIF ("SansSerif") is the logical font name; "Sans Serif"
      // with a space is not, and would be resolved as a physical font.
      font = new Font(Font.SANS_SERIF, Font.PLAIN, 14);
    }
    textPane.setFont(font);
    treePanel.setFont(font);
    if (detected) {
      // only the auto-detected choice is reported, as before
      System.err.println("Selected font " + font);
    }
  }


  /**
   * Tokenizes the highlighted text (using the tokenizer of the current
   * language pack) and starts a ParseThread to parse the tokenized text.
   * Does nothing if the text pane is empty, if no parser has been loaded,
   * or if the highlighted region contains only whitespace.
   */
  public void parse() {
    final String fullText = textPane.getText();
    if (fullText.length() == 0) {
      return;
    }

    // startIndex/endIndex were computed when the highlight was last painted;
    // the document may have shrunk since then, so clamp them to the current
    // text rather than letting substring throw.  endIndex is inclusive,
    // hence the +1 for substring's exclusive end.
    final int from = Math.max(0, Math.min(startIndex, fullText.length()));
    final int to = Math.max(from, Math.min(endIndex + 1, fullText.length()));
    final String text = fullText.substring(from, to).trim();

    if (parser != null && text.length() > 0) {
      Tokenizer<? extends HasWord> toke = tlp.getTokenizerFactory().getTokenizer(new StringReader(text));
      List<? extends HasWord> wordList = toke.tokenize();
      parseThread = new ParseThread(wordList);
      parseThread.start();
      startProgressMonitor("Parsing", PARSE_TIME);
    }
  }

  /**
   * Shows the open-page dialog, centered over this panel, and loads the
   * chosen page if the user approves the selection.
   */
  public void loadFile() {
    // center the dialog over this panel
    final Point panelOrigin = getLocationOnScreen();
    final int dialogX = panelOrigin.x + (getWidth() - pageDialog.getWidth()) / 2;
    final int dialogY = panelOrigin.y + (getHeight() - pageDialog.getHeight()) / 2;
    pageDialog.setLocation(dialogX, dialogY);
    pageDialog.setVisible(true);

    if (pageDialog.getStatus() != OpenPageDialog.APPROVE_OPTION) {
      return;
    }
    loadFile(pageDialog.getPage());
  }

  /**
   * Loads a text or html file from a file path or URL.  Treats anything
   * beginning with <code>http://</code>, or ending with <code>.htm</code> or
   * <code>.html</code>, as an html file, and strips all tags from the document.
   */
  public void loadFile(String filename) {
    if (filename == null) {
      return;
    }

    File file = new File(filename);

    String urlOrFile = filename;
    // if file can't be found locally, try prepending http:// and looking on web
    if (!file.exists() && filename.indexOf("://") == -1) {
      urlOrFile = "http://" + filename;
    }
    // else prepend file:// to handle local html file urls
    else if (filename.indexOf("://") == -1) {
      urlOrFile = "file://" + filename;
    }

    // TODO: why do any of this instead of just reading the file?
    // Also, is this working correctly still?
    // load the document
    Document doc;
    try {
      if (urlOrFile.startsWith("http://") || urlOrFile.endsWith(".htm") || urlOrFile.endsWith(".html")) {
        // strip tags from html documents
        Document docPre = new BasicDocument