// edu.stanford.nlp.util.StringParsingTask (source listing; artifact available via Maven / Gradle / Ivy)
package edu.stanford.nlp.util;
/**
 * An instantiation of this abstract class parses a {@code String} and
 * returns an object of type {@code E}. It's called a
 * {@code StringParsingTask} (rather than {@code StringParser})
 * because a new instance is constructed for each {@code String} to be
 * parsed. We do this to be thread-safe: methods in
 * {@code StringParsingTask} share state information (e.g. current
 * string index) via instance variables.
 *
 * @param <E> the type of object produced by {@link #parse()}
 * @author Bill MacCartney
 */
public abstract class StringParsingTask<E> {

  // This class represents a parser working on a specific string. We
  // construct from a specific string so that all parsing state (the string
  // and the current position in it) lives in instance fields.

  /** The string being parsed. */
  protected String s;

  /**
   * Position of the next character {@link #read()} will return. After a read
   * past the end of the string this may be greater than {@code s.length()};
   * any position outside the string is treated as end of input.
   */
  protected int index = 0;

  /** True once a read past the end of the string has been attempted; never reset. */
  protected boolean isEOF = false;

  /**
   * Constructs a new {@code StringParsingTask} from the specified
   * {@code String}. Derived class constructors should be sure to
   * call {@code super(s)}!
   *
   * @param s the string to parse
   */
  public StringParsingTask(String s) {
    this.s = s;
    index = 0;
  }

  /**
   * Parses the {@code String} associated with this
   * {@code StringParsingTask} and returns an object of type
   * {@code E}.
   *
   * @return the parsed object
   */
  public abstract E parse();

  // ---------------------------------------------------------------------

  /**
   * Skips leading whitespace, then reads characters until
   * {@link #isWhiteSpace(char) isWhiteSpace(ch)} or
   * {@link #isPunct(char) isPunct(ch)} or {@link #isEOF()}. You may need
   * to override the definition of {@link #isPunct(char) isPunct(ch)} to
   * get this to work right.
   *
   * @return the interned text that was read; empty if no name character was found
   */
  protected String readName() {
    readWhiteSpace();
    StringBuilder sb = new StringBuilder();
    char ch = read();
    while (!isWhiteSpace(ch) && !isPunct(ch) && !isEOF) {
      sb.append(ch);
      ch = read();
    }
    unread(); // push back the terminating character (or the virtual EOF character)
    return sb.toString().intern();
  }

  /**
   * Skips leading whitespace, then reads the longest prefix that forms a Java
   * identifier, as judged by {@link Character#isJavaIdentifierStart(char)} for
   * the first character and {@link Character#isJavaIdentifierPart(char)} for
   * the rest.
   *
   * @return the interned identifier; empty if the next character cannot start one
   */
  protected String readJavaIdentifier() {
    readWhiteSpace();
    StringBuilder sb = new StringBuilder();
    char ch = read();
    if (Character.isJavaIdentifierStart(ch) && !isEOF) {
      sb.append(ch);
      ch = read();
      while (Character.isJavaIdentifierPart(ch) && !isEOF) {
        sb.append(ch);
        ch = read();
      }
    }
    unread(); // push back the first character that is not part of the identifier
    return sb.toString().intern();
  }

  // .....................................................................

  /**
   * Skips whitespace and consumes one character, which must be a left paren.
   *
   * @throws ParserException if the character read is not a left paren
   *     (including when the end of the string has been reached)
   */
  protected void readLeftParen() {
    readWhiteSpace();
    char ch = read();
    if (!isLeftParen(ch)) {
      throw new ParserException("Expected left paren!");
    }
  }

  /**
   * Skips whitespace and consumes one character, which must be a right paren.
   *
   * @throws ParserException if the character read is not a right paren
   *     (including when the end of the string has been reached)
   */
  protected void readRightParen() {
    readWhiteSpace();
    char ch = read();
    if (!isRightParen(ch)) {
      throw new ParserException("Expected right paren!");
    }
  }

  /** Skips whitespace and then consumes a single dot if one is next; otherwise consumes nothing more. */
  protected void readDot() {
    readWhiteSpace();
    if (isDot(peek())) {
      read();
    }
  }

  /** Consumes characters for as long as they are whitespace, stopping at the end of the string. */
  protected void readWhiteSpace() {
    char ch = read();
    while (isWhiteSpace(ch) && !isEOF()) {
      ch = read();
    }
    unread(); // push back the first non-whitespace character (or the virtual EOF character)
  }

  // .....................................................................

  /**
   * Returns the character at the current position and advances past it. If the
   * position is outside the string, sets the EOF flag and returns an arbitrary
   * space character.
   *
   * <p>The position is advanced even at end of input. Every caller follows a
   * terminating {@code read()} with {@link #unread()}, so without this the
   * position would slide backwards onto characters already consumed each time
   * the end of the string was probed.
   *
   * @return the character read, or {@code ' '} when past the end of the string
   */
  protected char read() {
    if (index < 0 || index >= s.length()) {
      isEOF = true;
      index++; // keep read()/unread() symmetric at end of input
      return ' '; // arbitrary
    }
    return s.charAt(index++);
  }

  /** Moves the current position back by one, undoing the most recent {@link #read()}. */
  protected void unread() {
    index--;
  }

  /**
   * Returns the next character without consuming it. Like {@link #read()},
   * this sets the EOF flag if the position is past the end of the string.
   *
   * @return the next character, or {@code ' '} when past the end of the string
   */
  protected char peek() {
    char ch = read();
    unread();
    return ch;
  }

  // -----------------------------------------------------------------------

  /**
   * @return true if a read past the end of the string has been attempted
   */
  protected boolean isEOF() {
    return isEOF;
  }

  /**
   * @return true if {@code ch} is a space, tab, form feed, carriage return or newline
   */
  protected boolean isWhiteSpace(char ch) {
    return (ch == ' ' || ch == '\t' || ch == '\f' || ch == '\r' || ch == '\n');
  }

  /**
   * @return true if {@code ch} terminates a name; by default, a left or right paren
   */
  protected boolean isPunct(char ch) {
    return isLeftParen(ch) || isRightParen(ch);
  }

  /**
   * @return true if {@code ch} is {@code '('}
   */
  protected boolean isLeftParen(char ch) {
    return ch == '(';
  }

  /**
   * @return true if {@code ch} is {@code ')'}
   */
  protected boolean isRightParen(char ch) {
    return ch == ')';
  }

  /**
   * @return true if {@code ch} is {@code '.'}
   */
  protected boolean isDot(char ch) {
    return ch == '.';
  }

  // exception class -------------------------------------------------------

  /** Unchecked exception thrown when the input does not match what the parser expects. */
  public static class ParserException extends RuntimeException {
    private static final long serialVersionUID = 1L;

    /**
     * @param e the underlying cause
     */
    public ParserException(Exception e) {
      super(e);
    }

    /**
     * @param message description of the parse failure
     */
    public ParserException(String message) {
      super(message);
    }
  }
}