// webit.script.core.lr_parser (Maven / Gradle / Ivy)
// Copyright (c) 2013, Webit Team. All Rights Reserved.
package webit.script.core;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.util.HashMap;
import java.util.Map;
import webit.script.Engine;
import webit.script.Template;
import webit.script.core.ast.TemplateAST;
import webit.script.core.ast.statments.PlaceHolderStatmentFactory;
import webit.script.core.text.TextStatmentFactory;
import webit.script.exceptions.ParseException;
import webit.script.loggers.Logger;
import webit.script.util.ClassLoaderUtil;
import webit.script.util.ExceptionUtil;
import webit.script.util.StringUtil;
import webit.script.util.collection.ArrayStack;
import webit.script.util.collection.Stack;
/**
* This class implements a skeleton table driven LR parser. In general, LR
* parsers are a form of bottom up shift-reduce parsers. Shift-reduce parsers
* act by shifting input onto a parse stack until the Symbols matching the
* right hand side of a production appear on the top of the stack. Once this
* occurs, a reduce is performed. This involves removing the Symbols
* corresponding to the right hand side of the production (the so called
* "handle") and replacing them with the non-terminal from the left hand side of
* the production.
*
*
* To control the decision of whether to shift or reduce at any given point, the
* parser uses a state machine (the "viable prefix recognition machine" built by
* the parser generator). The current state of the machine is placed on top of
* the parse stack (stored as part of a Symbol object representing a terminal
* or non terminal). The parse action table is consulted (using the current
* state and the current lookahead Symbol as indexes) to determine whether to
* shift or to reduce. When the parser shifts, it changes to a new state by
* pushing a new Symbol (containing a new state) onto the stack. When the
* parser reduces, it pops the handle (right hand side of a production) off the
* stack. This leaves the parser in the state it was in before any of those
* Symbols were matched. Next the reduce-goto table is consulted (using the new
* state and current lookahead Symbol as indexes) to determine a new state to go
* to. The parser then shifts to this goto state by pushing the left hand side
* Symbol of the production (also containing the new state) onto the stack.
*
* This class actually provides four LR parsers. The methods parse() and
* debug_parse() provide two versions of the main parser (the only difference
* being that debug_parse() emits debugging trace messages as it parses). In
* addition to these main parsers, the error recovery mechanism uses two more.
* One of these is used to simulate "parsing ahead" in the input without
* carrying out actions (to verify that a potential error recovery has worked),
* and the other is used to parse through buffered "parse ahead" input in order
* to execute all actions and re-synchronize the actual parser configuration.
*
* This is an abstract class which is normally filled out by a subclass
* generated by the JavaCup parser generator. In addition to supplying the
* actual parse tables, generated code also supplies methods which invoke
* various pieces of user supplied code, provide access to certain special
* Symbols (e.g., EOF and error), etc. Specifically, the following abstract
* methods are normally supplied by generated code:
*
* <dl>
* <dt>Symbol do_action()</dt>
* <dd>Executes a piece of user supplied action code. This always comes at the
* point of a reduce in the parse, so this code also allocates and fills in the
* left hand side non terminal Symbol object that is to be pushed onto the
* stack for the reduce.</dd>
* <dt>void init_actions()</dt>
* <dd>Code to initialize a special object that encapsulates user supplied
* actions (this object is used by do_action() to actually carry out the
* actions).</dd>
* </dl>
*
*
* In addition to these routines that must be supplied by the generated
* subclass there are also a series of routines that may
* be supplied. These include:
*
* <dl>
* <dt>Symbol scan()</dt>
* <dd>Used to get the next input Symbol from the scanner.</dd>
* <dt>int error_sync_size()</dt>
* <dd>This determines how many Symbols past the point of an error must be
* parsed without error in order to consider a recovery to be valid. This
* defaults to 3. Values less than 2 are not recommended.</dd>
* <dt>void report_error(String message, Object info)</dt>
* <dd>This method is called to report an error. The default implementation
* simply prints a message to System.err and where the error occurred. This
* method is often replaced in order to provide a more sophisticated error
* reporting mechanism.</dd>
* <dt>void report_fatal_error(String message, Object info)</dt>
* <dd>This method is called when a fatal error that cannot be recovered from
* is encountered. In the default implementation, it calls report_error() to
* emit a message, then throws an exception.</dd>
* <dt>void syntax_error(Symbol cur_token)</dt>
* <dd>This method is called as soon as a syntax error is detected (but before
* recovery is attempted). In the default implementation it invokes:
* report_error("Syntax error", null);</dd>
* <dt>void unrecovered_syntax_error(Symbol cur_token)</dt>
* <dd>This method is called if syntax error recovery fails. In the default
* implementation it invokes:
* report_fatal_error("Couldn't repair and continue parse", null);</dd>
* </dl>
*
*
* @version last updated: 7/3/96
* @author Frank Flannery
*/
abstract class lr_parser {
/** Number of slots the parse stack is allocated with up front. */
private static final int STACK_INITIAL_CAPACITY = 24;

/**
 * Creates the parser and allocates its (reusable) parse stack.
 */
lr_parser() {
    _stack = new ArrayStack(STACK_INITIAL_CAPACITY);
}
/**
* The parse _stack itself. It is allocated once in the constructor and
* cleared at the beginning of every parse() run.
*/
final Stack _stack;
/**
* Internal flag to indicate when parser should quit. parse() sets it to
* true before entering its main loop and keeps looping while it stays true.
* Nothing in this class resets it to false, so it is presumably cleared by
* the action code of the generated subclass (e.g. on accept) -- TODO confirm.
*/
boolean goonParse = false;
// Per-template parsing context. Every field below is (re)assigned by
// parseTemplate() before the parse starts; they are package-private,
// presumably so the generated subclass' action code can use them.
// Engine that owns the template being parsed (template.engine).
Engine engine;
// Template currently being parsed.
Template template;
// Obtained from the engine; notified when a template parse starts and finishes.
TextStatmentFactory textStatmentFactory;
// Built for each parse from the engine's filter.
PlaceHolderStatmentFactory placeHolderStatmentFactory;
// Logger obtained from the engine.
Logger logger;
// Negation of the engine's "loose var" setting.
boolean locateVarForce;
// Fresh instance for each parse.
NativeImportManager nativeImportMgr;
// Fresh instance for each parse, created with the engine.
VariantManager varmgr;
// Fresh map for each parse, seeded with the entry (null -> 0).
Map labelsIndexMap;
// Reset to 0 for each parse.
int currentLabelIndex;
/**
 * Parses the source of the given template into its AST.
 *
 * A lexer is opened on the template's resource, the per-template parsing
 * context of this parser is re-initialized from the template's engine, and
 * the LR parse is run. The text statement factory is told when the parse
 * starts and, only if the parse succeeds, when it finishes. The lexer is
 * always closed before returning; a failure to close it is ignored.
 *
 * @param template the template to parse
 * @return the value of the Symbol produced by the parse, as a TemplateAST
 * @throws ParseException if anything goes wrong; every exception raised
 * while preparing or parsing is converted through
 * ExceptionUtil.castToParseException
 */
public TemplateAST parseTemplate(final Template template) throws ParseException {
    Lexer lexer = null;
    try {
        lexer = new Lexer(template.resource.openReader());

        // Bind the parsing context to this template and its engine.
        this.template = template;
        final Engine owner = template.engine;
        this.engine = owner;
        lexer.setTrimCodeBlockBlankLine(owner.isTrimCodeBlockBlankLine());
        this.logger = owner.getLogger();
        final TextStatmentFactory textFactory = owner.getTextStatmentFactory();
        this.textStatmentFactory = textFactory;
        this.locateVarForce = !owner.isLooseVar();
        this.placeHolderStatmentFactory = new PlaceHolderStatmentFactory(owner.getFilter());

        // Fresh per-parse bookkeeping.
        this.nativeImportMgr = new NativeImportManager();
        this.varmgr = new VariantManager(owner);
        this.labelsIndexMap = new HashMap();
        this.labelsIndexMap.put(null, 0);
        this.currentLabelIndex = 0;

        textFactory.startTemplateParser(template);
        final Symbol root = this.parse(lexer);
        textFactory.finishTemplateParser(template);
        return (TemplateAST) root.value;
    } catch (Exception e) {
        throw ExceptionUtil.castToParseException(e);
    } finally {
        if (lexer != null) {
            try {
                lexer.yyclose();
            } catch (IOException ignore) {
            }
        }
    }
}
/**
* Perform a bit of user supplied action code (supplied by generated
* subclass). Actions are indexed by an internal action number assigned at
* parser generation time.
*
* parse() calls this on every reduce, before the handle is popped off the
* parse stack, and passes the same number it uses to index the production
* table.
*
* @param act_num the internal index of the action to be performed.
* @return the value that parse() stores in the Symbol it pushes for the
* reduced production
* @throws ParseException if the action code fails
*/
abstract Object do_action(int act_num) throws ParseException;
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
/**
 * Looks up the action for a Symbol in one row of the action table.
 *
 * A row is a flat sequence of (symbol index, action) pairs; the pair whose
 * symbol index is -1 holds the default action and is expected to come last.
 * Rows shorter than 20 slots (fewer than 10 pairs) are scanned linearly and
 * the first pair matching either the Symbol or the default wins. Longer rows
 * are binary searched over every pair except the trailing default, which
 * relies on those pairs being ordered by ascending symbol index (presumably
 * guaranteed by the table generator -- confirm); if nothing matches, the
 * last slot of the row (the default action) is returned.
 *
 * @param row actionTable[state]
 * @param sym the Symbol index of the action being accessed.
 * @return the encoded action, or 0 (error) if a short row contains neither
 * the Symbol nor a default entry
 */
private short getAction(final short[] row, int sym) {
    final int length = row.length;

    if (length >= 20) {
        /* binary search over the pairs, leaving out the trailing default pair */
        int low = 0;
        int high = ((length - 1) >> 1) - 1;
        while (low <= high) {
            final int middle = (low + high) >> 1;
            final int tagIndex = middle << 1;
            final short tag = row[tagIndex];
            if (sym == tag) {
                return row[tagIndex + 1];
            }
            if (sym > tag) {
                low = middle + 1;
            } else {
                high = middle - 1;
            }
        }
        /* not found, use the default at the end */
        return row[length - 1];
    }

    /* short row: walk the pairs one by one */
    for (int i = 0; i < length; i += 2) {
        final short tag = row[i];
        if (tag == sym || tag == -1) {
            return row[i + 1];
        }
    }
    /* ran off the end without finding even a default: report error (0) */
    return 0;
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
/**
 * Looks up the goto state for a Symbol in one row of the reduce-goto table.
 *
 * A row is a flat sequence of (symbol index, state) pairs. The pairs are
 * scanned in order and the first one whose symbol index equals the given
 * Symbol, or is -1 (the default entry), supplies the result.
 *
 * @param row reduceTable[state]
 * @param sym the Symbol index of the entry being accessed.
 * @return the state to go to, or -1 (error) if no pair matches
 */
private short getReduce(final short[] row, int sym) {
    int index = 0;
    while (index < row.length) {
        final short tag = row[index];
        if (tag == sym || tag == -1) {
            /* the slot right after the tag holds the target state */
            return row[index + 1];
        }
        index += 2;
    }
    /* ran off the end of the row: report error (-1) */
    return -1;
}
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
/**
 * Runs the table driven shift/reduce loop over the tokens of the lexer.
 *
 * The parse stack is reset and seeded with a start Symbol in
 * Parser.START_STATE. Each round looks up the action for the state of the
 * current stack top and the lookahead token:
 * a positive action shifts the lookahead into state (action - 1) and reads
 * the next token; a negative action reduces by production (-action - 1);
 * zero is a syntax error. The loop runs until goonParse is no longer true;
 * this method never clears that flag itself, so it is presumably cleared by
 * the action code invoked through do_action() -- confirm.
 *
 * @param myLexer the lexer that supplies the tokens
 * @return the Symbol on top of the parse stack when the loop ends
 * @throws Exception a ParseException carrying the lexer's line and column
 * when the action table reports an error, or whatever the lexer or the
 * action code throws
 */
private Symbol parse(final Lexer myLexer) throws Exception {
    final Stack symbols = this._stack;
    symbols.clear();

    /* seed the stack with the start Symbol */
    Symbol top = new Symbol(0, null);
    top.state = Parser.START_STATE;
    symbols.push(top);

    final short[][] actions = Parser.ACTION_TABLE;
    final short[][] gotos = Parser.REDUCE_TABLE;
    final short[][] productions = Parser.PRODUCTION_TABLE;

    /* get the first token */
    Symbol lookahead = myLexer.nextToken();

    /* continue until we are told to stop */
    goonParse = true;
    do {
        final int action = getAction(actions[top.state], lookahead.id);

        if (action == 0) {
            /* error entry: there is no way to continue from here */
            throw new ParseException(StringUtil.concat("Parser stop at: ", Integer.toString(myLexer.getLine()), "(", Integer.toString(myLexer.getColumn()), ")"), myLexer.getLine(), myLexer.getColumn());
        } else if (action > 0) {
            /* shift: the token itself becomes the new stack top */
            lookahead.state = action - 1;
            top = lookahead;
            symbols.push(top);
            /* advance to the next Symbol */
            lookahead = myLexer.nextToken();
        } else {
            /* reduce: run the user action first, while the handle is still on the stack */
            final int production = (-action) - 1;
            final Object value = do_action(production);

            /* production row: [0] = left hand side Symbol id, [1] = handle size */
            final short[] rule = productions[production];
            final int lhsId = rule[0];
            final int handleSize = rule[1];

            if (handleSize == 0) {
                top = new Symbol(lhsId, value);
            } else {
                /* position based on the leftmost Symbol of the handle */
                top = new Symbol(lhsId, value, symbols.peek(handleSize - 1));
                /* pops the handle off the stack */
                symbols.pops(handleSize);
            }

            /* go to the state reached from the one popped back to, then push */
            top.state = getReduce(gotos[symbols.peek().state], lhsId);
            symbols.push(top);
        }
    } while (goonParse);

    return symbols.peek();
}
/**
 * Loads one serialized parser table from the class path.
 *
 * The table is read from the resource
 * "webit/script/core/Parser$" + name + ".data", looked up through the
 * default class loader, and deserialized as a short[][].
 *
 * @param name the table name used to build the resource path
 * @return the deserialized table
 * @throws Error if the resource does not exist, cannot be read, or cannot be
 * deserialized
 */
static short[][] loadFromDataFile(String name) {
    final String resource = StringUtil.concat("webit/script/core/Parser$", name, ".data");
    final InputStream raw = ClassLoaderUtil
            .getDefaultClassLoader()
            .getResourceAsStream(resource);
    if (raw == null) {
        /* getResourceAsStream reports a missing resource with null; fail with a
           clear message rather than a NullPointerException from the stream wrapper */
        throw new Error("Parser data file not found: " + resource);
    }
    ObjectInputStream in = null;
    try {
        in = new ObjectInputStream(raw);
        return (short[][]) in.readObject();
    } catch (IOException e) {
        throw new Error(e);
    } catch (ClassNotFoundException e) {
        throw new Error(e);
    } finally {
        try {
            if (in != null) {
                /* closing the wrapper closes the underlying stream as well */
                in.close();
            } else {
                /* the wrapper was never created (bad stream header): close the raw stream */
                raw.close();
            }
        } catch (IOException ignored) {
            /* best effort: a failure to close must not mask the real outcome */
        }
    }
}
}