webit.script.core.lr_parser Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of webit-script Show documentation
There is a newer version: 1.5.2
// Copyright (c) 2013, Webit Team. All Rights Reserved.
package webit.script.core;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.HashMap;
import java.util.Map;
import webit.script.Engine;
import webit.script.Template;
import webit.script.core.ast.TemplateAST;
import webit.script.core.ast.statments.PlaceHolderStatmentFactory;
import webit.script.core.text.TextStatmentFactory;
import webit.script.exceptions.ParseException;
import webit.script.loggers.Logger;
import webit.script.util.ClassLoaderUtil;
import webit.script.util.ExceptionUtil;
import webit.script.util.StringUtil;
import webit.script.util.collection.ArrayStack;
import webit.script.util.collection.Stack;

/**
 * This class implements a skeleton table driven LR parser. In general, LR
 * parsers are a form of bottom up shift-reduce parsers. Shift-reduce parsers
 * act by shifting input onto a parse _stack until the Symbols matching the
 * right hand side of a production appear on the top of the _stack. Once this
 * occurs, a reduce is performed. This involves removing the Symbols
 * corresponding to the right hand side of the production (the so called
 * "handle") and replacing them with the non-terminal from the left hand side
 * of the production.
 * 
 *
 * <p>
 * To control the decision of whether to shift or reduce at any given point, the
 * parser uses a state machine (the "viable prefix recognition machine" built by
 * the parser generator). The current state of the machine is placed on top of
 * the parse _stack (stored as part of a Symbol object representing a terminal
 * or non terminal). The parse action table is consulted (using the current
 * state and the current lookahead Symbol as indexes) to determine whether to
 * shift or to reduce. When the parser shifts, it changes to a new state by
 * pushing a new Symbol (containing a new state) onto the _stack. When the
 * parser reduces, it pops the handle (right hand side of a production) off the
 * _stack. This leaves the parser in the state it was in before any of those
 * Symbols were matched. Next the reduce-goto table is consulted (using the new
 * state and current lookahead Symbol as indexes) to determine a new state to go
 * to. The parser then shifts to this goto state by pushing the left hand side
 * Symbol of the production (also containing the new state) onto the _stack.
 *
 * <p>
 * This class actually provides four LR parsers. The methods parse() and
 * debug_parse() provide two versions of the main parser (the only difference
 * being that debug_parse() emits debugging trace messages as it parses). In
 * addition to these main parsers, the error recovery mechanism uses two more.
 * One of these is used to simulate "parsing ahead" in the input without
 * carrying out actions (to verify that a potential error recovery has worked),
 * and the other is used to parse through buffered "parse ahead" input in order
 * to execute all actions and re-synchronize the actual parser configuration.
 * (NOTE(review): this paragraph is inherited from the original CUP runtime;
 * the class body below defines only a single private parse(Lexer) routine and
 * no debug_parse() or error-recovery parsers -- confirm and trim if stale.)
 *
 * <p>
 * This is an abstract class which is normally filled out by a subclass
 * generated by the JavaCup parser generator. In addition to supplying the
 * actual parse tables, generated code also supplies methods which invoke
 * various pieces of user supplied code, provide access to certain special
 * Symbols (e.g., EOF and error), etc. Specifically, the following abstract
 * methods are normally supplied by generated code:
 * <dl>
 * <dt>Symbol do_action()</dt>
 * <dd>Executes a piece of user supplied action code. This always comes at the
 * point of a reduce in the parse, so this code also allocates and fills in the
 * left hand side non terminal Symbol object that is to be pushed onto the
 * _stack for the reduce.</dd>
 * <dt>void init_actions()</dt>
 * <dd>Code to initialize a special object that encapsulates user supplied
 * actions (this object is used by do_action() to actually carry out the
 * actions).</dd>
 * </dl>
 *
 * In addition to these routines that must be supplied by the generated
 * subclass there are also a series of routines that <i>may</i>
 * be supplied. These include:
 * <dl>
 * <dt>Symbol scan()</dt>
 * <dd>Used to get the next input Symbol from the scanner.</dd>
 * <dt>int error_sync_size()</dt>
 * <dd>This determines how many Symbols past the point of an error must be
 * parsed without error in order to consider a recovery to be valid. This
 * defaults to 3. Values less than 2 are not recommended.</dd>
 * <dt>void report_error(String message, Object info)</dt>
 * <dd>This method is called to report an error. The default implementation
 * simply prints a message to System.err and where the error occurred. This
 * method is often replaced in order to provide a more sophisticated error
 * reporting mechanism.</dd>
 * <dt>void report_fatal_error(String message, Object info)</dt>
 * <dd>This method is called when a fatal error that cannot be recovered from
 * is encountered. In the default implementation, it calls report_error() to
 * emit a message, then throws an exception.</dd>
 * <dt>void syntax_error(Symbol cur_token)</dt>
 * <dd>This method is called as soon as a syntax error is detected (but before
 * recovery is attempted). In the default implementation it invokes:
 * <code>report_error("Syntax error", null);</code></dd>
 * <dt>void unrecovered_syntax_error(Symbol cur_token)</dt>
 * <dd>This method is called if syntax error recovery fails. In the default
 * implementation it invokes:
 * <code>report_fatal_error("Couldn't repair and continue parse", null);</code></dd>
 * </dl>
 *
 * @version last updated: 7/3/96
 * @author Frank Flannery
 */
abstract class lr_parser {

    /** Initial capacity of the parse stack. */
    private final static int stackInitialCapacity = 24;

    lr_parser() {
        this._stack = new ArrayStack<Symbol>(stackInitialCapacity);
    }
    /**
     * The parse _stack itself. It holds {@link Symbol}s; parse() reads
     * {@code state} directly from the elements it peeks.
     */
    final Stack<Symbol> _stack;
    /**
     * Internal flag to indicate when parser should quit. parse() sets it to
     * true before entering its loop and keeps looping while it stays true.
     */
    boolean goonParse = false;

    // Per-template parsing context; every field below is (re)assigned by
    // parseTemplate() before the parse loop starts.
    Engine engine;
    Template template;
    TextStatmentFactory textStatmentFactory;
    PlaceHolderStatmentFactory placeHolderStatmentFactory;
    Logger logger;
    /** Set to the negation of {@code engine.isLooseVar()}. */
    boolean locateVarForce;
    NativeImportManager nativeImportMgr;
    VariantManager varmgr;
    // NOTE(review): the type arguments of this map appear to have been lost
    // (presumably Map<String, Integer>, label -> index); left raw because the
    // key type cannot be confirmed from this file alone.
    Map labelsIndexMap;
    int currentLabelIndex;

    /**
     * Parses the given template into its AST.
     *
     * Opens a reader on {@code template.resource}, resets the per-template
     * parsing context from the template's engine, runs the LR parse and
     * finally closes the lexer.
     *
     * @param template the template to parse
     * @return the value of the symbol left on top of the parse stack, cast to
     * TemplateAST
     * @throws ParseException any exception raised while opening, lexing or
     * parsing is converted through ExceptionUtil.castToParseException
     */
    public TemplateAST parseTemplate(final Template template) throws ParseException {
        Lexer lexer = null;
        try {
            lexer = new Lexer(template.resource.openReader());
            this.template = template;
            final Engine _engine = template.engine;
            this.engine = _engine;
            lexer.setTrimCodeBlockBlankLine(_engine.isTrimCodeBlockBlankLine());
            this.logger = _engine.getLogger();
            final TextStatmentFactory _textStatmentFactory = _engine.getTextStatmentFactory();
            this.textStatmentFactory = _textStatmentFactory;
            this.locateVarForce = !_engine.isLooseVar();
            this.placeHolderStatmentFactory = new PlaceHolderStatmentFactory(_engine.getFilter());
            // fresh managers and label bookkeeping for this template
            this.nativeImportMgr = new NativeImportManager();
            this.varmgr = new VariantManager(_engine);
            this.labelsIndexMap = new HashMap();
            // the null key is registered with index 0
            this.labelsIndexMap.put(null, 0);
            this.currentLabelIndex = 0;
            // bracket the parse with the text statement factory callbacks;
            // finishTemplateParser is only reached when parse() succeeds
            _textStatmentFactory.startTemplateParser(template);
            final Symbol sym = this.parse(lexer);
            _textStatmentFactory.finishTemplateParser(template);
            return (TemplateAST) sym.value;
        } catch (Exception e) {
            throw ExceptionUtil.castToParseException(e);
        } finally {
            if (lexer != null) {
                try {
                    lexer.yyclose();
                } catch (IOException ignore) {
                    // best effort: a failure to close must not mask the parse outcome
                }
            }
        }
    }

    /**
     * Perform a bit of user supplied action code (supplied by generated
     * subclass). Actions are indexed by an internal action number assigned at
     * parser generation time.
     *
     * @param act_num the internal index of the action to be performed.
     * @return the value that parse() stores in the Symbol it pushes for the
     * reduced production
     * @throws ParseException if the action fails
     */
    abstract Object do_action(int act_num) throws ParseException;

    /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
    /**
     * Fetch an action from one row of the action table. A row is a flat list
     * of (Symbol index, action) pairs terminated by a default pair whose
     * Symbol index is -1. Rows shorter than 20 shorts (fewer than 10 pairs)
     * are scanned linearly; longer rows are binary searched over every pair
     * except the trailing default one.
     *
     * @param row actionTable[state]
     * @param sym the Symbol index of the action being accessed.
     * @return the action paired with sym, otherwise the row's default action;
     * 0 (error) if a short row contains neither
     */
    private short getAction(final short[] row, int sym) {
        final int row_len = row.length;

        if (row_len < 20) {
            /* linear search; only complete (tag, action) pairs are examined,
             so a dangling trailing tag can never index past the row */
            for (int probe = 0; probe + 1 < row_len; probe += 2) {
                final short tag = row[probe];
                /* is this entry labeled with our Symbol or the default? */
                if (tag == sym || tag == -1) {
                    return row[probe + 1];
                }
            }
            /* shouldn't happen, but if we run off the end we return the
             default (error == 0) */
            return 0;
        }

        /* otherwise binary search over pair indexes */
        int first = 0;
        int last = ((row_len - 1) >> 1) - 1;  /* leave out trailing default entry */
        while (first <= last) {
            final int probe = (first + last) >>> 1;
            final int tagIndex = probe << 1;
            final short tag = row[tagIndex];
            if (sym == tag) {
                return row[tagIndex + 1];
            } else if (sym > tag) {
                first = probe + 1;
            } else {
                last = probe - 1;
            }
        }

        /* not found, use the default at the end */
        return row[row_len - 1];
    }

    /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
    /**
     * Fetch a state from one row of the reduce-goto table. A row is a flat
     * list of (Symbol index, state) pairs terminated by a default pair whose
     * Symbol index is -1; the row is scanned linearly.
     *
     * @param row reduceTable[state]
     * @param sym the Symbol index of the entry being accessed.
     * @return the state paired with sym or with the default entry, or -1
     * (error) if the row contains neither
     */
    private short getReduce(final short[] row, int sym) {
        final int len = row.length;
        /* only complete (tag, state) pairs are examined */
        for (int probe = 0; probe + 1 < len; probe += 2) {
            final short tag = row[probe];
            /* is this entry labeled with our Symbol or the default? */
            if (tag == sym || tag == -1) {
                return row[probe + 1];
            }
        }
        /* if we run off the end we return the default (error == -1) */
        return -1;
    }

    /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
    /**
     * This method provides the main parsing routine. It loops until the
     * goonParse flag is cleared (presumably by the generated do_action() once
     * the input is accepted -- confirm in the subclass) or until no action is
     * defined for the current state and token, in which case a ParseException
     * is thrown. See the header documentation for the class regarding how
     * shift/reduce parsers operate and how the various tables are used.
     *
     * @param myLexer the token source
     * @return the Symbol on top of the parse stack when the loop ends
     * @throws Exception whatever the lexer or do_action() raise, or a
     * ParseException when the parser cannot continue
     */
    private Symbol parse(final Lexer myLexer) throws Exception {
        /* the current action code */
        int act;
        Symbol cur_token;
        Symbol currentSymbol;

        /* start from an empty stack holding only the start state */
        final Stack<Symbol> stack = this._stack;
        stack.clear();
        currentSymbol = new Symbol(0, null);
        currentSymbol.state = Parser.START_STATE;
        stack.push(currentSymbol);

        final short[][] actionTable = Parser.ACTION_TABLE;
        final short[][] reduceTable = Parser.REDUCE_TABLE;
        final short[][] productionTable = Parser.PRODUCTION_TABLE;

        /* get the first token */
        cur_token = myLexer.nextToken();

        /* continue until we are told to stop */
        goonParse = true;
        do {

            /* look up action out of the current state with the current input */
            act = getAction(actionTable[currentSymbol.state], cur_token.id);

            /* decode the action -- > 0 encodes shift */
            if (act > 0) {
                /* shift to the encoded state by pushing it on the _stack */
                cur_token.state = act - 1;
                stack.push(currentSymbol = cur_token);

                /* advance to the next Symbol */
                cur_token = myLexer.nextToken();
            } else if (act < 0) {
                /* if its less than zero, then it encodes a reduce action */
                act = (-act) - 1;
                /* run the user action first: the handle is still on the _stack */
                final Object result = do_action(act);
                /* production row: [0] = left hand side Symbol id, [1] = handle size */
                final short[] row = productionTable[act];
                final int symId = row[0];
                final int handleSize = row[1];
                if (handleSize == 0) {
                    currentSymbol = new Symbol(symId, result);
                } else {
                    /* position based on left: the deepest Symbol of the handle */
                    currentSymbol = new Symbol(symId, result, stack.peek(handleSize - 1));
                    /* pops the handle off the _stack */
                    stack.pops(handleSize);
                }

                /* look up the state to go to from the one popped back to */
                /* shift to that state */
                /* NOTE(review): a -1 result (no goto entry) is not checked here */
                currentSymbol.state = getReduce(reduceTable[stack.peek().state], symId);
                stack.push(currentSymbol);

            } else {//act == 0
                throw new ParseException(StringUtil.concat("Parser stop at: ", Integer.toString(myLexer.getLine()), "(", Integer.toString(myLexer.getColumn()), ")"), myLexer.getLine(), myLexer.getColumn());
            }
        } while (goonParse);

        return stack.peek();//lhs_sym;
    }

    /**
     * Loads a serialized {@code short[][]} parse table from the classpath
     * resource {@code webit/script/core/Parser$<name>.data}.
     *
     * @param name the table name used to build the resource path
     * @return the deserialized table
     * @throws Error if the resource is missing, cannot be read, or does not
     * deserialize
     */
    static short[][] loadFromDataFile(String name) {
        final String path = StringUtil.concat("webit/script/core/Parser$", name, ".data");
        final java.io.InputStream raw = ClassLoaderUtil
                .getDefaultClassLoader()
                .getResourceAsStream(path);
        if (raw == null) {
            /* getResourceAsStream signals a missing resource with null; fail
             with the path instead of an anonymous NullPointerException */
            throw new Error("Parser table resource not found: " + path);
        }
        ObjectInputStream in = null;
        try {
            in = new ObjectInputStream(raw);
            return (short[][]) in.readObject();
        } catch (IOException e) {
            throw new Error(e);
        } catch (ClassNotFoundException e) {
            throw new Error(e);
        } finally {
            try {
                if (in != null) {
                    in.close();
                } else {
                    /* the ObjectInputStream constructor failed: close the
                     underlying stream ourselves */
                    raw.close();
                }
            } catch (IOException ignored) {
                /* best effort: nothing useful can be done if close fails */
            }
        }
    }
}