// Java implementation of Devin Cook's GOLD Parser engine
package com.creativewidgetworks.goldparser.engine;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.Map;
import java.util.Stack;
import java.util.TreeMap;
import com.creativewidgetworks.goldparser.engine.enums.AdvanceMode;
import com.creativewidgetworks.goldparser.engine.enums.CGTRecord;
import com.creativewidgetworks.goldparser.engine.enums.EndingMode;
import com.creativewidgetworks.goldparser.engine.enums.LRActionType;
import com.creativewidgetworks.goldparser.engine.enums.ParseMessage;
import com.creativewidgetworks.goldparser.engine.enums.ParseResult;
import com.creativewidgetworks.goldparser.engine.enums.SymbolType;
import com.creativewidgetworks.goldparser.util.FormatHelper;
/**
* Parser
*
* This is the main class in the GOLD Parser Engine and is used to perform
* all duties required for parsing a source text string. This class
* contains the LALR(1) state machine code, the DFA state machine code,
* the character table (used by the DFA algorithm), and all other structures
* and methods needed to interact with the developer.
*
* Dependencies:
* @see Group
* @see GroupList
* @see FAStateList
* @see LRState
* @see Position
* @see Production
* @see ProductionList
* @see Reduction
* @see Symbol
* @see SymbolList
* @see Token
*
* Note that several class fields are marked as protected instead of private. This was done
* to avoid having to add getters used only for testing. If and when the tests are
* refactored to use reflection to probe these fields, they can once again be marked
* private.
*
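* A minimal lifecycle sketch. The loadTables/open/parse methods are protected, so this
* assumes it runs inside a subclass that exposes them (a wrapper class along the lines
* of GOLDParser typically fills this role); the grammar file name is hypothetical:
* <pre>{@code
* loadTables(new File("MyGrammar.egt"));   // compiled grammar tables
* open("2 + 3 * 4");                       // source text to parse
* ParseMessage msg;
* do {
*     msg = parse();
* } while (msg == ParseMessage.TOKEN_READ || msg == ParseMessage.REDUCTION);
* boolean accepted = (msg == ParseMessage.ACCEPT);
* }</pre>
*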
* @author Devin Cook (http://www.DevinCook.com/GOLDParser)
* @author Ralph Iden (http://www.creativewidgetworks.com), port to Java
* @version 5.0.0
*/
public class Parser {
// Standard attribute names
public static final String ABOUT = "About";
public static final String AUTHOR = "Author";
public static final String CASE_SENSITIVE = "Case Sensitive";
public static final String CHARACTER_MAPPING = "Character Mapping";
public static final String CHARACTER_SET = "Character Set";
public static final String GENERATED_BY = "Generated By";
public static final String GENERATED_DATE = "Generated Date";
public static final String NAME = "Name";
public static final String START_SYMBOL = "Start Symbol";
public static final String VERSION = "Version";
public static final String PARSER_NAME = "GOLD Parser Engine - Version ";
public static final String PARSER_VERSION = "5.0.3";
// Flag to indicate which grammar table file is being processed
protected boolean version1Format;
// Symbols recognized by the system
protected SymbolList symbolTable;
// DFA
protected FAStateList dfa;
protected CharacterSetList characterSetTable;
protected StringBuilder lookaheadBuffer;
// Productions
protected ProductionList productionTable;
// LALR
protected LRStateList lrStates;
private int currentLALR;
protected Stack<Token> stack;
// Fields for Reductions and errors
private SymbolList expectedSymbols;
protected boolean haveReduction;
private boolean trimReductions;
// Locally used fields
private boolean tablesLoaded;
private Stack<Token> inputTokens; // Tokens to be analyzed
// Input reader for the source code to parse
protected Reader source;
// Line and column information
private Position sysPosition; // Internal only, so user cannot alter values
private Position currentPosition; // Location of last read terminal
// Grammar attributes
protected Map<String, String> attributes;
// Lexical groups
private Stack<Token> groupStack;
protected GroupList groupTable;
public Parser() {
stack = new Stack<Token>();
inputTokens = new Stack<Token>();
groupStack = new Stack<Token>();
}
/*----------------------------------------------------------------------------*/
/**
* Return library name and version information
* @return version information
*/
public String about() {
return PARSER_NAME + PARSER_VERSION;
}
/*----------------------------------------------------------------------------*/
/**
* Consume/remove characters from the front of the lookahead buffer
* and adjust the value of the system Position object.
* @param count the number of characters to consume
*/
private void consumeBuffer(int count) {
if (count > 0 && count <= lookaheadBuffer.length()) {
// Adjust position
for (int i = 0; i < count; i++) {
char c = lookaheadBuffer.charAt(i);
if (c == 0x0A) {
if (sysPosition.getColumn() > 1) {
// Bare Unix EOLN (LF); a preceding CR (CRLF) has already incremented the line and reset the column
sysPosition.incrementLine();
}
} else if (c == 0x0D) {
sysPosition.incrementLine();
} else {
sysPosition.incrementColumn();
}
}
// Remove the characters
lookaheadBuffer.delete(0, count);
}
}
/*----------------------------------------------------------------------------*/
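/**
* Returns the value of a grammar attribute gathered while the parse tables were
* loaded; the single-argument form returns null when the attribute is absent, and
* the two-argument form returns the supplied default instead.
* A sketch using the standard attribute names defined above (the values shown are
* hypothetical):
* <pre>{@code
* String grammar = parser.getAttribute(Parser.NAME);              // e.g. "My Grammar"
* String author  = parser.getAttribute(Parser.AUTHOR, "unknown"); // default when absent
* }</pre>
*/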
public String getAttribute(String name) {
return getAttribute(name, null);
}
public String getAttribute(String name, String defaultValue) {
if (attributes == null) {
attributes = new TreeMap<String, String>();
}
String value = attributes.get(name);
return value == null ? defaultValue : value;
}
public void setAttribute(String name, String value) {
if (attributes == null) {
attributes = new TreeMap<String, String>();
}
attributes.put(name, value);
}
/*----------------------------------------------------------------------------*/
public Position getCurrentPosition() {
return currentPosition;
}
/*----------------------------------------------------------------------------*/
/**
* Return the last token read by the parser
* @return the last token read by the parser.
*/
protected Token getCurrentToken() {
return inputTokens.peek();
}
/*----------------------------------------------------------------------------*/
/**
* When parse() returns ParseMessage.REDUCTION, this method returns the current reduction.
* @return the reduction
*/
public Reduction getCurrentReduction() {
return haveReduction ? stack.peek().asReduction() : null;
}
protected void setCurrentReduction(Reduction reduction) {
if (haveReduction) {
stack.peek().setData(reduction);
}
}
/*----------------------------------------------------------------------------*/
/**
* Return the list of expected symbols
* @return SymbolList of expected symbols
*/
public SymbolList getExpectedSymbols() {
return expectedSymbols;
}
/*----------------------------------------------------------------------------*/
/**
* Searches the symbol table for the first occurrence of the
* specified symbol type.
* @param type the SymbolType to find
* @return Symbol, the first symbol of the type or null if no symbol was found
*/
private Symbol getFirstSymbolOfType(SymbolType type) {
for (Symbol symbol : symbolTable) {
if (symbol.getType().equals(type)) {
return symbol;
}
}
return null;
}
/*----------------------------------------------------------------------------*/
/**
* Searches the symbol table for the symbol with the specified name.
* @param name the name of the symbol to find
* @return Symbol, the symbol or null if no symbol was found
*/
protected Symbol getSymbolByName(String name) {
if (symbolTable != null) {
for (Symbol symbol : symbolTable) {
if (symbol.getName().equals(name)) {
return symbol;
}
}
}
return null;
}
/*----------------------------------------------------------------------------*/
/**
* Return the parser's current position (row, column)
* @return Position
*/
public Position getPosition() {
return currentPosition;
}
/*----------------------------------------------------------------------------*/
protected Reader getSource() {
return source;
}
/*----------------------------------------------------------------------------*/
protected boolean isVersion1Format() {
return version1Format;
}
protected void setVersion1Format(boolean version1) {
version1Format = version1;
}
/*----------------------------------------------------------------------------*/
/**
* Loads the parse tables from the specified file.
* NOTE: Only CGT version 5.0 is supported.
* @param file the file to open and load.
* @return true if the file was successfully processed.
* @throws IOException if the file is missing or cannot be read.
*/
protected boolean loadTables(File file) throws IOException {
if (file == null) {
throw new IOException(FormatHelper.formatMessage("messages", "error.cgt_missing"));
}
return loadTables(new FileInputStream(file));
}
/**
* Loads the parse tables from the specified input stream. The input stream will
* be closed when the method returns.
* NOTE: Only CGT version 5.0 is supported.
* @param input the stream to read.
* @return true if the stream was successfully processed.
* @throws IOException if the stream cannot be read.
*/
protected boolean loadTables(InputStream input) throws IOException {
boolean result = true;
int index;
CGT cgt = new CGT();
CharacterSet characterSet;
try {
cgt.open(input);
restart();
tablesLoaded = false;
while (result) {
cgt.getNextRecord();
if (cgt.atEOF()) {
break;
}
int recordType = cgt.retrieveByte();
// System.out.println(CGTRecord.getCGTRecord(recordType));
switch (CGTRecord.getCGTRecord(recordType)) {
case PARAMETER:
version1Format = true;
setAttribute(NAME, cgt.retrieveString());
setAttribute(VERSION, cgt.retrieveString());
setAttribute(AUTHOR, cgt.retrieveString());
setAttribute(ABOUT, cgt.retrieveString());
setAttribute(CASE_SENSITIVE, Boolean.toString(cgt.retrieveBoolean()));
setAttribute(START_SYMBOL, Integer.toString(cgt.retrieveInteger()));
break;
case PROPERTY:
// Index (not used), name, value
version1Format = false;
cgt.retrieveInteger(); // Index (not used)
setAttribute(cgt.retrieveString(), cgt.retrieveString());
break;
// Counts for Symbols, Rules, DFA, and LALR lists
case COUNTS:
case COUNTS5:
symbolTable = new SymbolList(cgt.retrieveInteger());
characterSetTable = new CharacterSetList(cgt.retrieveInteger());
productionTable = new ProductionList(cgt.retrieveInteger());
dfa = new FAStateList(cgt.retrieveInteger());
lrStates = new LRStateList(cgt.retrieveInteger());
if (!isVersion1Format()) {
groupTable = new GroupList(cgt.retrieveInteger());
} else {
// Create an empty table in case GROUP_START/END pairs follow
groupTable = new GroupList();
}
break;
// Character set
case CHARSET:
index = cgt.retrieveInteger();
characterSet = new CharacterSet();
characterSetTable.set(index, characterSet);
characterSet.add(new CharacterRange(cgt.retrieveString()));
break;
// Character range
case CHARRANGES:
index = cgt.retrieveInteger();
cgt.retrieveInteger(); // codepage
cgt.retrieveInteger(); // total sets
cgt.retrieveEntry(); // reserved
characterSet = new CharacterSet();
characterSetTable.set(index, characterSet);
while (!cgt.isRecordComplete()) {
characterSet.add(new CharacterRange(cgt.retrieveInteger(), cgt.retrieveInteger()));
}
break;
// Symbols
case SYMBOL:
index = cgt.retrieveInteger();
String name = cgt.retrieveString();
SymbolType type = SymbolType.getSymbolType(cgt.retrieveInteger());
Symbol symbol = new Symbol(name, type, index);
symbolTable.set(index, symbol);
break;
// Rules (productions)
case RULE:
index = cgt.retrieveInteger();
int headIndex = cgt.retrieveInteger();
cgt.retrieveEntry(); // Reserved
Production production = new Production(symbolTable.get(headIndex), index);
productionTable.set(index, production);
while (!cgt.isRecordComplete()) {
int symIndex = cgt.retrieveInteger();
production.getHandle().add(symbolTable.get(symIndex));
}
break;
// Initial states for DFA and LALR
case INITIALSTATES:
dfa.setInitialState(cgt.retrieveInteger());
lrStates.setInitialState(cgt.retrieveInteger());
break;
// Groups
case GROUP:
index = cgt.retrieveInteger();
Group group = new Group();
group.setName(cgt.retrieveString());
group.setContainer(symbolTable.get(cgt.retrieveInteger()));
group.setStart(symbolTable.get(cgt.retrieveInteger()));
group.setEnd(symbolTable.get(cgt.retrieveInteger()));
group.setAdvanceMode(AdvanceMode.getAdvanceMode(cgt.retrieveInteger()));
group.setEndingMode(EndingMode.getEndingMode(cgt.retrieveInteger()));
cgt.retrieveEntry(); // Reserved
// Nesting levels
int count = cgt.retrieveInteger();
for (int i = 0; i < count; i++) {
group.getNesting().add(cgt.retrieveInteger());
}
// Link back
group.getContainer().setGroup(group);
group.getStart().setGroup(group);
group.getEnd().setGroup(group);
groupTable.set(index, group);
break;
case GROUPNESTING:
break;
case DFASTATE:
index = cgt.retrieveInteger();
boolean accept = cgt.retrieveBoolean();
int acceptIndex = cgt.retrieveInteger();
cgt.retrieveEntry(); // Reserved
if (accept) {
dfa.set(index, new FAState(symbolTable.get(acceptIndex)));
} else {
dfa.set(index, new FAState());
}
while (!cgt.isRecordComplete()) {
int setIndex = cgt.retrieveInteger();
int target = cgt.retrieveInteger();
cgt.retrieveEntry(); // Reserved
dfa.get(index).getEdges().add(new FAEdge(characterSetTable.get(setIndex), target));
}
break;
case LRSTATE:
index = cgt.retrieveInteger();
cgt.retrieveEntry(); // Reserved
LRState lrState = new LRState();
lrStates.set(index, lrState);
while (!cgt.isRecordComplete()) {
symbol = symbolTable.get(cgt.retrieveInteger());
LRActionType actionType = LRActionType.getLRActionType(cgt.retrieveInteger());
int value = cgt.retrieveInteger();
cgt.retrieveEntry(); // Reserved
lrState.add(new LRAction(symbol,actionType, value));
}
break;
case UNDEFINED:
throw new IOException("Unknown record type of " + recordType + " was read.");
}
}
} finally {
cgt.close();
}
tablesLoaded = result;
resolveCommentGroupsForVersion1Grammars();
return result;
}
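/*
* Sketch: loading compiled grammar tables. Either overload above may be used; the
* resource name below is hypothetical and, because loadTables is protected, the
* calls are assumed to run inside a subclass:
*
*   InputStream in = getClass().getResourceAsStream("/grammars/MyGrammar.egt");
*   boolean loaded = loadTables(in); // the stream is closed before the method returns
*/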
/*----------------------------------------------------------------------------*/
/**
* Return a single character at charIndex. This method will read and fill the
* buffer as needed from the source stream.
* @param charIndex 1-based offset into the lookahead buffer.
* @return the character that was read, or "" if EOF has been reached.
*/
private String lookahead(int charIndex) {
if (charIndex >= 0) {
if (charIndex > lookaheadBuffer.length()) {
// Requesting data past the end of stream, so perform a read
int readCount = charIndex - lookaheadBuffer.length();
for (int i = 0; i < readCount; i++) {
int c;
try {
c = source.read();
} catch (IOException ioe) {
c = -1;
}
if (c != -1) {
lookaheadBuffer.append((char)c);
} else {
break; // EOF reached
}
}
}
// If the buffer is still smaller than charIndex, we have reached
// the end of the text. In this case, return an empty string - the DFA
// code will understand.
return (charIndex <= lookaheadBuffer.length()) ? String.valueOf(lookaheadBuffer.charAt(charIndex - 1)) : "";
}
return "";
}
/*----------------------------------------------------------------------------*/
/**
* Return count characters from the lookahead buffer.
* These characters are used to create the text stored in a token. Because of
* the design of the DFA algorithm, count should never exceed the buffer length.
* @param count number of characters to return
* @return String
*/
private String getLookaheadBuffer(int count) {
if (count > lookaheadBuffer.length()) {
count = lookaheadBuffer.length();
}
return count > 0 ? lookaheadBuffer.substring(0, count) : "";
}
/*----------------------------------------------------------------------------*/
/**
* This method implements the DFA for the parser's lexer. A
* token is generated which is used by the LALR state machine.
* @return Token
*/
private Token lookaheadDFA() {
Token token = new Token();
int currentDFA = dfa.getInitialState();
int curPosition = 1; // Next byte in the input stream
int lastAcceptState = -1; // Nothing has been accepted yet
int lastAcceptPosition = -1;
int target = 0;
String str = lookahead(1);
if (str.length() > 0) {
boolean found;
boolean done = false;
while (!done) {
// Search all the branches of the current DFA state for the next
// character in the input stream. If found, the target state is returned.
str = lookahead(curPosition);
if (str.length() == 0) {
found = false;
} else {
found = false;
for (int i = 0; !found && i < dfa.get(currentDFA).getEdges().size(); i++) {
FAEdge edge = dfa.get(currentDFA).getEdges().get(i);
if (edge.getChars().contains(str.charAt(0))) {
found = true;
target = edge.getTarget();
}
}
}
// This block checks whether an edge was found from the current state. If so, the
// state and current position advance. Otherwise it is time to exit the main loop
// and report the token found (if there was one). If lastAcceptState is -1, then
// we never found a match and the error token is created. Otherwise, a new token is
// created using the symbol in the accept state and all the characters that comprise it.
if (found) {
// This code checks whether the target state accepts a token. If so, it sets
// the appropriate variables so when the algorithm is done, it can return the
// proper token and number of characters.
if (dfa.get(target).getAccept() != null) {
lastAcceptState = target;
lastAcceptPosition = curPosition;
}
currentDFA = target;
curPosition++;
} else {
// No edge found
done = true;
if (lastAcceptState == -1) {
// Lexer doesn't recognize the symbol
token.setSymbol(getFirstSymbolOfType(SymbolType.ERROR));
token.setData(getLookaheadBuffer(1));
} else {
// Create Token and read characters
// Data contains the total number of accept characters
token.setSymbol(dfa.get(lastAcceptState).getAccept());
token.setData(getLookaheadBuffer(lastAcceptPosition));
}
}
}
} else {
token.setData("");
token.setSymbol(getFirstSymbolOfType(SymbolType.END));
}
token.setPosition(new Position(sysPosition));
return token;
}
/*----------------------------------------------------------------------------*/
/**
* Open a file to be parsed
* @param sourceFile the file to be parsed
* @return true if the file is ready to be parsed
* @throws IOException if the file is not available.
*/
protected boolean open(File sourceFile) throws IOException {
return open(new FileReader(sourceFile));
}
/**
* Prepare the parser to process the source contained in the String.
* @param sourceStatements the source code to parse
* @return true if the source is ready to be parsed.
*/
protected boolean open(String sourceStatements) {
return open(new StringReader(sourceStatements));
}
/**
* Open the reader to be parsed
* @param reader the reader that supplies the source code to parse.
* @return true if the reader is ready to be parsed.
*/
protected boolean open(Reader reader) {
restart();
source = reader;
stack.push(new Token());
return true;
}
/*----------------------------------------------------------------------------*/
/**
* Returns the next token in the stream. This method can be overridden to support
* virtual terminals (indentation-sensitive grammars, etc.); see the sketch that follows.
*/
protected Token nextToken() {
return produceToken();
}
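/*
* Sketch of the override hook mentioned above: a hypothetical subclass that injects
* queued virtual terminals (e.g. INDENT/DEDENT tokens for an indentation-sensitive
* grammar) before falling back to the normal lexer:
*
*   @Override
*   protected Token nextToken() {
*       Token virtual = pendingVirtualTokens.poll(); // hypothetical queue maintained by the subclass
*       return virtual != null ? virtual : produceToken();
*   }
*/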
/**
* Performs a parse action on the input stream. This method is typically used in a loop until
* either the grammar is accepted or an error occurs.
* @return ParseMessage
*/
protected ParseMessage parse() {
if (!tablesLoaded) {
return ParseMessage.NOT_LOADED_ERROR;
}
Token read;
ParseMessage parseMessage = ParseMessage.UNDEFINED;
// Loop until a breakable event
boolean done = false;
while (!done) {
if (inputTokens.size() == 0) {
read = nextToken();
inputTokens.push(read);
// Handle the case where an unterminated comment block consumes the entire program
if (SymbolType.END.equals(read.getType()) && groupStack.size() > 0) {
// Runaway group
parseMessage = ParseMessage.GROUP_ERROR;
} else {
// A good token was read
parseMessage = ParseMessage.TOKEN_READ;
}
done = true;
} else {
read = inputTokens.peek();
currentPosition.set(read.getPosition()); // Update current position
if (SymbolType.NOISE.equals(read.getType())) {
// Discard token - these tokens were already reported to the user
inputTokens.pop();
} else if (SymbolType.ERROR.equals(read.getType())) {
parseMessage = ParseMessage.LEXICAL_ERROR;
done = true;
} else if (SymbolType.END.equals(read.getType()) && groupStack.size() > 0) {
// Runaway group
parseMessage = ParseMessage.GROUP_ERROR;
done = true;
} else {
ParseResult parseResult = parseLALR(read); // Same method as v1
switch (parseResult) {
case ACCEPT:
parseMessage = ParseMessage.ACCEPT;
done = true;
break;
case INTERNAL_ERROR:
parseMessage = ParseMessage.INTERNAL_ERROR;
done = true;
break;
case REDUCE_NORMAL:
parseMessage = ParseMessage.REDUCTION;
done = true;
break;
case SHIFT:
// parseLALR() shifted the token at the front of the input queue. It now
// exists on the parse stack and must be removed from the queue.
inputTokens.remove(0);
break;
case SYNTAX_ERROR:
parseMessage = ParseMessage.SYNTAX_ERROR;
done = true;
break;
case REDUCE_ELIMINATED: // fall through intended
case UNDEFINED:
// do nothing
break;
}
}
}
}
return parseMessage;
}
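/*
* Sketch of a caller reacting to the messages returned by parse(). Since parse() is
* protected, the loop is assumed to live in a subclass or wrapper; handleReduction()
* and reportError() are hypothetical callbacks:
*
*   boolean done = false;
*   while (!done) {
*       switch (parse()) {
*           case TOKEN_READ:   break;                                        // a terminal was read
*           case REDUCTION:    handleReduction(getCurrentReduction()); break;
*           case ACCEPT:       done = true; break;                           // parse complete
*           case SYNTAX_ERROR: reportError(getExpectedSymbols()); done = true; break;
*           default:           done = true; break;                           // lexical, group, internal, or not-loaded error
*       }
*   }
*/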
/*----------------------------------------------------------------------------*/
/**
* This method analyzes a token and either:
* 1. Makes a SINGLE reduction and pushes a complete Reduction object on the stack
* 2. Accepts the token and shifts
* 3. Reports a syntax error and populates the list of expected symbols.
*
* @param nextToken to be analyzed
* @return ParseResult
*/
private ParseResult parseLALR(Token nextToken) {
ParseResult parseResult = null;
haveReduction = false;
LRAction parseAction = lrStates.get(currentLALR).find(nextToken);
//System.out.println("Action: " + parseAction.toString());
switch (parseAction.getType()) {
case ACCEPT:
haveReduction = true;
parseResult = ParseResult.ACCEPT;
break;
case REDUCE:
// Produce a reduction - remove as many tokens as members in the rule and push a nonterminal token
Production production = productionTable.get(parseAction.getValue());
Token head;
if (trimReductions && production.containsOneNonTerminal()) {
// The current rule only consists of a single nonterminal and can be trimmed from the
// parse tree. Usually we create a new Reduction, assign it as the data of the head
// token, and push it on the stack. In this case, however, the head simply takes on
// the data of the reduced token (the only one on the stack). To save code, the token
// popped off the stack is changed into the head.
head = stack.pop();
head.setSymbol(production.getHead());
parseResult = ParseResult.REDUCE_ELIMINATED;
} else {
haveReduction = true;
Reduction newReduction = new Reduction(production.getHandle().size());
newReduction.setParent(production);
for (int i = production.getHandle().size() - 1; i >= 0; i--) {
newReduction.set(i, stack.pop());
}
head = new Token(production.getHead(), newReduction);
parseResult = ParseResult.REDUCE_NORMAL;
}
// Perform the LALR goto using the state of the token now on top of the stack
int index = stack.peek().getState();
LRAction lrAction = lrStates.get(index).find(production.getHead());
if (!lrAction.equals(LRState.LRACTION_UNDEFINED)) {
currentLALR = lrAction.getValue();
head.setState(currentLALR);
stack.push(head);
} else {
parseResult = ParseResult.INTERNAL_ERROR;
}
break;
case SHIFT:
currentLALR = parseAction.getValue();
nextToken.setState(currentLALR);
stack.push(nextToken);
parseResult = ParseResult.SHIFT;
break;
case ERROR: // fall-through intended
case GOTO: // fall-through intended
case UNDEFINED:
// Syntax error - produce a list of expected symbols to report
expectedSymbols.clear();
for (LRAction action : lrStates.get(currentLALR)) {
SymbolType type = action.getSymbol().getType();
switch (type) {
case CONTENT: // fall-through intended
case END: // fall-through intended
expectedSymbols.add(action.getSymbol());
break;
case GROUP_START: // fall-through intended
case GROUP_END: // fall-through intended
case COMMENT_LINE:
expectedSymbols.add(action.getSymbol());
break;
case ERROR:
case NOISE:
case NON_TERMINAL:
case UNDEFINED:
// do nothing
break;
}
}
parseResult = ParseResult.SYNTAX_ERROR;
break;
}
return parseResult;
}
/*----------------------------------------------------------------------------*/
/**
* This method creates a token and also takes into account the current
* lexing mode of the parser. In particular, it contains the group logic.
*
* A stack is used to track the current "group"; this replaces the comment
* level counter. Text is appended to the token on the top of the stack, which
* allows the group text to be returned in one chunk.
* @return Token
*/
protected Token produceToken() {
Token token = null;
boolean nestGroup = false;
boolean done = false;
while (!done) {
Token read = lookaheadDFA();
// Groups (comments, etc.)
// Determining whether a group should be nested requires looking at both the token
// on top of the stack and the symbol's linked group, either of which can be unset.
// This section therefore sets a boolean up front, which the logic chain below uses.
if (read.getType().equals(SymbolType.GROUP_START) || read.getType().equals(SymbolType.COMMENT_LINE)) {
if (groupStack.size() == 0) {
nestGroup = true;
} else {
nestGroup = groupStack.peek().getGroup().getNesting().contains(read.getGroup().getIndex());
}
} else {
nestGroup = false;
}
// Logic chain
if (nestGroup) {
consumeBuffer(read.asString().length());
// fix up the comment block
if (read.getData() != null) {
read.appendData(read.getData().toString());
read.setData(null);
}
groupStack.push(read);
} else if (groupStack.size() == 0) {
// The token is ready to be analyzed
consumeBuffer(read.asString().length());
token = read;
done = true;
} else if (groupStack.peek().getGroup().getEnd().getTableIndex() == read.getTableIndex()) {
// End the current group
Token pop = groupStack.pop();
// Ending logic
if (pop.getGroup().getEndingMode() == EndingMode.CLOSED) {
pop.appendData(read.asString());
consumeBuffer(read.asString().length());
}
if (groupStack.size() == 0) {
// We are out of the group. Return the popped token, which contains all the group text
pop.setSymbol(pop.getGroup().getContainer());
token = pop;
done = true;
} else {
// Append group text to parent
groupStack.peek().appendData(pop.asString());
}
} else if (read.getType().equals(SymbolType.END)) {
// EOF always stops the loop. The caller method (parse) can flag a runaway group error.
token = read;
done = true;
} else {
// We are in a group; append to the token on the top of the stack,
// taking the group's advance mode into account.
Token top = groupStack.peek();
if (top.getGroup().getAdvanceMode() == AdvanceMode.TOKEN) {
// Append all text
top.appendData(read.asString());
consumeBuffer(read.asString().length());
} else {
// Append one character
top.appendData(read.asString().substring(0, 1));
consumeBuffer(1);
}
}
}
return token;
}
/*----------------------------------------------------------------------------*/
/**
* Inserts Group objects into the group table so comments can be processed in a
* grammar. It is assumed that version 1.0 files have at most one closed
* comment block and one comment line symbol.
*/
private void resolveCommentGroupsForVersion1Grammars() {
if (isVersion1Format()) {
Group group;
Symbol symbolStart = null;
Symbol symbolEnd = null;
// Create a new COMMENT_LINE group
for (Symbol currentStartSymbol : symbolTable) {
if (currentStartSymbol.getType().equals(SymbolType.COMMENT_LINE)) {
symbolStart = currentStartSymbol;
group = new Group();
group.setName("Comment Line");
group.setContainer(symbolTable.findByName(SymbolList.SYMBOL_COMMENT));
group.setStart(symbolStart);
group.setEnd(symbolTable.findByName("NewLine"));
group.setAdvanceMode(AdvanceMode.TOKEN);
group.setEndingMode(EndingMode.OPEN);
groupTable.add(group);
symbolStart.setGroup(group);
break;
}
}
// Create a new COMMENT_BLOCK group
for (Symbol currentStartSymbol : symbolTable) {
if (currentStartSymbol.getType().equals(SymbolType.GROUP_START)) {
symbolStart = symbolEnd = currentStartSymbol;
for (Symbol currentEndSymbol : symbolTable) {
if (currentEndSymbol.getType().equals(SymbolType.GROUP_END)) {
symbolEnd = currentEndSymbol;
break;
}
}
group = new Group();
group.setName("Comment Block");
group.setContainer(symbolTable.findByName(SymbolList.SYMBOL_COMMENT));
group.setStart(symbolStart);
group.setEnd(symbolEnd);
group.setAdvanceMode(AdvanceMode.TOKEN);
group.setEndingMode(EndingMode.CLOSED);
groupTable.add(group);
symbolStart.setGroup(group);
symbolEnd.setGroup(group);
break;
}
}
}
}
/*----------------------------------------------------------------------------*/
/**
* Restarts the parser. The loaded tables are retained.
*/
protected void restart() {
currentLALR = LRState.INITIAL_STATE;
sysPosition = new Position(1, 1);
currentPosition = new Position(1, 1);
lookaheadBuffer = new StringBuilder();
haveReduction = false;
if (expectedSymbols == null) {
expectedSymbols = new SymbolList();
}
expectedSymbols.clear();
if (groupStack == null) {
groupStack = new Stack<Token>();
}
groupStack.clear();
if (inputTokens == null) {
inputTokens = new Stack<Token>();
}
inputTokens.clear();
if (stack == null) {
stack = new Stack<Token>();
}
stack.clear();
}
/*----------------------------------------------------------------------------*/
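/**
* Enables or disables the trimming of reductions. When enabled, a rule whose handle
* consists of a single nonterminal is not pushed as a separate Reduction; the token
* on the stack is simply relabeled with the rule's head (see parseLALR).
* @param value true to trim single-nonterminal reductions
*/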
public void setTrimReductions(boolean value) {
trimReductions = value;
}
}