// Internal package of Alachisoft.
package Alachisoft.NCache.Parser;
//using Console = System.Console;
import java.io.*;
// C# Translation of GoldParser, by Marcus Klimstra.
// Based on GOLDParser by Devin Cook.
/**
* This is the main class in the GoldParser Engine and is used to perform all duties required to parse a source text string. This class contains the LALR(1) State Machine
* code, the DFA State Machine code, character table (used by the DFA algorithm) and all other structures and methods needed to interact with the developer.
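* <p>A minimal usage sketch (the grammar and source file names are placeholders and exception handling is
* omitted): load a compiled grammar table (CGT), open the source text, then call Parse() in a loop until it
* reports something other than a read token or a reduction.</p>
* <pre>{@code
* Parser parser = new Parser("MyGrammar.cgt");   // placeholder CGT file name
* parser.OpenFile("source.txt");                 // placeholder source file name
* ParseMessage msg;
* do {
*     msg = parser.Parse();
* } while (msg == ParseMessage.TokenRead || msg == ParseMessage.Reduction);
* parser.CloseFile();                            // msg is now Accept or an error message
* }</pre>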
*/
public class Parser {
private java.util.HashMap m_parameters;
private Symbol[] m_symbols;
private String[] m_charsets;
private Rule[] m_rules;
private FAState[] m_DfaStates;
private LRActionTable[] m_LalrTables;
private boolean m_initialized;
private boolean m_caseSensitive;
private int m_startSymbol;
private int m_initDfaState;
private Symbol m_errorSymbol;
private Symbol m_endSymbol;
private LookAheadReader m_source;
private int m_lineNumber;
private boolean m_haveReduction;
private boolean m_trimReductions;
private int m_commentLevel;
private int m_initLalrState;
private int m_LalrState;
private TokenStack m_inputTokens; // Stack of tokens to be analyzed
private TokenStack m_outputTokens; // Tokens returned by GetTokens(): the symbols expected after a syntax error, or the tokens of a reduction
private TokenStack m_tempStack; // The LALR parse stack: tokens that have been shifted or reduced so far
/*
* constructor
*/
public Parser() {
}
/**
* Creates a new Parser object for the specified CGT file.
*
* @param p_filename The name of the CGT file.
*/
public Parser(String p_filename) throws IOException {
LoadGrammar(p_filename);
}
/**
* Creates a new Parser object for the specified CGT stream.
*
* @param stream A stream containing the CGT data.
*/
public Parser(FileInputStream stream) throws IOException {
LoadGrammar(stream);
}
public final void LoadGrammar(String p_filename) throws IOException {
m_parameters = new java.util.HashMap();
m_inputTokens = new TokenStack();
m_outputTokens = new TokenStack();
m_tempStack = new TokenStack();
m_initialized = false;
m_trimReductions = false;
LoadTables(new GrammarReader(p_filename));
}
public final void LoadGrammar(InputStream stream) throws IOException {
m_parameters = new java.util.HashMap();
m_inputTokens = new TokenStack();
m_outputTokens = new TokenStack();
m_tempStack = new TokenStack();
m_initialized = false;
m_trimReductions = false;
LoadTables(new GrammarReader(stream));
}
/*
* properties
*/
/**
* Gets or sets whether or not to trim reductions which contain only one non-terminal.
*/
public final boolean getTrimReductions() {
return m_trimReductions;
}
public final void setTrimReductions(boolean value) {
m_trimReductions = value;
}
/**
* Gets the current token.
*/
public final Token getCurrentToken() {
return m_inputTokens.PeekToken();
}
/**
* Gets the Reduction made by the parsing engine. The value of this property is only valid when the Parse method returns ParseMessage.Reduction.
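* <p>Typical usage (a sketch; exception handling omitted):</p>
* <pre>{@code
* if (parser.Parse() == ParseMessage.Reduction) {
*     Reduction reduction = parser.getCurrentReduction();
*     // process the reduction here, e.g. build a node of your own parse tree
* }
* }</pre>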
*/
public final Reduction getCurrentReduction() {
if (m_haveReduction) {
Token token = m_tempStack.PeekToken();
Object tempVar = token.getData();
return ((Reduction) ((tempVar instanceof Reduction) ? tempVar : null));
} else {
return null;
}
}
public final void setCurrentReduction(Reduction value) {
if (m_haveReduction) {
m_tempStack.PeekToken().setData(value);
}
}
/**
* Gets the line number that is currently being processed.
*/
public final int getCurrentLineNumber() {
return m_lineNumber;
}
/*
* public methods
*/
/**
* Pushes the specified token onto the internal input queue. It will be the next token analyzed by the parsing engine.
*/
public final void PushInputToken(Token p_token) {
m_inputTokens.PushToken(p_token);
}
/**
* Pops the next token from the internal input queue.
*/
public final Token PopInputToken() {
return m_inputTokens.PopToken();
}
/*
* /// Returns the token at the specified index. public Token GetToken(int p_index) { return m_outputTokens.GetToken(p_index); }
*/
/**
* Returns a TokenStack containing the tokens for the reduced rule, or the tokens that were expected when a syntax error occurred.
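* <p>A sketch of reporting the expected symbols after a syntax error (assumes a surrounding loop in which
* Parse() has just returned ParseMessage.SyntaxError; how each token is rendered is up to the caller):</p>
* <pre>{@code
* TokenStack expected = parser.GetTokens();
* while (expected.getCount() > 0) {
*     Token expectedToken = expected.PopToken();
*     // report expectedToken to the user; its symbol kind is available via getKind()
* }
* }</pre>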
*/
public final TokenStack GetTokens() {
return m_outputTokens;
}
/**
* Returns a string containing the value of the specified parameter. The parameters stored by LoadGrammar are Name, Version, Author and About (the grammar's Case Sensitive and
* Start Symbol settings are read into internal fields rather than the parameter table). If the name specified is invalid, this method returns an empty string.
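* <p>For example (sketch): {@code String author = parser.GetParameter("Author");} returns the grammar's
* Author field, or an empty string if it was not present in the CGT file.</p>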
*/
public final String GetParameter(String p_name) {
String result = (String) m_parameters.get(p_name);
return (result != null ? result : "");
}
/**
* Opens the file with the specified name for parsing.
*/
public final void OpenFile(String p_filename) throws FileNotFoundException {
Reset();
m_source = new LookAheadReader(new BufferedReader(new FileReader(p_filename)));
PrepareToParse();
}
/**
* Opens the specified stream for parsing.
*/
public final void OpenStream(BufferedReader stream) {
Reset();
m_source = new LookAheadReader(stream);
PrepareToParse();
}
/**
* Closes the file opened with OpenFile.
*/
public final void CloseFile() throws IOException {
// This will automatically close the underlying reader (I think :))
if (m_source != null) {
m_source.Close();
}
m_source = null;
}
/**
* Executes a parse-action. When this method is called, the parsing engine reads information from the source text and then reports what action was taken. This can be a
* token being read and recognized from the source, a parse reduction, or some type of error.
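* <p>A minimal driver loop (a sketch only; comment errors, I/O errors and error recovery are left to the
* caller):</p>
* <pre>{@code
* ParseMessage msg;
* do {
*     msg = parser.Parse();
*     if (msg == ParseMessage.Reduction) {
*         // a rule was reduced; inspect getCurrentReduction()
*     } else if (msg == ParseMessage.SyntaxError) {
*         // the expected symbols are available from GetTokens()
*     }
* } while (msg == ParseMessage.TokenRead || msg == ParseMessage.Reduction);
* // msg is now Accept, SyntaxError, CommentError or InternalError
* }</pre>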
*/
public final ParseMessage Parse() throws IOException {
while (true) {
if (m_inputTokens.getCount() == 0) {
// we must read a token.
Token token = RetrieveToken();
if (token == null) {
throw new ParserException("RetrieveToken returned null");
}
if (token.getKind() != SymbolType.Whitespace) {
m_inputTokens.PushToken(token);
if (m_commentLevel == 0 && !CommentToken(token)) {
return ParseMessage.TokenRead;
}
}
} else if (m_commentLevel > 0) {
// we are in a block comment.
Token token = m_inputTokens.PopToken();
switch (token.getKind()) {
case CommentStart:
m_commentLevel++;
break;
case CommentEnd:
m_commentLevel--;
break;
case End:
return ParseMessage.CommentError;
}
} else {
// we are ready to parse.
Token token = m_inputTokens.PeekToken();
switch (token.getKind()) {
case CommentStart:
m_inputTokens.PopToken();
m_commentLevel++;
break;
case CommentLine:
m_inputTokens.PopToken();
DiscardLine();
break;
default:
ParseResult result = ParseToken(token);
switch (result) {
case Accept:
return ParseMessage.Accept;
case InternalError:
return ParseMessage.InternalError;
case ReduceNormal:
return ParseMessage.Reduction;
case Shift:
m_inputTokens.PopToken();
break;
case SyntaxError:
return ParseMessage.SyntaxError;
}
break;
} // switch
} // else
} // while
}
/*
* private methods
*/
private char FixCase(char p_char) {
if (m_caseSensitive) {
return p_char;
}
return Character.toLowerCase(p_char);
}
private String FixCase(String p_string) {
if (m_caseSensitive) {
return p_string;
}
return p_string.toLowerCase();
}
private void AddSymbol(Symbol p_symbol) {
if (!m_initialized) {
throw new ParserException("Table sizes not initialized");
}
int index = p_symbol.getTableIndex();
m_symbols[index] = p_symbol;
}
private void AddCharset(int p_index, String p_charset) {
if (!m_initialized) {
throw new ParserException("Table sizes not initialized");
}
m_charsets[p_index] = FixCase(p_charset);
}
private void AddRule(Rule p_rule) {
if (!m_initialized) {
throw new ParserException("Table sizes not initialized");
}
int index = p_rule.getTableIndex();
m_rules[index] = p_rule;
}
private void AddDfaState(int p_index, FAState p_fastate) {
if (!m_initialized) {
throw new ParserException("Table sizes not initialized");
}
m_DfaStates[p_index] = p_fastate;
}
private void AddLalrTable(int p_index, LRActionTable p_table) {
if (!m_initialized) {
throw new ParserException("Table counts not initialized");
}
m_LalrTables[p_index] = p_table;
}
private void LoadTables(GrammarReader reader) {
Object obj;
short index;
while (reader.MoveNext()) {
//C# TO JAVA CONVERTER WARNING: Unsigned integer types have no direct equivalent in Java:
//ORIGINAL LINE: byte id = (byte)reader.RetrieveNext();
byte id = (Byte) reader.RetrieveNext();
switch (RecordId.forValue(id)) {
case Parameters:
m_parameters.put("Name", (String) reader.RetrieveNext());
m_parameters.put("Version", (String) reader.RetrieveNext());
m_parameters.put("Author", (String) reader.RetrieveNext());
m_parameters.put("About", (String) reader.RetrieveNext());
m_caseSensitive = (Boolean) reader.RetrieveNext();
m_startSymbol = (Short) reader.RetrieveNext();
break;
case TableCounts:
m_symbols = new Symbol[(Short) reader.RetrieveNext()];
m_charsets = new String[(Short) reader.RetrieveNext()];
m_rules = new Rule[(Short) reader.RetrieveNext()];
m_DfaStates = new FAState[(Short) reader.RetrieveNext()];
m_LalrTables = new LRActionTable[(Short) reader.RetrieveNext()];
m_initialized = true;
break;
case Initial:
m_initDfaState = (Short) reader.RetrieveNext();
m_initLalrState = (Short) reader.RetrieveNext();
break;
case Symbols:
index = (Short) reader.RetrieveNext();
String name = (String) reader.RetrieveNext();
SymbolType kind = SymbolType.forValue((Short) reader.RetrieveNext());
Symbol symbol = new Symbol(index, name, kind);
AddSymbol(symbol);
break;
case CharSets:
index = (Short) reader.RetrieveNext();
String charset = (String) reader.RetrieveNext();
AddCharset(index, charset);
break;
case Rules:
index = (Short) reader.RetrieveNext();
Symbol head = m_symbols[(Short) reader.RetrieveNext()];
Rule rule = new Rule(index, head);
reader.RetrieveNext(); // reserved
while ((obj = reader.RetrieveNext()) != null) {
rule.AddItem(m_symbols[(Short) obj]);
}
AddRule(rule);
break;
case DFAStates:
FAState fastate = new FAState();
index = (Short) reader.RetrieveNext();
if ((Boolean) reader.RetrieveNext()) {
fastate.setAcceptSymbol((Short) reader.RetrieveNext());
} else {
reader.RetrieveNext();
}
reader.RetrieveNext(); // reserved
while (!reader.RetrieveDone()) {
short ci = (Short) reader.RetrieveNext();
short ti = (Short) reader.RetrieveNext();
reader.RetrieveNext(); // reserved
fastate.AddEdge(m_charsets[ci], ti);
}
AddDfaState(index, fastate);
break;
case LRTables:
LRActionTable table = new LRActionTable();
index = (Short) reader.RetrieveNext();
reader.RetrieveNext(); // reserved
while (!reader.RetrieveDone()) {
short sid = (Short) reader.RetrieveNext();
short action = (Short) reader.RetrieveNext();
short tid = (Short) reader.RetrieveNext();
reader.RetrieveNext(); // reserved
table.AddItem(m_symbols[sid], Action.forValue(action), tid);
}
AddLalrTable(index, table);
break;
case Comment:
//System.out.println("Comment record encountered");
break;
default:
throw new ParserException("Wrong id for record");
}
}
}
private void Reset() {
for (Symbol symbol : m_symbols) {
if (symbol.getKind() == SymbolType.Error) {
m_errorSymbol = symbol;
} else if (symbol.getKind() == SymbolType.End) {
m_endSymbol = symbol;
}
}
m_haveReduction = false;
m_LalrState = m_initLalrState;
m_lineNumber = 1;
m_commentLevel = 0;
m_inputTokens.Clear();
m_outputTokens.Clear();
m_tempStack.Clear();
}
private void PrepareToParse() {
Token token = new Token();
token.setState(m_initLalrState);
token.SetParent(m_symbols[m_startSymbol]);
m_tempStack.PushToken(token);
}
private void DiscardLine() throws IOException {
m_source.DiscardLine();
m_lineNumber++;
}
/**
* Returns true if the specified token is a CommentLine or CommentStart-symbol.
*/
private boolean CommentToken(Token p_token) {
return (p_token.getKind() == SymbolType.CommentLine) || (p_token.getKind() == SymbolType.CommentStart);
}
/**
* This function analyzes a token and either: 1. makes a SINGLE reduction and pushes a complete Reduction object on the stack, 2. accepts the token and shifts, or 3. errors and
* places the expected symbol indexes in the Tokens list. The Token is assumed to be valid and WILL be checked.
*/
private ParseResult ParseToken(Token p_token) {
ParseResult result = ParseResult.InternalError;
LRActionTable table = m_LalrTables[m_LalrState];
LRAction action = table.GetActionForSymbol(p_token.getTableIndex());
if (action != null) {
m_haveReduction = false;
m_outputTokens.Clear();
switch (action.getAction()) {
case Accept:
m_haveReduction = true;
result = ParseResult.Accept;
break;
case Shift:
p_token.setState(m_LalrState = action.getValue());
m_tempStack.PushToken(p_token);
result = ParseResult.Shift;
break;
case Reduce:
result = Reduce(m_rules[action.getValue()]);
break;
}
} else {
// syntax error - fill expected tokens.
m_outputTokens.Clear();
LRAction a;
for (Object aObj : table.getMembers()) {
a = (LRAction) aObj;
SymbolType kind = a.getSymbol().getKind();
if (kind == SymbolType.Terminal || kind == SymbolType.End) {
m_outputTokens.PushToken(new Token(a.getSymbol()));
}
}
result = ParseResult.SyntaxError;
}
return result;
}
/**
* Produces a reduction. Removes as many tokens as members in the rule and pushes a non-terminal token.
*/
private ParseResult Reduce(Rule p_rule) {
ParseResult result;
Token head;
if (m_trimReductions && p_rule.getContainsOneNonTerminal()) {
// The current rule only consists of a single nonterminal and can be trimmed from the
// parse tree. Usually we create a new Reduction, assign it to the Data property
// of Head and push it on the stack. However, in this case, the Data property of the
// Head will be assigned the Data property of the reduced token (i.e. the only one
// on the stack). In this case, to save code, the value popped off the stack is changed
// into the head.
head = m_tempStack.PopToken();
head.SetParent(p_rule.getRuleNonTerminal());
result = ParseResult.ReduceEliminated;
} else {
Reduction reduction = new Reduction();
reduction.setParentRule(p_rule);
m_tempStack.PopTokensInto(reduction, p_rule.getSymbolCount());
head = new Token();
head.setData(reduction);
head.SetParent(p_rule.getRuleNonTerminal());
m_haveReduction = true;
result = ParseResult.ReduceNormal;
}
int index = m_tempStack.PeekToken().getState();
LRAction action = m_LalrTables[index].GetActionForSymbol(p_rule.getRuleNonTerminal().getTableIndex());
if (action != null) {
head.setState(m_LalrState = action.getValue());
m_tempStack.PushToken(head);
} else {
throw new ParserException("Action for LALR state is null");
}
return result;
}
/**
* This method implements the DFA algorithm and returns a token to the LALR state machine.
*/
private Token RetrieveToken() throws IOException {
Token result;
int currentPos = 0;
int lastAcceptState = -1;
int lastAcceptPos = -1;
FAState currentState = m_DfaStates[m_initDfaState];
try {
while (true) {
// This code searches all the branches of the current DFA state for the next
// character in the input LookAheadReader. If an edge containing that character is
// found, its target state is used. The indexOf() call below searches the edge's
// character set for ch; FixCase() (driven by the grammar's Case Sensitive setting)
// determines whether the comparison is case sensitive.
int target = -1;
char ch = FixCase(m_source.LookAhead(currentPos));
FAEdge edge;
for (Object edgeObj : currentState.getEdges()) {
edge = (FAEdge) edgeObj;
String chars = edge.getCharacters();
if (chars.indexOf(ch) != -1) {
target = edge.getTargetIndex();
break;
}
}
// This block-if statement checks whether an edge was found from the current state.
// If so, the state and current position advance. Otherwise it is time to exit the main loop
// and report the token found (if there was in fact one). If the LastAcceptState is -1,
// then we never found a match and the Error Token is created. Otherwise, a new token
// is created using the Symbol in the Accept State and all the characters that
// comprise it.
if (target != -1) {
// This code checks whether the target state accepts a token. If so, it sets the
// appropriate variables so when the algorithm is done, it can return the proper
// token and number of characters.
if (m_DfaStates[target].getAcceptSymbol() != -1) {
lastAcceptState = target;
lastAcceptPos = currentPos;
}
currentState = m_DfaStates[target];
currentPos++;
} else {
if (lastAcceptState == -1) {
result = new Token(m_errorSymbol);
result.setData(m_source.Read(1));
} else {
Symbol symbol = m_symbols[m_DfaStates[lastAcceptState].getAcceptSymbol()];
result = new Token(symbol);
result.setData(m_source.Read(lastAcceptPos + 1));
}
break;
}
}
} catch (StreamCorruptedException e) {
result = new Token(m_endSymbol);
result.setData("");
}
UpdateLineNumber((String) result.getData());
return result;
}
private void UpdateLineNumber(String p_string) {
int index, pos = 0;
while ((index = p_string.indexOf('\n', pos)) != -1) {
pos = index + 1;
m_lineNumber++;
}
}
}