All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.waxeye.parser.Parser Maven / Gradle / Ivy

There is a newer version: 5.4.0
Show newest version
/*
 * Waxeye Parser Generator
 * www.waxeye.org
 * Copyright (C) 2008-2010 Orlando Hill
 * Licensed under the MIT license. See 'LICENSE' for details.
 */
package org.waxeye.parser;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Stack;
import org.waxeye.ast.AST;
import org.waxeye.ast.Char;
import org.waxeye.ast.Empty;
import org.waxeye.ast.IAST;
import org.waxeye.ast.Position;
import org.waxeye.input.IParserInput;

/**
 * Implements the logic of the parser.
 *
 * @param  The node types for the AST.
 *
 * @author Orlando Hill
 */
public abstract class Parser > implements IParser
{
    /** The empty node. */
    private final IAST empty;

    /** The char type. */
    private final E charType;

    /** The pos type. */
    private final E posType;

    /** The neg type. */
    private final E negType;

    /** The automata of the parser. */
    private final List> automata;

    /** Whether to check that all input gets parsed. */
    private final boolean eofCheck;

    /** The starting automaton. */
    private final int start;

    /**
     * Creates a new Parser.
     *
     * @param automata The automata of the parser.
     *
     * @param eofCheck Whether to check that all input gets parsed.
     *
     * @param start The starting automaton.
     *
     * @param emptyType The empty type.
     *
     * @param charType The char type.
     *
     * @param posType The positive check type.
     *
     * @param negType The negative check type.
     */
    public Parser(final List> automata,  final boolean eofCheck,
        final int start,
        final E emptyType, final E charType, final E posType, final E negType)
    {
        this.automata = automata;
        this.eofCheck = eofCheck;
        this.start = start;
        this.empty = new Empty(emptyType);
        this.charType = charType;
        this.posType = posType;
        this.negType = negType;
    }

    /** {@inheritDoc} */
    public final ParseResult parse(final IParserInput input)
    {
        return new InnerParser(input).parse();
    }

    /**
     * A hidden inner class so that we can visit the transition costs without
     * exposing things to the API user.
     *
     * @author Orlando Hill
     */
    private final class InnerParser implements ITransitionVisitor
    {
        /** The input to parse. */
        private final IParserInput input;

        /** The automata stack. */
        private final Stack> faStack;

        /** The result cache. */
        private final HashMap> cache;

        /** The line number. */
        private int line;

        /** The column number. */
        private int column;

        /** Whether the last character was a carriage return. */
        private boolean lastCR;

        /** The position of the deepest error. */
        private int errorPos;

        /** The line of the deepest error. */
        private int errorLine;

        /** The column of the deepest error. */
        private int errorCol;

        /** The nt deepest error. */
        private String errorNT;

        /**
         * Creates a new Parser.
         *
         * @param input The input to parse.
         */
        InnerParser(final IParserInput input)
        {
            this.input = input;
            this.faStack = new Stack>();
            this.cache = new HashMap>();
            this.line = 1;
            this.column = 0;
            this.lastCR = false;
            this.errorPos = 0;
            this.errorLine = 1;
            this.errorCol = 0;
            this.errorNT = automata.get(start).getType().name();
        }

        /**
         * Parses the input.
         *
         * @return The result of the parse.
         */
        ParseResult parse()
        {
            IAST ast = matchAutomaton(start);
            ParseError error = null;

            if (ast == null)
            {
                // Create a parse error
                error = new ParseError(errorPos, errorLine, errorCol, errorNT);
            }
            else
            {
                // Check that all input was consumed
                if (eofCheck && input.peek() != IParserInput.EOF)
                {
                    // Create a parse error - Not all input consumed
                    error = new ParseError(errorPos, errorLine, errorCol, errorNT);
                    ast = null;
                }
            }

            return new ParseResult(ast, error);
        }

        /**
         * Restores the input position to the given values.
         *
         * @param pos The position.
         *
         * @param line The line.
         *
         * @param col The column.
         *
         * @param cr Whether the last character was a CR.
         */
        private void restorePos(final int pos, final int line, final int col,
            final boolean cr)
        {
            this.input.setPosition(pos);
            this.line = line;
            this.column = col;
            this.lastCR = cr;
        }

        /**
         * Matches the automaton at the given index.
         *
         * @param index The index.
         *
         * @return The result.
         */
        private IAST matchAutomaton(final int index)
        {
            final int startPos = input.getPosition();
            final CacheKey key = new CacheKey(index, startPos);
            final CacheItem cachedItem = cache.get(key);

            if (cachedItem != null)
            {
                restorePos(cachedItem.getPosition(), cachedItem.getLine(),
                    cachedItem.getColumn(), cachedItem.getLastCR());
                return cachedItem.getResult();
            }

            final int startLine = line;
            final int startCol = column;
            final boolean startCR = lastCR;
            final FA automaton = automata.get(index);
            final E type = automaton.getType();
            final int mode = automaton.getMode();

            faStack.push(automaton);
            final List> res = matchState(0);
            faStack.pop();

            IAST value;

            if (type.equals(posType))
            {
                restorePos(startPos, startLine, startCol, startCR);

                if (res == null)
                {
                    value = null;
                }
                else
                {
                    value = empty;
                }
            }
            else
            {
                if (type.equals(negType))
                {
                    restorePos(startPos, startLine, startCol, startCR);

                    if (res == null)
                    {
                        value = empty;
                    }
                    else
                    {
                        updateError();
                        value = null;
                    }
                }
                else
                {
                    if (res == null)
                    {
                        updateError();
                        value = null;
                    }
                    else
                    {
                        switch (mode)
                        {
                            case FA.VOID:
                            {
                                value = empty;
                                break;
                            }
                            case FA.PRUNE:
                            {
                                switch (res.size())
                                {
                                    case 0:
                                    {
                                        value = empty;
                                        break;
                                    }
                                    case 1:
                                    {
                                        value = res.get(0);
                                        break;
                                    }
                                    default:
                                    {
                                        value = new AST(type, res, new Position(startPos, input.getPosition()));
                                        break;
                                    }
                                }
                                break;
                            }
                            default:
                            {
                                value = new AST(type, res, new Position(startPos, input.getPosition()));
                                break;
                            }
                        }
                    }
                }
            }

            cache.put(key, new CacheItem(value, input.getPosition(), line,
                column, lastCR));

            return value;
        }

        /**
         * Matches the state at the given index.
         *
         * @param index The index.
         *
         * @return The result.
         */
        private List> matchState(final int index)
        {
            final State state = faStack.peek().getStates().get(index);
            final List> res = matchEdges(state.getEdges(), 0);

            if (res == null)
            {
                if (state.isMatch())
                {
                    return new ArrayList>();
                }
                else
                {
                    return null;
                }
            }
            else
            {
                return res;
            }
        }

        /**
         * Matches the given edges starting from the given index.
         *
         * @param edges The edges.
         *
         * @param index The index.
         *
         * @return The result.
         */
        private List> matchEdges(final List> edges, final int index)
        {
            if (index < edges.size())
            {
                final List> res = matchEdge(edges.get(index));

                if (res == null)
                {
                    return matchEdges(edges, index + 1);
                }
                else
                {
                    return res;
                }
            }
            else
            {
                return null;
            }
        }

        /**
         * Matches the given edge.
         *
         * @param edge The edge.
         *
         * @return The result.
         */
        private List> matchEdge(final Edge edge)
        {
            final int startPos = input.getPosition();
            final int startLine = line;
            final int startCol = column;
            final boolean startCR = lastCR;
            final IAST res = edge.getTrans().acceptVisitor(this);

            if (res == null)
            {
                return null;
            }
            else
            {
                final List> transRes = matchState(edge.getState());

                if (transRes == null)
                {
                    restorePos(startPos, startLine, startCol, startCR);
                    return null;
                }
                else
                {
                    if (edge.isVoided() || res.equals(empty))
                    {
                        return transRes;
                    }
                    else
                    {
                        // Note: If we were to memoize state results,
                        //       this would need to be changed.
                        transRes.add(0, res);
                        return transRes;
                    }
                }
            }
        }

        /**
         * Updates the line and column numbers.
         *
         * @param ch The character being consumed.
         */
        private void updateLineCol(final char ch)
        {
            if (ch == '\r')
            {
                line++;
                column = 0;
                lastCR = true;
            }
            else
            {
                if (ch == '\n')
                {
                    if (!lastCR)
                    {
                        line++;
                        column = 0;
                    }
                }
                else
                {
                    column++;
                }

                lastCR = false;
            }
        }

        /**
         * Updates the error info if needed.
         */
        private void updateError()
        {
            if (errorPos < input.getPosition())
            {
                errorPos = input.getPosition();
                errorLine = line;
                errorCol = column;
                errorNT = faStack.peek().getType().name();
            }
        }

        /** {@inheritDoc} */
        public IAST visitAutomatonTransition(final AutomatonTransition t)
        {
            return matchAutomaton(t.getIndex());
        }

        /** {@inheritDoc} */
        public IAST visitCharTransition(final CharTransition t)
        {
            if (input.peek() != IParserInput.EOF)
            {
                final char c = (char) input.peek();

                if (t.withinSet(c))
                {
                    input.consume();
                    updateLineCol(c);
                    return new Char(c, charType);
                }
            }

            updateError();
            return null;
        }

        /** {@inheritDoc} */
        public IAST visitWildCardTransition(final WildCardTransition t)
        {
            if (input.peek() == IParserInput.EOF)
            {
                updateError();
                return null;
            }

            final char c = (char) input.consume();
            updateLineCol(c);
            return new Char(c, charType);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy