All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.vesalainen.regex.TinyTokenizer Maven / Gradle / Ivy

Go to download

Java Lookahead Parser Generator. The generator produces LALR(k) parsers. Grammar rules are entered using annotations. A rule annotation can be attached to a reducer method, which keeps the rule and its action together.

The newest version!
/*
 * Copyright (C) 2012 Timo Vesalainen
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.vesalainen.regex;

import org.vesalainen.regex.TinyExpressionParser.Op;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Iterator;
/**
 * This is part of the original hand-written regex parser. It has been replaced by the RegexParser class.
 * @author tkv
 */
class TinyTokenizer implements Iterator, Iterable
{
    private EscapeResolver resolver;
    private RangeSet current;
    private boolean concat;
    private Deque queue = new ArrayDeque();

    /**
     * Creates a tokenizer for the given regular expression text.
     * The expression is wrapped in an {@link EscapeResolver}, which is the
     * only source of characters for this tokenizer.
     *
     * @param expression the regular expression to tokenize
     */
    public TinyTokenizer(String expression)
    {
        this.resolver = new EscapeResolver(expression);
    }

    /**
     * Builds the character set that a single (possibly escaped) character stands for.
     * <p>
     * Unescaped: {@code '.'} yields the range {@code 0 .. Integer.MAX_VALUE};
     * any other character yields a set containing just that character.
     * <p>
     * Escaped: {@code d}, {@code s} and {@code w} yield the digit, whitespace
     * and word-character sets; {@code D}, {@code S} and {@code W} yield the
     * complement of the corresponding set; any other character yields a set
     * containing just that character.
     *
     * @param cc      the character as delivered by the resolver
     * @param escaped whether the character was escaped in the expression
     * @return the set of characters matched
     * @throws UnsupportedOperationException for the escaped character {@code p}
     */
    private static RangeSet getInstance(int cc, boolean escaped)
    {
        if (!escaped)
        {
            RangeSet plain = new RangeSet();
            if (cc == '.')
            {
                plain.add(new Range(0, Integer.MAX_VALUE));
            }
            else
            {
                plain.add(new Range(cc));
            }
            return plain;
        }
        if (cc == 'p')
        {
            throw new UnsupportedOperationException("Posix escapes not supported");
        }
        RangeSet members = new RangeSet();
        boolean negate = false;
        switch (cc)
        {
            case 'D':
                negate = true;
                // fall through: same members as 'd', complemented on return
            case 'd':
                members.add(new Range('0', '9'+1));
                break;
            case 'S':
                negate = true;
                // fall through: same members as 's', complemented on return
            case 's':
                members.add(new Range(' '));
                members.add(new Range('\t'));
                members.add(new Range('\n'));
                members.add(new Range(0x0B));
                members.add(new Range('\f'));
                members.add(new Range('\r'));
                break;
            case 'W':
                negate = true;
                // fall through: same members as 'w', complemented on return
            case 'w':
                members.add(new Range('a', 'z'+1));
                members.add(new Range('A', 'Z'+1));
                members.add(new Range('0', '9'+1));
                members.add(new Range('_'));
                break;
            default:
                // any other escaped character stands for itself
                members.add(new Range(cc));
                break;
        }
        return negate ? members.complement() : members;
    }

    /**
     * Returns the next operator token of the expression.
     * <p>
     * Tokens already waiting in the queue are delivered first. Otherwise one
     * character is read from the resolver. Unescaped {@code ( ) * ? + | [ {}
     * are handled as operators; every other character (and every escaped
     * character) becomes a {@code RANGE} whose set is available through
     * {@link #getRangeSet()}.
     * <p>
     * The {@code concat} flag records that an operand has just been produced.
     * When it is set, a new operand is preceded by an explicit {@code CONCAT}
     * token and the operand's own token is queued for the following call.
     *
     * @return the next token; {@code Op.ERROR} if a <code>{</code> quantifier
     *         could not be parsed
     */
    public Op next()
    {
        if (!queue.isEmpty())
        {
            return queue.pollFirst();
        }
        final int ch = resolver.next();
        final boolean escaped = resolver.isEscaped();
        if (!escaped)
        {
            if (ch == '(')
            {
                if (!concat)
                {
                    return Op.LEFT;
                }
                // operand precedes the group: emit CONCAT now, LEFT next
                queue.add(Op.LEFT);
                concat = false;
                return Op.CONCAT;
            }
            if (ch == ')')
            {
                concat = true;
                return Op.RIGHT;
            }
            if (ch == '*')
            {
                return Op.STAR;
            }
            if (ch == '?')
            {
                return Op.QUESS;
            }
            if (ch == '+')
            {
                // emitted as CONCAT, RANGE, STAR; the RANGE refers to the
                // set currently held in 'current', which is left untouched
                queue.add(Op.RANGE);
                queue.add(Op.STAR);
                return Op.CONCAT;
            }
            if (ch == '|')
            {
                concat = false;
                return Op.UNION;
            }
            if (ch == '[')
            {
                current = parseRangeSet();
                return rangeOrConcat();
            }
            if (ch == '{')
            {
                try
                {
                    fillQueue();
                }
                catch (SyntaxErrorException ex)
                {
                    return Op.ERROR;
                }
                return queue.pollFirst();
            }
        }
        current = getInstance(ch, escaped);
        return rangeOrConcat();
    }

    /**
     * Emits the token(s) for an operand whose set has just been stored in
     * {@code current}: a plain {@code RANGE} when no operand precedes it
     * (setting the concat flag), otherwise {@code CONCAT} now with
     * {@code RANGE} queued for the next call.
     */
    private Op rangeOrConcat()
    {
        if (concat)
        {
            queue.add(Op.RANGE);
            return Op.CONCAT;
        }
        concat = true;
        return Op.RANGE;
    }

    /**
     * Tells whether another token can be delivered.
     *
     * @return {@code true} if the resolver has more characters or tokens are
     *         still waiting in the queue
     */
    public boolean hasNext()
    {
        if (resolver.hasNext())
        {
            return true;
        }
        return !queue.isEmpty();
    }

    /**
     * Removal is not supported by this tokenizer.
     *
     * @throws UnsupportedOperationException always
     */
    public void remove()
    {
        final String message = "Not supported yet.";
        throw new UnsupportedOperationException(message);
    }

    /**
     * Returns the character set most recently stored by {@link #next()}.
     *
     * @return the current range set; {@code null} if none has been stored yet
     */
    public RangeSet getRangeSet()
    {
        return this.current;
    }

    /**
     * Returns this tokenizer itself as the iterator. No new iterator is
     * created, so iterating consumes the tokenizer's own state.
     *
     * @return this instance
     */
    public Iterator iterator()
    {
        final TinyTokenizer self = this;
        return self;
    }

    /**
     * Parses the body of a bracket expression; the opening {@code '['} has
     * already been consumed by the caller.
     * <p>
     * A leading unescaped {@code '^'} negates the set. Characters are read
     * until an unescaped {@code ']'}. When the character following an item is
     * an unescaped {@code '-'}, the item and the character after the dash are
     * added as a range; otherwise the item is added on its own.
     *
     * @return the parsed set, complemented if the expression was negated
     */
    private RangeSet parseRangeSet()
    {
        RangeSet members = new RangeSet();
        int first = resolver.next();
        final boolean negated = !resolver.isEscaped() && first == '^';
        if (negated)
        {
            first = resolver.next();
        }
        // stop only at an unescaped ']'
        for (int ch = first; resolver.isEscaped() || ch != ']'; ch = resolver.next())
        {
            int lookahead = resolver.peek();
            boolean dashFollows = !resolver.isEscaped() && lookahead == '-';
            if (dashFollows)
            {
                resolver.next();                // consume the '-'
                int upper = resolver.next();
                // NOTE(review): upper+1 suggests add(from, to) takes an exclusive upper bound — confirm
                members.add(ch, upper+1);
            }
            else
            {
                members.add(ch);
            }
        }
        return negated ? members.complement() : members;
    }

    private void fillQueue() throws SyntaxErrorException
    {
        int min = 0;
        int cc = resolver.next();
        while (Character.isDigit(cc))
        {
            min = 10*min+Character.digit(cc, 10);
            cc = resolver.next();
        }
        for (int ii=1;ii= min)
            {
                int gap = max-min;
                for (int ii=0;ii




© 2015 - 2025 Weber Informatics LLC | Privacy Policy