![JAR search and dependency download from the Maven repository](/logo.png)
org.vesalainen.regex.TinyTokenizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lpg Show documentation
Show all versions of lpg Show documentation
Java Lookahead Parser Generator. Generator produces LALR(k) parsers. Grammar
rules are entered using annotations. Rule annotation can be attached to reducer
method, which keeps the rule and its action together.
The newest version!
/*
* Copyright (C) 2012 Timo Vesalainen
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.vesalainen.regex;
import org.vesalainen.regex.TinyExpressionParser.Op;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Iterator;
/**
* This is part of the original hand-written regex parser. It has been replaced by the RegexParser class.
* @author tkv
*/
class TinyTokenizer implements Iterator, Iterable
{
/** Supplies the characters of the expression and reports whether each one was escaped. */
private EscapeResolver resolver;
/** Range set belonging to the most recently produced RANGE token; see getRangeSet(). */
private RangeSet current;
/** True when the next operand has to be preceded by an implicit CONCAT token. */
private boolean concat;
/**
 * Tokens that have already been produced but not yet returned by next().
 * Typed as Deque&lt;Op&gt; so that next() can return polled elements as Op
 * without a cast.
 */
private Deque<Op> queue = new ArrayDeque<Op>();
/**
 * Creates a tokenizer that reads the given regular expression through an
 * EscapeResolver.
 * @param expression the regular expression text to tokenize
 */
public TinyTokenizer(String expression)
{
    this.resolver = new EscapeResolver(expression);
}
/**
 * Builds the range set that a single expression character stands for.
 * <p>
 * Unescaped: '.' yields the range 0 .. Integer.MAX_VALUE, anything else
 * yields a set holding just that character.
 * <p>
 * Escaped: d/s/w yield the digit, whitespace and word-character sets,
 * D/S/W yield the complement of those sets, p is rejected, and any other
 * character yields a set holding just that character.
 * @param cc the character read from the expression
 * @param escaped true if the character was escaped in the expression
 * @return the range set for the character
 * @throws UnsupportedOperationException for the escaped character 'p'
 */
private static RangeSet getInstance(int cc, boolean escaped)
{
    if (!escaped)
    {
        RangeSet plain = new RangeSet();
        if (cc == '.')
        {
            plain.add(new Range(0, Integer.MAX_VALUE));
        }
        else
        {
            plain.add(new Range(cc));
        }
        return plain;
    }
    if (cc == 'p')
    {
        throw new UnsupportedOperationException("Posix escapes not supported");
    }
    RangeSet set = new RangeSet();
    switch (cc)
    {
        case 'd':
        case 'D':
            set.add(new Range('0', '9'+1));
            break;
        case 's':
        case 'S':
            set.add(new Range(' '));
            set.add(new Range('\t'));
            set.add(new Range('\n'));
            set.add(new Range(0x0B));
            set.add(new Range('\f'));
            set.add(new Range('\r'));
            break;
        case 'w':
        case 'W':
            set.add(new Range('a', 'z'+1));
            set.add(new Range('A', 'Z'+1));
            set.add(new Range('0', '9'+1));
            set.add(new Range('_'));
            break;
        default:
            // Any other escaped character stands for itself.
            set.add(new Range(cc));
            return set;
    }
    // The upper-case variant of a class escape is the complement of the lower-case one.
    boolean negated = cc == 'D' || cc == 'S' || cc == 'W';
    return negated ? set.complement() : set;
}
/**
 * Returns the next token of the expression.
 * <p>
 * Queued tokens are handed out first. Otherwise one character is read from
 * the resolver. Unescaped operator characters are translated to their Op;
 * every other character becomes a RANGE token whose range set is available
 * through getRangeSet(). When the concat flag is set, an operand is
 * announced by a CONCAT token first and the operand token itself is queued.
 * @return the next token, or Op.ERROR if a '{' quantifier fails with a
 * SyntaxErrorException
 */
public Op next()
{
    if (!queue.isEmpty())
    {
        return queue.pollFirst();
    }
    final int cc = resolver.next();
    if (resolver.isEscaped())
    {
        // An escaped character is always an operand, never an operator.
        current = getInstance(cc, resolver.isEscaped());
    }
    else
    {
        switch (cc)
        {
            case '(':
                if (!concat)
                {
                    return Op.LEFT;
                }
                queue.add(Op.LEFT);
                concat = false;
                return Op.CONCAT;
            case ')':
                concat = true;
                return Op.RIGHT;
            case '*':
                return Op.STAR;
            case '?':
                return Op.QUESS;
            case '+':
                // Emitted as CONCAT RANGE STAR, re-using the current range set.
                queue.add(Op.RANGE);
                queue.add(Op.STAR);
                return Op.CONCAT;
            case '|':
                concat = false;
                return Op.UNION;
            case '[':
                current = parseRangeSet();
                break;
            case '{':
                try
                {
                    fillQueue();
                }
                catch (SyntaxErrorException ex)
                {
                    return Op.ERROR;
                }
                return queue.pollFirst();
            default:
                current = getInstance(cc, resolver.isEscaped());
                break;
        }
    }
    // Operand: either a character class from '[' or a single character.
    if (!concat)
    {
        concat = true;
        return Op.RANGE;
    }
    queue.add(Op.RANGE);
    return Op.CONCAT;
}
/**
 * Tells whether next() has more tokens to deliver.
 * @return true if the resolver has more input or tokens are still queued
 */
public boolean hasNext()
{
    if (resolver.hasNext())
    {
        return true;
    }
    return !queue.isEmpty();
}
/**
 * Removal is not supported by this tokenizer.
 * @throws UnsupportedOperationException always
 */
public void remove()
{
    final String message = "Not supported yet.";
    throw new UnsupportedOperationException(message);
}
/**
 * Returns the range set stored by the most recent operand read in next().
 * @return the current range set; null if none has been stored yet
 */
public RangeSet getRangeSet()
{
    return this.current;
}
/**
 * Returns this tokenizer itself as the iterator, so every call shares the
 * same position in the expression.
 * @return this instance
 */
public Iterator iterator()
{
    return this;
}
/**
 * Reads the body of a character class, up to the closing unescaped ']'.
 * Called after the opening '[' has been consumed.
 * <p>
 * A leading unescaped '^' negates the class. When the lookahead is '-',
 * the current character and the character after the '-' are added as one
 * range; otherwise the current character is added on its own.
 * @return the parsed set, complemented if the class was negated
 */
private RangeSet parseRangeSet()
{
    RangeSet members = new RangeSet();
    int cc = resolver.next();
    final boolean negated = !resolver.isEscaped() && cc == '^';
    if (negated)
    {
        cc = resolver.next();
    }
    // Only an unescaped ']' closes the class.
    while (resolver.isEscaped() || cc != ']')
    {
        int lookahead = resolver.peek();
        if (!resolver.isEscaped() && lookahead == '-')
        {
            resolver.next();    // skip the '-'
            int upper = resolver.next();
            members.add(cc, upper+1);
        }
        else
        {
            members.add(cc);
        }
        cc = resolver.next();
    }
    return negated ? members.complement() : members;
}
private void fillQueue() throws SyntaxErrorException
{
int min = 0;
int cc = resolver.next();
while (Character.isDigit(cc))
{
min = 10*min+Character.digit(cc, 10);
cc = resolver.next();
}
for (int ii=1;ii= min)
{
int gap = max-min;
for (int ii=0;ii
© 2015 - 2025 Weber Informatics LLC | Privacy Policy