// org.vesalainen.regex.RegexMatcher Maven / Gradle / Ivy
// Show all versions of lpg Show documentation
/*
* Copyright (C) 2015 tkv
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.vesalainen.regex;
import java.util.Spliterator;
import java.util.function.Consumer;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.vesalainen.grammar.state.DFA;
import org.vesalainen.grammar.state.DFAState;
import org.vesalainen.grammar.state.NFA;
import org.vesalainen.grammar.state.NFAState;
import org.vesalainen.grammar.state.Scope;
import org.vesalainen.regex.Regex.Option;
import org.vesalainen.util.Matcher;
/**
* A regex implementation of matcher.
* This implementation creates the DFA at runtime and is therefore slower than
* using a compiled Regex.
*
* <p>
* This class is not thread safe except for streams.
* @author tkv
* @param <T> type of the token attached to each expression
* @see org.vesalainen.regex.Regex
*/
public class RegexMatcher implements Matcher
{
protected RegexParserIntf parser = RegexParserFactory.newInstance();
protected Scope> nfaScope = new Scope<>("org.vesalainen.regex.RegexMatcher");
protected NFA nfa;
protected DFA dfa;
protected DFAState root;
protected DFAState state;
protected T matched;
/**
 * Creates an empty RegexMatcher.
 * No expression is registered by this constructor; expressions are supplied
 * afterwards through {@code addExpression}.
 */
public RegexMatcher()
{
    super();
}
/**
 * Creates a RegexMatcher and registers one initial expression.
 * The arguments are handed unchanged to {@code addExpression}.
 * @param expr the regular expression
 * @param attach the token associated with the expression
 * @param options options applied to the expression
 */
public RegexMatcher(String expr, T attach, Option... options)
{
    super();
    this.addExpression(expr, attach, options);
}
/**
 * Adds an expression to this matcher.
 * The expression is parsed into an NFA. The first expression becomes the
 * matcher's NFA; every later one is combined with the NFA collected so far.
 * @param expr the regular expression
 * @param attach the token associated with the expression
 * @param options options applied to the expression
 * @throws IllegalStateException if the parser has already been released,
 * which is what {@code compile()} does
 */
public void addExpression(String expr, T attach, Option... options)
{
    if (parser == null)
    {
        // compile() sets parser (and nfaScope) to null; fail with a clear
        // message instead of a bare NullPointerException.
        throw new IllegalStateException("already compiled");
    }
    NFA<T> added = parser.createNFA(nfaScope, expr, attach, options);
    if (nfa == null)
    {
        nfa = added;
    }
    else
    {
        nfa = new NFA<>(nfaScope, nfa, added);
    }
}
/**
 * Compiles the added expressions.
 * The collected NFA is converted to a DFA, and both {@code root} and
 * {@code state} are set to the DFA's root. If no expression was added, a
 * freshly created NFA is compiled instead. The parser, the NFA scope and the
 * NFA are released afterwards.
 *
 * <p>
 * Calling this method on an already compiled matcher does nothing. Without
 * that guard a second call would find {@code nfa == null} and replace the
 * compiled DFA with one built from a new, empty NFA.
 */
public void compile()
{
    if (root != null)
    {
        return;
    }
    Scope<DFAState<T>> dfaScope = new Scope<>("org.vesalainen.regex.RegexMatcher");
    if (nfa == null)
    {
        // NOTE(review): the placeholder NFA is created with the DFA scope
        // rather than nfaScope - confirm this is intended.
        nfa = new NFA(dfaScope);
    }
    dfa = nfa.constructDFA(dfaScope);
    state = root = dfa.getRoot();
    // Construction-time objects are no longer needed once the DFA exists.
    parser = null;
    nfaScope = null;
    nfa = null;
}
/**
 * Tells whether this matcher has been compiled.
 * @return true when {@code root} is non-null; {@code compile()} is the
 * method that assigns it
 */
public boolean isCompiled()
{
    boolean hasRoot = (null != root);
    return hasRoot;
}
/**
* Matches given text. Returns associated token if match, otherwise null.
* @param text
* @return
*/
public T match(CharSequence text)
{
if (root == null)
{
throw new IllegalStateException("not compiled");
}
int length = text.length();
for (int ii=0;iiStream is safe to use same regex from different thread.
* @param seq
* @return
*/
/**
 * Returns a sequential stream of the subsequences of seq that are delimited
 * by this regex.
 * Each returned stream keeps its own current DFA state. If this matcher is
 * not yet compiled, it is compiled when the stream is created.
 * @param seq the text to split
 * @return stream of the subsequences between delimiters
 */
public Stream<CharSequence> split(CharSequence seq)
{
    return StreamSupport.stream(new SpliteratorImpl(seq, this), false);
}
/**
 * Returns a sequential stream of the subsequences of seq that are delimited
 * by the given regex.
 * A matcher is created from regex and options for this call.
 * @param seq the text to split
 * @param regex the delimiter expression
 * @param options options applied to the delimiter expression
 * @return stream of the subsequences between delimiters
 */
public static Stream<CharSequence> split(CharSequence seq, String regex, Option... options)
{
    return StreamSupport.stream(new SpliteratorImpl(seq, regex, options), false);
}
/**
 * Returns a sequential stream of the subsequences of seq that are delimited
 * by the given matcher.
 * <p>
 * Each returned stream keeps its own current DFA state, so the same matcher
 * can back streams used from different threads. Note that a matcher which is
 * not yet compiled is compiled by this call.
 * @param seq the text to split
 * @param matcher the matcher describing the delimiter
 * @return stream of the subsequences between delimiters
 */
public static Stream<CharSequence> split(CharSequence seq, RegexMatcher matcher)
{
    return StreamSupport.stream(new SpliteratorImpl(seq, matcher), false);
}
private static class SpliteratorImpl implements Spliterator
{
private CharSequence seq;
private int length;
private DFAState root;
private DFAState state;
private int start;
private int end;
public SpliteratorImpl(CharSequence seq, String regex, Option... options)
{
this(seq, new RegexMatcher(regex, "token", options));
}
public SpliteratorImpl(CharSequence seq, RegexMatcher matcher)
{
this.seq = seq;
this.length = seq.length();
if (!matcher.isCompiled())
{
matcher.compile();
}
root = state = matcher.root;
}
@Override
public boolean tryAdvance(Consumer super CharSequence> action)
{
while (start < length && delim(seq.charAt(start)))
{
start++;
}
if (start + 1 >= length)
{
return false;
}
end = start+1;
while (end < length && !delim(seq.charAt(end)))
{
end++;
}
action.accept(seq.subSequence(start, end));
start = end+1;
return true;
}
public boolean delim(int cc)
{
state = state.transit(cc);
if (state != null)
{
return true;
}
else
{
state = root;
return false;
}
}
@Override
public Spliterator trySplit()
{
return null;
}
@Override
public long estimateSize()
{
return 1;
}
@Override
public int characteristics()
{
return 0;
}
}
}