org.vesalainen.regex.RegexMatcher Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of lpg Show documentation
Java Lookahead Parser Generator. The generator produces LALR(k) parsers. Grammar rules are entered using annotations. A Rule annotation can be attached to a reducer method, which keeps the rule and its action together.
The newest version!
/*
 * Copyright (C) 2015 tkv
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.vesalainen.regex;

import java.util.Spliterator;
import java.util.function.Consumer;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.vesalainen.grammar.state.DFA;
import org.vesalainen.grammar.state.DFAState;
import org.vesalainen.grammar.state.NFA;
import org.vesalainen.grammar.state.NFAState;
import org.vesalainen.grammar.state.Scope;
import org.vesalainen.regex.Regex.Option;
import org.vesalainen.util.Matcher;

/**
 * A regex implementation of matcher.
 * This implementation creates the DFA at runtime and is therefore slower than
 * using a compiled Regex.
 *
 * <p>This class is not thread safe except for streams.
 * @author tkv
 * @param <T> Type of the object attached to an expression and returned on match
 * @see org.vesalainen.regex.Regex
 */
public class RegexMatcher implements Matcher
{
    protected RegexParserIntf parser = RegexParserFactory.newInstance();
    protected Scope> nfaScope = new Scope<>("org.vesalainen.regex.RegexMatcher");
    protected NFA nfa;
    protected DFA dfa;
    protected DFAState root;
    protected DFAState state;
    protected T matched;
    /**
     * Creates an empty RegexMatcher.
     * Expressions are added with {@link #addExpression} and the matcher has to
     * be compiled with {@link #compile()} before it can match.
     */
    public RegexMatcher()
    {
    }
    /**
     * Creates RegexMatcher with an initial expression.
     * The arguments are passed unchanged to {@link #addExpression}; the
     * matcher is not compiled by this constructor.
     * @param expr Regular expression
     * @param attach Object associated with the expression
     * @param options Regex options applied to the expression
     */
    public RegexMatcher(String expr, T attach, Option... options)
    {
        addExpression(expr, attach, options);
    }
    /**
     * Adds an expression to this matcher.
     * The first expression becomes the matcher's NFA. Every further expression
     * is parsed into its own NFA and merged with the existing one through the
     * three-argument NFA constructor.
     * <p>Expressions can only be added before {@link #compile()} is called,
     * because compiling releases the parser.
     * @param expr Regular expression
     * @param attach Object associated with the expression
     * @param options Regex options applied to the expression
     * @throws IllegalStateException if the matcher has already been compiled
     */
    public void addExpression(String expr, T attach, Option... options)
    {
        if (parser == null)
        {
            // compile() sets parser to null; fail with a clear message instead
            // of a NullPointerException from parser.createNFA.
            throw new IllegalStateException("already compiled");
        }
        if (nfa == null)
        {
            nfa = parser.createNFA(nfaScope, expr, attach, options);
        }
        else
        {
            NFA nfa2 = parser.createNFA(nfaScope, expr, attach, options);
            nfa = new NFA<>(nfaScope, nfa, nfa2);
        }
    }
    /**
     * Compiles the added expressions into a DFA.
     * If no expression has been added, the DFA is constructed from an empty NFA.
     * After compilation the parser, the NFA scope and the NFA are released
     * (set to null), so no more expressions can be added.
     * <p>Calling this method again on a compiled matcher with nothing new to
     * compile keeps the existing DFA.
     */
    public void compile()
    {
        if (nfa == null && root != null)
        {
            // Already compiled and no pending NFA: without this guard the
            // working DFA would be replaced by one built from an empty NFA.
            return;
        }
        // NOTE(review): the type argument was lost in this copy of the file and
        // is reconstructed as DFAState<T> - confirm against NFA.constructDFA.
        Scope<DFAState<T>> dfaScope = new Scope<>("org.vesalainen.regex.RegexMatcher");
        if (nfa == null)
        {
            nfa = new NFA(dfaScope);
        }
        dfa = nfa.constructDFA(dfaScope);
        state = root = dfa.getRoot();
        // Build-time objects are not needed after the DFA exists.
        parser = null;
        nfaScope = null;
        nfa = null;
    }
    /**
     * Tells whether this matcher has been compiled.
     * @return true once {@link #compile()} has set the DFA root, false before that
     */
    public boolean isCompiled()
    {
        return root != null;
    }
    /**
     * Matches given text. Returns associated token if match, otherwise null.
     * @param text
     * @return 
     */
    public T match(CharSequence text)
    {
        if (root == null)
        {
            throw new IllegalStateException("not compiled");
        }
        int length = text.length();
        for (int ii=0;iiStream is safe to use same regex from different thread.
     * @param seq
     * @return 
     */
    public Stream split(CharSequence seq)
    {
        // Same pipeline as the static overload, with this matcher supplying the delimiters.
        return split(seq, this);
    }
    /**
     * Returns a sequential stream that contains the subsequences of seq
     * delimited by the given regex.
     * A new matcher is created from the regex for this call.
     * @param seq Character sequence to split
     * @param regex Regular expression describing the delimiter
     * @param options Regex options applied to the expression
     * @return Stream of the subsequences found between delimiters
     */
    public static Stream split(CharSequence seq, String regex, Option... options)
    {
        return StreamSupport.stream(new SpliteratorImpl(seq, regex, options), false);
    }
    /**
     * Returns a sequential stream that contains the subsequences of seq
     * delimited by the given matcher.
     * Stream is safe to use same regex from different thread.
     * NOTE(review): the spliterator compiles the matcher if it is not yet
     * compiled, which modifies the matcher - compile it beforehand when it is
     * shared between threads.
     * @param seq Character sequence to split
     * @param matcher Matcher whose expressions describe the delimiter
     * @return Stream of the subsequences found between delimiters
     */
    public static Stream split(CharSequence seq, RegexMatcher matcher)
    {
        return StreamSupport.stream(new SpliteratorImpl(seq, matcher), false);
    }
    /**
     * Spliterator that walks a character sequence and reports the subsequences
     * lying between delimiters recognised by a matcher's DFA.
     * <p>Each instance keeps its own current DFA state and only reads the
     * matcher's root state, so traversal does not touch the matcher's own
     * current state.
     * <p>A character counts as a delimiter as soon as the DFA can make a
     * transition on it; whether the DFA reaches an accepting state is not
     * checked.
     */
    private static class SpliteratorImpl implements Spliterator<CharSequence>
    {
        private final CharSequence seq;
        private final int length;
        private final DFAState root;
        // Current DFA state; equals root whenever no delimiter is in progress.
        private DFAState state;
        // Index of the next character to examine.
        private int start;

        /**
         * Creates a spliterator using a new matcher built from regex.
         * @param seq Character sequence to split
         * @param regex Regular expression describing the delimiter
         * @param options Regex options applied to the expression
         */
        public SpliteratorImpl(CharSequence seq, String regex, Option... options)
        {
            this(seq, new RegexMatcher(regex, "token", options));
        }

        /**
         * Creates a spliterator using the given matcher. The matcher is
         * compiled here if it has not been compiled yet.
         * @param seq Character sequence to split
         * @param matcher Matcher whose DFA recognises the delimiter
         */
        public SpliteratorImpl(CharSequence seq, RegexMatcher matcher)
        {
            this.seq = seq;
            this.length = seq.length();
            if (!matcher.isCompiled())
            {
                matcher.compile();
            }
            root = state = matcher.root;
        }

        /**
         * Skips delimiter characters, then passes the next run of
         * non-delimiter characters to action.
         * @param action Receiver of the next subsequence
         * @return false when no subsequence is left
         */
        @Override
        public boolean tryAdvance(Consumer<? super CharSequence> action)
        {
            while (start < length && delim(seq.charAt(start)))
            {
                start++;
            }
            // The former test (start + 1 >= length) also rejected a token made
            // of the single last character.
            if (start >= length)
            {
                return false;
            }
            // seq.charAt(start) is known not to be a delimiter here.
            int end = start+1;
            while (end < length && !delim(seq.charAt(end)))
            {
                end++;
            }
            action.accept(seq.subSequence(start, end));
            // Character at end (if any) was already consumed as a delimiter.
            start = end+1;
            return true;
        }

        /**
         * Feeds one character to the DFA.
         * @param cc Character to test
         * @return true if the DFA has a transition for cc, either continuing
         * the delimiter in progress or starting a new one from the root state
         */
        public boolean delim(int cc)
        {
            DFAState next = state.transit(cc);
            if (next == null && state != root)
            {
                // The delimiter in progress ended; cc may start a new delimiter,
                // so it has to be tried from the root instead of being dropped.
                next = root.transit(cc);
            }
            if (next != null)
            {
                state = next;
                return true;
            }
            else
            {
                state = root;
                return false;
            }
        }

        /**
         * This spliterator cannot be partitioned.
         * @return null always
         */
        @Override
        public Spliterator<CharSequence> trySplit()
        {
            return null;
        }

        /**
         * @return Long.MAX_VALUE because the number of subsequences is not
         * known before traversal
         */
        @Override
        public long estimateSize()
        {
            return Long.MAX_VALUE;
        }

        /**
         * @return 0, no characteristics are reported
         */
        @Override
        public int characteristics()
        {
            return 0;
        }

    }
}