All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.latinolib.tokenizer.RegexTokenizer Maven / Gradle / Ivy

package org.latinolib.tokenizer;

import com.google.common.base.Preconditions;
import sun.reflect.generics.reflectiveObjects.NotImplementedException;

import java.io.Serializable;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Author saxo
 */
public class RegexTokenizer implements Tokenizer, Serializable
{
    private static final long serialVersionUID = -742906136170767936L;

    private final Pattern pattern;

    public RegexTokenizer() {
        pattern = RegexTokenizers.LATIN.getPattern();
    }

    public RegexTokenizer(Pattern pattern) {
        this.pattern = Preconditions.checkNotNull(pattern);
    }

    public RegexTokenizer(String regex) {
        pattern = Pattern.compile(Preconditions.checkNotNull(regex));
    }

    @Override
    public Iterable getTokens(final String text) {
        return new Iterable()
        {
            @Override
            public Iterator iterator() {
                return new TokenIterator(pattern.matcher(text));
            }
        };
    }

    private class TokenIterator implements Iterator
    {
        private final Matcher matcher;
        private int startIdx = 0;

        public TokenIterator(Matcher matcher) {
            this.matcher = matcher;
        }

        @Override
        public boolean hasNext() {
            boolean result = matcher.find(startIdx);
            if (result) {
                startIdx = matcher.start();
            }
            return result;
        }

        @Override
        public Token next() {
            if (matcher.find(startIdx)) {
                startIdx = matcher.end();
                return new Token(matcher.group(), matcher.start(), matcher.end());
            }
            throw new NoSuchElementException();
        }

        @Override
        public void remove() {
            throw new NotImplementedException();
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy