All downloads are free. Search and download functionality uses the official Maven repository.

com.slimgears.util.stream.Tokenizer Maven / Gradle / Ivy

There is a newer version: 0.7.58
Show newest version
package com.slimgears.util.stream;

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import static com.slimgears.util.stream.Streams.self;

/**
 * Splits text into typed tokens using one regular expression per token type.
 *
 * <p>All registered expressions are combined into a single alternation, each wrapped in its own
 * named group. The input is scanned left to right with {@link Matcher#find()}; input that matches
 * none of the expressions is skipped silently. When several expressions can match at the same
 * position, the one registered first wins.
 *
 * <p>Instances are created through {@link #builder()}.
 *
 * @param <E> enum type identifying the kinds of tokens
 */
public class Tokenizer<E extends Enum<E>> {
    /**
     * Prefix of the synthetic group names ({@code token0}, {@code token1}, ...). Enum constant
     * names cannot be used directly because regex group names may only contain letters and
     * digits, which rules out constants such as {@code HALF_UP}. User expressions must not
     * declare named groups with these names.
     */
    private static final String GROUP_PREFIX = "token";

    private final Pattern pattern;
    /** Synthetic group name to token type, in registration (priority) order. */
    private final Map<String, E> typesByGroup;

    /**
     * Compiles the combined expression.
     *
     * @param patterns token type to regular expression; iteration order defines priority
     * @throws java.util.regex.PatternSyntaxException if any expression is malformed
     */
    private Tokenizer(Map<E, String> patterns) {
        // Built into a private map so later changes to the builder cannot affect this instance.
        Map<String, E> groups = new LinkedHashMap<>();
        StringBuilder regex = new StringBuilder();
        for (Map.Entry<E, String> entry : patterns.entrySet()) {
            String group = GROUP_PREFIX + groups.size();
            if (regex.length() > 0) {
                regex.append('|');
            }
            regex.append("(?<").append(group).append('>').append(entry.getValue()).append(')');
            groups.put(group, entry.getKey());
        }
        this.pattern = Pattern.compile(regex.toString());
        this.typesByGroup = groups;
    }

    /**
     * A single token: the matched text together with the type whose expression matched it.
     *
     * @param <E> enum type identifying the kinds of tokens
     */
    public static class Token<E extends Enum<E>> {
        private final E type;
        private final String lexeme;

        Token(E type, String token) {
            this.type = type;
            this.lexeme = token;
        }

        /** Returns the type whose expression produced this token. */
        public E type() {
            return type;
        }

        /** Returns the exact text that was matched. */
        public String lexeme() {
            return lexeme;
        }
    }

    /**
     * Lazily tokenizes the given input.
     *
     * <p>The returned stream is sequential and ordered; tokens are produced on demand, in the
     * order they occur in {@code input}. The stream ends when no further match is found.
     *
     * @param input text to tokenize
     * @return stream of the tokens found in {@code input}
     */
    public Stream<Token<E>> tokenize(String input) {
        Matcher matcher = pattern.matcher(input);
        Spliterator<Token<E>> tokens = new Spliterators.AbstractSpliterator<Token<E>>(
                Long.MAX_VALUE, Spliterator.ORDERED | Spliterator.NONNULL) {
            @Override
            public boolean tryAdvance(Consumer<? super Token<E>> action) {
                Optional<Token<E>> token = nextToken(matcher);
                token.ifPresent(action);
                return token.isPresent();
            }
        };
        return StreamSupport.stream(tokens, false);
    }

    /**
     * Advances the matcher to the next match and converts it to a token.
     *
     * @param matcher matcher over the input being tokenized
     * @return the next token, or empty when the input is exhausted (or no token type is
     *         registered, in which case no group can ever be set)
     */
    private Optional<Token<E>> nextToken(Matcher matcher) {
        if (!matcher.find()) {
            return Optional.empty();
        }

        // Exactly one alternative took part in the match; its group is the only non-null one.
        for (Map.Entry<String, E> entry : typesByGroup.entrySet()) {
            String lexeme = matcher.group(entry.getKey());
            if (lexeme != null) {
                return Optional.of(new Token<>(entry.getValue(), lexeme));
            }
        }
        return Optional.empty();
    }

    /**
     * Creates an empty builder.
     *
     * @param <E> enum type identifying the kinds of tokens
     * @return a new builder
     */
    public static <E extends Enum<E>> Builder<E> builder() {
        return new Builder<>();
    }

    /**
     * Collects the expression for each token type. Registration order is the priority order
     * used when several expressions match at the same position.
     *
     * @param <E> enum type identifying the kinds of tokens
     */
    public static class Builder<E extends Enum<E>> {
        // Insertion-ordered: alternation order decides which overlapping expression wins.
        private final Map<E, String> patterns = new LinkedHashMap<>();

        /**
         * Registers (or replaces) the expression for a token type. Replacing an expression keeps
         * the type's original position in the priority order.
         *
         * @param key token type
         * @param regex regular expression matching lexemes of that type
         * @return this builder
         * @throws NullPointerException if {@code key} or {@code regex} is null
         */
        public Builder<E> add(E key, String regex) {
            Objects.requireNonNull(key, "key");
            Objects.requireNonNull(regex, "regex");
            patterns.put(key, regex);
            return this;
        }

        /**
         * Registers every constant of the enum, in declaration order, using the expression
         * supplied by {@code patternGetter}.
         *
         * @param keyClass enum class whose constants are the token types
         * @param patternGetter maps each constant to its regular expression
         * @return this builder
         */
        public Builder<E> addAll(Class<E> keyClass, Function<E, String> patternGetter) {
            for (E type : keyClass.getEnumConstants()) {
                add(type, patternGetter.apply(type));
            }
            return this;
        }

        /**
         * Builds a tokenizer from the expressions registered so far. The builder may be reused
         * afterwards without affecting tokenizers already built.
         *
         * @return a new tokenizer
         * @throws java.util.regex.PatternSyntaxException if any expression is malformed
         */
        public Tokenizer<E> build() {
            return new Tokenizer<>(patterns);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy