// com.slimgears.util.stream.Tokenizer (Maven / Gradle / Ivy artifact page)
// General purpose utils / module: stream-utils
package com.slimgears.util.stream;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import static com.slimgears.util.stream.Streams.self;
/**
 * Regex-based tokenizer whose token types are the constants of an enum.
 *
 * <p>Every registered token type contributes one alternative to a single combined
 * regular expression. Alternatives are tried in the order the types were added to
 * the {@link Builder}, so when several patterns could match at the same position the
 * type that was added first wins.
 *
 * <p>The combined pattern is compiled once in the constructor; every call to
 * {@link #tokenize(String)} creates its own {@link Matcher}.
 *
 * @param <E> enum type that identifies the kind of each token
 */
public class Tokenizer<E extends Enum<E>> {
    private final Pattern pattern;
    private final Set<E> tokenTypes;

    /**
     * Compiles the combined pattern from the given per-type regular expressions.
     *
     * @param patterns token type to regular expression, iterated in priority order
     */
    private Tokenizer(Map<E, String> patterns) {
        // Work on a private snapshot: later changes to the builder's map must not
        // make the set of token types diverge from the groups of the compiled pattern.
        Map<E, String> snapshot = new LinkedHashMap<>(patterns);
        String regex = snapshot.entrySet()
                .stream()
                .map(entry -> "(?<" + groupName(entry.getKey()) + ">" + entry.getValue() + ")")
                .collect(Collectors.joining("|"));
        this.pattern = Pattern.compile(regex);
        this.tokenTypes = snapshot.keySet();
    }

    /**
     * Returns the name of the capturing group that wraps the pattern of {@code type}.
     *
     * <p>Regex group names may only contain ASCII letters and digits, while enum
     * constant names routinely contain underscores, so the name is derived from the
     * ordinal (unique within one enum type) instead of from the constant's name.
     */
    private static String groupName(Enum<?> type) {
        return "t" + type.ordinal();
    }

    /**
     * A single token: the matched text together with the type whose pattern matched it.
     *
     * @param <E> enum type that identifies the kind of the token
     */
    public static class Token<E extends Enum<E>> {
        private final E type;
        private final String lexeme;

        Token(E type, String token) {
            this.type = type;
            this.lexeme = token;
        }

        /** Returns the token type whose pattern produced this token. */
        public E type() {
            return type;
        }

        /** Returns the exact text that was matched. */
        public String lexeme() {
            return lexeme;
        }
    }

    /**
     * Splits {@code input} into tokens.
     *
     * <p>The returned stream is sequential, ordered by position in the input and lazy:
     * the input is scanned only as far as the stream is consumed. Stretches of input
     * that no pattern matches are skipped and produce no token.
     *
     * @param input text to tokenize
     * @return stream of the tokens found, in order of appearance
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public Stream<Token<E>> tokenize(String input) {
        Objects.requireNonNull(input, "input");
        Matcher matcher = pattern.matcher(input);
        // An explicit ORDERED spliterator is used rather than Stream.generate(), which
        // is documented to be unordered and has no natural end.
        Spliterator<Token<E>> tokens = new Spliterators.AbstractSpliterator<Token<E>>(
                Long.MAX_VALUE, Spliterator.ORDERED | Spliterator.NONNULL) {
            @Override
            public boolean tryAdvance(Consumer<? super Token<E>> action) {
                Optional<Token<E>> token = nextToken(matcher);
                token.ifPresent(action);
                return token.isPresent();
            }
        };
        return StreamSupport.stream(tokens, false);
    }

    /**
     * Advances the matcher to the next match and converts it into a token.
     *
     * @param matcher matcher positioned after the previously returned token
     * @return the next token, or empty when the input has no further match
     */
    private Optional<Token<E>> nextToken(Matcher matcher) {
        if (!matcher.find()) {
            return Optional.empty();
        }
        // The group that participated in the match identifies the token type.
        for (E type : tokenTypes) {
            String lexeme = matcher.group(groupName(type));
            if (lexeme != null) {
                return Optional.of(new Token<>(type, lexeme));
            }
        }
        // Only reachable when no token type was registered at all.
        return Optional.empty();
    }

    /**
     * Creates an empty builder.
     *
     * @param <E> enum type that identifies the kind of each token
     * @return a new builder without any patterns
     */
    public static <E extends Enum<E>> Builder<E> builder() {
        return new Builder<>();
    }

    /**
     * Collects the regular expression of each token type and creates the tokenizer.
     *
     * @param <E> enum type that identifies the kind of each token
     */
    public static class Builder<E extends Enum<E>> {
        // Insertion-ordered: the iteration order becomes the order of the alternatives
        // in the combined pattern, i.e. the priority between overlapping patterns.
        private final Map<E, String> patterns = new LinkedHashMap<>();

        /**
         * Registers the pattern of one token type. Adding a type again replaces its
         * pattern but keeps the priority of the first registration.
         *
         * @param key token type
         * @param regex regular expression matching lexemes of that type
         * @return this builder
         * @throws NullPointerException if {@code key} or {@code regex} is {@code null}
         */
        public Builder<E> add(E key, String regex) {
            Objects.requireNonNull(key, "key");
            Objects.requireNonNull(regex, "regex");
            patterns.put(key, regex);
            return this;
        }

        /**
         * Registers every constant of an enum, in declaration order.
         *
         * @param keyClass enum class whose constants become token types
         * @param patternGetter supplies the regular expression for each constant
         * @return this builder
         * @throws NullPointerException if {@code patternGetter} returns {@code null}
         */
        public Builder<E> addAll(Class<E> keyClass, Function<? super E, String> patternGetter) {
            for (E type : keyClass.getEnumConstants()) {
                add(type, patternGetter.apply(type));
            }
            return this;
        }

        /**
         * Creates a tokenizer from the patterns registered so far. Patterns added to
         * this builder afterwards do not affect tokenizers that were already built.
         *
         * @return a new tokenizer
         * @throws java.util.regex.PatternSyntaxException if a registered pattern is invalid
         */
        public Tokenizer<E> build() {
            return new Tokenizer<>(patterns);
        }
    }
}