// net.amygdalum.util.text.ByteWordSetBuilder (Maven / Gradle / Ivy)
package net.amygdalum.util.text;
import static net.amygdalum.util.text.AttachmentAdaptor.attach;
import static net.amygdalum.util.text.ByteConnectionAdaptor.addNextNode;
import java.util.Deque;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;
public class ByteWordSetBuilder {
private ByteWordGraphCompiler compiler;
private JoinStrategy strategy;
private ByteNode root;
public ByteWordSetBuilder(ByteWordGraphCompiler compiler) {
this.compiler = compiler;
this.root = compiler.create();
}
public ByteWordSetBuilder(ByteWordGraphCompiler compiler, JoinStrategy strategy) {
this.compiler = compiler;
this.strategy = strategy;
this.root = compiler.create();
}
public ByteWordSetBuilder extend(byte[] bytes, T data) {
ByteNode node = root;
for (byte b : bytes) {
ByteNode next = node.nextNode(b);
if (next == null) {
next = compiler.create();
addNextNode(node, b, next);
}
node = next;
}
if (data == null) {
return this;
}
if (strategy != null) {
T existing = node.getAttached();
T joinedData = strategy.join(existing, data);
if (joinedData != existing) {
attach(node, joinedData);
}
} else {
attach(node, data);
}
return this;
}
public ByteWordSetBuilder work(ByteTask task) {
Queue> worklist = new LinkedList<>();
worklist.addAll(task.init(root));
while (!worklist.isEmpty()) {
ByteNode current = worklist.remove();
List> nexts = task.process(current);
worklist.addAll(nexts);
}
return this;
}
private Queue> postOrdered() {
Map, int[]> counters = new IdentityHashMap<>();
Queue> todo = new LinkedList<>();
todo.add(root);
counters.put(root, new int[1]);
int max = 1;
while (!todo.isEmpty()) {
ByteNode current = todo.remove();
for (byte b : current.getAlternatives()) {
ByteNode nextNode = current.nextNode(b);
int[] counter = counters.get(nextNode);
if (counter == null) {
counter = new int[1];
counters.put(nextNode, counter);
todo.add(nextNode);
}
counter[0]++;
if (max < counter[0]) {
max = counter[0];
}
}
}
Queue> nexts = new LinkedList<>();
Iterator, int[]>> iterator = counters.entrySet().iterator();
while (iterator.hasNext()) {
Entry, int[]> entry = iterator.next();
if (entry.getValue()[0] == 0) {
nexts.add(entry.getKey());
iterator.remove();
}
}
Deque> postOrdered = new LinkedList<>();
while (!counters.isEmpty()) {
if (nexts.isEmpty()) {
throw new IllegalArgumentException("graph is not acylic");
}
while (!nexts.isEmpty()) {
ByteNode next = nexts.remove();
postOrdered.push(next);
for (byte b : next.getAlternatives()) {
ByteNode ref = next.nextNode(b);
int[] counter = counters.get(ref);
if (--counter[0] == 0) {
nexts.add(ref);
counters.remove(ref);
}
}
}
}
return postOrdered;
}
private List> compiled() {
Set> visited = new HashSet<>();
List> compiled = new LinkedList<>();
Queue> todo = new LinkedList<>();
todo.add(root);
while (!todo.isEmpty()) {
ByteNode current = todo.remove();
if (visited.contains(current)) {
continue;
}
visited.add(current);
compiled.add(current);
for (byte b : current.getAlternatives()) {
ByteNode nextNode = current.nextNode(b);
todo.add(nextNode);
}
}
return compiled;
}
public R build() {
NodeResolver> nodes = compiler.resolver();
for (ByteNode node : postOrdered()) {
nodes.compile(node);
}
for (ByteNode node : compiled()) {
nodes.link(node);
}
return compiler.build(nodes.resolve(root));
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy