All downloads are free. Search and download functionality uses the official Maven repository.

net.amygdalum.util.text.ByteWordSetBuilder Maven / Gradle / Ivy

package net.amygdalum.util.text;

import static net.amygdalum.util.text.AttachmentAdaptor.attach;
import static net.amygdalum.util.text.ByteConnectionAdaptor.addNextNode;

import java.util.Deque;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;

public class ByteWordSetBuilder {

	private ByteWordGraphCompiler compiler;
	private JoinStrategy strategy;
	private ByteNode root;

	public ByteWordSetBuilder(ByteWordGraphCompiler compiler) {
		this.compiler = compiler;
		this.root = compiler.create();
	}

	public ByteWordSetBuilder(ByteWordGraphCompiler compiler, JoinStrategy strategy) {
		this.compiler = compiler;
		this.strategy = strategy;
		this.root = compiler.create();
	}

	public ByteWordSetBuilder extend(byte[] bytes, T data) {
		ByteNode node = root;
		for (byte b : bytes) {
			ByteNode next = node.nextNode(b);
			if (next == null) {
				next = compiler.create();
				addNextNode(node, b, next);
			}
			node = next;
		}
		if (data == null) {
			return this;
		}
		if (strategy != null) {
			T existing = node.getAttached();
			T joinedData = strategy.join(existing, data);
			if (joinedData != existing) {
				attach(node, joinedData);
			}
		} else {
			attach(node, data);
		}
		return this;
	}

	public ByteWordSetBuilder work(ByteTask task) {
		Queue> worklist = new LinkedList<>();
		worklist.addAll(task.init(root));
		while (!worklist.isEmpty()) {
			ByteNode current = worklist.remove();
			List> nexts = task.process(current);
			worklist.addAll(nexts);
		}
		return this;
	}

	private Queue> postOrdered() {
		Map, int[]> counters = new IdentityHashMap<>();

		Queue> todo = new LinkedList<>();
		todo.add(root);
		counters.put(root, new int[1]);

		int max = 1;
		while (!todo.isEmpty()) {
			ByteNode current = todo.remove();
			for (byte b : current.getAlternatives()) {
				ByteNode nextNode = current.nextNode(b);
				int[] counter = counters.get(nextNode);
				if (counter == null) {
					counter = new int[1];
					counters.put(nextNode, counter);
					todo.add(nextNode);
				}
				counter[0]++;
				if (max < counter[0]) {
					max = counter[0];
				}
			}
		}

		Queue> nexts = new LinkedList<>();
		Iterator, int[]>> iterator = counters.entrySet().iterator();
		while (iterator.hasNext()) {
			Entry, int[]> entry = iterator.next();
			if (entry.getValue()[0] == 0) {
				nexts.add(entry.getKey());
				iterator.remove();
			}
		}

		Deque> postOrdered = new LinkedList<>();

		while (!counters.isEmpty()) {
			if (nexts.isEmpty()) {
				throw new IllegalArgumentException("graph is not acylic");
			}
			while (!nexts.isEmpty()) {
				ByteNode next = nexts.remove();
				postOrdered.push(next);
				for (byte b : next.getAlternatives()) {
					ByteNode ref = next.nextNode(b);
					int[] counter = counters.get(ref);
					if (--counter[0] == 0) {
						nexts.add(ref);
						counters.remove(ref);
					}
				}

			}
		}

		return postOrdered;
	}

	private List> compiled() {
		Set> visited = new HashSet<>();
		List> compiled = new LinkedList<>();

		Queue> todo = new LinkedList<>();
		todo.add(root);
		while (!todo.isEmpty()) {
			ByteNode current = todo.remove();
			if (visited.contains(current)) {
				continue;
			}
			visited.add(current);
			compiled.add(current);
			for (byte b : current.getAlternatives()) {
				ByteNode nextNode = current.nextNode(b);
				todo.add(nextNode);
			}
		}

		return compiled;
	}

	public R build() {
		NodeResolver> nodes = compiler.resolver();
		for (ByteNode node : postOrdered()) {
			nodes.compile(node);
		}
		for (ByteNode node : compiled()) {
			nodes.link(node);
		}
		return compiler.build(nodes.resolve(root));
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy