All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.amygdalum.patternsearchalgorithms.automaton.chars.NFA Maven / Gradle / Ivy

package net.amygdalum.patternsearchalgorithms.automaton.chars;

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

import net.amygdalum.util.text.CharRange;
import net.amygdalum.util.text.CharRangeAccumulator;
import net.amygdalum.util.worklist.WorkSet;

public class NFA implements Cloneable {

	private State start;
	private List charRanges;
	private State[] states;
	private int accepting;

	public NFA(State start) {
		init(start);
	}

	private void init(State start) {
		this.start = start;
		this.charRanges = computeEquivalentCharRanges(start);
		this.states = clean(start, null);
		this.accepting = order(states);
	}

	private void init(State start, State error) {
		this.start = start;
		this.charRanges = computeEquivalentCharRanges(start);
		this.states = clean(start, error);
		this.accepting = order(states);
	}

	public State getStart() {
		return start;
	}

	public State[] states() {
		return states;
	}

	public State[] accepting() {
		return Arrays.copyOfRange(states, accepting, states.length);
	}

	public List getCharRanges() {
		return charRanges;
	}

	private static State[] clean(State start, State error) {
		WorkSet todo = new WorkSet<>();
		todo.add(start);
		Set dead = new HashSet<>();
		WorkSet live = new WorkSet<>();
		while (!todo.isEmpty()) {
			State state = todo.remove();
			if (state.isAccepting()) {
				live.add(state);
			} else {
				dead.add(state);
			}
			for (Transition transition : state.out()) {
				todo.add(transition.getTarget());
			}
		}
		while (!live.isEmpty()) {
			State current = live.remove();
			for (Transition liveTransition : current.in()) {
				State nextLive = liveTransition.getOrigin();
				live.add(nextLive);
				dead.remove(nextLive);
			}
		}

		dead.remove(start);
		live.remove(start);
		live.getDone().add(start);
		if (error != null) {
			dead.remove(error);
			live.remove(error);
			live.getDone().add(error);
		}

		for (State current : dead) {
			current.disconnect();
		}

		return live.getDone().toArray(new State[0]);
	}

	private static int order(State[] states) {
		int left = 0;
		int right = states.length - 1;
		while (left <= right) {
			while (left < states.length && !states[left].isAccepting()) {
				states[left].setId(left);
				left++;
			}
			while (right >= 0 && states[right].isAccepting()) {
				states[right].setId(right);
				right--;
			}
			if (left < right) {
				State temp = states[right];
				states[right] = states[left];
				states[left] = temp;
			}
		}
		return right + 1;
	}

	public void prune() {
		eliminateTrivialEpsilons();
		mergeTransitions();
	}

	public void determinize() {
		eliminateAllEpsilons();
		mergeTransitions();
		determinizeStates();
		totalizeStates();
		minimizeStates();
	}

	private void totalizeStates() {
		State error = new State();
		Queue todo = new WorkSet<>();
		todo.add(error);
		todo.add(start);
		while (!todo.isEmpty()) {
			State current = todo.remove();
			List missingRanges = new LinkedList<>(charRanges);
			for (Transition transition : current.out()) {
				todo.add(transition.getTarget());
				if (transition instanceof OrdinaryTransition) {
					char c = ((OrdinaryTransition) transition).getFrom();
					Iterator iterator = missingRanges.iterator();
					while (iterator.hasNext()) {
						CharRange check = iterator.next();
						if (check.contains(c)) {
							iterator.remove();
							break;
						}
					}
				}
			}
			for (CharRange range : missingRanges) {
				new CharsTransition(current, range.from, range.to, error).connect();
			}
		}
		init(start, error);
	}

	private void minimizeStates() {
		List> partitions = new LinkedList<>();
		Queue> todo = new LinkedList<>();

		SplitPartition initialPartition = initialPartition();

		if (initialPartition.min.isEmpty()) {
			partitions.add(initialPartition.max);
		} else {
			partitions.add(initialPartition.max);
			partitions.add(initialPartition.min);
			todo.add(initialPartition.min);
		}

		while (!todo.isEmpty()) {
			Set current = todo.remove();
			for (CharRange charRange : charRanges) {
				Set origins = origins(current, charRange);
				ListIterator> partitionIterator = partitions.listIterator();
				while (partitionIterator.hasNext()) {
					Set partition = partitionIterator.next();
					SplitPartition splitPartition = split(partition, origins);
					if (splitPartition.min.isEmpty()) {
						continue;
					}
					partitionIterator.set(splitPartition.max);
					partitionIterator.add(splitPartition.min);
					if (todo.contains(partition)) {
						todo.remove(partition);
						todo.add(splitPartition.max);
						todo.add(splitPartition.min);
					} else {
						todo.add(splitPartition.min);
					}
				}
			}
		}

		State newstart = digest(partitions);
		init(newstart);
	}

	private State digest(List> partitions) {
		Map mapping = new IdentityHashMap<>();
		State start = null;
		for (Set partition : partitions) {
			State state = new State();
			for (State partstate : partition) {
				mapping.put(partstate, state);
				if (partstate.isAccepting()) {
					state.setAccepting();
				}
				if (!partstate.isSilent()) {
					state.setSilent(false);
				}
				if (partstate == this.start) {
					start = state;
				}
			}
		}

		for (Set partition : partitions) {
			State representative = partition.iterator().next();

			for (Transition transition : representative.out()) {
				State origin = transition.getOrigin();
				State mappedOrigin = mapping.get(origin);
				State target = transition.getTarget();
				State mappedTarget = mapping.get(target);
				transition.asPrototype().withOrigin(mappedOrigin).withTarget(mappedTarget).connect();
			}
		}
		return start;
	}

	private SplitPartition split(Set partition, Set splitter) {
		Set intersection = new HashSet<>(partition.size());
		Set remainder = new HashSet<>(partition.size());
		for (State state : partition) {
			if (splitter.contains(state)) {
				intersection.add(state);
			} else {
				remainder.add(state);
			}
		}
		return new SplitPartition(intersection, remainder);
	}

	private Set origins(Set states, CharRange charRange) {
		Set in = new HashSet<>();
		for (State state : states) {
			for (Transition transition : state.in()) {
				if (transition instanceof OrdinaryTransition && ((OrdinaryTransition) transition).accepts(charRange.from)) {
					in.add(transition.getOrigin());
				}
			}
		}
		return in;
	}

	private SplitPartition initialPartition() {
		Set accept = new HashSet<>(states.length);
		Set nonaccept = new HashSet<>(states.length);
		for (State state : states) {
			if (state.isAccepting()) {
				accept.add(state);
			} else {
				nonaccept.add(state);
			}
		}
		return new SplitPartition(accept, nonaccept);
	}

	private void determinizeStates() {
		Map, State> dStates = new HashMap<>();
		Queue> todo = new WorkSet<>();

		Set startset = new HashSet<>();
		startset.add(start);
		todo.add(startset);
		State dStart = new State();
		dStates.put(startset, dStart);

		while (!todo.isEmpty()) {
			Set current = todo.remove();
			State dState = dStates.get(current);
			transferAccept(current, dState);

			for (CharRange range : charRanges) {
				char from = range.from;
				char to = range.to;
				Set nextset = new HashSet<>();
				for (State state : current) {
					for (Transition transition : state.out()) {
						if (transition instanceof OrdinaryTransition && ((OrdinaryTransition) transition).accepts(from))
							nextset.add(transition.getTarget());
					}
				}
				State target = dStates.get(nextset);
				if (target == null) {
					todo.add(nextset);
					target = new State();
					dStates.put(nextset, target);
				}
				if (from == to) {
					new CharTransition(dState, from, target).connect();
				} else {
					new CharsTransition(dState, from, to, target).connect();
				}
			}
		}
		init(dStart);
	}

	private void transferAccept(Set states, State dState) {
		boolean accepting = false;
		boolean silent = true;
		for (State state : states) {
			accepting |= state.isAccepting();
			silent &= state.isSilent();
		}
		dState.setAccepting(accepting);
		dState.setSilent(silent);
	}

	private static List computeEquivalentCharRanges(State start) {
		CharRangeAccumulator acc = new CharRangeAccumulator();

		Queue todo = new WorkSet<>();
		todo.add(start);
		while (!todo.isEmpty()) {
			State state = todo.remove();
			for (Transition transition : state.out()) {
				if (transition instanceof OrdinaryTransition) {
					OrdinaryTransition ordinaryTransition = (OrdinaryTransition) transition;
					acc.split(ordinaryTransition.getFrom(), ordinaryTransition.getTo());
				}
				todo.add(transition.getTarget());
			}
		}
		return acc.getRanges();
	}

	private void mergeTransitions() {
		Queue todo = new WorkSet<>();
		todo.add(start);
		while (!todo.isEmpty()) {
			State state = todo.remove();
			SortedSet transitions = new TreeSet<>(new TransitionComparator());
			transitions.addAll(state.out());

			Transition last = null;
			for (Transition transition : transitions) {
				if (last == null) {
					last = transition;
				} else {
					Transition joined = tryJoin(last, transition);
					if (joined == null) {
						if (!transitions.contains(last)) {
							last.connect();
						}
						last = transition;
					} else if (joined == last) {
						transition.remove();
					} else if (joined == transition) {
						if (transitions.contains(last)) {
							last.remove();
						}
						last = joined;
					} else {
						if (transitions.contains(last)) {
							last.remove();
						}
						transition.remove();
						last = joined;
					}
				}
			}
			if (last != null && !transitions.contains(last)) {
				last.connect();
			}
		}
		clean(start, null);
	}

	private Transition tryJoin(Transition t1, Transition t2) {
		if (t1.getTarget() != t2.getTarget() || t1.getOrigin() != t2.getOrigin()) {
			return null;
		}
		State origin = t1.getOrigin();
		State target = t1.getTarget();

		if (t1 instanceof EpsilonTransition && t2 instanceof EpsilonTransition) {
			return new EpsilonTransition(origin, target);
		}
		if (t1 instanceof OrdinaryTransition && t2 instanceof OrdinaryTransition) {
			OrdinaryTransition ot1 = (OrdinaryTransition) t1;
			OrdinaryTransition ot2 = (OrdinaryTransition) t2;

			int from1 = ot1.getFrom() & 0xffff;
			int to1 = ot1.getTo() & 0xffff;
			int from2 = ot2.getFrom() & 0xffff;
			int to2 = ot2.getTo() & 0xffff;

			if (from2 >= from1 && from2 <= to1 + 1 || from1 >= from2 && from1 <= to2 + 1) {

				char from = (char) Math.min(from1, from2);
				char to = (char) Math.max(to1, to2);

				if (from1 == from && to1 == to) {
					return ot1;
				} else if (from2 == from && to2 == to) {
					return ot2;
				} else {
					return new CharsTransition(origin, from, to, target);
				}
			}
		}
		return null;
	}

	private void eliminateTrivialEpsilons() {
		Queue todo = new WorkSet<>();
		todo.add(start);
		while (!todo.isEmpty()) {
			State state = todo.remove();
			for (Transition transition : state.out()) {
				todo.add(transition.getTarget());
			}
			for (EpsilonTransition epsilon : transitiveEpsilons(state)) {
				State origin = epsilon.getOrigin();
				if (origin == state) {
					epsilon.remove();
				}
				State target = epsilon.getTarget();
				if (target.isAccepting()) {
					state.setAccepting();
				}
				if (!target.isSilent()) {
					state.setSilent(false);
				}
				for (Transition transition : target.out()) {
					if (transition instanceof OrdinaryTransition) {
						transition.asPrototype().withOrigin(state).withTarget(transition.getTarget()).connect();
					}
				}
				for (Transition transition : target.out()) {
					if (transition instanceof EpsilonTransition) {
						Action action = transition.getAction();
						if (action != null) {
							transition.asPrototype().withOrigin(state).withTarget(transition.getTarget()).withAction(action).connect();
						}
					}
				}
			}
		}
		init(start);
	}

	private Set transitiveEpsilons(State state) {

		WorkSet todo = new WorkSet<>();
		for (Transition transition : state.out()) {
			if (transition instanceof EpsilonTransition) {
				todo.add((EpsilonTransition) transition);
			}
		}
		while (!todo.isEmpty()) {
			EpsilonTransition current = todo.remove();
			if (current.getAction() != null) {
				todo.remove(current);
				continue;
			}
			State target = current.getTarget();
			for (Transition transition : target.out()) {
				if (transition instanceof EpsilonTransition) {
					todo.add((EpsilonTransition) transition);
				}
			}
		}
		return todo.getDone();
	}

	private void eliminateAllEpsilons() {
		Queue epsilons = new LinkedList<>();
		Queue todo = new WorkSet<>();
		todo.add(start);
		while (!todo.isEmpty()) {
			State state = todo.remove();
			for (Transition transition : state.out()) {
				todo.add(transition.getTarget());
				if (transition instanceof EpsilonTransition) {
					epsilons.add((EpsilonTransition) transition);
				}
			}
		}

		WorkSet propagateEpsilons = new WorkSet<>();
		propagateEpsilons.addAll(epsilons);
		while (!propagateEpsilons.isEmpty()) {
			EpsilonTransition epsilon = propagateEpsilons.remove();
			Set done = propagateStates(epsilon);
			propagateEpsilons.removeAll(done);
			propagateEpsilons.getDone().addAll(done);
		}

		while (!epsilons.isEmpty()) {
			EpsilonTransition epsilon = epsilons.remove();
			State origin = epsilon.getOrigin();
			State target = epsilon.getTarget();
			int in = origin.in().size();
			int out = target.out().size();
			if (origin == start) {
				eliminateForward(epsilon);
			} else if (in >= out) {
				eliminateForward(epsilon);
			} else {
				eliminateBackward(epsilon);
			}
		}

		init(start);
	}

	private Set propagateStates(EpsilonTransition epsilon) {
		boolean accepting = false;
		boolean silent = true;

		Set propagated = new HashSet<>();
		WorkSet eclosure = new WorkSet<>();
		eclosure.add(epsilon.getOrigin());
		eclosure.add(epsilon.getTarget());
		while (!eclosure.isEmpty()) {
			State next = eclosure.remove();
			accepting |= next.isAccepting();
			silent &= next.isSilent();

			for (Transition transition : next.out()) {
				if (transition instanceof EpsilonTransition) {
					propagated.add((EpsilonTransition) transition);
					eclosure.add(transition.getTarget());
				}
			}
			for (Transition transition : next.in()) {
				if (transition instanceof EpsilonTransition) {
					propagated.add((EpsilonTransition) transition);
					eclosure.add(transition.getOrigin());
				}
			}
		}

		for (State state : eclosure.getDone()) {
			state.setAccepting(accepting);
			state.setSilent(silent);
		}

		return propagated;
	}

	private void eliminateForward(EpsilonTransition epsilon) {
		State origin = epsilon.getOrigin();
		WorkSet targets = new WorkSet<>();
		targets.add(epsilon.getTarget());
		while (!targets.isEmpty()) {
			State next = targets.remove();
			for (Transition transition : next.out()) {
				if (transition instanceof OrdinaryTransition) {
					transition.asPrototype().withOrigin(origin).withTarget(transition.getTarget()).connect();
				} else if (transition instanceof EpsilonTransition) {
					targets.add(transition.getTarget());
				}
			}
		}
		epsilon.remove();
		if (origin.out().isEmpty() && !origin.isAccepting()) {
			origin.disconnect();
		}
		for (State target : targets.getDone()) {
			if (target.in().isEmpty() && target != start) {
				target.disconnect();
			}
		}
	}

	private void eliminateBackward(EpsilonTransition epsilon) {
		State target = epsilon.getTarget();
		WorkSet origins = new WorkSet<>();
		origins.add(epsilon.getOrigin());
		while (!origins.isEmpty()) {
			State next = origins.remove();
			for (Transition transition : next.in()) {
				if (transition instanceof OrdinaryTransition) {
					transition.asPrototype().withOrigin(transition.getOrigin()).withTarget(target).connect();
				} else if (transition instanceof EpsilonTransition) {
					origins.add(transition.getOrigin());
				}
			}
		}
		epsilon.remove();
		if (target.in().isEmpty() && target != start) {
			target.disconnect();
		}
		for (State origin : origins.getDone()) {
			if (origin.out().isEmpty()) {
				origin.disconnect();
			}
		}
	}

	@Override
	public NFA clone() {
		try {
			NFA nfa = (NFA) super.clone();
			StateClone stateClone = StateClone.cloneTree(start);
			nfa.init(stateClone.getStart());
			return nfa;
		} catch (CloneNotSupportedException e) {
			return null;
		}
	}

	private static class SplitPartition {
		public Set max;
		public Set min;

		public SplitPartition(Set intersection, Set remainder) {
			this.max = intersection.size() > remainder.size() ? intersection : remainder;
			this.min = intersection.size() <= remainder.size() ? intersection : remainder;
		}

	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy