
analysis.NFAAnalysisTools Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of regex-static-analysis Show documentation
Show all versions of regex-static-analysis Show documentation
A tool to perform static analysis on regexes to determine whether they are vulnerable to ReDoS.
package analysis;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import org.jgrapht.Graph;
import org.jgrapht.alg.connectivity.KosarajuStrongConnectivityInspector;
import nfa.NFAGraph;
import nfa.NFAVertexND;
import nfa.UPNFAState;
import nfa.NFAEdge;
import nfa.FilterEdge;
import nfa.transitionlabel.TransitionLabel;
import nfa.transitionlabel.CharacterClassTransitionLabel;
import nfa.transitionlabel.EmptyTransitionLabelException;
public class NFAAnalysisTools {
/* ============= GENERAL TOOLS ============= */
/**
* Modifies an NFA graph so that it can be used with the Mohri filter.
*
* @param m
* The NFA graph to modify
* @param modifyLabel
* The label to assign to the current epsilon transitions.
* @param selfloopLabel
* The label to assign to the selfloops added to all states.
*/
public static void prepareForFilter(NFAGraph m, String modifyLabel, String selfloopLabel) {
for (NFAVertexND v : m.vertexSet()) {
/* changing current epsilon transitions to modifyLabel */
for (NFAEdge e : m.outgoingEdgesOf(v)) {
if (e.getIsEpsilonTransition()) {
e.setTransitionLabel(modifyLabel);
}
}
/* Adding the self loop */
try {
m.addEdge(new NFAEdge(v, v, selfloopLabel));
} catch (EmptyTransitionLabelException e1) {
throw new RuntimeException("Empty transition label");
}
}
}
/**
* Creates an NFA graph representing the Mohri filter.
*
* @return the NFA graph
*/
public static NFAGraph createFilter() {
NFAGraph mohriFilter = new NFAGraph();
NFAVertexND v0 = new NFAVertexND(0);
NFAVertexND v1 = new NFAVertexND(1);
NFAVertexND v2 = new NFAVertexND(2);
mohriFilter.addVertex(v0); /* state 0 */
mohriFilter.addVertex(v1); /* state 1 */
mohriFilter.addVertex(v2); /* state 2 */
try {
mohriFilter.addEdge(new FilterEdge(v0, v0, "ε2", "ε1"));
mohriFilter.addEdge(new FilterEdge(v0, v0, "x", "x"));
mohriFilter.addEdge(new FilterEdge(v0, v1, "ε1", "ε1"));
mohriFilter.addEdge(new FilterEdge(v0, v2, "ε2", "ε2"));
mohriFilter.addEdge(new FilterEdge(v1, v1, "ε1", "ε1"));
mohriFilter.addEdge(new FilterEdge(v1, v0, "x", "x"));
mohriFilter.addEdge(new FilterEdge(v2, v2, "ε2", "ε2"));
mohriFilter.addEdge(new FilterEdge(v2, v0, "x", "x"));
} catch (EmptyTransitionLabelException e1) {
throw new RuntimeException("Empty transition label");
}
mohriFilter.setInitialState(v0);
mohriFilter.addAcceptingState(v0);
mohriFilter.addAcceptingState(v1);
mohriFilter.addAcceptingState(v2);
return mohriFilter;
}
public static NFAGraph productConstructionAFB(NFAGraph a, NFAGraph b) throws InterruptedException {
NFAGraph m1 = a.copy();
NFAGraph m2 = b.copy();
NFAGraph f = NFAAnalysisTools.createFilter();
NFAAnalysisTools.prepareForFilter(m1, "ε2", "ε1");
NFAAnalysisTools.prepareForFilter(m2, "ε1", "ε2");
HashMap originalWords = new HashMap();
NFAGraph af = NFAAnalysisTools.productConstruction(m1, f, originalWords);
return NFAAnalysisTools.productConstruction(af, m2, originalWords);
}
/**
* Calculates the product construction of a graph using the the Mohri
* filter.
*
* @param m
* The NFA to get the product construction of.
* @return The NFA representing the product construction.
*/
public static NFAGraph productConstructionAFA(NFAGraph m) throws InterruptedException {
return NFAAnalysisTools.productConstructionAFB(m, m);
}
public static NFAGraph productConstructionAFAFA(NFAGraph m) throws InterruptedException {
NFAGraph m1 = m.copy();
NFAGraph m2 = m.copy();
NFAGraph f = NFAAnalysisTools.createFilter();
NFAAnalysisTools.prepareForFilter(m1, "ε2", "ε1");
NFAAnalysisTools.prepareForFilter(m2, "ε1", "ε2");
HashMap originalWords = new HashMap();
NFAGraph af = NFAAnalysisTools.productConstruction(m1, f, originalWords);
NFAGraph afa = NFAAnalysisTools.productConstruction(af, m2, originalWords);
/*
* Changing the existing epsilon transitions and adding the epsilon self
* loops
*/
NFAAnalysisTools.prepareForFilter(afa, "ε2", "ε1");
NFAGraph afaf = NFAAnalysisTools.productConstruction(afa, f, originalWords);
NFAGraph afafa = NFAAnalysisTools.productConstruction(afaf, m2, originalWords);
return afafa;
}
public static NFAGraph productConstruction(NFAGraph m1, NFAGraph m2, HashMap originalWords) throws InterruptedException {
NFAGraph productConstruction = new NFAGraph();
NFAVertexND m1SourceState, m2SourceState;
m1SourceState = m1.getInitialState();
m2SourceState = m2.getInitialState();
int m1Dimensions = m1SourceState.getNumDimensions();
int m2Dimensions = m2SourceState.getNumDimensions();
NFAVertexND firstVertex = new NFAVertexND(m1SourceState, m2SourceState);
LinkedList toVisit = new LinkedList();
/* Adding the initial state */
toVisit.add(firstVertex);
productConstruction.addVertex(firstVertex);
productConstruction.setInitialState(firstVertex);
while (!toVisit.isEmpty()) {
if (Thread.currentThread().isInterrupted()) {
throw new InterruptedException();
}
NFAVertexND sourceVertex = toVisit.poll();
m1SourceState = sourceVertex.getStateByDimensionRange(1, 1 + m1Dimensions);
m2SourceState = sourceVertex.getStateByDimensionRange(1 + m1Dimensions, 1 + m1Dimensions + m2Dimensions);
/* see if the current vertex is accepting */
if (m1.isAcceptingState(m1SourceState) && m2.isAcceptingState(m2SourceState)) {
productConstruction.addAcceptingState(sourceVertex);
}
for (NFAEdge currentM1Edge : m1.outgoingEdgesOf(m1SourceState)) {
if (Thread.currentThread().isInterrupted()) {
throw new InterruptedException();
}
int m1NumParallel = currentM1Edge.getNumParallel();
NFAVertexND m1TargetState = currentM1Edge.getTargetVertex();
TransitionLabel word = currentM1Edge.getTransitionLabel();
TransitionLabel originalWord = word;
if (originalWords.containsKey(currentM1Edge)) {
/* This edge changed the word, find it's original value */
originalWord = originalWords.get(currentM1Edge);
}
for (NFAEdge currentM2Edge : m2.outgoingEdgesOf(m2SourceState)) {
if (Thread.currentThread().isInterrupted()) {
throw new InterruptedException();
}
if (!currentM2Edge.isTransitionFor(word)) {
/* current edge can't handle word */
continue;
}
int m2NumParallel = currentM2Edge.getNumParallel();
NFAVertexND m2TargetState = currentM2Edge.getTargetVertex();
NFAVertexND targetVertex = new NFAVertexND(m1TargetState, m2TargetState);
/* ensure each state is only visited once */
if (!productConstruction.containsVertex(targetVertex)) {
toVisit.add(targetVertex);
productConstruction.addVertex(targetVertex);
}
NFAEdge newEdge = new NFAEdge(sourceVertex, targetVertex, originalWord);
if (isFilterEdge(currentM2Edge)) {
/*
* swap out the current character for the filter's
* output character
*/
FilterEdge fEdge = (FilterEdge) currentM2Edge;
if (fEdge.getIsEpsilonTransition()) {
/*
* Storing the original name of the edge in the
* outgoing transition character
*/
newEdge.setTransitionLabel(fEdge.getOutGoingTransitionCharacter());
originalWords.put(newEdge, originalWord);
}
} else {
if (!currentM2Edge.getIsEpsilonTransition()) {
TransitionLabel tl2 = currentM2Edge.getTransitionLabel();
TransitionLabel intersection = originalWord.intersection(tl2);
newEdge.setTransitionLabel(intersection);
}
}
newEdge.setNumParallel(m1NumParallel * m2NumParallel);
productConstruction.addEdge(newEdge);
}
}
}
return productConstruction;
}
/* Trims away states not reachable form start */
public static NFAGraph makeTrimFromStart(NFAGraph m) throws InterruptedException {
NFAGraph trimmed = m.copy();
NFAVertexND mInitialVertex = m.getInitialState();
Set vSet = m.vertexSet();
HashSet usefulStates = new HashSet();
makeTrimFromStartDFS(m, mInitialVertex, usefulStates);
for (NFAVertexND currentVertex : vSet) {
if (!usefulStates.contains(currentVertex)) {
trimmed.removeVertex(currentVertex);
}
}
return trimmed;
}
private static void makeTrimFromStartDFS(NFAGraph m, NFAVertexND currentVertex, HashSet usefulStates) {
usefulStates.add(currentVertex);
for (NFAEdge e : m.outgoingEdgesOf(currentVertex)) {
NFAVertexND target = e.getTargetVertex();
if (!usefulStates.contains(target)) {
makeTrimFromStartDFS(m, target, usefulStates);
}
}
}
/**
* Removes all useless states from an NFA graph. This is done recursively by
* determining whether each vertex is connected to a useful vertex,
* recursively.
*
* @param m
* The NFA graph to remove all useless states from.
* @return The trimmed graph.
*/
public static NFAGraph makeTrimAlternative(NFAGraph m) throws InterruptedException {
NFAGraph trimmed = m.copy();
Set vSet = m.vertexSet();
LinkedList toRemove = new LinkedList();
HashSet usefulStates = new HashSet();
for (NFAVertexND acceptingState : m.getAcceptingStates()) {
usefulStates.add(acceptingState);
}
for (NFAVertexND currentVertex : vSet) {
if (Thread.currentThread().isInterrupted()) {
throw new InterruptedException();
}
if (!NFAAnalysisTools.makeTrimIsUseful(trimmed, currentVertex, new HashSet(), usefulStates)) {
/* We do not want to remove the initial state */
if (!m.getInitialState().equals(currentVertex)) {
toRemove.add(currentVertex);
}
} else {
usefulStates.add(currentVertex);
}
}
for (NFAVertexND currentVertex : toRemove) {
trimmed.removeVertex(currentVertex);
}
return trimmed;
}
/**
* A function used recursively to determine whether a state is useful.
*
* @param m
* The graph containing the state
* @param currentVertex
* The current state being considered.
* @param visited
* A map containing all visited states to prevent loops.
* @return True if the state is useful, false if not.
*/
static boolean makeTrimIsUseful(NFAGraph m, NFAVertexND currentVertex, HashSet visited, HashSet usefulStates) {
if (usefulStates.contains(currentVertex)) {
/* the current vertex is useful */
return true;
}
if (visited.contains(currentVertex)) {
/* The current vertex is not useful, and has been visited before. */
return false;
} else {
visited.add(currentVertex);
}
boolean result = false;
for (NFAEdge currentEdge : m.outgoingEdgesOf(currentVertex)) {
/* See if any of the adjacent vertices are useful */
result |= makeTrimIsUseful(m, currentEdge.getTargetVertex(), visited, usefulStates);
if (result) {
return true;
}
}
return result;
}
public static NFAGraph makeTrimUPNFA(NFAGraph m, NFAGraph upnfa) throws InterruptedException {
HashSet usefulStates = new HashSet();
for (NFAVertexND v : upnfa.vertexSet()) {
UPNFAState upNFAState = (UPNFAState) v;
if (upNFAStateIsUseful(m, upnfa, upNFAState)) {
usefulStates.add(upNFAState);
}
}
NFAGraph trimmedUPNFA = upnfa.copy();
for (NFAVertexND v : upnfa.vertexSet()) {
if (!usefulStates.contains(v)) {
if (trimmedUPNFA.isAcceptingState(v)) {
trimmedUPNFA.removeAcceptingState(v);
}
trimmedUPNFA.removeVertex(v);
}
}
return trimmedUPNFA;
}
public static boolean upNFAStateIsUseful(NFAGraph m, NFAGraph upnfa, UPNFAState upNFAState) throws InterruptedException {
HashSet alphabet = new HashSet();
alphabet.add(CharacterClassTransitionLabel.wildcardLabel());
HashSet P = (HashSet) upNFAState.getP();
TransitionLabel higherPrioritySymbols = new CharacterClassTransitionLabel();
boolean containsAcceptState = false;
for (NFAVertexND p : P) {
if (m.isAcceptingState(p)) {
containsAcceptState = true;
}
Set outgoingEdges = m.outgoingEdgesOf(p);
for (NFAEdge e : outgoingEdges) {
if (!e.getIsEpsilonTransition()) {
higherPrioritySymbols = higherPrioritySymbols.union(e.getTransitionLabel());
}
}
}
if (!containsAcceptState || !higherPrioritySymbols.complement().isEmpty()) {
return true;
} else {
Iterator i0 = P.iterator();
NFAVertexND p = i0.next();
HashSet reachableFromStart = new HashSet();
reachableFromStart.addAll(P);
NFAGraph intersectionDfa = NFAAnalysisTools.determinize(m, reachableFromStart, alphabet);
intersectionDfa = complementDfa(intersectionDfa);
/* Try to reach an accept state */
Stack toVisit = new Stack();
HashSet visited = new HashSet();
toVisit.push(intersectionDfa.getInitialState());
while (!toVisit.isEmpty()) {
NFAVertexND currentState = toVisit.pop();
if (intersectionDfa.isAcceptingState(currentState)) {
return true;
} else {
Set outgoingEdges = intersectionDfa.outgoingEdgesOf(currentState);
for (NFAEdge e : outgoingEdges) {
NFAVertexND targetVertex = e.getTargetVertex();
if (!visited.contains(targetVertex)) {
toVisit.push(targetVertex);
visited.add(targetVertex);
}
}
}
}
}
return false;
}
public static NFAGraph complementDfa(NFAGraph dfa) {
NFAGraph resultGraph = dfa.copy();
/* swapping final and nonfinal states */
for (NFAVertexND currentState : dfa.vertexSet()) {
if (dfa.isAcceptingState(currentState)) {
resultGraph.removeAcceptingState(currentState);
} else {
resultGraph.addAcceptingState(currentState);
}
}
return resultGraph;
}
/**
* Removes all useless states from an NFA graph. This is done by reversing
* the graph and determining which states are reachable from the final
* state. Typically this method was found to be slower than the other
* makeTrim method.
*
* @param m
* The NFA graph to remove all useless states from.
* @return The trimmed graph.
* @throws InterruptedException
*/
public static NFAGraph makeTrim(NFAGraph m) throws InterruptedException {
NFAGraph trimmed = m.copy();
NFAGraph reversedGraph = m.reverse();
HashSet usefulStates = makeTrimReachable(reversedGraph, reversedGraph.getAcceptingStates());
for (NFAVertexND v : m.vertexSet()) {
if (!usefulStates.contains(v)) {
/* We do not want to remove the initial state (even if it is useless) */
if (!m.getInitialState().equals(v)) {
trimmed.removeVertex(v);
}
}
}
return trimmed;
}
private static HashSet makeTrimReachable(NFAGraph reversedGraph, Set defaultUsefulStates) throws InterruptedException {
HashSet usefulStates = new HashSet();
LinkedList toVisit = new LinkedList();
for (NFAVertexND defaultUsefulState : defaultUsefulStates) {
toVisit.add(defaultUsefulState);
}
while (!toVisit.isEmpty()) {
if (isInterrupted()) {
throw new InterruptedException();
}
NFAVertexND currentVertex = toVisit.pop();
usefulStates.add(currentVertex);
for (NFAEdge outGoingEdge : reversedGraph.outgoingEdgesOf(currentVertex)) {
NFAVertexND targetVertex = outGoingEdge.getTargetVertex();
if (!usefulStates.contains(targetVertex) && !toVisit.contains(targetVertex)) {
toVisit.push(targetVertex);
}
}
}
return usefulStates;
}
public static NFAGraph convertUpNFAToNFAGraph(NFAGraph m, HashMap newStateMap) {
HashMap stateMap = new HashMap();
int stateCounter = 0;
NFAGraph resultGraph = new NFAGraph();
for (NFAVertexND v : m.vertexSet()) {
NFAVertexND correspondingState = new NFAVertexND("q" + stateCounter);
stateMap.put((UPNFAState) v, correspondingState);
newStateMap.put(correspondingState, (UPNFAState) v);
resultGraph.addVertex(correspondingState);
if (m.isAcceptingState(v)) {
resultGraph.addAcceptingState(correspondingState);
}
stateCounter++;
}
resultGraph.setInitialState(stateMap.get(m.getInitialState()));
for (NFAEdge e : m.edgeSet()) {
UPNFAState sourceState = (UPNFAState) e.getSourceVertex();
UPNFAState targetState = (UPNFAState) e.getTargetVertex();
NFAVertexND newSource = stateMap.get(sourceState);
NFAVertexND newTarget = stateMap.get(targetState);
TransitionLabel transitionLabel = e.getTransitionLabel();
NFAEdge newEdge = new NFAEdge(newSource, newTarget, transitionLabel);
resultGraph.addEdge(newEdge);
}
return resultGraph;
}
/**
* Constructs a list of NFA graphs each representing a strongly connected
* component in the graph given as parameter.
*
* @param m
* The NFA graph to find the strongly connected components in.
* @return A list containing all the strongly connected components.
* @throws InterruptedException
*/
public static LinkedList getStronglyConnectedComponents(NFAGraph m) throws InterruptedException {
KosarajuStrongConnectivityInspector sci = new KosarajuStrongConnectivityInspector(m);
List> sccs = sci.getStronglyConnectedComponents();
LinkedList sccNFAs = new LinkedList();
for (Graph scc : sccs) {
if (isInterrupted()) {
throw new InterruptedException();
}
/* scc's consisting of no edges are irrelevant for our purpose */
if (scc.edgeSet().size() > 0) {
NFAGraph currentNFAG = new NFAGraph();
for (NFAVertexND v : scc.vertexSet()) {
if (isInterrupted()) {
throw new InterruptedException();
}
currentNFAG.addVertex(v);
}
for (NFAEdge e : scc.edgeSet()) {
if (isInterrupted()) {
throw new InterruptedException();
}
currentNFAG.addEdge(e);
}
sccNFAs.add(currentNFAG);
}
}
return sccNFAs;
}
/**
* Determines whether the specified edge is a filter edge.
*
* @param e
* The edge specified.
* @return True if the edge is a filter edge, false if not.
*/
private static boolean isFilterEdge(NFAEdge e) {
return FilterEdge.class.isAssignableFrom(e.getClass());
}
/* ============= EDA TOOLS ============= */
/**
* Constructs a list of NFA graphs each representing a strongly connected
* component containing only epsilon transitions in the graph given as
* parameter.
*
* @param m
* The NFA graph to find the strongly connected components in.
* @return A list containing all the strongly connected components, with
* only epsilon transitions between the states.
* @throws InterruptedException
*/
public static LinkedList getEpsilonStronglyConnectedComponents(NFAGraph m) throws InterruptedException {
NFAGraph epsilonGraph = m.copy();
/* iterating over m's edge set so we can modify epsilonGraph's edges */
for (NFAEdge e : m.edgeSet()) {
/* removing all edges that aren't epsilon transitions */
if (!e.getIsEpsilonTransition()) {
epsilonGraph.removeEdge(e);
}
}
return getStronglyConnectedComponents(epsilonGraph);
}
/**
* Creates a single state for each strongly connected component.
*
* @param m
* The NFA graph to merge the epsilon strongly connected
* components in.
* @param epsilon
* True if only epsilon strongly connected components should be
* merged, false if all strongly connected components should be
* merged.
* @return A HashMap containing the merged states as key and the original
* escc as value.
* @throws InterruptedException
*/
public static Map mergeStronglyConnectedComponents(NFAGraph m, boolean epsilon) throws InterruptedException {
Map mergedStates = new HashMap();
LinkedList sccs;
if (epsilon) {
sccs = getEpsilonStronglyConnectedComponents(m);
} else {
sccs = getStronglyConnectedComponents(m);
}
for (NFAGraph scc : sccs) {
NFAVertexND mergedState = null;
boolean isAccepting = false;
boolean isInitial = false;
LinkedList edgesToRestore = new LinkedList();
for (NFAVertexND v : scc.vertexSet()) {
if (isInterrupted()) {
throw new InterruptedException();
}
if (mergedState == null) {
mergedState = new NFAVertexND(v.getStateNumberByDimension(1));
}
/* to make the merged state also accepting */
if (m.isAcceptingState(v)) {
isAccepting = true;
m.removeAcceptingState(v);
}
/* to make the merged state also initial */
if (m.getInitialState() != null && m.getInitialState().equals(v)) {
isInitial = true;
}
for (NFAEdge e : m.edgesOf(v)) {
/*
* if the edge doesn't come from another vertex in the escc
* or if it's a non-epsilon transition. Note that symbol transitions between vertices in the escc will become self loops on the merged state
*/
if (!scc.containsEdge(e)) {
/*
* A necessary check for non-epsilon transitions in the
* escc
*/
NFAVertexND sourceVertex = scc.containsVertex(e.getSourceVertex()) ? mergedState : e.getSourceVertex();
/*
* A necessary check for non-epsilon transitions in the
* escc
*/
NFAVertexND targetVertex = scc.containsVertex(e.getTargetVertex()) ? mergedState : e.getTargetVertex();
NFAEdge newEdge = new NFAEdge(sourceVertex, targetVertex, e.getTransitionLabel());
newEdge.setNumParallel(e.getNumParallel());
edgesToRestore.add(newEdge);
}
}
m.removeVertex(v);
}
m.addVertex(mergedState);
if (isInitial) {
m.setInitialState(mergedState);
}
if (isAccepting) {
m.addAcceptingState(mergedState);
}
for (NFAEdge e : edgesToRestore) {
m.addEdge(e);
}
mergedStates.put(mergedState, scc);
}
return mergedStates;
}
/**
* Determines the number of walks between a vertex and all other vertices in
* a given graph.
*
* @param m
* The graph to count the walks in.
* @param s
* The starting vertex.
* @return A HashMap containing every destination vertex as key and the
* number of walks to this vertex as value.
*/
public static HashMap numWalksFrom(NFAGraph m, NFAVertexND s) {
HashMap paths = new HashMap();
/* initialise all paths */
for (NFAVertexND v : m.vertexSet()) {
paths.put(v, 0);
}
HashMap visitedEdges = new HashMap();
/* set the number of times each edge has been visited to 0 */
for (NFAEdge e : m.edgeSet()) {
visitedEdges.put(e, 0);
}
NFAAnalysisTools.numWalksFromSearch(m, s, visitedEdges, paths);
return paths;
}
/**
* A function to recursively determine the number of walks between a vertex
* and all other vertices in a given graph.
*
* @param m
* The graph to count the walks in.
* @param current
* The starting vertex.
* @param visitedEdges
* A HashMap containing all the edges as key and the current
* number of times they have been visited as value.
* @param paths
* A HashMap containing all the vertices as key and the current
* number of walks to them as value.
*/
static void numWalksFromSearch(NFAGraph m, NFAVertexND current, HashMap visitedEdges, HashMap paths) {
/* update the number of paths to the current vertex */
paths.put(current, paths.get(current) + 1);
for (NFAEdge e : m.outgoingEdgesOf(current)) {
int currentNumVisit = visitedEdges.get(e);
/* update the number of times this edge has been visited */
visitedEdges.put(e, currentNumVisit + 1);
/* for the amount of times the edge can be visited again */
for (int i = currentNumVisit; i < e.getNumParallel(); i++) {
/* search from the new vertex */
numWalksFromSearch(m, e.getTargetVertex(), visitedEdges, paths);
}
/* unvisit the current edge */
visitedEdges.put(e, currentNumVisit);
}
}
public static Set getAlphabet(NFAGraph n) {
Set regexAlphabet = new HashSet();
for (NFAEdge e : n.edgeSet()) {
if (!e.getIsEpsilonTransition()) {
TransitionLabel tl = e.getTransitionLabel();
if (tl instanceof CharacterClassTransitionLabel) {
CharacterClassTransitionLabel cctl = (CharacterClassTransitionLabel) tl;
if (!regexAlphabet.contains(cctl)) {
regexAlphabet.add(cctl);
}
}
}
}
return regexAlphabet;
}
/**
* This function finds the shortest path from the initial state in the NFA
* to a certain finish state.
*
* @param m
* The graph representing the NFA.
* @param finish
* The state to search to.
* @return A linked list containing the edges in the path.
*/
public static LinkedList shortestPathTo(NFAGraph m, NFAVertexND finish) {
return shortestPathBetween(m, m.getInitialState(), finish);
}
public static LinkedList shortestPathBetween(NFAGraph m, NFAVertexND start, NFAVertexND finish) {
HashMap> pathToMap = new HashMap>();
HashSet traversed = new HashSet();
LinkedList queue = new LinkedList();
NFAVertexND firstVertex = start;
LinkedList emptyPath = new LinkedList();
queue.add(firstVertex);
pathToMap.put(firstVertex, emptyPath);
while (!queue.isEmpty()) {
NFAVertexND currentVertex = queue.removeLast();
LinkedList currentPath = pathToMap.get(currentVertex);
if (currentVertex.equals(finish)) {
return currentPath;
}
for (NFAEdge e : m.outgoingEdgesOf(currentVertex)) {
if (!traversed.contains(e)) {
traversed.add(e);
NFAVertexND target = e.getTargetVertex();
LinkedList newPath = new LinkedList(currentPath);
newPath.add(e);
pathToMap.put(target, newPath);
queue.addFirst(target);
}
}
}
return null;
}
public static HashSet reachableWithEpsilon(NFAGraph n, NFAVertexND v) {
HashSet visited = new HashSet();
LinkedList toVisit = new LinkedList();
toVisit.add(v);
while (!toVisit.isEmpty()) {
NFAVertexND currentVertex = toVisit.removeLast();
visited.add(currentVertex);
for (NFAEdge e : n.outgoingEdgesOf(currentVertex)) {
if (e.getIsEpsilonTransition()) {
NFAVertexND targetVertex = e.getTargetVertex();
if (!visited.contains(targetVertex)) {
toVisit.add(targetVertex);
}
}
}
}
return visited;
}
public static NFAGraph determinize(NFAGraph input, Set reachableFromStart, Set alphabet) throws InterruptedException {
NFAGraph dfa = new NFAGraph();
/* http://www.cse.unsw.edu.au/~rvg/pub/nfadfa.pdf */
//NFAVertexND startState = input.getInitialState();
//HashSet reachableFromStart = reachableWithEpsilon(input, startState);
LinkedList toVisit = new LinkedList();
LinkedList sortedReachableFromStart = new LinkedList(reachableFromStart);
Collections.sort(sortedReachableFromStart);
StringBuilder labelBuilder = new StringBuilder();
Iterator i0 = sortedReachableFromStart.iterator();
while (i0.hasNext()) {
if (isInterrupted()) {
throw new InterruptedException();
}
NFAVertexND startState = i0.next();
List subStates = startState.getStates();
if (subStates.size() == 1) {
String currentLabel = subStates.iterator().next();
labelBuilder.append(currentLabel);
} else {
Iterator i1 = subStates.iterator();
labelBuilder.append("(");
while (i1.hasNext()) {
String currentLabel = i1.next();
labelBuilder.append(currentLabel);
if (i1.hasNext()) {
labelBuilder.append(", ");
}
}
labelBuilder.append(")");
}
if (i0.hasNext()) {
labelBuilder.append(", ");
}
}
/* note dfa states are not multidimensional */
NFAVertexND dfaStartState = new NFAVertexND(labelBuilder.toString());
toVisit.add(dfaStartState);
HashMap> dfaStateToSubStatesMap = new HashMap>();
dfaStateToSubStatesMap.put(dfaStartState, reachableFromStart);
dfa.addVertex(dfaStartState);
dfa.setInitialState(dfaStartState);
for (NFAVertexND i : reachableFromStart) {
if (isInterrupted()) {
throw new InterruptedException();
}
if (input.isAcceptingState(i)) {
dfa.addAcceptingState(dfaStartState);
}
}
NFAVertexND emptyState = new NFAVertexND(0);
while (!toVisit.isEmpty()) {
if (isInterrupted()) {
throw new InterruptedException();
}
NFAVertexND P = toVisit.removeLast();
/* with the label in TransitionLabel, P can get to the states in HashSet */
HashMap> newStates = new HashMap>();
Set subStates = dfaStateToSubStatesMap.get(P);
for (NFAVertexND currentSubState : subStates) {
if (isInterrupted()) {
throw new InterruptedException();
}
for (NFAEdge e : input.outgoingEdgesOf(currentSubState)) {
if (isInterrupted()) {
throw new InterruptedException();
}
if (!e.getIsEpsilonTransition()) {
TransitionLabel label = e.getTransitionLabel();
NFAVertexND targetState = e.getTargetVertex();
//HashSet newState = newStates.getOrDefault(label, new HashSet());
HashSet newState;
if (newStates.containsKey(label)) {
newState = newStates.get(label);
} else {
newState = new HashSet();
}
if (!newState.contains(targetState)) {
newState.add(targetState);
}
/* If there isn't another outgoing label exactly like this one, check for other overlapping labels */
if (!newStates.containsKey(label)) {
/* Copying entries, to avoid concurrent modification errors */
Set>> entries = new HashSet>>(newStates.entrySet());
for (Map.Entry> kv : entries) {
if (isInterrupted()) {
throw new InterruptedException();
}
TransitionLabel tl1 = kv.getKey();
TransitionLabel intersection = tl1.intersection(label);
if (!intersection.isEmpty()) {
//System.out.println(tl1 + " " + symbol + " " + intersection);
/* the transition labels over lap. For a DFA we need to
* ensure that all character class labels are disjoint.
* We can do this by ensuring the original class (tl1Copy) without the intersecting part of
* the new class (symbol) goes to the original states
* the new class (symbol) without the intersecting part of the orignal class (tl1Copy)
* goes to the new states.
* the intersection goes to both the classes.
* */
HashSet tmpVertices = newStates.remove(kv.getKey());
TransitionLabel uniqueTl1 = tl1.intersection(label.complement());
if (!uniqueTl1.isEmpty()) {
//System.out.println("1: " + uniqueTl1 + " " + tmpVertices);
newStates.put(uniqueTl1, tmpVertices);
}
HashSet unionStates = new HashSet(tmpVertices);
unionStates.addAll(newState);
//System.out.println("2: " + intersection + " " + unionStates);
/* We know intersection is not empty */
newStates.put(intersection, unionStates);
label = label.intersection(tl1.complement());
}
}
}
if (!label.isEmpty()) {
//System.out.println("3: " + symbol + " " + newState);
newStates.put(label, newState);
}
}
}
}
//System.out.println(P + "\t\t" + newStates);
/* Finding all the ranges in the alphabet not accounted for */
for (TransitionLabel s : alphabet) {
if (isInterrupted()) {
throw new InterruptedException();
}
TransitionLabel toEmptyState = s;
/* for each range accounted for, remove it from the current alphabet range */
for (TransitionLabel tl : newStates.keySet()) {
if (isInterrupted()) {
throw new InterruptedException();
}
toEmptyState = toEmptyState.intersection(tl.complement());
if (toEmptyState.isEmpty()) {
break;
}
}
if (!toEmptyState.isEmpty()) {
//System.out.println("check: " + toEmptyState);
//System.out.println(toEmptyState);
if (!dfa.containsVertex(emptyState)) {
dfa.addVertex(emptyState);
for (TransitionLabel s2 : alphabet) {
dfa.addEdge(new NFAEdge(emptyState, emptyState, s2));
}
}
dfa.addEdge(new NFAEdge(P, emptyState, toEmptyState));
}
}
/*for (TransitionLabel s : alphabet) {
if (!newStates.containsKey(s)) {
if (!dfa.containsVertex(emptyState)) {
dfa.addVertex(emptyState);
for (TransitionLabel s2 : alphabet) {
dfa.addEdge(new NFAEdge(emptyState, emptyState, s2));
}
}
dfa.addEdge(new NFAEdge(P, emptyState, s));
}
}*/
for (Map.Entry> kv : newStates.entrySet()) {
if (isInterrupted()) {
throw new InterruptedException();
}
TransitionLabel label = kv.getKey();
HashSet reachableViaSymbolEpsilon = new HashSet();
for (NFAVertexND v : kv.getValue()) {
if (isInterrupted()) {
throw new InterruptedException();
}
reachableViaSymbolEpsilon.add(v);
HashSet reachableViaEpsilon = reachableWithEpsilon(input, v);
reachableViaSymbolEpsilon.addAll(reachableViaEpsilon);
}
/* we sort the sub states, so that states with the same sub states are equal (since order matters in the PC, but not here) */
LinkedList sortedReachableViaSymbolEpsilon = new LinkedList(reachableViaSymbolEpsilon);
Collections.sort(sortedReachableViaSymbolEpsilon);
labelBuilder = new StringBuilder();
Iterator i1 = sortedReachableViaSymbolEpsilon.iterator();
while (i1.hasNext()) {
if (isInterrupted()) {
throw new InterruptedException();
}
NFAVertexND currentSubState = i1.next();
List labelSubStates = currentSubState.getStates();
if (labelSubStates.size() == 1) {
String currentLabel = labelSubStates.iterator().next();
labelBuilder.append(currentLabel);
} else {
/* TODO determinizing a multidimensional NFA is untested */
Iterator i2 = labelSubStates.iterator();
labelBuilder.append("(");
while (i2.hasNext()) {
if (isInterrupted()) {
throw new InterruptedException();
}
String currentLabel = i2.next();
labelBuilder.append(currentLabel);
if (i2.hasNext()) {
labelBuilder.append(", ");
}
}
labelBuilder.append(")");
}
if (i1.hasNext()) {
labelBuilder.append(", ");
}
}
NFAVertexND stateToAdd = new NFAVertexND(labelBuilder.toString()); /* << build a label from sortedReachableViaSymbolEpsilon and store in map from stateToAdd to sortedReachableViaSymbolEpsilon */
dfaStateToSubStatesMap.put(stateToAdd, reachableViaSymbolEpsilon);
if (!dfa.containsVertex(stateToAdd)) {
toVisit.add(stateToAdd);
dfa.addVertex(stateToAdd);
for (NFAVertexND i : reachableViaSymbolEpsilon) {
if (input.isAcceptingState(i)) {
dfa.addAcceptingState(stateToAdd);
}
}
}
//System.out.println(P + " " + " " + stateToAdd + " " + symbol);
dfa.addEdge(new NFAEdge(P, stateToAdd, label));
}
}
//System.out.println(dfa);
return dfa;
}
public static NFAGraph dfaIntersection(NFAGraph m1, NFAGraph m2) {
NFAGraph intersectionGraph = new NFAGraph();
NFAVertexND intersectionGraphInitialstate = new NFAVertexND(m1.getInitialState(), m2.getInitialState());
int stateCounter = 0;
/* We want the intersection DFA to be one dimensional, map is 2D to 1D */
HashMap stateMap = new HashMap();
NFAVertexND mappedinitialState = new NFAVertexND("q" + stateCounter);
stateCounter++;
stateMap.put(intersectionGraphInitialstate, mappedinitialState);
intersectionGraph.addVertex(mappedinitialState);
boolean isAcceptingState = m1.isAcceptingState(m1.getInitialState()) && m2.isAcceptingState(m2.getInitialState());
if (isAcceptingState) {
intersectionGraph.addAcceptingState(mappedinitialState);
}
intersectionGraph.setInitialState(mappedinitialState);
Stack toVisit = new Stack();
toVisit.push(intersectionGraphInitialstate);
boolean containsSinkState = false;
NFAVertexND sinkState = null;
while (!toVisit.isEmpty()) {
NFAVertexND currentIntersectionState = toVisit.pop();
NFAVertexND m1SourceState = currentIntersectionState.getStateByDimension(1);
NFAVertexND m2SourceState = currentIntersectionState.getStateByDimension(2);
NFAVertexND currentMappedState = stateMap.get(currentIntersectionState);
TransitionLabel accountedSymbols = new CharacterClassTransitionLabel();
for (NFAEdge e1 : m1.outgoingEdgesOf(m1SourceState)) {
for (NFAEdge e2 : m2.outgoingEdgesOf(m2SourceState)) {
TransitionLabel e1TransitionLabel = e1.getTransitionLabel();
TransitionLabel e2TransitionLabel = e2.getTransitionLabel();
TransitionLabel intersectionTransitionLabel = e1TransitionLabel.intersection(e2TransitionLabel);
if (!intersectionTransitionLabel.isEmpty()) {
NFAVertexND m1TargetState = e1.getTargetVertex();
NFAVertexND m2TargetState = e2.getTargetVertex();
NFAVertexND newIntersectionState = new NFAVertexND(m1TargetState, m2TargetState);
NFAVertexND targetMappedState;
if (!stateMap.containsKey(newIntersectionState)) {
targetMappedState = new NFAVertexND("q" + stateCounter);
stateCounter++;
stateMap.put(newIntersectionState, targetMappedState);
intersectionGraph.addVertex(targetMappedState);
toVisit.push(newIntersectionState);
isAcceptingState = m1.isAcceptingState(m1TargetState) && m2.isAcceptingState(m2TargetState);
if (isAcceptingState) {
intersectionGraph.addAcceptingState(targetMappedState);
}
} else {
targetMappedState = stateMap.get(newIntersectionState);
}
intersectionGraph.addEdge(new NFAEdge(currentMappedState, targetMappedState, intersectionTransitionLabel));
accountedSymbols = accountedSymbols.union(intersectionTransitionLabel);
}
}
}
TransitionLabel unaccountedSymbols = accountedSymbols.complement();
if (!unaccountedSymbols.isEmpty()) {
if (containsSinkState) {
intersectionGraph.addEdge(new NFAEdge(currentIntersectionState, sinkState, unaccountedSymbols));
} else {
sinkState = new NFAVertexND("q" + stateCounter);
stateCounter++;
stateMap.put(sinkState, sinkState);
intersectionGraph.addVertex(sinkState);
containsSinkState = true;
NFAEdge wildcardLoop = new NFAEdge(sinkState, sinkState, CharacterClassTransitionLabel.wildcardLabel());
intersectionGraph.addEdge(wildcardLoop);
}
}
}
return intersectionGraph;
}
/**
* A function that uses Kahn's algorithm to find the topological order of the vertices in
* the graph.
*
* @param originalM
* The NFA graph to find the topological order for.
* @return A map to find the position of each vertex in the topological
* order.
*/
public static HashMap topologicalSort(NFAGraph originalM) {
NFAGraph m = originalM.copy();
LinkedList toVisit = new LinkedList();
HashMap oldNewMap = new HashMap();
int orderCounter = 0;
toVisit.addLast(m.getInitialState());
while (!toVisit.isEmpty()) {
NFAVertexND n = toVisit.removeLast();
oldNewMap.put(n, orderCounter++);
for (NFAEdge e : originalM.outgoingEdgesOf(n)) {
NFAVertexND targetVertex = e.getTargetVertex();
m.removeEdge(e);
if (m.inDegreeOf(targetVertex) == 0) {
toVisit.addLast(targetVertex);
}
}
}
if (!m.edgeSet().isEmpty()) {
throw new RuntimeException("G5 cannot have cycles.");
}
return oldNewMap;
}
protected static boolean isInterrupted() {
return Thread.currentThread().isInterrupted();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy