// JAR search and dependency download from the Maven repository
// edu.princeton.cs.algs4.NFA Maven / Gradle / Ivy
// Show all versions of algorithm. Show documentation.
/******************************************************************************
* Compilation: javac NFA.java
* Execution: java NFA regexp text
* Dependencies: Stack.java Bag.java Digraph.java DirectedDFS.java
*
* % java NFA "(A*B|AC)D" AAAABD
* true
*
* % java NFA "(A*B|AC)D" AAAAC
* false
*
* % java NFA "(a|(bc)*d)*" abcbcd
* true
*
* % java NFA "(a|(bc)*d)*" abcbcbcdaaaabcbcdaaaddd
* true
*
* Remarks
* -----------
* The following features are not supported:
* - The + operator
* - Multiway or
* - Metacharacters in the text
* - Character classes.
*
******************************************************************************/
package edu.princeton.cs.algs4;
/**
* The {@code NFA} class provides a data type for creating a
* nondeterministic finite state automaton (NFA) from a regular
* expression and testing whether a given string is matched by that regular
* expression.
* It supports the following operations: concatenation,
* closure, binary or, and parentheses.
* It does not support multiway or, character classes,
* metacharacters (either in the text or pattern),
* capturing capabilities, greedy or reluctant
* modifiers, and other features in industrial-strength implementations
* such as {@link java.util.regex.Pattern} and {@link java.util.regex.Matcher}.
*
* This implementation builds the NFA using a digraph and a stack
* and simulates the NFA using digraph search (see the textbook for details).
* The constructor takes time proportional to m, where m
* is the number of characters in the regular expression.
* The recognizes method takes time proportional to m n,
* where n is the number of characters in the text.
*
* For additional documentation,
* see Section 5.4 of
* Algorithms, 4th Edition by Robert Sedgewick and Kevin Wayne.
*
* @author Robert Sedgewick
* @author Kevin Wayne
*/
public class NFA {
    private final Digraph graph;   // digraph of epsilon transitions
    private final String regexp;   // regular expression
    private final int m;           // number of characters in regular expression

    /**
     * Initializes the NFA from the specified regular expression.
     * <p>
     * Vertex {@code i} of the epsilon-transition digraph corresponds to the
     * character at index {@code i} of the regular expression; vertex
     * {@code m} is the accept state.
     *
     * @param  regexp the regular expression
     * @throws IllegalArgumentException if a {@code ')'} has no matching
     *         operator on the stack, or if operators are left unmatched
     *         once the whole expression has been scanned
     */
    public NFA(String regexp) {
        this.regexp = regexp;
        m = regexp.length();
        // indices of pending '(' and '|' characters
        Stack<Integer> ops = new Stack<Integer>();
        graph = new Digraph(m+1);
        for (int i = 0; i < m; i++) {
            char c = regexp.charAt(i);
            // left end of the sub-expression that ends at index i
            int lp = i;
            if (c == '(' || c == '|') {
                ops.push(i);
            }
            else if (c == ')') {
                int or = popOperator(ops);

                // 2-way or operator
                if (regexp.charAt(or) == '|') {
                    lp = popOperator(ops);
                    graph.addEdge(lp, or+1);
                    graph.addEdge(or, i);
                }
                else if (regexp.charAt(or) == '(') {
                    lp = or;
                }
                else {
                    // only '(' and '|' indices are ever pushed
                    assert false;
                }
            }

            // closure operator (uses 1-character lookahead)
            if (i < m-1 && regexp.charAt(i+1) == '*') {
                graph.addEdge(lp, i+1);
                graph.addEdge(i+1, lp);
            }

            // these metacharacters consume no text: epsilon edge to the next state
            if (c == '(' || c == '*' || c == ')') {
                graph.addEdge(i, i+1);
            }
        }
        if (ops.size() != 0)
            throw new IllegalArgumentException("Invalid regular expression");
    }

    /**
     * Pops the index of the most recent pending operator, reporting a
     * malformed regular expression when no operator is pending.
     *
     * @param  ops the stack of pending {@code '('} and {@code '|'} indices
     * @return the index on top of the stack
     * @throws IllegalArgumentException if the stack is empty
     */
    private static int popOperator(Stack<Integer> ops) {
        if (ops.size() == 0)
            throw new IllegalArgumentException("Invalid regular expression");
        return ops.pop();
    }

    /**
     * Collects every vertex of the digraph that the given search has marked.
     *
     * @param  dfs a search over {@code graph}
     * @return the marked vertices
     */
    private Bag<Integer> markedStates(DirectedDFS dfs) {
        Bag<Integer> states = new Bag<Integer>();
        for (int v = 0; v < graph.V(); v++)
            if (dfs.marked(v)) states.add(v);
        return states;
    }

    /**
     * Returns true if the text is matched by the regular expression.
     * <p>
     * A {@code '.'} in the regular expression matches any single text
     * character.
     *
     * @param  txt the text
     * @return {@code true} if the text is matched by the regular expression,
     *         {@code false} otherwise
     * @throws IllegalArgumentException if the text contains one of the
     *         metacharacters {@code *}, {@code |}, {@code (} or {@code )}
     */
    public boolean recognizes(String txt) {
        // states marked by a search from state 0, before any text is read
        Bag<Integer> pc = markedStates(new DirectedDFS(graph, 0));

        // Compute possible NFA states for txt[i+1]
        for (int i = 0; i < txt.length(); i++) {
            char c = txt.charAt(i);
            if (c == '*' || c == '|' || c == '(' || c == ')')
                throw new IllegalArgumentException("text contains the metacharacter '" + txt.charAt(i) + "'");

            // states entered by a match transition on c
            Bag<Integer> match = new Bag<Integer>();
            for (int v : pc) {
                if (v == m) continue;   // accept state has no pattern character
                if ((regexp.charAt(v) == c) || regexp.charAt(v) == '.')
                    match.add(v+1);
            }

            // nothing matched this character: skip the search altogether
            if (match.size() == 0) return false;

            pc = markedStates(new DirectedDFS(graph, match));

            // optimization if no states reachable
            if (pc.size() == 0) return false;
        }

        // check for accept state
        for (int v : pc)
            if (v == m) return true;
        return false;
    }

    /**
     * Unit tests the {@code NFA} data type.
     * Wraps the first argument in parentheses, builds an NFA from it and
     * prints whether the NFA recognizes the second argument.
     *
     * @param args the command-line arguments: the regular expression
     *             followed by the text
     */
    public static void main(String[] args) {
        String regexp = "(" + args[0] + ")";
        String txt = args[1];
        NFA nfa = new NFA(regexp);
        StdOut.println(nfa.recognizes(txt));
    }
}
/******************************************************************************
* Copyright 2002-2018, Robert Sedgewick and Kevin Wayne.
*
* This file is part of algs4.jar, which accompanies the textbook
*
* Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
* Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
* http://algs4.cs.princeton.edu
*
*
* algs4.jar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* algs4.jar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with algs4.jar. If not, see http://www.gnu.org/licenses.
******************************************************************************/