/*
 * Notice from the download site: Serving project downloads requires significant server
 * resources, and we have to cover our server costs. Thank you in advance for your
 * understanding. The project price is only $1.
 * You can buy this project and then download or modify it as often as you want.
 */
package edu.washington.cs.knowitall.regex;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import edu.washington.cs.knowitall.regex.Expression.AssertionExpression;
import edu.washington.cs.knowitall.regex.Expression.MatchingGroup;
/**
* A finite automaton implementation. There is support for epsilon
* transitions (NFA) but if those are omitted then this works as an
* implementation of a DFA.
*
* @author Michael Schmitz
*/
public class FiniteAutomaton {
/**
* A component automaton with a single start state and a single end
* state.
* @author Michael Schmitz
*
* @param
*/
public static class Automaton {
public final StartState start;
public final EndState end;
public Automaton(StartState start, EndState end) {
this.start = start;
this.end = end;
}
public Automaton(Expression expr) {
this.start = new StartState(expr);
this.end = new EndState(expr);
}
public boolean apply(List tokens) {
return this.evaluate(tokens, true) != null;
}
public int minMatchingLength() {
return start.minMatchingLength();
}
public Match.FinalMatch lookingAt(List tokens) {
return lookingAt(tokens, 0);
}
/**
* @return null if no match, otherwise a representation of the match
*/
public Match.FinalMatch lookingAt(List tokens, int startIndex) {
if (tokens.size() - startIndex - this.minMatchingLength() < 0) {
// don't try if we can't possible match
return null;
}
else {
List sublist = tokens.subList(startIndex, tokens.size());
Step path = this.evaluate(sublist, startIndex == 0);
if (path == null) {
return null;
}
// build list of edges
List> edges = new ArrayList>();
while (path.state != this.start) {
edges.add(path.path);
path = path.prev;
}
Match.IntermediateMatch match = new Match.IntermediateMatch();
buildMatch(sublist.iterator(), null, new AtomicInteger(startIndex), this.start,
Lists.reverse(edges).iterator(), match);
return new Match.FinalMatch(match);
}
}
/**
* Retrace the path through the NFA and produce an object that
* represents the match.
* @param tokenIterator an iterator over the tokens.
* @param expression the expression to match.
* @param index the present index.
* @param state the present state.
* @param edgeIterator an iterator over the edges in the solution.
* @param match the solution.
* @return
*/
private State buildMatch(Iterator tokenIterator, Expression expression,
AtomicInteger index, State state, Iterator> edgeIterator,
Match.IntermediateMatch match) {
Match.IntermediateMatch newMatch = new Match.IntermediateMatch();
while (edgeIterator.hasNext() && !((state instanceof EndState>)
&& ((EndState)state).expression == expression)) {
AbstractEdge edge = edgeIterator.next();
// run the sub-automaton
if (edge instanceof Edge>
&& !(((Edge>) edge).expression instanceof AssertionExpression>)) {
// consume a token, this is the base case
E token = tokenIterator.next();
newMatch.add(((Edge)edge).expression, token, index.getAndIncrement());
state = edge.dest;
}
else if (state instanceof StartState>) {
// recurse on StartState so we have a group for that match
Expression expr = ((StartState)state).expression;
state = buildMatch(tokenIterator, expr, index, edge.dest, edgeIterator, newMatch);
assert(state instanceof EndState> && ((EndState>)state).expression == expr);
}
else {
assert(edge instanceof Epsilon>);
state = edge.dest;
}
}
// add the sub match group
if (expression != null
&& (!newMatch.isEmpty() || expression instanceof MatchingGroup>)) {
// create a wrapper for the expressions it matched
Match.Group pair = new Match.Group(expression);
for (Match.Group p : newMatch.pairs()) {
if (p.expr instanceof Expression.BaseExpression>) {
pair.addTokens(p);
}
}
// add it
match.add(pair);
}
// add the contents of the sub match group
match.addAll(newMatch.pairs());
return state;
}
/**
* A representation of a movement from a state to another, with a
* backreference to the previous state. This is used in building
* a match object once a solution has been found.
* @author Michael Schmitz
*
* @param
*/
private static class Step {
public final State state;
public final Step prev;
public final AbstractEdge path;
public Step(State state) {
this(state, null, null);
}
public Step(State state, Step prev, AbstractEdge path) {
this.state = state;
this.prev = prev;
this.path = path;
}
public String toString() {
return this.state.toString();
}
}
/**
* Expand all epsilon transitions for the supplied steps. That is,
* add all states available via an epsilon transition from a supplied
* state to the list.
* @param steps
*/
private void expandEpsilons(List> steps) {
int size = steps.size();
for (int i = 0; i < size; i++) {
Step step = steps.get(i);
expandEpsilon(step, steps);
}
}
/**
* Expand all epsilon transitions for the specified step. That is,
* add all states avaiable via an epsilon transition from step.state.
* @param step
* @param steps
*/
private void expandEpsilon(Step step, List> steps) {
// loop over edges
for (final Epsilon edge : step.state.epsilons) {
// try free edges if they do not lead to an existing
// step
if (!Iterables.any(steps,
new Predicate>() {
@Override
public boolean apply(Step step) {
return step.state == edge.dest;
}
})) {
Step newstep = new Step(edge.dest, step, edge);
steps.add(newstep);
expandEpsilon(newstep, steps);
}
}
}
/**
* Expand any state that has an assertion edge if the assertion passes
* given the present state.
* @param steps
* @param newsteps
* @param hasStart true iff the tokens contains the start token.
* @param tokens
* @param totalTokens
*/
private void expandAssertions(List> steps, List> newsteps, boolean hasStart,
List tokens, int totalTokens) {
for (Step step : steps) {
for (final Edge edge : step.state.edges) {
if (edge.expression instanceof AssertionExpression>) {
AssertionExpression assertion = (AssertionExpression)edge.expression;
if (assertion.apply(hasStart, tokens, totalTokens)) {
newsteps.add(new Step(edge.dest, step, edge));
}
}
}
}
}
private Step evaluate(List tokens, boolean hasStart) {
List> steps = new ArrayList>();
steps.add(new Step(this.start));
return evaluate(tokens, steps, hasStart);
}
/**
* Evaluate the NFA against the list of tokens using the Thompson NFA
* algorithm.
* @param tokens the tokens to evaluate against
* @param steps present list of accessible states.
* @param hasStart true iff tokens contains the start token.
* @return a Step object representing the last transition or null.
*/
private Step evaluate(List tokens, List> steps, boolean hasStart) {
int totalTokens = tokens.size();
int solutionTokensLeft = totalTokens;
Step solution = null;
while (!steps.isEmpty()) {
expandEpsilons(steps);
List> intermediate = new ArrayList>(steps);
List> newsteps = new ArrayList>(steps.size() * 2);
do {
// check if at end
for (Step step : intermediate) {
if (step.state == this.end) {
if (tokens.size() == totalTokens) {
// can't succeed if no tokens are consumed
}
else {
// we have reached the end
if (tokens.size() < solutionTokensLeft) {
solution = step;
solutionTokensLeft = tokens.size();
}
}
}
}
// handle assertions
newsteps.clear();
expandAssertions(intermediate, newsteps, hasStart, tokens, totalTokens);
expandEpsilons(newsteps);
intermediate.clear();
intermediate.addAll(newsteps);
steps.addAll(newsteps);
} while (newsteps.size() > 0);
newsteps.clear();
if (!tokens.isEmpty()) {
for (Step step : steps) {
for (final Edge edge : step.state.edges) {
// try other edges if they match the current token
if (edge.apply(tokens.get(0))) {
newsteps.add(new Step(edge.dest, step, edge));
}
}
}
// consume a token
tokens = tokens.subList(1, tokens.size());
}
steps = newsteps;
}
return solution;
}
}
/**
* Representation of a state in the automaton.
* @author Michael Schmitz
*
* @param
*/
public static class State {
public final List> edges = new ArrayList>();
public final List> epsilons = new ArrayList>();
/**
* Add an epsilon transition between this state and dest.
* @param dest the state to connect
*/
public void connect(State dest) {
this.epsilons.add(new Epsilon(dest));
}
/**
* Add an edge between this state and dest.
* @param dest the state to connect
* @param cost the expression of the edge
*/
public void connect(State dest, Expression cost) {
this.edges.add(new Edge(dest, cost));
}
public String toString() {
return this.getClass().getSimpleName() + ":" + this.edges.size();
}
}
/**
* A start or end state.
* @author Michael Schmitz
*
* @param
*/
public static class TerminusState extends State {
public final Expression expression;
public TerminusState(Expression expression) {
super();
this.expression = expression;
}
public String toString() {
return this.getClass().getSimpleName()
+ "("+this.expression.toString()+"):" + this.edges.size();
}
}
/**
* A start state.
* @author Michael Schmitz
*
* @param
*/
public static class StartState extends TerminusState {
public StartState(Expression expression) {
super(expression);
}
public int minMatchingLength() {
return this.expression.minMatchingLength();
}
}
/**
* An end state.
* @author Michael Schmitz
*
* @param
*/
public static class EndState extends TerminusState {
public EndState(Expression expression) {
super(expression);
}
}
/**
* An abstract representation of an edge.
* @author Michael Schmitz
*
* @param
*/
public static abstract class AbstractEdge implements Predicate {
public final State dest;
public AbstractEdge(State dest) {
this.dest = dest;
}
}
/**
* An edge with cost {@code expression}.
* @author Michael Schmitz
*
* @param
*/
public static class Edge extends AbstractEdge {
public final Expression expression;
public Edge(State dest, Expression base) {
super(dest);
this.expression = base;
}
@Override
public String toString() {
return "(" + this.expression.toString() + ") -> " + this.dest.toString();
}
@Override
public boolean apply(E entity) {
if (expression == null) {
return true;
}
else {
return expression.apply(entity);
}
}
}
/**
* An edge without cost, an epsilon transition.
* @author Michael Schmitz
*
* @param
*/
public static class Epsilon extends AbstractEdge {
public Epsilon(State dest) {
super(dest);
}
@Override
public String toString() {
return "(epsilon) -> " + dest.toString();
}
@Override
public boolean apply(E entity) {
return true;
}
}
}