All downloads are free. The search and download functionality uses the official Maven repository.

dk.brics.automaton.BasicOperations Maven / Gradle / Ivy

/*
 * dk.brics.automaton
 * 
 * Copyright (c) 2001-2011 Anders Moeller
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package dk.brics.automaton;

import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Basic automata operations.
 */
final public class BasicOperations {
	
	// Private and empty so this final class cannot be instantiated from outside.
	private BasicOperations() {}

	/**
	 * Builds an automaton whose language is the language of <code>a1</code>
	 * followed by the language of <code>a2</code>.
	 * <p>
	 * Complexity: linear in number of states.
	 */
	static public Automaton concatenate(Automaton a1, Automaton a2) {
		// Two singletons concatenate to a plain string automaton.
		if (a1.isSingleton() && a2.isSingleton()) {
			return BasicAutomata.makeString(a1.singleton + a2.singleton);
		}
		// An empty operand makes the whole concatenation empty.
		if (isEmpty(a1) || isEmpty(a2)) {
			return BasicAutomata.makeEmpty();
		}
		boolean resultDeterministic = a1.isSingleton() && a2.isDeterministic();
		// When both arguments are the same object, both copies must be real clones.
		boolean sameOperand = (a1 == a2);
		a1 = sameOperand ? a1.cloneExpanded() : a1.cloneExpandedIfRequired();
		a2 = sameOperand ? a2.cloneExpanded() : a2.cloneExpandedIfRequired();
		// Every accept state of the first part hands over to the start of the second.
		for (State accepting : a1.getAcceptStates()) {
			accepting.accept = false;
			accepting.addEpsilon(a2.initial);
		}
		a1.deterministic = resultDeterministic;
		a1.clearHashCode();
		a1.checkMinimizeAlways();
		return a1;
	}

	/**
	 * Returns an automaton that accepts the concatenation of the languages of
	 * the given automata.
	 * <p>

* Complexity: linear in total number of states. */ static public Automaton concatenate(List l) { if (l.isEmpty()) return BasicAutomata.makeEmptyString(); boolean all_singleton = true; for (Automaton a : l) if (!a.isSingleton()) { all_singleton = false; break; } if (all_singleton) { StringBuilder b = new StringBuilder(); for (Automaton a : l) b.append(a.singleton); return BasicAutomata.makeString(b.toString()); } else { for (Automaton a : l) if (a.isEmpty()) return BasicAutomata.makeEmpty(); Set ids = new HashSet(); for (Automaton a : l) ids.add(System.identityHashCode(a)); boolean has_aliases = ids.size() != l.size(); Automaton b = l.get(0); if (has_aliases) b = b.cloneExpanded(); else b = b.cloneExpandedIfRequired(); Set ac = b.getAcceptStates(); boolean first = true; for (Automaton a : l) if (first) first = false; else { if (a.isEmptyString()) continue; Automaton aa = a; if (has_aliases) aa = aa.cloneExpanded(); else aa = aa.cloneExpandedIfRequired(); Set ns = aa.getAcceptStates(); for (State s : ac) { s.accept = false; s.addEpsilon(aa.initial); if (s.accept) ns.add(s); } ac = ns; } b.deterministic = false; b.clearHashCode(); b.checkMinimizeAlways(); return b; } } /** * Returns an automaton that accepts the union of the empty string and the * language of the given automaton. *

* Complexity: linear in number of states.
	 */
	static public Automaton optional(Automaton a) {
		Automaton result = a.cloneExpandedIfRequired();
		// A fresh accepting entry state lets the empty string through and
		// otherwise falls into the old initial state.
		State entry = new State();
		entry.addEpsilon(result.initial);
		entry.accept = true;
		result.initial = entry;
		result.deterministic = false;
		result.clearHashCode();
		result.checkMinimizeAlways();
		return result;
	}

	/**
	 * Returns an automaton that accepts the Kleene star (zero or more
	 * concatenated repetitions) of the language of the given automaton.
	 * Never modifies the input automaton language.
	 * <p>

* Complexity: linear in number of states.
	 */
	static public Automaton repeat(Automaton a) {
		Automaton star = a.cloneExpanded();
		// New accepting entry state: accepts the empty string and enters the body.
		State hub = new State();
		hub.accept = true;
		hub.addEpsilon(star.initial);
		// Each accept state loops back to the entry so the body can repeat.
		for (State accepting : star.getAcceptStates()) {
			accepting.addEpsilon(hub);
		}
		star.initial = hub;
		star.deterministic = false;
		star.clearHashCode();
		star.checkMinimizeAlways();
		return star;
	}

	/**
	 * Returns an automaton that accepts <code>min</code> or more
	 * concatenated repetitions of the language of the given automaton.
	 * <p>

* Complexity: linear in number of states and in min. */ static public Automaton repeat(Automaton a, int min) { if (min == 0) return repeat(a); List as = new ArrayList(); while (min-- > 0) as.add(a); as.add(repeat(a)); return concatenate(as); } /** * Returns an automaton that accepts between min and * max (including both) concatenated repetitions of the * language of the given automaton. *

* Complexity: linear in number of states and in min and * max. */ static public Automaton repeat(Automaton a, int min, int max) { if (min > max) return BasicAutomata.makeEmpty(); max -= min; a.expandSingleton(); Automaton b; if (min == 0) b = BasicAutomata.makeEmptyString(); else if (min == 1) b = a.clone(); else { List as = new ArrayList(); while (min-- > 0) as.add(a); b = concatenate(as); } if (max > 0) { Automaton d = a.clone(); while (--max > 0) { Automaton c = a.clone(); for (State p : c.getAcceptStates()) p.addEpsilon(d.initial); d = c; } for (State p : b.getAcceptStates()) p.addEpsilon(d.initial); b.deterministic = false; b.clearHashCode(); b.checkMinimizeAlways(); } return b; } /** * Returns a (deterministic) automaton that accepts the complement of the * language of the given automaton. *

* Complexity: linear in number of states (if already deterministic).
	 */
	static public Automaton complement(Automaton a) {
		Automaton result = a.cloneExpandedIfRequired();
		result.determinize();
		result.totalize();
		// Swap accepting and non-accepting on every state.
		for (State state : result.getStates()) {
			state.accept = !state.accept;
		}
		result.removeDeadTransitions();
		return result;
	}

	/**
	 * Returns a (deterministic) automaton that accepts the intersection of
	 * the language of <code>a1</code> and the complement of the language of
	 * <code>a2</code>. As a side-effect, the automata may be determinized, if not
	 * already deterministic.
	 * <p>

* Complexity: quadratic in number of states (if already deterministic).
	 */
	static public Automaton minus(Automaton a1, Automaton a2) {
		// Nothing to subtract from, or subtracting an automaton from itself.
		if (a1.isEmpty() || a1 == a2) {
			return BasicAutomata.makeEmpty();
		}
		// Subtracting nothing leaves a1 as it is.
		if (a2.isEmpty()) {
			return a1.cloneIfRequired();
		}
		// A singleton either loses its only string or keeps it.
		if (a1.isSingleton()) {
			return a2.run(a1.singleton)
					? BasicAutomata.makeEmpty()
					: a1.cloneIfRequired();
		}
		Automaton notA2 = a2.complement();
		return intersection(a1, notA2);
	}

	/**
	 * Returns an automaton that accepts the intersection of
	 * the languages of the given automata.
	 * Never modifies the input automata languages.
	 * <p>

* Complexity: quadratic in number of states. */ static public Automaton intersection(Automaton a1, Automaton a2) { if (a1.isSingleton()) { if (a2.run(a1.singleton)) return a1.cloneIfRequired(); else return BasicAutomata.makeEmpty(); } if (a2.isSingleton()) { if (a1.run(a2.singleton)) return a2.cloneIfRequired(); else return BasicAutomata.makeEmpty(); } if (a1 == a2) return a1.cloneIfRequired(); Transition[][] transitions1 = Automaton.getSortedTransitions(a1.getStates()); Transition[][] transitions2 = Automaton.getSortedTransitions(a2.getStates()); Automaton c = new Automaton(); LinkedList worklist = new LinkedList(); HashMap newstates = new HashMap(); StatePair p = new StatePair(c.initial, a1.initial, a2.initial); worklist.add(p); newstates.put(p, p); while (worklist.size() > 0) { p = worklist.removeFirst(); p.s.accept = p.s1.accept && p.s2.accept; Transition[] t1 = transitions1[p.s1.number]; Transition[] t2 = transitions2[p.s2.number]; for (int n1 = 0, b2 = 0; n1 < t1.length; n1++) { while (b2 < t2.length && t2[b2].max < t1[n1].min) b2++; for (int n2 = b2; n2 < t2.length && t1[n1].max >= t2[n2].min; n2++) if (t2[n2].max >= t1[n1].min) { StatePair q = new StatePair(t1[n1].to, t2[n2].to); StatePair r = newstates.get(q); if (r == null) { q.s = new State(); worklist.add(q); newstates.put(q, q); r = q; } char min = t1[n1].min > t2[n2].min ? t1[n1].min : t2[n2].min; char max = t1[n1].max < t2[n2].max ? t1[n1].max : t2[n2].max; p.s.transitions.add(new Transition(min, max, r.s)); } } } c.deterministic = a1.deterministic && a2.deterministic; c.removeDeadTransitions(); c.checkMinimizeAlways(); return c; } /** * Returns true if the language of a1 is a subset of the * language of a2. * As a side-effect, a2 is determinized if not already marked as * deterministic. *

* Complexity: quadratic in number of states. */ public static boolean subsetOf(Automaton a1, Automaton a2) { if (a1 == a2) return true; if (a1.isSingleton()) { if (a2.isSingleton()) return a1.singleton.equals(a2.singleton); return a2.run(a1.singleton); } a2.determinize(); Transition[][] transitions1 = Automaton.getSortedTransitions(a1.getStates()); Transition[][] transitions2 = Automaton.getSortedTransitions(a2.getStates()); LinkedList worklist = new LinkedList(); HashSet visited = new HashSet(); StatePair p = new StatePair(a1.initial, a2.initial); worklist.add(p); visited.add(p); while (worklist.size() > 0) { p = worklist.removeFirst(); if (p.s1.accept && !p.s2.accept) return false; Transition[] t1 = transitions1[p.s1.number]; Transition[] t2 = transitions2[p.s2.number]; for (int n1 = 0, b2 = 0; n1 < t1.length; n1++) { while (b2 < t2.length && t2[b2].max < t1[n1].min) b2++; int min1 = t1[n1].min, max1 = t1[n1].max; for (int n2 = b2; n2 < t2.length && t1[n1].max >= t2[n2].min; n2++) { if (t2[n2].min > min1) return false; if (t2[n2].max < Character.MAX_VALUE) min1 = t2[n2].max + 1; else { min1 = Character.MAX_VALUE; max1 = Character.MIN_VALUE; } StatePair q = new StatePair(t1[n1].to, t2[n2].to); if (!visited.contains(q)) { worklist.add(q); visited.add(q); } } if (min1 <= max1) return false; } } return true; } /** * Returns an automaton that accepts the union of the languages of the given automata. *

* Complexity: linear in number of states. */ public static Automaton union(Automaton a1, Automaton a2) { if ((a1.isSingleton() && a2.isSingleton() && a1.singleton.equals(a2.singleton)) || a1 == a2) return a1.cloneIfRequired(); if (a1 == a2) { a1 = a1.cloneExpanded(); a2 = a2.cloneExpanded(); } else { a1 = a1.cloneExpandedIfRequired(); a2 = a2.cloneExpandedIfRequired(); } State s = new State(); s.addEpsilon(a1.initial); s.addEpsilon(a2.initial); a1.initial = s; a1.deterministic = false; a1.clearHashCode(); a1.checkMinimizeAlways(); return a1; } /** * Returns an automaton that accepts the union of the languages of the given automata. *

* Complexity: linear in number of states. */ public static Automaton union(Collection l) { Set ids = new HashSet(); for (Automaton a : l) ids.add(System.identityHashCode(a)); boolean has_aliases = ids.size() != l.size(); State s = new State(); for (Automaton b : l) { if (b.isEmpty()) continue; Automaton bb = b; if (has_aliases) bb = bb.cloneExpanded(); else bb = bb.cloneExpandedIfRequired(); s.addEpsilon(bb.initial); } Automaton a = new Automaton(); a.initial = s; a.deterministic = false; a.clearHashCode(); a.checkMinimizeAlways(); return a; } /** * Determinizes the given automaton. *

* Complexity: exponential in number of states. */ public static void determinize(Automaton a) { if (a.deterministic || a.isSingleton()) return; Set initialset = new HashSet(); initialset.add(a.initial); determinize(a, initialset); } /** * Determinizes the given automaton using the given set of initial states. */ static void determinize(Automaton a, Set initialset) { char[] points = a.getStartPoints(); // subset construction Map, Set> sets = new HashMap, Set>(); LinkedList> worklist = new LinkedList>(); Map, State> newstate = new HashMap, State>(); sets.put(initialset, initialset); worklist.add(initialset); a.initial = new State(); newstate.put(initialset, a.initial); while (worklist.size() > 0) { Set s = worklist.removeFirst(); State r = newstate.get(s); for (State q : s) if (q.accept) { r.accept = true; break; } for (int n = 0; n < points.length; n++) { Set p = new HashSet(); for (State q : s) for (Transition t : q.transitions) if (t.min <= points[n] && points[n] <= t.max) p.add(t.to); if (!sets.containsKey(p)) { sets.put(p, p); worklist.add(p); newstate.put(p, new State()); } State q = newstate.get(p); char min = points[n]; char max; if (n + 1 < points.length) max = (char)(points[n + 1] - 1); else max = Character.MAX_VALUE; r.transitions.add(new Transition(min, max, q)); } } a.deterministic = true; a.removeDeadTransitions(); } /** * Adds epsilon transitions to the given automaton. * This method adds extra character interval transitions that are equivalent to the given * set of epsilon transitions. 
* @param pairs collection of {@link StatePair} objects representing pairs of source/destination states * where epsilon transitions should be added */ public static void addEpsilons(Automaton a, Collection pairs) { a.expandSingleton(); HashMap> forward = new HashMap>(); HashMap> back = new HashMap>(); for (StatePair p : pairs) { HashSet to = forward.get(p.s1); if (to == null) { to = new HashSet(); forward.put(p.s1, to); } to.add(p.s2); HashSet from = back.get(p.s2); if (from == null) { from = new HashSet(); back.put(p.s2, from); } from.add(p.s1); } // calculate epsilon closure LinkedList worklist = new LinkedList(pairs); HashSet workset = new HashSet(pairs); while (!worklist.isEmpty()) { StatePair p = worklist.removeFirst(); workset.remove(p); HashSet to = forward.get(p.s2); HashSet from = back.get(p.s1); if (to != null) { for (State s : to) { StatePair pp = new StatePair(p.s1, s); if (!pairs.contains(pp)) { pairs.add(pp); forward.get(p.s1).add(s); back.get(s).add(p.s1); worklist.add(pp); workset.add(pp); if (from != null) { for (State q : from) { StatePair qq = new StatePair(q, p.s1); if (!workset.contains(qq)) { worklist.add(qq); workset.add(qq); } } } } } } } // add transitions for (StatePair p : pairs) p.s1.addEpsilon(p.s2); a.deterministic = false; a.clearHashCode(); a.checkMinimizeAlways(); } /** * Returns true if the given automaton accepts the empty string and nothing else. */ public static boolean isEmptyString(Automaton a) { if (a.isSingleton()) return a.singleton.length() == 0; else return a.initial.accept && a.initial.transitions.isEmpty(); } /** * Returns true if the given automaton accepts no strings. */ public static boolean isEmpty(Automaton a) { if (a.isSingleton()) return false; return !a.initial.accept && a.initial.transitions.isEmpty(); } /** * Returns true if the given automaton accepts all strings. 
* <p>
	 * Only one shape is recognised: an accepting initial state whose single
	 * transition loops back to itself over the whole char range. A singleton
	 * always yields false.
	 */
	public static boolean isTotal(Automaton a) {
		if (a.isSingleton()) {
			return false;
		}
		if (a.initial.accept && a.initial.transitions.size() == 1) {
			Transition t = a.initial.transitions.iterator().next();
			return t.to == a.initial && t.min == Character.MIN_VALUE && t.max == Character.MAX_VALUE;
		}
		return false;
	}

	/**
	 * Returns a shortest accepted/rejected string.
	 * If more than one shortest string is found, the lexicographically first of the shortest strings is returned.
	 * @param accepted if true, look for accepted strings; otherwise, look for rejected strings
	 * @return the string, null if none found
	 */
	public static String getShortestExample(Automaton a, boolean accepted) {
		if (a.isSingleton()) {
			if (accepted) {
				return a.singleton;
			} else if (a.singleton.length() > 0) {
				// The empty string differs from the non-empty singleton, so it is rejected.
				return "";
			} else {
				// The singleton is the empty string; a one-character string is rejected.
				return "\u0000";
			}
		}
		return getShortestExample(a.getInitialState(), accepted);
	}

	/**
	 * Searches breadth-first from <code>s</code> for a state whose accept flag
	 * equals <code>accepted</code>, following the minimum character of each
	 * transition.
	 * @return the string leading to the best such state, null if none is reached
	 */
	static String getShortestExample(State s, boolean accepted) {
		// path maps each discovered state to the string currently recorded for it.
		Map<State, String> path = new HashMap<State, String>();
		LinkedList<State> queue = new LinkedList<State>();
		path.put(s, "");
		queue.add(s);
		String best = null;
		while (!queue.isEmpty()) {
			State q = queue.removeFirst();
			String p = path.get(q);
			if (q.accept == accepted) {
				// Keep the shorter candidate; on equal length keep the smaller by compareTo.
				if (best == null || p.length() < best.length() || (p.length() == best.length() && p.compareTo(best) < 0)) {
					best = p;
				}
			} else {
				for (Transition t : q.getTransitions()) {
					String tp = path.get(t.to);
					String np = p + t.min;
					if (tp == null || (tp.length() == np.length() && np.compareTo(tp) < 0)) {
						// Only newly discovered states are queued; known ones just get the better string.
						if (tp == null) {
							queue.addLast(t.to);
						}
						path.put(t.to, np);
					}
				}
			}
		}
		return best;
	}

	/**
	 * Returns true if the given string is accepted by the automaton.
	 * <p>

* Complexity: linear in the length of the string.
	 * <p>
	 * Note: for full performance, use the {@link RunAutomaton} class.
	 *
	 * @param a automaton to run; in the non-deterministic case its states are renumbered
	 * @param s input string
	 * @return true if <code>s</code> is accepted
	 */
	public static boolean run(Automaton a, String s) {
		if (a.isSingleton()) {
			return s.equals(a.singleton);
		}
		if (a.deterministic) {
			// Follow the single possible path; a missing step means rejection.
			State p = a.initial;
			for (int i = 0; i < s.length(); i++) {
				State q = p.step(s.charAt(i));
				if (q == null) {
					return false;
				}
				p = q;
			}
			return p.accept;
		} else {
			Set<State> states = a.getStates();
			Automaton.setStateNumbers(states);
			// pp is the current set of states, pp_other the set being built for
			// the next character; the bit sets keep duplicates out, keyed by state number.
			LinkedList<State> pp = new LinkedList<State>();
			LinkedList<State> pp_other = new LinkedList<State>();
			BitSet bb = new BitSet(states.size());
			BitSet bb_other = new BitSet(states.size());
			pp.add(a.initial);
			ArrayList<State> dest = new ArrayList<State>();
			boolean accept = a.initial.accept;
			for (int i = 0; i < s.length(); i++) {
				char c = s.charAt(i);
				accept = false;
				pp_other.clear();
				bb_other.clear();
				for (State p : pp) {
					dest.clear();
					p.step(c, dest);
					for (State q : dest) {
						if (q.accept) {
							accept = true;
						}
						if (!bb_other.get(q.number)) {
							bb_other.set(q.number);
							pp_other.add(q);
						}
					}
				}
				// Swap current and next for the following character.
				LinkedList<State> tp = pp;
				pp = pp_other;
				pp_other = tp;
				BitSet tb = bb;
				bb = bb_other;
				bb_other = tb;
			}
			return accept;
		}
	}
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy