All Downloads are FREE. Search and download functionalities are using the official Maven repository.

dk.brics.automaton.SpecialOperations Maven / Gradle / Ivy

/*
 * dk.brics.automaton
 * 
 * Copyright (c) 2001-2011 Anders Moeller
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package dk.brics.automaton;

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
 * Special automata operations.
 */
final public class SpecialOperations {
	
	private SpecialOperations() {}

	/**
	 * Reverses the language of the given (non-singleton) automaton while returning
	 * the set of new initial states.
	 */
	public static Set reverse(Automaton a) {
		// reverse all edges
		HashMap> m = new HashMap>();
		Set states = a.getStates();
		Set accept = a.getAcceptStates();
		for (State r : states) {
			m.put(r, new HashSet());
			r.accept = false;
		}
		for (State r : states)
			for (Transition t : r.getTransitions())
				m.get(t.to).add(new Transition(t.min, t.max, r));
		for (State r : states)
			r.transitions = m.get(r);
		// make new initial+final states
		a.initial.accept = true;
		a.initial = new State();
		for (State r : accept)
			a.initial.addEpsilon(r); // ensures that all initial states are reachable
		a.deterministic = false;
		return accept;
	}

	/**
	 * Returns an automaton that accepts the overlap of strings that in more than one way can be split into
	 * a left part being accepted by a1 and a right part being accepted by
	 * a2.
	 */
	public static Automaton overlap(Automaton a1, Automaton a2) {
		Automaton b1 = a1.cloneExpanded();
		b1.determinize();
		acceptToAccept(b1);
		Automaton b2 = a2.cloneExpanded();
		reverse(b2);
		b2.determinize();
		acceptToAccept(b2);
		reverse(b2);
		b2.determinize();
		return b1.intersection(b2).minus(BasicAutomata.makeEmptyString());
	}
	
	private static void acceptToAccept(Automaton a) {
		State s = new State();
		for (State r : a.getAcceptStates())
			s.addEpsilon(r);
		a.initial = s;
		a.deterministic = false;
	}
	
	/** 
	 * Returns an automaton that accepts the single chars that occur 
	 * in strings that are accepted by the given automaton. 
	 * Never modifies the input automaton.
	 */
	public static Automaton singleChars(Automaton a) {
		Automaton b = new Automaton();
		State s = new State();
		b.initial = s;
		State q = new State();
		q.accept = true;
		if (a.isSingleton()) 
			for (int i = 0; i < a.singleton.length(); i++)
				s.transitions.add(new Transition(a.singleton.charAt(i), q));
		else
			for (State p : a.getStates())
				for (Transition t : p.transitions)
					s.transitions.add(new Transition(t.min, t.max, q));
		b.deterministic = true;
		b.removeDeadTransitions();
		return b;
	}
	
	/**
	 * Returns an automaton that accepts the trimmed language of the given
	 * automaton. The resulting automaton is constructed as follows: 1) Whenever
	 * a c character is allowed in the original automaton, one or
	 * more set characters are allowed in the new automaton. 2)
	 * The automaton is prefixed and postfixed with any number of
	 * set characters.
	 * @param set set of characters to be trimmed
	 * @param c canonical trim character (assumed to be in set)
	 */
	public static Automaton trim(Automaton a, String set, char c) {
		a = a.cloneExpandedIfRequired();
		State f = new State();
		addSetTransitions(f, set, f);
		f.accept = true;
		for (State s : a.getStates()) {
			State r = s.step(c);
			if (r != null) {
				// add inner
				State q = new State();
				addSetTransitions(q, set, q);
				addSetTransitions(s, set, q);
				q.addEpsilon(r);
			}
			// add postfix
			if (s.accept)
				s.addEpsilon(f);
		}
		// add prefix
		State p = new State();
		addSetTransitions(p, set, p);
		p.addEpsilon(a.initial);
		a.initial = p;
		a.deterministic = false;
		a.removeDeadTransitions();
		a.checkMinimizeAlways();
		return a;
	}
	
	private static void addSetTransitions(State s, String set, State p) {
		for (int n = 0; n < set.length(); n++)
			s.transitions.add(new Transition(set.charAt(n), p));
	}
	
	/**
	 * Returns an automaton that accepts the compressed language of the given
	 * automaton. Whenever a c character is allowed in the
	 * original automaton, one or more set characters are allowed
	 * in the new automaton.
	 * @param set set of characters to be compressed
	 * @param c canonical compress character (assumed to be in set)
	 */
	public static Automaton compress(Automaton a, String set, char c) {
		a = a.cloneExpandedIfRequired();
		for (State s : a.getStates()) {
			State r = s.step(c);
			if (r != null) {
				// add inner
				State q = new State();
				addSetTransitions(q, set, q);
				addSetTransitions(s, set, q);
				q.addEpsilon(r);
			}
		}
		// add prefix
		a.deterministic = false;
		a.removeDeadTransitions();
		a.checkMinimizeAlways();
		return a;
	}
	
	/**
	 * Returns an automaton where all transition labels have been substituted.
	 * 

* Each transition labeled c is changed to a set of * transitions, one for each character in map(c). If * map(c) is null, then the transition is unchanged. * @param map map from characters to sets of characters (where characters * are Character objects) */ public static Automaton subst(Automaton a, Map> map) { if (map.isEmpty()) return a.cloneIfRequired(); Set ckeys = new TreeSet(map.keySet()); char[] keys = new char[ckeys.size()]; int j = 0; for (Character c : ckeys) keys[j++] = c; a = a.cloneExpandedIfRequired(); for (State s : a.getStates()) { Set st = s.transitions; s.resetTransitions(); for (Transition t : st) { int index = findIndex(t.min, keys); while (t.min <= t.max) { if (keys[index] > t.min) { char m = (char)(keys[index] - 1); if (t.max < m) m = t.max; s.transitions.add(new Transition(t.min, m, t.to)); if (m + 1 > Character.MAX_VALUE) break; t.min = (char)(m + 1); } else if (keys[index] < t.min) { char m; if (index + 1 < keys.length) m = (char)(keys[++index] - 1); else m = Character.MAX_VALUE; if (t.max < m) m = t.max; s.transitions.add(new Transition(t.min, m, t.to)); if (m + 1 > Character.MAX_VALUE) break; t.min = (char)(m + 1); } else { // found t.min in substitution map for (Character c : map.get(t.min)) s.transitions.add(new Transition(c, t.to)); if (t.min + 1 > Character.MAX_VALUE) break; t.min++; if (index + 1 < keys.length && keys[index + 1] == t.min) index++; } } } } a.deterministic = false; a.removeDeadTransitions(); a.checkMinimizeAlways(); return a; } /** * Finds the largest entry whose value is less than or equal to c, * or 0 if there is no such entry. */ static int findIndex(char c, char[] points) { int a = 0; int b = points.length; while (b - a > 1) { int d = (a + b) >>> 1; if (points[d] > c) b = d; else if (points[d] < c) a = d; else return d; } return a; } /** * Returns an automaton where all transitions of the given char are replaced by a string. * @param c char * @param s string * @return new automaton */ public static Automaton subst(Automaton a, char c, String s) { a = a.cloneExpandedIfRequired(); Set epsilons = new HashSet(); for (State p : a.getStates()) { Set st = p.transitions; p.resetTransitions(); for (Transition t : st) if (t.max < c || t.min > c) p.transitions.add(t); else { if (t.min < c) p.transitions.add(new Transition(t.min, (char)(c - 1), t.to)); if (t.max > c) p.transitions.add(new Transition((char)(c + 1), t.max, t.to)); if (s.length() == 0) epsilons.add(new StatePair(p, t.to)); else { State q = p; for (int i = 0; i < s.length(); i++) { State r; if (i + 1 == s.length()) r = t.to; else r = new State(); q.transitions.add(new Transition(s.charAt(i), r)); q = r; } } } } a.addEpsilons(epsilons); a.deterministic = false; a.removeDeadTransitions(); a.checkMinimizeAlways(); return a; } /** * Returns an automaton accepting the homomorphic image of the given automaton * using the given function. *

* This method maps each transition label to a new value. * source and dest are assumed to be arrays of * same length, and source must be sorted in increasing order * and contain no duplicates. source defines the starting * points of char intervals, and the corresponding entries in * dest define the starting points of corresponding new * intervals. */ public static Automaton homomorph(Automaton a, char[] source, char[] dest) { a = a.cloneExpandedIfRequired(); for (State s : a.getStates()) { Set st = s.transitions; s.resetTransitions(); for (Transition t : st) { int min = t.min; while (min <= t.max) { int n = findIndex((char)min, source); char nmin = (char)(dest[n] + min - source[n]); int end = (n + 1 == source.length) ? Character.MAX_VALUE : source[n + 1] - 1; int length; if (end < t.max) length = end + 1 - min; else length = t.max + 1 - min; s.transitions.add(new Transition(nmin, (char)(nmin + length - 1), t.to)); min += length; } } } a.deterministic = false; a.removeDeadTransitions(); a.checkMinimizeAlways(); return a; } /** * Returns an automaton with projected alphabet. The new automaton accepts * all strings that are projections of strings accepted by the given automaton * onto the given characters (represented by Character). If * null is in the set, it abbreviates the intervals * u0000-uDFFF and uF900-uFFFF (i.e., the non-private code points). It is * assumed that all other characters from chars are in the * interval uE000-uF8FF. */ public static Automaton projectChars(Automaton a, Set chars) { Character[] c = chars.toArray(new Character[chars.size()]); char[] cc = new char[c.length]; boolean normalchars = false; for (int i = 0; i < c.length; i++) if (c[i] == null) normalchars = true; else cc[i] = c[i]; Arrays.sort(cc); if (a.isSingleton()) { for (int i = 0; i < a.singleton.length(); i++) { char sc = a.singleton.charAt(i); if (!(normalchars && (sc <= '\udfff' || sc >= '\uf900') || Arrays.binarySearch(cc, sc) >= 0)) return BasicAutomata.makeEmpty(); } return a.cloneIfRequired(); } else { HashSet epsilons = new HashSet(); a = a.cloneExpandedIfRequired(); for (State s : a.getStates()) { HashSet new_transitions = new HashSet(); for (Transition t : s.transitions) { boolean addepsilon = false; if (t.min < '\uf900' && t.max > '\udfff') { int w1 = Arrays.binarySearch(cc, t.min > '\ue000' ? t.min : '\ue000'); if (w1 < 0) { w1 = -w1 - 1; addepsilon = true; } int w2 = Arrays.binarySearch(cc, t.max < '\uf8ff' ? t.max : '\uf8ff'); if (w2 < 0) { w2 = -w2 - 2; addepsilon = true; } for (int w = w1; w <= w2; w++) { new_transitions.add(new Transition(cc[w], t.to)); if (w > w1 && cc[w - 1] + 1 != cc[w]) addepsilon = true; } } if (normalchars) { if (t.min <= '\udfff') new_transitions.add(new Transition(t.min, t.max < '\udfff' ? t.max : '\udfff', t.to)); if (t.max >= '\uf900') new_transitions.add(new Transition(t.min > '\uf900' ? t.min : '\uf900', t.max, t.to)); } else if (t.min <= '\udfff' || t.max >= '\uf900') addepsilon = true; if (addepsilon) epsilons.add(new StatePair(s, t.to)); } s.transitions = new_transitions; } a.reduce(); a.addEpsilons(epsilons); a.removeDeadTransitions(); a.checkMinimizeAlways(); return a; } } /** * Returns true if the language of this automaton is finite. */ public static boolean isFinite(Automaton a) { if (a.isSingleton()) return true; return isFinite(a.initial, new HashSet(), new HashSet()); } /** * Checks whether there is a loop containing s. (This is sufficient since * there are never transitions to dead states.) */ private static boolean isFinite(State s, HashSet path, HashSet visited) { path.add(s); for (Transition t : s.transitions) if (path.contains(t.to) || (!visited.contains(t.to) && !isFinite(t.to, path, visited))) return false; path.remove(s); visited.add(s); return true; } /** * Returns the set of accepted strings of the given length. */ public static Set getStrings(Automaton a, int length) { HashSet strings = new HashSet(); if (a.isSingleton() && a.singleton.length() == length) strings.add(a.singleton); else if (length >= 0) getStrings(a.initial, strings, new StringBuilder(), length); return strings; } private static void getStrings(State s, Set strings, StringBuilder path, int length) { if (length == 0) { if (s.accept) strings.add(path.toString()); } else for (Transition t : s.transitions) for (int n = t.min; n <= t.max; n++) { path.append((char)n); getStrings(t.to, strings, path, length - 1); path.deleteCharAt(path.length() - 1); } } /** * Returns the set of accepted strings, assuming this automaton has a finite * language. If the language is not finite, null is returned. */ public static Set getFiniteStrings(Automaton a) { HashSet strings = new HashSet(); if (a.isSingleton()) strings.add(a.singleton); else if (!getFiniteStrings(a.initial, new HashSet(), strings, new StringBuilder(), -1)) return null; return strings; } /** * Returns the set of accepted strings, assuming that at most limit * strings are accepted. If more than limit strings are * accepted, null is returned. If limit<0, then this * methods works like {@link #getFiniteStrings(Automaton)}. */ public static Set getFiniteStrings(Automaton a, int limit) { HashSet strings = new HashSet(); if (a.isSingleton()) { if (limit > 0) strings.add(a.singleton); else return null; } else if (!getFiniteStrings(a.initial, new HashSet(), strings, new StringBuilder(), limit)) return null; return strings; } /** * Returns the strings that can be produced from the given state, or false if more than * limit strings are found. limit<0 means "infinite". * */ private static boolean getFiniteStrings(State s, HashSet pathstates, HashSet strings, StringBuilder path, int limit) { pathstates.add(s); for (Transition t : s.transitions) { if (pathstates.contains(t.to)) return false; for (int n = t.min; n <= t.max; n++) { path.append((char)n); if (t.to.accept) { strings.add(path.toString()); if (limit >= 0 && strings.size() > limit) return false; } if (!getFiniteStrings(t.to, pathstates, strings, path, limit)) return false; path.deleteCharAt(path.length() - 1); } } pathstates.remove(s); return true; } /** * Returns the longest string that is a prefix of all accepted strings and * visits each state at most once. * @return common prefix */ public static String getCommonPrefix(Automaton a) { if (a.isSingleton()) return a.singleton; StringBuilder b = new StringBuilder(); HashSet visited = new HashSet(); State s = a.initial; boolean done; do { done = true; visited.add(s); if (!s.accept && s.transitions.size() == 1) { Transition t = s.transitions.iterator().next(); if (t.min == t.max && !visited.contains(t.to)) { b.append(t.min); s = t.to; done = false; } } } while (!done); return b.toString(); } /** * Prefix closes the given automaton. */ public static void prefixClose(Automaton a) { for (State s : a.getStates()) s.setAccept(true); a.clearHashCode(); a.checkMinimizeAlways(); } /** * Constructs automaton that accepts the same strings as the given automaton * but ignores upper/lower case of A-F. * @param a automaton * @return automaton */ public static Automaton hexCases(Automaton a) { Map> map = new HashMap>(); for (char c1 = 'a', c2 = 'A'; c1 <= 'f'; c1++, c2++) { Set ws = new HashSet(); ws.add(c1); ws.add(c2); map.put(c1, ws); map.put(c2, ws); } Automaton ws = Datatypes.getWhitespaceAutomaton(); return ws.concatenate(a.subst(map)).concatenate(ws); } /** * Constructs automaton that accepts 0x20, 0x9, 0xa, and 0xd in place of each 0x20 transition * in the given automaton. * @param a automaton * @return automaton */ public static Automaton replaceWhitespace(Automaton a) { Map> map = new HashMap>(); Set ws = new HashSet(); ws.add(' '); ws.add('\t'); ws.add('\n'); ws.add('\r'); map.put(' ', ws); return a.subst(map); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy