All Downloads are FREE. Search and download functionalities are using the official Maven repository.

it.unive.lisa.analysis.string.tarsis.StringSearcher Maven / Gradle / Ivy

The newest version!
package it.unive.lisa.analysis.string.tarsis;

import it.unive.lisa.util.datastructures.automaton.State;
import it.unive.lisa.util.datastructures.automaton.Transition;
import it.unive.lisa.util.datastructures.regex.RegularExpression;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Vector;

/**
 * An algorithm that searches strings across all paths of an automaton.
 * 
 * @author Vincenzo Arceri
 * @author Luca Negrini
 */
public class StringSearcher {

	/**
	 * The target automaton
	 */
	private final RegexAutomaton automaton;

	/**
	 * The string to search
	 */
	private String searchString;

	/**
	 * True if and only if we are currently matching characters
	 */
	private boolean matching;

	/**
	 * Builds the searcher. For this algorithm to work correctly, the automaton
	 * passed as parameter must be first exploded with a call to
	 * {@link RegexAutomaton#explode()}.
	 * 
	 * @param origin the target automaton
	 */
	public StringSearcher(
			RegexAutomaton origin) {
		automaton = origin;
		searchString = null;
		matching = false;
	}

	/**
	 * Yields a set containing all the sequences of transitions that recognize
	 * the given string.
	 * 
	 * @param toSearch the string to search
	 * 
	 * @return the set of sequences of transitions
	 */
	public Set>> searchInAllPaths(
			String toSearch) {
		Set>> collected = new HashSet<>();

		Set> paths = automaton.getAllPaths();

		if (paths.size() == 0)
			return collected;

		for (List v : paths)
			collected.addAll(searchInPath(v, toSearch));

		return collected;
	}

	@SuppressWarnings("unchecked")
	private Set>> searchInPath(
			List v,
			String toSearch) {

		Set>> collected = new HashSet<>();
		if (v.size() == 1 && toSearch.length() == 1)
			return handleSelfLoop(v, collected);

		Vector> path = new Vector<>();
		resetSearchState(path, toSearch);
		for (int i = 0; i < v.size() - 1; i++) {
			State from = v.get(i);
			State to = v.get(i + 1);
			Set> transitions = automaton.getAllTransitionsConnecting(from, to);

			if (transitions.size() == 0)
				continue;

			boolean matched = false;
			for (Transition t : transitions) {
				if (matching)
					if (t.getSymbol().is(searchString.substring(0, 1))) {
						// we found a matching char
						advanceSearch(path, t);
						matched = true;
					} else {
						resetSearchState(path, toSearch);
						if (t.getSymbol().is(searchString.substring(0, 1))) {
							startSearch(path, t);
							matched = true;
						}
					}
				else if (t.getSymbol().is(searchString.substring(0, 1))) {
					// we found the beginning of the string
					startSearch(path, t);
					matched = true;
				}

				if (matched)
					// we break since we do not care about the other transitions
					// between these two nodes
					break;
			}

			if (searchString.isEmpty()) {
				collected.add((Vector>) path.clone());
				resetSearchState(path, toSearch);
			}

		}

		return collected;
	}

	private Set>> handleSelfLoop(
			List v,
			Set>> collected) {
		// self loop!
		Set> transitions = automaton.getAllTransitionsConnecting(v.get(0), v.get(0));

		if (transitions.size() == 0)
			return collected;

		for (Transition t : transitions)
			if (t.getSymbol().is(searchString.substring(0, 1))) {
				Vector> result = new Vector<>();
				result.add(t);
				collected.add(result);
			}

		return collected;
	}

	private void advanceSearch(
			Vector> path,
			Transition t) {
		searchString = searchString.substring(1);
		path.add(t);
	}

	private void startSearch(
			Vector> path,
			Transition t) {
		matching = true;
		advanceSearch(path, t);
	}

	private void resetSearchState(
			Vector> path,
			String toSearch) {
		matching = false;
		searchString = toSearch;
		path.clear();
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy