it.unive.lisa.analysis.string.tarsis.StringReplacer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lisa-analyses Show documentation
Show all versions of lisa-analyses Show documentation
A library for static analysis
The newest version!
package it.unive.lisa.analysis.string.tarsis;
import it.unive.lisa.util.datastructures.automaton.State;
import it.unive.lisa.util.datastructures.automaton.Transition;
import it.unive.lisa.util.datastructures.regex.Atom;
import it.unive.lisa.util.datastructures.regex.RegularExpression;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.Vector;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
/**
* An algorithm that replaces strings across all paths of an automaton.
*
* @author Vincenzo Arceri
* @author Luca Negrini
*/
public class StringReplacer {
/**
* The target automaton
*/
private final RegexAutomaton origin;
/**
* The string searching algorithm
*/
private final StringSearcher searcher;
/**
* Builds the replacer. For this algorithm to work correctly, the target
* automaton is first exploded with a call to
* {@link RegexAutomaton#explode()}.
*
* @param origin the target automaton
*/
public StringReplacer(
RegexAutomaton origin) {
this.origin = origin.explode();
searcher = new StringSearcher(origin);
}
/**
* Yields a new automaton where every occurrence of {@code toReplace} have
* been replaced with {@code str}. If {@code must} is {@code true}, then
* this method effectively replaces {@code toReplace}. Otherwise, a
* may-replacement is perfomed, meaning that {@code toReplaced} is replaced
* with {@code toReplace || str}.
*
* @param toReplace the string to replace
* @param str the automaton to use as a replacement
* @param must whether or not a must-replacement has to be made
*
* @return the replaced automaton
*/
public RegexAutomaton replace(
String toReplace,
RegexAutomaton str,
boolean must) {
if (toReplace.isEmpty())
return emptyStringReplace(str);
Set>> replaceablePaths = searcher.searchInAllPaths(toReplace);
if (replaceablePaths.isEmpty())
return origin;
RegexAutomaton replaced = must ? str : str.union(origin.singleString(toReplace));
AtomicInteger counter = new AtomicInteger();
for (Vector> path : replaceablePaths) {
// start replacing inputs
Set statesToRemove = new HashSet<>();
Set> edgesToRemove = new HashSet<>();
for (int i = path.size() - 1; i >= 0; i--) {
Transition t = path.get(i);
if (i == path.size() - 1)
// last step: just remove it;
edgesToRemove.add(t);
else
// we need to check if there is a branch in the destination node
// in that case, we keep both the transition and the node
// otherwise, we can remove both of them
if (origin.getOutgoingTransitionsFrom(t.getDestination()).size() < 2) {
edgesToRemove.add(t);
statesToRemove.add(t.getDestination());
} else
// we must stop since we found a branch
break;
}
origin.removeTransitions(edgesToRemove);
origin.removeStates(statesToRemove);
// we add the new automaton
Map conversion = new HashMap<>();
Set states = new HashSet<>();
Set> delta = new HashSet<>();
Function maker = s -> new State(counter.getAndIncrement(), false, false);
for (State origin : replaced.getStates()) {
State r = conversion.computeIfAbsent(origin, maker);
states.add(r);
for (Transition t : replaced.getOutgoingTransitionsFrom(origin)) {
State dest = conversion.computeIfAbsent(t.getDestination(), maker);
states.add(dest);
delta.add(new Transition<>(r, dest, t.getSymbol()));
}
}
states.forEach(origin::addState);
delta.forEach(origin::addTransition);
for (State s : replaced.getInitialStates())
origin.addTransition(path.firstElement().getSource(), conversion.get(s), Atom.EPSILON);
for (State f : replaced.getFinalStates())
origin.addTransition(conversion.get(f), path.lastElement().getDestination(), Atom.EPSILON);
}
return origin;
}
private RegexAutomaton emptyStringReplace(
RegexAutomaton str) {
int maxId = origin.getStates().stream().mapToInt(s -> s.getId()).max().getAsInt();
AtomicInteger counter = new AtomicInteger(maxId + 1);
SortedSet states = new TreeSet<>();
SortedSet> delta = new TreeSet<>();
// states will be a superset of the original ones,
// except that all final states are tuned non-final:
// all paths will end with `str`
Map mapper = new HashMap<>();
origin.getStates().forEach(s -> mapper.put(s, new State(s.getId(), s.isInitial(), false)));
states.addAll(mapper.values());
Function maker = s -> new State(counter.getAndIncrement(), false, false);
for (Transition t : origin.getTransitions()) {
Map conversion = new HashMap<>();
for (State origin : str.getStates()) {
State r = conversion.computeIfAbsent(origin, maker);
states.add(r);
for (Transition tt : str.getOutgoingTransitionsFrom(origin)) {
State dest = conversion.computeIfAbsent(tt.getDestination(), maker);
states.add(dest);
delta.add(new Transition<>(r, dest, tt.getSymbol()));
}
}
for (State s : str.getInitialStates())
delta.add(new Transition<>(mapper.get(t.getSource()), conversion.get(s), Atom.EPSILON));
for (State f : str.getFinalStates())
delta.add(new Transition<>(conversion.get(f), mapper.get(t.getDestination()), t.getSymbol()));
}
maker = s -> new State(counter.getAndIncrement(), false, s.isFinal());
for (State f : origin.getFinalStates()) {
Map conversion = new HashMap<>();
for (State origin : str.getStates()) {
State r = conversion.computeIfAbsent(origin, maker);
states.add(r);
for (Transition tt : str.getOutgoingTransitionsFrom(origin)) {
State dest = conversion.computeIfAbsent(tt.getDestination(), maker);
states.add(dest);
delta.add(new Transition<>(r, dest, tt.getSymbol()));
}
}
for (State s : str.getInitialStates())
delta.add(new Transition<>(f, conversion.get(s), Atom.EPSILON));
}
return new RegexAutomaton(states, delta);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy