All Downloads are FREE. Search and download functionalities are using the official Maven repository.

it.unive.lisa.analysis.string.tarsis.Tarsis Maven / Gradle / Ivy

The newest version!
package it.unive.lisa.analysis.string.tarsis;

import it.unive.lisa.analysis.Lattice;
import it.unive.lisa.analysis.SemanticException;
import it.unive.lisa.analysis.SemanticOracle;
import it.unive.lisa.analysis.lattices.Satisfiability;
import it.unive.lisa.analysis.nonrelational.value.BaseNonRelationalValueDomain;
import it.unive.lisa.analysis.numeric.Interval;
import it.unive.lisa.analysis.string.ContainsCharProvider;
import it.unive.lisa.analysis.string.fsa.FSA;
import it.unive.lisa.analysis.string.fsa.SimpleAutomaton;
import it.unive.lisa.analysis.string.fsa.StringSymbol;
import it.unive.lisa.program.cfg.ProgramPoint;
import it.unive.lisa.symbolic.value.Constant;
import it.unive.lisa.symbolic.value.operator.binary.BinaryOperator;
import it.unive.lisa.symbolic.value.operator.binary.StringConcat;
import it.unive.lisa.symbolic.value.operator.binary.StringContains;
import it.unive.lisa.symbolic.value.operator.ternary.StringReplace;
import it.unive.lisa.symbolic.value.operator.ternary.TernaryOperator;
import it.unive.lisa.util.datastructures.automaton.CyclicAutomatonException;
import it.unive.lisa.util.datastructures.automaton.State;
import it.unive.lisa.util.datastructures.automaton.Transition;
import it.unive.lisa.util.datastructures.regex.RegularExpression;
import it.unive.lisa.util.datastructures.regex.TopAtom;
import it.unive.lisa.util.numeric.IntInterval;
import it.unive.lisa.util.numeric.MathNumber;
import it.unive.lisa.util.numeric.MathNumberConversionException;
import it.unive.lisa.util.representation.StringRepresentation;
import it.unive.lisa.util.representation.StructuredRepresentation;
import java.util.Objects;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.commons.lang3.tuple.Pair;

/**
 * A class that represent the Tarsis domain for strings, exploiting a
 * {@link RegexAutomaton}.
 *
 * @author Luca Negrini
 */
public class Tarsis implements BaseNonRelationalValueDomain, ContainsCharProvider {

	/**
	 * Top element of the domain
	 */
	private static final Tarsis TOP = new Tarsis();

	/**
	 * Top element of the domain
	 */
	private static final Tarsis BOTTOM = new Tarsis(RegexAutomaton.emptyLang());

	/**
	 * Maximum widening threshold, or default threshold if there is no
	 * difference in the size of the two automata.
	 */
	public static final int WIDENING_CAP = 5;

	/**
	 * Used to store the string representation
	 */
	private final RegexAutomaton a;

	/**
	 * Creates a new Tarsis object representing the TOP element.
	 */
	public Tarsis() {
		// top
		this.a = RegexAutomaton.topString();
	}

	/**
	 * Creates a new FSA object using a {@link SimpleAutomaton}.
	 * 
	 * @param a the {@link SimpleAutomaton} used for object construction.
	 */
	public Tarsis(
			RegexAutomaton a) {
		this.a = a;
	}

	/**
	 * Yields the {@link RegexAutomaton} backing this domain element.
	 * 
	 * @return the automaton
	 */
	public RegexAutomaton getAutomaton() {
		return a;
	}

	@Override
	public Tarsis lubAux(
			Tarsis other)
			throws SemanticException {
		return new Tarsis(this.a.union(other.a));
	}

	@Override
	public Tarsis glbAux(
			Tarsis other)
			throws SemanticException {
		return new Tarsis(this.a.intersection(other.a));
	}

	/**
	 * Yields the size of this string, that is, the number of states of the
	 * underlying automaton.
	 * 
	 * @return the size of this string
	 */
	public int size() {
		return a.getStates().size();
	}

	private int getSizeDiffCapped(
			Tarsis other) {
		int size = size();
		int otherSize = other.size();
		if (size > otherSize)
			return Math.min(size - otherSize, WIDENING_CAP);
		else if (size < otherSize)
			return Math.min(otherSize - size, WIDENING_CAP);
		else
			return WIDENING_CAP;
	}

	@Override
	public Tarsis wideningAux(
			Tarsis other)
			throws SemanticException {
		return new Tarsis(this.a.union(other.a).widening(getSizeDiffCapped(other)));
	}

	@Override
	public boolean lessOrEqualAux(
			Tarsis other)
			throws SemanticException {
		return this.a.isContained(other.a);
	}

	@Override
	public boolean equals(
			Object o) {
		if (this == o)
			return true;
		if (o == null || getClass() != o.getClass())
			return false;
		Tarsis fsa = (Tarsis) o;
		return Objects.equals(a, fsa.a);
	}

	@Override
	public int hashCode() {
		return Objects.hash(a);
	}

	@Override
	public Tarsis top() {
		return TOP;
	}

	@Override
	public Tarsis bottom() {
		return BOTTOM;
	}

	@Override
	public boolean isBottom() {
		return !isTop() && this.a.acceptsEmptyLanguage();
	}

	@Override
	public StructuredRepresentation representation() {
		if (isBottom())
			return Lattice.bottomRepresentation();
		else if (isTop())
			return Lattice.topRepresentation();

		return new StringRepresentation(this.a.toRegex().simplify());
	}

	@Override
	public Tarsis evalNonNullConstant(
			Constant constant,
			ProgramPoint pp,
			SemanticOracle oracle)
			throws SemanticException {
		if (constant.getValue() instanceof String)
			return new Tarsis(RegexAutomaton.string((String) constant.getValue()));
		return top();
	}

	// TODO unary and ternary and all other binary
	@Override
	public Tarsis evalBinaryExpression(
			BinaryOperator operator,
			Tarsis left,
			Tarsis right,
			ProgramPoint pp,
			SemanticOracle oracle)
			throws SemanticException {
		if (operator == StringConcat.INSTANCE)
			return new Tarsis(left.a.concat(right.a));
		return top();
	}

	@Override
	public Tarsis evalTernaryExpression(
			TernaryOperator operator,
			Tarsis left,
			Tarsis middle,
			Tarsis right,
			ProgramPoint pp,
			SemanticOracle oracle)
			throws SemanticException {
		if (operator == StringReplace.INSTANCE)
			return left.replace(middle, right);
		return TOP;
	}

	@Override
	public Satisfiability satisfiesBinaryExpression(
			BinaryOperator operator,
			Tarsis left,
			Tarsis right,
			ProgramPoint pp,
			SemanticOracle oracle)
			throws SemanticException {
		if (operator == StringContains.INSTANCE)
			return left.contains(right);
		return Satisfiability.UNKNOWN;
	}

	/**
	 * Semantics of {@link StringContains} between {@code this} and
	 * {@code other}.
	 * 
	 * @param other the other domain instance
	 * 
	 * @return the satisfiability result
	 */
	public Satisfiability contains(
			Tarsis other) {
		try {
			if (!a.hasCycle()
					&& !other.a.hasCycle()
					&& !a.acceptsTopEventually()
					&& !other.a.acceptsTopEventually()) {
				// we can compare languages
				boolean atLeastOne = false, all = true;
				for (String a : a.getLanguage())
					for (String b : other.a.getLanguage()) {
						boolean cont = a.contains(b);
						atLeastOne = atLeastOne || cont;
						all = all && cont;
					}

				if (all)
					return Satisfiability.SATISFIED;
				if (atLeastOne)
					return Satisfiability.UNKNOWN;
				return Satisfiability.NOT_SATISFIED;
			}

			if (!other.a.hasCycle() && other.a.getLanguage().size() == 1
					&& other.a.getLanguage().iterator().next().isEmpty())
				// the empty string is always contained
				return Satisfiability.SATISFIED;

			if (other.a.hasOnlyOnePath() && !other.a.acceptsTopEventually()) {
				Satisfiability allSat = Satisfiability.UNKNOWN;
				RegexAutomaton C = other.a.extractLongestString();
				String longest = C.getLanguage().iterator().next();
				RegexAutomaton withNoScc = a.minimize().makeAcyclic();
				SortedSet lang = withNoScc.getLanguage();
				for (String a : lang)
					allSat = allSat.glb(contains(a, longest));

				if (!lang.isEmpty() && allSat == Satisfiability.SATISFIED)
					return allSat;
			}

			RegexAutomaton transformed = a.explode().factors();
			RegexAutomaton otherExploded = other.a.explode();
			if (otherExploded.intersection(transformed).acceptsEmptyLanguage())
				// we can explode since it does not matter how the inner strings
				// overlap
				return Satisfiability.NOT_SATISFIED;

		} catch (CyclicAutomatonException e) {
			// can safely ignore
		}
		return Satisfiability.UNKNOWN;
	}

	private Satisfiability contains(
			String other,
			String that) {
		if (!other.contains("Ͳ")) {
			if (other.contains(that))
				return Satisfiability.SATISFIED;
			return Satisfiability.NOT_SATISFIED;
		} else {
			String otherWithoutTops = other.replaceAll("Ͳ", "");
			if (otherWithoutTops.contains(that))
				return Satisfiability.SATISFIED;
			else
				return Satisfiability.UNKNOWN;
		}
	}

	/**
	 * Yields the Tarsis automaton corresponding to the substring of this Tarsis
	 * automaton abstract value between two indexes.
	 * 
	 * @param begin where the substring starts
	 * @param end   where the substring ends
	 * 
	 * @return the Tarsis automaton corresponding to the substring of this
	 *             Tarsis automaton between two indexes
	 */
	public Tarsis substring(
			long begin,
			long end) {
		if (isTop() || isBottom())
			return this;

		RegexAutomaton[] array = this.a.toRegex().substring((int) begin, (int) end)
				.parallelStream()
				.map(s -> RegexAutomaton.string(s)).toArray(RegexAutomaton[]::new);

		RegexAutomaton result = RegexAutomaton.emptyLang();

		for (int i = 0; i < array.length; i++)
			result = result.union(array[i]);
		return new Tarsis(result);
	}

	/**
	 * Yields the {@link IntInterval} containing the minimum and maximum length
	 * of this abstract value.
	 * 
	 * @return the minimum and maximum length of this abstract value
	 */
	public IntInterval length() {
		int max = a.lenghtOfLongestString();
		int min = a.toRegex().minLength();
		return new IntInterval(Integer.valueOf(min), max == Integer.MAX_VALUE ? null : max);
	}

	/**
	 * Yields the {@link IntInterval} containing the minimum and maximum index
	 * of {@code s} in {@code this}.
	 *
	 * @param s the string to be searched
	 * 
	 * @return the minimum and maximum index of {@code s} in {@code this}
	 * 
	 * @throws CyclicAutomatonException when the automaton is cyclic and its
	 *                                      language is accessed
	 */
	public IntInterval indexOf(
			Tarsis s)
			throws CyclicAutomatonException {
		if (contains(s) == Satisfiability.NOT_SATISFIED)
			return new IntInterval(-1, -1);
		else if (a.hasCycle() || s.a.hasCycle() || s.a.acceptsTopEventually())
			return new IntInterval(MathNumber.MINUS_ONE, MathNumber.PLUS_INFINITY);
		Pair interval = IndexFinder.findIndexesOf(a, s.a);
		return new IntInterval(interval.getLeft(), interval.getRight());
	}

	/**
	 * Yields the concatenation between two automata.
	 * 
	 * @param other the other automaton
	 * 
	 * @return the concatenation between two automata
	 */
	public Tarsis concat(
			Tarsis other) {
		return new Tarsis(this.a.concat(other.a));
	}

	/**
	 * Yields the replacement of occurrences of {@code search} inside
	 * {@code this} with {@code repl}.
	 * 
	 * @param search the domain instance containing the automaton to search
	 * @param repl   the domain instance containing the automaton to use as
	 *                   replacement
	 * 
	 * @return the domain instance containing the replaced automaton
	 */
	public Tarsis replace(
			Tarsis search,
			Tarsis repl) {
		if (isBottom() || search.isBottom() || repl.isBottom())
			return bottom();

		try {
			return new Tarsis(this.a.replace(search.a, repl.a));
		} catch (CyclicAutomatonException e) {
			return TOP;
		}
	}

	/**
	 * Converts this domain instance to one of {@link FSA}, that uses single
	 * characters as transition symbols.
	 * 
	 * @return the converted domain instance
	 */
	public FSA toFSA() {
		RegexAutomaton exploded = this.a.minimize().explode();
		SortedSet> fsaDelta = new TreeSet<>();

		if (!this.a.acceptsTopEventually()) {
			for (Transition t : exploded.getTransitions())
				fsaDelta.add(new Transition<>(t.getSource(), t.getDestination(),
						new StringSymbol(t.getSymbol().toString())));

			return new FSA(new SimpleAutomaton(exploded.getStates(), fsaDelta));
		}

		SortedSet fsaStates = new TreeSet<>(exploded.getStates());

		for (Transition t : exploded.getTransitions()) {
			if (t.getSymbol() != TopAtom.INSTANCE)
				fsaDelta.add(new Transition<>(t.getSource(), t.getDestination(),
						new StringSymbol(t.getSymbol().toString())));
			else {
				for (char c = 32; c <= 123; c++)
					fsaDelta.add(new Transition<>(t.getSource(), t.getDestination(), new StringSymbol(c)));
				fsaDelta.add(new Transition<>(t.getSource(), t.getDestination(), StringSymbol.EPSILON));
			}
		}

		SimpleAutomaton fsa = new SimpleAutomaton(fsaStates, fsaDelta).minimize();
		return new FSA(fsa);
	}

	@Override
	public Satisfiability containsChar(
			char c)
			throws SemanticException {
		if (isTop())
			return Satisfiability.UNKNOWN;
		if (isBottom())
			return Satisfiability.BOTTOM;

		return satisfiesBinaryExpression(StringContains.INSTANCE, this,
				new Tarsis(RegexAutomaton.string(String.valueOf(c))), null, null);
	}

	/**
	 * Yields a new Tarsis's instance recognizing each string of {@code this}
	 * automaton repeated k-times, with k belonging to {@code intv}.
	 * 
	 * @param intv the interval
	 * 
	 * @return a new Tarsis's instance recognizing each string of {@code this}
	 *             automaton repeated k-times, with k belonging to {@code intv}
	 * 
	 * @throws MathNumberConversionException if {@code intv} is iterated but is
	 *                                           not finite
	 */
	public Tarsis repeat(
			Interval intv)
			throws MathNumberConversionException {
		if (isBottom())
			return this;
		else if (intv.isTop() || a.hasCycle())
			return new Tarsis(a.star());
		else if (intv.interval.isFinite()) {
			if (intv.interval.isSingleton())
				return new Tarsis(a.repeat(intv.interval.getHigh().toLong()));
			else {
				RegexAutomaton result = a.emptyLanguage();

				for (Long i : intv.interval)
					result = result.union(a.repeat(i));
				return new Tarsis(result);
			}
		} else
			return new Tarsis(a.repeat(intv.interval.getLow().toLong()).concat(a.star()));
	}

	/**
	 * Yields a new Tarsis's instance where trailing and leading whitespaces
	 * have been removed from {@code this}.
	 * 
	 * @return a new Tarsis's instance where trailing and leading whitespaces
	 *             have been removed from {@code this}
	 */
	public Tarsis trim() {
		if (isBottom() || isTop())
			return this;

		return new Tarsis(this.a.trim());
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy