Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
z3-z3-4.12.6.src.ast.seq_decl_plugin.cpp Maven / Gradle / Ivy
/*++
Copyright (c) 2011 Microsoft Corporation
Module Name:
seq_decl_plugin.cpp
Abstract:
decl_plugin for the theory of sequences
Author:
Nikolaj Bjorner (nbjorner) 2011-14-11
Revision History:
--*/
#include "ast/seq_decl_plugin.h"
#include "ast/arith_decl_plugin.h"
#include "ast/array_decl_plugin.h"
#include "ast/ast_pp.h"
#include <sstream>
/**
   \brief Create the plugin in its un-initialized state.

   The sort pointers start out null and the signature table is empty;
   set_manager() creates the character/string sorts and init() fills
   in the signatures on first use.
*/
seq_decl_plugin::seq_decl_plugin()
    : m_init(false)
    , m_stringc_sym("String")
    , m_string(nullptr)
    , m_char(nullptr)
    , m_reglan(nullptr)
    , m_has_re(false)
    , m_has_seq(false)
{}
void seq_decl_plugin::finalize() {
for (psig* s : m_sigs)
dealloc(s);
m_manager->dec_ref(m_string);
m_manager->dec_ref(m_char);
m_manager->dec_ref(m_reglan);
}
/**
   \brief Check whether \c s stands for a sort parameter of a signature.

   A sort is treated as a parameter when its name is numerical; in that
   case \c idx receives the number. \c idx is left untouched otherwise.
*/
bool seq_decl_plugin::is_sort_param(sort* s, unsigned& idx) {
    auto const& name = s->get_name();
    if (!name.is_numerical())
        return false;
    idx = name.get_num();
    return true;
}
/**
   \brief Unify the concrete sort \c s with the signature sort \c sP.

   A signature sort with a numerical name is a sort parameter (see is_sort_param):
   its first occurrence records \c s in \c binding at the parameter index, and every
   later occurrence has to agree with the recorded sort. Otherwise the two sorts must
   share family, kind and parameter count, and their sort-valued parameters are
   matched recursively.

   \param binding  parameter index -> bound sort; grown on demand.
   \return true if \c s is an instance of \c sP under \c binding.
*/
bool seq_decl_plugin::match(ptr_vector<sort>& binding, sort* s, sort* sP) {
    if (s == sP) return true;
    unsigned idx;
    if (is_sort_param(sP, idx)) {
        if (binding.size() <= idx) binding.resize(idx+1);
        // a parameter that is already bound must be bound to the same sort
        if (binding[idx] && (binding[idx] != s)) return false;
        binding[idx] = s;
        return true;
    }
    if (s->get_family_id() == sP->get_family_id() &&
        s->get_decl_kind() == sP->get_decl_kind() &&
        s->get_num_parameters() == sP->get_num_parameters()) {
        for (unsigned i = 0, sz = s->get_num_parameters(); i < sz; ++i) {
            parameter const& p = s->get_parameter(i);
            // only sort-valued parameters take part in the unification
            if (p.is_ast() && is_sort(p.get_ast())) {
                parameter const& p2 = sP->get_parameter(i);
                if (!match(binding, to_sort(p.get_ast()), to_sort(p2.get_ast()))) return false;
            }
        }
        return true;
    }
    else {
        TRACE("seq", tout << "Could not match " << mk_pp(s, *m_manager) << " and " << mk_pp(sP, *m_manager) << "\n";);
        return false;
    }
}
/*
  \brief match right associative operator.

  Every one of the \c dsz argument sorts in \c dom is matched against the first
  domain sort of \c sig; an optional \c range is matched against the signature's
  range. On success \c range_out receives the signature range with the computed
  binding applied. An exception is raised through the manager when no argument
  is given or when the sorts do not match.
*/
void seq_decl_plugin::match_assoc(psig& sig, unsigned dsz, sort *const* dom, sort* range, sort_ref& range_out) {
    ptr_vector<sort> binding;
    ast_manager& m = *m_manager;
    if (dsz == 0) {
        std::ostringstream strm;
        strm << "Unexpected number of arguments to '" << sig.m_name << "' ";
        strm << "at least one argument expected " << dsz << " given";
        m.raise_exception(strm.str());
    }
    bool is_match = true;
    for (unsigned i = 0; is_match && i < dsz; ++i) {
        SASSERT(dom[i]);
        // all arguments share the signature's first domain sort
        is_match = match(binding, dom[i], sig.m_dom.get(0));
    }
    if (range && is_match) {
        is_match = match(binding, range, sig.m_range);
    }
    if (!is_match) {
        std::ostringstream strm;
        strm << "Sort of function '" << sig.m_name << "' ";
        strm << "does not match the declared type. Given domain: ";
        for (unsigned i = 0; i < dsz; ++i) {
            strm << mk_pp(dom[i], m) << " ";
        }
        if (range) {
            strm << " and range: " << mk_pp(range, m);
        }
        m.raise_exception(strm.str());
    }
    range_out = apply_binding(binding, sig.m_range);
    SASSERT(range_out);
}
/**
   \brief Match a polymorphic signature against concrete argument sorts.

   The number of arguments has to equal the signature's arity, argument \c i is
   matched against the i-th domain sort, and an optional \c range is matched
   against the signature's range. Failures are reported by raising an exception
   through the manager. On success \c range_out is the signature range under the
   binding stored in \c m_binding.
*/
void seq_decl_plugin::match(psig& sig, unsigned dsz, sort *const* dom, sort* range, sort_ref& range_out) {
    ast_manager& m = *m_manager;
    m_binding.reset();

    if (sig.m_dom.size() != dsz) {
        std::ostringstream out;
        out << "Unexpected number of arguments to '" << sig.m_name << "' "
            << sig.m_dom.size() << " arguments expected " << dsz << " given";
        m.raise_exception(out.str());
    }

    bool ok = true;
    for (unsigned pos = 0; pos < dsz; ++pos) {
        if (!match(m_binding, dom[pos], sig.m_dom[pos].get())) {
            ok = false;
            break;
        }
    }
    if (ok && range) {
        ok = match(m_binding, range, sig.m_range);
    }

    if (!ok) {
        std::ostringstream out;
        out << "Sort of polymorphic function '" << sig.m_name << "' "
            << "does not match the declared type. "
            << "\nGiven domain: ";
        for (unsigned pos = 0; pos < dsz; ++pos) {
            out << mk_pp(dom[pos], m) << " ";
        }
        if (range) {
            out << " and range: " << mk_pp(range, m);
        }
        out << "\nExpected domain: ";
        for (unsigned pos = 0; pos < dsz; ++pos) {
            out << mk_pp(sig.m_dom[pos].get(), m) << " ";
        }
        m.raise_exception(out.str());
    }

    // a nullary function without a range gives nothing to bind parameters from
    if (dsz == 0 && !range) {
        std::ostringstream out;
        out << "Sort of polymorphic function '" << sig.m_name << "' "
            << "is ambiguous. Function takes no arguments and sort of range has not been constrained";
        m.raise_exception(out.str());
    }

    range_out = apply_binding(m_binding, sig.m_range);
    SASSERT(range_out);
}
/**
   \brief Instantiate the signature sort \c s under \c binding.

   A sort parameter is replaced by the sort bound to it; an exception is raised
   through the manager if it is unbound. Sequence and regex sorts are rebuilt
   around their instantiated element sort, where a sequence of characters becomes
   the string sort and a regex over strings becomes the regular-language sort.
   Every other sort is returned unchanged.
*/
sort* seq_decl_plugin::apply_binding(ptr_vector<sort> const& binding, sort* s) {
    unsigned i;
    if (is_sort_param(s, i)) {
        if (binding.size() <= i || !binding[i]) {
            m_manager->raise_exception("Expecting type parameter to be bound");
        }
        return binding[i];
    }
    if (is_sort_of(s, m_family_id, SEQ_SORT) || is_sort_of(s, m_family_id, RE_SORT)) {
        SASSERT(s->get_num_parameters() == 1);
        SASSERT(s->get_parameter(0).is_ast());
        SASSERT(is_sort(s->get_parameter(0).get_ast()));
        sort* p = apply_binding(binding, to_sort(s->get_parameter(0).get_ast()));
        parameter param(p);
        // reuse the cached String / RegLan sorts instead of building new ones
        if (p == m_char && s->get_decl_kind() == SEQ_SORT)
            return m_string;
        if (p == m_string && s->get_decl_kind() == RE_SORT)
            return mk_reglan();
        return mk_sort(s->get_decl_kind(), 1, &param);
    }
    return s;
}
void seq_decl_plugin::init() {
if (m_init) return;
ast_manager& m = *m_manager;
array_util autil(m);
m_init = true;
sort* A = m.mk_uninterpreted_sort(symbol(0u));
sort* strT = m_string;
parameter paramA(A);
parameter paramS(strT);
sort* seqA = m.mk_sort(m_family_id, SEQ_SORT, 1, ¶mA);
parameter paramSA(seqA);
sort* reA = m.mk_sort(m_family_id, RE_SORT, 1, ¶mSA);
sort* reT = m.mk_sort(m_family_id, RE_SORT, 1, ¶mS);
sort* boolT = m.mk_bool_sort();
sort* intT = arith_util(m).mk_int();
sort* predA = autil.mk_array_sort(A, boolT);
sort* seqAseqAseqA[3] = { seqA, seqA, seqA };
sort* seqAreAseqA[3] = { seqA, reA, seqA };
sort* seqAseqA[2] = { seqA, seqA };
sort* seqAreA[2] = { seqA, reA };
sort* reAreA[2] = { reA, reA };
sort* AreA[2] = { A, reA };
sort* seqAint2T[3] = { seqA, intT, intT };
sort* seq2AintT[3] = { seqA, seqA, intT };
sort* str2T[2] = { strT, strT };
sort* str3T[3] = { strT, strT, strT };
sort* strTint2T[3] = { strT, intT, intT };
sort* strTreT[2] = { strT, reT };
sort* str2TintT[3] = { strT, strT, intT };
sort* seqAintT[2] = { seqA, intT };
sort* seq3A[3] = { seqA, seqA, seqA };
m_sigs.resize(LAST_SEQ_OP);
// TBD: have (par ..) construct and load parameterized signature from premable.
m_sigs[OP_SEQ_UNIT] = alloc(psig, m, "seq.unit", 1, 1, &A, seqA);
m_sigs[OP_SEQ_EMPTY] = alloc(psig, m, "seq.empty", 1, 0, nullptr, seqA);
m_sigs[OP_SEQ_CONCAT] = alloc(psig, m, "seq.++", 1, 2, seqAseqA, seqA);
m_sigs[OP_SEQ_PREFIX] = alloc(psig, m, "seq.prefixof", 1, 2, seqAseqA, boolT);
m_sigs[OP_SEQ_SUFFIX] = alloc(psig, m, "seq.suffixof", 1, 2, seqAseqA, boolT);
m_sigs[OP_SEQ_CONTAINS] = alloc(psig, m, "seq.contains", 1, 2, seqAseqA, boolT);
m_sigs[OP_SEQ_EXTRACT] = alloc(psig, m, "seq.extract", 1, 3, seqAint2T, seqA);
m_sigs[OP_SEQ_REPLACE] = alloc(psig, m, "seq.replace", 1, 3, seq3A, seqA);
m_sigs[OP_SEQ_INDEX] = alloc(psig, m, "seq.indexof", 1, 3, seq2AintT, intT);
m_sigs[OP_SEQ_LAST_INDEX] = alloc(psig, m, "seq.last_indexof", 1, 2, seqAseqA, intT);
m_sigs[OP_SEQ_AT] = alloc(psig, m, "seq.at", 1, 2, seqAintT, seqA);
m_sigs[OP_SEQ_NTH] = alloc(psig, m, "seq.nth", 1, 2, seqAintT, A);
m_sigs[OP_SEQ_NTH_I] = alloc(psig, m, "seq.nth_i", 1, 2, seqAintT, A);
m_sigs[OP_SEQ_NTH_U] = alloc(psig, m, "seq.nth_u", 1, 2, seqAintT, A);
m_sigs[OP_SEQ_LENGTH] = alloc(psig, m, "seq.len", 1, 1, &seqA, intT);
m_sigs[OP_RE_PLUS] = alloc(psig, m, "re.+", 1, 1, &reA, reA);
m_sigs[OP_RE_STAR] = alloc(psig, m, "re.*", 1, 1, &reA, reA);
m_sigs[OP_RE_OPTION] = alloc(psig, m, "re.opt", 1, 1, &reA, reA);
m_sigs[OP_RE_RANGE] = alloc(psig, m, "re.range", 1, 2, seqAseqA, reA);
m_sigs[OP_RE_CONCAT] = alloc(psig, m, "re.++", 1, 2, reAreA, reA);
m_sigs[OP_RE_UNION] = alloc(psig, m, "re.union", 1, 2, reAreA, reA);
m_sigs[OP_RE_INTERSECT] = alloc(psig, m, "re.inter", 1, 2, reAreA, reA);
m_sigs[OP_RE_DIFF] = alloc(psig, m, "re.diff", 1, 2, reAreA, reA);
m_sigs[OP_RE_LOOP] = alloc(psig, m, "re.loop", 1, 1, &reA, reA);
m_sigs[OP_RE_POWER] = alloc(psig, m, "re.^", 1, 1, &reA, reA);
m_sigs[OP_RE_COMPLEMENT] = alloc(psig, m, "re.comp", 1, 1, &reA, reA);
m_sigs[OP_RE_EMPTY_SET] = alloc(psig, m, "re.empty", 1, 0, nullptr, reA);
m_sigs[OP_RE_FULL_SEQ_SET] = alloc(psig, m, "re.all", 1, 0, nullptr, reA);
m_sigs[OP_RE_FULL_CHAR_SET] = alloc(psig, m, "re.allchar", 1, 0, nullptr, reA);
m_sigs[OP_RE_OF_PRED] = alloc(psig, m, "re.of.pred", 1, 1, &predA, reA);
m_sigs[OP_RE_REVERSE] = alloc(psig, m, "re.reverse", 1, 1, &reA, reA);
m_sigs[OP_RE_DERIVATIVE] = alloc(psig, m, "re.derivative", 1, 2, AreA, reA);
m_sigs[_OP_RE_ANTIMIROV_UNION] = alloc(psig, m, "re.union", 1, 2, reAreA, reA);
m_sigs[OP_SEQ_TO_RE] = alloc(psig, m, "seq.to.re", 1, 1, &seqA, reA);
m_sigs[OP_SEQ_IN_RE] = alloc(psig, m, "seq.in.re", 1, 2, seqAreA, boolT);
m_sigs[OP_SEQ_REPLACE_RE_ALL] = alloc(psig, m, "str.replace_re_all", 1, 3, seqAreAseqA, seqA);
m_sigs[OP_SEQ_REPLACE_RE] = alloc(psig, m, "str.replace_re", 1, 3, seqAreAseqA, seqA);
m_sigs[OP_SEQ_REPLACE_ALL] = alloc(psig, m, "str.replace_all", 1, 3, seqAseqAseqA, seqA);
m_sigs[OP_STRING_CONST] = nullptr;
m_sigs[_OP_STRING_STRIDOF] = alloc(psig, m, "str.indexof", 0, 3, str2TintT, intT);
m_sigs[_OP_STRING_STRREPL] = alloc(psig, m, "str.replace", 0, 3, str3T, strT);
m_sigs[_OP_STRING_FROM_CHAR] = alloc(psig, m, "char", 1, 0, nullptr, strT);
m_sigs[OP_STRING_ITOS] = alloc(psig, m, "str.from_int", 0, 1, &intT, strT);
m_sigs[OP_STRING_STOI] = alloc(psig, m, "str.to_int", 0, 1, &strT, intT);
m_sigs[OP_STRING_LT] = alloc(psig, m, "str.<", 0, 2, str2T, boolT);
m_sigs[OP_STRING_LE] = alloc(psig, m, "str.<=", 0, 2, str2T, boolT);
m_sigs[OP_STRING_IS_DIGIT] = alloc(psig, m, "str.is_digit", 0, 1, &strT, boolT);
m_sigs[OP_STRING_TO_CODE] = alloc(psig, m, "str.to_code", 0, 1, &strT, intT);
m_sigs[OP_STRING_FROM_CODE] = alloc(psig, m, "str.from_code", 0, 1, &intT, strT);
m_sigs[_OP_STRING_CONCAT] = alloc(psig, m, "str.++", 1, 2, str2T, strT);
m_sigs[_OP_STRING_LENGTH] = alloc(psig, m, "str.len", 0, 1, &strT, intT);
m_sigs[_OP_STRING_STRCTN] = alloc(psig, m, "str.contains", 0, 2, str2T, boolT);
m_sigs[_OP_STRING_CHARAT] = alloc(psig, m, "str.at", 0, 2, strTint2T, strT);
m_sigs[_OP_STRING_PREFIX] = alloc(psig, m, "str.prefixof", 0, 2, str2T, boolT);
m_sigs[_OP_STRING_SUFFIX] = alloc(psig, m, "str.suffixof", 0, 2, str2T, boolT);
m_sigs[_OP_STRING_IN_REGEXP] = alloc(psig, m, "str.in_re", 0, 2, strTreT, boolT);
m_sigs[_OP_STRING_TO_REGEXP] = alloc(psig, m, "str.to_re", 0, 1, &strT, reT);
m_sigs[_OP_REGEXP_EMPTY] = alloc(psig, m, "re.none", 0, 0, nullptr, reT);
m_sigs[_OP_REGEXP_FULL_CHAR] = alloc(psig, m, "re.allchar", 0, 0, nullptr, reT);
m_sigs[_OP_STRING_SUBSTR] = alloc(psig, m, "str.substr", 0, 3, strTint2T, strT);
}
/**
   \brief Return the regular-language sort: the regex sort over the string sort.

   The sort is created the first time it is requested and cached in \c m_reglan
   with a reference held on it (released in finalize()).
*/
sort* seq_decl_plugin::mk_reglan() {
    if (!m_reglan) {
        ast_manager& m = *m_manager;
        parameter paramS(m_string);
        m_reglan = m.mk_sort(symbol("RegEx"), sort_info(m_family_id, RE_SORT, 1, &paramS));
        m.inc_ref(m_reglan);
    }
    return m_reglan;
}
/**
   \brief Attach the plugin to a manager and create the sorts it always needs.

   Looks up the plugin of the "char" family, takes its character sort as
   \c m_char, and builds the string sort \c m_string as the sequence sort over
   characters. A reference is taken on both sorts (released in finalize()).
*/
void seq_decl_plugin::set_manager(ast_manager* m, family_id id) {
    decl_plugin::set_manager(m, id);
    m_char_plugin = static_cast<char_decl_plugin*>(m_manager->get_plugin(m_manager->mk_family_id("char")));
    m_char = get_char_plugin().char_sort();
    m->inc_ref(m_char);
    parameter param(m_char);
    m_string = m->mk_sort(symbol("String"), sort_info(m_family_id, SEQ_SORT, 1, &param));
    m->inc_ref(m_string);
}
/**
   \brief Create a sort of this family.

   - _STRING_SORT and _REGLAN_SORT return the cached string / regular-language sorts.
   - SEQ_SORT and RE_SORT expect exactly one parameter that is a sort; a sequence
     over the character sort is the string sort.

   Malformed parameters are reported by raising an exception through the manager.
*/
sort * seq_decl_plugin::mk_sort(decl_kind k, unsigned num_parameters, parameter const * parameters) {
    init();
    ast_manager& m = *m_manager;

    if (k == _STRING_SORT)
        return m_string;
    if (k == _REGLAN_SORT)
        return mk_reglan();

    if (k == SEQ_SORT) {
        if (num_parameters != 1)
            m.raise_exception("Invalid sequence sort, expecting one parameter");
        parameter const& elem = parameters[0];
        if (!(elem.is_ast() && is_sort(elem.get_ast())))
            m.raise_exception("invalid sequence sort, parameter is not a sort");
        if (elem.get_ast() == m_char)
            return m_string;
        return m.mk_sort(symbol("Seq"), sort_info(m_family_id, SEQ_SORT, num_parameters, parameters));
    }

    if (k == RE_SORT) {
        if (num_parameters != 1)
            m.raise_exception("Invalid regex sort, expecting one parameter");
        parameter const& elem = parameters[0];
        if (!(elem.is_ast() && is_sort(elem.get_ast())))
            m.raise_exception("invalid regex sort, parameter is not a sort");
        return m.mk_sort(symbol("RegEx"), sort_info(m_family_id, RE_SORT, num_parameters, parameters));
    }

    UNREACHABLE();
    return nullptr;
}
/**
   \brief Declare a sequence operator from the signature of kind \c k.

   The declaration always carries kind \c k; its name is taken from the signature
   of \c k_string when the first argument has the string sort and from the
   signature of \c k otherwise.
*/
func_decl* seq_decl_plugin::mk_seq_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_string) {
    ast_manager& m = *m_manager;
    sort_ref result_sort(m);
    match(*m_sigs[k], arity, domain, range, result_sort);
    decl_kind const name_kind = (domain[0] == m_string) ? k_string : k;
    func_decl_info info(m_family_id, k);
    return m.mk_func_decl(m_sigs[name_kind]->m_name, arity, domain, result_sort, info);
}
/**
   \brief Declare an operator that is matched and named by the signature of kind
   \c k but registered under the kind \c k_seq.
*/
func_decl* seq_decl_plugin::mk_str_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_seq) {
    ast_manager& m = *m_manager;
    psig& sig = *m_sigs[k];
    sort_ref result_sort(m);
    match(sig, arity, domain, range, result_sort);
    func_decl_info info(m_family_id, k_seq);
    return m.mk_func_decl(sig.m_name, arity, domain, result_sort, info);
}
/**
   \brief Declare an associative operator; forwards to the seven-argument
   overload with \c is_right set to true.
*/
func_decl* seq_decl_plugin::mk_assoc_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_seq, decl_kind k_string) {
    bool const is_right = true;
    return mk_assoc_fun(k, arity, domain, range, k_seq, k_string, is_right);
}
/**
   \brief Declare an associative operator; forwards to the seven-argument
   mk_assoc_fun overload with \c is_right set to false.
*/
func_decl* seq_decl_plugin::mk_left_assoc_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_seq, decl_kind k_string) {
    bool const is_right = false;
    return mk_assoc_fun(k, arity, domain, range, k_seq, k_string, is_right);
}
/**
   \brief Declare str.from_ubv: one bit-vector argument, string result.
   Any other argument list is rejected with an exception raised through the manager.
*/
func_decl* seq_decl_plugin::mk_ubv2s(unsigned arity, sort* const* domain) const {
    ast_manager& m = *m_manager;
    // the bit-vector check is only reached for arity == 1, so domain[0] is valid there
    if (arity != 1 || !bv_util(m).is_bv_sort(domain[0]))
        m.raise_exception("Invalid str.from_ubv expects one bit-vector argument");
    func_decl_info info(m_family_id, OP_STRING_UBVTOS);
    sort* result_sort = m_string;
    return m.mk_func_decl(symbol("str.from_ubv"), arity, domain, result_sort, info);
}
/**
   \brief Declare str.from_sbv: one bit-vector argument, string result.
   Any other argument list is rejected with an exception raised through the manager.
*/
func_decl* seq_decl_plugin::mk_sbv2s(unsigned arity, sort* const* domain) const {
    ast_manager& m = *m_manager;
    // the bit-vector check is only reached for arity == 1, so domain[0] is valid there
    if (arity != 1 || !bv_util(m).is_bv_sort(domain[0]))
        m.raise_exception("Invalid str.from_sbv expects one bit-vector argument");
    func_decl_info info(m_family_id, OP_STRING_SBVTOS);
    sort* result_sort = m_string;
    return m.mk_func_decl(symbol("str.from_sbv"), arity, domain, result_sort, info);
}
/**
   \brief Declare an associative binary operator over a single sort.

   The argument sorts are checked with match_assoc against the signature of \c k.
   The result is a binary declaration of kind \c k_seq over the computed range;
   its name comes from the signature of \c k_string when that range is the string
   sort and from the signature of \c k_seq otherwise. The declaration is always
   flagged left-associative and additionally right-associative when \c is_right.
*/
func_decl* seq_decl_plugin::mk_assoc_fun(decl_kind k, unsigned arity, sort* const* domain, sort* range, decl_kind k_seq, decl_kind k_string, bool is_right) {
    ast_manager& m = *m_manager;
    sort_ref rng(m);
    if (arity == 0)
        m.raise_exception("Invalid function application. At least one argument expected");
    match_assoc(*m_sigs[k], arity, domain, range, rng);

    func_decl_info info(m_family_id, k_seq);
    if (is_right) {
        info.set_right_associative(true);
    }
    info.set_left_associative(true);

    decl_kind const name_kind = (rng == m_string) ? k_string : k_seq;
    return m.mk_func_decl(m_sigs[name_kind]->m_name, rng, rng, rng, info);
}
/**
   \brief Create the function declaration for operator kind \c k.

   Initializes the signature table on demand and records that sequences are in
   use (\c m_has_seq); regex-related kinds also set \c m_has_re. Most kinds are
   checked against their signature in \c m_sigs. The underscore-prefixed string
   kinds are declared with the corresponding sequence kind in their
   func_decl_info (see mk_str_fun), so later code sees only one kind per operator.

   Ill-typed or ill-parameterized requests are reported by raising an exception
   through the manager.
*/
func_decl* seq_decl_plugin::mk_func_decl(decl_kind k, unsigned num_parameters, parameter const * parameters,
                                         unsigned arity, sort * const * domain, sort * range) {
    init();
    m_has_seq = true;
    ast_manager& m = *m_manager;
    sort_ref rng(m);
    switch(k) {
    case OP_SEQ_EMPTY:
        match(*m_sigs[k], arity, domain, range, rng);
        if (rng == m_string) {
            // the empty string is represented as the string constant ""
            parameter param(zstring(""));
            return mk_func_decl(OP_STRING_CONST, 1, &param, 0, nullptr, m_string);
        }
        else {
            parameter param(rng.get());
            func_decl_info info(m_family_id, k, 1, &param);
            return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, info);
        }
    case OP_RE_PLUS:
    case OP_RE_STAR:
    case OP_RE_OPTION:
    case OP_RE_RANGE:
    case OP_RE_OF_PRED:
    case OP_RE_COMPLEMENT:
    case OP_RE_REVERSE:
    case OP_RE_DERIVATIVE:
    case _OP_RE_ANTIMIROV_UNION:
        m_has_re = true;
        Z3_fallthrough;
    case OP_SEQ_UNIT:
    case OP_STRING_ITOS:
    case OP_STRING_STOI:
    case OP_STRING_LT:
    case OP_STRING_LE:
    case OP_STRING_IS_DIGIT:
    case OP_STRING_TO_CODE:
    case OP_STRING_FROM_CODE:
        match(*m_sigs[k], arity, domain, range, rng);
        return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
    case OP_STRING_UBVTOS:
        return mk_ubv2s(arity, domain);
    case OP_STRING_SBVTOS:
        return mk_sbv2s(arity, domain);
    case _OP_REGEXP_FULL_CHAR:
        m_has_re = true;
        if (!range) range = mk_reglan();
        match(*m_sigs[k], arity, domain, range, rng);
        return m.mk_func_decl(symbol("re.allchar"), arity, domain, rng, func_decl_info(m_family_id, OP_RE_FULL_CHAR_SET));
    case OP_RE_FULL_CHAR_SET:
        m_has_re = true;
        // without an explicit range the regular-language sort is assumed
        if (!range) range = mk_reglan();
        if (range == mk_reglan()) {
            match(*m_sigs[k], arity, domain, range, rng);
            return m.mk_func_decl(symbol("re.allchar"), arity, domain, rng, func_decl_info(m_family_id, k));
        }
        return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, range, func_decl_info(m_family_id, k));
    case OP_RE_FULL_SEQ_SET:
        m_has_re = true;
        if (!range) range = mk_reglan();
        return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, range, func_decl_info(m_family_id, k));
    case _OP_REGEXP_EMPTY:
        m_has_re = true;
        if (!range) range = mk_reglan();
        match(*m_sigs[k], arity, domain, range, rng);
        return m.mk_func_decl(symbol("re.none"), arity, domain, rng, func_decl_info(m_family_id, OP_RE_EMPTY_SET));
    case OP_RE_EMPTY_SET:
        m_has_re = true;
        if (!range) range = mk_reglan();
        if (range == mk_reglan()) {
            match(*m_sigs[k], arity, domain, range, rng);
            return m.mk_func_decl(symbol("re.none"), arity, domain, rng, func_decl_info(m_family_id, k));
        }
        return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, range, func_decl_info(m_family_id, k));
    case OP_RE_LOOP:
        m_has_re = true;
        // the loop bounds are given either as integer parameters (arity 1)
        // or as one or two extra integer arguments (arity 2 or 3)
        switch (arity) {
        case 1:
            match(*m_sigs[k], arity, domain, range, rng);
            if (num_parameters == 0 || num_parameters > 2 || !parameters[0].is_int() || (num_parameters == 2 && !parameters[1].is_int())) {
                m.raise_exception("Expecting two numeral parameters to function re-loop");
            }
            return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k, num_parameters, parameters));
        case 2:
            if (mk_reglan() != domain[0] || !arith_util(m).is_int(domain[1])) {
                m.raise_exception("Incorrect type of arguments passed to re.loop. Expecting regular expression and two integer parameters");
            }
            return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, domain[0], func_decl_info(m_family_id, k, num_parameters, parameters));
        case 3:
            if (mk_reglan() != domain[0] || !arith_util(m).is_int(domain[1]) || !arith_util(m).is_int(domain[2])) {
                m.raise_exception("Incorrect type of arguments passed to re.loop. Expecting regular expression and two integer parameters");
            }
            return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, domain[0], func_decl_info(m_family_id, k, num_parameters, parameters));
        default:
            m.raise_exception("Incorrect number of arguments passed to loop. Expected 1 regular expression and two integer parameters");
        }
    case OP_RE_POWER:
        m_has_re = true;
        if (num_parameters == 1 && parameters[0].is_int() && arity == 1 && parameters[0].get_int() >= 0) {
            rng = domain[0];
            return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k, num_parameters, parameters));
        }
        m.raise_exception("Incorrect arguments used for re.^. Expected one non-negative integer parameter");
    case OP_STRING_CONST:
        if (!(num_parameters == 1 && arity == 0 && parameters[0].is_zstring())) {
            m.raise_exception("invalid string declaration");
        }
        return m.mk_const_decl(m_stringc_sym, m_string,
                               func_decl_info(m_family_id, OP_STRING_CONST, num_parameters, parameters));
    case OP_RE_UNION:
    case OP_RE_CONCAT:
    case OP_RE_INTERSECT:
    case OP_RE_DIFF:
        m_has_re = true;
        return mk_left_assoc_fun(k, arity, domain, range, k, k);
    case OP_SEQ_REPLACE_RE_ALL:
    case OP_SEQ_REPLACE_RE:
        m_has_re = true;
        Z3_fallthrough;
    case OP_SEQ_REPLACE_ALL:
        return mk_str_fun(k, arity, domain, range, k);
    case OP_SEQ_CONCAT:
        return mk_assoc_fun(k, arity, domain, range, k, _OP_STRING_CONCAT);
    case _OP_STRING_CONCAT:
        return mk_assoc_fun(k, arity, domain, range, OP_SEQ_CONCAT, k);
    case _OP_STRING_FROM_CHAR: {
        // a character literal becomes a one-character string constant
        if (!(num_parameters == 1 && parameters[0].is_int()))
            m.raise_exception("character literal expects integer parameter");
        int i = parameters[0].get_int();
        if (i < 0)
            m.raise_exception("character literal expects a non-negative integer parameter");
        if (i > (int)m_char_plugin->max_char())
            m.raise_exception("character literal is out of bounds");
        zstring zs(i);
        parameter p(zs);
        return m.mk_const_decl(m_stringc_sym, m_string,func_decl_info(m_family_id, OP_STRING_CONST, 1, &p));
    }
    case OP_SEQ_REPLACE:
        return mk_seq_fun(k, arity, domain, range, _OP_STRING_STRREPL);
    case _OP_STRING_STRREPL:
        return mk_str_fun(k, arity, domain, range, OP_SEQ_REPLACE);
    case OP_SEQ_INDEX:
        if (arity == 2) {
            // two-argument form: match as if an integer third argument were
            // given, but declare the function over the two sorts supplied
            sort* dom[3] = { domain[0], domain[1], arith_util(m).mk_int() };
            sort_ref rng(m);
            match(*m_sigs[k], 3, dom, range, rng);
            return m.mk_func_decl(m_sigs[(dom[0] == m_string)?_OP_STRING_STRIDOF:k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
        }
        return mk_seq_fun(k, arity, domain, range, _OP_STRING_STRIDOF);
    case _OP_STRING_STRIDOF:
        if (arity == 2) {
            sort* dom[3] = { domain[0], domain[1], arith_util(m).mk_int() };
            sort_ref rng(m);
            match(*m_sigs[k], 3, dom, range, rng);
            return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, OP_SEQ_INDEX));
        }
        return mk_str_fun(k, arity, domain, range, OP_SEQ_INDEX);
    case OP_SEQ_LAST_INDEX:
        if (arity != 2) {
            m.raise_exception("two arguments expected in last_indexof");
        }
        else {
            return mk_seq_fun(k, arity, domain, range, OP_SEQ_LAST_INDEX);
        }
    case OP_SEQ_PREFIX:
        return mk_seq_fun(k, arity, domain, range, _OP_STRING_PREFIX);
    case _OP_STRING_PREFIX:
        return mk_str_fun(k, arity, domain, range, OP_SEQ_PREFIX);
    case OP_SEQ_SUFFIX:
        return mk_seq_fun(k, arity, domain, range, _OP_STRING_SUFFIX);
    case _OP_STRING_SUFFIX:
        return mk_str_fun(k, arity, domain, range, OP_SEQ_SUFFIX);
    case OP_SEQ_LENGTH:
        return mk_seq_fun(k, arity, domain, range, _OP_STRING_LENGTH);
    case _OP_STRING_LENGTH:
        return mk_str_fun(k, arity, domain, range, OP_SEQ_LENGTH);
    case OP_SEQ_CONTAINS:
        return mk_seq_fun(k, arity, domain, range, _OP_STRING_STRCTN);
    case _OP_STRING_STRCTN:
        return mk_str_fun(k, arity, domain, range, OP_SEQ_CONTAINS);
    case OP_SEQ_MAP:
    case OP_SEQ_MAPI:
    case OP_SEQ_FOLDL:
    case OP_SEQ_FOLDLI:
        // the map/fold signatures are not created by init(); add them on first use
        add_map_sig();
        return mk_str_fun(k, arity, domain, range, k);
    case OP_SEQ_TO_RE:
        m_has_re = true;
        return mk_seq_fun(k, arity, domain, range, _OP_STRING_TO_REGEXP);
    case _OP_STRING_TO_REGEXP:
        m_has_re = true;
        return mk_str_fun(k, arity, domain, range, OP_SEQ_TO_RE);
    case OP_SEQ_IN_RE:
        m_has_re = true;
        return mk_seq_fun(k, arity, domain, range, _OP_STRING_IN_REGEXP);
    case _OP_STRING_IN_REGEXP:
        m_has_re = true;
        return mk_str_fun(k, arity, domain, range, OP_SEQ_IN_RE);
    case OP_SEQ_AT:
        return mk_seq_fun(k, arity, domain, range, _OP_STRING_CHARAT);
    case _OP_STRING_CHARAT:
        return mk_str_fun(k, arity, domain, range, OP_SEQ_AT);
    case OP_SEQ_NTH:
    case OP_SEQ_NTH_I:
    case OP_SEQ_NTH_U:
        match(*m_sigs[k], arity, domain, range, rng);
        return m.mk_func_decl(m_sigs[k]->m_name, arity, domain, rng, func_decl_info(m_family_id, k));
    case OP_SEQ_EXTRACT:
        return mk_seq_fun(k, arity, domain, range, _OP_STRING_SUBSTR);
    case _OP_STRING_SUBSTR:
        return mk_str_fun(k, arity, domain, range, OP_SEQ_EXTRACT);
    case _OP_SEQ_SKOLEM: {
        // the first parameter supplies the name of the skolem function
        if (num_parameters == 0 || !parameters[0].is_symbol()) {
            m.raise_exception("first parameter to skolem symbol should be a parameter");
        }
        symbol s = parameters[0].get_symbol();
        return m.mk_func_decl(s, arity, domain, range, func_decl_info(m_family_id, k, num_parameters, parameters));
    }
    default:
        UNREACHABLE();
        return nullptr;
    }
}
void seq_decl_plugin::add_map_sig() {
if (m_sigs[OP_SEQ_MAP])
return;
ast_manager& m = *m_manager;
array_util autil(m);
sort* A = m.mk_uninterpreted_sort(symbol(0u));
sort* B = m.mk_uninterpreted_sort(symbol(1u));
parameter paramA(A);
parameter paramB(B);
sort* seqA = m.mk_sort(m_family_id, SEQ_SORT, 1, ¶mA);
sort* seqB = m.mk_sort(m_family_id, SEQ_SORT, 1, ¶mB);
sort* intT = arith_util(m).mk_int();
sort* arrAB = autil.mk_array_sort(A, B);
sort* arrIAB = autil.mk_array_sort(intT, A, B);
sort* arrBAB = autil.mk_array_sort(B, A, B);
sort* arrIBAB = autil.mk_array_sort(intT, B, A, B);
sort* arrABseqA[2] = { arrAB, seqA };
sort* arrIABintTseqA[3] = { arrIAB, intT, seqA };
sort* arrBAB_BseqA[3] = { arrBAB, B,seqA };
sort* arrIBABintTBseqA[4] = { arrIBAB, intT, B, seqA };
m_sigs[OP_SEQ_MAP] = alloc(psig, m, "seq.map", 2, 2, arrABseqA, seqB);
m_sigs[OP_SEQ_MAPI] = alloc(psig, m, "seq.mapi", 2, 3, arrIABintTseqA, seqB);
m_sigs[OP_SEQ_FOLDL] = alloc(psig, m, "seq.fold_left", 2, 3, arrBAB_BseqA, B);
m_sigs[OP_SEQ_FOLDLI] = alloc(psig, m, "seq.fold_lefti", 2, 4, arrIBABintTBseqA, B);
}
void seq_decl_plugin::get_op_names(svector & op_names, symbol const & logic) {
init();
for (unsigned i = 0; i < m_sigs.size(); ++i) {
if (m_sigs[i])
op_names.push_back(builtin_name(m_sigs[i]->m_name.str(), i));
}
op_names.push_back(builtin_name("seq.map", OP_SEQ_MAP));
op_names.push_back(builtin_name("seq.mapi", OP_SEQ_MAPI));
op_names.push_back(builtin_name("seq.foldl", OP_SEQ_FOLDL));
op_names.push_back(builtin_name("seq.foldli", OP_SEQ_FOLDLI));
op_names.push_back(builtin_name("seq.fold_lefti", OP_SEQ_FOLDLI));
op_names.push_back(builtin_name("seq.fold_left", OP_SEQ_FOLDL));
op_names.push_back(builtin_name("str.in.re", _OP_STRING_IN_REGEXP));
op_names.push_back(builtin_name("str.in-re", _OP_STRING_IN_REGEXP));
op_names.push_back(builtin_name("str.to.re", _OP_STRING_TO_REGEXP));
op_names.push_back(builtin_name("str.to-re", _OP_STRING_TO_REGEXP));
op_names.push_back(builtin_name("str.to-int", OP_STRING_STOI));
op_names.push_back(builtin_name("str.to.int", OP_STRING_STOI));
op_names.push_back(builtin_name("str.from-int", OP_STRING_ITOS));
op_names.push_back(builtin_name("int.to.str", OP_STRING_ITOS));
op_names.push_back(builtin_name("re.nostr", _OP_REGEXP_EMPTY));
op_names.push_back(builtin_name("re.complement", OP_RE_COMPLEMENT));
op_names.push_back(builtin_name("str.from_ubv", OP_STRING_UBVTOS));
op_names.push_back(builtin_name("str.from_sbv", OP_STRING_SBVTOS));
}
void seq_decl_plugin::get_sort_names(svector & sort_names, symbol const & logic) {
init();
sort_names.push_back(builtin_name("Seq", SEQ_SORT));
sort_names.push_back(builtin_name("RegEx", RE_SORT));
// SMTLIB 2.6 RegLan, String
sort_names.push_back(builtin_name("RegLan", _REGLAN_SORT));
sort_names.push_back(builtin_name("String", _STRING_SORT));
// SMTLIB 2.5 compatibility
sort_names.push_back(builtin_name("StringSequence", _STRING_SORT));
}
/**
   \brief Create the string constant with value \c s.
   The value is stored as the single parameter of an OP_STRING_CONST declaration.
*/
app* seq_decl_plugin::mk_string(zstring const& s) {
    parameter param(s);
    func_decl* f = m_manager->mk_const_decl(m_stringc_sym, m_string,
                                            func_decl_info(m_family_id, OP_STRING_CONST, 1, &param));
    return m_manager->mk_const(f);
}
/**
   \brief Create the character constant \c u by delegating to the character plugin.
*/
app* seq_decl_plugin::mk_char(unsigned u) {
    auto& chars = get_char_plugin();
    return chars.mk_char(u);
}
/**
   \brief A declaration counts as uninterpreted exactly when seq_util recognizes
   it as nth_u.
*/
bool seq_decl_plugin::is_considered_uninterpreted(func_decl * f) {
    seq_util seq(*m_manager);
    bool const is_nth_u = seq.str.is_nth_u(f);
    return is_nth_u;
}
bool seq_decl_plugin::is_unique_value(app* e) const {
return false;
}
/**
   \brief Check whether \c e is a sequence value.

   Values are the empty sequence, string constants, units of a value, and
   concatenations whose arguments are values. For a concatenation the arguments
   after the first are checked recursively and the first argument is handled by
   the enclosing loop. Arguments that are not applications are skipped, except in
   first position where they make the term a non-value.
*/
bool seq_decl_plugin::is_value(app* e) const {
    for (;;) {
        if (is_app_of(e, m_family_id, OP_SEQ_EMPTY) ||
            is_app_of(e, m_family_id, OP_STRING_CONST))
            return true;
        if (is_app_of(e, m_family_id, OP_SEQ_UNIT) &&
            m_manager->is_value(e->get_arg(0)))
            return true;
        if (!is_app_of(e, m_family_id, OP_SEQ_CONCAT))
            return false;

        unsigned const num_args = e->get_num_args();
        for (unsigned i = 1; i < num_args; ++i) {
            expr* arg = e->get_arg(i);
            if (is_app(arg) && !is_value(to_app(arg)))
                return false;
        }
        expr* head = e->get_arg(0);
        if (!is_app(head))
            return false;
        e = to_app(head);
    }
}
/**
   \brief Model values are the empty sequence, string constants, and units whose
   argument is a value according to the manager.
*/
bool seq_decl_plugin::is_model_value(app* e) const {
    return
        is_app_of(e, m_family_id, OP_SEQ_EMPTY) ||
        is_app_of(e, m_family_id, OP_STRING_CONST) ||
        (is_app_of(e, m_family_id, OP_SEQ_UNIT) && m_manager->is_value(e->get_arg(0)));
}
/**
   \brief Two terms are reported equal only when they are the same term.
   Concatenations are not analyzed.
*/
bool seq_decl_plugin::are_equal(app* a, app* b) const {
    // handle concatenations
    return a == b;
}
/**
   \brief Check whether \c a and \c b are known to denote different sequences.

   Reported distinct: two different string-constant terms, two units whose
   arguments the manager reports distinct, and an empty sequence paired with a
   unit. Everything else, including identical terms, yields false.
*/
bool seq_decl_plugin::are_distinct(app* a, app* b) const {
    if (a == b)
        return false;

    auto has_kind = [this](app* e, decl_kind k) {
        return is_app_of(e, m_family_id, k);
    };

    if (has_kind(a, OP_STRING_CONST) && has_kind(b, OP_STRING_CONST))
        return true;
    if (has_kind(a, OP_SEQ_UNIT) && has_kind(b, OP_SEQ_UNIT))
        return m_manager->are_distinct(a->get_arg(0), b->get_arg(0));
    if (has_kind(a, OP_SEQ_EMPTY) && has_kind(b, OP_SEQ_UNIT))
        return true;
    if (has_kind(b, OP_SEQ_EMPTY) && has_kind(a, OP_SEQ_UNIT))
        return true;
    return false;
}
/**
   \brief Produce a term of sort \c s: the empty sequence for a sequence sort,
   and to_re of the empty sequence for a regex sort. Other sorts are not expected.
*/
expr* seq_decl_plugin::get_some_value(sort* s) {
    seq_util util(*m_manager);
    if (util.is_seq(s))
        return util.str.mk_empty(s);

    sort* seq;
    if (util.is_re(s, seq)) {
        auto* empty_seq = util.str.mk_empty(seq);
        return util.re.mk_to_re(empty_seq);
    }

    UNREACHABLE();
    return nullptr;
}
/**
   \brief Create an application of the skolem function called \c name to the
   \c n arguments in \c args, with result sort \c range.
   The name is passed as the single parameter of an _OP_SEQ_SKOLEM declaration.
*/
app* seq_util::mk_skolem(symbol const& name, unsigned n, expr* const* args, sort* range) {
    SASSERT(range);
    parameter param(name);
    func_decl* f = m.mk_func_decl(get_family_id(), _OP_SEQ_SKOLEM, 1, &param, n, args, range);
    return m.mk_app(f, n, args);
}
/**
   \brief Create the string constant \c s via the sequence plugin reachable through \c u.
*/
app* seq_util::str::mk_string(zstring const& s) const {
    app* str_const = u.seq.mk_string(s);
    return str_const;
}
/**
   \brief Create the character constant for position \c idx of \c s.
*/
app* seq_util::str::mk_char(zstring const& s, unsigned idx) const {
    auto const code = s[idx];
    return u.mk_char(code);
}
/**
   \brief Create the character constant with code \c ch; forwards to the enclosing seq_util.
*/
app* seq_util::str::mk_char(unsigned ch) const {
    app* char_const = u.mk_char(ch);
    return char_const;
}
/**
   \brief Create the "char.bit" term for \c e and bit index \c idx; forwards to
   seq_util::mk_char_bit.
*/
app* seq_util::str::mk_char_bit(expr* e, unsigned idx) {
    app* bit = u.mk_char_bit(e, idx);
    return bit;
}
/**
   \brief Create a Boolean skolem application named "char.bit" over \c e.
   The bit index \c i is stored as the second parameter of the declaration.
*/
app* seq_util::mk_char_bit(expr* e, unsigned i) {
    sort* bool_sort = m.mk_bool_sort();
    parameter decl_params[2] = { parameter(symbol("char.bit")), parameter(i) };
    func_decl* decl = m.mk_func_decl(get_family_id(), _OP_SEQ_SKOLEM, 2, decl_params, 1, &e, bool_sort);
    return m.mk_app(decl, 1, &e);
}
/**
   \brief Add two unsigned numbers, returning UINT_MAX when the sum wraps around.
*/
unsigned seq_util::max_plus(unsigned x, unsigned y) const {
    unsigned const sum = x + y;
    bool const wrapped = sum < x || sum < y;
    return wrapped ? UINT_MAX : sum;
}
/**
   \brief Multiply two unsigned numbers, returning UINT_MAX when the product
   exceeds UINT_MAX. The product is computed in 64 bits.
*/
unsigned seq_util::max_mul(unsigned x, unsigned y) const {
    uint64_t const product = static_cast<uint64_t>(x) * static_cast<uint64_t>(y);
    if (product > UINT_MAX)
        return UINT_MAX;
    return static_cast<unsigned>(product);
}
/**
   \brief Forward to ch.is_const_char; \c c is the out-argument of that call.
*/
bool seq_util::is_const_char(expr* e, unsigned& c) const {
    bool const is_const = ch.is_const_char(e, c);
    return is_const;
}
/**
   \brief Forward to ch.is_le.
*/
bool seq_util::is_char_le(expr const* e) const {
    bool const is_le = ch.is_le(e);
    return is_le;
}
/**
   \brief Forward to ch.is_to_int.
*/
bool seq_util::is_char2int(expr const* e) const {
    bool const is_to_int = ch.is_to_int(e);
    return is_to_int;
}
/**
   \brief Forward to ch.is_bv2char.
*/
bool seq_util::is_bv2char(expr const* e) const {
    bool const is_conv = ch.is_bv2char(e);
    return is_conv;
}
/**
   \brief Forward to ch.is_char2bv.
*/
bool seq_util::is_char2bv(expr const* e) const {
    bool const is_conv = ch.is_char2bv(e);
    return is_conv;
}
/**
   \brief Create the character constant with code \c ch through the sequence plugin.
*/
app* seq_util::mk_char(unsigned ch) const {
    app* char_const = seq.mk_char(ch);
    return char_const;
}
/**
   \brief Build the comparison ch1 <= ch2 by forwarding to ch.mk_le.
*/
app* seq_util::mk_le(expr* ch1, expr* ch2) const {
    auto* le = ch.mk_le(ch1, ch2);
    return le;
}
/**
   \brief Build ch1 < ch2, encoded as the negation of ch2 <= ch1.
*/
app* seq_util::mk_lt(expr* ch1, expr* ch2) const {
    auto* ch2_le_ch1 = mk_le(ch2, ch1);
    return m.mk_not(ch2_le_ch1);
}
/**
   \brief Recognize a constraint on \c x that describes a range of constant characters.

   \c negated is set to whether \c e is a negation; the checks are then made on
   the formula under the negation, or on \c e itself otherwise. Recognized shapes,
   tried in this order:
     - x = c                   : l = u = c
     - x <= c                  : l = 0, u = c
     - c <= x                  : l = c, u = max_char()
     - (c1 <= x) & (x <= c2)   : l = c1, u = c2
     - c = x                   : l = u = c
     - (x <= c2) & (c1 <= x)   : l = c1, u = c2
   \return true when one of the shapes matches.
*/
bool seq_util::is_char_const_range(expr const* x, expr* e, unsigned& l, unsigned& u, bool& negated) const {
    expr* lhs, * rhs, * conj0, * body, * conj1, * lo, * hi;
    body = e;
    negated = m.is_not(e, body);

    // x = c
    if (m.is_eq(body, lhs, rhs) && (lhs == x && is_const_char(rhs, l))) {
        u = l;
        return true;
    }
    // (x <= u)
    if (is_char_le(body, lhs, rhs) && lhs == x && is_const_char(rhs, u)) {
        l = 0;
        return true;
    }
    // (l <= x)
    if (is_char_le(body, lhs, rhs) && rhs == x && is_const_char(lhs, l)) {
        u = max_char();
        return true;
    }
    // (l <= x) & (x <= u)
    if (m.is_and(body, conj0, conj1) && is_char_le(conj0, lo, lhs) && lhs == x && is_const_char(lo, l) &&
        is_char_le(conj1, rhs, hi) && rhs == x && is_const_char(hi, u)) {
        return true;
    }
    // c = x
    if (m.is_eq(body, lhs, rhs) && rhs == x && is_const_char(lhs, l)) {
        u = l;
        return true;
    }
    // (x <= u) & (l <= x)
    if (m.is_and(body, conj0, conj1) && is_char_le(conj0, lhs, hi) && lhs == x && is_const_char(hi, u) &&
        is_char_le(conj1, lo, rhs) && rhs == x && is_const_char(lo, l)) {
        return true;
    }
    return false;
}
/**
   \brief If \c f is a string-constant declaration, copy its value (parameter 0)
   into \c s and return true; otherwise return false and leave \c s alone.
*/
bool seq_util::str::is_string(func_decl const* f, zstring& s) const {
    if (!is_string(f))
        return false;
    s = f->get_parameter(0).get_zstring();
    return true;
}
/**
   \brief Expression-level variant: \c n has to be an application whose
   declaration is a string constant; its value is then stored in \c s.
*/
bool seq_util::str::is_string(expr const* n, zstring& s) const {
    if (!is_app(n))
        return false;
    return is_string(to_app(n)->get_decl(), s);
}
/**
   \brief Recognize nth_i whose index is an unsigned numeral.
   \c s receives the sequence argument and \c idx the index value.
*/
bool seq_util::str::is_nth_i(expr const* n, expr*& s, unsigned& idx) const {
    expr* index = nullptr;
    return is_nth_i(n, s, index) && arith_util(m).is_unsigned(index, idx);
}
/**
   \brief Build nth_i of \c s at the constant integer index \c i.
*/
app* seq_util::str::mk_nth_c(expr* s, unsigned i) const {
    auto* index = arith_util(m).mk_int(i);
    return mk_nth_i(s, index);
}
/*
  Flattens the concatenation e and appends its non-empty leaves to es in
  left-to-right order. Left operands are handled by recursion, the right spine
  by iteration.
*/
void seq_util::str::get_concat(expr* e, expr_ref_vector& es) const {
    expr* head = nullptr;
    expr* tail = nullptr;
    for (; is_concat(e, head, tail); e = tail)
        get_concat(head, es);
    if (is_empty(e))
        return;
    es.push_back(e);
}
/*
  Returns true if s is an expression of the form (l = |u|) |u|-k or (-k)+|u| or |u|+(-k).
  Also returns true and assigns k=0 and l=s if s is |u|.

  On success: l is the length term |u|, u is the sequence under the length, and
  k >= 0 is the amount subtracted from |u|.
  On failure the out-parameters may have been partially overwritten by the
  attempted matches and must not be relied upon.
*/
bool seq_util::str::is_len_sub(expr const* s, expr*& l, expr*& u, rational& k) const {
    expr* x = nullptr;
    rational v;
    arith_util a(m);
    if (is_length(s, u)) {
        // s itself is |u|: report the length term in l (as documented and as the
        // other cases do) rather than leaving u unset and storing the sequence in l.
        l = const_cast<expr*>(s);
        k = 0;
        return true;
    }
    if (a.is_sub(s, l, x) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonneg()) {
        // |u| - k
        k = v;
        return true;
    }
    if (a.is_add(s, l, x) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonpos()) {
        // |u| + (-k)
        k = - v;
        return true;
    }
    if (a.is_add(s, x, l) && is_length(l, u) && a.is_numeral(x, v) && v.is_nonpos()) {
        // (-k) + |u|
        k = - v;
        return true;
    }
    return false;
}
/*
  Returns true iff s is built only from concatenations whose leaves are empty
  sequences or unit sequences. Uses an explicit work list instead of recursion.
*/
bool seq_util::str::is_concat_of_units(expr* s) const {
    // The element type is required: ptr_vector is a template over the pointee type.
    ptr_vector<expr> todo;
    todo.push_back(s);
    while (!todo.empty()) {
        expr* e = todo.back();
        todo.pop_back();
        if (is_empty(e) || is_unit(e))
            continue;
        if (!is_concat(e))
            return false;
        // Visit every argument of the concatenation.
        todo.append(to_app(e)->get_num_args(), to_app(e)->get_args());
    }
    return true;
}
/*
  Recognizes a sequence holding exactly one element, either as a string literal
  of length 1 or as a unit application. On success c receives the element.
*/
bool seq_util::str::is_unit_string(expr const* s, expr_ref& c) const {
    zstring literal;
    if (is_string(s, literal) && literal.length() == 1) {
        c = mk_char(literal[0]);
        return true;
    }
    expr* elem = nullptr;
    if (!is_unit(s, elem))
        return false;
    c = elem;
    return true;
}
/*
  Like get_concat, but string literals met as leaves are expanded into one unit
  sequence per character. Empty leaves are dropped; other leaves are appended
  unchanged.
*/
void seq_util::str::get_concat_units(expr* e, expr_ref_vector& es) const {
    expr* head = nullptr;
    expr* tail = nullptr;
    for (; is_concat(e, head, tail); e = tail)
        get_concat_units(head, es);
    zstring literal;
    if (is_string(e, literal)) {
        unsigned const len = literal.length();
        unsigned pos = 0;
        while (pos < len) {
            es.push_back(mk_unit(mk_char(literal, pos)));
            ++pos;
        }
        return;
    }
    if (is_empty(e))
        return;
    es.push_back(e);
}
/*
  Builds the equality between s and the empty sequence of the sort of s.
*/
app* seq_util::str::mk_is_empty(expr* s) const {
    auto* const empty_seq = mk_empty(s->get_sort());
    return m.mk_eq(s, empty_seq);
}
/*
  Computes a lower bound on the length of the sequence s by summing the
  contributions of the leaves of its concatenation structure: 1 for a unit,
  the literal length for a string constant, 0 for anything else.
*/
unsigned seq_util::str::min_length(expr* s) const {
    SASSERT(u.is_seq(s));
    auto leaf_length = [&](expr* leaf) -> unsigned {
        if (is_unit(leaf))
            return 1u;
        zstring literal;
        if (is_string(leaf, literal))
            return literal.length();
        return 0u;
    };
    unsigned total = 0;
    expr* head = nullptr;
    expr* tail = nullptr;
    for (; is_concat(s, head, tail); s = tail)
        total += is_concat(head) ? min_length(head) : leaf_length(head);
    return total + leaf_length(s);
}
/*
  Computes an upper bound on the length of the sequence s, or UINT_MAX when no
  bound can be derived. Leaf contributions: 0 for empty, 1 for unit and at,
  the numeric length argument for extract (when it is an unsigned numeral),
  the literal length for a string constant, UINT_MAX otherwise.
  Contributions are combined with u.max_plus.
*/
unsigned seq_util::str::max_length(expr* s) const {
    SASSERT(u.is_seq(s));
    auto get_length = [&](expr* e) -> unsigned {
        // All match targets are local to the lambda. Matching an extract must not
        // overwrite the caller's concat operands, otherwise the traversal below
        // would continue on the extract's offset argument instead of the concat tail.
        expr* base = nullptr, *offset = nullptr, *len = nullptr;
        unsigned n = 0;
        zstring st;
        if (is_empty(e))
            return 0u;
        if (is_unit(e))
            return 1u;
        if (is_at(e))
            return 1u;
        if (is_extract(e, base, offset, len))
            return arith_util(m).is_unsigned(len, n) ? n : UINT_MAX;
        if (is_string(e, st))
            return st.length();
        return UINT_MAX;
    };
    unsigned result = 0;
    expr* s1 = nullptr, *s2 = nullptr;
    while (is_concat(s, s1, s2)) {
        if (is_concat(s1))
            result = u.max_plus(max_length(s1), result);
        else
            result = u.max_plus(get_length(s1), result);
        s = s2;
    }
    result = u.max_plus(get_length(s), result);
    return result;
}
/*
  Returns the min_length field of the info record computed for the regex r.
*/
unsigned seq_util::rex::min_length(expr* r) const {
    SASSERT(u.is_re(r));
    auto const& r_info = get_info(r);
    return r_info.min_length;
}
/*
  Computes an upper bound on the length of words matched by the regex r by
  structural recursion; UINT_MAX means no bound was derived.
*/
unsigned seq_util::rex::max_length(expr* r) const {
    SASSERT(u.is_re(r));
    expr* lhs = nullptr;
    expr* rhs = nullptr;
    expr* aux = nullptr;
    unsigned lo = 0, hi = 0;
    if (is_empty(r))
        return 0;
    if (is_concat(r, lhs, rhs))
        return u.max_plus(max_length(lhs), max_length(rhs));
    if (is_union(r, lhs, rhs) || m.is_ite(r, aux, lhs, rhs)) {
        // either alternative may be taken: the larger bound applies
        return std::max(max_length(lhs), max_length(rhs));
    }
    if (is_intersection(r, lhs, rhs)) {
        // both operands must match: the smaller bound applies
        return std::min(max_length(lhs), max_length(rhs));
    }
    if (is_diff(r, lhs, rhs))
        return max_length(lhs);
    if (is_reverse(r, lhs))
        return max_length(lhs);
    if (is_opt(r, lhs))
        return max_length(lhs);
    if (is_loop(r, lhs, lo, hi))
        return u.max_mul(hi, max_length(lhs));
    if (is_to_re(r, aux))
        return u.str.max_length(aux);
    if (is_range(r))
        return 1;
    if (is_of_pred(r))
        return 1;
    if (is_full_char(r))
        return 1;
    // Else: star, plus, complement, full_seq, loop(r,r1,lo), derivative
    return UINT_MAX;
}
/**
   \brief determine if \c n is a range regular expression where the lower and upper bounds
   are given by single characters.

   Range expressions whose bounds are not single characters are either the empty
   language (a bound is a string but not a single character) or symbolic (both
   bounds are not ground strings). The general is_range can be used to process
   those cases, but they don't correspond to mainstream regex usage.

   On success \c lo and \c hi receive the character codes of the two bounds.
*/
bool seq_util::rex::is_range(expr const* n, unsigned& lo, unsigned& hi) const {
    expr* lo_expr = nullptr;
    expr* hi_expr = nullptr;
    zstring lo_str, hi_str;
    bool const single_char_bounds =
        is_range(n, lo_expr, hi_expr) &&
        u.str.is_string(lo_expr, lo_str) &&
        u.str.is_string(hi_expr, hi_str) &&
        lo_str.length() == 1 &&
        hi_str.length() == 1;
    if (!single_char_bounds)
        return false;
    lo = lo_str[0];
    hi = hi_str[0];
    return true;
}
/*
  Returns the sort stored as the first parameter of the regex sort re.
*/
sort* seq_util::rex::to_seq(sort* re) {
    (void)u; // keeps u referenced when SASSERT compiles away
    SASSERT(u.is_re(re));
    auto const& first_param = re->get_parameter(0);
    return to_sort(first_param.get_ast());
}
/*
  Builds the regex power application of r with the exponent n passed as the
  single declaration parameter.
*/
app* seq_util::rex::mk_power(expr* r, unsigned n) {
    parameter param(n);
    // One parameter (the exponent) and one argument (the regex body).
    return m.mk_app(m_fid, OP_RE_POWER, 1, &param, 1, &r);
}
/*
  Builds the regex loop application of r with only a lower bound lo, passed as
  the single declaration parameter.
*/
app* seq_util::rex::mk_loop(expr* r, unsigned lo) {
    parameter param(lo);
    // One parameter (the lower bound) and one argument (the regex body).
    return m.mk_app(m_fid, OP_RE_LOOP, 1, &param, 1, &r);
}
/*
  Builds the regex loop application of r with numeric lower and upper bounds,
  passed as the two declaration parameters.
*/
app* seq_util::rex::mk_loop(expr* r, unsigned lo, unsigned hi) {
    parameter bounds[] = { parameter(lo), parameter(hi) };
    return m.mk_app(m_fid, OP_RE_LOOP, 2, bounds, 1, &r);
}
/*
  Builds r{lo,hi} while avoiding degenerate loop terms:
    - r{0,0} is, by definition, epsilon and is returned as such;
    - r{1,1} is r itself, so no loop is created.
  Every other pair of bounds yields an ordinary loop application.
*/
expr* seq_util::rex::mk_loop_proper(expr* r, unsigned lo, unsigned hi)
{
    bool const zero_times = (lo == 0) && (hi == 0);
    bool const exactly_once = (lo == 1) && (hi == 1);
    if (zero_times) {
        // a loop with both bounds 0 is invalid as a loop; use epsilon instead
        sort* seq_sort = nullptr;
        VERIFY(u.is_re(r, seq_sort));
        return mk_epsilon(seq_sort);
    }
    if (exactly_once) {
        // not actually a loop
        return r;
    }
    parameter bounds[] = { parameter(lo), parameter(hi) };
    return m.mk_app(m_fid, OP_RE_LOOP, 2, bounds, 1, &r);
}
/*
  Builds the regex loop application of r whose lower bound is the term lo,
  passed as a second argument rather than as a declaration parameter.
*/
app* seq_util::rex::mk_loop(expr* r, expr* lo) {
    expr* operands[] = { r, lo };
    return m.mk_app(m_fid, OP_RE_LOOP, 0, nullptr, 2, operands);
}
/*
  Builds the regex loop application of r whose bounds are the terms lo and hi,
  passed as the second and third arguments.
*/
app* seq_util::rex::mk_loop(expr* r, expr* lo, expr* hi) {
    expr* operands[] = { r, lo, hi };
    return m.mk_app(m_fid, OP_RE_LOOP, 0, nullptr, 3, operands);
}
/*
  Builds the nullary OP_RE_FULL_CHAR_SET constant with range sort s.
*/
app* seq_util::rex::mk_full_char(sort* s) {
    app* const full_char = m.mk_app(m_fid, OP_RE_FULL_CHAR_SET, 0, nullptr, 0, nullptr, s);
    return full_char;
}
/*
  Builds the nullary OP_RE_FULL_SEQ_SET constant with range sort s.
*/
app* seq_util::rex::mk_full_seq(sort* s) {
    app* const full_seq = m.mk_app(m_fid, OP_RE_FULL_SEQ_SET, 0, nullptr, 0, nullptr, s);
    return full_seq;
}
/*
  Builds the nullary OP_RE_EMPTY_SET constant with range sort s.
*/
app* seq_util::rex::mk_empty(sort* s) {
    app* const empty_set = m.mk_app(m_fid, OP_RE_EMPTY_SET, 0, nullptr, 0, nullptr, s);
    return empty_set;
}
/*
  Builds the OP_RE_OF_PRED application over the single argument p.
*/
app* seq_util::rex::mk_of_pred(expr* p) {
    app* const of_pred = m.mk_app(m_fid, OP_RE_OF_PRED, 0, nullptr, 1, &p);
    return of_pred;
}
/*
  Matches a loop with one argument and two numeric declaration parameters.
  On success body is the looped regex and lo/hi are the two parameters.
*/
bool seq_util::rex::is_loop(expr const* n, expr*& body, unsigned& lo, unsigned& hi) const {
    if (!is_loop(n))
        return false;
    app const* loop_app = to_app(n);
    auto const* decl = loop_app->get_decl();
    if (loop_app->get_num_args() != 1 || decl->get_num_parameters() != 2)
        return false;
    body = loop_app->get_arg(0);
    lo = decl->get_parameter(0).get_int();
    hi = decl->get_parameter(1).get_int();
    return true;
}
/*
  Matches a loop with one argument and a single numeric declaration parameter.
  On success body is the looped regex and lo is that parameter.
*/
bool seq_util::rex::is_loop(expr const* n, expr*& body, unsigned& lo) const {
    if (!is_loop(n))
        return false;
    app const* loop_app = to_app(n);
    auto const* decl = loop_app->get_decl();
    if (loop_app->get_num_args() != 1 || decl->get_num_parameters() != 1)
        return false;
    body = loop_app->get_arg(0);
    lo = decl->get_parameter(0).get_int();
    return true;
}
/*
  Matches a loop with three arguments: the body and two bound terms.
*/
bool seq_util::rex::is_loop(expr const* n, expr*& body, expr*& lo, expr*& hi) const {
    if (!is_loop(n))
        return false;
    app const* loop_app = to_app(n);
    if (loop_app->get_num_args() != 3)
        return false;
    body = loop_app->get_arg(0);
    lo = loop_app->get_arg(1);
    hi = loop_app->get_arg(2);
    return true;
}
/*
  Matches a loop with two arguments: the body and a lower-bound term.
*/
bool seq_util::rex::is_loop(expr const* n, expr*& body, expr*& lo) const {
    if (!is_loop(n))
        return false;
    app const* loop_app = to_app(n);
    if (loop_app->get_num_args() != 2)
        return false;
    body = loop_app->get_arg(0);
    lo = loop_app->get_arg(1);
    return true;
}
/**
   Returns true iff r is the epsilon regex, i.e. to_re applied to an empty sequence.
*/
bool seq_util::rex::is_epsilon(expr* r) const {
    expr* inner = nullptr;
    if (!is_to_re(r, inner))
        return false;
    return u.str.is_empty(inner);
}
/**
   Makes the epsilon regex for a given sequence sort: to_re of the empty sequence.
*/
app* seq_util::rex::mk_epsilon(sort* seq_sort) {
    auto* const empty_seq = u.str.mk_empty(seq_sort);
    return mk_to_re(empty_seq);
}
/*
Produces compact view of concrete concatenations such as (abcd).
*/
bool seq_util::rex::pp::print_seq(std::ostream& out, expr* s) const {
zstring z;
expr* x, * j, * k, * l, * i, * x_;
if (re.u.str.is_empty(s))
out << "()";
else if (re.u.str.is_concat(s)) {
expr_ref_vector es(re.m);
re.u.str.get_concat(s, es);
for (expr* e : es)
print(out, e);
}
else if (re.u.str.is_string(s, z)) {
for (unsigned i = 0; i < z.length(); i++)
out << (char)z[i];
}
else if (re.u.str.is_at(s, x, i))
print(out, x) << "@", print(out, i);
else if (re.u.str.is_extract(s, x, j, k)) {
rational jv, iv;
print(out, x);
if (arith_util(re.m).is_numeral(j, jv)) {
if (arith_util(re.m).is_numeral(k, iv)) {
// output X[j,k]
out << "[" << jv.get_int32() << "," << jv.get_int32() << "]";
}
else if (arith_util(re.m).is_sub(k, l, i) && re.u.str.is_length(l, x_) && x == x_ &&
arith_util(re.m).is_numeral(i, iv) && iv == jv) {
// case X[j,|X|-j] is denoted by X[j..]
out << "[" << jv.get_int32() << "..]";
}
else if (((arith_util(re.m).is_add(k, l, i) && re.u.str.is_length(l, x_)) ||
(arith_util(re.m).is_add(k, i, l) && re.u.str.is_length(l, x_))) && x == x_ &&
arith_util(re.m).is_numeral(i, iv) && iv.get_int32() + jv.get_int32() == 0) {
// case X[j,|X|-j] is denoted by X[j..]
out << "[" << jv.get_int32() << "..]";
}
else {
out << "[" << jv.get_int32() << ",";
print(out, k);
out << "]";
}
}
else {
out << "[";
print(out, j);
out << ",";
print(out, k);
out << "]";
}
}
else
return false;
return true;
}
/*
  Produces output such as [a-z] for a range with bounds s1 and s2.
*/
std::ostream& seq_util::rex::pp::print_range(std::ostream& out, expr* s1, expr* s2) const {
    print(out << "[", s1) << "-";
    print(out, s2) << "]";
    return out;
}
/*
  Checks if parenthesis can be omitted in some cases in a loop body or in complement.
  This is the case for to_re of a unit sequence, ranges, the empty regex,
  epsilon and the full character set.
*/
bool seq_util::rex::pp::can_skip_parenth(expr* r) const {
    expr* inner = nullptr;
    if (re.is_to_re(r, inner) && re.u.str.is_unit(inner))
        return true;
    if (re.is_range(r))
        return true;
    if (re.is_empty(r))
        return true;
    if (re.is_epsilon(r))
        return true;
    return re.is_full_char(r);
}
/*
  Specialize output for a unit sequence converting to visible ASCII characters if possible.

  Handles a unit sequence over a constant character, a constant character
  itself, nth_i terms (printed as s[i]) and length terms (printed as |s|).
  Returns false without printing when s is none of these.

  Constant characters are printed as follows:
    - newline, carriage return and form feed as \n, \r, \f;
    - printable ASCII other than the regex meta characters listed below as is
      (with '<' and '>' written as HTML entities when html_encode is set);
    - everything else as a \x or \u hexadecimal escape.
*/
bool seq_util::rex::pp::print_unit(std::ostream& out, expr* s) const {
    expr* e = nullptr, * i = nullptr;
    unsigned n = 0;
    if ((re.u.str.is_unit(s, e) && re.u.is_const_char(e, n)) || re.u.is_const_char(s, n)) {
        // Compare the full code point, not its low byte: truncating to char would
        // make e.g. code point 0x10A print as "\n".
        if (n == '\n')
            out << "\\n";
        else if (n == '\r')
            out << "\\r";
        else if (n == '\f')
            out << "\\f";
        else if (32 <= n && n < 127 && n != '\"' && n != ' '
                 && n != '\\' && n != '\'' && n != '?' && n != '.' && n != '(' && n != ')' && n != '[' && n != ']'
                 && n != '{' && n != '}' && n != '&') {
            char const c = static_cast<char>(n);
            if (html_encode && c == '<')
                out << "&lt;";
            else if (html_encode && c == '>')
                out << "&gt;";
            else
                // '&' and '"' never reach this point: they are excluded above and
                // are printed as hexadecimal escapes instead.
                out << c;
        }
        else {
            // std::hex is sticky; restore the caller's format flags afterwards so
            // that numbers printed later (loop bounds, offsets) stay decimal.
            std::ios_base::fmtflags const saved_flags = out.flags();
            if (n <= 0xF)
                out << "\\x0" << std::hex << n;
            else if (n <= 0xFF)
                out << "\\x" << std::hex << n;
            else if (n <= 0xFFF)
                out << "\\u0" << std::hex << n;
            else
                out << "\\u" << std::hex << n;
            out.flags(saved_flags);
        }
    }
    else if (re.u.str.is_nth_i(s, e, i)) {
        print(out, e) << "[";
        print(out, i) << "]";
    }
    else if (re.u.str.is_length(s, e))
        print(out << "|", e) << "|";
    else
        return false;
    return true;
}
/*
  Pretty prints the regex r into the ostream out

  The expression e may also be a sequence, arithmetic or Boolean sub-term of a
  regex; the cases are tried in the order written and the first match wins.
  Unit-like and sequence terms are delegated to print_unit and print_seq.
  When html_encode is set, some operators are written with non-ASCII symbols.
  Returns out.
*/
std::ostream& seq_util::rex::pp::print(std::ostream& out, expr* e) const {
    expr* r1 = nullptr, * r2 = nullptr, * s = nullptr, * s2 = nullptr;
    unsigned lo = 0, hi = 0;
    arith_util a(re.m);
    rational v;
    if (!e)
        out << "null";
    else if (print_unit(out, e))
        ;
    else if (print_seq(out, e))
        ;
    else if (re.is_full_char(e))
        out << ".";
    else if (re.is_full_seq(e))
        out << ".*";
    else if (re.is_epsilon(e))
        // ε = epsilon
        // Must be tested before the general to_re case below: epsilon is itself a
        // to_re term, so testing it later would make this branch unreachable.
        out << (html_encode ? "ε" : "()");
    else if (re.is_to_re(e, s))
        print(out, s);
    else if (re.is_range(e, s, s2))
        print_range(out, s, s2);
    else if (re.is_empty(e))
        // ∅ = emptyset
        out << (html_encode ? "∅" : "[]");
    else if (re.is_concat(e, r1, r2)) {
        print(out, r1);
        print(out, r2);
    }
    else if (re.is_antimirov_union(e, r1, r2) || re.is_union(e, r1, r2)) {
        out << "(";
        print(out, r1);
        out << (html_encode ? "⋃" : "|");
        print(out, r2);
        out << ")";
    }
    else if (re.is_intersection(e, r1, r2)) {
        out << "(";
        print(out, r1);
        out << (html_encode ? "⋂" : "&");
        print(out, r2);
        out << ")";
    }
    else if (re.is_complement(e, r1)) {
        out << "~";
        if (can_skip_parenth(r1))
            print(out, r1);
        else {
            out << "(";
            print(out, r1);
            out << ")";
        }
    }
    else if (re.is_plus(e, r1)) {
        if (can_skip_parenth(r1)) {
            print(out, r1);
            out << "+";
        }
        else {
            out << "(";
            print(out, r1);
            out << ")+";
        }
    }
    else if (re.is_star(e, r1)) {
        if (can_skip_parenth(r1)) {
            print(out, r1);
            out << "*";
        }
        else {
            out << "(";
            print(out, r1);
            out << ")*";
        }
    }
    else if (re.is_loop(e, r1, lo)) {
        // loop with a lower bound only
        if (can_skip_parenth(r1))
            print(out, r1) << "{" << lo << ",}";
        else {
            out << "(";
            print(out, r1);
            out << "){" << lo << ",}";
        }
    }
    else if (re.is_loop(e, r1, lo, hi)) {
        // loop with both bounds; equal bounds are printed once
        if (can_skip_parenth(r1)) {
            print(out, r1);
            if (lo == hi)
                out << "{" << lo << "}";
            else
                out << "{" << lo << "," << hi << "}";
        }
        else {
            out << "(";
            print(out, r1);
            if (lo == hi)
                out << "){" << lo << "}";
            else
                out << "){" << lo << "," << hi << "}";
        }
    }
    else if (re.is_diff(e, r1, r2)) {
        out << "(";
        print(out, r1);
        out << ")\\(";
        print(out, r2);
        out << ")";
    }
    else if (re.m.is_ite(e, s, r1, r2)) {
        out << (html_encode ? "(𝐢𝐟 " : "(if ");
        print(out, s);
        out << (html_encode ? " 𝐭𝗵𝐞𝐧 " : " then ");
        print(out, r1);
        out << (html_encode ? " 𝐞𝐥𝘀𝐞 " : " else ");
        print(out, r2);
        out << ")";
    }
    else if (re.is_opt(e, r1)) {
        if (can_skip_parenth(r1))
            print(out, r1) << "?";
        else {
            out << "(";
            print(out, r1);
            out << ")?";
        }
    }
    else if (re.is_reverse(e, r1)) {
        out << "(reverse ";
        print(out, r1);
        out << ")";
    }
    else if (re.m.is_eq(e, r1, r2)) {
        out << "(";
        print(out, r1);
        out << " = ";
        print(out, r2);
        out << ")";
    }
    else if (re.m.is_not(e, r1)) {
        out << "!";
        print(out, r1);
    }
    // Additions with a negative numeral operand are printed as subtractions.
    else if (a.is_add(e, s, s2) && a.is_numeral(s, v) && v < 0)
        print(out, s2) << " - " << -v;
    else if (a.is_add(e, s, s2) && a.is_numeral(s2, v) && v < 0)
        print(out, s) << " - " << -v;
    else if (a.is_add(e, s, s2))
        print(out, s) << " + ", print(out, s2);
    else if (a.is_sub(e, s, s2) && a.is_numeral(s2, v) && v > 0)
        print(out, s) << " - " << v;
    else if (a.is_le(e, s, s2))
        print(out << "(", s) << " <= ", print(out, s2) << ")";
    else if (re.m.is_value(e))
        out << mk_pp(e, re.m);
    else if (is_app(e) && to_app(e)->get_num_args() == 0)
        out << mk_pp(e, re.m);
    else if (is_app(e)) {
        // generic application: (name arg1 ... argn)
        out << "(" << to_app(e)->get_decl()->get_name();
        for (expr* arg : *to_app(e))
            print(out << " ", arg);
        out << ")";
    }
    else
        // for all remaining cases use the default pretty printer
        out << mk_pp(e, re.m);
    return out;
}
/*
  Prints the expression held by this printer (ex) to out and returns out.
*/
std::ostream& seq_util::rex::pp::display(std::ostream& out) const {
    std::ostream& printed = print(out, ex);
    return printed;
}
/*
  Pretty prints the regex r into the output string (html encoding disabled).
*/
std::string seq_util::rex::to_str(expr* r) const {
    std::ostringstream buffer;
    bool const html = false;
    pp(u.re, r, html).display(buffer);
    return buffer.str();
}
/*
  Pretty prints the regex r into the output string that is htmlencoded.
*/
std::string seq_util::rex::to_strh(expr* r) const {
    std::ostringstream buffer;
    bool const html = true;
    pp(u.re, r, html).display(buffer);
    return buffer.str();
}
/*
  Returns true iff info has been computed for the regex r, i.e. the cache has a
  slot for the id of r and that slot holds a valid entry.
*/
bool seq_util::rex::has_valid_info(expr* r) const {
    if (r->get_id() >= m_infos.size())
        return false;
    return m_infos[r->get_id()].is_valid();
}
/*
  Returns the info in the cache if the info is valid. Returns invalid_info otherwise.
*/
seq_util::rex::info seq_util::rex::get_cached_info(expr* e) const {
    if (!has_valid_info(e))
        return invalid_info;
    return m_infos[e->get_id()];
}
/*
  Get the information value associated with the regular expression e.
  A cached value is returned directly; otherwise e is added to m_info_pinned
  and the info is computed recursively.
*/
seq_util::rex::info seq_util::rex::get_info(expr* e) const
{
    SASSERT(u.is_re(e));
    info cached = get_cached_info(e);
    if (!cached.is_valid()) {
        m_info_pinned.push_back(e);
        return get_info_rec(e);
    }
    return cached;
}
/*
  Gets the info value for the given regex e, possibly making a new info recursively over the structure of e.
  The computed value is stored in the cache under the id of e before it is returned.
*/
seq_util::rex::info seq_util::rex::get_info_rec(expr* e) const {
    info result = get_cached_info(e);
    if (result.is_valid())
        return result;
    if (is_app(e))
        result = mk_info_rec(to_app(e));
    else
        result = unknown_info;
    m_infos.setx(e->get_id(), result, invalid_info);
    STRACE("re_info", tout << "compute_info(" << pp(u.re, e, false) << ")=" << result << std::endl;);
    return result;
}
/*
  Computes the info value for the given regex e recursively over the structure of e.
  The regex e does not yet have an entry in the cache.
  Operators of the sequence family are dispatched on their kind; outside that
  family only if-then-else is analysed. Everything else yields unknown_info.
*/
seq_util::rex::info seq_util::rex::mk_info_rec(app* e) const {
    if (e->get_family_id() != u.get_family_id()) {
        expr* cond = nullptr, * then_branch = nullptr, * else_branch = nullptr;
        if (!u.m.is_ite(e, cond, then_branch, else_branch))
            return unknown_info;
        info const then_info = get_info_rec(then_branch);
        info const else_info = get_info_rec(else_branch);
        return then_info.orelse(else_info);
    }

    auto* const decl = e->get_decl();
    switch (decl->get_decl_kind()) {
    case OP_RE_EMPTY_SET:
        return info(true, l_false, UINT_MAX);
    case OP_RE_FULL_SEQ_SET:
        return info(true, l_true, 0);
    case OP_RE_STAR: {
        info const body = get_info_rec(e->get_arg(0));
        return body.star();
    }
    case OP_RE_OPTION: {
        info const body = get_info_rec(e->get_arg(0));
        return body.opt();
    }
    case OP_RE_RANGE:
    case OP_RE_FULL_CHAR_SET:
    case OP_RE_OF_PRED:
        //TBD: check if the character predicate contains uninterpreted symbols or is nonground or is unsat
        //TBD: check if the range is unsat
        return info(true, l_false, 1);
    case OP_RE_CONCAT: {
        info const lhs = get_info_rec(e->get_arg(0));
        info const rhs = get_info_rec(e->get_arg(1));
        return lhs.concat(rhs, u.re.is_concat(e->get_arg(0)));
    }
    case OP_RE_UNION: {
        info const lhs = get_info_rec(e->get_arg(0));
        info const rhs = get_info_rec(e->get_arg(1));
        return lhs.disj(rhs);
    }
    case OP_RE_INTERSECT: {
        info const lhs = get_info_rec(e->get_arg(0));
        info const rhs = get_info_rec(e->get_arg(1));
        return lhs.conj(rhs);
    }
    case OP_SEQ_TO_RE: {
        unsigned const seq_min_length = u.str.min_length(e->get_arg(0));
        bool const seq_is_value = m.is_value(e->get_arg(0));
        lbool seq_nullable = l_undef;
        if (seq_is_value && seq_min_length == 0)
            seq_nullable = l_true;
        else if (seq_min_length > 0)
            seq_nullable = l_false;
        return info(seq_is_value, seq_nullable, seq_min_length);
    }
    case OP_RE_REVERSE:
        return get_info_rec(e->get_arg(0));
    case OP_RE_PLUS: {
        info const body = get_info_rec(e->get_arg(0));
        return body.plus();
    }
    case OP_RE_COMPLEMENT: {
        info const body = get_info_rec(e->get_arg(0));
        return body.complement();
    }
    case OP_RE_LOOP: {
        info const body = get_info_rec(e->get_arg(0));
        // bounds default to {0, UINT_MAX} when not given as declaration parameters
        unsigned lower_bound = 0;
        unsigned upper_bound = UINT_MAX;
        if (decl->get_num_parameters() >= 1)
            lower_bound = decl->get_parameter(0).get_int();
        if (decl->get_num_parameters() == 2)
            upper_bound = decl->get_parameter(1).get_int();
        return body.loop(lower_bound, upper_bound);
    }
    case OP_RE_DIFF: {
        info const lhs = get_info_rec(e->get_arg(0));
        info const rhs = get_info_rec(e->get_arg(1));
        return lhs.diff(rhs);
    }
    default:
        break;
    }
    return unknown_info;
}
/*
  Writes a textual form of the info to out: the nullable flag (T/F/U) and the
  min_length when the info is known, otherwise UNKNOWN or INVALID.
*/
std::ostream& seq_util::rex::info::display(std::ostream& out) const {
    if (!is_known()) {
        if (is_valid())
            out << "UNKNOWN";
        else
            out << "INVALID";
        return out;
    }
    char const* nullable_tag = "U";
    if (nullable == l_true)
        nullable_tag = "T";
    else if (nullable == l_false)
        nullable_tag = "F";
    out << "info("
        << "nullable=" << nullable_tag << ", "
        << "min_length=" << min_length << ")";
    return out;
}
/*
  String representation of the info, as produced by display.
*/
std::string seq_util::rex::info::str() const {
    std::ostringstream buffer;
    display(buffer);
    std::string text = buffer.str();
    return text;
}
/*
  Info for the Kleene star of this regex: nullable with minimal length 0.
  The interpreted flag is carried over unchanged.
*/
seq_util::rex::info seq_util::rex::info::star() const {
    //if is_known() is false then all mentioned properties will remain false
    info starred(interpreted, l_true, 0);
    return starred;
}
/*
  Info for r+ : same interpreted flag, nullability and minimal length as r.
  An info that is not known is returned unchanged.
*/
seq_util::rex::info seq_util::rex::info::plus() const {
    if (!is_known())
        return *this;
    //plus never occurs in a normalized regex
    return info(interpreted, nullable, min_length);
}
/*
  Info for the optional regex r? : nullable with minimal length 0.
  The interpreted flag is carried over unchanged.
*/
seq_util::rex::info seq_util::rex::info::opt() const {
    // if is_known() is false then all mentioned properties will remain false
    // optional construct never occurs in a normalized regex
    info optional(interpreted, l_true, 0);
    return optional;
}
/*
  Info for the complement: nullability is flipped (undef stays undef) and the
  minimal length is 1 when the complement is known not to be nullable, else 0.
  An info that is not known is returned unchanged.
*/
seq_util::rex::info seq_util::rex::info::complement() const {
    if (!is_known())
        return *this;
    lbool compl_nullable = l_undef;
    if (nullable == l_true)
        compl_nullable = l_false;
    else if (nullable == l_false)
        compl_nullable = l_true;
    unsigned compl_min_length = 0;
    if (compl_nullable == l_false)
        compl_min_length = 1;
    return info(interpreted, compl_nullable, compl_min_length);
}
/*
  Info for the concatenation of this regex with rhs.
  If either side is not known, that side's info is returned (this side first).
  Otherwise minimal lengths are added (saturating at UINT_MAX), the result is
  nullable only when both sides are, and interpreted only when both are.
  The lhs_is_concat argument is not consulted.
*/
seq_util::rex::info seq_util::rex::info::concat(seq_util::rex::info const& rhs, bool lhs_is_concat) const {
    if (!is_known())
        return *this;
    if (!rhs.is_known())
        return rhs;
    unsigned total = min_length + rhs.min_length;
    // unsigned wrap-around shows up as a sum smaller than an operand
    if (total < min_length || total < rhs.min_length)
        total = UINT_MAX;
    lbool both_nullable = l_undef;
    if (nullable == l_false || rhs.nullable == l_false)
        both_nullable = l_false;
    else if (nullable == l_true && rhs.nullable == l_true)
        both_nullable = l_true;
    return info(interpreted && rhs.interpreted, both_nullable, total);
}
/*
  Info for the union of this regex with rhs.
  When neither side is known, rhs is returned. Otherwise the result is nullable
  if either side is, not nullable if both are not, and its minimal length is
  the smaller of the two.
*/
seq_util::rex::info seq_util::rex::info::disj(seq_util::rex::info const& rhs) const {
    if (!is_known() && !rhs.is_known())
        return rhs;
    //works correctly if one of the arguments is unknown
    lbool either_nullable = l_undef;
    if (nullable == l_true || rhs.nullable == l_true)
        either_nullable = l_true;
    else if (nullable == l_false && rhs.nullable == l_false)
        either_nullable = l_false;
    return info(interpreted && rhs.interpreted,
                either_nullable,
                std::min(min_length, rhs.min_length));
}
/*
  Info for the intersection of this regex with rhs.
  If either side is not known, that side's info is returned (this side first).
  Otherwise the result is nullable when both sides are, not nullable when
  either is not, and its minimal length is the larger of the two.
*/
seq_util::rex::info seq_util::rex::info::conj(seq_util::rex::info const& rhs) const {
    if (!is_known())
        return *this;
    if (!rhs.is_known())
        return rhs;
    lbool both_nullable = l_undef;
    if (nullable == l_true && rhs.nullable == l_true)
        both_nullable = l_true;
    else if (nullable == l_false || rhs.nullable == l_false)
        both_nullable = l_false;
    return info(interpreted && rhs.interpreted,
                both_nullable,
                std::max(min_length, rhs.min_length));
}
/*
  Info for the difference (this regex minus rhs).
  If either side is not known, that side's info is returned (this side first).

  The empty word belongs to the difference iff it belongs to this regex and not
  to rhs, hence:
    - not nullable when this is not nullable or rhs is nullable;
    - nullable when this is nullable and rhs is not;
    - undetermined otherwise.
  Every word of the difference is a word of this regex, so min_length of this
  regex is a valid lower bound; the minimal length of rhs says nothing about
  the difference. The bound is raised to 1 when the difference is known not to
  contain the empty word.
*/
seq_util::rex::info seq_util::rex::info::diff(seq_util::rex::info const& rhs) const {
    if (!is_known())
        return *this;
    if (!rhs.is_known())
        return rhs;
    lbool diff_nullable = l_undef;
    if (nullable == l_false || rhs.nullable == l_true)
        diff_nullable = l_false;
    else if (nullable == l_true && rhs.nullable == l_false)
        diff_nullable = l_true;
    unsigned diff_min_length = min_length;
    if (diff_nullable == l_false && diff_min_length == 0)
        diff_min_length = 1;
    return info(interpreted && rhs.interpreted, diff_nullable, diff_min_length);
}
/*
  Info for an if-then-else whose branches have this info and i.
  If either branch is not known, that branch's info is returned (this first).
  Otherwise nullability is kept only when both branches agree on true or on
  false, the minimal length is the smaller of the two, and the result is marked
  as not interpreted.
*/
seq_util::rex::info seq_util::rex::info::orelse(seq_util::rex::info const& i) const {
    if (!is_known())
        return *this;
    if (!i.is_known())
        return i;
    // TBD: whether ite is interpreted or not depends on whether the condition is interpreted and both branches are interpreted
    lbool ite_nullable = l_undef;
    if (nullable == l_true && i.nullable == l_true)
        ite_nullable = l_true;
    else if (nullable == l_false && i.nullable == l_false)
        ite_nullable = l_false;
    unsigned const ite_min_length = std::min(min_length, i.min_length);
    return info(false, ite_nullable, ite_min_length);
}
/*
  Info for a loop over this regex with the given lower and upper iteration
  bounds. The minimal length is min_length * lower, saturating at UINT_MAX.
  The loop is nullable when the body is nullable or when zero iterations are
  allowed; otherwise the nullability of the body is kept.
  The upper bound does not influence the result.
  An info that is not known is returned unchanged.
*/
seq_util::rex::info seq_util::rex::info::loop(unsigned lower, unsigned upper) const {
    if (!is_known())
        return *this;
    unsigned m = 0;
    // Exact overflow test: min_length * lower exceeds UINT_MAX iff
    // min_length > UINT_MAX / lower (for lower != 0). Comparing the wrapped
    // product against its operands misses cases such as 0x18000 * 0x18000.
    if (lower != 0 && min_length > UINT_MAX / lower)
        m = UINT_MAX;
    else
        m = min_length * lower;
    lbool loop_nullable = (nullable == l_true || lower == 0 ? l_true : nullable);
    return info(interpreted, loop_nullable, m);
}
/*
  Copy assignment: copies the known, interpreted, nullable and min_length
  fields from other. Self-assignment is a no-op.
*/
seq_util::rex::info& seq_util::rex::info::operator=(info const& other) {
    if (this != &other) {
        known = other.known;
        interpreted = other.interpreted;
        nullable = other.nullable;
        min_length = other.min_length;
    }
    return *this;
}