All Downloads are FREE. Search and download functionalities are using the official Maven repository.

z3-z3-4.12.6.src.smt.seq_regex.h Maven / Gradle / Ivy

There is a newer version: 4.13.0.1
Show newest version
/*++
Copyright (c) 2020 Microsoft Corporation

Module Name:

    seq_regex.h

Abstract:

    Solver for regexes 

Author:

    Nikolaj Bjorner (nbjorner) 2020-5-22

--*/
#pragma once

#include "util/scoped_vector.h"
#include "util/state_graph.h"
#include "ast/seq_decl_plugin.h"
#include "ast/rewriter/seq_rewriter.h"
#include "ast/rewriter/seq_skolem.h"
#include "smt/smt_context.h"

/*
    *** Tracing and debugging in this module and related modules ***

    Tracing and debugging for the regex solver are split across several
    command-line flags.

        TRACING

        -tr:seq_regex and -tr:seq_regex_brief
        These are the main tags to trace what the regex solver is doing.
        They mostly trace the same things, except that seq_regex_brief
        avoids printing out expressions and tries to abbreviate the output
        as much as possible. seq_regex_brief shows the following output:
            Top-level propagations:
                PIR:      Propagating an in_re constraint
                PE/PNE:   Propagating an empty/non-empty constraint
                PEQ/PNEQ: Propagating an equal/not-equal constraint
                PA:       Propagating an accept constraint
            In tracing, arguments are generally put in parentheses.
            To achieve abbreviated output, expressions are traced in one of two
            ways:
                id243 (expr ID):  the regex or expression with id 243
                3     (state ID): the regex with state ID 3
            When a regex is newly assigned to a state ID, we print this:
                new(id606)=4
            Of these, PA is the most important, and traces as follows:
                PA(x@i,r): propagate accept for string x at index i, regex r.
                (empty), (dead), (blocked), (unfold): info about whether this
                    PA was cut off early, or unfolded into the derivatives
                    (next states)
                d(r1)=r2: r2 is the derivative of r1
                n(r1)=b:  b = whether r1 is nullable or not
                USG(r):   updating state graph for regex r (add all derivatives)

        -tr:state_graph
        This is the tracing done by util/state_graph, the data structure
        that seq_regex uses to track live and dead regexes, which can
        alternatively be used to get a high-level picture of what states
        are being explored and updated as the solver progresses.

        -tr:seq_regex_verbose
        Used for some more frequent tracing (in the style of seq_regex,
        not in the style of seq_regex_brief)

        -tr:seq and -tr:seq_verbose
        These are the underlying sequence theory tracing, often used by
        the rewriter.

        DEBUGGING AND VIEWING STATE GRAPH GRAPHICAL OUTPUT

        -dbg:seq_regex
        Debugging that checks invariants. Currently, checks that derivative
        normal form is correctly preserved in the rewriter.

        -dbg:state_graph
        Debugging for the state graph, which
        1. Checks state graph invariants, and
        2. Generates the files .z3-state-graph.dgml and .z3-state-graph.dot
           which can be used to visually view the state graph being explored,
           during or after executing Z3.
           The output can be viewed:
              - Using Visual Studio for .dgml
              - Using a tool such as xdot (`xdot .z3-state-graph.dot`) for .dot
*/

namespace smt {

    class theory_seq;

    /*
        Regex solver attached to a theory_seq instance.

        The public interface consists of the propagate_* entry points, one per
        kind of regex constraint (in_re, accept, eq/ne, is_empty/is_non_empty).
        Internally, regex expressions are mapped to numeric state IDs that are
        tracked in a state_graph used for dead state detection.
    */
    class seq_regex {
        // Data about a constraint of the form (str.in_re s R)
        struct s_in_re {
            literal m_lit;     // literal stored for the constraint
            expr*   m_s;       // the string term s
            expr*   m_re;      // the regex term R
            bool    m_active;  // always initialized to true by the constructor

            s_in_re(literal l, expr* s, expr* r):
                m_lit(l), m_s(s), m_re(r), m_active(true) {}
        };

        theory_seq&                      th;
        context&                         ctx;
        ast_manager&                     m;
        // Recorded (str.in_re s R) constraints.
        // The element type must be spelled out: template arguments cannot be
        // deduced for a non-static data member.
        vector<s_in_re>                  m_s_in_re;

        /*
            state_graph for dead state detection, and associated methods
        */
        // Lookup table from a regex expression to its state ID
        // (see get_state_id); keys are expr pointers, values are IDs.
        ptr_addr_map<expr, unsigned>   m_expr_to_state;
        // Reverse direction: the expressions that have been assigned state IDs
        // (see get_expr_from_id).
        expr_ref_vector                m_state_to_expr;
        state_graph                    m_state_graph;
        /* map from uninterpreted regex constants to assigned regex expressions by EQ */
        // expr_map                       m_const_to_expr;
        // NOTE(review): presumably an upper bound on the number of states the
        // graph may hold -- confirm against the implementation file.
        unsigned                       m_max_state_graph_size { 10000 };
        // Convert between expressions and states (IDs)
        unsigned get_state_id(expr* e);
        expr* get_expr_from_id(unsigned id);
        // Cycle-detection heuristic
        // Note: Doesn't need to be sound or complete (doesn't affect soundness)
        bool can_be_in_cycle(expr* r1, expr* r2);
        // Update the graph
        bool update_state_graph(expr* r);

        // Printing expressions for seq_regex_brief
        std::string state_str(expr* e);
        std::string expr_id_str(expr* e);

        /*
            Solvers and utilities
        */
        seq_util& u();
        class seq_util::rex& re();
        class seq_util::str& str();
        seq_rewriter& seq_rw();
        seq::skolem& sk();
        arith_util& a();

        bool is_string_equality(literal lit);

        // Get a regex which overapproximates a given string
        expr_ref get_overapprox_regex(expr* s);

        void rewrite(expr_ref& e);

        bool coallesce_in_re(literal lit);

        bool block_unfolding(literal lit, unsigned i);

        expr_ref mk_first(expr* r, expr* n);

        bool is_member(expr* r, expr* u);

        expr_ref symmetric_diff(expr* r1, expr* r2);

        expr_ref is_nullable_wrapper(expr* r);
        expr_ref mk_derivative_wrapper(expr* hd, expr* r);

        // Various support for unfolding derivative expressions that are
        // returned by derivative_wrapper
        expr_ref mk_deriv_accept(expr* s, unsigned i, expr* r);
        void get_derivative_targets(expr* r, expr_ref_vector& targets);
        void get_cofactors(expr* r, expr_ref_pair_vector& result);

        /* 
           Pretty print the regex of the state id to the out stream, 
           seq_regex_ptr must be a pointer to seq_regex and the 
           id must be a valid state id or else nothing is printed. 

           A state id is accepted when it lies in the range
           [1, m_state_to_expr.size()]. A null seq_regex_ptr also results in
           no output. html_encode is forwarded unchanged to the regex
           pretty printer.
        */
        static void pp_state(void* seq_regex_ptr, std::ostream& out, unsigned id, bool html_encode) {
            // The object arrives type-erased as void*; recover the real type
            // with a named cast.
            seq_regex* sr = static_cast<seq_regex*>(seq_regex_ptr);
            if (!sr)
                return;
            seq_util::rex re_util(sr->re());
            // Out-of-range ids are silently ignored.
            if (id < 1 || id > sr->m_state_to_expr.size())
                return;
            expr* r = sr->get_expr_from_id(id);
            seq_util::rex::pp(re_util, r, html_encode).display(out);
        }

        bool block_if_empty(expr* r, literal lit);

    public:

        seq_regex(theory_seq& th);

        // Scope management and generic propagation hooks. In this class they
        // are no-ops: nothing is pushed or popped, and there is never pending
        // propagation work to report.
        void push_scope() {}
        void pop_scope(unsigned num_scopes) {}
        bool can_propagate() const { return false; }
        bool propagate() const { return false; }

        void propagate_in_re(literal lit);

        // (accept s i r) means 
        // the suffix of s after the first i characters is a member of r
        void propagate_accept(literal lit);

        void propagate_eq(expr* r1, expr* r2);

        void propagate_ne(expr* r1, expr* r2);        

        void propagate_is_empty(literal lit);

        void propagate_is_non_empty(literal lit);
        
    };

};




© 2015 - 2024 Weber Informatics LLC | Privacy Policy