All Downloads are FREE. Search and download functionalities are using the official Maven repository.

cvc5-cvc5-1.2.0.src.theory.strings.regexp_operation.cpp Maven / Gradle / Ivy

The newest version!
/******************************************************************************
 * Top contributors (to current version):
 *   Tianyi Liang, Andrew Reynolds, Aina Niemetz
 *
 * This file is part of the cvc5 project.
 *
 * Copyright (c) 2009-2024 by the authors listed in the file AUTHORS
 * in the top-level source directory and their institutional affiliations.
 * All rights reserved.  See the file COPYING in the top-level source
 * directory for licensing information.
 * ****************************************************************************
 *
 * Symbolic Regular Expression Operations
 */

#include "theory/strings/regexp_operation.h"

#include <algorithm>

#include "expr/node_algorithm.h"
#include "options/strings_options.h"
#include "theory/rewriter.h"
#include "theory/strings/regexp_entail.h"
#include "theory/strings/theory_strings_utils.h"
#include "theory/strings/word.h"
#include "util/regexp.h"

using namespace cvc5::internal::kind;

namespace cvc5::internal {
namespace theory {
namespace strings {

/**
 * Constructs the regular expression operation utility.
 *
 * Pre-builds the constant nodes used throughout this class: the Boolean
 * constants, the integers 0 and 1, the empty regular expression (re.none),
 * re.allchar, (re.* re.allchar), the empty string and (str.to_re ""). It also
 * records the last character code of the alphabet, derived from the
 * stringsAlphaCard option.
 *
 * @param env The environment forwarded to the EnvObj base class.
 * @param sc The skolem cache stored in d_sc.
 */
RegExpOpr::RegExpOpr(Env& env, SkolemCache* sc)
    : EnvObj(env),
      d_true(NodeManager::currentNM()->mkConst(true)),
      d_false(NodeManager::currentNM()->mkConst(false)),
      // nullary operators are built from an explicitly typed empty child
      // vector; the element type cannot be deduced from empty braces
      d_emptyRegexp(NodeManager::currentNM()->mkNode(Kind::REGEXP_NONE,
                                                     std::vector<Node>{})),
      d_zero(NodeManager::currentNM()->mkConstInt(Rational(0))),
      d_one(NodeManager::currentNM()->mkConstInt(Rational(1))),
      d_sigma(NodeManager::currentNM()->mkNode(Kind::REGEXP_ALLCHAR,
                                               std::vector<Node>{})),
      d_sigma_star(
          NodeManager::currentNM()->mkNode(Kind::REGEXP_STAR, d_sigma)),
      d_sc(sc)
{
  NodeManager* nm = NodeManager::currentNM();
  d_emptyString = Word::mkEmptyWord(nm->stringType());
  d_emptySingleton = nm->mkNode(Kind::STRING_TO_REGEXP, d_emptyString);
  // the largest character code is one less than the alphabet cardinality
  d_lastchar = options().strings.stringsAlphaCard - 1;
}

/** Destructor; the body is empty, so no explicit cleanup is performed here. */
RegExpOpr::~RegExpOpr() {}

/**
 * Checks whether regular expression r is classified as something other than
 * RE_C_VARIABLE by getRegExpConstType.
 *
 * @param r A term of regular expression type.
 * @return true iff the classification of r is not RE_C_VARIABLE.
 */
bool RegExpOpr::checkConstRegExp(Node r)
{
  Assert(r.getType().isRegExp());
  Trace("strings-regexp-cstre")
      << "RegExpOpr::checkConstRegExp /" << mkString(r) << "/" << std::endl;
  const bool isVariable = (getRegExpConstType(r) == RE_C_VARIABLE);
  return !isVariable;
}

/**
 * Computes the constant-type classification of regular expression r.
 *
 * The term is traversed iteratively with an explicit stack, and the result
 * for every visited subterm is stored in d_constCache:
 * - (str.to_re t) is RE_C_CONCRETE_CONSTANT if t rewrites to a constant and
 *   RE_C_VARIABLE otherwise;
 * - re.allchar and re.range are RE_C_CONSTANT;
 * - terms whose kind is not a regular expression kind are RE_C_VARIABLE;
 * - every other term takes the largest classification among its children,
 *   starting from RE_C_CONSTANT for re.comp and RE_C_CONCRETE_CONSTANT
 *   otherwise.
 *
 * @param r A term of regular expression type.
 * @return The classification stored in d_constCache for r.
 */
RegExpConstType RegExpOpr::getRegExpConstType(Node r)
{
  Assert(r.getType().isRegExp());
  std::vector<TNode> visit;
  visit.push_back(r);
  do
  {
    TNode cur = visit.back();
    visit.pop_back();
    auto it = d_constCache.find(cur);

    Kind ck = cur.getKind();
    if (it == d_constCache.end())
    {
      // first visit: classify leaves directly, otherwise schedule children
      if (ck == Kind::STRING_TO_REGEXP)
      {
        Node tmp = rewrite(cur[0]);
        d_constCache[cur] =
            tmp.isConst() ? RE_C_CONCRETE_CONSTANT : RE_C_VARIABLE;
      }
      else if (ck == Kind::REGEXP_ALLCHAR || ck == Kind::REGEXP_RANGE)
      {
        d_constCache[cur] = RE_C_CONSTANT;
      }
      else if (!utils::isRegExpKind(ck))
      {
        // non-regular expression applications, e.g. function applications
        // with regular expression return type are treated as variables.
        d_constCache[cur] = RE_C_VARIABLE;
      }
      else
      {
        // mark as in-progress and revisit after the children are processed
        d_constCache[cur] = RE_C_UNKNOWN;
        visit.push_back(cur);
        visit.insert(visit.end(), cur.begin(), cur.end());
      }
    }
    else if (it->second == RE_C_UNKNOWN)
    {
      // second visit: all children have been classified
      RegExpConstType ret = ck == Kind::REGEXP_COMPLEMENT
                                ? RE_C_CONSTANT
                                : RE_C_CONCRETE_CONSTANT;
      for (const Node& cn : cur)
      {
        auto itc = d_constCache.find(cn);
        Assert(itc != d_constCache.end());
        if (itc->second > ret)
        {
          ret = itc->second;
        }
      }
      d_constCache[cur] = ret;
    }
  } while (!visit.empty());
  Assert(d_constCache.find(r) != d_constCache.end());
  return d_constCache[r];
}

/**
 * Determines whether regular expression r accepts the empty string.
 *
 * Results are memoized in d_delta_cache together with the explanation.
 *
 * @param r The regular expression to examine.
 * @param exp Output. When the result is 0 (unknown), this is set to a
 * (rewritten) formula built from the conditions of the undetermined
 * subterms; it is left untouched when no such condition is produced.
 * @return 0 if unknown, 1 if r contains the empty string, 2 if it does not.
 */
int RegExpOpr::delta(Node r, Node& exp)
{
  auto itd = d_delta_cache.find(r);
  if (itd != d_delta_cache.end())
  {
    // already computed
    exp = itd->second.second;
    return itd->second.first;
  }
  Trace("regexp-delta") << "RegExpOpr::delta: " << r << std::endl;
  int ret = 0;
  NodeManager* nm = NodeManager::currentNM();
  Kind k = r.getKind();
  switch (k)
  {
    case Kind::REGEXP_NONE:
    case Kind::REGEXP_ALLCHAR:
    case Kind::REGEXP_RANGE:
    {
      // does not contain empty string
      ret = 2;
      break;
    }
    case Kind::STRING_TO_REGEXP:
    {
      Node tmp = rewrite(r[0]);
      if (tmp.isConst())
      {
        ret = (tmp == d_emptyString) ? 1 : 2;
      }
      else
      {
        ret = 0;
        if (tmp.getKind() == Kind::STRING_CONCAT)
        {
          // a concatenation with a constant component is reported as
          // not containing the empty string
          for (const Node& tmpc : tmp)
          {
            if (tmpc.isConst())
            {
              ret = 2;
              break;
            }
          }
        }
        if (ret == 0)
        {
          exp = r[0].eqNode(d_emptyString);
        }
      }
      break;
    }
    case Kind::REGEXP_CONCAT:
    case Kind::REGEXP_UNION:
    case Kind::REGEXP_INTER:
    {
      // has there been an unknown child?
      bool hasUnknownChild = false;
      std::vector<Node> vec;
      // a child with result checkTmp decides the result immediately;
      // retTmp is the result when every child is known and none decided
      int checkTmp = k == Kind::REGEXP_UNION ? 1 : 2;
      int retTmp = k == Kind::REGEXP_UNION ? 2 : 1;
      for (const Node& rc : r)
      {
        Node exp2;
        int tmp = delta(rc, exp2);
        if (tmp == checkTmp)
        {
          // return is implied by the child's return value
          ret = checkTmp;
          break;
        }
        else if (tmp == 0)
        {
          // unknown if child contains empty string
          Assert(!exp2.isNull());
          vec.push_back(exp2);
          hasUnknownChild = true;
        }
      }
      if (ret != checkTmp)
      {
        if (!hasUnknownChild)
        {
          ret = retTmp;
        }
        else
        {
          Kind kr = k == Kind::REGEXP_UNION ? Kind::OR : Kind::AND;
          exp = vec.size() == 1 ? vec[0] : nm->mkNode(kr, vec);
        }
      }
      break;
    }
    case Kind::REGEXP_STAR:
    case Kind::REGEXP_OPT:
    {
      // contains empty string
      ret = 1;
      break;
    }
    case Kind::REGEXP_PLUS:
    {
      ret = delta(r[0], exp);
      break;
    }
    case Kind::REGEXP_LOOP:
    {
      uint32_t lo = utils::getLoopMinOccurrences(r);
      if (lo == 0)
      {
        ret = 1;
      }
      else
      {
        ret = delta(r[0], exp);
      }
      break;
    }
    case Kind::REGEXP_COMPLEMENT:
    {
      int tmp = delta(r[0], exp);
      // flip the result if known
      ret = tmp == 0 ? 0 : (3 - tmp);
      exp = exp.isNull() ? exp : exp.negate();
      break;
    }
    default:
    {
      Assert(!utils::isRegExpKind(k));
      break;
    }
  }
  if (!exp.isNull())
  {
    exp = rewrite(exp);
  }
  d_delta_cache[r] = std::pair<int, Node>(ret, exp);
  Trace("regexp-delta") << "RegExpOpr::delta returns " << ret << " for " << r
                        << ", expr = " << exp << std::endl;
  return ret;
}

/**
 * Computes the derivative of regular expression r with respect to c, where
 * c has at most one character, and stores it in retNode.
 *
 * Results are memoized in d_deriv_cache, keyed by the pair (r, c). When c is
 * empty, the result is determined by delta(r): r itself if r contains the
 * empty string, the empty regular expression if it does not, and an ITE on
 * the explanation from delta otherwise.
 *
 * Return value: 0-unknown, 1-yes, 2-no. The re.comp case and non-regular
 * expression kinds return 0 without caching.
 *
 * NOTE(review): several lines in this function look truncated in this copy
 * of the file (text following a '<' appears to have been lost, including
 * template arguments and loop bodies). Confirm against the upstream source
 * before modifying the affected cases.
 */
int RegExpOpr::derivativeS(Node r, cvc5::internal::String c, Node& retNode)
{
  Assert(c.size() < 2);
  Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;

  int ret = 1;
  retNode = d_emptyRegexp;
  NodeManager* nm = NodeManager::currentNM();
  SkolemManager* sm = nm->getSkolemManager();

  PairNodeStr dv = std::make_pair( r, c );
  if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) {
    // already computed
    retNode = d_deriv_cache[dv].first;
    ret = d_deriv_cache[dv].second;
  }
  else if (c.empty())
  {
    // derivative w.r.t. the empty string depends only on delta(r)
    Node expNode;
    ret = delta( r, expNode );
    if(ret == 0) {
      retNode = NodeManager::currentNM()->mkNode(
          Kind::ITE, expNode, r, d_emptyRegexp);
    } else if(ret == 1) {
      retNode = r;
    }
    std::pair< Node, int > p(retNode, ret);
    d_deriv_cache[dv] = p;
  } else {
    switch( r.getKind() ) {
      case Kind::REGEXP_NONE:
      {
        ret = 2;
        break;
      }
      case Kind::REGEXP_ALLCHAR:
      {
        retNode = d_emptySingleton;
        break;
      }
      case Kind::REGEXP_RANGE:
      {
        // c matches iff it lies between the two range bounds
        cvc5::internal::String a = r[0].getConst();
        cvc5::internal::String b = r[1].getConst();
        retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp;
        break;
      }
      case Kind::STRING_TO_REGEXP:
      {
        Node tmp = rewrite(r[0]);
        if(tmp.isConst()) {
          if(tmp == d_emptyString) {
            ret = 2;
          } else {
            if (tmp.getConst().front() == c.front())
            {
              // strip the matched first character
              retNode =
                  nm->mkNode(Kind::STRING_TO_REGEXP,
                             Word::getLength(tmp) == 1 ? d_emptyString
                                                       : Word::substr(tmp, 1));
            } else {
              ret = 2;
            }
          }
        } else {
          ret = 0;
          Node rest;
          if (tmp.getKind() == Kind::STRING_CONCAT)
          {
            Node t2 = tmp[0];
            if(t2.isConst()) {
              if (t2.getConst().front() == c.front())
              {
                Node n = nm->mkNode(Kind::STRING_TO_REGEXP,
                                    Word::getLength(tmp) == 1
                                        ? d_emptyString
                                        : Word::substr(tmp, 1));
                std::vector< Node > vec_nodes;
                vec_nodes.push_back(n);
                // NOTE(review): the next line looks truncated (loop condition
                // and body missing) -- verify against upstream.
                for(unsigned i=1; imkNode(Kind::REGEXP_CONCAT, vec_nodes);
                ret = 1;
              } else {
                ret = 2;
              }
            } else {
              tmp = tmp[0];
              std::vector< Node > vec_nodes;
              // NOTE(review): the next line looks truncated (loop condition
              // and body missing) -- verify against upstream.
              for(unsigned i=1; imkNode(Kind::REGEXP_CONCAT, vec_nodes);
            }
          }
          if(ret == 0) {
            // introduce a fresh skolem sk for the remainder and guard the
            // result with the condition tmp = c ++ sk
            Node sk =
                sm->mkDummySkolem("rsp", nm->stringType(), "Split RegExp");
            retNode = nm->mkNode(Kind::STRING_TO_REGEXP, sk);
            if(!rest.isNull()) {
              retNode = rewrite(nm->mkNode(Kind::REGEXP_CONCAT, retNode, rest));
            }
            Node exp =
                tmp.eqNode(nm->mkNode(Kind::STRING_CONCAT, nm->mkConst(c), sk));
            retNode =
                rewrite(nm->mkNode(Kind::ITE, exp, retNode, d_emptyRegexp));
          }
        }
        break;
      }
      case Kind::REGEXP_CONCAT:
      {
        std::vector< Node > vec_nodes;
        std::vector< Node > delta_nodes;
        Node dnode = d_true;
        // NOTE(review): the next line and the inner loop two statements below
        // look truncated -- verify against upstream.
        for(unsigned i=0; i vec_nodes2;
            if(dc != d_emptySingleton) {
              vec_nodes2.push_back( dc );
            }
            for(unsigned j=i+1; jmkNode(
                               Kind::REGEXP_CONCAT, vec_nodes2);
            if(dnode != d_true) {
              tmp = rewrite(nm->mkNode(Kind::ITE, dnode, tmp, d_emptyRegexp));
              ret = 0;
            }
            if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
              vec_nodes.push_back( tmp );
            }
          }
          // later components only contribute if r[i] may be empty
          Node exp3;
          int rt2 = delta( r[i], exp3 );
          if( rt2 == 0 ) {
            dnode = rewrite(nm->mkNode(Kind::AND, dnode, exp3));
          } else if( rt2 == 2 ) {
            break;
          }
        }
        retNode =
            vec_nodes.size() == 0
                ? d_emptyRegexp
                : (vec_nodes.size() == 1 ? vec_nodes[0]
                                         : NodeManager::currentNM()->mkNode(
                                             Kind::REGEXP_UNION, vec_nodes));
        if(retNode == d_emptyRegexp) {
          ret = 2;
        }
        break;
      }
      case Kind::REGEXP_UNION:
      {
        std::vector< Node > vec_nodes;
        // NOTE(review): the next line looks truncated -- verify against
        // upstream.
        for(unsigned i=0; imkNode(
                                             Kind::REGEXP_UNION, vec_nodes));
        if(retNode == d_emptyRegexp) {
          ret = 2;
        }
        break;
      }
      case Kind::REGEXP_INTER:
      {
        bool flag = true;
        bool flag_sg = false;
        std::vector< Node > vec_nodes;
        // NOTE(review): the next line looks truncated -- verify against
        // upstream.
        for(unsigned i=0; imkNode(
                                     Kind::REGEXP_INTER, vec_nodes));
            if(retNode == d_emptyRegexp) {
              ret = 2;
            }
          }
        } else {
          retNode = d_emptyRegexp;
          ret = 2;
        }
        break;
      }
      case Kind::REGEXP_STAR:
      {
        // d(R*) is empty if d(R) is empty, R* if d(R) is the empty-string
        // singleton, and d(R) ++ R* otherwise
        Node dc;
        ret = derivativeS(r[0], c, dc);
        retNode = dc == d_emptyRegexp ? dc
                                      : (dc == d_emptySingleton
                                             ? r
                                             : NodeManager::currentNM()->mkNode(
                                                 Kind::REGEXP_CONCAT, dc, r));
        break;
      }
      case Kind::REGEXP_LOOP:
      {
        uint32_t l = utils::getLoopMinOccurrences(r);
        uint32_t u = utils::getLoopMaxOccurrences(r);
        if (l == u && l == 0)
        {
          ret = 2;
          //retNode = d_emptyRegexp;
        } else {
          Node dc;
          ret = derivativeS(r[0], c, dc);
          // NOTE(review): this condition tests dc == d_emptyRegexp, whereas
          // derivativeSingle tests dc != d_emptyRegexp at the corresponding
          // place; the test here looks inverted -- confirm the intent.
          if(dc==d_emptyRegexp) {
            Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1));
            Node r2 = nm->mkNode(Kind::REGEXP_LOOP, lop, r[0]);
            retNode = dc == d_emptySingleton ? r2
                                             : NodeManager::currentNM()->mkNode(
                                                 Kind::REGEXP_CONCAT, dc, r2);
          } else {
            retNode = d_emptyRegexp;
          }
        }
        break;
      }
      case Kind::REGEXP_COMPLEMENT:
      {
        // don't know result
        return 0;
        break;
      }
      default: {
        Assert(!utils::isRegExpKind(r.getKind()));
        return 0;
        break;
      }
    }
    if(retNode != d_emptyRegexp) {
      retNode = rewrite(retNode);
    }
    std::pair< Node, int > p(retNode, ret);
    d_deriv_cache[dv] = p;
  }

  Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
  return ret;
}

/**
 * Computes the derivative of regular expression r with respect to c, where
 * c has at most one character, and returns it.
 *
 * Results for non-empty c are memoized in d_dv_cache, keyed by the pair
 * (r, c). Cases that are not handled (marked TODO below) yield the empty
 * regular expression; re.comp and non-handled kinds reach Unreachable().
 *
 * NOTE(review): several lines in this function look truncated in this copy
 * of the file (text following a '<' appears to have been lost, including
 * template arguments and loop bodies). Confirm against the upstream source
 * before modifying the affected cases.
 */
Node RegExpOpr::derivativeSingle(Node r, cvc5::internal::String c)
{
  Assert(c.size() < 2);
  Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
  Node retNode = d_emptyRegexp;
  PairNodeStr dv = std::make_pair( r, c );
  NodeManager* nm = NodeManager::currentNM();
  if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
    // already computed
    retNode = d_dv_cache[dv];
  }
  else if (c.empty())
  {
    // only the "contains empty string" answer (1) keeps r; this branch does
    // not write to d_dv_cache
    Node exp;
    int tmp = delta( r, exp );
    if(tmp == 0) {
      // TODO variable
      retNode = d_emptyRegexp;
    } else if(tmp == 1) {
      retNode = r;
    } else {
      retNode = d_emptyRegexp;
    }
  } else {
    Kind k = r.getKind();
    switch( k ) {
      case Kind::REGEXP_NONE:
      {
        retNode = d_emptyRegexp;
        break;
      }
      case Kind::REGEXP_ALLCHAR:
      {
        retNode = NodeManager::currentNM()->mkNode(Kind::STRING_TO_REGEXP,
                                                   d_emptyString);
        break;
      }
      case Kind::REGEXP_RANGE:
      {
        // c matches iff it lies between the two range bounds
        cvc5::internal::String a = r[0].getConst();
        cvc5::internal::String b = r[1].getConst();
        retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp;
        break;
      }
      case Kind::STRING_TO_REGEXP:
      {
        if(r[0].isConst()) {
          if(r[0] == d_emptyString) {
            retNode = d_emptyRegexp;
          } else {
            if (r[0].getConst().front() == c.front())
            {
              // strip the matched first character
              retNode = nm->mkNode(Kind::STRING_TO_REGEXP,
                                   Word::getLength(r[0]) == 1
                                       ? d_emptyString
                                       : Word::substr(r[0], 1));
            } else {
              retNode = d_emptyRegexp;
            }
          }
        } else {
          // TODO variable
          retNode = d_emptyRegexp;
        }
        break;
      }
      case Kind::REGEXP_CONCAT:
      {
        Node rees = NodeManager::currentNM()->mkNode(Kind::STRING_TO_REGEXP,
                                                     d_emptyString);
        std::vector< Node > vec_nodes;
        // NOTE(review): the next line and the inner loop two statements below
        // look truncated -- verify against upstream.
        for(unsigned i=0; i vec_nodes2;
            if(dc != rees) {
              vec_nodes2.push_back( dc );
            }
            for(unsigned j=i+1; jmkNode(
                               Kind::REGEXP_CONCAT, vec_nodes2);
            if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
              vec_nodes.push_back( tmp );
            }
          }
          // stop unless r[i] is known to contain the empty string
          Node exp;
          if( delta( r[i], exp ) != 1 ) {
            break;
          }
        }
        retNode =
            vec_nodes.size() == 0
                ? d_emptyRegexp
                : (vec_nodes.size() == 1 ? vec_nodes[0]
                                         : NodeManager::currentNM()->mkNode(
                                             Kind::REGEXP_UNION, vec_nodes));
        break;
      }
      case Kind::REGEXP_UNION:
      {
        std::vector< Node > vec_nodes;
        // NOTE(review): the next line looks truncated -- verify against
        // upstream.
        for(unsigned i=0; imkNode(
                                             Kind::REGEXP_UNION, vec_nodes));
        break;
      }
      case Kind::REGEXP_INTER:
      {
        bool flag = true;
        bool flag_sg = false;
        std::vector< Node > vec_nodes;
        // NOTE(review): the next line looks truncated -- verify against
        // upstream.
        for(unsigned i=0; imkNode(
                                     Kind::REGEXP_INTER, vec_nodes));
          }
        } else {
          retNode = d_emptyRegexp;
        }
        break;
      }
      case Kind::REGEXP_STAR:
      {
        // d(R*) is empty if d(R) is empty, R* if d(R) is the empty-string
        // singleton, and d(R) ++ R* otherwise
        Node dc = derivativeSingle(r[0], c);
        if(dc != d_emptyRegexp) {
          retNode = dc == d_emptySingleton ? r
                                           : NodeManager::currentNM()->mkNode(
                                               Kind::REGEXP_CONCAT, dc, r);
        } else {
          retNode = d_emptyRegexp;
        }
        break;
      }
      case Kind::REGEXP_LOOP:
      {
        uint32_t l = utils::getLoopMinOccurrences(r);
        uint32_t u = utils::getLoopMaxOccurrences(r);
        // NOTE(review): derivativeS uses (l == u && l == 0) at the
        // corresponding place; the disjunction here returns the empty
        // regular expression for any loop with l == u or l == 0 -- confirm
        // the intent.
        if (l == u || l == 0)
        {
          retNode = d_emptyRegexp;
        } else {
          Node dc = derivativeSingle(r[0], c);
          if(dc != d_emptyRegexp) {
            Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1));
            Node r2 = nm->mkNode(Kind::REGEXP_LOOP, lop, r[0]);
            retNode = dc == d_emptySingleton ? r2
                                             : NodeManager::currentNM()->mkNode(
                                                 Kind::REGEXP_CONCAT, dc, r2);
          } else {
            retNode = d_emptyRegexp;
          }
        }
        //Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl;
        break;
      }
      case Kind::REGEXP_COMPLEMENT:
      default: {
        Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
        Unreachable();
        break;
      }
    }
    if(retNode != d_emptyRegexp) {
      retNode = rewrite(retNode);
    }
    d_dv_cache[dv] = retNode;
  }
  Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
  return retNode;
}

/**
 * Collects information about how strings in regular expression r can start.
 *
 * Character codes are added to pcset and non-constant string terms are added
 * to pvset. The sets computed for r are memoized in d_fset_cache; on a cache
 * hit the cached sets are merged into pcset and pvset.
 *
 * NOTE(review): the container element types in this function and the line
 * starting the REGEXP_CONCAT case look truncated in this copy of the file
 * (text following a '<' appears to have been lost); the cases between
 * REGEXP_CONCAT and the final "all characters" loop are not visible. Confirm
 * against the upstream source.
 */
void RegExpOpr::firstChars(Node r, std::set &pcset, SetNodes &pvset)
{
  Trace("regexp-fset") << "Start FSET(" << mkString(r) << ")" << std::endl;
  std::map, SetNodes> >::const_iterator itr =
      d_fset_cache.find(r);
  if(itr != d_fset_cache.end()) {
    // already computed: merge the cached sets into the outputs
    pcset.insert((itr->second).first.begin(), (itr->second).first.end());
    pvset.insert((itr->second).second.begin(), (itr->second).second.end());
  } else {
    // cset is code points
    std::set cset;
    SetNodes vset;
    Kind k = r.getKind();
    switch( k ) {
      case Kind::REGEXP_NONE:
      {
        break;
      }
      case Kind::REGEXP_RANGE:
      {
        // every code from the lower to the upper bound, inclusive
        unsigned a = r[0].getConst().front();
        unsigned b = r[1].getConst().front();
        Assert(a < b);
        Assert(b < std::numeric_limits::max());
        for (unsigned c = a; c <= b; c++)
        {
          cset.insert(c);
        }
        break;
      }
      case Kind::STRING_TO_REGEXP:
      {
        Node st = rewrite(r[0]);
        if(st.isConst()) {
          // first character of a non-empty constant
          String s = st.getConst();
          if(s.size() != 0) {
            unsigned sc = s.front();
            cset.insert(sc);
          }
        }
        else if (st.getKind() == Kind::STRING_CONCAT)
        {
          // only the first component of the concatenation is considered
          if(st[0].isConst()) {
            String s = st[0].getConst();
            unsigned sc = s.front();
            cset.insert(sc);
          } else {
            vset.insert( st[0] );
          }
        }
        else
        {
          vset.insert(st);
        }
        break;
      }
      case Kind::REGEXP_CONCAT:
      {
        // NOTE(review): the next line looks truncated -- verify against
        // upstream.
        for(unsigned i=0; i::max());
        // add every character code from 0 to d_lastchar
        for (unsigned i = 0; i <= d_lastchar; i++)
        {
          cset.insert(i);
        }
        break;
      }
    }
    pcset.insert(cset.begin(), cset.end());
    pvset.insert(vset.begin(), vset.end());
    std::pair, SetNodes> p(cset, vset);
    d_fset_cache[r] = p;
  }

  if(TraceIsOn("regexp-fset")) {
    // print the accumulated character codes as a comma-separated list
    Trace("regexp-fset") << "END FSET(" << mkString(r) << ") = {";
    for (std::set::const_iterator it = pcset.begin();
         it != pcset.end();
         ++it)
    {
      if (it != pcset.begin())
      {
        Trace("regexp-fset") << ",";
      }
      Trace("regexp-fset") << (*it);
      }
    Trace("regexp-fset") << "}" << std::endl;
  }
}

/**
 * Reduces a regular expression membership literal to a formula.
 *
 * The literal is t when polarity is true and (not t) otherwise. Results are
 * memoized in d_simpCache, keyed by that literal.
 *
 * @param t A term of kind STRING_IN_REGEXP.
 * @param polarity Whether the membership is asserted positively.
 * @return The reduction computed by reduceRegExpPos (positive case), or by
 * reduceRegExpNegConcatFixed / reduceRegExpNeg (negative case). This may be
 * the null node if the called reduction produced none.
 */
Node RegExpOpr::simplify(Node t, bool polarity)
{
  Trace("strings-regexp-simpl")
      << "RegExpOpr::simplify: " << t << ", polarity=" << polarity << std::endl;
  Assert(t.getKind() == Kind::STRING_IN_REGEXP);
  Node tlit = polarity ? t : t.notNode();
  auto itr = d_simpCache.find(tlit);
  if (itr != d_simpCache.end())
  {
    return itr->second;
  }
  Node conc;
  if (polarity)
  {
    // the skolems introduced by the reduction are not needed here
    std::vector<Node> newSkolems;
    conc = reduceRegExpPos(tlit, d_sc, newSkolems);
  }
  else
  {
    // see if we can use an optimized version of the reduction for re.++.
    Node r = t[1];
    if (r.getKind() == Kind::REGEXP_CONCAT)
    {
      // whether the fixed-length component is the last child of r
      bool isRev = false;
      // As an optimization to the reduction, if we can determine that
      // all strings in the language of R1 have the same length, say n,
      // then the conclusion of the reduction is quantifier-free:
      //    ~( substr(s,0,n) in R1 ) OR ~( substr(s,len(s)-n,n) in R2)
      Node reLen = getRegExpConcatFixed(r, isRev);
      if (!reLen.isNull())
      {
        conc = reduceRegExpNegConcatFixed(tlit, reLen, isRev);
      }
    }
    if (conc.isNull())
    {
      conc = reduceRegExpNeg(tlit);
    }
  }
  d_simpCache[tlit] = conc;
  Trace("strings-regexp-simpl")
      << "RegExpOpr::simplify: returns " << conc << std::endl;
  return conc;
}

/**
 * Looks for a fixed-length component at either end of a regular expression
 * concatenation.
 *
 * @param r A term of kind REGEXP_CONCAT.
 * @param isRev Output. Set to true iff the returned length was obtained from
 * the last child of r; false otherwise.
 * @return The result of RegExpEntail::getFixedLengthForRegexp for the first
 * child if it is non-null, else for the last child if that is non-null, else
 * the null node.
 */
Node RegExpOpr::getRegExpConcatFixed(Node r, bool& isRev)
{
  Assert(r.getKind() == Kind::REGEXP_CONCAT);
  isRev = false;
  // prefer the leading component
  Node frontLen = RegExpEntail::getFixedLengthForRegexp(r[0]);
  if (frontLen.isNull())
  {
    // fall back to the trailing component
    size_t lastIndex = r.getNumChildren() - 1;
    Node backLen = RegExpEntail::getFixedLengthForRegexp(r[lastIndex]);
    if (backLen.isNull())
    {
      return Node::null();
    }
    isRev = true;
    return backLen;
  }
  return frontLen;
}

/**
 * Reduces a negated regular expression membership (not (str.in_re s r)).
 *
 * - If r is a concatenation, delegates to reduceRegExpNegConcatFixed with a
 *   null length and isRev = false.
 * - If r is a star (re.* R), returns
 *     s != "" ^ forall x. x <= 0 v len(s) < x v
 *                         ~(prefix(s,x) in R) v ~(suffix(s,x) in r)
 *   where the quantified formula is built with utils::mkForallInternal.
 * - Otherwise returns the null node.
 *
 * @param mem A term of the form (not (str.in_re s r)).
 * @return The reduced formula, or the null node if no case applies.
 */
Node RegExpOpr::reduceRegExpNeg(Node mem)
{
  Assert(mem.getKind() == Kind::NOT
         && mem[0].getKind() == Kind::STRING_IN_REGEXP);
  Node str = mem[0][0];
  Node re = mem[0][1];
  NodeManager* nm = NodeManager::currentNM();
  Kind reKind = re.getKind();
  Node zero = nm->mkConstInt(Rational(0));
  switch (reKind)
  {
    case Kind::REGEXP_CONCAT:
    {
      // do not use length entailment, call regular expression concat
      return reduceRegExpNegConcatFixed(mem, Node::null(), false);
    }
    case Kind::REGEXP_STAR:
    {
      Node emptyWord = Word::mkEmptyWord(str.getType());
      Node strLen = nm->mkNode(Kind::STRING_LENGTH, str);
      Node nonEmpty = str.eqNode(emptyWord).negate();
      Node idx = SkolemCache::mkIndexVar(mem);
      Node idxList = nm->mkNode(Kind::BOUND_VAR_LIST, idx);
      // negated guards: the index is not positive, or exceeds the length
      Node idxNotPos = nm->mkNode(Kind::LEQ, idx, zero);
      Node idxTooBig = nm->mkNode(Kind::LT, strLen, idx);
      // internal
      Node head = utils::mkPrefix(str, idx);
      Node tail = utils::mkSuffix(str, idx);
      Node headNotInBody =
          nm->mkNode(Kind::STRING_IN_REGEXP, head, re[0]).negate();
      Node tailNotInStar =
          nm->mkNode(Kind::STRING_IN_REGEXP, tail, re).negate();

      Node body = nm->mkNode(
          Kind::OR, {idxNotPos, idxTooBig, headNotInBody, tailNotInStar});
      // must mark as an internal quantifier
      body = utils::mkForallInternal(idxList, body);
      return nm->mkNode(Kind::AND, nonEmpty, body);
    }
    default: break;
  }
  Assert(!utils::isRegExpKind(reKind));
  return Node::null();
}

/**
 * Reduces a negated membership in a regular expression concatenation by
 * stripping one component from the front or the back.
 *
 * @param mem A term of the form (not (str.in_re s (re.++ R1 ... Rn))).
 * @param reLen If non-null, the length used to split s, and the result is
 * quantifier-free. If null, a bound index variable is introduced and the
 * result is a quantified formula built with utils::mkForallInternal.
 * @param isRev If true the last component is stripped, otherwise the first.
 * @return The reduced formula.
 */
Node RegExpOpr::reduceRegExpNegConcatFixed(Node mem, Node reLen, bool isRev)
{
  Assert(mem.getKind() == Kind::NOT
         && mem[0].getKind() == Kind::STRING_IN_REGEXP);
  Node s = mem[0][0];
  Node r = mem[0][1];
  NodeManager* nm = NodeManager::currentNM();
  Assert(r.getKind() == Kind::REGEXP_CONCAT);
  Node zero = nm->mkConstInt(Rational(0));
  // The following simplification states that
  //    ~( s in R1 ++ R2 ++... ++ Rn )
  // is equivalent to
  //    forall x.
  //      0 <= x <= len(s) =>
  //        ~(substr(s,0,x) in R1) OR ~(substr(s,x,len(s)-x) in R2 ++ ... ++ Rn)
  // Index is the child index of r that we are stripping off, which is either
  // from the beginning or the end.
  Node lens = nm->mkNode(Kind::STRING_LENGTH, s);
  Node b1;
  Node b1v;
  Node guard1n, guard2n;
  if (reLen.isNull())
  {
    // no fixed length known: quantify over the split position
    b1 = SkolemCache::mkIndexVar(mem);
    b1v = nm->mkNode(Kind::BOUND_VAR_LIST, b1);
    // negations of the guards 0 <= x and x <= len(s)
    guard1n = nm->mkNode(Kind::LT, b1, zero);
    guard2n = nm->mkNode(Kind::LT, lens, b1);
  }
  else
  {
    b1 = reLen;
  }
  // s1 is the part matched against the stripped component, s2 the rest
  Node s1;
  Node s2;
  if (!isRev)
  {
    s1 = utils::mkPrefix(s, b1);
    s2 = utils::mkSuffix(s, b1);
  }
  else
  {
    s1 = utils::mkSuffixOfLen(s, b1);
    s2 = utils::mkPrefix(s, nm->mkNode(Kind::SUB, lens, b1));
  }
  size_t index = isRev ? r.getNumChildren() - 1 : 0;
  Node s1r1 = nm->mkNode(Kind::STRING_IN_REGEXP, s1, r[index]).negate();
  // the remaining components, in their original order
  std::vector<Node> nvec;
  for (size_t i = 0, nchild = r.getNumChildren(); i < nchild; i++)
  {
    if (i != index)
    {
      nvec.push_back(r[i]);
    }
  }
  Node r2 = nvec.size() == 1 ? nvec[0] : nm->mkNode(Kind::REGEXP_CONCAT, nvec);
  Node s2r2 = nm->mkNode(Kind::STRING_IN_REGEXP, s2, r2).negate();
  Node conc;
  if (!b1v.isNull())
  {
    conc = nm->mkNode(Kind::OR, {guard1n, guard2n, s1r1, s2r2});
    // must mark as an internal quantifier
    conc = utils::mkForallInternal(b1v, conc);
  }
  else
  {
    conc = nm->mkNode(Kind::OR, s1r1, s2r2);
  }
  return conc;
}

/**
 * Reduces a positive regular expression membership (str.in_re s r).
 *
 * - If r is (re.++ R0 ... Rn), returns
 *     (and (= s (str.++ k0 ... kn)) (str.in_re ki Ri) ...)
 *   where ki is the argument of Ri when Ri is (str.to_re t), and otherwise a
 *   skolem function application with id RE_UNFOLD_POS_COMPONENT; memberships
 *   are only added for the latter components.
 * - If r is (re.* R), returns
 *     (or (= s "") (str.in_re s R) (and <reduction of s in R ++ r ++ R>
 *                                       k1 != "" k3 != ""))
 * - Otherwise returns the null node.
 *
 * @param mem A term of kind STRING_IN_REGEXP.
 * @param sc The skolem cache; only forwarded to the recursive call here.
 * @param newSkolems Output. The component terms k0 ... kn are appended in
 * the concatenation case. Callers in this file pass an empty vector; the
 * equality with (str.++ ...) is built from the entire vector.
 * @return The reduced formula, or the null node if no case applies.
 */
Node RegExpOpr::reduceRegExpPos(Node mem,
                                SkolemCache* sc,
                                std::vector<Node>& newSkolems)
{
  Assert(mem.getKind() == Kind::STRING_IN_REGEXP);
  Node s = mem[0];
  Node r = mem[1];
  NodeManager* nm = NodeManager::currentNM();
  Kind k = r.getKind();
  Node conc;
  if (k == Kind::REGEXP_CONCAT)
  {
    std::vector<Node> nvec;
    SkolemManager* sm = nm->getSkolemManager();
    // Look up skolems for each of the components. If sc has optimizations
    // enabled, this will return arguments of str.to_re.
    for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; ++i)
    {
      if (r[i].getKind() == Kind::STRING_TO_REGEXP)
      {
        // optimization, just take the body
        newSkolems.push_back(r[i][0]);
      }
      else
      {
        Node ivalue = nm->mkConstInt(Rational(i));
        Node sk = sm->mkSkolemFunction(SkolemId::RE_UNFOLD_POS_COMPONENT,
                                       {mem[0], mem[1], ivalue});
        newSkolems.push_back(sk);
        // use sk directly rather than indexing newSkolems with i, which is
        // only correct when newSkolems was empty on entry
        nvec.push_back(nm->mkNode(Kind::STRING_IN_REGEXP, sk, r[i]));
      }
    }
    // (str.in_re x (re.++ R0 .... Rn)) =>
    // (and (= x (str.++ k0 ... kn)) (str.in_re k0 R0) ... (str.in_re kn Rn) )
    Node lem = s.eqNode(nm->mkNode(Kind::STRING_CONCAT, newSkolems));
    nvec.insert(nvec.begin(), lem);
    conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(Kind::AND, nvec);
  }
  else if (k == Kind::REGEXP_STAR)
  {
    Node emp = Word::mkEmptyWord(s.getType());
    Node se = s.eqNode(emp);
    Node sinr = nm->mkNode(Kind::STRING_IN_REGEXP, s, r[0]);
    Node reExpand = nm->mkNode(Kind::REGEXP_CONCAT, r[0], r, r[0]);
    Node sinRExp = nm->mkNode(Kind::STRING_IN_REGEXP, s, reExpand);
    // We unfold `x in R*` by considering three cases: `x` is empty, `x`
    // is matched by `R`, or `x` is matched by two or more `R`s. For the
    // last case, `x` will break into three pieces, making the beginning
    // and the end each match `R` and the middle match `R*`. Matching the
    // beginning and the end with `R` allows us to reason about the
    // beginning and the end of `x` simultaneously.
    //
    // x in R* ---> (x = "") v (x in R) v (x in (re.++ R (re.* R) R))

    // We also immediately unfold the last disjunct for re.*. The advantage
    // of doing this is that we use the same scheme for skolems above.
    std::vector<Node> newSkolemsC;
    sinRExp = reduceRegExpPos(sinRExp, sc, newSkolemsC);
    Assert(newSkolemsC.size() == 3);
    // make the return lemma
    // can also assume the component match the first and last R are non-empty.
    // This means that the overall conclusion is:
    //   (x = "") v (x in R) v (x = (str.++ k1 k2 k3) ^
    //                          k1 in R ^ k2 in (re.* R) ^ k3 in R ^
    //                          k1 != ""  ^ k3 != "")
    conc = nm->mkNode(Kind::OR,
                      se,
                      sinr,
                      nm->mkNode(Kind::AND,
                                 sinRExp,
                                 newSkolemsC[0].eqNode(emp).negate(),
                                 newSkolemsC[2].eqNode(emp).negate()));
  }
  else
  {
    Assert(!utils::isRegExpKind(k));
  }
  return conc;
}

bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) {
  for(std::set< PairNodes >::const_iterator itr = s.begin();
      itr != s.end(); ++itr) {
    if((itr->first == n1 && itr->second == n2) ||
       (itr->first == n2 && itr->second == n1)) {
      return true;
    }
  }
  return false;
}

// Checks whether n refers to the regular expression variable with index cnt.
// For a REGEXP_RV node, the index stored in n[0] is read as an unsigned
// integer and compared against cnt.
//
// NOTE(review): this copy of the file is damaged from the REGEXP_CONCAT loop
// header onwards. The text that followed "i" in that header is missing, which
// appears to include the rest of containC2 and the beginning of the function
// the remaining statements belong to (its assertion message names
// RegExpOpr::convert2, and it writes to r1/r2 and reads nk, none of which are
// declared in the visible text). Restore this region from the upstream source
// before building; the comments below only describe what is still visible.
bool RegExpOpr::containC2(unsigned cnt, Node n) {
  if (n.getKind() == Kind::REGEXP_RV)
  {
    Assert(n[0].getConst() <= Rational(String::maxSize()))
        << "Exceeded UINT32_MAX in RegExpOpr::containC2";
    unsigned y = n[0].getConst().getNumerator().toUnsignedInt();
    return cnt == y;
  }
  else if (n.getKind() == Kind::REGEXP_CONCAT)
  {
    // NOTE(review): truncated line; the loop condition and everything up to
    // the middle of an Assert in what is presumably convert2 is missing.
    for( unsigned i=0; i() <= Rational(String::maxSize()))
        << "Exceeded UINT32_MAX in RegExpOpr::convert2";
    // Variable case: r1 becomes the empty singleton; r2 becomes the empty
    // regexp when the variable index equals cnt, and n itself otherwise.
    unsigned y = n[0].getConst().getNumerator().toUnsignedInt();
    r1 = d_emptySingleton;
    if(cnt == y) {
      r2 = d_emptyRegexp;
    } else {
      r2 = n;
    }
  }
  else if (nk == Kind::REGEXP_CONCAT)
  {
    // flag stays true when the loop below never takes its first branch; in
    // that case n is returned unchanged in r2 with r1 the empty singleton.
    bool flag = true;
    std::vector vr1, vr2;
    // NOTE(review): the next two loop headers are truncated; the condition
    // guarding the first branch and the construction of r1/r2 in it are not
    // fully visible. Children that do not take that branch are collected in
    // vr1.
    for( unsigned i=0; imkNode(Kind::REGEXP_CONCAT, vr1);
        vr2.push_back(t2);
        for( unsigned j=i+1; jmkNode(Kind::REGEXP_CONCAT, vr2);
        flag = false;
        break;
      } else {
        vr1.push_back(n[i]);
      }
    }
    if(flag) {
      r1 = d_emptySingleton;
      r2 = n;
    }
  }
  else if (nk == Kind::REGEXP_UNION)
  {
    std::vector vr1, vr2;
    // NOTE(review): truncated line; how vr1 and vr2 are filled is not visible.
    // r2 is the union of vr2, and r1 is presumably the union of vr1.
    for( unsigned i=0; imkNode(Kind::REGEXP_UNION, vr1);
    r2 = NodeManager::currentNM()->mkNode(Kind::REGEXP_UNION, vr2);
  }
  else if (nk == Kind::STRING_TO_REGEXP || nk == Kind::REGEXP_ALLCHAR
           || nk == Kind::REGEXP_RANGE || nk == Kind::REGEXP_COMPLEMENT
           || nk == Kind::REGEXP_LOOP)
  {
    // this leaves n unchanged
    r1 = d_emptySingleton;
    r2 = n;
  }
  else
  {
    // any other kind is treated as a programming error
    //is it possible?
    Unreachable();
  }
}

// Computes a regular expression for the intersection of r1 and r2.
//
// The pair is first put into a canonical order and looked up in
// d_inter_cache. Trivial cases (one side is the empty regexp, one side is the
// empty singleton, both sides equal) are answered directly. Otherwise the
// result is built character by character: for every character c that both
// regexps can start with, the derivatives of r1 and r2 w.r.t. c are
// intersected recursively and prefixed with c; the union of these pieces
// (plus the empty singleton when both sides pass the delta check) is then
// passed through convert1(cnt, ...) and rewritten.
//
// @param r1, r2 the regular expressions to intersect
// @param cache  maps pairs that are currently being expanded on the recursion
//               path to the REGEXP_RV placeholder standing for their result;
//               taken by value, so changes are only seen by deeper calls
// @param cnt    index used for the REGEXP_RV placeholder of this call
// @return the intersection; it is stored in d_inter_cache only when it
//         contains no REGEXP_RV placeholder
//
// NOTE(review): delta() is used here as returning 1 when the empty string is
// accepted, 2 when it is not and 0 when undetermined - confirm against its
// definition.
Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt ) {
  //Assert(checkConstRegExp(r1) && checkConstRegExp(r2));
  // Canonical order, so that (r1, r2) and (r2, r1) share cache entries. The
  // swap goes through an owning Node, like the r1l/r2l swap below.
  if (r1 > r2)
  {
    Node tmpNode = r1;
    r1 = r2;
    r2 = tmpNode;
  }
  NodeManager* nm = NodeManager::currentNM();
  Trace("regexp-int") << "Starting INTERSECT(" << cnt << "):\n  "<< mkString(r1) << ",\n  " << mkString(r2) << std::endl;
  std::pair<Node, Node> p(r1, r2);
  std::map<PairNodes, Node>::const_iterator itr = d_inter_cache.find(p);
  Node rNode;
  if (itr != d_inter_cache.end())
  {
    rNode = itr->second;
  }
  else
  {
    Trace("regexp-int-debug") << " ... not in cache" << std::endl;
    if (r1 == d_emptyRegexp || r2 == d_emptyRegexp)
    {
      Trace("regexp-int-debug") << " ... one is empty set" << std::endl;
      rNode = d_emptyRegexp;
    }
    else if (r1 == d_emptySingleton || r2 == d_emptySingleton)
    {
      Trace("regexp-int-debug") << " ... one is empty singleton" << std::endl;
      // the result only depends on the delta check of the other side
      Node exp;
      int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
      if (r == 0)
      {
        //TODO: variable
        Unreachable();
      }
      else if (r == 1)
      {
        rNode = d_emptySingleton;
      }
      else
      {
        rNode = d_emptyRegexp;
      }
    }
    else if (r1 == r2)
    {
      Trace("regexp-int-debug") << " ... equal" << std::endl;
      rNode = r1; //convert1(cnt, r1);
    }
    else
    {
      Trace("regexp-int-debug") << " ... normal checking" << std::endl;
      std::map<PairNodes, Node>::const_iterator itrcache = cache.find(p);
      if (itrcache != cache.end())
      {
        // this pair is already being expanded further up: use its placeholder
        rNode = itrcache->second;
      }
      else
      {
        Trace("regexp-int-debug") << " ... normal without cache" << std::endl;
        // characters that both regexps can start with
        std::vector<unsigned> cset;
        std::set<unsigned> cset1, cset2;
        std::set< Node > vset1, vset2;
        firstChars(r1, cset1, vset1);
        firstChars(r2, cset2, vset2);
        Trace("regexp-int-debug") << " ... got fset" << std::endl;
        std::set_intersection(cset1.begin(), cset1.end(), cset2.begin(), cset2.end(),
             std::inserter(cset, cset.begin()));
        std::vector< Node > vec_nodes;
        Node delta_exp;
        Trace("regexp-int-debug") << " ... try delta" << std::endl;
        int flag = delta(r1, delta_exp);
        int flag2 = delta(r2, delta_exp);
        Trace("regexp-int-debug") << " ... delta1=" << flag << ", delta2=" << flag2 << std::endl;
        if (flag != 2 && flag2 != 2)
        {
          if (flag == 1 && flag2 == 1)
          {
            vec_nodes.push_back(d_emptySingleton);
          }
          else
          {
            //TODO: variable
            Unreachable();
          }
        }
        if (TraceIsOn("regexp-int-debug"))
        {
          Trace("regexp-int-debug") << "Try CSET(" << cset.size() << ") = {";
          bool firstChar = true;
          for (unsigned ch : cset)
          {
            if (!firstChar)
            {
              Trace("regexp-int-debug") << ", ";
            }
            firstChar = false;
            Trace("regexp-int-debug") << ch;
          }
          Trace("regexp-int-debug") << "}" << std::endl;
        }
        // results per pair of derivatives, shared between characters whose
        // derivatives coincide
        std::map< PairNodes, Node > cacheX;
        // The recursive calls must see p mapped to the placeholder of this
        // call. `cache` is our private copy, p is not in it (checked above)
        // and it is not read again after this loop, so it is extended in
        // place, once, on the first recursive call.
        bool placeholderAdded = false;
        for (unsigned ch : cset)
        {
          std::vector<unsigned> cvec(1, ch);
          String c(cvec);
          Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl;
          Node r1l = derivativeSingle(r1, c);
          Node r2l = derivativeSingle(r2, c);
          Trace("regexp-int-debug") << "  ... got partial(r1,c) = " << mkString(r1l) << std::endl;
          Trace("regexp-int-debug") << "  ... got partial(r2,c) = " << mkString(r2l) << std::endl;
          Node rt;

          if (r1l > r2l)
          {
            Node tnode = r1l;
            r1l = r2l;
            r2l = tnode;
          }
          PairNodes pp(r1l, r2l);
          std::map<PairNodes, Node>::const_iterator itr2 = cacheX.find(pp);
          if (itr2 != cacheX.end())
          {
            rt = itr2->second;
          }
          else
          {
            if (!placeholderAdded)
            {
              cache[p] =
                  nm->mkNode(Kind::REGEXP_RV, nm->mkConstInt(Rational(cnt)));
              placeholderAdded = true;
            }
            rt = intersectInternal(r1l, r2l, cache, cnt + 1);
            cacheX[pp] = rt;
          }

          // prefix the recursive result with the consumed character
          rt = rewrite(
              nm->mkNode(Kind::REGEXP_CONCAT,
                         nm->mkNode(Kind::STRING_TO_REGEXP, nm->mkConst(c)),
                         rt));

          Trace("regexp-int-debug") << "  ... got p(r1,c) && p(r2,c) = " << mkString(rt) << std::endl;
          vec_nodes.push_back(rt);
        }
        rNode = rewrite(vec_nodes.size() == 0 ? d_emptyRegexp
                        : vec_nodes.size() == 1
                            ? vec_nodes[0]
                            : nm->mkNode(Kind::REGEXP_UNION, vec_nodes));
        rNode = convert1(cnt, rNode);
        rNode = rewrite(rNode);
      }
    }
    Trace("regexp-int-debug") << "  ... try testing no RV of " << mkString(rNode) << std::endl;
    // results that still mention a placeholder depend on the recursion path
    // and are therefore not stored in the global cache
    if (!expr::hasSubtermKind(Kind::REGEXP_RV, rNode))
    {
      d_inter_cache[p] = rNode;
    }
  }
  Trace("regexp-int") << "End(" << cnt << ") of INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
  return rNode;
}

// Returns a regular expression obtained from r by replacing every
// REGEXP_INTER subterm with the result of intersect() applied to its
// (already processed) children, folded from left to right. All other subterms
// are rebuilt only if one of their children changed.
//
// r is asserted to satisfy checkConstRegExp. The traversal is an iterative
// post-order walk: a term is pushed again after being first seen, so that it
// is revisited once all of its children have an entry in `visited`.
//
// NOTE(review): intersect() returns the null node for inputs it rejects; such
// a result would be stored as-is for the REGEXP_INTER term - confirm callers
// never reach that case.
Node RegExpOpr::removeIntersection(Node r) {
  Assert(checkConstRegExp(r));
  NodeManager* nm = NodeManager::currentNM();
  // maps each visited term to its converted form (null while in progress)
  std::unordered_map<TNode, Node> visited;
  std::unordered_map<TNode, Node>::iterator it;
  std::vector<TNode> visit;
  TNode cur;
  visit.push_back(r);
  do
  {
    cur = visit.back();
    visit.pop_back();
    it = visited.find(cur);

    if (it == visited.end())
    {
      // first visit: schedule the children, then come back to cur
      visited[cur] = Node::null();
      visit.push_back(cur);
      for (const Node& cn : cur)
      {
        visit.push_back(cn);
      }
    }
    else if (it->second.isNull())
    {
      // second visit: all children have been converted
      Kind ck = cur.getKind();
      Node ret;
      if (ck == Kind::REGEXP_INTER)
      {
        // fold the converted children with intersect
        for (const Node& cn : cur)
        {
          it = visited.find(cn);
          Assert(it != visited.end());
          Assert(!it->second.isNull());
          if (ret.isNull())
          {
            ret = it->second;
          }
          else
          {
            ret = intersect(ret, it->second);
          }
        }
      }
      else
      {
        bool childChanged = false;
        std::vector<Node> children;
        if (cur.getMetaKind() == kind::metakind::PARAMETERIZED)
        {
          children.push_back(cur.getOperator());
        }
        for (const Node& cn : cur)
        {
          it = visited.find(cn);
          Assert(it != visited.end());
          Assert(!it->second.isNull());
          childChanged = childChanged || cn != it->second;
          children.push_back(it->second);
        }
        // reuse cur itself when nothing below it changed
        if (childChanged)
        {
          ret = nm->mkNode(cur.getKind(), children);
        }
        else
        {
          ret = cur;
        }
      }
      visited[cur] = ret;
    }
  } while (!visit.empty());
  Assert(visited.find(r) != visited.end());
  Assert(!visited.find(r)->second.isNull());
  if (TraceIsOn("regexp-intersect"))
  {
    Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r)
                              << " ) = " << mkString(visited[r]) << std::endl;
  }
  return visited[r];
}

// Computes the intersection of the regular expressions r1 and r2.
//
// Returns the null node if either argument fails checkConstRegExp or contains
// a REGEXP_COMPLEMENT subterm. Otherwise nested intersections are first
// eliminated from both arguments with removeIntersection, and the results are
// handed to intersectInternal with an empty path cache and counter 1.
Node RegExpOpr::intersect(Node r1, Node r2)
{
  const bool unsupported =
      !checkConstRegExp(r1) || !checkConstRegExp(r2)
      || expr::hasSubtermKind(Kind::REGEXP_COMPLEMENT, r1)
      || expr::hasSubtermKind(Kind::REGEXP_COMPLEMENT, r2);
  if (unsupported)
  {
    return Node::null();
  }
  Node rr1 = removeIntersection(r1);
  Node rr2 = removeIntersection(r2);
  // path cache for intersectInternal; starts out empty
  std::map<PairNodes, Node> cache;
  Trace("regexp-intersect-node") << "Intersect (1): " << rr1 << std::endl;
  Trace("regexp-intersect-node") << "Intersect (2): " << rr2 << std::endl;
  Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1)
                            << ",\n\t" << mkString(r2) << ")" << std::endl;
  Node retNode = intersectInternal(rr1, rr2, cache, 1);
  Trace("regexp-intersect")
      << "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t" << mkString(r2)
      << ") =\n\t" << mkString(retNode) << std::endl;
  Trace("regexp-intersect-node") << "Intersect finished." << std::endl;
  return retNode;
}

//printing
// Returns a printable form of the string term r for use in mkString.
// A constant is printed via its string value, with a lone "." escaped as
// "\."; a non-constant term is printed as "$" followed by the term itself.
std::string RegExpOpr::niceChar(Node r) {
  if (!r.isConst())
  {
    return "$" + r.toString();
  }
  std::string s = r.getConst<String>().toString();
  if (s == ".")
  {
    return "\\.";
  }
  return s;
}
// Builds a compact, human-readable string for the regular expression r; it is
// used by the trace output in this file.
//
// Visible cases: the null node and REGEXP_NONE print as "\E", REGEXP_ALLCHAR
// as ".", STRING_TO_REGEXP as niceChar of its argument (parenthesized unless
// it is a single character), REGEXP_COMPLEMENT as "^(" ... ")". Any other
// kind reaching the default case is printed with the node's stream operator
// and asserted not to be a regular expression kind.
//
// NOTE(review): the REGEXP_CONCAT case is damaged in this copy of the file.
// The loop header is cut off after "i", and the text up to the tail of a case
// that prints a numerator followed by ">" is missing, so several cases of the
// switch are not visible here. Restore this region from the upstream source
// before building.
std::string RegExpOpr::mkString( Node r ) {
  std::string retStr;
  if(r.isNull()) {
    retStr = "\\E";
  } else {
    Kind k = r.getKind();
    switch( k ) {
      case Kind::REGEXP_NONE:
      {
        retStr += "\\E";
        break;
      }
      case Kind::REGEXP_ALLCHAR:
      {
        retStr += ".";
        break;
      }
      case Kind::STRING_TO_REGEXP:
      {
        // single characters are printed bare, longer strings in parentheses
        std::string tmp( niceChar( r[0] ) );
        retStr += tmp.size()==1? tmp : "(" + tmp + ")";
        break;
      }
      case Kind::REGEXP_CONCAT:
      {
        retStr += "(";
        // NOTE(review): truncated line, see the note above the function.
        for(unsigned i=0; i().getNumerator().toString();
        retStr += ">";
        break;
      }
      case Kind::REGEXP_COMPLEMENT:
      {
        retStr += "^(";
        retStr += mkString(r[0]);
        retStr += ")";
        break;
      }
      default:
      {
        // not a regular expression kind: fall back to the node's own printer
        std::stringstream ss;
        ss << r;
        retStr = ss.str();
        Assert(!utils::isRegExpKind(r.getKind()));
        break;
      }
    }
  }

  return retStr;
}

// Forwards to RegExpEntail::regExpIncludes for the pair (r1, r2), passing
// this object's d_inclusionCache along, and returns its answer unchanged.
bool RegExpOpr::regExpIncludes(Node r1, Node r2)
{
  const bool included =
      RegExpEntail::regExpIncludes(r1, r2, d_inclusionCache);
  return included;
}

}  // namespace strings
}  // namespace theory
}  // namespace cvc5::internal




© 2015 - 2024 Weber Informatics LLC | Privacy Policy