#!/usr/bin/env python3
###############################################################################
# Top contributors (to current version):
#   Andres Noetzli, Abdalrhman Mohamed, Andrew Reynolds
#
# This file is part of the cvc5 project.
#
# Copyright (c) 2009-2024 by the authors listed in the file AUTHORS
# in the top-level source directory and their institutional affiliations.
# All rights reserved.  See the file COPYING in the top-level source
# directory for licensing information.
# #############################################################################
##
"""
Runs a benchmark and checks for correct exit status and output.
"""

import argparse
import collections
import difflib
import os
import re
import shlex
import shutil
import subprocess
import sys
import tempfile

g_args = None

# For maximum reliability, we get a fully qualified path for the executable.
bash_bin = shutil.which("bash")
if bash_bin is None:
    raise FileNotFoundError("Bash executable is required but not found.")

class Color:
    BLUE = "\033[94m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    RED = "\033[91m"
    ENDC = "\033[0m"

is_windows = sys.platform.startswith('win')

class BulletSymbol:
    # On Windows, the special characters cause this error:
    #   UnicodeEncodeError: 'charmap' codec can't encode character
    # when ctest runs the Python script
    INFO = "▶ " if not is_windows else "> "
    OK = "✓ " if not is_windows else "> "
    ERROR = "✖ " if not is_windows else "> "

def print_info(msg):
    print(Color.BLUE + BulletSymbol.INFO + msg + Color.ENDC)

def print_ok(msg):
    print(Color.GREEN + BulletSymbol.OK + msg + Color.ENDC)

def print_error(err):
    print(Color.RED + BulletSymbol.ERROR + err + Color.ENDC)

class Tester:

    def __init__(self, name):
        self.name = name

    def applies(self, benchmark_info):
        return True

    def check_exit_status(self, expected_exit_status, exit_status, output,
                          error, flags):
        if exit_status == STATUS_TIMEOUT:
            print_error("Timeout")
            return EXIT_SKIP if g_args.skip_timeout else EXIT_FAILURE
        elif exit_status != expected_exit_status:
            print_error(
                'Unexpected exit status: expected "{}" but got "{}"'.format(
                    expected_exit_status, exit_status))
            return EXIT_FAILURE
        return EXIT_OK

    def run(self, benchmark_info):
        """Prints information about the test that is about to run and then
        calls the internal run method to execute the test."""

        print()
        print_info(self.name)
        print("  Flags: {}".format(benchmark_info.command_line_args))
        return self.run_internal(benchmark_info)

    def run_internal(self, benchmark_info):
        """Runs cvc5 on a given benchmark and checks the output."""

        output, error, exit_status = run_benchmark(benchmark_info)
        exit_code = self.check_exit_status(benchmark_info.expected_exit_status,
                                           exit_status, output, error,
                                           benchmark_info.command_line_args)
        if exit_code == EXIT_SKIP:
            return exit_code

        if benchmark_info.compare_outputs and output != benchmark_info.expected_output:
            print_error("Unexpected output difference")
            print("  " + "=" * 78)
            print_diff(output, benchmark_info.expected_output)
            print("  " + "=" * 78)
            exit_code = EXIT_FAILURE
        if benchmark_info.compare_outputs and error != benchmark_info.expected_error:
            print_error("Unexpected error output difference")
            print("  " + "=" * 78)
            print_diff(error, benchmark_info.expected_error)
            print("  " + "=" * 78)
            exit_code = EXIT_FAILURE
        if exit_code == EXIT_OK:
            print_ok("OK")
        return exit_code


################################################################################
# The testers
#
# Testers use `Tester` as a base class and implement `applies()` and
# `run_internal()` methods. The `applies()` method returns `True` if a tester
# applies to a given benchmark and `run_internal()` runs the actual test. Most
# testers can invoke the `run_internal()` method in the base class, which calls
# the cvc5 binary with a set of arguments and checks the expected output (both
# stdout and stderr) as well as the exit status.
#
# To add a new tester, add a class and add it to the `g_testers` dictionary.
################################################################################
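
# As a purely illustrative sketch (not registered in `g_testers` and not part
# of the actual test suite), a tester that reruns benchmarks with an extra,
# hypothetical flag `--some-option` could look roughly like this:
#
#   class SomeOptionTester(Tester):
#
#       def __init__(self):
#           super().__init__("some-option")
#
#       def applies(self, benchmark_info):
#           return "--some-option" not in benchmark_info.command_line_args
#
#       def run_internal(self, benchmark_info):
#           return super().run_internal(
#               benchmark_info._replace(
#                   command_line_args=benchmark_info.command_line_args
#                   + ["--some-option"]))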


class BaseTester(Tester):

    def __init__(self):
        super().__init__("base")

    def run_internal(self, benchmark_info):
        return super().run_internal(benchmark_info)


class UnsatCoreTester(Tester):

    def __init__(self):
        super().__init__("unsat-core")

    def applies(self, benchmark_info):
        return (
            benchmark_info.benchmark_ext != ".sy"
            and "unsat" in benchmark_info.expected_output.split()
            and "--no-produce-unsat-cores" not in benchmark_info.command_line_args
            and "--no-check-unsat-cores" not in benchmark_info.command_line_args
            and "--check-unsat-cores" not in benchmark_info.command_line_args
            and "sygus-inference" not in benchmark_info.benchmark_content
            and "--unconstrained-simp" not in benchmark_info.command_line_args
        )

    def run_internal(self, benchmark_info):
        return super().run_internal(
            benchmark_info._replace(
                command_line_args=benchmark_info.command_line_args
                + ["--check-unsat-cores"]
            )
        )


class ProofTester(Tester):

    def __init__(self):
        super().__init__("proof")

    def applies(self, benchmark_info):
        expected_output_lines = benchmark_info.expected_output.split()
        return (
            benchmark_info.benchmark_ext != ".sy"
            and "unsat" in benchmark_info.expected_output.split()
        )

    def run_internal(self, benchmark_info):
        return super().run_internal(
            benchmark_info._replace(
                command_line_args=benchmark_info.command_line_args +
                ["--check-proofs", "--proof-granularity=theory-rewrite", "--proof-check=lazy"]
            )
        )

class DslProofTester(Tester):

    def __init__(self):
        super().__init__("dsl-proof")

    def applies(self, benchmark_info):
        expected_output_lines = benchmark_info.expected_output.split()
        return (
            benchmark_info.benchmark_ext != ".sy"
            and "unsat" in benchmark_info.expected_output.split()
        )

    def run_internal(self, benchmark_info):
        return super().run_internal(
            benchmark_info._replace(
                command_line_args=benchmark_info.command_line_args +
                ["--check-proofs", "--proof-granularity=dsl-rewrite", "--proof-check=lazy"]
            )
        )

class LfscTester(Tester):

    def __init__(self):
        super().__init__("lfsc")

    def applies(self, benchmark_info):
        return (
            benchmark_info.benchmark_ext != ".sy"
            and benchmark_info.expected_output.strip() == "unsat"
        )

    def run_internal(self, benchmark_info):
        exit_code = EXIT_OK
        with tempfile.NamedTemporaryFile() as tmpf:
            cvc5_args = [
                "--dump-proofs",
                "--proof-format=lfsc",
                "--proof-granularity=theory-rewrite",
                "--proof-check=lazy",
            ] + benchmark_info.command_line_args
            output, error, exit_status = run_process(
                [benchmark_info.cvc5_binary]
                + cvc5_args
                + [benchmark_info.benchmark_basename],
                benchmark_info.benchmark_dir,
                benchmark_info.timeout,
            )
            tmpf.write(output.strip("unsat\n".encode()))
            tmpf.flush()
            output, error = output.decode(), error.decode()
            exit_code = self.check_exit_status(EXIT_OK, exit_status, output,
                                               error, cvc5_args)
            if "check" not in output:
                print_error("Empty proof")
                print()
                print_outputs(output, error)
                return EXIT_FAILURE
            if exit_code != EXIT_OK:
                return exit_code
            output, error, exit_status = run_process(
                [benchmark_info.lfsc_binary] +
                benchmark_info.lfsc_sigs + [tmpf.name],
                benchmark_info.benchmark_dir,
                timeout=benchmark_info.timeout,
            )
            output, error = output.decode(), error.decode()
            exit_code = self.check_exit_status(EXIT_OK, exit_status, output,
                                               error, cvc5_args)
            if "success" not in output:
                print_error("Invalid proof")
                print()
                print_outputs(output, error)
                return EXIT_FAILURE
        if exit_code == EXIT_OK:
            print_ok("OK")
        return exit_code

class CpcTester(Tester):

    def __init__(self):
        super().__init__("cpc")

    def applies(self, benchmark_info):
        return (
            benchmark_info.benchmark_ext != ".sy"
            and benchmark_info.expected_output.strip() == "unsat"
        )

    def run_internal(self, benchmark_info):
        exit_code = EXIT_OK
        with tempfile.NamedTemporaryFile() as tmpf:
            cvc5_args = [
                "--dump-proofs",
                "--proof-format=cpc",
                "--proof-granularity=dsl-rewrite",
                "--proof-print-conclusion",
            ] + benchmark_info.command_line_args
            output, error, exit_status = run_process(
                [benchmark_info.cvc5_binary]
                + cvc5_args
                + [benchmark_info.benchmark_basename],
                benchmark_info.benchmark_dir,
                benchmark_info.timeout,
            )
            cpc_sig_dir = os.path.abspath(g_args.cpc_sig_dir)
            tmpf.write(("(include \"" + cpc_sig_dir + "/cpc/Cpc.eo\")").encode())
            tmpf.write(output.strip("unsat\n".encode()))
            tmpf.flush()
            output, error = output.decode(), error.decode()
            exit_code = self.check_exit_status(EXIT_OK, exit_status, output,
                                               error, cvc5_args)
            if ("step" not in output) and ("assume" not in output):
                print_error("Empty proof")
                print()
                print_outputs(output, error)
                return EXIT_FAILURE
            if exit_code != EXIT_OK:
                return exit_code
            output, error, exit_status = run_process(
                [benchmark_info.ethos_binary] +
                [tmpf.name],
                benchmark_info.benchmark_dir,
                timeout=benchmark_info.timeout,
            )
            output, error = output.decode(), error.decode()
            exit_code = self.check_exit_status(EXIT_OK, exit_status, output,
                                               error, cvc5_args)
            if ("correct" not in output) and ("incomplete" not in output):
                print_error("Invalid proof")
                print()
                print_outputs(output, error)
                return EXIT_FAILURE
        if exit_code == EXIT_OK:
            print_ok("OK")
        return exit_code

class ModelTester(Tester):

    def __init__(self):
        super().__init__("model")

    def applies(self, benchmark_info):
        expected_output_lines = benchmark_info.expected_output.split()
        return (benchmark_info.benchmark_ext != ".sy"
                and ("sat" in expected_output_lines
                     or "unknown" in expected_output_lines)
                and "--no-debug-check-models"
                not in benchmark_info.command_line_args
                and "--no-check-models" not in benchmark_info.command_line_args
                and "--debug-check-models"
                not in benchmark_info.command_line_args)

    def run_internal(self, benchmark_info):
        return super().run_internal(
            benchmark_info._replace(
                command_line_args=benchmark_info.command_line_args
                + ["--debug-check-models"]
            )
        )


class SynthTester(Tester):

    def __init__(self):
        super().__init__("synth")

    def applies(self, benchmark_info):
        return (
            benchmark_info.benchmark_ext == ".sy"
            and "--no-check-synth-sol" not in benchmark_info.command_line_args
            and "--sygus-rr-synth" not in benchmark_info.command_line_args
            and "--sygus-rr-verify" not in benchmark_info.command_line_args
            and "--check-synth-sol" not in benchmark_info.command_line_args
        )

    def run_internal(self, benchmark_info):
        return super().run_internal(
            benchmark_info._replace(
                command_line_args=benchmark_info.command_line_args
                + ["--check-synth-sol"]
            )
        )


class AbductTester(Tester):

    def __init__(self):
        super().__init__("abduct")

    def applies(self, benchmark_info):
        return (
            benchmark_info.benchmark_ext != ".sy"
            and "--no-check-abducts" not in benchmark_info.command_line_args
            and "--check-abducts" not in benchmark_info.command_line_args
            and "get-abduct" in benchmark_info.benchmark_content
        )

    def run_internal(self, benchmark_info):
        return super().run_internal(
            benchmark_info._replace(
                command_line_args=benchmark_info.command_line_args + ["--check-abducts"]
            )
        )


class DumpTester(Tester):

    def __init__(self):
        super().__init__("dump")

    def applies(self, benchmark_info):
        return True

    def run_internal(self, benchmark_info):
        ext_to_lang = {
            ".smt2": "smt2",
            ".sy": "sygus",
        }

        tmpf_name = None
        with tempfile.NamedTemporaryFile(delete=False) as tmpf:
            dump_args = [
                "--parse-only",
                "-o",
                "raw-benchmark",
                "--output-lang={}".format(ext_to_lang[benchmark_info.benchmark_ext]),
            ]
            dump_output, _, _ = run_process(
                [benchmark_info.cvc5_binary]
                + benchmark_info.command_line_args
                + dump_args
                + [benchmark_info.benchmark_basename],
                benchmark_info.benchmark_dir,
                benchmark_info.timeout,
            )

            tmpf_name = tmpf.name
            tmpf.write(dump_output)

        if not tmpf_name:
            return EXIT_FAILURE

        exit_code = super().run_internal(
            benchmark_info._replace(
                command_line_args=benchmark_info.command_line_args
                + [
                    "--parse-only",
                    "--lang={}".format(ext_to_lang[benchmark_info.benchmark_ext]),
                ],
                benchmark_basename=tmpf.name,
                expected_exit_status=0,
                compare_outputs=False,
            )
        )
        os.remove(tmpf.name)
        return exit_code


g_testers = {
    "base": BaseTester(),
    "unsat-core": UnsatCoreTester(),
    "proof": ProofTester(),
    "lfsc": LfscTester(),
    "model": ModelTester(),
    "synth": SynthTester(),
    "abduct": AbductTester(),
    "dump": DumpTester(),
    "dsl-proof": DslProofTester(),
    "cpc": CpcTester()
}

g_default_testers = [
    "base",
    "unsat-core",
    "proof",
    "model",
    "synth",
    "abduct",
    "dump",
]

################################################################################

BenchmarkInfo = collections.namedtuple(
    "BenchmarkInfo",
    [
        "wrapper",
        "scrubber",
        "error_scrubber",
        "timeout",
        "cvc5_binary",
        "lfsc_binary",
        "lfsc_sigs",
        "ethos_binary",
        "benchmark_dir",
        "benchmark_basename",
        "benchmark_ext",
        "benchmark_content",
        "expected_output",
        "expected_error",
        "expected_exit_status",
        "command_line_args",
        "compare_outputs",
    ],
)

SCRUBBER = "SCRUBBER:"
ERROR_SCRUBBER = "ERROR-SCRUBBER:"
EXPECT = "EXPECT:"
EXPECT_ERROR = "EXPECT-ERROR:"
EXIT = "EXIT:"
COMMAND_LINE = "COMMAND-LINE:"
REQUIRES = "REQUIRES:"
DISABLE_TESTER = "DISABLE-TESTER:"
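
# These prefixes are read from comment lines in a benchmark file. For an .smt2
# benchmark (comment character ';') a purely illustrative header could look
# like this:
#
#   ; COMMAND-LINE: --incremental
#   ; EXPECT: sat
#   ; EXPECT: unsat
#   ; EXIT: 0
#   ; DISABLE-TESTER: proof
#
# Each COMMAND-LINE line yields a separate run of the benchmark, and multiple
# EXPECT (or EXPECT-ERROR) lines are concatenated into the expected output.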

EXIT_OK = 0
EXIT_FAILURE = 1
EXIT_SKIP = 77
STATUS_TIMEOUT = 124


def print_colored(color, text):
    """Prints `text` in color `color`."""

    for line in text.splitlines():
        print(color + line + Color.ENDC)


def print_segment(color, text):
    """Prints colored `text` inside a border."""
    print("  " + "=" * 78)
    for line in text.splitlines():
        print("  " + color + line + Color.ENDC)
    print("  " + "=" * 78)
    print()


def print_outputs(stdout, stderr):
    """Prints standard output and error."""
    if stdout.strip() != "":
        print("  Output:")
        print_segment(Color.BLUE, stdout)
    if stderr.strip() != "":
        print("  Error output:")
        print_segment(Color.YELLOW, stderr)


def print_diff(actual, expected):
    """Prints the difference between `actual` and `expected`."""

    for line in difflib.unified_diff(expected.splitlines(),
                                     actual.splitlines(),
                                     "expected",
                                     "actual",
                                     lineterm=""):
        if line.startswith("+"):
            print_colored(Color.GREEN, "  " + line)
        elif line.startswith("-"):
            print_colored(Color.RED, "  " + line)
        else:
            print("  " + line)


def run_process(args, cwd, timeout, s_input=None):
    """Runs a process with a timeout `timeout` in seconds. `args` are the
    arguments to execute, `cwd` is the working directory and `s_input` is the
    input to be sent to the process over stdin. If `args` is a list, the
    arguments are escaped and concatenated. If `args` is a string, it is
    executed as-is (without additional escaping). Returns the output, the error
    output and the exit code of the process. If the process times out, the
    output and the error output are empty and the exit code is 124."""

    cmd = " ".join([shlex.quote(a)
                    for a in args]) if isinstance(args, list) else args
    print("  $ {}".format(str(cmd)))
    out = bytes()
    err = bytes()
    exit_status = STATUS_TIMEOUT
    try:
        # Instead of setting shell=True, we explicitly call bash. Using
        # shell=True seems to produce different exit codes on different
        # platforms under certain circumstances.
        res = subprocess.run(
            [bash_bin,"-c", cmd],
            cwd=cwd,
            input=s_input,
            timeout=timeout,
            stderr=subprocess.PIPE,
            stdout=subprocess.PIPE,
        )
        out = res.stdout
        err = res.stderr
        exit_status = res.returncode
    except subprocess.TimeoutExpired:
        exit_status = STATUS_TIMEOUT

    return out, err, exit_status
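
# For example (illustrative; the binary path is a placeholder), a call such as
#
#   out, err, status = run_process(["./cvc5", "--version"], cwd=".", timeout=10)
#
# joins and quotes the list into a single command, runs it through bash in the
# given working directory, and returns raw stdout/stderr as bytes together with
# the exit status (or 124 if the timeout expired).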


def get_cvc5_features(cvc5_binary):
    """Returns a list of features supported by the cvc5 binary `cvc5_binary`."""

    output, _, _ = run_process([cvc5_binary, "--show-config"], None, None)
    if isinstance(output, bytes):
        output = output.decode()

    features = []
    disabled_features = []
    for line in output.split("\n"):
        tokens = [t.strip() for t in line.split(":")]
        if len(tokens) == 2:
            key, value = tokens
            if value == "yes":
                features.append(key)
            elif value == "no":
                disabled_features.append(key)

    return features, disabled_features

def check_scrubber(scrubber_error, scrubber):
    """Checks that the scrubber command `scrubber` did not print anything to
    its error output `scrubber_error`. Returns EXIT_FAILURE if it did and None
    otherwise."""
    if len(scrubber_error) == 0:
        return None
    if isinstance(scrubber_error, bytes):
        scrubber_error = scrubber_error.decode()
    print_error("The scrubber's error output is not empty")
    print()
    print("  Command: {}".format(scrubber))
    print()
    print("  Error output:")
    print("  " + "=" * 78)
    print(scrubber_error)
    print("  " + "=" * 78)
    return EXIT_FAILURE

def run_benchmark(benchmark_info):
    """Runs cvc5 on a benchmark with the given `benchmark_info`. It runs on the
    file `benchmark_basename` in the directory `benchmark_dir` using the binary
    `cvc5_binary` with the command line options `command_line_args`. The output
    is scrubbed using `scrubber` and `error_scrubber` for stdout and stderr,
    respectively."""

    bin_args = benchmark_info.wrapper[:]
    bin_args.append(benchmark_info.cvc5_binary)

    output = None
    error = None
    exit_status = None
    output, error, exit_status = run_process(
        bin_args
        + benchmark_info.command_line_args
        + [benchmark_info.benchmark_basename],
        benchmark_info.benchmark_dir,
        benchmark_info.timeout,
    )

    # If a scrubber command has been specified then apply it to the output.
    scrubber_error = ""
    if benchmark_info.scrubber:
        output, scrubber_error, _ = run_process(
            benchmark_info.scrubber,
            benchmark_info.benchmark_dir,
            benchmark_info.timeout,
            output,
        )
    # Make sure that the scrubber itself did not print anything to its error
    # output. Callers expect an (output, error, exit_status) triple, so report
    # a scrubber failure as a failing run rather than returning a bare code.
    if check_scrubber(scrubber_error, benchmark_info.scrubber) is not None:
        return ("", "", EXIT_FAILURE)

    scrubber_error = ""
    if benchmark_info.error_scrubber:
        error, scrubber_error, _ = run_process(
            benchmark_info.error_scrubber,
            benchmark_info.benchmark_dir,
            benchmark_info.timeout,
            error,
        )
    # Make sure that the error scrubber itself did not print anything to its
    # error output.
    if check_scrubber(scrubber_error, benchmark_info.error_scrubber) is not None:
        return ("", "", EXIT_FAILURE)

    # Popen in Python 3 returns a bytes object instead of a string for
    # stdout/stderr.
    if isinstance(output, bytes):
        output = output.decode()
    if isinstance(error, bytes):
        error = error.decode()
    output = re.sub(r"^[ \t]*|\r", "", output.strip(), flags=re.MULTILINE)
    error = re.sub(r"^[ \t]*|\r", "", error.strip(), flags=re.MULTILINE)
    # qemu (used for arm nightlies) emits additional error output for non-zero exit codes
    error = re.sub(r"qemu: uncaught target signal.*", "", error).strip()
    return (output, error, exit_status)


def run_regression(
    testers,
    wrapper,
    cvc5_binary,
    lfsc_binary,
    lfsc_sigs,
    ethos_binary,
    benchmark_path,
    timeout,
):
    """Determines the expected output for a benchmark, runs cvc5 on it using
    all the specified `testers` and then checks whether the output corresponds
    to the expected output. Optionally uses a wrapper `wrapper`."""

    if not os.access(cvc5_binary, os.X_OK):
        sys.exit('"{}" does not exist or is not executable'.format(cvc5_binary))
    if not os.path.isfile(benchmark_path):
        sys.exit('"{}" does not exist or is not a file'.format(benchmark_path))

    cvc5_features, cvc5_disabled_features = get_cvc5_features(cvc5_binary)

    basic_command_line_args = []

    benchmark_basename = os.path.basename(benchmark_path)
    benchmark_filename, benchmark_ext = os.path.splitext(benchmark_basename)
    benchmark_dir = os.path.dirname(benchmark_path)
    comment_char = "%"
    status_regex = None
    status_to_output = lambda s: s
    if benchmark_ext == ".smt2":
        status_regex = r"set-info\s*:status\s*(sat|unsat)"
        comment_char = ";"
    elif benchmark_ext == ".sy":
        comment_char = ";"
    else:
        sys.exit('"{}" must be *.smt2 or *.sy'.format(benchmark_basename))

    benchmark_lines = None
    with open(benchmark_path, "r") as benchmark_file:
        benchmark_lines = benchmark_file.readlines()
    benchmark_content = "".join(benchmark_lines)

    # Extract the metadata for the benchmark.
    scrubber = None
    error_scrubber = None
    expected_output = ""
    expected_error = ""
    expected_exit_status = None
    command_lines = []
    requires = []
    for line in benchmark_lines:
        # Skip lines that do not start with a comment character.
        if line[0] != comment_char:
            continue
        line = line[1:].lstrip()

        if line.startswith(SCRUBBER):
            scrubber = line[len(SCRUBBER) :].strip()
        elif line.startswith(ERROR_SCRUBBER):
            error_scrubber = line[len(ERROR_SCRUBBER) :].strip()
        elif line.startswith(EXPECT):
            expected_output += line[len(EXPECT) :].strip() + "\n"
        elif line.startswith(EXPECT_ERROR):
            expected_error += line[len(EXPECT_ERROR) :].strip() + "\n"
        elif line.startswith(EXIT):
            expected_exit_status = int(line[len(EXIT) :].strip())
        elif line.startswith(COMMAND_LINE):
            command_lines.append(line[len(COMMAND_LINE) :].strip())
        elif line.startswith(REQUIRES):
            requires.append(line[len(REQUIRES) :].strip())
        elif line.startswith(DISABLE_TESTER):
            disable_tester = line[len(DISABLE_TESTER) :].strip()
            if disable_tester not in g_testers:
                print("Unknown tester: {}".format(disable_tester))
                return EXIT_FAILURE
            if disable_tester in testers:
                testers.remove(disable_tester)
            if disable_tester == "dsl-proof":
                if "cpc" in testers:
                    testers.remove("cpc")
            if disable_tester == "proof":
                if "lfsc" in testers:
                    testers.remove("lfsc")
                if "dsl-proof" in testers:
                    testers.remove("dsl-proof")
                if "cpc" in testers:
                    testers.remove("cpc")

    expected_output = expected_output.strip()
    expected_error = expected_error.strip()

    # Expected output/expected error has not been defined in the metadata for
    # the benchmark. Try to extract the information from the benchmark itself.
    if expected_output == "" and expected_error == "":
        match = None
        if status_regex:
            match = re.findall(status_regex, benchmark_content)

        if match:
            expected_output = status_to_output("\n".join(match))
        elif expected_exit_status is None:
            # If there is no expected output/error and the exit status has not
            # been set explicitly, the benchmark is invalid.
            print_error('Cannot determine status of benchmark')
            return EXIT_FAILURE
    if expected_exit_status is None:
        expected_exit_status = 0

    if "CVC5_REGRESSION_ARGS" in os.environ:
        basic_command_line_args += shlex.split(os.environ["CVC5_REGRESSION_ARGS"])

    for req_feature in requires:
        is_negative = False
        if req_feature.startswith("no-"):
            req_feature = req_feature[len("no-") :]
            is_negative = True
        if req_feature not in (cvc5_features + cvc5_disabled_features):
            print_error(
                "Illegal requirement in regression: {}".format(req_feature))
            print("  Allowed requirements: {}".format(
                " ".join(cvc5_features + cvc5_disabled_features)))
            return EXIT_FAILURE
        if is_negative:
            if req_feature in cvc5_features:
                print_info("Skipped regression: not compatible with {}".format(
                    req_feature))
                return EXIT_SKIP if g_args.use_skip_return_code else EXIT_OK
        elif req_feature not in cvc5_features:
            print_info("Skipped regression: requires {}".format(req_feature))
            return EXIT_SKIP if g_args.use_skip_return_code else EXIT_OK

    if not command_lines:
        command_lines.append("")

    tests = []
    expected_output_lines = expected_output.split()
    command_line_args_configs = []
    for command_line in command_lines:
        args = shlex.split(command_line)
        all_args = basic_command_line_args + args

        command_line_args_configs.append(all_args)

        benchmark_info = BenchmarkInfo(
            wrapper=wrapper,
            scrubber=scrubber,
            error_scrubber=error_scrubber,
            timeout=timeout,
            cvc5_binary=cvc5_binary,
            lfsc_binary=lfsc_binary,
            lfsc_sigs=lfsc_sigs,
            ethos_binary=ethos_binary,
            benchmark_dir=benchmark_dir,
            benchmark_basename=benchmark_basename,
            benchmark_ext=benchmark_ext,
            benchmark_content=benchmark_content,
            expected_output=expected_output,
            expected_error=expected_error,
            expected_exit_status=expected_exit_status,
            command_line_args=all_args,
            compare_outputs=True,
        )
        for tester_name, tester in g_testers.items():
            if tester_name in testers and tester.applies(benchmark_info):
                tests.append((tester, benchmark_info))

    if len(tests) == 0:
        print("Skipped regression: no tests to run")
        return EXIT_SKIP if g_args.use_skip_return_code else EXIT_OK

    # Run cvc5 on the benchmark with the different testers and check whether
    # the exit status, stdout output, stderr output are as expected.
    exit_code = EXIT_OK
    for tester, benchmark_info in tests:
        test_exit_code = tester.run(benchmark_info)
        if exit_code == EXIT_FAILURE or test_exit_code == EXIT_FAILURE:
            exit_code = EXIT_FAILURE
        else:
            exit_code = test_exit_code

    return exit_code


def main():
    """Parses the command line arguments and then calls the core of the
    script."""

    global g_args

    parser = argparse.ArgumentParser(
        description="Runs benchmark and checks for correct exit status and output."
    )
    tester_choices = ["all"] + list(g_testers.keys())
    parser.add_argument("--use-skip-return-code", action="store_true")
    parser.add_argument("--skip-timeout", action="store_true")
    parser.add_argument("--tester", choices=tester_choices, action="append")
    parser.add_argument("--lfsc-binary", default="")
    parser.add_argument("--lfsc-sig-dir", default="")
    parser.add_argument("--ethos-binary", default="")
    parser.add_argument("--cpc-sig-dir", default="")
    parser.add_argument("wrapper", nargs="*")
    parser.add_argument("cvc5_binary")
    parser.add_argument("benchmark")

    argv = sys.argv[1:]
    # Append options passed via RUN_REGRESSION_ARGS to argv
    if os.environ.get("RUN_REGRESSION_ARGS"):
        argv.extend(shlex.split(os.getenv("RUN_REGRESSION_ARGS")))

    g_args = parser.parse_args(argv)

    cvc5_binary = os.path.abspath(g_args.cvc5_binary)
    lfsc_binary = os.path.abspath(g_args.lfsc_binary)
    ethos_binary = os.path.abspath(g_args.ethos_binary)

    wrapper = g_args.wrapper
    if os.environ.get("VALGRIND") == "1" and not wrapper:
        wrapper = ["libtool", "--mode=execute", "valgrind"]

    timeout = float(os.getenv("TEST_TIMEOUT", "600"))

    testers = g_args.tester
    if not testers:
        testers = g_default_testers
    elif "all" in testers:
        testers = list(g_testers.keys())

    lfsc_sigs = []
    if not g_args.lfsc_sig_dir == "":
        lfsc_sig_dir = os.path.abspath(g_args.lfsc_sig_dir)
        # `os.listdir` would be more appropriate if lfsc did not force us to
        # list the signatures in order.
        lfsc_sigs = ["core_defs", "util_defs", "theory_def", "nary_programs",
                     "boolean_programs", "boolean_rules", "cnf_rules",
                     "equality_rules", "arith_programs", "arith_rules",
                     "strings_programs", "strings_rules", "quantifiers_rules"]
        lfsc_sigs = [os.path.join(lfsc_sig_dir, sig + ".plf")
                     for sig in lfsc_sigs]
    cpc_sig_dir = os.path.abspath(g_args.cpc_sig_dir)
    return run_regression(
        testers,
        wrapper,
        cvc5_binary,
        lfsc_binary,
        lfsc_sigs,
        ethos_binary,
        g_args.benchmark,
        timeout,
    )


if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)



