cvc5-cvc5-1.2.0.src.util.floatingpoint.h Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of cvc5-turnkey Show documentation
TurnKey artifact for cvc5
The newest version!
/******************************************************************************
 * Top contributors (to current version):
 *   Aina Niemetz, Martin Brain, Mathias Preiner
 * Copyright (c) 2013  University of Oxford
 *
 * This file is part of the cvc5 project.
 *
 * Copyright (c) 2009-2024 by the authors listed in the file AUTHORS
 * in the top-level source directory and their institutional affiliations.
 * All rights reserved.  See the file COPYING in the top-level source
 * directory for licensing information.
 * ****************************************************************************
 *
 * A floating-point value.
 *
 * This file contains the data structures used by the constant and parametric
 * types of the floating point theory.
 */
#include "cvc5_public.h"

#ifndef CVC5__FLOATINGPOINT_H
#define CVC5__FLOATINGPOINT_H

#include 

#include "util/bitvector.h"
#include "util/floatingpoint_size.h"
#include "util/rational.h"
#include "util/roundingmode.h"

/* -------------------------------------------------------------------------- */

namespace cvc5::internal {

/* -------------------------------------------------------------------------- */

class FloatingPointLiteral;

class FloatingPoint
{
 public:
  /**
   * Wrappers to represent results of non-total functions (e.g., min/max).
   * The Boolean flag is true if the result is defined, and false otherwise.
   */
  using PartialFloatingPoint = std::pair;
  using PartialBitVector = std::pair;
  using PartialRational = std::pair;

  /**
   * Get the number of exponent bits in the unpacked format corresponding to a
   * given packed format.  This is the unpacked counter-part of
   * FloatingPointSize::exponentWidth().
   */
  static uint32_t getUnpackedExponentWidth(FloatingPointSize& size);
  /**
   * Get the number of exponent bits in the unpacked format corresponding to a
   * given packed format.  This is the unpacked counter-part of
   * FloatingPointSize::significandWidth().
   */
  static uint32_t getUnpackedSignificandWidth(FloatingPointSize& size);

  /** Constructors. */

  /** Create a FP value from its IEEE bit-vector representation. */
  FloatingPoint(uint32_t e, uint32_t s, const BitVector& bv);
  FloatingPoint(const FloatingPointSize& size, const BitVector& bv);

  /**
   * Create a FP value from a signed or unsigned bit-vector value with
   * respect to given rounding mode.
   */
  FloatingPoint(const FloatingPointSize& size,
                const RoundingMode& rm,
                const BitVector& bv,
                bool signedBV);

  /**
   * Create a FP value from a rational value with respect to given rounding
   * mode.
   */
  FloatingPoint(const FloatingPointSize& size,
                const RoundingMode& rm,
                const Rational& r);

  /** Copy constructor. */
  FloatingPoint(const FloatingPoint& fp);

  /** Destructor. */
  ~FloatingPoint();

  /**
   * Create a FP NaN value of given size.
   * size: The FP size (format).
   */
  static FloatingPoint makeNaN(const FloatingPointSize& size);
  /**
   * Create a FP infinity value of given size.
   * size: The FP size (format).
   * sign: True for -oo, false otherwise.
   */
  static FloatingPoint makeInf(const FloatingPointSize& size, bool sign);
  /**
   * Create a FP zero value of given size.
   * size: The FP size (format).
   * sign: True for -zero, false otherwise.
   */
  static FloatingPoint makeZero(const FloatingPointSize& size, bool sign);
  /**
   * Create the smallest subnormal FP value of given size.
   * size: The FP size (format).
   * sign: True for negative sign, false otherwise.
   */
  static FloatingPoint makeMinSubnormal(const FloatingPointSize& size,
                                        bool sign);
  /**
   * Create the largest subnormal FP value of given size.
   * size: The FP size (format).
   * sign: True for negative sign, false otherwise.
   */
  static FloatingPoint makeMaxSubnormal(const FloatingPointSize& size,
                                        bool sign);
  /**
   * Create the smallest normal FP value of given size.
   * size: The FP size (format).
   * sign: True for negative sign, false otherwise.
   */
  static FloatingPoint makeMinNormal(const FloatingPointSize& size, bool sign);
  /**
   * Create the largest normal FP value of given size.
   * size: The FP size (format).
   * sign: True for negative sign, false otherwise.
   */
  static FloatingPoint makeMaxNormal(const FloatingPointSize& size, bool sign);

  /** Return the size of this FP value. */
  const FloatingPointSize& getSize() const;

  /** Get the wrapped floating-point value. */
  const FloatingPointLiteral* getLiteral(void) const { return d_fpl.get(); }

  /**
   * Return a string representation of this floating-point.
   *
   * If printAsIndexed is true then it prints the bit-vector components of the
   * FP value as indexed symbols, otherwise in binary notation.
   */
  std::string toString(bool printAsIndexed = false) const;

  /**
   * Get the IEEE bit-vector representation of this floating-point value.
   * Stores the sign bit in `sign`, the exponent in `exp` and the significand
   * in `sig`.
   */
  void getIEEEBitvectors(BitVector& sign, BitVector& exp, BitVector& sig) const;

  /** Return the packed (IEEE-754) representation of this floating-point. */
  BitVector pack(void) const;

  /** Floating-point absolute value. */
  FloatingPoint absolute(void) const;
  /** Floating-point negation. */
  FloatingPoint negate(void) const;
  /** Floating-point addition. */
  FloatingPoint add(const RoundingMode& rm, const FloatingPoint& arg) const;
  /** Floating-point subtraction. */
  FloatingPoint sub(const RoundingMode& rm, const FloatingPoint& arg) const;
  /** Floating-point multiplication. */
  FloatingPoint mult(const RoundingMode& rm, const FloatingPoint& arg) const;
  /** Floating-point division. */
  FloatingPoint div(const RoundingMode& rm, const FloatingPoint& arg) const;
  /** Floating-point fused multiplication and addition. */
  FloatingPoint fma(const RoundingMode& rm,
                    const FloatingPoint& arg1,
                    const FloatingPoint& arg2) const;
  /** Floating-point square root. */
  FloatingPoint sqrt(const RoundingMode& rm) const;
  /** Floating-point round to integral. */
  FloatingPoint rti(const RoundingMode& rm) const;
  /** Floating-point remainder. */
  FloatingPoint rem(const FloatingPoint& arg) const;

  /**
   * Floating-point max (total version).
   * zeroCase: true to return the left (rather than the right operand) in case
   *           of max(-0,+0) or max(+0,-0).
   */
  FloatingPoint maxTotal(const FloatingPoint& arg, bool zeroCaseLeft) const;
  /**
   * Floating-point min (total version).
   * zeroCase: true to return the left (rather than the right operand) in case
   *           of min(-0,+0) or min(+0,-0).
   */
  FloatingPoint minTotal(const FloatingPoint& arg, bool zeroCaseLeft) const;

  /**
   * Floating-point max.
   *
   * Returns a partial floating-point, which is a pair of FloatingPoint and
   * bool. The boolean flag is true if the result is defined, and false if it
   * is undefined.
   */
  PartialFloatingPoint max(const FloatingPoint& arg) const;
  /** Floating-point min.
   *
   * Returns a partial floating-point, which is a pair of FloatingPoint and
   * bool. The boolean flag is true if the result is defined, and false if it
   * is undefined.
   */
  PartialFloatingPoint min(const FloatingPoint& arg) const;

  /** Equality (NOT: fp.eq but =) over floating-point values. */
  bool operator==(const FloatingPoint& fp) const;
  /** Floating-point less or equal than. */
  bool operator<=(const FloatingPoint& arg) const;
  /** Floating-point less than. */
  bool operator<(const FloatingPoint& arg) const;

  /** Get the exponent of this floating-point value. */
  BitVector getExponent() const;
  /** Get the significand of this floating-point value. */
  BitVector getSignificand() const;
  /** True if this value is a negative value. */
  bool getSign() const;

  /** Return true if this floating-point represents a normal value. */
  bool isNormal(void) const;
  /** Return true if this floating-point represents a subnormal value. */
  bool isSubnormal(void) const;
  /** Return true if this floating-point represents a zero value. */
  bool isZero(void) const;
  /** Return true if this floating-point represents an infinite value. */
  bool isInfinite(void) const;
  /** Return true if this floating-point represents a NaN value. */
  bool isNaN(void) const;
  /** Return true if this floating-point represents a negative value. */
  bool isNegative(void) const;
  /** Return true if this floating-point represents a positive value. */
  bool isPositive(void) const;

  /**
   * Convert this floating-point to a floating-point of given size, with
   * respect to given rounding mode.
   */
  FloatingPoint convert(const FloatingPointSize& target,
                        const RoundingMode& rm) const;

  /**
   * Convert this floating-point to a bit-vector of given size, with
   * respect to given rounding mode (total version).
   * Returns given bit-vector 'undefinedCase' in the undefined case.
   */
  BitVector convertToBVTotal(BitVectorSize width,
                             const RoundingMode& rm,
                             bool signedBV,
                             BitVector undefinedCase) const;
  /**
   * Convert this floating-point to a rational, with respect to given rounding
   * mode (total version).
   * Returns given rational 'undefinedCase' in the undefined case.
   */
  Rational convertToRationalTotal(Rational undefinedCase) const;

  /**
   * Convert this floating-point to a bit-vector of given size.
   *
   * Returns a partial bit-vector, which is a pair of BitVector and bool.
   * The boolean flag is true if the result is defined, and false if it
   * is undefined.
   */
  PartialBitVector convertToBV(BitVectorSize width,
                               const RoundingMode& rm,
                               bool signedBV) const;
  /**
   * Convert this floating-point to a Rational.
   *
   * Returns a partial Rational, which is a pair of Rational and bool.
   * The boolean flag is true if the result is defined, and false if it
   * is undefined.
   */
  PartialRational convertToRational(void) const;

 private:
  /**
   * Constructor.
   *
   * Note: This constructor takes ownership of 'fpl' and is not intended for
   *       public use.
   */
  FloatingPoint(FloatingPointLiteral* fpl);

  /** The floating-point literal of this floating-point value. */
  std::unique_ptr d_fpl;

}; /* class FloatingPoint */

/**
 * Hash function for floating-point values.
 */
struct FloatingPointHashFunction
{
  size_t operator()(const FloatingPoint& fp) const
  {
    FloatingPointSizeHashFunction fpshf;
    BitVectorHashFunction bvhf;

    return fpshf(fp.getSize()) ^ bvhf(fp.pack());
  }
}; /* struct FloatingPointHashFunction */

/* -------------------------------------------------------------------------- */

/**
 * The parameter type for the conversions to floating point.
 */
class FloatingPointConvertSort
{
 public:
  /** Constructors. */
  FloatingPointConvertSort(uint32_t _e, uint32_t _s) : d_fp_size(_e, _s) {}
  FloatingPointConvertSort(const FloatingPointSize& fps) : d_fp_size(fps) {}

  /** Operator overload for comparison of conversion sorts. */
  bool operator==(const FloatingPointConvertSort& fpcs) const
  {
    return d_fp_size == fpcs.d_fp_size;
  }

  /** Return the size of this FP convert sort. */
  FloatingPointSize getSize() const { return d_fp_size; }

 private:
  /** The floating-point size of this sort. */
  FloatingPointSize d_fp_size;
};

/** Hash function for conversion sorts. */
template 
struct FloatingPointConvertSortHashFunction
{
  inline size_t operator()(const FloatingPointConvertSort& fpcs) const
  {
    FloatingPointSizeHashFunction f;
    return f(fpcs.getSize()) ^ (0x00005300 | (key << 24));
  }
}; /* struct FloatingPointConvertSortHashFunction */

/**
 * As different conversions are different parameterised kinds, there
 * is a need for different (C++) types for each one.
 */

/**
 * Conversion from floating-point to IEEE bit-vector (first bit represents the
 * sign, the following exponent width bits the exponent, and the remaining bits
 * the significand).
 */
class FloatingPointToFPIEEEBitVector : public FloatingPointConvertSort
{
 public:
  /** Constructors. */
  FloatingPointToFPIEEEBitVector(uint32_t _e, uint32_t _s)
      : FloatingPointConvertSort(_e, _s)
  {
  }
  FloatingPointToFPIEEEBitVector(const FloatingPointConvertSort& old)
      : FloatingPointConvertSort(old)
  {
  }
};

/**
 * Conversion from floating-point to another floating-point (of possibly
 * different size).
 */
class FloatingPointToFPFloatingPoint : public FloatingPointConvertSort
{
 public:
  /** Constructors. */
  FloatingPointToFPFloatingPoint(uint32_t _e, uint32_t _s)
      : FloatingPointConvertSort(_e, _s)
  {
  }
  FloatingPointToFPFloatingPoint(const FloatingPointConvertSort& old)
      : FloatingPointConvertSort(old)
  {
  }
};

/**
 * Conversion from floating-point to Real.
 */
class FloatingPointToFPReal : public FloatingPointConvertSort
{
 public:
  /** Constructors. */
  FloatingPointToFPReal(uint32_t _e, uint32_t _s)
      : FloatingPointConvertSort(_e, _s)
  {
  }
  FloatingPointToFPReal(const FloatingPointConvertSort& old)
      : FloatingPointConvertSort(old)
  {
  }
};

/**
 * Conversion from floating-point to signed bit-vector value.
 */
class FloatingPointToFPSignedBitVector : public FloatingPointConvertSort
{
 public:
  /** Constructors. */
  FloatingPointToFPSignedBitVector(uint32_t _e, uint32_t _s)
      : FloatingPointConvertSort(_e, _s)
  {
  }
  FloatingPointToFPSignedBitVector(const FloatingPointConvertSort& old)
      : FloatingPointConvertSort(old)
  {
  }
};

/**
 * Conversion from floating-point to unsigned bit-vector value.
 */
class FloatingPointToFPUnsignedBitVector : public FloatingPointConvertSort
{
 public:
  /** Constructors. */
  FloatingPointToFPUnsignedBitVector(uint32_t _e, uint32_t _s)
      : FloatingPointConvertSort(_e, _s)
  {
  }
  FloatingPointToFPUnsignedBitVector(const FloatingPointConvertSort& old)
      : FloatingPointConvertSort(old)
  {
  }
};

/**
 * Base type for floating-point to bit-vector conversion.
 */
class FloatingPointToBV
{
 public:
  /** Constructors. */
  FloatingPointToBV(uint32_t s) : d_bv_size(s) {}
  FloatingPointToBV(const FloatingPointToBV& old) : d_bv_size(old.d_bv_size) {}
  FloatingPointToBV(const BitVectorSize& old) : d_bv_size(old) {}

  /** Return the bit-width of the bit-vector to convert to. */
  operator uint32_t() const { return d_bv_size; }

  /** The bit-width of the bit-vector to converto to. */
  BitVectorSize d_bv_size;
};

/**
 * Conversion from floating-point to unsigned bit-vector value.
 */
class FloatingPointToUBV : public FloatingPointToBV
{
 public:
  FloatingPointToUBV(uint32_t _s) : FloatingPointToBV(_s) {}
  FloatingPointToUBV(const FloatingPointToBV& old) : FloatingPointToBV(old) {}
};

/**
 * Conversion from floating-point to signed bit-vector value.
 */
class FloatingPointToSBV : public FloatingPointToBV
{
 public:
  FloatingPointToSBV(uint32_t _s) : FloatingPointToBV(_s) {}
  FloatingPointToSBV(const FloatingPointToBV& old) : FloatingPointToBV(old) {}
};

/**
 * Conversion from floating-point to unsigned bit-vector value (total version).
 */
class FloatingPointToUBVTotal : public FloatingPointToBV
{
 public:
  FloatingPointToUBVTotal(uint32_t _s) : FloatingPointToBV(_s) {}
  FloatingPointToUBVTotal(const FloatingPointToBV& old) : FloatingPointToBV(old)
  {
  }
};

/**
 * Conversion from floating-point to signed bit-vector value (total version).
 */
class FloatingPointToSBVTotal : public FloatingPointToBV
{
 public:
  FloatingPointToSBVTotal(uint32_t _s) : FloatingPointToBV(_s) {}
  FloatingPointToSBVTotal(const FloatingPointToBV& old) : FloatingPointToBV(old)
  {
  }
};

/**
 * Hash function for floating-point to bit-vector conversions.
 */
template 
struct FloatingPointToBVHashFunction
{
  inline size_t operator()(const FloatingPointToBV& fptbv) const
  {
    UnsignedHashFunction f;
    return (key ^ 0x46504256) ^ f(fptbv.d_bv_size);
  }
}; /* struct FloatingPointToBVHashFunction */

/* Note: It is not possible to pack a number without a size to pack to,
 * thus it is difficult to implement output stream operator overloads for
 * FloatingPointLiteral in a sensible way. Use FloatingPoint instead. */

/** Output stream operator overloading for floating-point values. */
std::ostream& operator<<(std::ostream& os, const FloatingPoint& fp);

/** Output stream operator overloading for floating-point formats. */
std::ostream& operator<<(std::ostream& os, const FloatingPointSize& fps);

/** Output stream operator overloading for floating-point conversion sorts. */
std::ostream& operator<<(std::ostream& os,
                         const FloatingPointConvertSort& fpcs);

}  // namespace cvc5::internal

#endif /* CVC5__FLOATINGPOINT_H */