llvm
diff --git a/‎libc/config/linux/aarch64/entrypoints.txt
Lines changed: 9 additions & 0 deletions b/‎libc/config/linux/aarch64/entrypoints.txt
Lines changed: 9 additions & 0 deletions
diff --git a/‎libc/config/linux/x86_64/entrypoints.txt
Lines changed: 9 additions & 0 deletions b/‎libc/config/linux/x86_64/entrypoints.txt
Lines changed: 9 additions & 0 deletions
diff --git a/‎libc/docs/math/index.rst
Lines changed: 1 addition & 1 deletion b/‎libc/docs/math/index.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎libc/include/llvm-libc-macros/float16-macros.h
Lines changed: 8 additions & 0 deletions b/‎libc/include/llvm-libc-macros/float16-macros.h
Lines changed: 8 additions & 0 deletions
diff --git a/‎libc/spec/stdc.td
Lines changed: 3 additions & 0 deletions b/‎libc/spec/stdc.td
Lines changed: 3 additions & 0 deletions
diff --git a/‎libc/src/__support/FPUtil/CMakeLists.txt
Lines changed: 0 additions & 1 deletion b/‎libc/src/__support/FPUtil/CMakeLists.txt
Lines changed: 0 additions & 1 deletion
diff --git a/‎libc/src/__support/FPUtil/dyadic_float.h
Lines changed: 2 additions & 2 deletions b/‎libc/src/__support/FPUtil/dyadic_float.h
Lines changed: 2 additions & 2 deletions
diff --git a/‎libc/src/__support/FPUtil/generic/CMakeLists.txt
Lines changed: 2 additions & 0 deletions b/‎libc/src/__support/FPUtil/generic/CMakeLists.txt
Lines changed: 2 additions & 0 deletions
diff --git a/‎libc/src/__support/FPUtil/generic/FMA.h
Lines changed: 48 additions & 80 deletions b/‎libc/src/__support/FPUtil/generic/FMA.h
Lines changed: 48 additions & 80 deletions
diff --git a/‎libc/src/__support/FPUtil/multiply_add.h
Lines changed: 4 additions & 3 deletions b/‎libc/src/__support/FPUtil/multiply_add.h
Lines changed: 4 additions & 3 deletions
@@ -508,7 +508,9 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.ceilf16
     libc.src.math.copysignf16
     libc.src.math.f16divf
+    libc.src.math.f16fma
     libc.src.math.f16fmaf
+    libc.src.math.f16fmal
     libc.src.math.f16sqrtf
     libc.src.math.fabsf16
     libc.src.math.fdimf16
@@ -560,6 +562,13 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.ufromfpf16
     libc.src.math.ufromfpxf16
   )
+
+  if(LIBC_TYPES_HAS_FLOAT128)
+    list(APPEND TARGET_LIBM_ENTRYPOINTS
+      # math.h C23 mixed _Float16 and _Float128 entrypoints
+      libc.src.math.f16fmaf128
+    )
+  endif()
 endif()
 
 if(LIBC_TYPES_HAS_FLOAT128)
 
@@ -538,7 +538,9 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.ceilf16
     libc.src.math.copysignf16
     libc.src.math.f16divf
+    libc.src.math.f16fma
     libc.src.math.f16fmaf
+    libc.src.math.f16fmal
     libc.src.math.f16sqrtf
     libc.src.math.fabsf16
     libc.src.math.fdimf16
@@ -588,6 +590,13 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.ufromfpf16
     libc.src.math.ufromfpxf16
   )
+
+  if(LIBC_TYPES_HAS_FLOAT128)
+    list(APPEND TARGET_LIBM_ENTRYPOINTS
+      # math.h C23 mixed _Float16 and _Float128 entrypoints
+      libc.src.math.f16fmaf128
+    )
+  endif()
 endif()
 
 if(LIBC_TYPES_HAS_FLOAT128)
 
@@ -126,7 +126,7 @@ Basic Operations
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | f16div           | |check|          |                 |                        | N/A                  |                        | 7.12.14.4              | F.10.11                    |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| f16fma           | |check|          |                 |                        | N/A                  |                        | 7.12.14.5              | F.10.11                    |
+| f16fma           | |check|          | |check|         | |check|                | N/A                  | |check|                | 7.12.14.5              | F.10.11                    |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | fabs             | |check|          | |check|         | |check|                | |check|              | |check|                | 7.12.7.3               | F.10.4.3                   |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 
@@ -9,10 +9,18 @@
 #ifndef LLVM_LIBC_MACROS_FLOAT16_MACROS_H
 #define LLVM_LIBC_MACROS_FLOAT16_MACROS_H
 
+#include "../llvm-libc-types/float128.h"
+
 #if defined(__FLT16_MANT_DIG__) &&                                             \
     (!defined(__GNUC__) || __GNUC__ >= 13 || defined(__clang__)) &&            \
     !defined(__arm__) && !defined(_M_ARM) && !defined(__riscv)
 #define LIBC_TYPES_HAS_FLOAT16
+
+// TODO: This would no longer be required if HdrGen let us guard function
+// declarations with multiple macros.
+#ifdef LIBC_TYPES_HAS_FLOAT128
+#define LIBC_TYPES_HAS_FLOAT16_AND_FLOAT128
+#endif // LIBC_TYPES_HAS_FLOAT128
 #endif
 
 #endif // LLVM_LIBC_MACROS_FLOAT16_MACROS_H
@@ -477,7 +477,10 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"fma", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
           FunctionSpec<"fmaf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>, ArgSpec<FloatType>]>,
 
+          GuardedFunctionSpec<"f16fma", RetValSpec<Float16Type>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>, ArgSpec<DoubleType>], "LIBC_TYPES_HAS_FLOAT16">,
           GuardedFunctionSpec<"f16fmaf", RetValSpec<Float16Type>, [ArgSpec<FloatType>, ArgSpec<FloatType>, ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
+          GuardedFunctionSpec<"f16fmal", RetValSpec<Float16Type>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>], "LIBC_TYPES_HAS_FLOAT16">,
+          GuardedFunctionSpec<"f16fmaf128", RetValSpec<Float16Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT16_AND_FLOAT128">,
 
           FunctionSpec<"fmod", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
           FunctionSpec<"fmodf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
 
@@ -154,7 +154,6 @@ add_header_library(
   HDRS
     multiply_add.h
   DEPENDS
-    .fma
     libc.src.__support.common
 )
 
 
@@ -156,13 +156,13 @@ template <size_t Bits> struct DyadicFloat {
       // d_lo is denormal, but the output is normal.
       int scale_up_exponent = 1 - exp_lo;
       T scale_up_factor =
-          FPBits<T>::create_value(sign,
+          FPBits<T>::create_value(Sign::POS,
                                   static_cast<output_bits_t>(
                                       FPBits<T>::EXP_BIAS + scale_up_exponent),
                                   IMPLICIT_MASK)
               .get_val();
       T scale_down_factor =
-          FPBits<T>::create_value(sign,
+          FPBits<T>::create_value(Sign::POS,
                                   static_cast<output_bits_t>(
                                       FPBits<T>::EXP_BIAS - scale_up_exponent),
                                   IMPLICIT_MASK)
 
@@ -24,6 +24,8 @@ add_header_library(
     libc.src.__support.CPP.bit
     libc.src.__support.CPP.limits
     libc.src.__support.CPP.type_traits
+    libc.src.__support.FPUtil.basic_operations
+    libc.src.__support.FPUtil.dyadic_float
     libc.src.__support.FPUtil.fenv_impl
     libc.src.__support.FPUtil.fp_bits
     libc.src.__support.FPUtil.rounding_mode
 
@@ -12,7 +12,9 @@
 #include "src/__support/CPP/bit.h"
 #include "src/__support/CPP/limits.h"
 #include "src/__support/CPP/type_traits.h"
+#include "src/__support/FPUtil/BasicOperations.h"
 #include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/dyadic_float.h"
 #include "src/__support/FPUtil/rounding_mode.h"
 #include "src/__support/big_int.h"
 #include "src/__support/macros/attributes.h"   // LIBC_INLINE
@@ -106,20 +108,52 @@ LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
                                  sizeof(OutType) <= sizeof(InType),
                              OutType>
 fma(InType x, InType y, InType z) {
-  using OutFPBits = fputil::FPBits<OutType>;
+  using OutFPBits = FPBits<OutType>;
   using OutStorageType = typename OutFPBits::StorageType;
-  using InFPBits = fputil::FPBits<InType>;
+  using InFPBits = FPBits<InType>;
   using InStorageType = typename InFPBits::StorageType;
 
   constexpr int IN_EXPLICIT_MANT_LEN = InFPBits::FRACTION_LEN + 1;
   constexpr size_t PROD_LEN = 2 * IN_EXPLICIT_MANT_LEN;
   constexpr size_t TMP_RESULT_LEN = cpp::bit_ceil(PROD_LEN + 1);
   using TmpResultType = UInt<TMP_RESULT_LEN>;
+  using DyadicFloat = DyadicFloat<TMP_RESULT_LEN>;
 
-  constexpr size_t EXTRA_FRACTION_LEN =
-      TMP_RESULT_LEN - 1 - OutFPBits::FRACTION_LEN;
-  constexpr TmpResultType EXTRA_FRACTION_STICKY_MASK =
-      (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1)) - 1;
+  InFPBits x_bits(x), y_bits(y), z_bits(z);
+
+  if (LIBC_UNLIKELY(x_bits.is_nan() || y_bits.is_nan() || z_bits.is_nan())) {
+    if (x_bits.is_nan() || y_bits.is_nan()) {
+      if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan() ||
+          z_bits.is_signaling_nan())
+        raise_except_if_required(FE_INVALID);
+
+      if (x_bits.is_quiet_nan()) {
+        InStorageType x_payload = static_cast<InStorageType>(getpayload(x));
+        if ((x_payload & ~(OutFPBits::FRACTION_MASK >> 1)) == 0)
+          return OutFPBits::quiet_nan(x_bits.sign(),
+                                      static_cast<OutStorageType>(x_payload))
+              .get_val();
+      }
+
+      if (y_bits.is_quiet_nan()) {
+        InStorageType y_payload = static_cast<InStorageType>(getpayload(y));
+        if ((y_payload & ~(OutFPBits::FRACTION_MASK >> 1)) == 0)
+          return OutFPBits::quiet_nan(y_bits.sign(),
+                                      static_cast<OutStorageType>(y_payload))
+              .get_val();
+      }
+
+      if (z_bits.is_quiet_nan()) {
+        InStorageType z_payload = static_cast<InStorageType>(getpayload(z));
+        if ((z_payload & ~(OutFPBits::FRACTION_MASK >> 1)) == 0)
+          return OutFPBits::quiet_nan(z_bits.sign(),
+                                      static_cast<OutStorageType>(z_payload))
+              .get_val();
+      }
+
+      return OutFPBits::quiet_nan().get_val();
+    }
+  }
 
   if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0))
     return static_cast<OutType>(x * y + z);
@@ -142,7 +176,9 @@ fma(InType x, InType y, InType z) {
     z *= InType(InStorageType(1) << InFPBits::FRACTION_LEN);
   }
 
-  InFPBits x_bits(x), y_bits(y), z_bits(z);
+  x_bits = InFPBits(x);
+  y_bits = InFPBits(y);
+  z_bits = InFPBits(z);
   const Sign z_sign = z_bits.sign();
   Sign prod_sign = (x_bits.sign() == y_bits.sign()) ? Sign::POS : Sign::NEG;
   x_exp += x_bits.get_biased_exponent();
@@ -182,7 +218,6 @@ fma(InType x, InType y, InType z) {
   constexpr int RESULT_MIN_LEN = PROD_LEN - InFPBits::FRACTION_LEN;
   z_mant <<= RESULT_MIN_LEN;
   int z_lsb_exp = z_exp - (InFPBits::FRACTION_LEN + RESULT_MIN_LEN);
-  bool round_bit = false;
   bool sticky_bits = false;
   bool z_shifted = false;
 
@@ -221,85 +256,18 @@ fma(InType x, InType y, InType z) {
     }
   }
 
-  OutStorageType result = 0;
-  int r_exp = 0; // Unbiased exponent of the result
-
-  int round_mode = fputil::quick_get_round();
-
-  // Normalize the result.
-  if (prod_mant != 0) {
-    int lead_zeros = cpp::countl_zero(prod_mant);
-    // Move the leading 1 to the most significant bit.
-    prod_mant <<= lead_zeros;
-    prod_lsb_exp -= lead_zeros;
-    r_exp = prod_lsb_exp + (cpp::numeric_limits<TmpResultType>::digits - 1) -
-            InFPBits::EXP_BIAS + OutFPBits::EXP_BIAS;
-
-    if (r_exp > 0) {
-      // The result is normal.  We will shift the mantissa to the right by the
-      // amount of extra bits compared to the length of the explicit mantissa in
-      // the output type.  The rounding bit then becomes the highest bit that is
-      // shifted out, and the following lower bits are merged into sticky bits.
-      round_bit =
-          (prod_mant & (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1))) != 0;
-      sticky_bits |= (prod_mant & EXTRA_FRACTION_STICKY_MASK) != 0;
-      result = static_cast<OutStorageType>(prod_mant >> EXTRA_FRACTION_LEN);
-    } else {
-      if (r_exp < -OutFPBits::FRACTION_LEN) {
-        // The result is smaller than 1/2 of the smallest denormal number.
-        sticky_bits = true; // since the result is non-zero.
-        result = 0;
-      } else {
-        // The result is denormal.
-        TmpResultType mask = TmpResultType(1) << (EXTRA_FRACTION_LEN - r_exp);
-        round_bit = (prod_mant & mask) != 0;
-        sticky_bits |= (prod_mant & (mask - 1)) != 0;
-        if (r_exp > -OutFPBits::FRACTION_LEN)
-          result = static_cast<OutStorageType>(
-              prod_mant >> (EXTRA_FRACTION_LEN + 1 - r_exp));
-        else
-          result = 0;
-      }
-
-      r_exp = 0;
-    }
-  } else {
+  if (prod_mant == 0) {
     // When there is exact cancellation, i.e., x*y == -z exactly, return -0.0 if
     // rounding downward and +0.0 for other rounding modes.
-    if (round_mode == FE_DOWNWARD)
+    if (quick_get_round() == FE_DOWNWARD)
       prod_sign = Sign::NEG;
     else
       prod_sign = Sign::POS;
   }
 
-  // Finalize the result.
-  if (LIBC_UNLIKELY(r_exp >= OutFPBits::MAX_BIASED_EXPONENT)) {
-    if ((round_mode == FE_TOWARDZERO) ||
-        (round_mode == FE_UPWARD && prod_sign.is_neg()) ||
-        (round_mode == FE_DOWNWARD && prod_sign.is_pos())) {
-      return OutFPBits::max_normal(prod_sign).get_val();
-    }
-    return OutFPBits::inf(prod_sign).get_val();
-  }
-
-  // Remove hidden bit and append the exponent field and sign bit.
-  result = static_cast<OutStorageType>(
-      (result & OutFPBits::FRACTION_MASK) |
-      (static_cast<OutStorageType>(r_exp) << OutFPBits::FRACTION_LEN));
-  if (prod_sign.is_neg())
-    result |= OutFPBits::SIGN_MASK;
-
-  // Rounding.
-  if (round_mode == FE_TONEAREST) {
-    if (round_bit && (sticky_bits || ((result & 1) != 0)))
-      ++result;
-  } else if ((round_mode == FE_UPWARD && prod_sign.is_pos()) ||
-             (round_mode == FE_DOWNWARD && prod_sign.is_neg())) {
-    if (round_bit || sticky_bits)
-      ++result;
-  }
-
-  return cpp::bit_cast<OutType>(result);
+  DyadicFloat result(prod_sign, prod_lsb_exp - InFPBits::EXP_BIAS, prod_mant);
+  result.mantissa |= sticky_bits;
+  return result.template as<OutType, /*ShouldSignalExceptions=*/true>();
 }
 
 } // namespace generic
 
@@ -39,17 +39,18 @@ multiply_add(T x, T y, T z) {
 #if defined(LIBC_TARGET_CPU_HAS_FMA)
 
 // FMA instructions are available.
-#include "FMA.h"
+// We use builtins directly instead of including FMA.h to avoid a circular
+// dependency: multiply_add.h -> FMA.h -> generic/FMA.h -> dyadic_float.h.
 
 namespace LIBC_NAMESPACE {
 namespace fputil {
 
 LIBC_INLINE float multiply_add(float x, float y, float z) {
-  return fma<float>(x, y, z);
+  return __builtin_fmaf(x, y, z);
 }
 
 LIBC_INLINE double multiply_add(double x, double y, double z) {
-  return fma<double>(x, y, z);
+  return __builtin_fma(x, y, z);
 }
 
 } // namespace fputil
Original file line number	Diff line number	Diff line change
`@@ -154,7 +154,6 @@ add_header_library(`
`154`	`154`	`HDRS`
`155`	`155`	`multiply_add.h`
`156`	`156`	`DEPENDS`
`157`		`- .fma`
`158`	`157`	`libc.src.__support.common`
`159`	`158`	`)`
`160`	`159`