From e880ee2eb2fc483085180c9a879518ff1ed52327 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Thu, 6 Jun 2024 17:07:37 +0200 Subject: [PATCH 1/7] [libc][math][c23] Add fmodf16 C23 math function --- libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/docs/math/index.rst | 2 +- libc/spec/stdc.td | 1 + libc/src/__support/FPUtil/FPBits.h | 2 +- libc/src/__support/FPUtil/generic/FMod.h | 4 ++- libc/src/math/CMakeLists.txt | 1 + libc/src/math/fmodf16.h | 20 ++++++++++++ libc/src/math/generic/CMakeLists.txt | 13 ++++++++ libc/src/math/generic/fmodf16.cpp | 19 ++++++++++++ libc/test/src/math/smoke/CMakeLists.txt | 33 +++++++++++++++----- libc/test/src/math/smoke/FModTest.h | 37 ++++++++++++----------- libc/test/src/math/smoke/fmodf16_test.cpp | 13 ++++++++ 13 files changed, 118 insertions(+), 29 deletions(-) create mode 100644 libc/src/math/fmodf16.h create mode 100644 libc/src/math/generic/fmodf16.cpp create mode 100644 libc/test/src/math/smoke/fmodf16_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 33ecff813a1fb..193183ddeb78a 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -515,6 +515,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.fminimum_magf16 libc.src.math.fminimum_mag_numf16 libc.src.math.fminimum_numf16 + libc.src.math.fmodf16 libc.src.math.fromfpf16 libc.src.math.fromfpxf16 libc.src.math.llrintf16 diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index ebacb1c59ceec..ee748b5cd8e1a 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -548,6 +548,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.fminimum_magf16 libc.src.math.fminimum_mag_numf16 libc.src.math.fminimum_numf16 + libc.src.math.fmodf16 libc.src.math.fromfpf16 libc.src.math.fromfpxf16 libc.src.math.llrintf16 diff --git 
a/libc/docs/math/index.rst b/libc/docs/math/index.rst index b9507f0887cd7..24b88a52f049d 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -156,7 +156,7 @@ Basic Operations +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | fminimum_num | |check| | |check| | |check| | |check| | |check| | 7.12.12.9 | F.10.9.5 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| fmod | |check| | |check| | |check| | | |check| | 7.12.10.1 | F.10.7.1 | +| fmod | |check| | |check| | |check| | |check| | |check| | 7.12.10.1 | F.10.7.1 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | fmul | N/A | | | N/A | | 7.12.14.3 | F.10.11 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 210f2a1325169..90e5246a63505 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -478,6 +478,7 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"fmod", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"fmodf", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"fmodl", RetValSpec, [ArgSpec, ArgSpec]>, + GuardedFunctionSpec<"fmodf16", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, GuardedFunctionSpec<"fmodf128", RetValSpec, [ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT128">, FunctionSpec<"frexp", RetValSpec, [ArgSpec, ArgSpec]>, diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h index d3c96d2d613d6..559ecde767c30 100644 --- a/libc/src/__support/FPUtil/FPBits.h 
+++ b/libc/src/__support/FPUtil/FPBits.h @@ -744,7 +744,7 @@ struct FPRepImpl : public FPRepSem { if (LIBC_LIKELY(ep >= 0)) { // Implicit number bit will be removed by mask result.set_significand(number); - result.set_biased_exponent(ep + 1); + result.set_biased_exponent(static_cast(ep + 1)); } else { result.set_significand(number >> -ep); } diff --git a/libc/src/__support/FPUtil/generic/FMod.h b/libc/src/__support/FPUtil/generic/FMod.h index 211ab926d28b0..f840a92b1a5a2 100644 --- a/libc/src/__support/FPUtil/generic/FMod.h +++ b/libc/src/__support/FPUtil/generic/FMod.h @@ -210,7 +210,9 @@ class FMod { e_x - e_y <= int(FPB::EXP_LEN))) { StorageType m_x = sx.get_explicit_mantissa(); StorageType m_y = sy.get_explicit_mantissa(); - StorageType d = (e_x == e_y) ? (m_x - m_y) : (m_x << (e_x - e_y)) % m_y; + StorageType d = (e_x == e_y) + ? (m_x - m_y) + : static_cast(m_x << (e_x - e_y)) % m_y; if (d == 0) return FPB::zero(); // iy - 1 because of "zero power" for number with power 1 diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 7a349ddc53724..141f66817e53e 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -183,6 +183,7 @@ add_math_entrypoint_object(fminimum_mag_numf128) add_math_entrypoint_object(fmod) add_math_entrypoint_object(fmodf) add_math_entrypoint_object(fmodl) +add_math_entrypoint_object(fmodf16) add_math_entrypoint_object(fmodf128) add_math_entrypoint_object(frexp) diff --git a/libc/src/math/fmodf16.h b/libc/src/math/fmodf16.h new file mode 100644 index 0000000000000..ab658430275d8 --- /dev/null +++ b/libc/src/math/fmodf16.h @@ -0,0 +1,20 @@ +//===-- Implementation header for fmodf16 -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_FMODF16_H +#define LLVM_LIBC_SRC_MATH_FMODF16_H + +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE { + +float16 fmodf16(float16 x, float16 y); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_FMODF16_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index b1d786fc6b29f..9c9073c0ea7bf 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -2887,6 +2887,19 @@ add_entrypoint_object( -O3 ) +add_entrypoint_object( + fmodf16 + SRCS + fmodf16.cpp + HDRS + ../fmodf16.h + DEPENDS + libc.src.__support.macros.properties.types + libc.src.__support.FPUtil.generic.fmod + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( fmodf128 SRCS diff --git a/libc/src/math/generic/fmodf16.cpp b/libc/src/math/generic/fmodf16.cpp new file mode 100644 index 0000000000000..0a54a65806de9 --- /dev/null +++ b/libc/src/math/generic/fmodf16.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of fmodf16 function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/fmodf16.h" +#include "src/__support/FPUtil/generic/FMod.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(float16, fmodf16, (float16 x, float16 y)) { + return fputil::generic::FMod::eval(x, y); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 110fa1de97d6d..07e8b5dddfa6c 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -3111,10 +3111,10 @@ add_fp_unittest( HDRS FModTest.h DEPENDS + libc.hdr.fenv_macros libc.src.errno.errno libc.src.math.fmodf - libc.src.__support.FPUtil.basic_operations - libc.src.__support.FPUtil.nearest_integer_operations + libc.src.__support.FPUtil.fenv_impl # FIXME: Currently fails on the GPU build. UNIT_TEST_ONLY ) @@ -3128,10 +3128,10 @@ add_fp_unittest( HDRS FModTest.h DEPENDS + libc.hdr.fenv_macros libc.src.errno.errno libc.src.math.fmod - libc.src.__support.FPUtil.basic_operations - libc.src.__support.FPUtil.nearest_integer_operations + libc.src.__support.FPUtil.fenv_impl # FIXME: Currently fails on the GPU build. UNIT_TEST_ONLY ) @@ -3145,10 +3145,27 @@ add_fp_unittest( HDRS FModTest.h DEPENDS + libc.hdr.fenv_macros libc.src.errno.errno libc.src.math.fmodl - libc.src.__support.FPUtil.basic_operations - libc.src.__support.FPUtil.nearest_integer_operations + libc.src.__support.FPUtil.fenv_impl + # FIXME: Currently fails on the GPU build. + UNIT_TEST_ONLY +) + +add_fp_unittest( + fmodf16_test + SUITE + libc-math-smoke-tests + SRCS + fmodf16_test.cpp + HDRS + FModTest.h + DEPENDS + libc.hdr.fenv_macros + libc.src.errno.errno + libc.src.math.fmodf16 + libc.src.__support.FPUtil.fenv_impl # FIXME: Currently fails on the GPU build. 
UNIT_TEST_ONLY ) @@ -3162,10 +3179,10 @@ add_fp_unittest( HDRS FModTest.h DEPENDS + libc.hdr.fenv_macros libc.src.errno.errno libc.src.math.fmodf128 - libc.src.__support.FPUtil.basic_operations - libc.src.__support.FPUtil.nearest_integer_operations + libc.src.__support.FPUtil.fenv_impl # FIXME: Currently fails on the GPU build. UNIT_TEST_ONLY ) diff --git a/libc/test/src/math/smoke/FModTest.h b/libc/test/src/math/smoke/FModTest.h index f1015d6497fcd..405e3107438d4 100644 --- a/libc/test/src/math/smoke/FModTest.h +++ b/libc/test/src/math/smoke/FModTest.h @@ -9,13 +9,13 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_FMODTEST_H #define LLVM_LIBC_TEST_SRC_MATH_FMODTEST_H -#include "src/__support/FPUtil/BasicOperations.h" -#include "src/__support/FPUtil/NearestIntegerOperations.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/errno/libc_errno.h" #include "test/UnitTest/FEnvSafeTest.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include "hdr/math_macros.h" +#include "hdr/fenv_macros.h" #define TEST_SPECIAL(x, y, expected, dom_err, expected_exception) \ EXPECT_FP_EQ(expected, f(x, y)); \ @@ -210,7 +210,8 @@ class FmodTest : public LIBC_NAMESPACE::testing::FEnvSafeTest { } void testRegularExtreme(FModFunc f) { - + if constexpr (sizeof(T) < sizeof(float)) + return; TEST_REGULAR(0x1p127L, 0x3p-149L, 0x1p-149L); TEST_REGULAR(0x1p127L, -0x3p-149L, 0x1p-149L); TEST_REGULAR(0x1p127L, 0x3p-148L, 0x1p-147L); @@ -224,20 +225,20 @@ class FmodTest : public LIBC_NAMESPACE::testing::FEnvSafeTest { TEST_REGULAR(-0x1p127L, 0x3p-126L, -0x1p-125L); TEST_REGULAR(-0x1p127L, -0x3p-126L, -0x1p-125L); - if constexpr (sizeof(T) >= sizeof(double)) { - TEST_REGULAR(0x1p1023L, 0x3p-1074L, 0x1p-1073L); - TEST_REGULAR(0x1p1023L, -0x3p-1074L, 0x1p-1073L); - TEST_REGULAR(0x1p1023L, 0x3p-1073L, 0x1p-1073L); - TEST_REGULAR(0x1p1023L, -0x3p-1073L, 0x1p-1073L); - TEST_REGULAR(0x1p1023L, 0x3p-1022L, 0x1p-1021L); - TEST_REGULAR(0x1p1023L, -0x3p-1022L, 0x1p-1021L); - 
TEST_REGULAR(-0x1p1023L, 0x3p-1074L, -0x1p-1073L); - TEST_REGULAR(-0x1p1023L, -0x3p-1074L, -0x1p-1073L); - TEST_REGULAR(-0x1p1023L, 0x3p-1073L, -0x1p-1073L); - TEST_REGULAR(-0x1p1023L, -0x3p-1073L, -0x1p-1073L); - TEST_REGULAR(-0x1p1023L, 0x3p-1022L, -0x1p-1021L); - TEST_REGULAR(-0x1p1023L, -0x3p-1022L, -0x1p-1021L); - } + if constexpr (sizeof(T) < sizeof(double)) + return; + TEST_REGULAR(0x1p1023L, 0x3p-1074L, 0x1p-1073L); + TEST_REGULAR(0x1p1023L, -0x3p-1074L, 0x1p-1073L); + TEST_REGULAR(0x1p1023L, 0x3p-1073L, 0x1p-1073L); + TEST_REGULAR(0x1p1023L, -0x3p-1073L, 0x1p-1073L); + TEST_REGULAR(0x1p1023L, 0x3p-1022L, 0x1p-1021L); + TEST_REGULAR(0x1p1023L, -0x3p-1022L, 0x1p-1021L); + TEST_REGULAR(-0x1p1023L, 0x3p-1074L, -0x1p-1073L); + TEST_REGULAR(-0x1p1023L, -0x3p-1074L, -0x1p-1073L); + TEST_REGULAR(-0x1p1023L, 0x3p-1073L, -0x1p-1073L); + TEST_REGULAR(-0x1p1023L, -0x3p-1073L, -0x1p-1073L); + TEST_REGULAR(-0x1p1023L, 0x3p-1022L, -0x1p-1021L); + TEST_REGULAR(-0x1p1023L, -0x3p-1022L, -0x1p-1021L); } }; diff --git a/libc/test/src/math/smoke/fmodf16_test.cpp b/libc/test/src/math/smoke/fmodf16_test.cpp new file mode 100644 index 0000000000000..9a48c5aa0d609 --- /dev/null +++ b/libc/test/src/math/smoke/fmodf16_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for fmodf16 ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FModTest.h" + +#include "src/math/fmodf16.h" + +LIST_FMOD_TESTS(float16, LIBC_NAMESPACE::fmodf16) From f6621ffd0a175965caa7764103796d83fab3fff2 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Thu, 6 Jun 2024 19:58:53 +0200 Subject: [PATCH 2/7] [libc][math][c23] Change fmodf16 to use generic FMod with uint32_t --- libc/src/math/generic/fmodf16.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/src/math/generic/fmodf16.cpp b/libc/src/math/generic/fmodf16.cpp index 0a54a65806de9..a5bfd78113f63 100644 --- a/libc/src/math/generic/fmodf16.cpp +++ b/libc/src/math/generic/fmodf16.cpp @@ -13,7 +13,7 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(float16, fmodf16, (float16 x, float16 y)) { - return fputil::generic::FMod::eval(x, y); + return fputil::generic::FMod::eval(x, y); } } // namespace LIBC_NAMESPACE From 002ebebef39e0e7a592be36cdfd17275d7321efa Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 7 Jun 2024 23:02:44 +0200 Subject: [PATCH 3/7] [libc][math][c23] Add performance test for different implementations of fmodf16 --- .../BinaryOpSingleOutputPerf.h | 31 ++++++++++--------- .../math/performance_testing/CMakeLists.txt | 12 +++++++ .../math/performance_testing/fmodf16_perf.cpp | 24 ++++++++++++++ 3 files changed, 53 insertions(+), 14 deletions(-) create mode 100644 libc/test/src/math/performance_testing/fmodf16_perf.cpp diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h index 504d1be94b891..3a469b2f6c44d 100644 --- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h +++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h @@ -33,13 +33,15 @@ template class BinaryOpSingleOutputPerf { } StorageType step = (endingBit - startingBit) / N; - for (StorageType 
bitsX = startingBit, bitsY = endingBit;; - bitsX += step, bitsY -= step) { - T x = FPBits(bitsX).get_val(); - T y = FPBits(bitsY).get_val(); - result = func(x, y); - if (endingBit - bitsX < step) { - break; + for (int i = 0; i < 5000; i++) { + for (StorageType bitsX = startingBit, bitsY = endingBit;; + bitsX += step, bitsY -= step) { + T x = FPBits(bitsX).get_val(); + T y = FPBits(bitsY).get_val(); + result = func(x, y); + if (endingBit - bitsX < step) { + break; + } } } }; @@ -49,7 +51,7 @@ template class BinaryOpSingleOutputPerf { runner(myFunc); timer.stop(); - double my_average = static_cast(timer.nanoseconds()) / N; + double my_average = static_cast(timer.nanoseconds()) / (N * 5000); log << "-- My function --\n"; log << " Total time : " << timer.nanoseconds() << " ns \n"; log << " Average runtime : " << my_average << " ns/op \n"; @@ -60,7 +62,7 @@ template class BinaryOpSingleOutputPerf { runner(otherFunc); timer.stop(); - double other_average = static_cast(timer.nanoseconds()) / N; + double other_average = static_cast(timer.nanoseconds()) / (N * 5000); log << "-- Other function --\n"; log << " Total time : " << timer.nanoseconds() << " ns \n"; log << " Average runtime : " << other_average << " ns/op \n"; @@ -76,17 +78,17 @@ template class BinaryOpSingleOutputPerf { log << " Performance tests with inputs in denormal range:\n"; run_perf_in_range(myFunc, otherFunc, /* startingBit= */ StorageType(0), /* endingBit= */ FPBits::max_subnormal().uintval(), - 10'000'001, log); + FPBits::max_subnormal().uintval(), log); log << "\n Performance tests with inputs in normal range:\n"; run_perf_in_range(myFunc, otherFunc, /* startingBit= */ FPBits::min_normal().uintval(), /* endingBit= */ FPBits::max_normal().uintval(), - 10'000'001, log); + FPBits::max_normal().uintval() - FPBits::min_normal().uintval(), log); log << "\n Performance tests with inputs in normal range with exponents " "close to each other:\n"; run_perf_in_range( myFunc, otherFunc, /* startingBit= */ 
FPBits(T(0x1.0p-10)).uintval(), - /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(), 1'001'001, log); + /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(), FPBits(T(0x1.0p+10)).uintval() - FPBits(T(0x1.0p-10)).uintval(), log); } static void run_diff(Func myFunc, Func otherFunc, const char *logFile) { @@ -115,8 +117,9 @@ template class BinaryOpSingleOutputPerf { } // namespace LIBC_NAMESPACE #define BINARY_OP_SINGLE_OUTPUT_PERF(T, myFunc, otherFunc, filename) \ - int main() { \ + { \ + LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf::run_perf( \ + &myFunc, &otherFunc, filename); \ LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf::run_perf( \ &myFunc, &otherFunc, filename); \ - return 0; \ } diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt index d1fb24e37f728..d10f5ab848ec7 100644 --- a/libc/test/src/math/performance_testing/CMakeLists.txt +++ b/libc/test/src/math/performance_testing/CMakeLists.txt @@ -343,6 +343,18 @@ add_perf_binary( -fno-builtin ) +add_perf_binary( + fmodf16_perf + SRCS + fmodf16_perf.cpp + DEPENDS + .binary_op_single_output_diff + libc.include.llvm-libc-macros.stdint_macros + libc.src.math.fmodf16 + libc.src.__support.FPUtil.generic.fmod + libc.src.__support.macros.properties.types +) + add_perf_binary( fmodf128_perf SRCS diff --git a/libc/test/src/math/performance_testing/fmodf16_perf.cpp b/libc/test/src/math/performance_testing/fmodf16_perf.cpp new file mode 100644 index 0000000000000..5ae2e43a54931 --- /dev/null +++ b/libc/test/src/math/performance_testing/fmodf16_perf.cpp @@ -0,0 +1,24 @@ +//===-- Differential test for fmodf16 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "BinaryOpSingleOutputPerf.h" + +//#include "include/llvm-libc-macros/stdint-macros.h" +#include "src/__support/FPUtil/generic/FMod.h" +#include "src/__support/macros/properties/types.h" + +#define FMOD_FUNC(U) (LIBC_NAMESPACE::fputil::generic::FMod::eval) + +int main() { +BINARY_OP_SINGLE_OUTPUT_PERF(float16, FMOD_FUNC(uint16_t), FMOD_FUNC(uint32_t), + "fmodf16_u16_vs_u32_perf.log") + +BINARY_OP_SINGLE_OUTPUT_PERF(float16, FMOD_FUNC(uint16_t), FMOD_FUNC(uint64_t), + "fmodf16_u16_vs_u64_perf.log") + return 0; +} From fff5340770fd8d14e7a0391a0ec8c0f5dc88050a Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 7 Jun 2024 23:38:17 +0200 Subject: [PATCH 4/7] [libc][math][c23] Clean up performance test for different implementations of fmodf16 --- .../BinaryOpSingleOutputPerf.h | 43 +++++++++++++------ .../math/performance_testing/CMakeLists.txt | 4 +- .../math/performance_testing/fmodf16_perf.cpp | 15 ++++--- 3 files changed, 42 insertions(+), 20 deletions(-) diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h index 3a469b2f6c44d..cebd159b634c3 100644 --- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h +++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h @@ -6,9 +6,11 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/algorithm.h" #include "src/__support/FPUtil/FPBits.h" #include "test/src/math/performance_testing/Timer.h" +#include #include namespace LIBC_NAMESPACE { @@ -25,7 +27,12 @@ template class BinaryOpSingleOutputPerf { static void run_perf_in_range(Func myFunc, Func otherFunc, StorageType startingBit, StorageType endingBit, - StorageType N, std::ofstream &log) { + size_t N, size_t rounds, std::ofstream &log) { 
+ if (endingBit - startingBit < N) + N = endingBit - startingBit; + + size_t total_ops = N * rounds; + auto runner = [=](Func func) { volatile T result; if (endingBit < startingBit) { @@ -33,7 +40,7 @@ template class BinaryOpSingleOutputPerf { } StorageType step = (endingBit - startingBit) / N; - for (int i = 0; i < 5000; i++) { + for (size_t i = 0; i < rounds; i++) { for (StorageType bitsX = startingBit, bitsY = endingBit;; bitsX += step, bitsY -= step) { T x = FPBits(bitsX).get_val(); @@ -51,7 +58,7 @@ template class BinaryOpSingleOutputPerf { runner(myFunc); timer.stop(); - double my_average = static_cast(timer.nanoseconds()) / (N * 5000); + double my_average = static_cast(timer.nanoseconds()) / total_ops; log << "-- My function --\n"; log << " Total time : " << timer.nanoseconds() << " ns \n"; log << " Average runtime : " << my_average << " ns/op \n"; @@ -62,7 +69,7 @@ template class BinaryOpSingleOutputPerf { runner(otherFunc); timer.stop(); - double other_average = static_cast(timer.nanoseconds()) / (N * 5000); + double other_average = static_cast(timer.nanoseconds()) / total_ops; log << "-- Other function --\n"; log << " Total time : " << timer.nanoseconds() << " ns \n"; log << " Average runtime : " << other_average << " ns/op \n"; @@ -73,22 +80,24 @@ template class BinaryOpSingleOutputPerf { log << " Mine / Other's : " << my_average / other_average << " \n"; } - static void run_perf(Func myFunc, Func otherFunc, const char *logFile) { + static void run_perf(Func myFunc, Func otherFunc, int rounds, + const char *logFile) { std::ofstream log(logFile); log << " Performance tests with inputs in denormal range:\n"; run_perf_in_range(myFunc, otherFunc, /* startingBit= */ StorageType(0), /* endingBit= */ FPBits::max_subnormal().uintval(), - FPBits::max_subnormal().uintval(), log); + 10'000'001, rounds, log); log << "\n Performance tests with inputs in normal range:\n"; run_perf_in_range(myFunc, otherFunc, /* startingBit= */ FPBits::min_normal().uintval(), /* 
endingBit= */ FPBits::max_normal().uintval(), - FPBits::max_normal().uintval() - FPBits::min_normal().uintval(), log); + 10'000'001, rounds, log); log << "\n Performance tests with inputs in normal range with exponents " "close to each other:\n"; - run_perf_in_range( - myFunc, otherFunc, /* startingBit= */ FPBits(T(0x1.0p-10)).uintval(), - /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(), FPBits(T(0x1.0p+10)).uintval() - FPBits(T(0x1.0p-10)).uintval(), log); + run_perf_in_range(myFunc, otherFunc, + /* startingBit= */ FPBits(T(0x1.0p-10)).uintval(), + /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(), + 1'001'001, rounds, log); } static void run_diff(Func myFunc, Func otherFunc, const char *logFile) { @@ -117,9 +126,17 @@ template class BinaryOpSingleOutputPerf { } // namespace LIBC_NAMESPACE #define BINARY_OP_SINGLE_OUTPUT_PERF(T, myFunc, otherFunc, filename) \ - { \ + int main() { \ + LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf::run_perf( \ + &myFunc, &otherFunc, 1, filename); \ + return 0; \ + } + +#define BINARY_OP_SINGLE_OUTPUT_PERF_EX(T, myFunc, otherFunc, rounds, \ + filename) \ + { \ LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf::run_perf( \ - &myFunc, &otherFunc, filename); \ + &myFunc, &otherFunc, rounds, filename); \ LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf::run_perf( \ - &myFunc, &otherFunc, filename); \ + &myFunc, &otherFunc, rounds, filename); \ } diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt index d10f5ab848ec7..10522d972fb2d 100644 --- a/libc/test/src/math/performance_testing/CMakeLists.txt +++ b/libc/test/src/math/performance_testing/CMakeLists.txt @@ -88,6 +88,9 @@ add_header_library( binary_op_single_output_diff HDRS BinaryOpSingleOutputPerf.h + DEPENDS + libc.src.__support.CPP.algorithm + libc.src.__support.FPUtil.fp_bits ) add_perf_binary( @@ -349,7 +352,6 @@ add_perf_binary( fmodf16_perf.cpp DEPENDS .binary_op_single_output_diff - 
libc.include.llvm-libc-macros.stdint_macros libc.src.math.fmodf16 libc.src.__support.FPUtil.generic.fmod libc.src.__support.macros.properties.types diff --git a/libc/test/src/math/performance_testing/fmodf16_perf.cpp b/libc/test/src/math/performance_testing/fmodf16_perf.cpp index 5ae2e43a54931..35bb0a15dfbab 100644 --- a/libc/test/src/math/performance_testing/fmodf16_perf.cpp +++ b/libc/test/src/math/performance_testing/fmodf16_perf.cpp @@ -8,17 +8,20 @@ #include "BinaryOpSingleOutputPerf.h" -//#include "include/llvm-libc-macros/stdint-macros.h" #include "src/__support/FPUtil/generic/FMod.h" #include "src/__support/macros/properties/types.h" +#include + #define FMOD_FUNC(U) (LIBC_NAMESPACE::fputil::generic::FMod::eval) int main() { -BINARY_OP_SINGLE_OUTPUT_PERF(float16, FMOD_FUNC(uint16_t), FMOD_FUNC(uint32_t), - "fmodf16_u16_vs_u32_perf.log") + BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, FMOD_FUNC(uint16_t), + FMOD_FUNC(uint32_t), 5000, + "fmodf16_u16_vs_u32_perf.log") -BINARY_OP_SINGLE_OUTPUT_PERF(float16, FMOD_FUNC(uint16_t), FMOD_FUNC(uint64_t), - "fmodf16_u16_vs_u64_perf.log") - return 0; + BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, FMOD_FUNC(uint16_t), + FMOD_FUNC(uint64_t), 5000, + "fmodf16_u16_vs_u64_perf.log") + return 0; } From 21b7fe299c43fe320cb61c6e76061149f87bf945 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 7 Jun 2024 23:41:03 +0200 Subject: [PATCH 5/7] [libc][math][c23] Remove unused dependency from performance test --- .../test/src/math/performance_testing/BinaryOpSingleOutputPerf.h | 1 - libc/test/src/math/performance_testing/CMakeLists.txt | 1 - 2 files changed, 2 deletions(-) diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h index cebd159b634c3..4c04b5b329fe6 100644 --- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h +++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h @@ -6,7 +6,6 @@ // 
//===----------------------------------------------------------------------===// -#include "src/__support/CPP/algorithm.h" #include "src/__support/FPUtil/FPBits.h" #include "test/src/math/performance_testing/Timer.h" diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt index 10522d972fb2d..4ea78f9999e4d 100644 --- a/libc/test/src/math/performance_testing/CMakeLists.txt +++ b/libc/test/src/math/performance_testing/CMakeLists.txt @@ -89,7 +89,6 @@ add_header_library( HDRS BinaryOpSingleOutputPerf.h DEPENDS - libc.src.__support.CPP.algorithm libc.src.__support.FPUtil.fp_bits ) From bc2bc7608bc4a33df4bfd860f56b85e1bd72c178 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Sat, 8 Jun 2024 00:17:55 +0200 Subject: [PATCH 6/7] [libc][math][c23] Clean up performance test again --- .../performance_testing/BinaryOpSingleOutputPerf.h | 12 +++++------- .../src/math/performance_testing/fmodf16_perf.cpp | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h index 4c04b5b329fe6..861840e87f7e7 100644 --- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h +++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h @@ -30,8 +30,6 @@ template <typename T> class BinaryOpSingleOutputPerf { if (endingBit - startingBit < N) N = endingBit - startingBit; - size_t total_ops = N * rounds; - auto runner = [=](Func func) { volatile T result; if (endingBit < startingBit) { @@ -57,7 +55,7 @@ template <typename T> class BinaryOpSingleOutputPerf { runner(myFunc); timer.stop(); - double my_average = static_cast<double>(timer.nanoseconds()) / total_ops; + double my_average = static_cast<double>(timer.nanoseconds()) / N / rounds; log << "-- My function --\n"; log << " Total time : " << timer.nanoseconds() << " ns \n"; log << " Average runtime : " << my_average << " ns/op \n"; @@ -68,7 +66,7 @@ template <typename T>
class BinaryOpSingleOutputPerf { runner(otherFunc); timer.stop(); - double other_average = static_cast<double>(timer.nanoseconds()) / total_ops; + double other_average = static_cast<double>(timer.nanoseconds()) / N / rounds; log << "-- Other function --\n"; log << " Total time : " << timer.nanoseconds() << " ns \n"; log << " Average runtime : " << other_average << " ns/op \n"; @@ -85,18 +83,18 @@ template <typename T> class BinaryOpSingleOutputPerf { log << " Performance tests with inputs in denormal range:\n"; run_perf_in_range(myFunc, otherFunc, /* startingBit= */ StorageType(0), /* endingBit= */ FPBits::max_subnormal().uintval(), - 10'000'001, rounds, log); + 1'000'001, rounds, log); log << "\n Performance tests with inputs in normal range:\n"; run_perf_in_range(myFunc, otherFunc, /* startingBit= */ FPBits::min_normal().uintval(), /* endingBit= */ FPBits::max_normal().uintval(), - 10'000'001, rounds, log); + 1'000'001, rounds, log); log << "\n Performance tests with inputs in normal range with exponents " "close to each other:\n"; run_perf_in_range(myFunc, otherFunc, /* startingBit= */ FPBits(T(0x1.0p-10)).uintval(), /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(), - 1'001'001, rounds, log); + 1'000'001, rounds, log); } static void run_diff(Func myFunc, Func otherFunc, const char *logFile) { diff --git a/libc/test/src/math/performance_testing/fmodf16_perf.cpp b/libc/test/src/math/performance_testing/fmodf16_perf.cpp index 35bb0a15dfbab..ff01fa6ca5870 100644 --- a/libc/test/src/math/performance_testing/fmodf16_perf.cpp +++ b/libc/test/src/math/performance_testing/fmodf16_perf.cpp @@ -1,4 +1,4 @@ -//===-- Differential test for fmodf16 -------------------------------------===// +//===-- Performance test for fmodf16 --------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information.
From 324607f02ad209285085d714341ca3f99ceffe00 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Sat, 8 Jun 2024 00:22:11 +0200 Subject: [PATCH 7/7] [libc][math][c23] Format performance test (oops) --- .../src/math/performance_testing/BinaryOpSingleOutputPerf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h index 861840e87f7e7..3027932c70f40 100644 --- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h +++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h @@ -66,7 +66,8 @@ template <typename T> class BinaryOpSingleOutputPerf { runner(otherFunc); timer.stop(); - double other_average = static_cast<double>(timer.nanoseconds()) / N / rounds; + double other_average = + static_cast<double>(timer.nanoseconds()) / N / rounds; log << "-- Other function --\n"; log << " Total time : " << timer.nanoseconds() << " ns \n"; log << " Average runtime : " << other_average << " ns/op \n";