From 6dcab52a48d11aea138ae30374afee7004782671 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Tue, 1 Apr 2025 10:45:09 -0400 Subject: [PATCH 1/2] Replace __libcpp_{ctz,clz} with __builtin_{ctzg,clzg} --- libcxx/include/__algorithm/sort.h | 12 ++-- libcxx/include/__bit/countl.h | 62 ------------------- libcxx/include/__bit/countr.h | 49 --------------- libcxx/include/__bit_reference | 4 +- libcxx/include/__charconv/to_chars_integral.h | 15 ++--- libcxx/include/__charconv/traits.h | 30 ++++----- libcxx/include/__hash_table | 2 +- 7 files changed, 30 insertions(+), 144 deletions(-) diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h index d7bc1381ba5e1..06cb5b8ce7057 100644 --- a/libcxx/include/__algorithm/sort.h +++ b/libcxx/include/__algorithm/sort.h @@ -359,10 +359,10 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos( // Swap one pair on each iteration as long as both bitsets have at least one // element for swapping. while (__left_bitset != 0 && __right_bitset != 0) { - difference_type __tz_left = __libcpp_ctz(__left_bitset); - __left_bitset = __libcpp_blsr(__left_bitset); - difference_type __tz_right = __libcpp_ctz(__right_bitset); - __right_bitset = __libcpp_blsr(__right_bitset); + difference_type __tz_left = std::__countr_zero(__left_bitset); + __left_bitset = std::__libcpp_blsr(__left_bitset); + difference_type __tz_right = std::__countr_zero(__right_bitset); + __right_bitset = std::__libcpp_blsr(__right_bitset); _Ops::iter_swap(__first + __tz_left, __last - __tz_right); } } @@ -458,7 +458,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within( // Swap within the left side. Need to find set positions in the reverse // order. 
while (__left_bitset != 0) { - difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset); + difference_type __tz_left = __detail::__block_size - 1 - std::__countl_zero(__left_bitset); __left_bitset &= (static_cast(1) << __tz_left) - 1; _RandomAccessIterator __it = __first + __tz_left; if (__it != __lm1) { @@ -471,7 +471,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within( // Swap within the right side. Need to find set positions in the reverse // order. while (__right_bitset != 0) { - difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset); + difference_type __tz_right = __detail::__block_size - 1 - std::__countl_zero(__right_bitset); __right_bitset &= (static_cast(1) << __tz_right) - 1; _RandomAccessIterator __it = __lm1 - __tz_right; if (__it != __first) { diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h index d4df1d049b294..9c37749f92577 100644 --- a/libcxx/include/__bit/countl.h +++ b/libcxx/include/__bit/countl.h @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO: __builtin_clzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can -// refactor this code to exclusively use __builtin_clzg. 
- #ifndef _LIBCPP___BIT_COUNTL_H #define _LIBCPP___BIT_COUNTL_H @@ -27,69 +24,10 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT { - return __builtin_clz(__x); -} - -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT { - return __builtin_clzl(__x); -} - -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT { - return __builtin_clzll(__x); -} - -#if _LIBCPP_HAS_INT128 -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) _NOEXCEPT { -# if __has_builtin(__builtin_clzg) - return __builtin_clzg(__x); -# else - // The function is written in this form due to C++ constexpr limitations. - // The algorithm: - // - Test whether any bit in the high 64-bits is set - // - No bits set: - // - The high 64-bits contain 64 leading zeros, - // - Add the result of the low 64-bits. - // - Any bits set: - // - The number of leading zeros of the input is the number of leading - // zeros in the high 64-bits. - return ((__x >> 64) == 0) ? 
(64 + __builtin_clzll(static_cast(__x))) - : __builtin_clzll(static_cast(__x >> 64)); -# endif -} -#endif // _LIBCPP_HAS_INT128 - template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT { static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type"); -#if __has_builtin(__builtin_clzg) return __builtin_clzg(__t, numeric_limits<_Tp>::digits); -#else // __has_builtin(__builtin_clzg) - if (__t == 0) - return numeric_limits<_Tp>::digits; - - if (sizeof(_Tp) <= sizeof(unsigned int)) - return std::__libcpp_clz(static_cast(__t)) - - (numeric_limits::digits - numeric_limits<_Tp>::digits); - else if (sizeof(_Tp) <= sizeof(unsigned long)) - return std::__libcpp_clz(static_cast(__t)) - - (numeric_limits::digits - numeric_limits<_Tp>::digits); - else if (sizeof(_Tp) <= sizeof(unsigned long long)) - return std::__libcpp_clz(static_cast(__t)) - - (numeric_limits::digits - numeric_limits<_Tp>::digits); - else { - int __ret = 0; - int __iter = 0; - const unsigned int __ulldigits = numeric_limits::digits; - while (true) { - __t = std::__rotl(__t, __ulldigits); - if ((__iter = std::__countl_zero(static_cast(__t))) != __ulldigits) - break; - __ret += __iter; - } - return __ret + __iter; - } -#endif // __has_builtin(__builtin_clzg) } #if _LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h index 46c43921fc60d..f87175971bed3 100644 --- a/libcxx/include/__bit/countr.h +++ b/libcxx/include/__bit/countr.h @@ -6,14 +6,10 @@ // //===----------------------------------------------------------------------===// -// TODO: __builtin_ctzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can -// refactor this code to exclusively use __builtin_ctzg. 
- #ifndef _LIBCPP___BIT_COUNTR_H #define _LIBCPP___BIT_COUNTR_H #include <__assert> -#include <__bit/rotate.h> #include <__concepts/arithmetic.h> #include <__config> #include <__type_traits/is_unsigned.h> @@ -28,55 +24,10 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT { - return __builtin_ctz(__x); -} - -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT { - return __builtin_ctzl(__x); -} - -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT { - return __builtin_ctzll(__x); -} - -// A constexpr implementation for C++11 and later (using clang extensions for constexpr support) -// Precondition: __t != 0 (the caller __countr_zero handles __t == 0 as a special case) -template -[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero_impl(_Tp __t) _NOEXCEPT { - _LIBCPP_ASSERT_INTERNAL(__t != 0, "__countr_zero_impl called with zero value"); - static_assert(is_unsigned<_Tp>::value, "__countr_zero_impl only works with unsigned types"); - if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned int)) { - return std::__libcpp_ctz(static_cast(__t)); - } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long)) { - return std::__libcpp_ctz(static_cast(__t)); - } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long long)) { - return std::__libcpp_ctz(static_cast(__t)); - } else { -#if _LIBCPP_STD_VER == 11 - unsigned long long __ull = static_cast(__t); - const unsigned int __ulldigits = numeric_limits::digits; - return __ull == 0ull ? 
__ulldigits + std::__countr_zero_impl<_Tp>(__t >> __ulldigits) : std::__libcpp_ctz(__ull); -#else - int __ret = 0; - const unsigned int __ulldigits = numeric_limits::digits; - while (static_cast(__t) == 0uLL) { - __ret += __ulldigits; - __t >>= __ulldigits; - } - return __ret + std::__libcpp_ctz(static_cast(__t)); -#endif - } -} - template [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero(_Tp __t) _NOEXCEPT { static_assert(is_unsigned<_Tp>::value, "__countr_zero only works with unsigned types"); -#if __has_builtin(__builtin_ctzg) // TODO (LLVM 21): This can be dropped once we only support Clang >= 19. return __builtin_ctzg(__t, numeric_limits<_Tp>::digits); -#else - return __t != 0 ? std::__countr_zero_impl(__t) : numeric_limits<_Tp>::digits; -#endif } #if _LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference index 552e0f5e4d799..a3e6defd405f8 100644 --- a/libcxx/include/__bit_reference +++ b/libcxx/include/__bit_reference @@ -165,7 +165,7 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void flip() _NOEXCEPT { *__seg_ ^= __mask_; } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> operator&() const _NOEXCEPT { - return __bit_iterator<_Cp, false>(__seg_, static_cast(std::__libcpp_ctz(__mask_))); + return __bit_iterator<_Cp, false>(__seg_, static_cast(std::__countr_zero(__mask_))); } private: @@ -234,7 +234,7 @@ public: } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, true> operator&() const _NOEXCEPT { - return __bit_iterator<_Cp, true>(__seg_, static_cast(std::__libcpp_ctz(__mask_))); + return __bit_iterator<_Cp, true>(__seg_, static_cast(std::__countr_zero(__mask_))); } private: diff --git a/libcxx/include/__charconv/to_chars_integral.h b/libcxx/include/__charconv/to_chars_integral.h index 238c96d7c7a04..f10cc3566875a 100644 --- a/libcxx/include/__charconv/to_chars_integral.h +++ b/libcxx/include/__charconv/to_chars_integral.h 
@@ -114,9 +114,8 @@ struct _LIBCPP_HIDDEN __integral<2> { template _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT { // If value == 0 still need one digit. If the value != this has no - // effect since the code scans for the most significant bit set. (Note - // that __libcpp_clz doesn't work for 0.) - return numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1); + // effect since the code scans for the most significant bit set. + return numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1); } template @@ -150,9 +149,8 @@ struct _LIBCPP_HIDDEN __integral<8> { template _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT { // If value == 0 still need one digit. If the value != this has no - // effect since the code scans for the most significat bit set. (Note - // that __libcpp_clz doesn't work for 0.) - return ((numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1)) + 2) / 3; + // effect since the code scans for the most significant bit set. + return ((numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1)) + 2) / 3; } template @@ -186,9 +184,8 @@ struct _LIBCPP_HIDDEN __integral<16> { template _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT { // If value == 0 still need one digit. If the value != this has no - // effect since the code scans for the most significat bit set. (Note - // that __libcpp_clz doesn't work for 0.) - return (numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1) + 3) / 4; + // effect since the code scans for the most significant bit set. 
+ return (numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1) + 3) / 4; } template diff --git a/libcxx/include/__charconv/traits.h b/libcxx/include/__charconv/traits.h index 085a3f7886f31..3ee59ff82a09c 100644 --- a/libcxx/include/__charconv/traits.h +++ b/libcxx/include/__charconv/traits.h @@ -43,12 +43,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t(__v | 1))) * 1233 >> 12; + auto __t = (32 - std::__countl_zero(static_cast(__v | 1))) * 1233 >> 12; return __t - (__v < __itoa::__pow10_32[__t]) + 1; } @@ -69,12 +69,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t(__v | 1))) * 1233 >> 12; + auto __t = (64 - std::__countl_zero(static_cast(__v | 1))) * 1233 >> 12; return __t - (__v < __itoa::__pow10_64[__t]) + 1; } @@ -96,15 +96,15 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t numeric_limits::max(), "The optimizations for this algorithm fail when this isn't true."); // There's always a bit set in the upper 64-bits. - auto __t = (128 - std::__libcpp_clz(static_cast(__v >> 64))) * 1233 >> 12; + auto __t = (128 - std::__countl_zero(static_cast(__v >> 64))) * 1233 >> 12; _LIBCPP_ASSERT_INTERNAL(__t >= __itoa::__pow10_128_offset, "Index out of bounds"); // __t is adjusted since the lookup table misses the lower entries. return __t - (__v < __itoa::__pow10_128[__t - __itoa::__pow10_128_offset]) + 1; diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table index fca58ca296067..aefa8e19c1864 100644 --- a/libcxx/include/__hash_table +++ b/libcxx/include/__hash_table @@ -147,7 +147,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __constrain_hash(size_t __h, size_t __bc) { } inline _LIBCPP_HIDE_FROM_ABI size_t __next_hash_pow2(size_t __n) { - return __n < 2 ? __n : (size_t(1) << (numeric_limits::digits - __libcpp_clz(__n - 1))); + return __n < 2 ? 
__n : (size_t(1) << (numeric_limits::digits - std::__countl_zero(__n - 1))); } template From f4c514a7c3a6c613d6ffebb065c191a97a34c743 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Sun, 13 Apr 2025 08:58:05 -0400 Subject: [PATCH 2/2] Remove comments that no longer apply --- libcxx/include/__charconv/traits.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/libcxx/include/__charconv/traits.h b/libcxx/include/__charconv/traits.h index 3ee59ff82a09c..9fd0092ca79c3 100644 --- a/libcxx/include/__charconv/traits.h +++ b/libcxx/include/__charconv/traits.h @@ -43,10 +43,7 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t(__v | 1))) * 1233 >> 12; return __t - (__v < __itoa::__pow10_32[__t]) + 1; @@ -69,10 +66,7 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t(__v | 1))) * 1233 >> 12; return __t - (__v < __itoa::__pow10_64[__t]) + 1; @@ -96,10 +90,7 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t numeric_limits::max(), "The optimizations for this algorithm fail when this isn't true.");