From 937b17a9eef2e4445b7d2b4f9de609edf922489c Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Mon, 11 Mar 2024 13:18:12 +0100 Subject: [PATCH] [libc++] Optimize ranges::minmax --- libcxx/benchmarks/CMakeLists.txt | 1 + libcxx/benchmarks/algorithms/minmax.bench.cpp | 68 +++++++++++++++++++ libcxx/docs/ReleaseNotes/19.rst | 2 + libcxx/include/__algorithm/comp.h | 3 + libcxx/include/__algorithm/ranges_minmax.h | 17 ++++- libcxx/include/__functional/operations.h | 6 ++ .../include/__functional/ranges_operations.h | 3 + libcxx/include/__type_traits/desugars_to.h | 1 + 8 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 libcxx/benchmarks/algorithms/minmax.bench.cpp diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt index 387e013afeb6c..928238c1ac69b 100644 --- a/libcxx/benchmarks/CMakeLists.txt +++ b/libcxx/benchmarks/CMakeLists.txt @@ -182,6 +182,7 @@ set(BENCHMARK_TESTS algorithms/make_heap.bench.cpp algorithms/make_heap_then_sort_heap.bench.cpp algorithms/min.bench.cpp + algorithms/minmax.bench.cpp algorithms/min_max_element.bench.cpp algorithms/mismatch.bench.cpp algorithms/pop_heap.bench.cpp diff --git a/libcxx/benchmarks/algorithms/minmax.bench.cpp b/libcxx/benchmarks/algorithms/minmax.bench.cpp new file mode 100644 index 0000000000000..b0ff7f91c1993 --- /dev/null +++ b/libcxx/benchmarks/algorithms/minmax.bench.cpp @@ -0,0 +1,68 @@ +#include <algorithm> +#include <vector> + +#include <benchmark/benchmark.h> + +void run_sizes(auto benchmark) { + benchmark->Arg(1) + ->Arg(2) + ->Arg(3) + ->Arg(4) + ->Arg(5) + ->Arg(6) + ->Arg(7) + ->Arg(8) + ->Arg(9) + ->Arg(10) + ->Arg(11) + ->Arg(12) + ->Arg(13) + ->Arg(14) + ->Arg(15) + ->Arg(16) + ->Arg(17) + ->Arg(18) + ->Arg(19) + ->Arg(20) + ->Arg(21) + ->Arg(22) + ->Arg(23) + ->Arg(24) + ->Arg(25) + ->Arg(26) + ->Arg(27) + ->Arg(28) + ->Arg(29) + ->Arg(30) + ->Arg(31) + ->Arg(32) + ->Arg(64) + ->Arg(512) + ->Arg(1024) + ->Arg(4000) + ->Arg(4096) + ->Arg(5500) + ->Arg(64000) + ->Arg(65536) + ->Arg(70000); +} + +template <class T>
+static void BM_std_minmax(benchmark::State& state) { + std::vector<T> vec(state.range(), 3); + + for (auto _ : state) { + benchmark::DoNotOptimize(vec); + benchmark::DoNotOptimize(std::ranges::minmax(vec)); + } +} +BENCHMARK(BM_std_minmax<char>)->Apply(run_sizes); +BENCHMARK(BM_std_minmax<short>)->Apply(run_sizes); +BENCHMARK(BM_std_minmax<int>)->Apply(run_sizes); +BENCHMARK(BM_std_minmax<long long>)->Apply(run_sizes); +BENCHMARK(BM_std_minmax<unsigned char>)->Apply(run_sizes); +BENCHMARK(BM_std_minmax<unsigned short>)->Apply(run_sizes); +BENCHMARK(BM_std_minmax<unsigned int>)->Apply(run_sizes); +BENCHMARK(BM_std_minmax<unsigned long long>)->Apply(run_sizes); + +BENCHMARK_MAIN(); diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst index 2da9df54a5319..a420b599cd597 100644 --- a/libcxx/docs/ReleaseNotes/19.rst +++ b/libcxx/docs/ReleaseNotes/19.rst @@ -54,6 +54,8 @@ Improvements and New Features resulting in a performance increase of up to 1400x. - The ``std::mismatch`` algorithm has been optimized for integral types, which can lead up to 40x performance improvements. +- The ``std::ranges::minmax`` algorithm has been optimized for integral types, resulting in a performance increase of + up to 100x. - The ``_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM`` macro has been added to make the declarations in ``<strstream>`` available. 
diff --git a/libcxx/include/__algorithm/comp.h b/libcxx/include/__algorithm/comp.h index a089375e3da13..a0fa88d6d2acd 100644 --- a/libcxx/include/__algorithm/comp.h +++ b/libcxx/include/__algorithm/comp.h @@ -41,6 +41,9 @@ struct __less { } }; +template <class _Tp> +inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___ALGORITHM_COMP_H diff --git a/libcxx/include/__algorithm/ranges_minmax.h b/libcxx/include/__algorithm/ranges_minmax.h index 22a62b620c936..ca5722523336f 100644 --- a/libcxx/include/__algorithm/ranges_minmax.h +++ b/libcxx/include/__algorithm/ranges_minmax.h @@ -23,7 +23,9 @@ #include <__iterator/projected.h> #include <__ranges/access.h> #include <__ranges/concepts.h> +#include <__type_traits/desugars_to.h> #include <__type_traits/is_reference.h> +#include <__type_traits/is_trivially_copyable.h> #include <__type_traits/remove_cvref.h> #include <__utility/forward.h> #include <__utility/move.h> @@ -83,7 +85,20 @@ struct __fn { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__first != __last, "range has to contain at least one element"); - if constexpr (forward_range<_Range>) { + // This optimization is not in minmax_element because clang doesn't see through the pointers and as a result doesn't + // vectorize the code. + if constexpr (contiguous_range<_Range> && is_integral_v<_ValueT> && + __is_cheap_to_copy<_ValueT> && __is_identity<_Proj>::value && + __desugars_to_v<__less_tag, _Comp, _ValueT, _ValueT>) { + minmax_result<_ValueT> __result = {__r[0], __r[0]}; + for (auto __e : __r) { + if (__e < __result.min) + __result.min = __e; + if (__result.max < __e) + __result.max = __e; + } + return __result; + } else if constexpr (forward_range<_Range>) { // Special-case the one element case. Avoid repeatedly initializing objects from the result of an iterator // dereference when doing so might not be idempotent. The `if constexpr` avoids the extra branch in cases where // it's not needed. 
diff --git a/libcxx/include/__functional/operations.h b/libcxx/include/__functional/operations.h index 9aa28e4925069..240f127e54255 100644 --- a/libcxx/include/__functional/operations.h +++ b/libcxx/include/__functional/operations.h @@ -359,6 +359,9 @@ struct _LIBCPP_TEMPLATE_VIS less : __binary_function<_Tp, _Tp, bool> { }; _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(less); +template <class _Tp> +inline const bool __desugars_to_v<__less_tag, less<_Tp>, _Tp, _Tp> = true; + #if _LIBCPP_STD_VER >= 14 template <> struct _LIBCPP_TEMPLATE_VIS less<void> { @@ -370,6 +373,9 @@ struct _LIBCPP_TEMPLATE_VIS less<void> { } typedef void is_transparent; }; + +template <class _Tp> +inline const bool __desugars_to_v<__less_tag, less<>, _Tp, _Tp> = true; #endif #if _LIBCPP_STD_VER >= 14 diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h index a9dffaf696258..27f06eadd0eb1 100644 --- a/libcxx/include/__functional/ranges_operations.h +++ b/libcxx/include/__functional/ranges_operations.h @@ -99,6 +99,9 @@ struct greater_equal { template <class _Tp, class _Up> inline const bool __desugars_to_v<__equal_tag, ranges::equal_to, _Tp, _Up> = true; +template <class _Tp, class _Up> +inline const bool __desugars_to_v<__less_tag, ranges::less, _Tp, _Up> = true; + #endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/desugars_to.h b/libcxx/include/__type_traits/desugars_to.h index a8f69c28dfc52..97a2ee5448f20 100644 --- a/libcxx/include/__type_traits/desugars_to.h +++ b/libcxx/include/__type_traits/desugars_to.h @@ -20,6 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // Tags to represent the canonical operations struct __equal_tag {}; struct __plus_tag {}; +struct __less_tag {}; // This class template is used to determine whether an operation "desugars" // (or boils down) to a given canonical operation.