Skip to content

[libc][math][c23] Enable C23 _Float16 math functions on GPUs #99248

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 26 additions & 7 deletions libc/cmake/modules/CheckCompilerFeatures.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ set(
# Making sure ALL_COMPILER_FEATURES is sorted.
list(SORT ALL_COMPILER_FEATURES)

# Compiler features that are unavailable on GPU targets with the in-tree Clang.
set(
CPU_ONLY_COMPILER_FEATURES
"float128"
)

# Function to check whether the compiler supports the provided set of features.
# Usage:
# compiler_supports(
Expand Down Expand Up @@ -65,13 +71,26 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
set(CMAKE_TRY_COMPILE_TARGET_TYPE EXECUTABLE)
endif()

try_compile(
has_feature
${CMAKE_CURRENT_BINARY_DIR}/compiler_features
SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/compiler_features/check_${feature}.cpp
COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${compile_options}
LINK_OPTIONS ${link_options}
)
if(LIBC_TARGET_OS_IS_GPU)
# CUDA shouldn't be required to build the libc, only to test it, so we can't
# try to build CUDA binaries here. Since GPU builds are always compiled with
# the in-tree Clang, we just hardcode which compiler features are available
# when targeting GPUs.
if(feature IN_LIST CPU_ONLY_COMPILER_FEATURES)
set(has_feature FALSE)
else()
set(has_feature TRUE)
endif()
else()
try_compile(
has_feature
${CMAKE_CURRENT_BINARY_DIR}/compiler_features
SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/compiler_features/check_${feature}.cpp
COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${compile_options}
LINK_OPTIONS ${link_options}
)
endif()

if(has_feature)
list(APPEND AVAILABLE_COMPILER_FEATURES ${feature})
if(${feature} STREQUAL "float16")
Expand Down
6 changes: 4 additions & 2 deletions libc/cmake/modules/LLVMLibCFlagRules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,10 @@ if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE2")))
set(SKIP_FLAG_EXPANSION_EXPLICIT_SIMD_OPT TRUE)
endif()

# Skip ROUND_OPT flag for targets that don't support SSE 4.2.
# Skip ROUND_OPT flag for targets that don't support rounding instructions. On
# x86, these are SSE4.1 instructions, but we already had code to check for
# SSE4.2 support.
if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")) OR
LIBC_TARGET_ARCHITECTURE_IS_AARCH64))
LIBC_TARGET_ARCHITECTURE_IS_AARCH64 OR LIBC_TARGET_OS_IS_GPU))
set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE)
endif()
68 changes: 68 additions & 0 deletions libc/config/gpu/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,74 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.truncf
)

if(LIBC_TYPES_HAS_FLOAT16)
list(APPEND TARGET_LIBM_ENTRYPOINTS
# math.h C23 _Float16 entrypoints
libc.src.math.canonicalizef16
libc.src.math.ceilf16
libc.src.math.copysignf16
libc.src.math.f16add
libc.src.math.f16addf
libc.src.math.f16div
libc.src.math.f16divf
libc.src.math.f16fma
libc.src.math.f16fmaf
libc.src.math.f16mul
libc.src.math.f16mulf
libc.src.math.f16sqrt
libc.src.math.f16sqrtf
libc.src.math.f16sub
libc.src.math.f16subf
libc.src.math.fabsf16
libc.src.math.fdimf16
libc.src.math.floorf16
libc.src.math.fmaxf16
libc.src.math.fmaximum_mag_numf16
libc.src.math.fmaximum_magf16
libc.src.math.fmaximum_numf16
libc.src.math.fmaximumf16
libc.src.math.fminf16
libc.src.math.fminimum_mag_numf16
libc.src.math.fminimum_magf16
libc.src.math.fminimum_numf16
libc.src.math.fminimumf16
libc.src.math.fmodf16
libc.src.math.frexpf16
libc.src.math.fromfpf16
libc.src.math.fromfpxf16
libc.src.math.getpayloadf16
libc.src.math.ilogbf16
libc.src.math.ldexpf16
libc.src.math.llogbf16
libc.src.math.llrintf16
libc.src.math.llroundf16
libc.src.math.logbf16
libc.src.math.lrintf16
libc.src.math.lroundf16
libc.src.math.modff16
libc.src.math.nanf16
libc.src.math.nearbyintf16
libc.src.math.nextafterf16
libc.src.math.nextdownf16
libc.src.math.nexttowardf16
libc.src.math.nextupf16
libc.src.math.remainderf16
libc.src.math.remquof16
libc.src.math.rintf16
libc.src.math.roundevenf16
libc.src.math.roundf16
libc.src.math.scalblnf16
libc.src.math.scalbnf16
libc.src.math.setpayloadf16
libc.src.math.setpayloadsigf16
libc.src.math.totalorderf16
libc.src.math.totalordermagf16
libc.src.math.truncf16
libc.src.math.ufromfpf16
libc.src.math.ufromfpxf16
)
endif()

set(TARGET_LLVMLIBC_ENTRYPOINTS
${TARGET_LIBC_ENTRYPOINTS}
${TARGET_LIBM_ENTRYPOINTS}
Expand Down
4 changes: 4 additions & 0 deletions libc/src/__support/macros/properties/cpu_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,8 @@
#define LIBC_TARGET_CPU_HAS_NEAREST_INT
#endif

#if defined(LIBC_TARGET_ARCH_IS_AARCH64) || defined(LIBC_TARGET_ARCH_IS_GPU)
#define LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS
#endif

#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_CPU_FEATURES_H
12 changes: 6 additions & 6 deletions libc/src/math/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
libc.src.__support.macros.properties.architectures
libc.src.__support.macros.properties.cpu_features
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mind running the bazel builds to see if these extra dependencies need to be added to bazel overlay?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bazel build seems to work:

~/projects/llvm-project/utils/bazel libc-math-float16-gpu ➜ bazel build --config=generic_clang @llvm-project//libc/...
INFO: Analyzed 897 targets (68 packages loaded, 6253 targets configured).
INFO: Found 897 targets...
INFO: Elapsed time: 166.512s, Critical Path: 151.57s
INFO: 3876 processes: 2132 internal, 1744 linux-sandbox.
INFO: Build completed successfully, 3876 total actions
~/projects/llvm-project/utils/bazel libc-math-float16-gpu ➜ bazel test --config=generic_clang @llvm-project//libc/...
INFO: Analyzed 897 targets (0 packages loaded, 0 targets configured).
INFO: Found 659 targets and 238 test targets...
INFO: Elapsed time: 5.366s, Critical Path: 4.26s
INFO: 239 processes: 476 linux-sandbox.
INFO: Build completed successfully, 239 total actions
@llvm-project//libc/test/src/__support:arg_list_test                     PASSED in 0.0s
...

Executed 238 out of 238 tests: 238 tests pass.
There were tests whose specified size is too big. Use the --test_verbose_timeout_warnings command line option to see which ones these are.
~/projects/llvm-project/utils/bazel libc-math-float16-gpu ➜

Why are only some math functions listed in https://github.com/llvm/llvm-project/blob/main/utils/bazel/llvm-project-overlay/libc/BUILD.bazel?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

B/c I've been lazy / forget to update it, especially since there are quite some work need to be done to add smoke tests to the lists. I planned to update it all at once at some point in late July, early August.

FLAGS
ROUND_OPT
)
Expand Down Expand Up @@ -503,7 +503,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
libc.src.__support.macros.properties.architectures
libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
Expand Down Expand Up @@ -572,7 +572,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
libc.src.__support.macros.properties.architectures
libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
Expand Down Expand Up @@ -641,7 +641,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
libc.src.__support.macros.properties.architectures
libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
Expand Down Expand Up @@ -710,7 +710,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
libc.src.__support.macros.properties.architectures
libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
Expand Down Expand Up @@ -903,7 +903,7 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
libc.src.__support.macros.properties.architectures
libc.src.__support.macros.properties.cpu_features
FLAGS
ROUND_OPT
)
Expand Down
4 changes: 2 additions & 2 deletions libc/src/math/generic/ceilf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/macros/properties/cpu_features.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(float16, ceilf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
defined(LIBC_TARGET_ARCH_IS_AARCH64)
defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_ceilf(x));
#else
return fputil::ceil(x);
Expand Down
4 changes: 2 additions & 2 deletions libc/src/math/generic/floorf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/macros/properties/cpu_features.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(float16, floorf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
defined(LIBC_TARGET_ARCH_IS_AARCH64)
defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_floorf(x));
#else
return fputil::floor(x);
Expand Down
4 changes: 2 additions & 2 deletions libc/src/math/generic/rintf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/macros/properties/cpu_features.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(float16, rintf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
defined(LIBC_TARGET_ARCH_IS_AARCH64)
defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_rintf(x));
#else
return fputil::round_using_current_rounding_mode(x);
Expand Down
4 changes: 2 additions & 2 deletions libc/src/math/generic/roundevenf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/macros/properties/cpu_features.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(float16, roundevenf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_ROUNDEVEN) && \
defined(LIBC_TARGET_ARCH_IS_AARCH64)
defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_roundevenf(x));
#else
return fputil::round_using_specific_rounding_mode(x, FP_INT_TONEAREST);
Expand Down
5 changes: 3 additions & 2 deletions libc/src/math/generic/roundf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/macros/properties/cpu_features.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(float16, roundf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_ROUND) && defined(LIBC_TARGET_ARCH_IS_AARCH64)
#if defined(__LIBC_USE_BUILTIN_ROUND) && \
defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_roundf(x));
#else
return fputil::round(x);
Expand Down
4 changes: 2 additions & 2 deletions libc/src/math/generic/truncf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/macros/properties/cpu_features.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(float16, truncf16, (float16 x)) {
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
defined(LIBC_TARGET_ARCH_IS_AARCH64)
defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
return static_cast<float16>(__builtin_truncf(x));
#else
return fputil::trunc(x);
Expand Down
Loading