Patch summary (free-form text ahead of the first "diff --git" header):
enable the C23 _Float16 math entrypoints for GPU targets. Compiler-feature
detection is hardcoded when LIBC_TARGET_OS_IS_GPU is set, ROUND_OPT is no
longer skipped for GPU, and the f16 rounding functions select their builtin
fast path through the new LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS macro.

diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake
index a6d793d495c45..361c1e710b187 100644
--- a/libc/cmake/modules/CheckCompilerFeatures.cmake
+++ b/libc/cmake/modules/CheckCompilerFeatures.cmake
@@ -15,6 +15,12 @@ set(
 # Making sure ALL_COMPILER_FEATURES is sorted.
 list(SORT ALL_COMPILER_FEATURES)
 
+# Compiler features that are unavailable on GPU targets with the in-tree Clang.
+set(
+  CPU_ONLY_COMPILER_FEATURES
+    "float128"
+)
+
 # Function to check whether the compiler supports the provided set of features.
 # Usage:
 # compiler_supports(
@@ -65,13 +71,26 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
     set(CMAKE_TRY_COMPILE_TARGET_TYPE EXECUTABLE)
   endif()
 
-  try_compile(
-    has_feature
-    ${CMAKE_CURRENT_BINARY_DIR}/compiler_features
-    SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/compiler_features/check_${feature}.cpp
-    COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${compile_options}
-    LINK_OPTIONS ${link_options}
-  )
+  if(LIBC_TARGET_OS_IS_GPU)
+    # CUDA shouldn't be required to build the libc, only to test it, so we can't
+    # try to build CUDA binaries here. Since GPU builds are always compiled with
+    # the in-tree Clang, we just hardcode which compiler features are available
+    # when targeting GPUs.
+    if(feature IN_LIST CPU_ONLY_COMPILER_FEATURES)
+      set(has_feature FALSE)
+    else()
+      set(has_feature TRUE)
+    endif()
+  else()
+    try_compile(
+      has_feature
+      ${CMAKE_CURRENT_BINARY_DIR}/compiler_features
+      SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/compiler_features/check_${feature}.cpp
+      COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${compile_options}
+      LINK_OPTIONS ${link_options}
+    )
+  endif()
+
   if(has_feature)
     list(APPEND AVAILABLE_COMPILER_FEATURES ${feature})
     if(${feature} STREQUAL "float16")
diff --git a/libc/cmake/modules/LLVMLibCFlagRules.cmake b/libc/cmake/modules/LLVMLibCFlagRules.cmake
index eca7ba8d183e6..92245ffab4746 100644
--- a/libc/cmake/modules/LLVMLibCFlagRules.cmake
+++ b/libc/cmake/modules/LLVMLibCFlagRules.cmake
@@ -276,8 +276,10 @@ if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE2")))
   set(SKIP_FLAG_EXPANSION_EXPLICIT_SIMD_OPT TRUE)
 endif()
 
-# Skip ROUND_OPT flag for targets that don't support SSE 4.2.
+# Skip ROUND_OPT flag for targets that don't support rounding instructions. On
+# x86, these are SSE4.1 instructions, but we already had code to check for
+# SSE4.2 support.
 if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")) OR
-       LIBC_TARGET_ARCHITECTURE_IS_AARCH64))
+       LIBC_TARGET_ARCHITECTURE_IS_AARCH64 OR LIBC_TARGET_OS_IS_GPU))
   set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE)
 endif()
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index b8eb743cf587a..b6fdd9dbc65b2 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -346,6 +346,74 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.truncf
 )
 
+if(LIBC_TYPES_HAS_FLOAT16)
+  list(APPEND TARGET_LIBM_ENTRYPOINTS
+    # math.h C23 _Float16 entrypoints
+    libc.src.math.canonicalizef16
+    libc.src.math.ceilf16
+    libc.src.math.copysignf16
+    libc.src.math.f16add
+    libc.src.math.f16addf
+    libc.src.math.f16div
+    libc.src.math.f16divf
+    libc.src.math.f16fma
+    libc.src.math.f16fmaf
+    libc.src.math.f16mul
+    libc.src.math.f16mulf
+    libc.src.math.f16sqrt
+    libc.src.math.f16sqrtf
+    libc.src.math.f16sub
+    libc.src.math.f16subf
+    libc.src.math.fabsf16
+    libc.src.math.fdimf16
+    libc.src.math.floorf16
+    libc.src.math.fmaxf16
+    libc.src.math.fmaximum_mag_numf16
+    libc.src.math.fmaximum_magf16
+    libc.src.math.fmaximum_numf16
+    libc.src.math.fmaximumf16
+    libc.src.math.fminf16
+    libc.src.math.fminimum_mag_numf16
+    libc.src.math.fminimum_magf16
+    libc.src.math.fminimum_numf16
+    libc.src.math.fminimumf16
+    libc.src.math.fmodf16
+    libc.src.math.frexpf16
+    libc.src.math.fromfpf16
+    libc.src.math.fromfpxf16
+    libc.src.math.getpayloadf16
+    libc.src.math.ilogbf16
+    libc.src.math.ldexpf16
+    libc.src.math.llogbf16
+    libc.src.math.llrintf16
+    libc.src.math.llroundf16
+    libc.src.math.logbf16
+    libc.src.math.lrintf16
+    libc.src.math.lroundf16
+    libc.src.math.modff16
+    libc.src.math.nanf16
+    libc.src.math.nearbyintf16
+    libc.src.math.nextafterf16
+    libc.src.math.nextdownf16
+    libc.src.math.nexttowardf16
+    libc.src.math.nextupf16
+    libc.src.math.remainderf16
+    libc.src.math.remquof16
+    libc.src.math.rintf16
+    libc.src.math.roundevenf16
+    libc.src.math.roundf16
+    libc.src.math.scalblnf16
+    libc.src.math.scalbnf16
+    libc.src.math.setpayloadf16
+    libc.src.math.setpayloadsigf16
+    libc.src.math.totalorderf16
+    libc.src.math.totalordermagf16
+    libc.src.math.truncf16
+    libc.src.math.ufromfpf16
+    libc.src.math.ufromfpxf16
+  )
+endif()
+
 set(TARGET_LLVMLIBC_ENTRYPOINTS
   ${TARGET_LIBC_ENTRYPOINTS}
   ${TARGET_LIBM_ENTRYPOINTS}
diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h
index 80d48be702070..ba6e5b314e9de 100644
--- a/libc/src/__support/macros/properties/cpu_features.h
+++ b/libc/src/__support/macros/properties/cpu_features.h
@@ -49,4 +49,8 @@
 #define LIBC_TARGET_CPU_HAS_NEAREST_INT
 #endif
 
+#if defined(LIBC_TARGET_ARCH_IS_AARCH64) || defined(LIBC_TARGET_ARCH_IS_GPU)
+#define LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS
+#endif
+
 #endif // LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_CPU_FEATURES_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index d775026fabb3e..51743784ff156 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -111,7 +111,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.macros.properties.types
     libc.src.__support.FPUtil.nearest_integer_operations
-    libc.src.__support.macros.properties.architectures
+    libc.src.__support.macros.properties.cpu_features
   FLAGS
     ROUND_OPT
 )
@@ -503,7 +503,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.macros.properties.types
     libc.src.__support.FPUtil.nearest_integer_operations
-    libc.src.__support.macros.properties.architectures
+    libc.src.__support.macros.properties.cpu_features
   FLAGS
     ROUND_OPT
 )
@@ -572,7 +572,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.macros.properties.types
     libc.src.__support.FPUtil.nearest_integer_operations
-    libc.src.__support.macros.properties.architectures
+    libc.src.__support.macros.properties.cpu_features
   FLAGS
     ROUND_OPT
 )
@@ -641,7 +641,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.macros.properties.types
     libc.src.__support.FPUtil.nearest_integer_operations
-    libc.src.__support.macros.properties.architectures
+    libc.src.__support.macros.properties.cpu_features
   FLAGS
     ROUND_OPT
 )
@@ -710,7 +710,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.macros.properties.types
     libc.src.__support.FPUtil.nearest_integer_operations
-    libc.src.__support.macros.properties.architectures
+    libc.src.__support.macros.properties.cpu_features
   FLAGS
     ROUND_OPT
 )
@@ -903,7 +903,7 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.macros.properties.types
     libc.src.__support.FPUtil.nearest_integer_operations
-    libc.src.__support.macros.properties.architectures
+    libc.src.__support.macros.properties.cpu_features
   FLAGS
     ROUND_OPT
 )
diff --git a/libc/src/math/generic/ceilf16.cpp b/libc/src/math/generic/ceilf16.cpp
index 708bc4cfd4860..8af31c6623a02 100644
--- a/libc/src/math/generic/ceilf16.cpp
+++ b/libc/src/math/generic/ceilf16.cpp
@@ -10,13 +10,13 @@
 #include "src/__support/FPUtil/NearestIntegerOperations.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, ceilf16, (float16 x)) {
 #if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
-    defined(LIBC_TARGET_ARCH_IS_AARCH64)
+    defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
   return static_cast<float16>(__builtin_ceilf(x));
 #else
   return fputil::ceil(x);
diff --git a/libc/src/math/generic/floorf16.cpp b/libc/src/math/generic/floorf16.cpp
index 84e4b0730ac68..3092048f5ab06 100644
--- a/libc/src/math/generic/floorf16.cpp
+++ b/libc/src/math/generic/floorf16.cpp
@@ -10,13 +10,13 @@
 #include "src/__support/FPUtil/NearestIntegerOperations.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, floorf16, (float16 x)) {
 #if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
-    defined(LIBC_TARGET_ARCH_IS_AARCH64)
+    defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
   return static_cast<float16>(__builtin_floorf(x));
 #else
   return fputil::floor(x);
diff --git a/libc/src/math/generic/rintf16.cpp b/libc/src/math/generic/rintf16.cpp
index 0e8c091efcf9b..3a53dd28e3d10 100644
--- a/libc/src/math/generic/rintf16.cpp
+++ b/libc/src/math/generic/rintf16.cpp
@@ -10,13 +10,13 @@
 #include "src/__support/FPUtil/NearestIntegerOperations.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, rintf16, (float16 x)) {
 #if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
-    defined(LIBC_TARGET_ARCH_IS_AARCH64)
+    defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
   return static_cast<float16>(__builtin_rintf(x));
 #else
   return fputil::round_using_current_rounding_mode(x);
diff --git a/libc/src/math/generic/roundevenf16.cpp b/libc/src/math/generic/roundevenf16.cpp
index b45670bd24ff1..c3dbd779b9739 100644
--- a/libc/src/math/generic/roundevenf16.cpp
+++ b/libc/src/math/generic/roundevenf16.cpp
@@ -10,13 +10,13 @@
 #include "src/__support/FPUtil/NearestIntegerOperations.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, roundevenf16, (float16 x)) {
 #if defined(__LIBC_USE_BUILTIN_ROUNDEVEN) && \
-    defined(LIBC_TARGET_ARCH_IS_AARCH64)
+    defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
   return static_cast<float16>(__builtin_roundevenf(x));
 #else
   return fputil::round_using_specific_rounding_mode(x, FP_INT_TONEAREST);
diff --git a/libc/src/math/generic/roundf16.cpp b/libc/src/math/generic/roundf16.cpp
index cb668c0e76388..a5e2b44fbd54b 100644
--- a/libc/src/math/generic/roundf16.cpp
+++ b/libc/src/math/generic/roundf16.cpp
@@ -10,12 +10,13 @@
 #include "src/__support/FPUtil/NearestIntegerOperations.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, roundf16, (float16 x)) {
-#if defined(__LIBC_USE_BUILTIN_ROUND) && defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#if defined(__LIBC_USE_BUILTIN_ROUND) && \
+    defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
   return static_cast<float16>(__builtin_roundf(x));
 #else
   return fputil::round(x);
diff --git a/libc/src/math/generic/truncf16.cpp b/libc/src/math/generic/truncf16.cpp
index b931053e53438..31b1214a9a0e4 100644
--- a/libc/src/math/generic/truncf16.cpp
+++ b/libc/src/math/generic/truncf16.cpp
@@ -10,13 +10,13 @@
 #include "src/__support/FPUtil/NearestIntegerOperations.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
-#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, truncf16, (float16 x)) {
 #if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_RINT_TRUNC) && \
-    defined(LIBC_TARGET_ARCH_IS_AARCH64)
+    defined(LIBC_TARGET_CPU_HAS_FAST_FLOAT16_OPS)
   return static_cast<float16>(__builtin_truncf(x));
 #else
   return fputil::trunc(x);