From 9a69b7802b84cecdddaf7945748d590e1c6fb1df Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 11 Apr 2022 19:59:58 +0800 Subject: [PATCH 01/22] [SYCL][libdevice] Add sycl imf devicelib Signed-off-by: jinge90 --- clang/lib/Driver/Driver.cpp | 10 ++- clang/lib/Driver/ToolChains/Gnu.cpp | 1 + libdevice/cmake/modules/SYCLLibdevice.cmake | 87 ++++++++++++++++++- libdevice/device.h | 8 ++ libdevice/device_math.h | 1 + libdevice/fallback-imf.cpp | 26 ++++++ libdevice/imf_wrapper.cpp | 20 +++++ .../sycl-post-link/SYCLDeviceLibReqMask.cpp | 3 + .../sycl-post-link/SYCLDeviceLibReqMask.h | 2 + sycl/include/CL/sycl/builtins.hpp | 1 + .../program_manager/program_manager.cpp | 12 ++- .../program_manager/program_manager.hpp | 4 +- 12 files changed, 170 insertions(+), 5 deletions(-) create mode 100644 libdevice/fallback-imf.cpp create mode 100644 libdevice/imf_wrapper.cpp diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 05c13bff525d5..606108203552c 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4628,6 +4628,8 @@ class OffloadingActionBuilder final { llvm::StringMap devicelib_link_info = {{"libc", true}, {"libm-fp32", true}, {"libm-fp64", true}, + {"libimf-fp32", true}, + {"libimf-fp64", true}, {"internal", true}}; if (Arg *A = Args.getLastArg(options::OPT_fsycl_device_lib_EQ, options::OPT_fno_sycl_device_lib_EQ)) { @@ -4670,7 +4672,9 @@ class OffloadingActionBuilder final { {"libsycl-complex", "libm-fp32"}, {"libsycl-complex-fp64", "libm-fp64"}, {"libsycl-cmath", "libm-fp32"}, - {"libsycl-cmath-fp64", "libm-fp64"}}; + {"libsycl-cmath-fp64", "libm-fp64"}, + {"libsycl-imf", "libimf-fp32"}, + {"libsycl-imf-fp64", "libimf-fp64"}}; // For AOT compilation, we need to link sycl_device_fallback_libs as // default too. const SYCLDeviceLibsList sycl_device_fallback_libs = { @@ -4679,7 +4683,9 @@ class OffloadingActionBuilder final { {"libsycl-fallback-complex", "libm-fp32"}, {"libsycl-fallback-complex-fp64", "libm-fp64"}, {"libsycl-fallback-cmath", "libm-fp32"}, - {"libsycl-fallback-cmath-fp64", "libm-fp64"}}; + {"libsycl-fallback-cmath-fp64", "libm-fp64"}, + {"libsycl-fallback-imf", "libimf-fp32"}, + {"libsycl-fallback-imf-fp64", "libimf-fp64"}}; // ITT annotation libraries are linked in separately whenever the device // code instrumentation is enabled. const SYCLDeviceLibsList sycl_device_annotation_libs = { diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 23019e88a8f47..dde7409891885 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -674,6 +674,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasArg(options::OPT_fsycl) && !Args.hasArg(options::OPT_nolibsycl)) { CmdArgs.push_back("-lsycl"); + CmdArgs.push_back("-lsycl-devicelib-host"); // Use of -fintelfpga implies -lOpenCL. // FIXME: Adjust to use plugin interface when available. if (Args.hasArg(options::OPT_fintelfpga)) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 9f844182c4065..09c7b2f5a1f37 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -7,6 +7,8 @@ else() set(spv_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") endif() set(clang $) +set(llvm-link $) +set(llc $) string(CONCAT sycl_targets_opt "-fsycl-targets=" @@ -82,6 +84,25 @@ add_custom_command(OUTPUT ${devicelib-obj-cmath-fp64} DEPENDS device_math.h device.h sycl-compiler VERBATIM) +set(devicelib-obj-imf ${obj_binary_dir}/libsycl-imf.${lib-suffix}) +add_custom_command(OUTPUT ${devicelib-obj-imf} + COMMAND ${clang} -fsycl -c + ${compile_opts} ${sycl_targets_opt} + ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp + -o ${devicelib-obj-imf} + MAIN_DEPENDENCY imf_wrapper.cpp + DEPENDS device_math.h device.h sycl-compiler + VERBATIM) + +set(devicelib-host-imf-wrapper-bc ${obj_binary_dir}/sycl-libdevice-host-imf-wrapper.bc) +add_custom_command(OUTPUT ${devicelib-host-imf-wrapper-bc} + COMMAND ${clang} -c -emit-llvm -D__LIBDEVICE_HOST_IMPL__ -O2 + ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp + -o ${devicelib-host-imf-wrapper-bc} + MAIN_DEPENDENCY imf_wrapper.cpp + DEPENDS device_math.h device.h sycl-compiler + VERBATIM) + add_custom_command(OUTPUT ${spv_binary_dir}/libsycl-fallback-cassert.spv COMMAND ${clang} -fsycl-device-only -fno-sycl-use-bitcode ${compile_opts} @@ -190,6 +211,33 @@ add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-cmath-fp64.${lib-su DEPENDS device_math.h device.h sycl-compiler VERBATIM) +add_custom_command(OUTPUT ${spv_binary_dir}/libsycl-fallback-imf.spv + COMMAND ${clang} -fsycl-device-only -fno-sycl-use-bitcode + ${compile_opts} + ${CMAKE_CURRENT_SOURCE_DIR}/fallback-imf.cpp + -o ${spv_binary_dir}/libsycl-fallback-imf.spv + MAIN_DEPENDENCY fallback-imf.cpp + DEPENDS device_math.h device.h sycl-compiler + VERBATIM) + +add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} + COMMAND ${clang} -fsycl -c + ${compile_opts} ${sycl_targets_opt} + ${CMAKE_CURRENT_SOURCE_DIR}/fallback-imf.cpp + -o ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} + MAIN_DEPENDENCY fallback-imf.cpp + DEPENDS device_math.h device.h sycl-compiler + VERBATIM) + +set(devicelib-host-imf-fallback-bc ${obj_binary_dir}/sycl-libdevice-host-imf-fallback.bc) +add_custom_command(OUTPUT ${devicelib-host-imf-fallback-bc} + COMMAND ${clang} -c -emit-llvm -D__LIBDEVICE_HOST_IMPL__ -O2 + ${CMAKE_CURRENT_SOURCE_DIR}/fallback-imf.cpp + -o ${devicelib-host-imf-fallback-bc} + MAIN_DEPENDENCY fallback-imf.cpp + DEPENDS device_math.h device.h sycl-compiler + VERBATIM) + add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-itt-stubs.${lib-suffix} COMMAND ${clang} -fsycl -c ${compile_opts} ${sycl_targets_opt} @@ -217,6 +265,30 @@ add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-itt-user-wrappers.${lib-suff DEPENDS device_itt.h spirv_vars.h device.h sycl-compiler VERBATIM) + +set(devicelib-host-imf-bc ${obj_binary_dir}/sycl-devicelib-host-imf.bc) +set(devicelib-host-imf-obj ${obj_binary_dir}/sycl-devicelib-host-imf.${lib-suffix}) +add_custom_command(OUTPUT ${devicelib-host-imf-bc} + COMMAND ${llvm-link} ${devicelib-host-imf-wrapper-bc} ${devicelib-host-imf-fallback-bc} + -o ${devicelib-host-imf-bc} + DEPENDS ${devicelib-host-imf-wrapper-bc} ${devicelib-host-imf-fallback-bc} sycl-compiler + VERBATIM) + +add_custom_command(OUTPUT ${devicelib-host-imf-obj} + COMMAND ${llc} -filetype=obj ${devicelib-host-imf-bc} -o ${devicelib-host-imf-obj} + DEPENDS ${devicelib-host-imf-bc} sycl-compiler + VERBATIM) + +if (WIN32) +else() +set(devicelib-host ${obj_binary_dir}/libsycl-devicelib-host.a) +add_custom_command(OUTPUT ${devicelib-host} + COMMAND ar rcs ${devicelib-host} + ${devicelib-host-imf-obj} + DEPENDS ${devicelib-host-imf-obj} sycl-compiler + VERBATIM) +endif() + set(devicelib-obj-itt-files ${obj_binary_dir}/libsycl-itt-stubs.${lib-suffix} ${obj_binary_dir}/libsycl-itt-compiler-wrappers.${lib-suffix} @@ -230,7 +302,13 @@ add_custom_target(libsycldevice-obj DEPENDS ${devicelib-obj-cmath} ${devicelib-obj-cmath-fp64} ${devicelib-obj-itt-files} + ${devicelib-obj-imf} +) + +add_custom_target(libsycldevice-host DEPENDS + ${devicelib-host} ) + add_custom_target(libsycldevice-spv DEPENDS ${spv_binary_dir}/libsycl-fallback-cassert.spv ${spv_binary_dir}/libsycl-fallback-cstring.spv @@ -238,6 +316,7 @@ add_custom_target(libsycldevice-spv DEPENDS ${spv_binary_dir}/libsycl-fallback-complex-fp64.spv ${spv_binary_dir}/libsycl-fallback-cmath.spv ${spv_binary_dir}/libsycl-fallback-cmath-fp64.spv + ${spv_binary_dir}/libsycl-fallback-imf.spv ) add_custom_target(libsycldevice-fallback-obj DEPENDS ${obj_binary_dir}/libsycl-fallback-cassert.${lib-suffix} @@ -246,11 +325,13 @@ add_custom_target(libsycldevice-fallback-obj DEPENDS ${obj_binary_dir}/libsycl-fallback-complex-fp64.${lib-suffix} ${obj_binary_dir}/libsycl-fallback-cmath.${lib-suffix} ${obj_binary_dir}/libsycl-fallback-cmath-fp64.${lib-suffix} + ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} ) add_custom_target(libsycldevice DEPENDS libsycldevice-obj libsycldevice-fallback-obj - libsycldevice-spv) + libsycldevice-spv + libsycldevice-host) # Place device libraries near the libsycl.so library in an install # directory as well @@ -274,6 +355,9 @@ install(FILES ${devicelib-obj-file} ${devicelib-obj-cmath-fp64} ${obj_binary_dir}/libsycl-fallback-cmath-fp64.${lib-suffix} ${devicelib-obj-itt-files} + ${devicelib-obj-imf} + ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} + ${devicelib-host} DESTINATION ${install_dest_lib} COMPONENT libsycldevice) @@ -283,5 +367,6 @@ install(FILES ${spv_binary_dir}/libsycl-fallback-cassert.spv ${spv_binary_dir}/libsycl-fallback-complex-fp64.spv ${spv_binary_dir}/libsycl-fallback-cmath.spv ${spv_binary_dir}/libsycl-fallback-cmath-fp64.spv + ${spv_binary_dir}/libsycl-fallback-imf.spv DESTINATION ${install_dest_spv} COMPONENT libsycldevice) diff --git a/libdevice/device.h b/libdevice/device.h index 1e37da3052a78..40667c282bc92 100644 --- a/libdevice/device.h +++ b/libdevice/device.h @@ -25,4 +25,12 @@ #define DEVICE_EXTERN_C DEVICE_EXTERNAL EXTERN_C #endif // __SPIR__ +#if defined(__SPIR__) || defined(__LIBDEVICE_HOST_IMPL__) +#define __LIBDEVICE_IMF_ENABLED__ +#endif // __SPIR__ || __LIBDEVICE_HOST_IMPL__ + +#ifdef __LIBDEVICE_HOST_IMPL__ +#define DEVICE_EXTERN_C __attribute__((weak)) EXTERN_C +#endif // __LIBDEVICE_HOST_IMPL__ + #endif // __LIBDEVICE_DEVICE_H__ diff --git a/libdevice/device_math.h b/libdevice/device_math.h index a4644f6e03f19..c6a9dd171e535 100644 --- a/libdevice/device_math.h +++ b/libdevice/device_math.h @@ -270,5 +270,6 @@ float __devicelib_scalbnf(float x, int n); DEVICE_EXTERN_C double __devicelib_scalbn(double x, int exp); + #endif // __SPIR__ #endif // __LIBDEVICE_DEVICE_MATH_H__ diff --git a/libdevice/fallback-imf.cpp b/libdevice/fallback-imf.cpp new file mode 100644 index 0000000000000..9f35e1ef729dd --- /dev/null +++ b/libdevice/fallback-imf.cpp @@ -0,0 +1,26 @@ +//==-- fallback-imf.cpp - fallback implementation of intel math functions --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "device_math.h" + +#ifdef __LIBDEVICE_IMF_ENABLED__ + +static inline float __fclamp(float x, float y, float z) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_fmin(__builtin_fmax(x, y), z); +#elif defined(__SPIR__) + return __spirv_ocl_fclamp(x, y, z); +#endif +} + +DEVICE_EXTERN_C +float __devicelib_imf_saturatef(float x) { + return __fclamp(x, .0f, 1.f); +} + +#endif // __LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp new file mode 100644 index 0000000000000..729693e749b07 --- /dev/null +++ b/libdevice/imf_wrapper.cpp @@ -0,0 +1,20 @@ +//==----- imf_wrapper.cpp - wrappers for intel math library functions ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "device.h" + +#ifdef __LIBDEVICE_IMF_ENABLED__ +DEVICE_EXTERN_C +float __devicelib_imf_saturatef(float x); + +DEVICE_EXTERN_C +float __imf_saturatef(float x) { + return __devicelib_imf_saturatef(x); +} + +#endif // __LIBDEVICE_IMF_ENABLED__ diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp index 59e53f6c0cac9..7766421484040 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp @@ -171,6 +171,7 @@ SYCLDeviceLibFuncMap SDLMap = { {"__devicelib_memcmp", DeviceLibExt::cl_intel_devicelib_cstring}, {"__devicelib_assert_read", DeviceLibExt::cl_intel_devicelib_assert}, {"__devicelib_assert_fail", DeviceLibExt::cl_intel_devicelib_assert}, + {"__devicelib_imf_saturatef", DeviceLibExt::cl_intel_devicelib_imf}, }; // Each fallback device library corresponds to one bit in "require mask" which @@ -183,6 +184,8 @@ SYCLDeviceLibFuncMap SDLMap = { // fallback-complex: 0x8 // fallback-complex-fp64: 0x10 // fallback-cstring: 0x20 +// fallback-imf: 0x40 +// fallback-imf-fp64: 0x80 uint32_t getDeviceLibBits(const std::string &FuncName) { auto DeviceLibFuncIter = SDLMap.find(FuncName); return ((DeviceLibFuncIter == SDLMap.end()) diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.h b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.h index ba73ad3cdb39a..15cae43da0779 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.h +++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.h @@ -32,6 +32,8 @@ enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_complex, cl_intel_devicelib_complex_fp64, cl_intel_devicelib_cstring, + cl_intel_devicelib_imf, + cl_intel_devicelib_imf_fp64, }; uint32_t getSYCLDeviceLibReqMask(const Module &M); diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 0a9814da3eed0..f5f32dccf1fbb 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -1648,6 +1648,7 @@ extern SYCL_EXTERNAL double hypot(double x, double y); extern SYCL_EXTERNAL void *memcpy(void *dest, const void *src, size_t n); extern SYCL_EXTERNAL void *memset(void *dest, int c, size_t n); extern SYCL_EXTERNAL int memcmp(const void *s1, const void *s2, size_t n); +extern SYCL_EXTERNAL float __imf_saturatef(float x); } #ifdef __GLIBC__ extern "C" { diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 8108b642c823b..919a9c3107f30 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -755,6 +755,10 @@ static const char *getDeviceLibFilename(DeviceLibExt Extension) { return "libsycl-fallback-complex-fp64.spv"; case DeviceLibExt::cl_intel_devicelib_cstring: return "libsycl-fallback-cstring.spv"; + case DeviceLibExt::cl_intel_devicelib_imf: + return "libsycl-fallback-imf.spv"; + case DeviceLibExt::cl_intel_devicelib_imf_fp64: + return "libsycl-fallback-imf-fp64.spv"; } throw compile_program_error("Unhandled (new?) device library extension", PI_INVALID_OPERATION); @@ -774,6 +778,10 @@ static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) { return "cl_intel_devicelib_complex_fp64"; case DeviceLibExt::cl_intel_devicelib_cstring: return "cl_intel_devicelib_cstring"; + case DeviceLibExt::cl_intel_devicelib_imf: + return "cl_intel_devicelib_imf"; + case DeviceLibExt::cl_intel_devicelib_imf_fp64: + return "cl_intel_devicelib_imf_fp64"; } throw compile_program_error("Unhandled (new?) device library extension", PI_INVALID_OPERATION); @@ -940,7 +948,9 @@ static std::vector getDeviceLibPrograms( {DeviceLibExt::cl_intel_devicelib_math_fp64, false}, {DeviceLibExt::cl_intel_devicelib_complex, false}, {DeviceLibExt::cl_intel_devicelib_complex_fp64, false}, - {DeviceLibExt::cl_intel_devicelib_cstring, false}}; + {DeviceLibExt::cl_intel_devicelib_cstring, false}, + {DeviceLibExt::cl_intel_devicelib_imf, false}, + {DeviceLibExt::cl_intel_devicelib_imf_fp64, false}}; // Disable all devicelib extensions requiring fp64 support if at least // one underlying device doesn't support cl_khr_fp64. diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 504162e5deae8..79fd6654345ef 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -63,7 +63,9 @@ enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_math_fp64, cl_intel_devicelib_complex, cl_intel_devicelib_complex_fp64, - cl_intel_devicelib_cstring + cl_intel_devicelib_cstring, + cl_intel_devicelib_imf, + cl_intel_devicelib_imf_fp64, }; // Provides single loading and building OpenCL programs with unique contexts From 1065621804060032d7f5f443bd540016ebcf524e Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 11 Apr 2022 20:09:45 +0800 Subject: [PATCH 02/22] fix clang format Signed-off-by: jinge90 --- clang/lib/Driver/Driver.cpp | 9 +++------ libdevice/fallback-imf.cpp | 4 +--- libdevice/imf_wrapper.cpp | 4 +--- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 606108203552c..9dc81dff8a32c 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4625,12 +4625,9 @@ class OffloadingActionBuilder final { int NumOfDeviceLibLinked = 0; // Currently, all SYCL device libraries will be linked by default. Linkage // of "internal" libraries cannot be affected via -fno-sycl-device-lib. - llvm::StringMap devicelib_link_info = {{"libc", true}, - {"libm-fp32", true}, - {"libm-fp64", true}, - {"libimf-fp32", true}, - {"libimf-fp64", true}, - {"internal", true}}; + llvm::StringMap devicelib_link_info = { + {"libc", true}, {"libm-fp32", true}, {"libm-fp64", true}, + {"libimf-fp32", true}, {"libimf-fp64", true}, {"internal", true}}; if (Arg *A = Args.getLastArg(options::OPT_fsycl_device_lib_EQ, options::OPT_fno_sycl_device_lib_EQ)) { if (A->getValues().size() == 0) diff --git a/libdevice/fallback-imf.cpp b/libdevice/fallback-imf.cpp index 9f35e1ef729dd..3f24bc6c59974 100644 --- a/libdevice/fallback-imf.cpp +++ b/libdevice/fallback-imf.cpp @@ -19,8 +19,6 @@ static inline float __fclamp(float x, float y, float z) { } DEVICE_EXTERN_C -float __devicelib_imf_saturatef(float x) { - return __fclamp(x, .0f, 1.f); -} +float __devicelib_imf_saturatef(float x) { return __fclamp(x, .0f, 1.f); } #endif // __LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp index 729693e749b07..67e11b9e9d5ce 100644 --- a/libdevice/imf_wrapper.cpp +++ b/libdevice/imf_wrapper.cpp @@ -13,8 +13,6 @@ DEVICE_EXTERN_C float __devicelib_imf_saturatef(float x); DEVICE_EXTERN_C -float __imf_saturatef(float x) { - return __devicelib_imf_saturatef(x); -} +float __imf_saturatef(float x) { return __devicelib_imf_saturatef(x); } #endif // __LIBDEVICE_IMF_ENABLED__ From 63a6aac409a49b28cd151ce987a3461bbb8966f9 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 12 Apr 2022 15:35:28 +0800 Subject: [PATCH 03/22] Enable imf sycl devicelib on Windows. Signed-off-by: jinge90 --- clang/lib/Driver/ToolChains/MSVC.cpp | 1 + libdevice/cmake/modules/SYCLLibdevice.cmake | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index d08eb20d6e268..db00520a2b33f 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -139,6 +139,7 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-defaultlib:sycld.lib"); else CmdArgs.push_back("-defaultlib:sycl.lib"); + CmdArgs.push_back("-defaultlib:sycl-devicelib-host.lib"); } for (const auto *A : Args.filtered(options::OPT_foffload_static_lib_EQ)) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 09c7b2f5a1f37..fae3bf52c1dc2 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -280,6 +280,11 @@ add_custom_command(OUTPUT ${devicelib-host-imf-obj} VERBATIM) if (WIN32) +set(devicelib-host ${obj_binary_dir}/sycl-devicelib-host.lib) +add_custom_command(OUTPUT ${devicelib-host} + COMMAND lib ${devicelib-host-imf-obj} /OUT:${devicelib-host} + DEPENDS ${devicelib-host-imf-obj} sycl-compiler + VERBATIM) else() set(devicelib-host ${obj_binary_dir}/libsycl-devicelib-host.a) add_custom_command(OUTPUT ${devicelib-host} From 0c77cf2aec6f9a8c4026dcb8739a7c34f306f80f Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 17 May 2022 14:58:08 +0800 Subject: [PATCH 04/22] Clang format Signed-off-by: jinge90 --- clang/lib/Driver/ToolChains/SYCL.cpp | 7 ++++++- libdevice/imf_wrapper.cpp | 8 -------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 873a498b381d9..7acc32f1f444d 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -136,6 +136,8 @@ static llvm::SmallVector SYCLDeviceLibList{ "cmath-fp64", "complex", "complex-fp64", + "imf", + "imf-fp64", "itt-compiler-wrappers", "itt-stubs", "itt-user-wrappers", @@ -144,7 +146,10 @@ static llvm::SmallVector SYCLDeviceLibList{ "fallback-cmath", "fallback-cmath-fp64", "fallback-complex", - "fallback-complex-fp64"}; + "fallback-complex-fp64", + "fallback-imf", + "fallback-imf-fp64", +}; const char *SYCL::Linker::constructLLVMLinkCommand( Compilation &C, const JobAction &JA, const InputInfo &Output, diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp index 15bbb12c97a84..43859bad15d07 100644 --- a/libdevice/imf_wrapper.cpp +++ b/libdevice/imf_wrapper.cpp @@ -127,14 +127,6 @@ float __devicelib_imf_ull2float_rz(unsigned long long int); DEVICE_EXTERN_C_INLINE float __imf_saturatef(float x) { return __devicelib_imf_saturatef(x); } -DEVICE_EXTERN_C_INLINE -float __imf_expf(float x) { return __devicelib_imf_expf(x); } - -DEVICE_EXTERN_C_INLINE -_iml_half_internal __imf_expf16(_iml_half_internal x) { - return __devicelib_imf_expf16(x); -} - DEVICE_EXTERN_C_INLINE int __imf_float2int_rd(float x) { return __devicelib_imf_float2int_rd(x); } From 523ce39a72e23ac2d53cc8f3189cdb1a4926e102 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 17 May 2022 15:33:03 +0800 Subject: [PATCH 05/22] add imf utils functions in SYCLDeviceLibReqList Signed-off-by: jinge90 --- .../sycl-post-link/SYCLDeviceLibReqMask.cpp | 96 +++++++++++++++++++ sycl/include/CL/sycl/builtins.hpp | 75 +++++++++++++++ 2 files changed, 171 insertions(+) diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp index 7766421484040..e01ce75fc5a9f 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp @@ -172,6 +172,102 @@ SYCLDeviceLibFuncMap SDLMap = { {"__devicelib_assert_read", DeviceLibExt::cl_intel_devicelib_assert}, {"__devicelib_assert_fail", DeviceLibExt::cl_intel_devicelib_assert}, {"__devicelib_imf_saturatef", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2int_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2int_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2int_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2int_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2uint_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2uint_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2uint_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2uint_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ll_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ll_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ll_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ll_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ull_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ull_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ull_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float2ull_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float_as_int", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_int2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_int2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_int2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_int2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_int_as_float", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_float_as_uint", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ll2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ll2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ll2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ll2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_uint2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_uint2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_uint2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_uint2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_uint_as_float", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ull2float_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ull2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ull2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ull2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_double2float_rd", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2float_rn", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2float_ru", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2float_rz", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2int_rd", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2int_rn", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2int_ru", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2int_rz", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2uint_rd", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2uint_rn", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2uint_ru", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2uint_rz", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2hiint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2loint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ll_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ll_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ll_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ll_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ull_rd", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ull_rn", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ull_ru", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double2ull_rz", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_double_as_longlong", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_hiloint2double", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_int2double_rn", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ll2double_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ll2double_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ll2double_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ll2double_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ull2double_rd", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ull2double_rn", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ull2double_ru", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ull2double_rz", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_uint2double_rn", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_longlong_as_double", + DeviceLibExt::cl_intel_devicelib_imf_fp64}, }; // Each fallback device library corresponds to one bit in "require mask" which diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index eecca6aa001a8..60e5c4f41a5e5 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -1739,6 +1739,81 @@ extern SYCL_EXTERNAL void *memcpy(void *dest, const void *src, size_t n); extern SYCL_EXTERNAL void *memset(void *dest, int c, size_t n); extern SYCL_EXTERNAL int memcmp(const void *s1, const void *s2, size_t n); extern SYCL_EXTERNAL float __imf_saturatef(float x); +extern SYCL_EXTERNAL int __imf_float2int_rd(float x); +extern SYCL_EXTERNAL int __imf_float2int_rn(float x); +extern SYCL_EXTERNAL int __imf_float2int_ru(float x); +extern SYCL_EXTERNAL int __imf_float2int_rz(float x); +extern SYCL_EXTERNAL unsigned int __imf_float2uint_rd(float x); +extern SYCL_EXTERNAL unsigned int __imf_float2uint_rn(float x); +extern SYCL_EXTERNAL unsigned int __imf_float2uint_ru(float x); +extern SYCL_EXTERNAL unsigned int __imf_float2uint_rz(float x); +extern SYCL_EXTERNAL long long int __imf_float2ll_rd(float x); +extern SYCL_EXTERNAL long long int __imf_float2ll_rn(float x); +extern SYCL_EXTERNAL long long int __imf_float2ll_ru(float x); +extern SYCL_EXTERNAL long long int __imf_float2ll_rz(float x); +extern SYCL_EXTERNAL unsigned long long int __imf_float2ull_rd(float x); +extern SYCL_EXTERNAL unsigned long long int __imf_float2ull_rn(float x); +extern SYCL_EXTERNAL unsigned long long int __imf_float2ull_ru(float x); +extern SYCL_EXTERNAL unsigned long long int __imf_float2ull_rz(float x); +extern SYCL_EXTERNAL int __imf_float_as_int(float x); +extern SYCL_EXTERNAL unsigned int __imf_float_as_uint(float x); +extern SYCL_EXTERNAL float __imf_int2float_rd(int x); +extern SYCL_EXTERNAL float __imf_int2float_rn(int x); +extern SYCL_EXTERNAL float __imf_int2float_ru(int x); +extern SYCL_EXTERNAL float __imf_int2float_rz(int x); +extern SYCL_EXTERNAL float __imf_int_as_float(int x); +extern SYCL_EXTERNAL float __imf_ll2float_rd(long long int x); +extern SYCL_EXTERNAL float __imf_ll2float_rn(long long int x); +extern SYCL_EXTERNAL float __imf_ll2float_ru(long long int x); +extern SYCL_EXTERNAL float __imf_ll2float_rz(long long int x); +extern SYCL_EXTERNAL float __imf_uint2float_rd(unsigned int x); +extern SYCL_EXTERNAL float __imf_uint2float_rn(unsigned int x); +extern SYCL_EXTERNAL float __imf_uint2float_ru(unsigned int x); +extern SYCL_EXTERNAL float __imf_uint2float_rz(unsigned int x); +extern SYCL_EXTERNAL float __imf_uint_as_float(unsigned int x); +extern SYCL_EXTERNAL float __imf_ull2float_rd(unsigned long long int x); +extern SYCL_EXTERNAL float __imf_ull2float_rn(unsigned long long int x); +extern SYCL_EXTERNAL float __imf_ull2float_ru(unsigned long long int x); +extern SYCL_EXTERNAL float __imf_ull2float_rz(unsigned long long int x); +extern SYCL_EXTERNAL float __imf_half2float(_Float16 x); +extern SYCL_EXTERNAL float __imf_double2float_rd(double x); +extern SYCL_EXTERNAL float __imf_double2float_rn(double x); +extern SYCL_EXTERNAL float __imf_double2float_ru(double x); +extern SYCL_EXTERNAL float __imf_double2float_rz(double x); +extern SYCL_EXTERNAL int __imf_double2hiint(double x); +extern SYCL_EXTERNAL int __imf_double2loint(double x); +extern SYCL_EXTERNAL int __imf_double2int_rd(double x); +extern SYCL_EXTERNAL int __imf_double2int_rn(double x); +extern SYCL_EXTERNAL int __imf_double2int_ru(double x); +extern SYCL_EXTERNAL int __imf_double2int_rz(double x); +extern SYCL_EXTERNAL double __imf_int2double_rn(int x); +extern SYCL_EXTERNAL unsigned int __imf_double2uint_rd(double x); +extern SYCL_EXTERNAL unsigned int __imf_double2uint_rn(double x); +extern SYCL_EXTERNAL unsigned int __imf_double2uint_ru(double x); +extern SYCL_EXTERNAL unsigned int __imf_double2uint_rz(double x); +extern SYCL_EXTERNAL long long int __imf_double2ll_rd(double x); +extern SYCL_EXTERNAL long long int __imf_double2ll_rn(double x); +extern SYCL_EXTERNAL long long int __imf_double2ll_ru(double x); +extern SYCL_EXTERNAL long long int __imf_double2ll_rz(double x); +extern SYCL_EXTERNAL double __imf_ll2double_rd(long long int x); +extern SYCL_EXTERNAL double __imf_ll2double_rn(long long int x); +extern SYCL_EXTERNAL double __imf_ll2double_ru(long long int x); +extern SYCL_EXTERNAL double __imf_ll2double_rz(long long int x); +extern SYCL_EXTERNAL double __imf_ull2double_rd(unsigned long long int x); +extern SYCL_EXTERNAL double __imf_ull2double_rn(unsigned long long int x); +extern SYCL_EXTERNAL double __imf_ull2double_ru(unsigned long long int x); +extern SYCL_EXTERNAL double __imf_ull2double_rz(unsigned long long int x); +extern SYCL_EXTERNAL unsigned long long int __imf_double2ull_rd(double x); +extern SYCL_EXTERNAL unsigned long long int __imf_double2ull_rn(double x); +extern SYCL_EXTERNAL unsigned long long int __imf_double2ull_ru(double x); +extern SYCL_EXTERNAL unsigned long long int __imf_double2ull_rz(double x); +extern SYCL_EXTERNAL long long int __imf_double_as_longlong(double x); +extern SYCL_EXTERNAL double __imf_longlong_as_double(long long int x); +extern SYCL_EXTERNAL double __imf_uint2double_rd(unsigned int x); +extern SYCL_EXTERNAL double __imf_uint2double_rn(unsigned int x); +extern SYCL_EXTERNAL double __imf_uint2double_ru(unsigned int x); +extern SYCL_EXTERNAL double __imf_uint2double_rz(unsigned int x); +extern SYCL_EXTERNAL double __imf_hiloint2double(int hi, int lo); } #ifdef __GLIBC__ extern "C" { From c2ff474e472fd5fdfb72c47ec43a79b040c4a2f2 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 18 May 2022 11:36:55 +0800 Subject: [PATCH 06/22] Add ineteger functions file Signed-off-by: jinge90 --- libdevice/cmake/modules/SYCLLibdevice.cmake | 3 +- libdevice/device_imf.hpp | 40 ++++++++++++++ libdevice/imf_utils/integer_misc.cpp | 54 +++++++++++++++++++ libdevice/imf_wrapper.cpp | 43 +++++++++++++++ .../sycl-post-link/SYCLDeviceLibReqMask.cpp | 7 +++ sycl/include/CL/sycl/builtins.hpp | 8 +++ 6 files changed, 154 insertions(+), 1 deletion(-) create mode 100644 libdevice/imf_utils/integer_misc.cpp diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index a2b164fa8c21c..68475065fb9ca 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -124,7 +124,8 @@ set(bc_binary_dir ${obj_binary_dir}/libdevice) set(fallback-imf-src imf_utils/saturatef.cpp imf_utils/float_convert.cpp - imf_utils/half_convert.cpp) + imf_utils/half_convert.cpp + imf_utils/integer_misc.cpp) set(fallback-imf-fp64-src imf_utils/double_convert.cpp) set(wrapper-imf-src imf_wrapper.cpp imf_wrapper_fp64.cpp) set(imf-src ${wrapper-imf-src} ${fallback-imf-src} ${fallback-imf-fp64-src}) diff --git a/libdevice/device_imf.hpp b/libdevice/device_imf.hpp index 0684686065a13..676889b3aee25 100644 --- a/libdevice/device_imf.hpp +++ b/libdevice/device_imf.hpp @@ -412,5 +412,45 @@ static inline _iml_half __trunc(_iml_half x) { #endif } +static inline int __clz(int x) { + uint32_t xi32 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_clz(xi32); +#elif defined(__SPIR__) + return __spirv_ocl_clz(xi32); +#endif +} + +static inline int __clzll(long long int x) { + uint64_t xi64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_clzll(xi64); +#elif defined(__SPIR__) + return __spirv_ocl_clz(xi64); +#endif +} + +static inline int __popc(unsigned int x) { + uint32_t xui32 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_popcount(xui32); +#elif defined(__SPIR__) + return __spirv_ocl_popcount(xui32); +#endif +} + +static inline int __popcll(unsigned long long int x) { + uint64_t xui64 = x; +#if defined(__LIBDEVICE_HOST_IMPL__) + return __builtin_popcountll(xui64); +#elif defined(__SPIR__) + return __spirv_ocl_popcount(xui64); +#endif +} + +static inline unsigned int __abs(int x) { + return x < 0 ? -x : x; +} + #endif // __LIBDEVICE_IMF_ENABLED__ #endif // __LIBDEVICE_DEVICE_IMF_H__ diff --git a/libdevice/imf_utils/integer_misc.cpp b/libdevice/imf_utils/integer_misc.cpp new file mode 100644 index 0000000000000..dd57af5675bc1 --- /dev/null +++ b/libdevice/imf_utils/integer_misc.cpp @@ -0,0 +1,54 @@ +//==------ integer_misc.cpp - fallback implementation of a bunch of integer +// functions ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../device_imf.hpp" +#include +#ifdef __LIBDEVICE_IMF_ENABLED__ + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_brev(unsigned int x) { + unsigned int res = 0; + size_t bit_count = 8 * sizeof(unsigned int); + for (size_t idx = 0; idx < bit_count; ++idx) { + res |= x & 0x1; + res <<= 1; + x >>= 1; + } + return res; +} + +DEVICE_EXTERN_C_INLINE +unsigned long int __devicelib_imf_brevll(unsigned long long int x) { + unsigned long long int res = 0; + size_t bit_count = 8 * sizeof(unsigned long long int); + for (size_t idx = 0; idx < bit_count; ++idx) { + res |= x & 0x1; + res <<= 1; + x >>= 1; + } + return res; +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_clz(int x) { return __clz(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_clzll(long long int x) { return __clzll(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_popc(unsigned int x) { return __popc(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_popcll(unsigned long long int x) { return __popcll(x); } + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_sad(int x, int y, unsigned int z) { + return __abs(x - y) + z; +} +#endif //__LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp index 43859bad15d07..af9ecdbd47f0e 100644 --- a/libdevice/imf_wrapper.cpp +++ b/libdevice/imf_wrapper.cpp @@ -292,4 +292,47 @@ float __imf_half2float(_iml_half_internal x) { return __devicelib_imf_half2float(x); } +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_brev(unsigned int); + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_brevll(unsigned long long int); + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_brev(unsigned int x) { return __devicelib_imf_brev(x); } + +DEVICE_EXTERN_C_INLINE +unsigned long long int __imf_brevll(unsigned long long int x) { + return __devicelib_imf_brevll(x); +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_clz(int); + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_clzll(long long int); + +DEVICE_EXTERN_C_INLINE +int __imf_clz(int x) { return __devicelib_imf_clz(x); } + +DEVICE_EXTERN_C_INLINE +int __imf_clzll(long long int x) { return __devicelib_imf_clzll(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_popc(unsigned int); + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_popcll(unsigned long long int); + +DEVICE_EXTERN_C_INLINE +int __imf_popc(unsigned int x) { return __devicelib_imf_popc(x); } + +DEVICE_EXTERN_C_INLINE +int __imf_popcll(unsigned long long int x) { return __devicelib_imf_popcll(x); } + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_sad(int, int, unsigned int); + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_sad(int x, int y, unsigned int z) { return __devicelib_imf_sad(x, y, z); } #endif // __LIBDEVICE_IMF_ENABLED__ diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp index e01ce75fc5a9f..b8bcb96ba1269 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp @@ -171,6 +171,13 @@ SYCLDeviceLibFuncMap SDLMap = { {"__devicelib_memcmp", DeviceLibExt::cl_intel_devicelib_cstring}, {"__devicelib_assert_read", DeviceLibExt::cl_intel_devicelib_assert}, {"__devicelib_assert_fail", DeviceLibExt::cl_intel_devicelib_assert}, + {"__devicelib_imf_brev", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_brevll", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_clz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_clzll", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_popc", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_popcll", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_sad", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_saturatef", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_float2int_rd", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_float2int_rn", DeviceLibExt::cl_intel_devicelib_imf}, diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 60e5c4f41a5e5..a5e00927c020a 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -1738,6 +1738,14 @@ extern SYCL_EXTERNAL double hypot(double x, double y); extern SYCL_EXTERNAL void *memcpy(void *dest, const void *src, size_t n); extern SYCL_EXTERNAL void *memset(void *dest, int c, size_t n); extern SYCL_EXTERNAL int memcmp(const void *s1, const void *s2, size_t n); +extern SYCL_EXTERNAL unsigned int __imf_brev(unsigned int x); +extern SYCL_EXTERNAL unsigned long long int +__imf_brevll(unsigned long long int x); +extern SYCL_EXTERNAL int __imf_clz(int x); +extern SYCL_EXTERNAL int __imf_clzll(long long int x); +extern SYCL_EXTERNAL int __imf_popc(unsigned int x); +extern SYCL_EXTERNAL int __imf_popcll(unsigned long long int x); +extern SYCL_EXTERNAL unsigned int __imf_sad(int x, int y, unsigned int z); extern SYCL_EXTERNAL float __imf_saturatef(float x); extern SYCL_EXTERNAL int __imf_float2int_rd(float x); extern SYCL_EXTERNAL int __imf_float2int_rn(float x); From 4e357feb2168dd3caac1cb25722e265e6d908a6c Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 18 May 2022 11:42:07 +0800 Subject: [PATCH 07/22] Clang format Signed-off-by: jinge90 --- libdevice/device_imf.hpp | 4 +--- libdevice/imf_wrapper.cpp | 4 +++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libdevice/device_imf.hpp b/libdevice/device_imf.hpp index 676889b3aee25..448ad3b98c15b 100644 --- a/libdevice/device_imf.hpp +++ b/libdevice/device_imf.hpp @@ -448,9 +448,7 @@ static inline int __popcll(unsigned long long int x) { #endif } -static inline unsigned int __abs(int x) { - return x < 0 ? -x : x; -} +static inline unsigned int __abs(int x) { return x < 0 ? -x : x; } #endif // __LIBDEVICE_IMF_ENABLED__ #endif // __LIBDEVICE_DEVICE_IMF_H__ diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp index af9ecdbd47f0e..d6f23281809cd 100644 --- a/libdevice/imf_wrapper.cpp +++ b/libdevice/imf_wrapper.cpp @@ -334,5 +334,7 @@ DEVICE_EXTERN_C_INLINE unsigned int __devicelib_imf_sad(int, int, unsigned int); DEVICE_EXTERN_C_INLINE -unsigned int __imf_sad(int x, int y, unsigned int z) { return __devicelib_imf_sad(x, y, z); } +unsigned int __imf_sad(int x, int y, unsigned int z) { + return __devicelib_imf_sad(x, y, z); +} #endif // __LIBDEVICE_IMF_ENABLED__ From d196cf3aad00d00da861f743fb490dd2a4771e6b Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 23 May 2022 15:25:48 +0800 Subject: [PATCH 08/22] add __imf_byte_perm, __imf_ffs/ll, __imf_usad, __imf_sad Signed-off-by: jinge90 --- libdevice/device_imf.hpp | 29 ++++++++++++ libdevice/imf_utils/integer_misc.cpp | 44 ++++++++++++++++++- libdevice/imf_wrapper.cpp | 29 ++++++++++++ .../sycl-post-link/SYCLDeviceLibReqMask.cpp | 5 +++ sycl/include/CL/sycl/builtins.hpp | 8 +++- 5 files changed, 113 insertions(+), 2 deletions(-) diff --git a/libdevice/device_imf.hpp b/libdevice/device_imf.hpp index 448ad3b98c15b..caf5b9e5924f5 100644 --- a/libdevice/device_imf.hpp +++ b/libdevice/device_imf.hpp @@ -11,6 +11,7 @@ #include "device.h" #include "imf_half.hpp" +#include #include #ifdef __LIBDEVICE_IMF_ENABLED__ @@ -450,5 +451,33 @@ static inline int __popcll(unsigned long long int x) { static inline unsigned int __abs(int x) { return x < 0 ? -x : x; } +template +static inline Ty2 __get_bytes_by_index(Ty1 x, size_t idx) { + static_assert(!std::is_signed::value && !std::is_signed::value, + "__get_bytes_by_index can only accept unsigned value."); + static_assert(std::is_integral::value && std::is_integral::value, + "__get_bytes_by_index can only accept integral type."); + size_t bits_shift = idx * sizeof(Ty2) * 8; + Ty1 mask1 = static_cast(-1); + x >>= bits_shift; + x = x & mask1; + return static_cast(x); +} + +template +Ty1 __assemble_integral_value(Ty2 *x) { + static_assert(!std::is_signed::value && !std::is_signed::value, + "__assemble_integeral_value can only accept unsigned value."); + static_assert(std::is_integral::value && std::is_integral::value, + "__assemble_integeral_value can only accept integral value."); + static_assert(sizeof(Ty1) == N * sizeof(Ty2), + "size mismatch for __assemble_integeral_value"); + Ty1 res = 0; + for (size_t idx = 0; idx < N; ++idx) { + res <<= sizeof(Ty2) * 8; + res |= static_cast(x[N - 1 - idx]); + } + return res; +} #endif // __LIBDEVICE_IMF_ENABLED__ #endif // __LIBDEVICE_DEVICE_IMF_H__ diff --git a/libdevice/imf_utils/integer_misc.cpp b/libdevice/imf_utils/integer_misc.cpp index dd57af5675bc1..5267827b4a76c 100644 --- a/libdevice/imf_utils/integer_misc.cpp +++ b/libdevice/imf_utils/integer_misc.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "../device_imf.hpp" -#include #ifdef __LIBDEVICE_IMF_ENABLED__ DEVICE_EXTERN_C_INLINE @@ -51,4 +50,47 @@ DEVICE_EXTERN_C_INLINE unsigned int __devicelib_imf_sad(int x, int y, unsigned int z) { return __abs(x - y) + z; } + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_usad(unsigned int x, unsigned int y, + unsigned int z) { + return __abs(x - y) + z; +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_byte_perm(unsigned int x, unsigned int y, + unsigned int s) { + uint8_t buf[4] = { + 0, + }; + for (size_t idx = 0; idx < 4; ++idx) { + uint8_t select_idx = static_cast(s & 0x00000007); + if (select_idx < 4) + buf[idx] = __get_bytes_by_index(x, select_idx); + else + buf[idx] = __get_bytes_by_index(y, select_idx - 3); + s >>= 4; + } + return __assemble_integral_value(buf); +} + +template static inline int __do_imf_ffs(Ty x) { + static_assert(std::is_integral::value, + "ffs can only accept integral type."); + if (x == 0) + return 0; + size_t idx; + for (idx = 0; idx < sizeof(Ty) * 8; ++idx) { + if (0x1 == (0x1 & x)) + break; + x >>= 1; + } + return idx + 1; +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_ffs(int x) { return __do_imf_ffs(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_ffsll(long long int x) { return __do_imf_ffs(x); } #endif //__LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp index d6f23281809cd..bb9e250a9e848 100644 --- a/libdevice/imf_wrapper.cpp +++ b/libdevice/imf_wrapper.cpp @@ -333,8 +333,37 @@ int __imf_popcll(unsigned long long int x) { return __devicelib_imf_popcll(x); } DEVICE_EXTERN_C_INLINE unsigned int __devicelib_imf_sad(int, int, unsigned int); +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_usad(unsigned int, unsigned int, unsigned int); + DEVICE_EXTERN_C_INLINE unsigned int __imf_sad(int x, int y, unsigned int z) { return __devicelib_imf_sad(x, y, z); } + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_usad(unsigned int x, unsigned int y, unsigned int z) { + return __devicelib_imf_usad(x, y, z); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_byte_perm(unsigned int, unsigned int, + unsigned int); + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_byte_perm(unsigned int x, unsigned int y, unsigned int s) { + return __devicelib_imf_byte_perm(x, y, s); +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_ffs(int); + +DEVICE_EXTERN_C_INLINE +int __imf_ffs(int x) { return __devicelib_imf_ffs(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_ffsll(long long int); + +DEVICE_EXTERN_C_INLINE +int __imf_ffsll(long long int x) { return __devicelib_imf_ffsll(x); } #endif // __LIBDEVICE_IMF_ENABLED__ diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp index b8bcb96ba1269..cc7e3c6c747a2 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp @@ -173,11 +173,16 @@ SYCLDeviceLibFuncMap SDLMap = { {"__devicelib_assert_fail", DeviceLibExt::cl_intel_devicelib_assert}, {"__devicelib_imf_brev", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_brevll", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_byte_perm", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ffs", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ffsll", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_clz", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_clzll", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_popc", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_popcll", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_sad", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_usad", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_usad", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_saturatef", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_float2int_rd", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_float2int_rn", DeviceLibExt::cl_intel_devicelib_imf}, diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index a5e00927c020a..815a17429b2e3 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -751,7 +751,7 @@ sycl::detail::enable_if_t::value, T> ctz( } // namespace ext namespace __SYCL2020_DEPRECATED("use 'ext::intel' instead") intel { - using namespace ext::intel; +using namespace ext::intel; } // geninteger mad_hi (geninteger a, geninteger b, geninteger c) @@ -1741,11 +1741,17 @@ extern SYCL_EXTERNAL int memcmp(const void *s1, const void *s2, size_t n); extern SYCL_EXTERNAL unsigned int __imf_brev(unsigned int x); extern SYCL_EXTERNAL unsigned long long int __imf_brevll(unsigned long long int x); +extern SYCL_EXTERNAL unsigned int +__imf_byte_perm(unsigned int x, unsigned int y, unsigned int s); +extern SYCL_EXTERNAL int __imf_ffs(int x); +extern SYCL_EXTERNAL int __imf_ffsll(long long int x); extern SYCL_EXTERNAL int __imf_clz(int x); extern SYCL_EXTERNAL int __imf_clzll(long long int x); extern SYCL_EXTERNAL int __imf_popc(unsigned int x); extern SYCL_EXTERNAL int __imf_popcll(unsigned long long int x); extern SYCL_EXTERNAL unsigned int __imf_sad(int x, int y, unsigned int z); +extern SYCL_EXTERNAL unsigned int __imf_usad(unsigned int x, unsigned int y, + unsigned int z); extern SYCL_EXTERNAL float __imf_saturatef(float x); extern SYCL_EXTERNAL int __imf_float2int_rd(float x); extern SYCL_EXTERNAL int __imf_float2int_rn(float x); From fb4aa612d3118fe2745ea60e12e825a11f0cc8c2 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 23 May 2022 16:11:47 +0800 Subject: [PATCH 09/22] Fix __imf_usad Signed-off-by: jinge90 --- libdevice/device_imf.hpp | 4 ++++ libdevice/imf_utils/integer_misc.cpp | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/libdevice/device_imf.hpp b/libdevice/device_imf.hpp index caf5b9e5924f5..5c57b2e7b2630 100644 --- a/libdevice/device_imf.hpp +++ b/libdevice/device_imf.hpp @@ -451,6 +451,10 @@ static inline int __popcll(unsigned long long int x) { static inline unsigned int __abs(int x) { return x < 0 ? -x : x; } +static inline unsigned long long int __abs(long long int x) { + return x < 0 ? -x : x; +} + template static inline Ty2 __get_bytes_by_index(Ty1 x, size_t idx) { static_assert(!std::is_signed::value && !std::is_signed::value, diff --git a/libdevice/imf_utils/integer_misc.cpp b/libdevice/imf_utils/integer_misc.cpp index 5267827b4a76c..0677ee19db9a5 100644 --- a/libdevice/imf_utils/integer_misc.cpp +++ b/libdevice/imf_utils/integer_misc.cpp @@ -54,7 +54,8 @@ unsigned int __devicelib_imf_sad(int x, int y, unsigned int z) { DEVICE_EXTERN_C_INLINE unsigned int __devicelib_imf_usad(unsigned int x, unsigned int y, unsigned int z) { - return __abs(x - y) + z; + long long int xll = x, yll = y; + return static_cast(__abs(xll - yll)) + z; } DEVICE_EXTERN_C_INLINE From 09fa0cbb23662e97a6a94114c84b7d247348e8ed Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 23 May 2022 17:43:17 +0800 Subject: [PATCH 10/22] Add trivial imf inline functions Signed-off-by: jinge90 --- libdevice/cmake/modules/SYCLLibdevice.cmake | 6 +- libdevice/imf/imf_inline_fp32.cpp | 138 +++++++++++++ libdevice/imf/imf_inline_fp64.cpp | 61 ++++++ libdevice/imf_wrapper.cpp | 193 ++++++++++++++++++ .../sycl-post-link/SYCLDeviceLibReqMask.cpp | 39 ++++ sycl/include/CL/sycl/builtins.hpp | 40 ++++ 6 files changed, 475 insertions(+), 2 deletions(-) create mode 100644 libdevice/imf/imf_inline_fp32.cpp create mode 100644 libdevice/imf/imf_inline_fp64.cpp diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index fd06341ee86d0..1b3eb9acdd4b1 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -128,8 +128,10 @@ set(bc_binary_dir ${obj_binary_dir}/libdevice) set(fallback-imf-src imf_utils/saturatef.cpp imf_utils/float_convert.cpp imf_utils/half_convert.cpp - imf_utils/integer_misc.cpp) -set(fallback-imf-fp64-src imf_utils/double_convert.cpp) + imf_utils/integer_misc.cpp + imf/imf_inline_fp32.cpp) +set(fallback-imf-fp64-src imf_utils/double_convert.cpp + imf/imf_inline_fp64.cpp) set(wrapper-imf-src imf_wrapper.cpp imf_wrapper_fp64.cpp) set(imf-src ${wrapper-imf-src} ${fallback-imf-src} ${fallback-imf-fp64-src}) diff --git a/libdevice/imf/imf_inline_fp32.cpp b/libdevice/imf/imf_inline_fp32.cpp new file mode 100644 index 0000000000000..84a1144f3e78f --- /dev/null +++ b/libdevice/imf/imf_inline_fp32.cpp @@ -0,0 +1,138 @@ +//==----- imf_inline_fp32.cpp - some fp32 trivial intel math functions -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "../device_imf.hpp" +#ifdef __LIBDEVICE_IMF_ENABLED__ + +DEVICE_EXTERN_C_INLINE _iml_half_internal __devicelib_imf_fmaf16( + _iml_half_internal a, _iml_half_internal b, _iml_half_internal c) { + _iml_half ha(a), hb(b), hc(c); + return __fma(ha, hb, hc).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_floorf16(_iml_half_internal x) { + _iml_half hx(x); + return __floor(hx).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_ceilf16(_iml_half_internal x) { + _iml_half hx(x); + return __ceil(hx).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_truncf16(_iml_half_internal x) { + _iml_half hx(x); + return __trunc(hx).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_rintf16(_iml_half_internal x) { + _iml_half hx(x); + return __rint(hx).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_nearbyintf16(_iml_half_internal x) { + _iml_half hx(x); + return __rint(hx).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_sqrtf16(_iml_half_internal a) { + _iml_half ha(a); + return __sqrt(ha).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_rsqrtf16(_iml_half_internal a) { + _iml_half ha(a); + return __rsqrt(ha).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_invf16(_iml_half_internal a) { + _iml_half ha(a), h1(1.0f); + return (h1 / ha).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_fmaxf16(_iml_half_internal a, _iml_half_internal b) { + _iml_half ha(a), hb(b); + return __fmax(ha, hb).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_fminf16(_iml_half_internal a, _iml_half_internal b) { + _iml_half ha(a), hb(b); + return __fmin(ha, hb).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_fabsf16(_iml_half_internal x) { + _iml_half hx(x); + return __fabs(hx).get_internal(); +} + +DEVICE_EXTERN_C_INLINE _iml_half_internal +__devicelib_imf_copysignf16(_iml_half_internal a, _iml_half_internal b) { + _iml_half ha(a), hb(b); + return __copysign(ha, hb).get_internal(); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_fmaf(float a, float b, float c) { + return __fma(a, b, c); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_floorf(float x) { + return __floor(x); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_ceilf(float x) { + return __ceil(x); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_truncf(float x) { + return __trunc(x); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_rintf(float x) { + return __rint(x); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_nearbyintf(float x) { + return __rint(x); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_sqrtf(float a) { + return __sqrt(a); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_rsqrtf(float a) { + return __rsqrt(a); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_invf(float a) { return 1.0f / a; } + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_fmaxf(float a, float b) { + return __fmax(a, b); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_fminf(float a, float b) { + return __fmin(a, b); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_fabsf(float x) { + return __fabs(x); +} + +DEVICE_EXTERN_C_INLINE float __devicelib_imf_copysignf(float a, float b) { + return __copysign(a, b); +} +#endif /*__LIBDEVICE_IMF_ENABLED__*/ diff --git a/libdevice/imf/imf_inline_fp64.cpp b/libdevice/imf/imf_inline_fp64.cpp new file mode 100644 index 0000000000000..a0c566ff1d2ea --- /dev/null +++ b/libdevice/imf/imf_inline_fp64.cpp @@ -0,0 +1,61 @@ +//==----- imf_inline_fp64.cpp - some fp64 trivial intel math functions -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "../device_imf.hpp" +#ifdef __LIBDEVICE_IMF_ENABLED__ + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_fma(double a, double b, + double c) { + return __fma(a, b, c); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_floor(double x) { + return __floor(x); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_ceil(double x) { + return __ceil(x); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_trunc(double x) { + return __trunc(x); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_rint(double x) { + return __rint(x); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_nearbyint(double x) { + return __rint(x); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_sqrt(double a) { + return __sqrt(a); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_rsqrt(double a) { + return 1.0 / __sqrt(a); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_inv(double a) { return 1.0 / a; } + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_fmax(double a, double b) { + return __fmax(a, b); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_fmin(double a, double b) { + return __fmin(a, b); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_fabs(double x) { + return __fabs(x); +} + +DEVICE_EXTERN_C_INLINE double __devicelib_imf_copysign(double a, double b) { + return __copysign(a, b); +} +#endif /*__LIBDEVICE_IMF_ENABLED__*/ diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp index bb9e250a9e848..98fe06dacef94 100644 --- a/libdevice/imf_wrapper.cpp +++ b/libdevice/imf_wrapper.cpp @@ -366,4 +366,197 @@ int __devicelib_imf_ffsll(long long int); DEVICE_EXTERN_C_INLINE int __imf_ffsll(long long int x) { return __devicelib_imf_ffsll(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fmaf(float, float, float); + +DEVICE_EXTERN_C_INLINE +float __imf_fmaf(float x, float y, float z) { + return __devicelib_imf_fmaf(x, y, z); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_floorf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_floorf(float x) { return __devicelib_imf_floorf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_ceilf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_ceilf(float x) { return __devicelib_imf_ceilf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_truncf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_truncf(float x) { return __devicelib_imf_truncf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_rintf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_rintf(float x) { return __devicelib_imf_rintf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_nearbyintf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_nearbyintf(float x) { return __devicelib_imf_nearbyintf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_sqrtf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_sqrtf(float x) { return __devicelib_imf_sqrtf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_rsqrtf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_rsqrtf(float x) { return __devicelib_imf_rsqrtf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_invf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_invf(float x) { return __devicelib_imf_invf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fabsf(float); + +DEVICE_EXTERN_C_INLINE +float __imf_fabsf(float x) { return __devicelib_imf_fabsf(x); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fmaxf(float, float); + +DEVICE_EXTERN_C_INLINE +float __imf_fmaxf(float x, float y) { return __devicelib_imf_fmaxf(x, y); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fminf(float, float); + +DEVICE_EXTERN_C_INLINE +float __imf_fminf(float x, float y) { return __devicelib_imf_fminf(x, y); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_copysignf(float, float); + +DEVICE_EXTERN_C_INLINE +float __imf_copysignf(float x, float y) { + return __devicelib_imf_copysignf(x, y); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_fmaf16(_iml_half_internal, + _iml_half_internal, + _iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_fmaf16(_iml_half_internal x, _iml_half_internal y, + _iml_half_internal z) { + return __devicelib_imf_fmaf16(x, y, z); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_floorf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_floorf16(_iml_half_internal x) { + return __devicelib_imf_floorf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_ceilf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_ceilf16(_iml_half_internal x) { + return __devicelib_imf_ceilf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_truncf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_truncf16(_iml_half_internal x) { + return __devicelib_imf_truncf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_rintf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_rintf16(_iml_half_internal x) { + return __devicelib_imf_rintf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_nearbyintf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_nearbyintf16(_iml_half_internal x) { + return __devicelib_imf_nearbyintf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_sqrtf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_sqrtf16(_iml_half_internal x) { + return __devicelib_imf_sqrtf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_rsqrtf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_rsqrtf16(_iml_half_internal x) { + return __devicelib_imf_rsqrtf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_invf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_invf16(_iml_half_internal x) { + return __devicelib_imf_invf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_fabsf16(_iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_fabsf16(_iml_half_internal x) { + return __devicelib_imf_fabsf16(x); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_fmaxf16(_iml_half_internal, + _iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_fmaxf16(_iml_half_internal x, _iml_half_internal y) { + return __devicelib_imf_fmaxf16(x, y); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_fminf16(_iml_half_internal, + _iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_fminf16(_iml_half_internal x, _iml_half_internal y) { + return __devicelib_imf_fminf16(x, y); +} + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __devicelib_imf_copysignf16(_iml_half_internal, + _iml_half_internal); + +DEVICE_EXTERN_C_INLINE +_iml_half_internal __imf_copysignf16(_iml_half_internal x, + _iml_half_internal y) { + return __devicelib_imf_copysignf16(x, y); +} #endif // __LIBDEVICE_IMF_ENABLED__ diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp index cc7e3c6c747a2..a7846a2e9ceb7 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp @@ -184,6 +184,19 @@ SYCLDeviceLibFuncMap SDLMap = { {"__devicelib_imf_usad", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_usad", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_saturatef", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fmaf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_floorf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ceilf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fabsf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_truncf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_rintf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_nearbyintf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_invf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_sqrtf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_rsqrtf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fmaxf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fminf", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_copysignf", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_float2int_rd", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_float2int_rn", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_float2int_ru", DeviceLibExt::cl_intel_devicelib_imf}, @@ -220,6 +233,32 @@ SYCLDeviceLibFuncMap SDLMap = { {"__devicelib_imf_ull2float_rn", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_ull2float_ru", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_ull2float_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fmaf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_floorf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_ceilf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fabsf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_truncf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_rintf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_nearbyintf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_invf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_sqrtf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_rsqrtf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fmaxf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fminf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_copysignf16", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fma", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_floor", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ceil", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_fabs", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_trunc", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_rint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_nearbyint", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_inv", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_sqrt", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_rsqrt", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_fmax", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_fmin", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_copysign", DeviceLibExt::cl_intel_devicelib_imf_fp64}, {"__devicelib_imf_double2float_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, {"__devicelib_imf_double2float_rn", diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 815a17429b2e3..b4cc4ef79e7c8 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -1753,6 +1753,19 @@ extern SYCL_EXTERNAL unsigned int __imf_sad(int x, int y, unsigned int z); extern SYCL_EXTERNAL unsigned int __imf_usad(unsigned int x, unsigned int y, unsigned int z); extern SYCL_EXTERNAL float __imf_saturatef(float x); +extern SYCL_EXTERNAL float __imf_fmaf(float x, float y, float z); +extern SYCL_EXTERNAL float __imf_fabsf(float x); +extern SYCL_EXTERNAL float __imf_floorf(float x); +extern SYCL_EXTERNAL float __imf_ceilf(float x); +extern SYCL_EXTERNAL float __imf_truncf(float x); +extern SYCL_EXTERNAL float __imf_rintf(float x); +extern SYCL_EXTERNAL float __imf_nearbyintf(float x); +extern SYCL_EXTERNAL float __imf_sqrtf(float x); +extern SYCL_EXTERNAL float __imf_rsqrtf(float x); +extern SYCL_EXTERNAL float __imf_invf(float x); +extern SYCL_EXTERNAL float __imf_fmaxf(float x, float y); +extern SYCL_EXTERNAL float __imf_fminf(float x, float y); +extern SYCL_EXTERNAL float __imf_copysignf(float x, float y); extern SYCL_EXTERNAL int __imf_float2int_rd(float x); extern SYCL_EXTERNAL int __imf_float2int_rn(float x); extern SYCL_EXTERNAL int __imf_float2int_ru(float x); @@ -1789,7 +1802,34 @@ extern SYCL_EXTERNAL float __imf_ull2float_rd(unsigned long long int x); extern SYCL_EXTERNAL float __imf_ull2float_rn(unsigned long long int x); extern SYCL_EXTERNAL float __imf_ull2float_ru(unsigned long long int x); extern SYCL_EXTERNAL float __imf_ull2float_rz(unsigned long long int x); + +extern SYCL_EXTERNAL _Float16 __imf_fmaf16(_Float16 x, _Float16 y, _Float16 z); +extern SYCL_EXTERNAL _Float16 __imf_fabsf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_floorf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_ceilf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_truncf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_rintf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_nearbyintf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_sqrtf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_rsqrtf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_invf16(_Float16 x); +extern SYCL_EXTERNAL _Float16 __imf_fmaxf16(_Float16 x, _Float16 y); +extern SYCL_EXTERNAL _Float16 __imf_fminf16(_Float16 x, _Float16 y); +extern SYCL_EXTERNAL _Float16 __imf_copysignf16(_Float16 x, _Float16 y); extern SYCL_EXTERNAL float __imf_half2float(_Float16 x); +extern SYCL_EXTERNAL double __imf_fma(double x, double y, double z); +extern SYCL_EXTERNAL double __imf_fabs(double x); +extern SYCL_EXTERNAL double __imf_floor(double x); +extern SYCL_EXTERNAL double __imf_ceil(double x); +extern SYCL_EXTERNAL double __imf_trunc(double x); +extern SYCL_EXTERNAL double __imf_rint(double x); +extern SYCL_EXTERNAL double __imf_nearbyint(double x); +extern SYCL_EXTERNAL double __imf_sqrt(double x); +extern SYCL_EXTERNAL double __imf_rsqrt(double x); +extern SYCL_EXTERNAL double __imf_inv(double x); +extern SYCL_EXTERNAL double __imf_fmax(double x, double y); +extern SYCL_EXTERNAL double __imf_fmin(double x, double y); +extern SYCL_EXTERNAL double __imf_copysign(double x, double y); extern SYCL_EXTERNAL float __imf_double2float_rd(double x); extern SYCL_EXTERNAL float __imf_double2float_rn(double x); extern SYCL_EXTERNAL float __imf_double2float_ru(double x); From 8b5d22aeec62c9a80335897d01efc277f113e260 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 23 May 2022 17:55:18 +0800 Subject: [PATCH 11/22] add double trivial inline functions Signed-off-by: jinge90 --- libdevice/imf_wrapper_fp64.cpp | 81 ++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/libdevice/imf_wrapper_fp64.cpp b/libdevice/imf_wrapper_fp64.cpp index 713899ccc7f60..eddba077a3f30 100644 --- a/libdevice/imf_wrapper_fp64.cpp +++ b/libdevice/imf_wrapper_fp64.cpp @@ -280,4 +280,85 @@ double __imf_ull2double_rz(unsigned long long int x) { return __devicelib_imf_ull2double_rz(x); } +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_fma(double, double, double); + +DEVICE_EXTERN_C_INLINE +double __imf_fma(double x, double y, double z) { + return __devicelib_imf_fma(x, y, z); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_floor(double); + +DEVICE_EXTERN_C_INLINE +double __imf_floor(double x) { return __devicelib_imf_floor(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ceil(double); + +DEVICE_EXTERN_C_INLINE +double __imf_ceil(double x) { return __devicelib_imf_ceil(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_trunc(double); + +DEVICE_EXTERN_C_INLINE +double __imf_trunc(double x) { return __devicelib_imf_trunc(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_rint(double); + +DEVICE_EXTERN_C_INLINE +double __imf_rint(double x) { return __devicelib_imf_rint(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_nearbyint(double); + +DEVICE_EXTERN_C_INLINE +double __imf_nearbyint(double x) { return __devicelib_imf_nearbyint(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_sqrt(double); + +DEVICE_EXTERN_C_INLINE +double __imf_sqrt(double x) { return __devicelib_imf_sqrt(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_rsqrt(double); + +DEVICE_EXTERN_C_INLINE +double __imf_rsqrt(double x) { return __devicelib_imf_rsqrt(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_inv(double); + +DEVICE_EXTERN_C_INLINE +double __imf_inv(double x) { return __devicelib_imf_inv(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_fabs(double); + +DEVICE_EXTERN_C_INLINE +double __imf_fabs(double x) { return __devicelib_imf_fabs(x); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_fmax(double, double); + +DEVICE_EXTERN_C_INLINE +double __imf_fmax(double x, double y) { return __devicelib_imf_fmax(x, y); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_fmin(double, double); + +DEVICE_EXTERN_C_INLINE +double __imf_fmin(double x, double y) { return __devicelib_imf_fmin(x, y); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_copysign(double, double); + +DEVICE_EXTERN_C_INLINE +double __imf_copysign(double x, double y) { + return __devicelib_imf_copysign(x, y); +} #endif // __LIBDEVICE_IMF_ENABLED__ From dde4d2265b9d5d1cbd3f9d7c3aeeda7da105e96d Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 24 May 2022 18:28:26 +0800 Subject: [PATCH 12/22] Add __imf_mul24, __imf_mulhi, __imf_hadd, __imf_rhadd, __imf_urhadd Signed-off-by: jinge90 --- libdevice/device_imf.hpp | 40 ++++++++++++++ libdevice/imf_utils/integer_misc.cpp | 53 +++++++++++++++++++ libdevice/imf_wrapper.cpp | 50 +++++++++++++++++ .../sycl-post-link/SYCLDeviceLibReqMask.cpp | 8 ++- sycl/include/CL/sycl/builtins.hpp | 7 +++ 5 files changed, 157 insertions(+), 1 deletion(-) diff --git a/libdevice/device_imf.hpp b/libdevice/device_imf.hpp index 5c57b2e7b2630..439f7ada00f80 100644 --- a/libdevice/device_imf.hpp +++ b/libdevice/device_imf.hpp @@ -483,5 +483,45 @@ Ty1 __assemble_integral_value(Ty2 *x) { } return res; } + +template static inline Ty __uhadd(Ty x, Ty y) { + static_assert(std::is_integral::value && !std::is_signed::value, + "__uhadd can only accept unsigned integral type."); +#if defined(__LIBDEVICE_HOST_IMPL__) + return (x >> 1) + (y >> 1) + ((x & y) & 0x1); +#elif defined(__SPIR__) + return __spirv_ocl_u_hadd(x, y); +#endif +} + +template static inline Ty __shadd(Ty x, Ty y) { + static_assert(std::is_integral::value && std::is_signed::value, + "__shadd can only accept signed integral type."); +#if defined(__LIBDEVICE_HOST_IMPL__) + return (x >> 1) + (y >> 1) + ((x & y) & 0x1); +#elif defined(__SPIR__) + return __spirv_ocl_s_hadd(x, y); +#endif +} + +template static inline Ty __urhadd(Ty x, Ty y) { + static_assert(std::is_integral::value && !std::is_signed::value, + "__urhadd can only accept unsigned integral type."); +#if defined(__LIBDEVICE_HOST_IMPL__) + return (x >> 1) + (y >> 1) + ((x | y) & 0x1); +#elif defined(__SPIR__) + return __spirv_ocl_u_rhadd(x, y); +#endif +} + +template static inline Ty __srhadd(Ty x, Ty y) { + static_assert(std::is_integral::value && std::is_signed::value, + "__srhadd can only accept signed integral type."); +#if defined(__LIBDEVICE_HOST_IMPL__) + return (x >> 1) + (y >> 1) + ((x | y) & 0x1); +#elif defined(__SPIR__) + return __spirv_ocl_s_rhadd(x, y); +#endif +} #endif // __LIBDEVICE_IMF_ENABLED__ #endif // __LIBDEVICE_DEVICE_IMF_H__ diff --git a/libdevice/imf_utils/integer_misc.cpp b/libdevice/imf_utils/integer_misc.cpp index 0677ee19db9a5..e95893d7aff6e 100644 --- a/libdevice/imf_utils/integer_misc.cpp +++ b/libdevice/imf_utils/integer_misc.cpp @@ -94,4 +94,57 @@ int __devicelib_imf_ffs(int x) { return __do_imf_ffs(x); } DEVICE_EXTERN_C_INLINE int __devicelib_imf_ffsll(long long int x) { return __do_imf_ffs(x); } + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_rhadd(int x, int y) { return __srhadd(x, y); } + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_uhadd(unsigned int x, unsigned int y) { + return __uhadd(x, y); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_urhadd(unsigned int x, unsigned int y) { + return __urhadd(x, y); +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_mul24(int x, int y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return x * y; +#elif defined(__SPIR__) + return __spirv_ocl_s_mul24(x, y); +#endif +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_umul24(unsigned int x, unsigned int y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + return x * y; +#elif defined(__SPIR__) + return __spirv_ocl_u_mul24(x, y); +#endif +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_mulhi(int x, int y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + int64_t p = static_cast(x) * static_cast(y); + p >>= 32; + return static_cast(p); +#elif defined(__SPIR__) + return __spirv_ocl_s_mul_hi(x, y); +#endif +} + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_umulhi(unsigned int x, unsigned int y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + uint64_t p = static_cast(x) * static_cast(y); + p >>= 32; + return static_cast(p); +#elif defined(__SPIR__) + return __spirv_ocl_u_mul_hi(x, y); +#endif +} #endif //__LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp index 98fe06dacef94..14a11b92048c5 100644 --- a/libdevice/imf_wrapper.cpp +++ b/libdevice/imf_wrapper.cpp @@ -367,6 +367,56 @@ int __devicelib_imf_ffsll(long long int); DEVICE_EXTERN_C_INLINE int __imf_ffsll(long long int x) { return __devicelib_imf_ffsll(x); } +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_rhadd(int, int); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_uhadd(int, int); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_urhadd(unsigned int, unsigned int); + +DEVICE_EXTERN_C_INLINE +int __imf_rhadd(int x, int y) { return __devicelib_imf_rhadd(x, y); } + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_uhadd(unsigned int x, unsigned int y) { + return __devicelib_imf_uhadd(x, y); +} + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_urhadd(unsigned int x, unsigned int y) { + return __devicelib_imf_urhadd(x, y); +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_mul24(int, int); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_umul24(unsigned int, unsigned int); + +DEVICE_EXTERN_C_INLINE +int __imf_mul24(int x, int y) { return __devicelib_imf_mul24(x, y); } + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_umul24(unsigned int x, unsigned int y) { + return __devicelib_imf_umul24(x, y); +} + +DEVICE_EXTERN_C_INLINE +int __devicelib_imf_mulhi(int, int); + +DEVICE_EXTERN_C_INLINE +unsigned int __devicelib_imf_umulhi(unsigned int, unsigned int); + +DEVICE_EXTERN_C_INLINE +int __imf_mulhi(int x, int y) { return __devicelib_imf_mulhi(x, y); } + +DEVICE_EXTERN_C_INLINE +unsigned int __imf_umulhi(unsigned int x, unsigned int y) { + return __devicelib_imf_umulhi(x, y); +} + DEVICE_EXTERN_C_INLINE float __devicelib_imf_fmaf(float, float, float); diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp index a7846a2e9ceb7..37cdd686507e6 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp @@ -182,7 +182,13 @@ SYCLDeviceLibFuncMap SDLMap = { {"__devicelib_imf_popcll", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_sad", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_usad", DeviceLibExt::cl_intel_devicelib_imf}, - {"__devicelib_imf_usad", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_uhadd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_urhadd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_rhadd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_mul24", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_umul24", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_mulhi", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_umulhi", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_saturatef", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_fmaf", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_floorf", DeviceLibExt::cl_intel_devicelib_imf}, diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index b4cc4ef79e7c8..f8ef03073d1b3 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -1752,6 +1752,13 @@ extern SYCL_EXTERNAL int __imf_popcll(unsigned long long int x); extern SYCL_EXTERNAL unsigned int __imf_sad(int x, int y, unsigned int z); extern SYCL_EXTERNAL unsigned int __imf_usad(unsigned int x, unsigned int y, unsigned int z); +extern SYCL_EXTERNAL int __imf_rhadd(int x, int y); +extern SYCL_EXTERNAL unsigned int __imf_urhadd(unsigned int x, unsigned int y); +extern SYCL_EXTERNAL unsigned int __imf_uhadd(unsigned int x, unsigned int y); +extern SYCL_EXTERNAL int __imf_mul24(int x, int y); +extern SYCL_EXTERNAL unsigned int __imf_umul24(unsigned int x, unsigned int y); +extern SYCL_EXTERNAL int __imf_mulhi(int x, int y); +extern SYCL_EXTERNAL unsigned int __imf_umulhi(unsigned int x, unsigned int y); extern SYCL_EXTERNAL float __imf_saturatef(float x); extern SYCL_EXTERNAL float __imf_fmaf(float x, float y, float z); extern SYCL_EXTERNAL float __imf_fabsf(float x); From 867e97af9040aed54f828c32711baa48c48c5797 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 25 May 2022 12:46:51 +0800 Subject: [PATCH 13/22] Add __imf_mul64hi, __imf_umul64hi Signed-off-by: jinge90 --- libdevice/imf_utils/integer_misc.cpp | 24 +++++++++++++++++++ libdevice/imf_wrapper.cpp | 18 ++++++++++++++ .../sycl-post-link/SYCLDeviceLibReqMask.cpp | 2 ++ sycl/include/CL/sycl/builtins.hpp | 4 ++++ 4 files changed, 48 insertions(+) diff --git a/libdevice/imf_utils/integer_misc.cpp b/libdevice/imf_utils/integer_misc.cpp index e95893d7aff6e..e68fd49891c62 100644 --- a/libdevice/imf_utils/integer_misc.cpp +++ b/libdevice/imf_utils/integer_misc.cpp @@ -147,4 +147,28 @@ unsigned int __devicelib_imf_umulhi(unsigned int x, unsigned int y) { return __spirv_ocl_u_mul_hi(x, y); #endif } + +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_mul64hi(long long int x, long long int y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + __int128_t p = static_cast<__int128_t>(x) * static_cast<__int128_t>(y); + p >>= 64; + return static_cast(p); +#elif defined(__SPIR__) + return __spirv_ocl_s_mul_hi(static_cast(x), static_cast(y)); +#endif +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_umul64hi(unsigned long long int x, + unsigned long long int y) { +#if defined(__LIBDEVICE_HOST_IMPL__) + __uint128_t p = static_cast<__uint128_t>(x) * static_cast<__uint128_t>(y); + p >>= 64; + return static_cast(p); +#elif defined(__SPIR__) + return __spirv_ocl_u_mul_hi(static_cast(x), + static_cast(y)); +#endif +} #endif //__LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp index 14a11b92048c5..6197845aad440 100644 --- a/libdevice/imf_wrapper.cpp +++ b/libdevice/imf_wrapper.cpp @@ -409,6 +409,24 @@ int __devicelib_imf_mulhi(int, int); DEVICE_EXTERN_C_INLINE unsigned int __devicelib_imf_umulhi(unsigned int, unsigned int); +DEVICE_EXTERN_C_INLINE +long long int __devicelib_imf_mul64hi(long long int, long long int); + +DEVICE_EXTERN_C_INLINE +unsigned long long int __devicelib_imf_umul64hi(unsigned long long int, + unsigned long long int); + +DEVICE_EXTERN_C_INLINE +long long int __imf_mul64hi(long long int x, long long int y) { + return __devicelib_imf_mul64hi(x, y); +} + +DEVICE_EXTERN_C_INLINE +unsigned long long int __imf_umul64hi(unsigned long long int x, + unsigned long long int y) { + return __devicelib_imf_umul64hi(x, y); +} + DEVICE_EXTERN_C_INLINE int __imf_mulhi(int x, int y) { return __devicelib_imf_mulhi(x, y); } diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp index 37cdd686507e6..ca20506d14160 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp @@ -189,6 +189,8 @@ SYCLDeviceLibFuncMap SDLMap = { {"__devicelib_imf_umul24", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_mulhi", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_umulhi", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_mul64hi", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_umul64hi", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_saturatef", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_fmaf", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_floorf", DeviceLibExt::cl_intel_devicelib_imf}, diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index f8ef03073d1b3..3ca2f945be111 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -1759,6 +1759,10 @@ extern SYCL_EXTERNAL int __imf_mul24(int x, int y); extern SYCL_EXTERNAL unsigned int __imf_umul24(unsigned int x, unsigned int y); extern SYCL_EXTERNAL int __imf_mulhi(int x, int y); extern SYCL_EXTERNAL unsigned int __imf_umulhi(unsigned int x, unsigned int y); +extern SYCL_EXTERNAL long long int __imf_mul64hi(long long int x, + long long int y); +extern SYCL_EXTERNAL unsigned long long int +__imf_umul64hi(unsigned long long int x, unsigned long long int y); extern SYCL_EXTERNAL float __imf_saturatef(float x); extern SYCL_EXTERNAL float __imf_fmaf(float x, float y, float z); extern SYCL_EXTERNAL float __imf_fabsf(float x); From a5fb6eee52a4ec45c6f735a796444c24f1af45d7 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 25 May 2022 15:51:58 +0800 Subject: [PATCH 14/22] allow library mismatch on Windows Signed-off-by: jinge90 --- libdevice/cmake/modules/SYCLLibdevice.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 1b3eb9acdd4b1..f2a0daf8b5cbe 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -201,6 +201,10 @@ function(add_devicelib_bc src_file sycl_target) else() set(bc_compile_flags -fsycl -fsycl-device-only -fsycl-targets=${sycl_target}) endif() + if (WIN32) + list(APPEND bc_compile_flags -D_ALLOW_RUNTIME_LIBRARY_MISMATCH) + list(APPEND bc_compile_flags -D_ALLOW_ITERATOR_DEBUG_LEVEL_MISMATCH) + endif() add_custom_command(OUTPUT ${devicelib-bc} COMMAND ${clang} ${bc_compile_flags} ${CMAKE_CURRENT_SOURCE_DIR}/${src_file} From ad899b8f2c05e34d204b829a4508d450f49ef888 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 25 May 2022 18:11:20 +0800 Subject: [PATCH 15/22] Add lit test for libimf link Signed-off-by: jinge90 --- clang/include/clang/Driver/Options.td | 4 +-- clang/test/Driver/sycl-device-lib-win.cpp | 35 +++++++++++++++++++++++ clang/test/Driver/sycl-device-lib.cpp | 35 +++++++++++++++++++++++ 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 675e94e65e36c..b59a98ec0e142 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4847,9 +4847,9 @@ def fno_sycl_dead_args_optimization : Flag<["-"], "fno-sycl-dead-args-optimizati Group, Flags<[NoArgumentUnused, CoreOption]>, HelpText<"Disables " "elimination of DPC++ dead kernel arguments">; def fsycl_device_lib_EQ : CommaJoined<["-"], "fsycl-device-lib=">, Group, Flags<[NoXarchOption, CoreOption]>, - Values<"libc, libm-fp32, libm-fp64, all">, HelpText<"Control inclusion of " + Values<"libc, libm-fp32, libm-fp64, libimf-fp32, libimf-fp64, all">, HelpText<"Control inclusion of " "device libraries into device binary linkage. Valid arguments " - "are libc, libm-fp32, libm-fp64, all">; + "are libc, libm-fp32, libm-fp64, libimf-fp32, libimf-fp64, all">; def fno_sycl_device_lib_EQ : CommaJoined<["-"], "fno-sycl-device-lib=">, Group, Flags<[NoXarchOption, CoreOption]>, Values<"libc, libm-fp32, libm-fp64, all">, HelpText<"Control exclusion of " "device libraries from device binary linkage. Valid arguments " diff --git a/clang/test/Driver/sycl-device-lib-win.cpp b/clang/test/Driver/sycl-device-lib-win.cpp index 96659f0f490ef..63d376bd3bb12 100644 --- a/clang/test/Driver/sycl-device-lib-win.cpp +++ b/clang/test/Driver/sycl-device-lib-win.cpp @@ -21,6 +21,8 @@ // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-msvc-math.obj" "-output={{.*}}libsycl-msvc-math-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### /// test sycl fallback device libraries are not linked by default @@ -48,6 +50,8 @@ // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-msvc-math.obj" "-output={{.*}}libsycl-msvc-math-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### @@ -59,6 +63,8 @@ // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-msvc-math.obj" "-output={{.*}}libsycl-msvc-math-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### @@ -66,6 +72,33 @@ // RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp32,libm-fp64 --sysroot=%S/Inputs/SYCL-windows -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" + +/// ########################################################################### + +/// test behavior of -fno-sycl-device-lib=libimf-fp32 +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libimf-fp32 --sysroot=%S/Inputs/SYCL-windows -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP32 +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex.obj" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.obj" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" + +/// ########################################################################### + +/// test behavior of -fno-sycl-device-lib=libimf-fp64 +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libimf-fp64 --sysroot=%S/Inputs/SYCL-windows -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP64 +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP64: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP64: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex.obj" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP64: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.obj" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP64: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP64: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP64: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" + /// ########################################################################### /// test behavior of disabling all device libraries @@ -112,6 +145,8 @@ // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.obj" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.obj" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-msvc-math.obj" "-output={{.*}}libsycl-msvc-math-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.obj" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.obj" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: llvm-link{{.*}} "-only-needed" "{{.*}}" "-o" "{{.*}}.bc" "--suppress-warnings" /// ########################################################################### diff --git a/clang/test/Driver/sycl-device-lib.cpp b/clang/test/Driver/sycl-device-lib.cpp index 60fdb7d256e5e..62f1c59b63fb1 100644 --- a/clang/test/Driver/sycl-device-lib.cpp +++ b/clang/test/Driver/sycl-device-lib.cpp @@ -20,6 +20,8 @@ // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### /// test sycl fallback device libraries are not linked by default @@ -46,6 +48,8 @@ // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_WITH_FP64-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### @@ -56,12 +60,41 @@ // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" + /// ########################################################################### /// test behavior of -fno-sycl-device-lib=libm-fp32,libm-fp64 // RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp32,libm-fp64 --sysroot=%S/Inputs/SYCL -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" + +/// ########################################################################### + +/// test behavior of -fno-sycl-device-lib=libimf-fp32 +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libimf-fp32 --sysroot=%S/Inputs/SYCL -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP32 +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex.o" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" + +/// ########################################################################### + +/// test behavior of -fno-sycl-device-lib=libimf-fp64 +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libimf-fp64 --sysroot=%S/Inputs/SYCL -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP64 +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP64: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP64: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex.o" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP64: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP64: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP64: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBIMF_FP64: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" /// ########################################################################### @@ -108,6 +141,8 @@ // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath.o" "-output={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-cmath-fp64.o" "-output={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf.o" "-output={{.*}}libsycl-imf-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-imf-fp64.o" "-output={{.*}}libsycl-imf-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: llvm-link{{.*}} "-only-needed" "{{.*}}" "-o" "{{.*}}.bc" "--suppress-warnings" /// ########################################################################### From 7505a25bc265f9a139c5c18a65345bd2fa606e9e Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 25 May 2022 18:47:40 +0800 Subject: [PATCH 16/22] Add lit test for fsycl-device-lib=libimf-fp32 Signed-off-by: jinge90 --- clang/test/Driver/sycl-device-lib-win.cpp | 2 ++ clang/test/Driver/sycl-device-lib.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/clang/test/Driver/sycl-device-lib-win.cpp b/clang/test/Driver/sycl-device-lib-win.cpp index 63d376bd3bb12..72dc3fd8b769e 100644 --- a/clang/test/Driver/sycl-device-lib-win.cpp +++ b/clang/test/Driver/sycl-device-lib-win.cpp @@ -15,6 +15,8 @@ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT // RUN: %clangxx -fsycl %s -fsycl-device-lib=libc,libm-fp32 --sysroot=%S/Inputs/SYCL-windows -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libimf-fp32 --sysroot=%S/Inputs/SYCL-windows -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.obj" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex.obj" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.obj" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" diff --git a/clang/test/Driver/sycl-device-lib.cpp b/clang/test/Driver/sycl-device-lib.cpp index 7363ffef3bff5..a0085d56d4f7f 100644 --- a/clang/test/Driver/sycl-device-lib.cpp +++ b/clang/test/Driver/sycl-device-lib.cpp @@ -15,6 +15,8 @@ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT // RUN: %clangxx -fsycl %s -fsycl-device-lib=libc,libm-fp32 --sysroot=%S/Inputs/SYCL -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT +// RUN: %clangxx -fsycl %s -fsycl-device-lib=libimf-fp32 --sysroot=%S/Inputs/SYCL -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-crt.o" "-output={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex.o" "-output={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown" "-input={{.*}}libsycl-complex-fp64.o" "-output={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" From 36f883d1bf0b45cd294cac4a4efe2d22923eef04 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Mon, 30 May 2022 16:30:38 +0800 Subject: [PATCH 17/22] Add dummy imf devicelib for lit test Signed-off-by: jinge90 --- .../Driver/Inputs/SYCL-windows/lib/libsycl-fallback-imf-fp64.obj | 0 .../test/Driver/Inputs/SYCL-windows/lib/libsycl-fallback-imf.obj | 0 clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-imf-fp64.obj | 0 clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-imf.obj | 0 clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-msvc-math.obj | 0 clang/test/Driver/Inputs/SYCL/lib/libsycl-fallback-imf-fp64.o | 0 clang/test/Driver/Inputs/SYCL/lib/libsycl-fallback-imf.o | 0 clang/test/Driver/Inputs/SYCL/lib/libsycl-imf-fp64.o | 0 clang/test/Driver/Inputs/SYCL/lib/libsycl-imf.o | 0 9 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-fallback-imf-fp64.obj create mode 100644 clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-fallback-imf.obj create mode 100644 clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-imf-fp64.obj create mode 100644 clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-imf.obj create mode 100644 clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-msvc-math.obj create mode 100644 clang/test/Driver/Inputs/SYCL/lib/libsycl-fallback-imf-fp64.o create mode 100644 clang/test/Driver/Inputs/SYCL/lib/libsycl-fallback-imf.o create mode 100644 clang/test/Driver/Inputs/SYCL/lib/libsycl-imf-fp64.o create mode 100644 clang/test/Driver/Inputs/SYCL/lib/libsycl-imf.o diff --git a/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-fallback-imf-fp64.obj b/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-fallback-imf-fp64.obj new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-fallback-imf.obj b/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-fallback-imf.obj new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-imf-fp64.obj b/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-imf-fp64.obj new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-imf.obj b/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-imf.obj new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-msvc-math.obj b/clang/test/Driver/Inputs/SYCL-windows/lib/libsycl-msvc-math.obj new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-fallback-imf-fp64.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-fallback-imf-fp64.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-fallback-imf.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-fallback-imf.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-imf-fp64.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-imf-fp64.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/SYCL/lib/libsycl-imf.o b/clang/test/Driver/Inputs/SYCL/lib/libsycl-imf.o new file mode 100644 index 0000000000000..e69de29bb2d1d From 1fe22d4d7adcc806e4de850d1082f3482f706d09 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Sun, 5 Jun 2022 10:57:56 +0800 Subject: [PATCH 18/22] Move __imf_saturatef to inline functions. Signed-off-by: jinge90 --- libdevice/cmake/modules/SYCLLibdevice.cmake | 3 +-- libdevice/imf/imf_inline_fp32.cpp | 3 +++ libdevice/imf_utils/saturatef.cpp | 16 ---------------- 3 files changed, 4 insertions(+), 18 deletions(-) delete mode 100644 libdevice/imf_utils/saturatef.cpp diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index da96089f128fa..569bf018b7f02 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -125,8 +125,7 @@ add_fallback_devicelib(libsycl-fallback-cmath-fp64 SRC fallback-cmath-fp64.cpp D file(MAKE_DIRECTORY ${obj_binary_dir}/libdevice) set(bc_binary_dir ${obj_binary_dir}/libdevice) -set(fallback-imf-src imf_utils/saturatef.cpp - imf_utils/float_convert.cpp +set(fallback-imf-src imf_utils/float_convert.cpp imf_utils/half_convert.cpp imf_utils/integer_misc.cpp imf/imf_inline_fp32.cpp) diff --git a/libdevice/imf/imf_inline_fp32.cpp b/libdevice/imf/imf_inline_fp32.cpp index 84a1144f3e78f..0273a1c4902ed 100644 --- a/libdevice/imf/imf_inline_fp32.cpp +++ b/libdevice/imf/imf_inline_fp32.cpp @@ -86,6 +86,9 @@ __devicelib_imf_copysignf16(_iml_half_internal a, _iml_half_internal b) { return __copysign(ha, hb).get_internal(); } +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_saturatef(float x) { return __fclamp(x, .0f, 1.f); } + DEVICE_EXTERN_C_INLINE float __devicelib_imf_fmaf(float a, float b, float c) { return __fma(a, b, c); } diff --git a/libdevice/imf_utils/saturatef.cpp b/libdevice/imf_utils/saturatef.cpp deleted file mode 100644 index 1c20b9fd0a57f..0000000000000 --- a/libdevice/imf_utils/saturatef.cpp +++ /dev/null @@ -1,16 +0,0 @@ -//==------ saturatef.cpp - fallback implementation of __imf_saturatef ------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "../device_imf.hpp" - -#ifdef __LIBDEVICE_IMF_ENABLED__ - -DEVICE_EXTERN_C_INLINE -float __devicelib_imf_saturatef(float x) { return __fclamp(x, .0f, 1.f); } - -#endif //__LIBDEVICE_IMF_ENABLED__ From 61b50edbcbdcbf6fc0efa8e32b4ea72ccffaf2d9 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Thu, 9 Jun 2022 12:17:04 +0800 Subject: [PATCH 19/22] Add TODO for bitcast Signed-off-by: jinge90 --- libdevice/device_imf.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdevice/device_imf.hpp b/libdevice/device_imf.hpp index 439f7ada00f80..46715fa1e0596 100644 --- a/libdevice/device_imf.hpp +++ b/libdevice/device_imf.hpp @@ -16,7 +16,7 @@ #ifdef __LIBDEVICE_IMF_ENABLED__ -// Bitcast is valid to trivially copyable object only but using +// TODO: Bitcast is valid to trivially copyable object only but using // is_trivially_copyable check will lead to compiling error in some // pre-ci tests, the pre-ci environment used some legacy c++ std library // which doesn't include this function. Need to report to pre-ci owners. From c47c0716a38980cf46e85f0c64c69ca341d6eeca Mon Sep 17 00:00:00 2001 From: jinge90 Date: Thu, 9 Jun 2022 18:40:36 +0800 Subject: [PATCH 20/22] Double confirm __SPIR__ or __LIBDEVICE_HOST_IMPL__ is defined for device_imf.hpp Signed-off-by: jinge90 --- libdevice/device_imf.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libdevice/device_imf.hpp b/libdevice/device_imf.hpp index 46715fa1e0596..afd3bfc8c758e 100644 --- a/libdevice/device_imf.hpp +++ b/libdevice/device_imf.hpp @@ -16,6 +16,11 @@ #ifdef __LIBDEVICE_IMF_ENABLED__ +#if !defined(__SPIR__) && !defined(__LIBDEVICE_HOST_IMPL__) +#error \ + "__SPIR__ or __LIBDEVICE_HOST_IMPL__ must be defined to enable device imf functions!" +#endif + // TODO: Bitcast is valid to trivially copyable object only but using // is_trivially_copyable check will lead to compiling error in some // pre-ci tests, the pre-ci environment used some legacy c++ std library From 6dfa25030b3b479ae3da79bc51c553341d7bec15 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Sat, 11 Jun 2022 21:58:39 +0800 Subject: [PATCH 21/22] Fix bugs in brev, byte_perm Signed-off-by: jinge90 --- libdevice/device_imf.hpp | 4 ++++ libdevice/imf_utils/integer_misc.cpp | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/libdevice/device_imf.hpp b/libdevice/device_imf.hpp index afd3bfc8c758e..9466f2ce3970a 100644 --- a/libdevice/device_imf.hpp +++ b/libdevice/device_imf.hpp @@ -419,6 +419,8 @@ static inline _iml_half __trunc(_iml_half x) { } static inline int __clz(int x) { + if (x == 0) + return 32; uint32_t xi32 = x; #if defined(__LIBDEVICE_HOST_IMPL__) return __builtin_clz(xi32); @@ -428,6 +430,8 @@ static inline int __clz(int x) { } static inline int __clzll(long long int x) { + if (x == 0) + return 64; uint64_t xi64 = x; #if defined(__LIBDEVICE_HOST_IMPL__) return __builtin_clzll(xi64); diff --git a/libdevice/imf_utils/integer_misc.cpp b/libdevice/imf_utils/integer_misc.cpp index e68fd49891c62..f665b575c98e9 100644 --- a/libdevice/imf_utils/integer_misc.cpp +++ b/libdevice/imf_utils/integer_misc.cpp @@ -14,11 +14,12 @@ DEVICE_EXTERN_C_INLINE unsigned int __devicelib_imf_brev(unsigned int x) { unsigned int res = 0; size_t bit_count = 8 * sizeof(unsigned int); - for (size_t idx = 0; idx < bit_count; ++idx) { + for (size_t idx = 0; idx < bit_count - 1; ++idx) { res |= x & 0x1; res <<= 1; x >>= 1; } + res |= x & 0x1; return res; } @@ -26,11 +27,12 @@ DEVICE_EXTERN_C_INLINE unsigned long int __devicelib_imf_brevll(unsigned long long int x) { unsigned long long int res = 0; size_t bit_count = 8 * sizeof(unsigned long long int); - for (size_t idx = 0; idx < bit_count; ++idx) { + for (size_t idx = 0; idx < bit_count - 1; ++idx) { res |= x & 0x1; res <<= 1; x >>= 1; } + res |= x & 0x1; return res; } @@ -69,7 +71,7 @@ unsigned int __devicelib_imf_byte_perm(unsigned int x, unsigned int y, if (select_idx < 4) buf[idx] = __get_bytes_by_index(x, select_idx); else - buf[idx] = __get_bytes_by_index(y, select_idx - 3); + buf[idx] = __get_bytes_by_index(y, select_idx - 4); s >>= 4; } return __assemble_integral_value(buf); From d85f86f5e3c0d860e6ee9d4ef71a273d93f3d730 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Sat, 11 Jun 2022 23:03:41 +0800 Subject: [PATCH 22/22] Fix brevll prototype Signed-off-by: jinge90 --- libdevice/imf_utils/integer_misc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdevice/imf_utils/integer_misc.cpp b/libdevice/imf_utils/integer_misc.cpp index f665b575c98e9..800a42d69da5a 100644 --- a/libdevice/imf_utils/integer_misc.cpp +++ b/libdevice/imf_utils/integer_misc.cpp @@ -24,7 +24,7 @@ unsigned int __devicelib_imf_brev(unsigned int x) { } DEVICE_EXTERN_C_INLINE -unsigned long int __devicelib_imf_brevll(unsigned long long int x) { +unsigned long long int __devicelib_imf_brevll(unsigned long long int x) { unsigned long long int res = 0; size_t bit_count = 8 * sizeof(unsigned long long int); for (size_t idx = 0; idx < bit_count - 1; ++idx) {