From ff52930c96b643b1ca0e31398df3353d6356052f Mon Sep 17 00:00:00 2001
From: jinge90 <ge.jin@intel.com>
Date: Thu, 28 Jul 2022 20:39:41 +0800
Subject: [PATCH 1/8] [SYCL] Add sycl cpp header for imf libdevice API

Signed-off-by: jinge90 <ge.jin@intel.com>
---
 sycl/include/sycl/ext/intel/math.hpp | 32 ++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 sycl/include/sycl/ext/intel/math.hpp

diff --git a/sycl/include/sycl/ext/intel/math.hpp b/sycl/include/sycl/ext/intel/math.hpp
new file mode 100644
index 0000000000000..e51268f9295bb
--- /dev/null
+++ b/sycl/include/sycl/ext/intel/math.hpp
@@ -0,0 +1,32 @@
+//==------------- math.hpp - Intel specific math API -----------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// The main header of Intel specific math API
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+extern "C" {
+float __imf_saturatef(float);
+float __imf_copysignf(float, float);
+double __imf_copysign(double, double);
+};
+
+__SYCL_INLINE_NAMESPACE(cl) {
+namespace sycl {
+namespace ext {
+namespace intel {
+float saturate(float x) { return __imf_saturatef(x); }
+
+float copysign(float x, float y) { return __imf_copysignf(x, y); }
+double copysign(double x, double y) { return __imf_copysign(x, y); }
+
+} // namespace intel
+} // namespace ext
+
+} // namespace sycl
+} // __SYCL_INLINE_NAMESPACE(cl)

From 6b550dc274bc9b14b0396f0ca5bdde70a40e19ff Mon Sep 17 00:00:00 2001
From: jinge90 <ge.jin@intel.com>
Date: Mon, 15 Aug 2022 14:47:13 +0800
Subject: [PATCH 2/8] add imf APIs to top level sycl headers

Signed-off-by: jinge90 <ge.jin@intel.com>
---
 sycl/CMakeLists.txt                  | 2 ++
 sycl/include/sycl/builtins.hpp       | 3 +++
 sycl/include/sycl/ext/intel/math.hpp | 7 ++++---
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt
index cd1296fcdf022..e99bd3c52f5c8 100644
--- a/sycl/CMakeLists.txt
+++ b/sycl/CMakeLists.txt
@@ -48,6 +48,8 @@ else ()
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-deprecated-declarations")
 endif()
 
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__IGNORE_IMF_DEFINES__")
+
 if(SYCL_ENABLE_WERROR)
   if(MSVC)
     set(CMAKE_CXX_FLAGS "/WX ${CMAKE_CXX_FLAGS}")
diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp
index e0032a3343c8e..904c3674be3a7 100644
--- a/sycl/include/sycl/builtins.hpp
+++ b/sycl/include/sycl/builtins.hpp
@@ -12,6 +12,9 @@
 #include <sycl/detail/builtins.hpp>
 #include <sycl/detail/common.hpp>
 #include <sycl/detail/generic_type_traits.hpp>
+#ifndef __IGNORE_IMF_DEFINES__
+#include <sycl/ext/intel/math.hpp>
+#endif
 #include <sycl/types.hpp>
 
 // TODO Decide whether to mark functions with this attribute.
diff --git a/sycl/include/sycl/ext/intel/math.hpp b/sycl/include/sycl/ext/intel/math.hpp
index e51268f9295bb..51df09683bc66 100644
--- a/sycl/include/sycl/ext/intel/math.hpp
+++ b/sycl/include/sycl/ext/intel/math.hpp
@@ -16,17 +16,18 @@ float __imf_copysignf(float, float);
 double __imf_copysign(double, double);
 };
 
-__SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
+__SYCL_INLINE_VER_NAMESPACE(_V1) {
 namespace ext {
 namespace intel {
+namespace math {
 float saturate(float x) { return __imf_saturatef(x); }
 
 float copysign(float x, float y) { return __imf_copysignf(x, y); }
 double copysign(double x, double y) { return __imf_copysign(x, y); }
 
+} // namespace math
 } // namespace intel
 } // namespace ext
-
+} // __SYCL_INLINE_VER_NAMESPACE(_V1)
 } // namespace sycl
-} // __SYCL_INLINE_NAMESPACE(cl)

From 42951cab92ff031286087e4f8c7399ef0c5410cf Mon Sep 17 00:00:00 2001
From: jinge90 <ge.jin@intel.com>
Date: Mon, 15 Aug 2022 22:44:58 +0800
Subject: [PATCH 3/8] disable IMF APIs for NVPTX

Signed-off-by: jinge90 <ge.jin@intel.com>
---
 sycl/include/sycl/ext/intel/math.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sycl/include/sycl/ext/intel/math.hpp b/sycl/include/sycl/ext/intel/math.hpp
index 51df09683bc66..6f622922a9e36 100644
--- a/sycl/include/sycl/ext/intel/math.hpp
+++ b/sycl/include/sycl/ext/intel/math.hpp
@@ -10,6 +10,7 @@
 
 #pragma once
 
+#ifndef __NVPTX__
 extern "C" {
 float __imf_saturatef(float);
 float __imf_copysignf(float, float);
@@ -31,3 +32,4 @@ double copysign(double x, double y) { return __imf_copysign(x, y); }
 } // namespace ext
 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
 } // namespace sycl
+#endif

From cfc5237096dfaa0b5ced1a1969a1407e5de1da84 Mon Sep 17 00:00:00 2001
From: jinge90 <ge.jin@intel.com>
Date: Mon, 15 Aug 2022 23:15:59 +0800
Subject: [PATCH 4/8] remove imf defines from sycl.hpp

Signed-off-by: jinge90 <ge.jin@intel.com>
---
 sycl/CMakeLists.txt                  | 2 --
 sycl/include/sycl/builtins.hpp       | 3 ---
 sycl/include/sycl/ext/intel/math.hpp | 2 --
 3 files changed, 7 deletions(-)

diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt
index e99bd3c52f5c8..cd1296fcdf022 100644
--- a/sycl/CMakeLists.txt
+++ b/sycl/CMakeLists.txt
@@ -48,8 +48,6 @@ else ()
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-deprecated-declarations")
 endif()
 
-set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__IGNORE_IMF_DEFINES__")
-
 if(SYCL_ENABLE_WERROR)
   if(MSVC)
     set(CMAKE_CXX_FLAGS "/WX ${CMAKE_CXX_FLAGS}")
diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp
index 904c3674be3a7..e0032a3343c8e 100644
--- a/sycl/include/sycl/builtins.hpp
+++ b/sycl/include/sycl/builtins.hpp
@@ -12,9 +12,6 @@
 #include <sycl/detail/builtins.hpp>
 #include <sycl/detail/common.hpp>
 #include <sycl/detail/generic_type_traits.hpp>
-#ifndef __IGNORE_IMF_DEFINES__
-#include <sycl/ext/intel/math.hpp>
-#endif
 #include <sycl/types.hpp>
 
 // TODO Decide whether to mark functions with this attribute.
diff --git a/sycl/include/sycl/ext/intel/math.hpp b/sycl/include/sycl/ext/intel/math.hpp
index 6f622922a9e36..51df09683bc66 100644
--- a/sycl/include/sycl/ext/intel/math.hpp
+++ b/sycl/include/sycl/ext/intel/math.hpp
@@ -10,7 +10,6 @@
 
 #pragma once
 
-#ifndef __NVPTX__
 extern "C" {
 float __imf_saturatef(float);
 float __imf_copysignf(float, float);
@@ -32,4 +31,3 @@ double copysign(double x, double y) { return __imf_copysign(x, y); }
 } // namespace ext
 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
 } // namespace sycl
-#endif

From 40c215aa99b205909b6e40ed12a7de78fe0cbdb3 Mon Sep 17 00:00:00 2001
From: jinge90 <ge.jin@intel.com>
Date: Tue, 16 Aug 2022 09:49:31 +0800
Subject: [PATCH 5/8] Dispatch to corresponding C imf APIs in template function

Signed-off-by: jinge90 <ge.jin@intel.com>
---
 sycl/include/sycl/ext/intel/math.hpp | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/sycl/include/sycl/ext/intel/math.hpp b/sycl/include/sycl/ext/intel/math.hpp
index 51df09683bc66..d9516a804290c 100644
--- a/sycl/include/sycl/ext/intel/math.hpp
+++ b/sycl/include/sycl/ext/intel/math.hpp
@@ -9,7 +9,7 @@
 //===----------------------------------------------------------------------===//
 
 #pragma once
-
+#include <type_traits>
 extern "C" {
 float __imf_saturatef(float);
 float __imf_copysignf(float, float);
@@ -21,10 +21,22 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
 namespace ext {
 namespace intel {
 namespace math {
-float saturate(float x) { return __imf_saturatef(x); }
+template <typename Tp> Tp saturate(Tp x) {
+  static_assert(std::is_same<Tp, float>::value,
+                "sycl::ext::intel::math::saturate only supports fp32 version.");
+  if (std::is_same<Tp, float>::value)
+    return __imf_saturatef(x);
+}
 
-float copysign(float x, float y) { return __imf_copysignf(x, y); }
-double copysign(double x, double y) { return __imf_copysign(x, y); }
+template <typename Tp> Tp copysign(Tp x, Tp y) {
+  static_assert(
+      std::is_same<Tp, float>::value || std::is_same<Tp, double>::value,
+      "sycl::ext::intel::math::copysign only supports fp32, fp64 version.");
+  if (std::is_same<Tp, float>::value)
+    return __imf_copysignf(x, y);
+  if (std::is_same<Tp, double>::value)
+    return __imf_copysign(x, y);
+}
 
 } // namespace math
 } // namespace intel

From d9c2426868c5ad0021cd921c5f61db3cfceca09e Mon Sep 17 00:00:00 2001
From: jinge90 <ge.jin@intel.com>
Date: Tue, 16 Aug 2022 12:41:59 +0800
Subject: [PATCH 6/8] use compile-time if

Signed-off-by: jinge90 <ge.jin@intel.com>
---
 sycl/include/sycl/ext/intel/math.hpp | 36 +++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/sycl/include/sycl/ext/intel/math.hpp b/sycl/include/sycl/ext/intel/math.hpp
index d9516a804290c..2f43f624a7210 100644
--- a/sycl/include/sycl/ext/intel/math.hpp
+++ b/sycl/include/sycl/ext/intel/math.hpp
@@ -9,11 +9,23 @@
 //===----------------------------------------------------------------------===//
 
 #pragma once
+#include <sycl/half_type.hpp>
 #include <type_traits>
+
+// _iml_half_internal is internal representation for fp16 type used in intel
+// math device library. The definition here should align with definition in
+// https://github.com/intel/llvm/blob/sycl/libdevice/imf_half.hpp
+#if defined(__SPIR__)
+typedef _Float16 _iml_half_internal;
+#else
+typedef uint16_t _iml_half_internal;
+#endif
+
 extern "C" {
 float __imf_saturatef(float);
 float __imf_copysignf(float, float);
 double __imf_copysign(double, double);
+_iml_half_internal __imf_copysignf16(_iml_half_internal, _iml_half_internal);
 };
 
 namespace sycl {
@@ -21,23 +33,35 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
 namespace ext {
 namespace intel {
 namespace math {
+
+#if __cplusplus >= 201703L
 template <typename Tp> Tp saturate(Tp x) {
   static_assert(std::is_same<Tp, float>::value,
                 "sycl::ext::intel::math::saturate only supports fp32 version.");
-  if (std::is_same<Tp, float>::value)
+  if constexpr (std::is_same<Tp, float>::value)
     return __imf_saturatef(x);
 }
 
 template <typename Tp> Tp copysign(Tp x, Tp y) {
-  static_assert(
-      std::is_same<Tp, float>::value || std::is_same<Tp, double>::value,
-      "sycl::ext::intel::math::copysign only supports fp32, fp64 version.");
-  if (std::is_same<Tp, float>::value)
+  static_assert(std::is_same<Tp, float>::value ||
+                    std::is_same<Tp, double>::value ||
+                    std::is_same<Tp, sycl::half>::value,
+                "sycl::ext::intel::math::copysign only supports fp16, fp32, "
+                "fp64 version.");
+  if constexpr (std::is_same<Tp, float>::value)
     return __imf_copysignf(x, y);
-  if (std::is_same<Tp, double>::value)
+  if constexpr (std::is_same<Tp, double>::value)
     return __imf_copysign(x, y);
+  if constexpr (std::is_same<Tp, sycl::half>::value) {
+    static_assert(sizeof(sycl::half) == sizeof(_iml_half_internal),
+                  "sycl::half is not compatible with _iml_half_internal.");
+    _iml_half_internal xi = __builtin_bit_cast(_iml_half_internal, x);
+    _iml_half_internal yi = __builtin_bit_cast(_iml_half_internal, y);
+    return __builtin_bit_cast(sycl::half, __imf_copysignf16(xi, yi));
+  }
 }
 
+#endif
 } // namespace math
 } // namespace intel
 } // namespace ext

From aba34a79e3b79acf862887154847d9f61a3683b6 Mon Sep 17 00:00:00 2001
From: jinge90 <ge.jin@intel.com>
Date: Wed, 24 Aug 2022 12:03:16 +0800
Subject: [PATCH 7/8] Address some review comments

Signed-off-by: jinge90 <ge.jin@intel.com>
---
 sycl/include/sycl/ext/intel/math.hpp | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/sycl/include/sycl/ext/intel/math.hpp b/sycl/include/sycl/ext/intel/math.hpp
index 2f43f624a7210..af2a5ca5c69bd 100644
--- a/sycl/include/sycl/ext/intel/math.hpp
+++ b/sycl/include/sycl/ext/intel/math.hpp
@@ -16,9 +16,9 @@
 // math device library. The definition here should align with definition in
 // https://github.com/intel/llvm/blob/sycl/libdevice/imf_half.hpp
 #if defined(__SPIR__)
-typedef _Float16 _iml_half_internal;
+using _iml_half_internal = _Float16;
 #else
-typedef uint16_t _iml_half_internal;
+using _iml_half_internal = uint16_t;
 #endif
 
 extern "C" {
@@ -35,24 +35,21 @@ namespace intel {
 namespace math {
 
 #if __cplusplus >= 201703L
-template <typename Tp> Tp saturate(Tp x) {
-  static_assert(std::is_same<Tp, float>::value,
-                "sycl::ext::intel::math::saturate only supports fp32 version.");
-  if constexpr (std::is_same<Tp, float>::value)
-    return __imf_saturatef(x);
+template <typename Tp>
+typename std::enable_if<std::is_same_v<Tp, float>, float>::type saturate(Tp x) {
+  return __imf_saturatef(x);
 }
 
 template <typename Tp> Tp copysign(Tp x, Tp y) {
-  static_assert(std::is_same<Tp, float>::value ||
-                    std::is_same<Tp, double>::value ||
-                    std::is_same<Tp, sycl::half>::value,
+  static_assert(std::is_same_v<Tp, float> || std::is_same_v<Tp, double> ||
+                    std::is_same_v<Tp, sycl::half>,
                 "sycl::ext::intel::math::copysign only supports fp16, fp32, "
                 "fp64 version.");
-  if constexpr (std::is_same<Tp, float>::value)
+  if constexpr (std::is_same_v<Tp, float>)
     return __imf_copysignf(x, y);
-  if constexpr (std::is_same<Tp, double>::value)
+  if constexpr (std::is_same_v<Tp, double>)
     return __imf_copysign(x, y);
-  if constexpr (std::is_same<Tp, sycl::half>::value) {
+  if constexpr (std::is_same_v<Tp, sycl::half>) {
     static_assert(sizeof(sycl::half) == sizeof(_iml_half_internal),
                   "sycl::half is not compatible with _iml_half_internal.");
     _iml_half_internal xi = __builtin_bit_cast(_iml_half_internal, x);

From 190b9edef3caae1174d5c4ca6c67d858cf0f6d2f Mon Sep 17 00:00:00 2001
From: jinge90 <ge.jin@intel.com>
Date: Thu, 25 Aug 2022 10:30:43 +0800
Subject: [PATCH 8/8] Use std::enable_if_t to replace static_assert

Signed-off-by: jinge90 <ge.jin@intel.com>
---
 sycl/include/sycl/ext/intel/math.hpp | 36 +++++++++++++++-------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/sycl/include/sycl/ext/intel/math.hpp b/sycl/include/sycl/ext/intel/math.hpp
index af2a5ca5c69bd..647530187aef7 100644
--- a/sycl/include/sycl/ext/intel/math.hpp
+++ b/sycl/include/sycl/ext/intel/math.hpp
@@ -36,26 +36,28 @@ namespace math {
 
 #if __cplusplus >= 201703L
 template <typename Tp>
-typename std::enable_if<std::is_same_v<Tp, float>, float>::type saturate(Tp x) {
+std::enable_if_t<std::is_same_v<Tp, float>, float> saturate(Tp x) {
   return __imf_saturatef(x);
 }
 
-template <typename Tp> Tp copysign(Tp x, Tp y) {
-  static_assert(std::is_same_v<Tp, float> || std::is_same_v<Tp, double> ||
-                    std::is_same_v<Tp, sycl::half>,
-                "sycl::ext::intel::math::copysign only supports fp16, fp32, "
-                "fp64 version.");
-  if constexpr (std::is_same_v<Tp, float>)
-    return __imf_copysignf(x, y);
-  if constexpr (std::is_same_v<Tp, double>)
-    return __imf_copysign(x, y);
-  if constexpr (std::is_same_v<Tp, sycl::half>) {
-    static_assert(sizeof(sycl::half) == sizeof(_iml_half_internal),
-                  "sycl::half is not compatible with _iml_half_internal.");
-    _iml_half_internal xi = __builtin_bit_cast(_iml_half_internal, x);
-    _iml_half_internal yi = __builtin_bit_cast(_iml_half_internal, y);
-    return __builtin_bit_cast(sycl::half, __imf_copysignf16(xi, yi));
-  }
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, float>, float> copysign(Tp x, Tp y) {
+  return __imf_copysignf(x, y);
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, double>, double> copysign(Tp x, Tp y) {
+  return __imf_copysign(x, y);
+}
+
+template <typename Tp>
+std::enable_if_t<std::is_same_v<Tp, sycl::half>, sycl::half> copysign(Tp x,
+                                                                      Tp y) {
+  static_assert(sizeof(sycl::half) == sizeof(_iml_half_internal),
+                "sycl::half is not compatible with _iml_half_internal.");
+  _iml_half_internal xi = __builtin_bit_cast(_iml_half_internal, x);
+  _iml_half_internal yi = __builtin_bit_cast(_iml_half_internal, y);
+  return __builtin_bit_cast(sycl::half, __imf_copysignf16(xi, yi));
 }
 
 #endif