diff --git a/SYCL/DeviceLib/sycl_ext_intel_math_fp16.cpp b/SYCL/DeviceLib/sycl_ext_intel_math_fp16.cpp
new file mode 100644
index 0000000000..dc88172fa3
--- /dev/null
+++ b/SYCL/DeviceLib/sycl_ext_intel_math_fp16.cpp
@@ -0,0 +1,67 @@
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fno-builtin -fsycl-device-lib-jit-link %s -o %t.noblt.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.noblt.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.noblt.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.noblt.out
+//
+// UNSUPPORTED: cuda || hip
+
+#include "imf_utils.hpp"
+#include <sycl/ext/intel/math.hpp>
+
+// Relative-error comparator for fp16: |x - y| / |x + y| must stay within
+// 2 half-precision ulps of equality.
+class is_close {
+public:
+  bool operator()(sycl::half x, sycl::half y) {
+    return sycl::fabs((x - y) / (x + y)) <
+           std::numeric_limits<sycl::half>::epsilon() * 2;
+  }
+};
+
+// Element-wise closeness check for half2 vectors.
+class is_close2 {
+public:
+  bool operator()(sycl::half2 x, sycl::half2 y) {
+    is_close cls1;
+    return cls1(x.s0(), y.s0()) && cls1(x.s1(), y.s1());
+  }
+};
+
+int main(int, char **) {
+  sycl::queue device_queue(sycl::default_selector_v);
+  std::cout << "Running on "
+            << device_queue.get_device().get_info<sycl::info::device::name>()
+            << "\n";
+
+  if (!device_queue.get_device().has(sycl::aspect::fp16)) {
+    std::cout << "Test skipped on platform without fp16 support." << std::endl;
+    return 0;
+  }
+
+  // sycl::ext::intel::math::inv
+  {
+    std::initializer_list<sycl::half> input_vals = {1.f, 2.0f, 0.5f, 100.f,
+                                                    3.f, 0.25f, 0.002f};
+    std::initializer_list<sycl::half> ref_vals = {
+        1.f, 0.5f, 2.0f, 0.01f, 0.333333333f, 4.f, 500.f};
+    constexpr auto inv_op = [](sycl::half x) {
+      return sycl::ext::intel::math::inv(x);
+    };
+    test<sycl::half, sycl::half, is_close>(
+        device_queue, input_vals, ref_vals, inv_op);
+
+    constexpr auto inv_op2 = [](sycl::half2 x) {
+      return sycl::ext::intel::math::inv(x);
+    };
+    std::initializer_list<sycl::half2> input_vals2 = {
+        {1.f, 2.0f}, {0.5f, 100.f}, {3.f, 0.25f}, {0.002f, 0.005f}};
+    std::initializer_list<sycl::half2> ref_vals2 = {
+        {1.f, 0.5f}, {2.0f, 0.01f}, {0.333333333f, 4.f}, {500.f, 200.f}};
+    test<sycl::half2, sycl::half2, is_close2>(
+        device_queue, input_vals2, ref_vals2, inv_op2);
+    std::cout << "sycl::ext::intel::math::inv passes" << std::endl;
+  }
+  return 0;
+}
diff --git a/SYCL/DeviceLib/sycl_ext_intel_math_fp32.cpp b/SYCL/DeviceLib/sycl_ext_intel_math_fp32.cpp
new file mode 100644
index 0000000000..59dbb722be
--- /dev/null
+++ b/SYCL/DeviceLib/sycl_ext_intel_math_fp32.cpp
@@ -0,0 +1,43 @@
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fno-builtin -fsycl-device-lib-jit-link %s -o %t.noblt.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.noblt.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.noblt.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.noblt.out
+//
+// UNSUPPORTED: cuda || hip
+
+#include "imf_utils.hpp"
+#include <sycl/ext/intel/math.hpp>
+
+// Relative-error comparator for fp32 within 2 single-precision ulps.
+class is_close {
+public:
+  bool operator()(float x, float y) {
+    return std::fabs((x - y) / (x + y)) <
+           std::numeric_limits<float>::epsilon() * 2;
+  }
+};
+
+int main(int, char **) {
+  sycl::queue device_queue(sycl::default_selector_v);
+  std::cout << "Running on "
+            << device_queue.get_device().get_info<sycl::info::device::name>()
+            << "\n";
+
+  // sycl::ext::intel::math::inv
+  {
+    std::initializer_list<float> input_vals = {1.f, 2.0f, 0.5f, 100.f,
+                                               3.f, 0.25f, 0.002f};
+    std::initializer_list<float> ref_vals = {1.f, 0.5f, 2.0f, 0.01f,
+                                             0.333333333f, 4.f, 500.f};
+    constexpr auto inv_op = [](float x) {
+      return sycl::ext::intel::math::inv(x);
+    };
+    test<float, float, is_close>(device_queue, input_vals,
+                                 ref_vals, inv_op);
+    std::cout << "sycl::ext::intel::math::inv passes" << std::endl;
+  }
+}
diff --git a/SYCL/DeviceLib/sycl_ext_intel_math_fp64.cpp b/SYCL/DeviceLib/sycl_ext_intel_math_fp64.cpp
new file mode 100644
index 0000000000..e3baf28e30
--- /dev/null
+++ b/SYCL/DeviceLib/sycl_ext_intel_math_fp64.cpp
@@ -0,0 +1,48 @@
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+
+// RUN: %clangxx -fsycl -fno-builtin -fsycl-device-lib-jit-link %s -o %t.noblt.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.noblt.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.noblt.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.noblt.out
+//
+// UNSUPPORTED: cuda || hip
+
+#include "imf_utils.hpp"
+#include <sycl/ext/intel/math.hpp>
+
+// Relative-error comparator for fp64 within 2 double-precision ulps.
+class is_close {
+public:
+  bool operator()(double x, double y) {
+    return std::fabs((x - y) / (x + y)) <
+           std::numeric_limits<double>::epsilon() * 2;
+  }
+};
+
+int main(int, char **) {
+  sycl::queue device_queue(sycl::default_selector_v);
+  std::cout << "Running on "
+            << device_queue.get_device().get_info<sycl::info::device::name>()
+            << "\n";
+
+  if (!device_queue.get_device().has(sycl::aspect::fp64)) {
+    std::cout << "Test skipped on platform without fp64 support." << std::endl;
+    return 0;
+  }
+
+  // sycl::ext::intel::math::inv
+  {
+    std::initializer_list<double> input_vals = {1., 2.0, 0.5, 100.,
+                                                3., 0.25, 0.002};
+    std::initializer_list<double> ref_vals = {
+        1., 0.5, 2.0, 0.01, 0.3333333333333333, 4., 500.};
+    constexpr auto inv_op = [](double x) {
+      return sycl::ext::intel::math::inv(x);
+    };
+    test<double, double, is_close>(device_queue, input_vals,
+                                   ref_vals, inv_op);
+    std::cout << "sycl::ext::intel::math::inv passes" << std::endl;
+  }
+}