From 1e3af564106f211b01b931969ba9a35e31af9eec Mon Sep 17 00:00:00 2001 From: Dounia Khaldi Date: Wed, 24 Jan 2024 13:40:48 -0800 Subject: [PATCH 1/9] [SYCL][joint matrix] add missing licence to test and add combination-based query --- .../Matrix/SG32/element_wise_all_ops.cpp | 10 ++----- sycl/test-e2e/Matrix/common.hpp | 20 ++++++++++++++ sycl/test-e2e/Matrix/element_wise_all_ops.cpp | 10 ++----- .../Matrix/element_wise_all_ops_impl.hpp | 26 ++++--------------- 4 files changed, 29 insertions(+), 37 deletions(-) diff --git a/sycl/test-e2e/Matrix/SG32/element_wise_all_ops.cpp b/sycl/test-e2e/Matrix/SG32/element_wise_all_ops.cpp index 91b36ee032e27..7b90389af548b 100644 --- a/sycl/test-e2e/Matrix/SG32/element_wise_all_ops.cpp +++ b/sycl/test-e2e/Matrix/SG32/element_wise_all_ops.cpp @@ -5,19 +5,13 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: matrix +// REQUIRES: cpu, gpu // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943 // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include -#include -#include - -using namespace sycl; -using namespace sycl::ext::oneapi::experimental::matrix; -using bfloat16 = sycl::ext::oneapi::bfloat16; +#include "../common.hpp" constexpr size_t SG_SZ = 32; constexpr size_t TN = 16; diff --git a/sycl/test-e2e/Matrix/common.hpp b/sycl/test-e2e/Matrix/common.hpp index 675261a17f3cb..93a52a4c9bfeb 100644 --- a/sycl/test-e2e/Matrix/common.hpp +++ b/sycl/test-e2e/Matrix/common.hpp @@ -1,3 +1,10 @@ +//==------------------ common.hpp - DPC++ joint_matrix---------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// #include #include #include @@ -173,3 +180,16 @@ bool matrix_compare(unsigned int rows, unsigned int cols, T1 *src, T2 *ref) { } return true; } + +bool is_type_supported_by_device(queue q, matrix_type type) { + std::vector combinations = + q.get_device() + .get_info(); + for (int i = 0; i < combinations.size(); i++) { + if (combinations[i].atype == type) { + return true; + } + } + return false; +} diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops.cpp b/sycl/test-e2e/Matrix/element_wise_all_ops.cpp index fd3648664a52c..c4a9967a658db 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops.cpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops.cpp @@ -5,18 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// REQUIRES: matrix +// REQUIRES: cpu, gpu // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include -#include -#include - -using namespace sycl; -using namespace sycl::ext::oneapi::experimental::matrix; -using bfloat16 = sycl::ext::oneapi::bfloat16; +#include "common.hpp" #define SG_SZ 16 constexpr size_t TN = 16; diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp index b11d3093bf08d..b0e4b51cbbd72 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp @@ -5,24 +5,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - -static float make_fp32(bfloat16 x) { - unsigned int y = *((int *)&x); - y = y << 16; - float *res = reinterpret_cast(&y); - return *res; -} - -template struct big_matrix { -public: - T *mat; - -public: - T *get_data() { return mat; } - void set_data(T *data) { mat = data; } - big_matrix(T *data) : mat(data) {} -}; - template void assert_ops_ref(host_accessor mat, const float ref) { @@ -181,9 +163,11 @@ int main() { static constexpr size_t MATRIX_M = TM * 2; static constexpr size_t MATRIX_N = TN * 2; static constexpr size_t MATRIX_K = TK * 2; - - test_ewops_a(); - test_ewops_c(); + queue q; + if (is_type_supported_by_device(q, matrix_type::bf16)) { + test_ewops_a(); + test_ewops_c(); + } return 0; } From 5d7963b950aa0b224be1e1d3ad27e80b2a69deba Mon Sep 17 00:00:00 2001 From: Dounia Khaldi Date: Fri, 26 Jan 2024 06:59:34 -0800 Subject: [PATCH 2/9] Remove XMX8 as it was combined with the main test using the runtime query --- .../Matrix/SG32/element_wise_all_ops.cpp | 5 +- .../Matrix/XMX8/element_wise_all_ops.cpp | 25 ----- sycl/test-e2e/Matrix/common.hpp | 11 ++ sycl/test-e2e/Matrix/element_wise_all_ops.cpp | 5 +- .../Matrix/element_wise_all_ops_impl.hpp | 101 +++++++++++------- 5 files changed, 76 insertions(+), 71 deletions(-) delete mode 100644 sycl/test-e2e/Matrix/XMX8/element_wise_all_ops.cpp diff --git a/sycl/test-e2e/Matrix/SG32/element_wise_all_ops.cpp b/sycl/test-e2e/Matrix/SG32/element_wise_all_ops.cpp index 7b90389af548b..ef4d3221d6ef7 100644 --- a/sycl/test-e2e/Matrix/SG32/element_wise_all_ops.cpp +++ b/sycl/test-e2e/Matrix/SG32/element_wise_all_ops.cpp @@ -7,13 +7,14 @@ //===----------------------------------------------------------------------===// // REQUIRES: cpu, gpu // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943 +// SG size = 32 is unsupported on DG2 +// UNSUPPORTED: gpu-intel-dg2 // RUN: %{build} -o %t.out // RUN: %{run} %t.out #include "../common.hpp" -constexpr size_t SG_SZ = 32; -constexpr size_t TN = 16; +#define SG_SZ 32 #include "../element_wise_all_ops_impl.hpp" diff --git a/sycl/test-e2e/Matrix/XMX8/element_wise_all_ops.cpp b/sycl/test-e2e/Matrix/XMX8/element_wise_all_ops.cpp deleted file mode 100644 index f1f7bf84899a4..0000000000000 --- a/sycl/test-e2e/Matrix/XMX8/element_wise_all_ops.cpp +++ /dev/null @@ -1,25 +0,0 @@ -//==------------ element_wise_all_ops.cpp - DPC++ joint_matrix-------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// REQUIRES: matrix-xmx8 -// REQUIRES: TEMPORARY_DISBLED - -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -#include -#include -#include - -using namespace sycl; -using namespace sycl::ext::oneapi::experimental::matrix; -using bfloat16 = sycl::ext::oneapi::bfloat16; - -#define SG_SZ 8 -constexpr size_t TN = 8; - -#include "../element_wise_all_ops_impl.hpp" diff --git a/sycl/test-e2e/Matrix/common.hpp b/sycl/test-e2e/Matrix/common.hpp index 93a52a4c9bfeb..155cf012d9754 100644 --- a/sycl/test-e2e/Matrix/common.hpp +++ b/sycl/test-e2e/Matrix/common.hpp @@ -193,3 +193,14 @@ bool is_type_supported_by_device(queue q, matrix_type type) { } return false; } + +template size_t get_sg_size(queue q) { + auto KernelID = get_kernel_id(); + auto KB = + get_kernel_bundle(q.get_context(), {KernelID}); + auto kernel = KB.get_kernel(KernelID); + + return kernel + .template get_info( + q.get_device()); +} diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops.cpp b/sycl/test-e2e/Matrix/element_wise_all_ops.cpp index c4a9967a658db..a56389fa6f548 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops.cpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops.cpp @@ -6,13 +6,12 @@ // //===----------------------------------------------------------------------===// // REQUIRES: cpu, gpu +// Due to current bug in A750 +// UNSUPPORTED: gpu-intel-dg2 // RUN: %{build} -o %t.out // RUN: %{run} %t.out #include "common.hpp" -#define SG_SZ 16 -constexpr size_t TN = 16; - #include "element_wise_all_ops_impl.hpp" diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp index b0e4b51cbbd72..3ca231d6cf52b 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp @@ -21,7 +21,7 @@ void assert_ops_ref(host_accessor mat, } template + size_t SUB_COLS, class kernel_name, typename OP> void verify_op_a(const T l, const T r, const float ref, OP op) { T mat[NUM_ROWS][NUM_COLS]; big_matrix big_mat((T *)&mat); @@ -29,12 +29,17 @@ void verify_op_a(const T l, const T r, const float ref, OP op) { buffer bufMat(big_mat.get_data(), range<2>(NUM_ROWS, NUM_COLS)); queue q; + size_t sg_size = get_sg_size(q); q.submit([&](handler &cgh) { sycl::accessor accessMat{bufMat, cgh, sycl::read_write}; - cgh.parallel_for( - nd_range<2>({NUM_ROWS / SUB_ROWS, NUM_COLS / SUB_COLS * SG_SZ}, - {1, 1 * SG_SZ}), - [=](nd_item<2> spmd_item) [[intel::reqd_sub_group_size(SG_SZ)]] { + cgh.parallel_for( + nd_range<2>({NUM_ROWS / SUB_ROWS, NUM_COLS / SUB_COLS * sg_size}, + {1, 1 * sg_size}), + [=](nd_item<2> spmd_item) +#ifdef SG_SZ + [[intel::reqd_sub_group_size(SG_SZ)]] +#endif + { const auto global_idx = spmd_item.get_global_id(0); const auto global_idy = spmd_item.get_global_id(1); const auto sg_startx = global_idx - spmd_item.get_local_id(0); @@ -50,7 +55,7 @@ void verify_op_a(const T l, const T r, const float ref, OP op) { sg, sub_mat, accessMat.template get_multi_ptr() + (sg_startx * SUB_ROWS) * NUM_COLS + - sg_starty / SG_SZ * SUB_COLS, + sg_starty / sg_size * SUB_COLS, NUM_COLS); }); // parallel for }).wait(); @@ -58,20 +63,24 @@ void verify_op_a(const T l, const T r, const float ref, OP op) { } template + size_t SUB_COLS, class kernel_name, typename OP> void verify_op_c(const T l, const T r, const float ref, OP op) { T mat[NUM_ROWS][NUM_COLS]; big_matrix big_mat((T *)&mat); buffer bufMat(big_mat.get_data(), range<2>(NUM_ROWS, NUM_COLS)); - queue q; + size_t sg_size = get_sg_size(q); q.submit([&](handler &cgh) { sycl::accessor accessMat{bufMat, cgh, sycl::read_write}; - cgh.parallel_for( - nd_range<2>({NUM_ROWS / SUB_ROWS, NUM_COLS / SUB_COLS * SG_SZ}, - {1, 1 * SG_SZ}), - [=](nd_item<2> spmd_item) [[intel::reqd_sub_group_size(SG_SZ)]] { + cgh.parallel_for( + nd_range<2>({NUM_ROWS / SUB_ROWS, NUM_COLS / SUB_COLS * sg_size}, + {1, 1 * sg_size}), + [=](nd_item<2> spmd_item) +#ifdef SG_SZ + [[intel::reqd_sub_group_size(SG_SZ)]] +#endif + { const auto global_idx = spmd_item.get_global_id(0); const auto global_idy = spmd_item.get_global_id(1); const auto sg_startx = global_idx - spmd_item.get_local_id(0); @@ -87,7 +96,7 @@ void verify_op_c(const T l, const T r, const float ref, OP op) { sg, sub_mat, accessMat.template get_multi_ptr() + (sg_startx * SUB_ROWS) * NUM_COLS + - sg_starty / SG_SZ * SUB_COLS, + sg_starty / sg_size * SUB_COLS, NUM_COLS, layout::row_major); }); // parallel for }).wait(); @@ -97,61 +106,62 @@ void verify_op_c(const T l, const T r, const float ref, OP op) { template void test_ewops_a() { - verify_op_a( + verify_op_a( T(5.0), T(2.0), 7.0, [](auto l, auto r) { return l + r; }); - verify_op_a( + verify_op_a( T(5.0), T(2.0), 3.0, [](auto l, auto r) { return l - r; }); - verify_op_a( + verify_op_a( T(5.0), T(2.0), 10.0, [](auto l, auto r) { return l * r; }); - verify_op_a( + verify_op_a( T(5.0), T(2.0), 2.5, [](auto l, auto r) { return l / r; }); - verify_op_a( + verify_op_a( T(5.0), T(5.0), 5.0, [](auto l, auto r) { return l == r ? l : T(1.0); }); - verify_op_a( + verify_op_a( T(5.0), T(4.0), 4.0, [](auto l, auto r) { return l == r ? l : r; }); - verify_op_a( + verify_op_a( T(5.0), T(5.0), 1.0, [](auto l, auto r) { return l != r ? l : T(1.0); }); - verify_op_a( + verify_op_a( T(5.0), T(2.0), 3.0, [](auto l, auto r) { return l > r ? T(3.0) : T(2.0); }); - verify_op_a( + verify_op_a( T(5.0), T(2.0), 2.0, [](auto l, auto r) { return l < r ? T(3.0) : T(2.0); }); - verify_op_a( + verify_op_a( T(5.0), T(2.0), 3.0, [](auto l, auto r) { return l >= r ? T(3.0) : T(2.0); }); - verify_op_a( + verify_op_a( T(5.0), T(2.0), 2.0, [](auto l, auto r) { return l <= r ? T(3.0) : T(2.0); }); } - +// Avoid same kernel name for different Sg sizes +template class ewops_c {}; template void test_ewops_c() { - verify_op_c( + verify_op_c>( T(5.0), T(2.0), 7.0, [](auto l, auto r) { return l + r; }); - verify_op_c( + verify_op_c>( T(5.0), T(2.0), 3.0, [](auto l, auto r) { return l - r; }); - verify_op_c( + verify_op_c>( T(5.0), T(2.0), 10.0, [](auto l, auto r) { return l * r; }); - verify_op_c( + verify_op_c>( T(5.0), T(2.0), 2.5, [](auto l, auto r) { return l / r; }); - verify_op_c( + verify_op_c>( T(5.0), T(5.0), 5.0, [](auto l, auto r) { return l == r ? l : T(1.0); }); - verify_op_c( + verify_op_c>( T(5.0), T(4.0), 4.0, [](auto l, auto r) { return l == r ? l : r; }); - verify_op_c( + verify_op_c>( T(5.0), T(5.0), 1.0, [](auto l, auto r) { return l != r ? l : T(1.0); }); - verify_op_c( + verify_op_c>( T(5.0), T(2.0), 3.0, [](auto l, auto r) { return l > r ? T(3.0) : T(2.0); }); - verify_op_c( + verify_op_c>( T(5.0), T(2.0), 2.0, [](auto l, auto r) { return l < r ? T(3.0) : T(2.0); }); - verify_op_c( + verify_op_c>( T(5.0), T(2.0), 3.0, [](auto l, auto r) { return l >= r ? T(3.0) : T(2.0); }); - verify_op_c( + verify_op_c>( T(5.0), T(2.0), 2.0, [](auto l, auto r) { return l <= r ? T(3.0) : T(2.0); }); } @@ -161,13 +171,22 @@ int main() { static constexpr size_t TK = 16; static constexpr size_t MATRIX_M = TM * 2; - static constexpr size_t MATRIX_N = TN * 2; + static constexpr size_t MATRIX_N = 16 * 2; static constexpr size_t MATRIX_K = TK * 2; queue q; - if (is_type_supported_by_device(q, matrix_type::bf16)) { - test_ewops_a(); - test_ewops_c(); + std::vector combinations = + q.get_device() + .get_info(); + for (int i = 0; i < combinations.size(); i++) { + if (combinations[i].atype == matrix_type::bf16) { + test_ewops_a(); + if (combinations[i].nsize == 0 || combinations[i].nsize == 16) + test_ewops_c(); + else + test_ewops_c(); + return 0; + } } - return 0; } From d05f3254b702f2360305d585b38362460548963b Mon Sep 17 00:00:00 2001 From: Dounia Khaldi Date: Fri, 26 Jan 2024 07:09:24 -0800 Subject: [PATCH 3/9] comment clarification --- sycl/test-e2e/Matrix/SG32/element_wise_all_ops.cpp | 2 +- sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/Matrix/SG32/element_wise_all_ops.cpp b/sycl/test-e2e/Matrix/SG32/element_wise_all_ops.cpp index ef4d3221d6ef7..e2831636da0d8 100644 --- a/sycl/test-e2e/Matrix/SG32/element_wise_all_ops.cpp +++ b/sycl/test-e2e/Matrix/SG32/element_wise_all_ops.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // REQUIRES: cpu, gpu // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943 -// SG size = 32 is unsupported on DG2 +// SG size = 32 is not currently supported for SYCL Joint Matrix by IGC on DG2 // UNSUPPORTED: gpu-intel-dg2 // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp index 3ca231d6cf52b..043b0e9f4f9d4 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp @@ -171,7 +171,7 @@ int main() { static constexpr size_t TK = 16; static constexpr size_t MATRIX_M = TM * 2; - static constexpr size_t MATRIX_N = 16 * 2; + static constexpr size_t MATRIX_N = /*TN*/ 16 * 2; static constexpr size_t MATRIX_K = TK * 2; queue q; std::vector combinations = From d4dfcb69cea3017358939092a188212ae013125f Mon Sep 17 00:00:00 2001 From: Dounia Khaldi Date: Fri, 26 Jan 2024 07:14:26 -0800 Subject: [PATCH 4/9] comment clarification --- sycl/test-e2e/Matrix/element_wise_all_ops.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops.cpp b/sycl/test-e2e/Matrix/element_wise_all_ops.cpp index a56389fa6f548..4ee8383154e85 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops.cpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops.cpp @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// // REQUIRES: cpu, gpu -// Due to current bug in A750 +// Test is flaky/timeouts on some variants of DG2 and temporary disabled. Needs +// to be investigated. // UNSUPPORTED: gpu-intel-dg2 // RUN: %{build} -o %t.out From 8acd53220177b967c5750457ef42a9d8f1e32741 Mon Sep 17 00:00:00 2001 From: Dounia Khaldi Date: Fri, 26 Jan 2024 08:31:54 -0800 Subject: [PATCH 5/9] Fix the way we query the type because otherwise it will be inefficient to add other types --- sycl/test-e2e/Matrix/common.hpp | 8 ++++---- .../test-e2e/Matrix/element_wise_all_ops_impl.hpp | 15 ++++++--------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/sycl/test-e2e/Matrix/common.hpp b/sycl/test-e2e/Matrix/common.hpp index 155cf012d9754..34fbb68f2d5a5 100644 --- a/sycl/test-e2e/Matrix/common.hpp +++ b/sycl/test-e2e/Matrix/common.hpp @@ -181,17 +181,17 @@ bool matrix_compare(unsigned int rows, unsigned int cols, T1 *src, T2 *ref) { return true; } -bool is_type_supported_by_device(queue q, matrix_type type) { +unsigned int get_combination_index(queue q, matrix_type type) { std::vector combinations = q.get_device() .get_info(); - for (int i = 0; i < combinations.size(); i++) { + for (unsigned int i = 0; i < combinations.size(); i++) { if (combinations[i].atype == type) { - return true; + return i; } } - return false; + return -1; } template size_t get_sg_size(queue q) { diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp index 043b0e9f4f9d4..42b9725df9a30 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp @@ -178,15 +178,12 @@ int main() { q.get_device() .get_info(); - for (int i = 0; i < combinations.size(); i++) { - if (combinations[i].atype == matrix_type::bf16) { - test_ewops_a(); - if (combinations[i].nsize == 0 || combinations[i].nsize == 16) - test_ewops_c(); - else - test_ewops_c(); - return 0; - } + if (unsigned int i = get_combination_index(q, matrix_type::bf16) != -1) { + test_ewops_a(); + if (combinations[i].nsize == 0 || combinations[i].nsize == 16) + test_ewops_c(); + else + test_ewops_c(); } return 0; } From be69f9084ea8f0fc55b84b92cf7f2a7e1da8d266 Mon Sep 17 00:00:00 2001 From: Dounia Khaldi Date: Fri, 26 Jan 2024 08:46:32 -0800 Subject: [PATCH 6/9] Add type to the kernel name class to make adding a new type in the test possible --- .../Matrix/element_wise_all_ops_impl.hpp | 49 ++++++++++--------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp index 42b9725df9a30..3e4701f1583e1 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp @@ -103,65 +103,68 @@ void verify_op_c(const T l, const T r, const float ref, OP op) { assert_ops_ref(bufMat.get_host_access(read_only), ref); } +// Avoid same kernel name for different Sg sizes +template class ewops_a {}; template void test_ewops_a() { - verify_op_a( + verify_op_a>( T(5.0), T(2.0), 7.0, [](auto l, auto r) { return l + r; }); - verify_op_a( + verify_op_a>( T(5.0), T(2.0), 3.0, [](auto l, auto r) { return l - r; }); - verify_op_a( + verify_op_a>( T(5.0), T(2.0), 10.0, [](auto l, auto r) { return l * r; }); - verify_op_a( + verify_op_a>( T(5.0), T(2.0), 2.5, [](auto l, auto r) { return l / r; }); - verify_op_a( + verify_op_a>( T(5.0), T(5.0), 5.0, [](auto l, auto r) { return l == r ? l : T(1.0); }); - verify_op_a( + verify_op_a>( T(5.0), T(4.0), 4.0, [](auto l, auto r) { return l == r ? l : r; }); - verify_op_a( + verify_op_a>( T(5.0), T(5.0), 1.0, [](auto l, auto r) { return l != r ? l : T(1.0); }); - verify_op_a( + verify_op_a>( T(5.0), T(2.0), 3.0, [](auto l, auto r) { return l > r ? T(3.0) : T(2.0); }); - verify_op_a( + verify_op_a>( T(5.0), T(2.0), 2.0, [](auto l, auto r) { return l < r ? T(3.0) : T(2.0); }); - verify_op_a( + verify_op_a>( T(5.0), T(2.0), 3.0, [](auto l, auto r) { return l >= r ? T(3.0) : T(2.0); }); - verify_op_a( + verify_op_a>( T(5.0), T(2.0), 2.0, [](auto l, auto r) { return l <= r ? T(3.0) : T(2.0); }); } // Avoid same kernel name for different Sg sizes -template class ewops_c {}; +template class ewops_c {}; template void test_ewops_c() { - verify_op_c>( + verify_op_c>( T(5.0), T(2.0), 7.0, [](auto l, auto r) { return l + r; }); - verify_op_c>( + verify_op_c>( T(5.0), T(2.0), 3.0, [](auto l, auto r) { return l - r; }); - verify_op_c>( + verify_op_c>( T(5.0), T(2.0), 10.0, [](auto l, auto r) { return l * r; }); - verify_op_c>( + verify_op_c>( T(5.0), T(2.0), 2.5, [](auto l, auto r) { return l / r; }); - verify_op_c>( + verify_op_c>( T(5.0), T(5.0), 5.0, [](auto l, auto r) { return l == r ? l : T(1.0); }); - verify_op_c>( + verify_op_c>( T(5.0), T(4.0), 4.0, [](auto l, auto r) { return l == r ? l : r; }); - verify_op_c>( + verify_op_c>( T(5.0), T(5.0), 1.0, [](auto l, auto r) { return l != r ? l : T(1.0); }); - verify_op_c>( + verify_op_c>( T(5.0), T(2.0), 3.0, [](auto l, auto r) { return l > r ? T(3.0) : T(2.0); }); - verify_op_c>( + verify_op_c>( T(5.0), T(2.0), 2.0, [](auto l, auto r) { return l < r ? T(3.0) : T(2.0); }); - verify_op_c>( + verify_op_c>( T(5.0), T(2.0), 3.0, [](auto l, auto r) { return l >= r ? T(3.0) : T(2.0); }); - verify_op_c>( + verify_op_c>( T(5.0), T(2.0), 2.0, [](auto l, auto r) { return l <= r ? T(3.0) : T(2.0); }); } From 08649c4daf1ba497d18fd562a1b3a72cb3a1d50c Mon Sep 17 00:00:00 2001 From: Dounia Khaldi Date: Fri, 26 Jan 2024 12:42:59 -0800 Subject: [PATCH 7/9] Address Yury's comments:syntax improvements --- sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp index 3e4701f1583e1..dbf4669f9335c 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp @@ -103,7 +103,7 @@ void verify_op_c(const T l, const T r, const float ref, OP op) { assert_ops_ref(bufMat.get_host_access(read_only), ref); } -// Avoid same kernel name for different Sg sizes +// Avoid same kernel name for different types template class ewops_a {}; template void test_ewops_a() { @@ -135,7 +135,7 @@ void test_ewops_a() { T(5.0), T(2.0), 2.0, [](auto l, auto r) { return l <= r ? T(3.0) : T(2.0); }); } -// Avoid same kernel name for different Sg sizes +// Avoid same kernel name for different types and numbers of columns template class ewops_c {}; template void test_ewops_c() { @@ -171,18 +171,17 @@ void test_ewops_c() { int main() { static constexpr size_t TM = 8; - static constexpr size_t TK = 16; static constexpr size_t MATRIX_M = TM * 2; - static constexpr size_t MATRIX_N = /*TN*/ 16 * 2; - static constexpr size_t MATRIX_K = TK * 2; + static constexpr size_t MATRIX_N = 32; + static constexpr size_t MATRIX_K = 32; queue q; std::vector combinations = q.get_device() .get_info(); if (unsigned int i = get_combination_index(q, matrix_type::bf16) != -1) { - test_ewops_a(); + test_ewops_a(); if (combinations[i].nsize == 0 || combinations[i].nsize == 16) test_ewops_c(); else From fcb7d38f940bc020446982affcaaa57ada7b3794 Mon Sep 17 00:00:00 2001 From: Dounia Khaldi Date: Fri, 26 Jan 2024 13:53:34 -0800 Subject: [PATCH 8/9] Improve the logic of the query --- sycl/test-e2e/Matrix/common.hpp | 8 ++++---- sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp | 14 ++++++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/sycl/test-e2e/Matrix/common.hpp b/sycl/test-e2e/Matrix/common.hpp index 34fbb68f2d5a5..155cf012d9754 100644 --- a/sycl/test-e2e/Matrix/common.hpp +++ b/sycl/test-e2e/Matrix/common.hpp @@ -181,17 +181,17 @@ bool matrix_compare(unsigned int rows, unsigned int cols, T1 *src, T2 *ref) { return true; } -unsigned int get_combination_index(queue q, matrix_type type) { +bool is_type_supported_by_device(queue q, matrix_type type) { std::vector combinations = q.get_device() .get_info(); - for (unsigned int i = 0; i < combinations.size(); i++) { + for (int i = 0; i < combinations.size(); i++) { if (combinations[i].atype == type) { - return i; + return true; } } - return -1; + return false; } template size_t get_sg_size(queue q) { diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp index dbf4669f9335c..fc4d5d1cdb06e 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp @@ -180,12 +180,18 @@ int main() { q.get_device() .get_info(); - if (unsigned int i = get_combination_index(q, matrix_type::bf16) != -1) { - test_ewops_a(); - if (combinations[i].nsize == 0 || combinations[i].nsize == 16) + for (unsigned int i = 0; i < combinations.size(); i++) { + if (combinations[i].atype == matrix_type::bf16 && + (combinations[i].nsize == 0 || combinations[i].nsize == 16)) { + test_ewops_a(); test_ewops_c(); - else + break; + } else if (combinations[i].atype == matrix_type::bf16 && + combinations[i].nsize == 8) { + test_ewops_a(); test_ewops_c(); + break; + } } return 0; } From cc965c57ec25dc341f903f4374b34e40b33564a5 Mon Sep 17 00:00:00 2001 From: Dounia Khaldi Date: Fri, 26 Jan 2024 14:09:18 -0800 Subject: [PATCH 9/9] minor improvement --- .../Matrix/element_wise_all_ops_impl.hpp | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp index fc4d5d1cdb06e..55d1162ebd3af 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp @@ -181,16 +181,17 @@ int main() { .get_info(); for (unsigned int i = 0; i < combinations.size(); i++) { - if (combinations[i].atype == matrix_type::bf16 && - (combinations[i].nsize == 0 || combinations[i].nsize == 16)) { - test_ewops_a(); - test_ewops_c(); - break; - } else if (combinations[i].atype == matrix_type::bf16 && - combinations[i].nsize == 8) { - test_ewops_a(); - test_ewops_c(); - break; + if (combinations[i].atype == matrix_type::bf16) { + if (combinations[i].nsize == 0 || combinations[i].nsize == 16) { + test_ewops_a(); + test_ewops_c(); + break; + } + if (combinations[i].nsize == 8) { + test_ewops_a(); + test_ewops_c(); + break; + } } } return 0;