From aa5cf4566af5ddfff87d161534917a99d561dda9 Mon Sep 17 00:00:00 2001 From: Bing1 Yu Date: Mon, 14 Feb 2022 11:47:17 +0800 Subject: [PATCH 1/7] [Matrix][SYCL] Add bfloat16 support for wi_slice --- .../sycl/ext/oneapi/matrix/matrix-jit.hpp | 160 ++++++++++++++++++ 1 file changed, 160 insertions(+) diff --git a/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp b/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp index d541891dcb6a7..81d7c3259c18b 100644 --- a/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp +++ b/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp @@ -11,6 +11,7 @@ #include #include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -737,6 +738,165 @@ class wi_element { } }; +template +class wi_element { + joint_matrix &M; + std::size_t idx; + +public: + wi_element(joint_matrix &Mat, + std::size_t i) + : M(Mat), idx(i) {} + operator sycl::ext::intel::experimental::bfloat16() { +#ifdef __SYCL_DEVICE_ONLY__ + return __spirv_VectorExtractDynamic(M.spvm, idx); +#else + throw runtime_error("joint matrix is not supported on host device.", + PI_INVALID_DEVICE); +#endif // __SYCL_DEVICE_ONLY__ + } + + explicit operator bool() { +#ifdef __SYCL_DEVICE_ONLY__ + return std::fabs(static_cast(__spirv_VectorExtractDynamic( + M.spvm, idx))) >= std::numeric_limits::epsilon(); +#else + throw runtime_error("joint matrix is not supported on host device.", + PI_INVALID_DEVICE); +#endif // __SYCL_DEVICE_ONLY__ + } + + wi_element &operator=(const sycl::ext::intel::experimental::bfloat16 &rhs) { +#ifdef __SYCL_DEVICE_ONLY__ + M.spvm = __spirv_VectorInsertDynamic(M.spvm, rhs, idx); + return *this; +#else + (void)rhs; + throw runtime_error("joint matrix is not supported on host device.", + PI_INVALID_DEVICE); +#endif // __SYCL_DEVICE_ONLY__ + } + + wi_element & + operator=(const wi_element &rhs) { +#ifdef __SYCL_DEVICE_ONLY__ + M.spvm = __spirv_VectorInsertDynamic( + M.spvm, __spirv_VectorExtractDynamic(rhs.M.spvm, rhs.idx), idx); + return *this; +#else + (void)rhs; + throw runtime_error("joint matrix is not supported on host device.", + PI_INVALID_DEVICE); +#endif // __SYCL_DEVICE_ONLY__ + } + +#if __SYCL_DEVICE_ONLY__ +#define OP(opassign, op) \ + wi_element &operator opassign( \ + const sycl::ext::intel::experimental::bfloat16 &rhs) { \ + M.spvm = __spirv_VectorInsertDynamic( \ + M.spvm, \ + static_cast(__spirv_VectorExtractDynamic(M.spvm, idx) \ + op static_cast(rhs))), \ + idx); \ + return *this; \ + } +#else // __SYCL_DEVICE_ONLY__ +#define OP(opassign, op) \ + wi_element &operator opassign( \ + const sycl::ext::intel::experimental::bfloat16 &rhs) { \ + (void)rhs; \ + throw runtime_error("joint matrix is not supported on host device.", \ + PI_INVALID_DEVICE); \ + } +#endif // __SYCL_DEVICE_ONLY__ + OP(+=, +) + OP(-=, -) + OP(*=, *) + OP(/=, /) +#undef OP + +#if __SYCL_DEVICE_ONLY__ +#define OP(type, op) \ + friend type operator op( \ + const wi_element &lhs, \ + const sycl::ext::intel::experimental::bfloat16 &rhs) { \ + return static_cast(__spirv_VectorExtractDynamic( \ + lhs.M.spvm, lhs.idx)) op static_cast(rhs); \ + } \ + friend type operator op( \ + const sycl::ext::intel::experimental::bfloat16 &lhs, \ + const wi_element &rhs) { \ + return static_cast(__spirv_VectorExtractDynamic( \ + rhs.M.spvm, rhs.idx)) op static_cast(lhs); \ + } + OP(sycl::ext::intel::experimental::bfloat16, +) + OP(sycl::ext::intel::experimental::bfloat16, -) + OP(sycl::ext::intel::experimental::bfloat16, *) + OP(sycl::ext::intel::experimental::bfloat16, /) +#undef OP +#define OP(type, op) \ + friend type operator op( \ + const wi_element &lhs, \ + const sycl::ext::intel::experimental::bfloat16 &rhs) { \ + return type{static_cast(__spirv_VectorExtractDynamic( \ + lhs.M.spvm, lhs.idx)) op static_cast(rhs)}; \ + } \ + friend type operator op( \ + const sycl::ext::intel::experimental::bfloat16 &lhs, \ + const wi_element &rhs) { \ + return type{static_cast(__spirv_VectorExtractDynamic( \ + rhs.M.spvm, rhs.idx)) op static_cast(lhs)}; \ + } + OP(bool, ==) + OP(bool, !=) + OP(bool, <) + OP(bool, >) + OP(bool, <=) + OP(bool, >=) +#undef OP +#else // __SYCL_DEVICE_ONLY__ +#define OP(type, op) \ + friend type operator op( \ + const wi_element &lhs, \ + const sycl::ext::intel::experimental::bfloat16 &rhs) { \ + (void)lhs; \ + (void)rhs; \ + throw runtime_error("joint matrix is not supported on host device.", \ + PI_INVALID_DEVICE); \ + } \ + friend type operator op( \ + const sycl::ext::intel::experimental::bfloat16 &lhs, \ + const wi_element &rhs) { \ + (void)lhs; \ + (void)rhs; \ + throw runtime_error("joint matrix is not supported on host device.", \ + PI_INVALID_DEVICE); \ + } + OP(sycl::ext::intel::experimental::bfloat16, +) + OP(sycl::ext::intel::experimental::bfloat16, -) + OP(sycl::ext::intel::experimental::bfloat16, *) + OP(sycl::ext::intel::experimental::bfloat16, /) + OP(bool, ==) + OP(bool, !=) + OP(bool, <) + OP(bool, >) + OP(bool, <=) + OP(bool, >=) +#undef OP +#endif // __SYCL_DEVICE_ONLY__ +}; + template class wi_slice { From c310976d718e456b9406692f2b0132181d4bdc21 Mon Sep 17 00:00:00 2001 From: Bing1 Yu Date: Mon, 14 Feb 2022 13:07:59 +0800 Subject: [PATCH 2/7] small fix --- sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp b/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp index 81d7c3259c18b..17aa2d1d7644b 100644 --- a/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp +++ b/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp @@ -799,10 +799,7 @@ class wi_element(__spirv_VectorExtractDynamic(M.spvm, idx) \ - op static_cast(rhs))), \ - idx); \ + M.spvm, __spirv_VectorExtractDynamic(M.spvm, idx) op rhs, idx); \ return *this; \ } #else // __SYCL_DEVICE_ONLY__ @@ -826,15 +823,13 @@ class wi_element &lhs, \ const sycl::ext::intel::experimental::bfloat16 &rhs) { \ - return static_cast(__spirv_VectorExtractDynamic( \ - lhs.M.spvm, lhs.idx)) op static_cast(rhs); \ + return __spirv_VectorExtractDynamic(lhs.M.spvm, lhs.idx) op rhs; \ } \ friend type operator op( \ const sycl::ext::intel::experimental::bfloat16 &lhs, \ const wi_element &rhs) { \ - return static_cast(__spirv_VectorExtractDynamic( \ - rhs.M.spvm, rhs.idx)) op static_cast(lhs); \ + return __spirv_VectorExtractDynamic(rhs.M.spvm, rhs.idx) op lhs; \ } OP(sycl::ext::intel::experimental::bfloat16, +) OP(sycl::ext::intel::experimental::bfloat16, -) From 37d75f5f56014e73194ed56dbf4ea04c6f878ab7 Mon Sep 17 00:00:00 2001 From: Bing1 Yu Date: Mon, 11 Apr 2022 23:17:32 +0800 Subject: [PATCH 3/7] small rebase --- .../sycl/ext/oneapi/matrix/matrix-jit.hpp | 58 +++++++++---------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp b/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp index 1f214daf9a81d..8cf80b506141c 100644 --- a/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp +++ b/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -455,18 +455,18 @@ class wi_element { }; template -class wi_element { - joint_matrix &M; std::size_t idx; public: - wi_element(joint_matrix &Mat, std::size_t i) : M(Mat), idx(i) {} - operator sycl::ext::intel::experimental::bfloat16() { + operator sycl::ext::oneapi::experimental::bfloat16() { #ifdef __SYCL_DEVICE_ONLY__ return __spirv_VectorExtractDynamic(M.spvm, idx); #else @@ -485,7 +485,7 @@ class wi_element &rhs) { #ifdef __SYCL_DEVICE_ONLY__ M.spvm = __spirv_VectorInsertDynamic( @@ -513,7 +513,7 @@ class wi_element &lhs, \ - const sycl::ext::intel::experimental::bfloat16 &rhs) { \ + const sycl::ext::oneapi::experimental::bfloat16 &rhs) { \ return __spirv_VectorExtractDynamic(lhs.M.spvm, lhs.idx) op rhs; \ } \ friend type operator op( \ - const sycl::ext::intel::experimental::bfloat16 &lhs, \ - const wi_element &rhs) { \ return __spirv_VectorExtractDynamic(rhs.M.spvm, rhs.idx) op lhs; \ } - OP(sycl::ext::intel::experimental::bfloat16, +) - OP(sycl::ext::intel::experimental::bfloat16, -) - OP(sycl::ext::intel::experimental::bfloat16, *) - OP(sycl::ext::intel::experimental::bfloat16, /) + OP(sycl::ext::oneapi::experimental::bfloat16, +) + OP(sycl::ext::oneapi::experimental::bfloat16, -) + OP(sycl::ext::oneapi::experimental::bfloat16, *) + OP(sycl::ext::oneapi::experimental::bfloat16, /) #undef OP #define OP(type, op) \ friend type operator op( \ - const wi_element &lhs, \ - const sycl::ext::intel::experimental::bfloat16 &rhs) { \ + const sycl::ext::oneapi::experimental::bfloat16 &rhs) { \ return type{static_cast(__spirv_VectorExtractDynamic( \ lhs.M.spvm, lhs.idx)) op static_cast(rhs)}; \ } \ friend type operator op( \ - const sycl::ext::intel::experimental::bfloat16 &lhs, \ - const wi_element &rhs) { \ return type{static_cast(__spirv_VectorExtractDynamic( \ rhs.M.spvm, rhs.idx)) op static_cast(lhs)}; \ @@ -577,27 +577,27 @@ class wi_element &lhs, \ - const sycl::ext::intel::experimental::bfloat16 &rhs) { \ + const sycl::ext::oneapi::experimental::bfloat16 &rhs) { \ (void)lhs; \ (void)rhs; \ throw runtime_error("joint matrix is not supported on host device.", \ PI_INVALID_DEVICE); \ } \ friend type operator op( \ - const sycl::ext::intel::experimental::bfloat16 &lhs, \ - const wi_element &rhs) { \ (void)lhs; \ (void)rhs; \ throw runtime_error("joint matrix is not supported on host device.", \ PI_INVALID_DEVICE); \ } - OP(sycl::ext::intel::experimental::bfloat16, +) - OP(sycl::ext::intel::experimental::bfloat16, -) - OP(sycl::ext::intel::experimental::bfloat16, *) - OP(sycl::ext::intel::experimental::bfloat16, /) + OP(sycl::ext::oneapi::experimental::bfloat16, +) + OP(sycl::ext::oneapi::experimental::bfloat16, -) + OP(sycl::ext::oneapi::experimental::bfloat16, *) + OP(sycl::ext::oneapi::experimental::bfloat16, /) OP(bool, ==) OP(bool, !=) OP(bool, <) From a6a7c821f8e816d79996151c22ebc3edbc7ccef5 Mon Sep 17 00:00:00 2001 From: Bing1 Yu Date: Thu, 21 Apr 2022 16:24:11 +0800 Subject: [PATCH 4/7] add a testcase for bfloat16 --- sycl/test/matrix/matrix-bfloat16-test.cpp | 192 ++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 sycl/test/matrix/matrix-bfloat16-test.cpp diff --git a/sycl/test/matrix/matrix-bfloat16-test.cpp b/sycl/test/matrix/matrix-bfloat16-test.cpp new file mode 100644 index 0000000000000..ebf6b292ad8d2 --- /dev/null +++ b/sycl/test/matrix/matrix-bfloat16-test.cpp @@ -0,0 +1,192 @@ +// RUN: %clangxx -fsycl -O2 %s -o %t.out +#include +#if (SYCL_EXT_ONEAPI_MATRIX == 2) +#include + +using namespace sycl; +using namespace sycl::ext::oneapi::experimental::matrix; +using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; + +#define TILE_SZ 16 +#define TM (TILE_SZ - 1) +#define TN (TILE_SZ - 1) +#define TK (2 * TILE_SZ - 2) + +#define SG_SZ 16 + +template struct big_matrix { +public: + T *mat; + +public: + T *get_data() { return mat; } + void set_data(T *data) { mat = data; } + big_matrix(T *data) : mat(data) {} +}; + +template +void matrix_multiply(big_matrix &C, + big_matrix &A, + big_matrix &B) { + size_t M = NUM_ROWS_C; + size_t N = NUM_COLS_C; + size_t K = NUM_COLS_A; + // B => K/4 x N*4, A => M x K, C => M, N + // stride should be X's cols, e.g., B's stirde = N*4 + assert(NUM_ROWS_C == NUM_ROWS_A && NUM_COLS_A == NUM_ROWS_B * 2); + size_t NDRangeM = M / TM; + size_t NDRangeN = N / TN; + buffer bufA(A.get_data(), range<2>(M, K)); + buffer bufB(B.get_data(), range<2>(K, N)); + buffer bufC((float *)C.get_data(), range<2>(M, N)); + + queue q; + q.submit([&](handler &cgh) { + auto accC = bufC.get_access(cgh); + auto accA = bufA.get_access(cgh); + auto accB = bufB.get_access(cgh); + + cgh.parallel_for( + nd_range<2>({NDRangeM, NDRangeN * SG_SZ}, {1, 1 * SG_SZ}), + [accA, accB, accC, M, N, K](nd_item<2> spmd_item) + + { + // The submatrix API has to be accessed by all the workitems in a + // subgroup these functions will be called once by the subgroup no + // code divergence between the workitems + const auto global_idx = spmd_item.get_global_id(0); + const auto global_idy = spmd_item.get_global_id(1); + const auto sg_startx = global_idx - spmd_item.get_local_id(0); + const auto sg_starty = global_idy - spmd_item.get_local_id(1); + + ext::oneapi::sub_group sg = spmd_item.get_sub_group(); + joint_matrix sub_a(sg); + // For B, since current implementation does not support non-packed + // layout, users need to specify the updated VNNI sizes along with + // the packed_b layout. By default, the layout is row_major and size + // is (TK, TN). + joint_matrix sub_b(sg); + joint_matrix sub_c(sg); + + // AMX: 8 register tiles : 1k byte size, SMmaxxSKmax =16x64 + // strideX = X's cols, so strideC = N, strideA = K, strideB = N*4 + joint_matrix_load(sg, sub_c, + accC.get_pointer() + (sg_startx * TM) * N + + sg_starty / SG_SZ * TN, + N, matrix_layout::row_major); + for (int k = 0; k < K / TK; k += 1) { // + joint_matrix_load( + sg, sub_a, accA.get_pointer() + (sg_startx * TM) * K + k * TK, + K, matrix_layout::row_major); + // Assuming B data is already in VNNI format. + joint_matrix_load(sg, sub_b, + accB.get_pointer() + (k * TK / 2) * (N * 2) + + sg_starty / SG_SZ * TN * 2, + N * 2, matrix_layout::packed_b); + sub_c = joint_matrix_mad(sg, sub_a, sub_b, sub_c); + } + joint_matrix_store(sg, sub_c, + accC.get_pointer() + (sg_startx * TM) * N + + sg_starty / SG_SZ * TN, + N, matrix_layout::row_major); + }); // parallel for + }).wait(); +} + +static constexpr size_t MATRIX_M = TM * 2; +static constexpr size_t MATRIX_N = TN * 2; +static constexpr size_t MATRIX_K = TK * 2; +bfloat16 A[MATRIX_M][MATRIX_K]; +bfloat16 B[MATRIX_K / 2][MATRIX_N * 2]; +unsigned short Aref[MATRIX_M][MATRIX_K]; +unsigned short Bref[MATRIX_K / 2][MATRIX_N * 2]; +float C[MATRIX_M][MATRIX_N]; +float D[MATRIX_M][MATRIX_N]; + +float make_fp32(short x) { + unsigned int y = x; + y = y << 16; + float *res = reinterpret_cast(&y); + return *res; +} + +unsigned short make_bf16(float x) { + int *res = reinterpret_cast(&x); + *res = *res >> 16; + return (unsigned short)*res; +} + +void matrix_multiply_ref(int *A_mem, int *B_mem, int *C_mem, int M, int N, + int K) { + // tiling + for (int m = 0; m < M; m++) + for (int n = 0; n < N; n++) { + for (int k = 0; k < K; k++) { + short *va = (short *)(A_mem + m * K + k); + short *vb = (short *)(B_mem + k * N + n); + float acc = *((float *)(C_mem + m * N + n)); + // FIXME: Should we do reduce-add in another version? + for (int i = 0; i < 2; i++) { + acc += (make_fp32(va[i]) * make_fp32(vb[i])); + } + *((float *)(C_mem + m * N + n)) = acc; + } + } +} + +int main() { + for (int i = 0; i < MATRIX_M; i++) { + for (int j = 0; j < MATRIX_K; j++) { + // Ee create bfloat16 from unsigned short since float-to-bfloat's + // conversion is not allowed. + A[i][j] = bfloat16::from_bits(make_bf16(1.0f * (i + j))); + Aref[i][j] = make_bf16(1.0f * (i + j)); + } + } + for (int i = 0; i < MATRIX_K / 2; i++) { + for (int j = 0; j < MATRIX_N * 2; j++) { + B[i][j] = bfloat16::from_bits((make_bf16(2.0f * i + 3.0f * j))); + Bref[i][j] = make_bf16(2.0f * i + 3.0f * j); + } + } + for (int i = 0; i < MATRIX_M; i++) { + for (int j = 0; j < MATRIX_N; j++) { + C[i][j] = 1.0; + D[i][j] = 1.0; + } + } + + big_matrix MC((float *)&C); + big_matrix MD((float *)&D); + big_matrix MA((bfloat16 *)&A); + big_matrix MB((bfloat16 *)&B); + matrix_multiply(MC, MA, MB); + matrix_multiply_ref((int32_t *)Aref, (int32_t *)Bref, (int32_t *)D, MATRIX_M, + MATRIX_N, MATRIX_K / 2); + + bool res = true; + for (int i = 0; i < MATRIX_M; i++) { + for (int j = 0; j < MATRIX_N; j++) { + if (C[i][j] != D[i][j]) + res = false; + } + } + if (res) + std::cout << "passed\n"; + else + std::cout << "failed\n"; + for (int i = 0; i < MATRIX_M; i++) { + for (int j = 0; j < MATRIX_N; j++) + std::cout << C[i][j] << ", "; + std::cout << "\n"; + } + std::cout << std::endl; + for (int i = 0; i < MATRIX_M; i++) { + for (int j = 0; j < MATRIX_N; j++) + std::cout << D[i][j] << ", "; + std::cout << "\n"; + } +} +#endif // (SYCL_EXT_ONEAPI_MATRIX == 2) From 6bc53d52d668c5f3cd99199b05b283063f423e80 Mon Sep 17 00:00:00 2001 From: Bing1 Yu Date: Fri, 22 Apr 2022 17:31:48 +0800 Subject: [PATCH 5/7] address Keryell's comments --- sycl/test/matrix/matrix-bfloat16-test.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/sycl/test/matrix/matrix-bfloat16-test.cpp b/sycl/test/matrix/matrix-bfloat16-test.cpp index ebf6b292ad8d2..8ab28adc408d9 100644 --- a/sycl/test/matrix/matrix-bfloat16-test.cpp +++ b/sycl/test/matrix/matrix-bfloat16-test.cpp @@ -3,7 +3,6 @@ #if (SYCL_EXT_ONEAPI_MATRIX == 2) #include -using namespace sycl; using namespace sycl::ext::oneapi::experimental::matrix; using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; @@ -38,19 +37,19 @@ void matrix_multiply(big_matrix &C, assert(NUM_ROWS_C == NUM_ROWS_A && NUM_COLS_A == NUM_ROWS_B * 2); size_t NDRangeM = M / TM; size_t NDRangeN = N / TN; - buffer bufA(A.get_data(), range<2>(M, K)); - buffer bufB(B.get_data(), range<2>(K, N)); - buffer bufC((float *)C.get_data(), range<2>(M, N)); + sycl::buffer bufA(A.get_data(), sycl::range<2>(M, K)); + sycl::buffer bufB(B.get_data(), sycl::range<2>(K, N)); + sycl::buffer bufC((float *)C.get_data(), sycl::range<2>(M, N)); - queue q; - q.submit([&](handler &cgh) { - auto accC = bufC.get_access(cgh); - auto accA = bufA.get_access(cgh); - auto accB = bufB.get_access(cgh); + sycl::queue q; + q.submit([&](sycl::handler &cgh) { + auto accC = bufC.get_access(cgh); + auto accA = bufA.get_access(cgh); + auto accB = bufB.get_access(cgh); cgh.parallel_for( - nd_range<2>({NDRangeM, NDRangeN * SG_SZ}, {1, 1 * SG_SZ}), - [accA, accB, accC, M, N, K](nd_item<2> spmd_item) + sycl::nd_range<2>({NDRangeM, NDRangeN * SG_SZ}, {1, 1 * SG_SZ}), + [accA, accB, accC, M, N, K](sycl::nd_item<2> spmd_item) { // The submatrix API has to be accessed by all the workitems in a @@ -61,7 +60,7 @@ void matrix_multiply(big_matrix &C, const auto sg_startx = global_idx - spmd_item.get_local_id(0); const auto sg_starty = global_idy - spmd_item.get_local_id(1); - ext::oneapi::sub_group sg = spmd_item.get_sub_group(); + sycl::ext::oneapi::sub_group sg = spmd_item.get_sub_group(); joint_matrix sub_a(sg); // For B, since current implementation does not support non-packed // layout, users need to specify the updated VNNI sizes along with From b5c11947007e358180362a1b01550a9b87612ade Mon Sep 17 00:00:00 2001 From: Bing1 Yu Date: Fri, 22 Apr 2022 17:45:20 +0800 Subject: [PATCH 6/7] change macro to constexpr --- sycl/test/matrix/matrix-bfloat16-test.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sycl/test/matrix/matrix-bfloat16-test.cpp b/sycl/test/matrix/matrix-bfloat16-test.cpp index 8ab28adc408d9..8a5010e007293 100644 --- a/sycl/test/matrix/matrix-bfloat16-test.cpp +++ b/sycl/test/matrix/matrix-bfloat16-test.cpp @@ -6,12 +6,12 @@ using namespace sycl::ext::oneapi::experimental::matrix; using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; -#define TILE_SZ 16 -#define TM (TILE_SZ - 1) -#define TN (TILE_SZ - 1) -#define TK (2 * TILE_SZ - 2) +static constexpr size_t TILE_SZ = 16; +static constexpr size_t TM = TILE_SZ - 1; +static constexpr size_t TN = TILE_SZ - 1; +static constexpr size_t TK = 2 * TILE_SZ - 2; -#define SG_SZ 16 +static constexpr size_t SG_SZ = 16; template struct big_matrix { public: From a42e3aa110f8f6aa5d3a074f603b7066547d630f Mon Sep 17 00:00:00 2001 From: Bing1 Yu Date: Sun, 24 Apr 2022 15:20:04 +0800 Subject: [PATCH 7/7] small changes --- sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp | 12 ++++-------- sycl/test/matrix/matrix-bfloat16-test.cpp | 10 +++++----- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp b/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp index 8cf80b506141c..af4d1927fd176 100644 --- a/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp +++ b/sycl/include/sycl/ext/oneapi/matrix/matrix-jit.hpp @@ -578,19 +578,15 @@ class wi_element &lhs, \ - const sycl::ext::oneapi::experimental::bfloat16 &rhs) { \ - (void)lhs; \ - (void)rhs; \ + NumCols, Layout, Group> &, \ + const sycl::ext::oneapi::experimental::bfloat16 &) { \ throw runtime_error("joint matrix is not supported on host device.", \ PI_INVALID_DEVICE); \ } \ friend type operator op( \ - const sycl::ext::oneapi::experimental::bfloat16 &lhs, \ + const sycl::ext::oneapi::experimental::bfloat16 &, \ const wi_element &rhs) { \ - (void)lhs; \ - (void)rhs; \ + NumCols, Layout, Group> &) { \ throw runtime_error("joint matrix is not supported on host device.", \ PI_INVALID_DEVICE); \ } diff --git a/sycl/test/matrix/matrix-bfloat16-test.cpp b/sycl/test/matrix/matrix-bfloat16-test.cpp index 8a5010e007293..fb9995cd0e322 100644 --- a/sycl/test/matrix/matrix-bfloat16-test.cpp +++ b/sycl/test/matrix/matrix-bfloat16-test.cpp @@ -6,12 +6,12 @@ using namespace sycl::ext::oneapi::experimental::matrix; using bfloat16 = sycl::ext::oneapi::experimental::bfloat16; -static constexpr size_t TILE_SZ = 16; -static constexpr size_t TM = TILE_SZ - 1; -static constexpr size_t TN = TILE_SZ - 1; -static constexpr size_t TK = 2 * TILE_SZ - 2; +static constexpr auto TILE_SZ = 16; +static constexpr auto TM = TILE_SZ - 1; +static constexpr auto TN = TILE_SZ - 1; +static constexpr auto TK = 2 * TILE_SZ - 2; -static constexpr size_t SG_SZ = 16; +static constexpr auto SG_SZ = 16; template struct big_matrix { public: