diff --git a/sycl/test-e2e/Matrix/SG32/element_wise_all_ops_tf32.cpp b/sycl/test-e2e/Matrix/SG32/element_wise_all_ops_tf32.cpp index 301cdaf73190f..d99ef23a5d8bf 100644 --- a/sycl/test-e2e/Matrix/SG32/element_wise_all_ops_tf32.cpp +++ b/sycl/test-e2e/Matrix/SG32/element_wise_all_ops_tf32.cpp @@ -19,6 +19,5 @@ using namespace sycl; using namespace sycl::ext::oneapi::experimental::matrix; constexpr size_t SG_SZ = 32; -constexpr size_t TN = 16; #include "../element_wise_all_ops_tf32_impl.hpp" diff --git a/sycl/test-e2e/Matrix/joint_matrix_fill_store.cpp b/sycl/test-e2e/Matrix/joint_matrix_fill_store.cpp new file mode 100644 index 0000000000000..50ddf48d23ca2 --- /dev/null +++ b/sycl/test-e2e/Matrix/joint_matrix_fill_store.cpp @@ -0,0 +1,13 @@ +//==-- joint_matrix_fill_store.cpp = Test for Joint Matrix fill and store --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-------------------------------------------------------------------------===// +// REQUIRES: matrix + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include "joint_matrix_fill_store_impl.hpp" diff --git a/sycl/test-e2e/Matrix/joint_matrix_fill_store_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_fill_store_impl.hpp new file mode 100644 index 0000000000000..5b35872631221 --- /dev/null +++ b/sycl/test-e2e/Matrix/joint_matrix_fill_store_impl.hpp @@ -0,0 +1,103 @@ +//==----------------------------------------------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-------------------------------------------------------------------------===// + +// TODO: add this test to XMX8 and SG32 folders +#include "common.hpp" +#define SG_SZ 16 + +template +void matrix_fill_store(big_matrix &C, big_matrix &A, + big_matrix &B) { + buffer bufA(A.get_data(), range<2>(TM, TK)); + buffer bufB(B.get_data(), range<2>(TK / 2, TN * 2)); + buffer bufC((float *)C.get_data(), range<2>(TM, TN)); + + queue q; + q.submit([&](handler &cgh) { + auto accC = bufC.get_access(cgh); + auto accA = bufA.get_access(cgh); + auto accB = bufB.get_access(cgh); + + cgh.parallel_for( + nd_range<2>({1, 1 * SG_SZ}, {1, 1 * SG_SZ}), + [=](nd_item<2> spmd_item) [[intel::reqd_sub_group_size(SG_SZ)]] { + const auto global_idx = spmd_item.get_global_id(0); + const auto global_idy = spmd_item.get_global_id(1); + const auto sg_startx = global_idx - spmd_item.get_local_id(0); + const auto sg_starty = global_idy - spmd_item.get_local_id(1); + + sub_group sg = spmd_item.get_sub_group(); + joint_matrix + sub_a; + + // For B, we assume B has been already VNNIed. + joint_matrix + sub_b; + joint_matrix sub_c; + + // TODO: uncomment these calls to add testing for other types of + // matrices + // joint_matrix_fill(sg, sub_a, 5.0); + // joint_matrix_fill(sg, sub_b, 5.0); + joint_matrix_fill(sg, sub_c, 5.0); + + // ext::intel::experimental::matrix::joint_matrix_store( + // sg, sub_a, accA.template + // get_multi_ptr(), TK); + + // ext::intel::experimental::matrix::joint_matrix_store( + // sg, sub_b, accB.template + // get_multi_ptr(), TN * 2); + + joint_matrix_store( + sg, sub_c, accC.template get_multi_ptr(), + TN, layout::row_major); + }); // parallel for + }).wait(); +} + +template bool run_test() { + + bfloat16 A[TM][TK]; + bfloat16 A_ref[TM][TK]; + bfloat16 B[TK / 2][TN * 2]; + bfloat16 B_ref[TK / 2][TN * 2]; + float C[TM][TN]; + float C_ref[TM][TN]; + + matrix_fill(TM, TK, (bfloat16 *)A, (bfloat16)0); + matrix_fill(TK / 2, TN * 2, (bfloat16 *)B, (bfloat16)0); + matrix_fill(TM, TN, (float *)C, 0.0f); + + matrix_fill(TM, TK, (bfloat16 *)A_ref, (bfloat16)5); + matrix_fill(TK / 2, TN * 2, (bfloat16 *)B_ref, (bfloat16)5); + matrix_fill(TM, TN, (float *)C_ref, 5.0f); + + big_matrix MC((float *)&C); + big_matrix MA((bfloat16 *)&A); + big_matrix MB((bfloat16 *)&B); + + matrix_fill_store(MC, MA, MB); + + // TODO: uncomment these calls to verify other types of matrices + // bool res = matrix_compare(TM, TK, (bfloat16 *)A, (bfloat16 *)A_ref); + // res &= matrix_compare(TK / 2, TN * 2, (bfloat16 *)B, (bfloat16 *)B_ref); + bool res = matrix_compare(TM, TN, (float *)C, (float *)C_ref); + + return res; +} + +int main() { + // TODO: add all supported size and types combinations + bool res = run_test<8, 16, 16>(); + res &= run_test<32, 64, 16>(); + res &= run_test<16, 16, 16>(); + std::cout << (res ? "passed" : "failed") << std::endl; + return !res; +}