From 281f0e0694ed2b0c19ec54aea2e8a5d5fd403ff5 Mon Sep 17 00:00:00 2001 From: Gang Y Chen Date: Wed, 23 Sep 2020 10:57:47 -0700 Subject: [PATCH] [SYCL][ESIMD] reduce restriction on some esimd API based upon user feedback - allow 256-byte block load for slm - allow vector-length of 1/2/4/8/16/32 for gather and scatter Signed-off-by: Gang Y Chen --- .../CL/sycl/INTEL/esimd/esimd_memory.hpp | 16 ++++++------ .../test/basic_tests/esimd/gather_scatter.cpp | 25 ++++++++++++++++--- sycl/test/basic_tests/esimd/slm_block.cpp | 10 ++++++++ 3 files changed, 40 insertions(+), 11 deletions(-) diff --git a/sycl/include/CL/sycl/INTEL/esimd/esimd_memory.hpp b/sycl/include/CL/sycl/INTEL/esimd/esimd_memory.hpp index b3be166b3f219..7fe1ede878f81 100644 --- a/sycl/include/CL/sycl/INTEL/esimd/esimd_memory.hpp +++ b/sycl/include/CL/sycl/INTEL/esimd/esimd_memory.hpp @@ -84,7 +84,7 @@ constexpr unsigned int ElemsPerAddrEncoding() { template ESIMD_INLINE ESIMD_NODEBUG - typename std::enable_if<((n == 8 || n == 16 || n == 32) && + typename std::enable_if<(__esimd::isPowerOf2(n, 32) && (ElemsPerAddr == 1 || ElemsPerAddr == 2 || ElemsPerAddr == 4)), simd>::type @@ -120,7 +120,7 @@ ESIMD_INLINE ESIMD_NODEBUG template ESIMD_INLINE ESIMD_NODEBUG - typename std::enable_if<((n == 8 || n == 16 || n == 32) && + typename std::enable_if<(__esimd::isPowerOf2(n, 32) && (ElemsPerAddr == 1 || ElemsPerAddr == 2 || ElemsPerAddr == 4)), void>::type @@ -487,9 +487,9 @@ ESIMD_INLINE ESIMD_NODEBUG simd slm_block_load(uint32_t offset) { static_assert(Sz % __esimd::OWORD == 0, "block size must be whole number of owords"); static_assert(__esimd::isPowerOf2(Sz / __esimd::OWORD), - "block must be 1, 2, 4 or 8 owords long"); - static_assert(Sz <= 8 * __esimd::OWORD, - "block size must be at most 8 owords"); + "block must be 1, 2, 4, 8, 16 owords long"); + static_assert(Sz <= 16 * __esimd::OWORD, + "block size must be at most 16 owords"); return __esimd_slm_block_read(offset); } @@ -503,9 +503,9 @@ ESIMD_INLINE ESIMD_NODEBUG void slm_block_store(uint32_t offset, static_assert(Sz % __esimd::OWORD == 0, "block size must be whole number of owords"); static_assert(__esimd::isPowerOf2(Sz / __esimd::OWORD), - "block must be 1, 2, 4 or 8 owords long"); - static_assert(Sz <= 8 * __esimd::OWORD, - "block size must be at most 8 owords"); + "block must be 1, 2, 4, 8, or 16 owords long"); + static_assert(Sz <= 16 * __esimd::OWORD, + "block size must be at most 16 owords"); // offset in genx.oword.st is in owords __esimd_slm_block_write(offset >> 4, vals.data()); diff --git a/sycl/test/basic_tests/esimd/gather_scatter.cpp b/sycl/test/basic_tests/esimd/gather_scatter.cpp index 80c0400a94d41..0420fa4798b54 100644 --- a/sycl/test/basic_tests/esimd/gather_scatter.cpp +++ b/sycl/test/basic_tests/esimd/gather_scatter.cpp @@ -9,7 +9,22 @@ using namespace sycl::INTEL::gpu; using namespace cl::sycl; -void kernel(accessor &buf) __attribute__((sycl_device)) { +void kernel0(accessor &buf) + __attribute__((sycl_device)) { + simd offsets(0, 1); + simd v1(0, 1); + + auto v0 = gather(buf.get_pointer(), offsets); + + v0 = v0 + v1; + + scatter(buf.get_pointer(), v0, offsets); +} + +void kernel(accessor &buf) + __attribute__((sycl_device)) { simd offsets(0, 1); simd v1(0, 1); @@ -20,7 +35,9 @@ void kernel(accessor(buf.get_pointer(), v0, offsets); } -void kernel(accessor &buf) __attribute__((sycl_device)) { +void kernel(accessor &buf) + __attribute__((sycl_device)) { simd offsets(0, 1); simd v1(0, 1); @@ -33,7 +50,9 @@ void kernel(accessor(buf.get_pointer(), v0, offsets); } -void kernel(accessor &buf) __attribute__((sycl_device)) { +void kernel(accessor &buf) + __attribute__((sycl_device)) { simd offsets(0, 1); simd v1(0, 1); diff --git a/sycl/test/basic_tests/esimd/slm_block.cpp b/sycl/test/basic_tests/esimd/slm_block.cpp index d8d2937a9f67a..055f5e4355f11 100644 --- a/sycl/test/basic_tests/esimd/slm_block.cpp +++ b/sycl/test/basic_tests/esimd/slm_block.cpp @@ -18,3 +18,13 @@ void kernel() __attribute__((sycl_device)) { slm_block_store(0, v0); } + +void kernel2() __attribute__((sycl_device)) { + simd v1(0, 1); + + auto v0 = slm_block_load(0); + + v0 = v0 + v1; + + slm_block_store(0, v0); +}