Skip to content

Commit 0b9caa6

Browse files
author
Diptorup Deb
committed
Add unit tests for local accessor kernel arg.
1 parent 4023e87 commit 0b9caa6

File tree

4 files changed

+370
-0
lines changed

4 files changed

+370
-0
lines changed

libsyclinterface/tests/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ set(spirv-test-files
2121
multi_kernel.spv
2222
oneD_range_kernel_inttys_fp32.spv
2323
oneD_range_kernel_fp64.spv
24+
local_accessor_kernel_inttys_fp32.spv
25+
local_accessor_kernel_fp64.spv
2426
)
2527

2628
foreach(tf ${spirv-test-files})
Binary file not shown.
Binary file not shown.
Lines changed: 368 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,368 @@
1+
//===-- test_sycl_queue_submit.cpp - Test cases for kernel submission fns. ===//
2+
//
3+
// Data Parallel Control (dpctl)
4+
//
5+
// Copyright 2020-2024 Intel Corporation
6+
//
7+
// Licensed under the Apache License, Version 2.0 (the "License");
8+
// you may not use this file except in compliance with the License.
9+
// You may obtain a copy of the License at
10+
//
11+
// http://www.apache.org/licenses/LICENSE-2.0
12+
//
13+
// Unless required by applicable law or agreed to in writing, software
14+
// distributed under the License is distributed on an "AS IS" BASIS,
15+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
// See the License for the specific language governing permissions and
17+
// limitations under the License.
18+
//
19+
//===----------------------------------------------------------------------===//
20+
///
21+
/// \file
22+
/// This file has unit test cases for the various submit functions defined
23+
/// inside dpctl_sycl_queue_interface.cpp.
24+
//===----------------------------------------------------------------------===//
25+
26+
#include "dpctl_sycl_context_interface.h"
27+
#include "dpctl_sycl_device_interface.h"
28+
#include "dpctl_sycl_device_selector_interface.h"
29+
#include "dpctl_sycl_event_interface.h"
30+
#include "dpctl_sycl_kernel_bundle_interface.h"
31+
#include "dpctl_sycl_kernel_interface.h"
32+
#include "dpctl_sycl_queue_interface.h"
33+
#include "dpctl_sycl_type_casters.hpp"
34+
#include "dpctl_sycl_usm_interface.h"
35+
#include <filesystem>
36+
#include <fstream>
37+
#include <gtest/gtest.h>
38+
#include <iostream>
39+
#include <sycl/sycl.hpp>
40+
#include <utility>
41+
42+
namespace
43+
{
44+
constexpr size_t SIZE = 100;
45+
46+
using namespace dpctl::syclinterface;
47+
48+
typedef struct MDLocalAccessorTy
49+
{
50+
size_t ndim;
51+
DPCTLKernelArgType dpctl_type_id;
52+
size_t dim0;
53+
size_t dim1;
54+
size_t dim2;
55+
} MDLocalAccessor;
56+
57+
template <typename T>
58+
void submit_kernel(DPCTLSyclQueueRef QRef,
59+
DPCTLSyclKernelBundleRef KBRef,
60+
std::vector<char> spirvBuffer,
61+
size_t spirvFileSize,
62+
DPCTLKernelArgType kernelArgTy,
63+
std::string kernelName)
64+
{
65+
constexpr size_t NARGS = 2;
66+
constexpr size_t RANGE_NDIMS = 1;
67+
68+
ASSERT_TRUE(DPCTLKernelBundle_HasKernel(KBRef, kernelName.c_str()));
69+
auto kernel = DPCTLKernelBundle_GetKernel(KBRef, kernelName.c_str());
70+
71+
// Create the input args
72+
auto a = DPCTLmalloc_shared(SIZE * sizeof(T), QRef);
73+
ASSERT_TRUE(a != nullptr);
74+
auto a_ptr = static_cast<T *>(unwrap<void>(a));
75+
for (auto i = 0ul; i < SIZE; ++i) {
76+
a_ptr[i] = 0;
77+
}
78+
79+
auto la = MDLocalAccessor{1, kernelArgTy, SIZE / 10, 1, 1};
80+
81+
// Create kernel args for vector_add
82+
size_t gRange[] = {SIZE};
83+
size_t lRange[] = {SIZE / 10};
84+
void *args[NARGS] = {unwrap<void>(a), (void *)&la};
85+
DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR,
86+
DPCTL_LOCAL_ACCESSOR};
87+
88+
auto ERef =
89+
DPCTLQueue_SubmitNDRange(kernel, QRef, args, addKernelArgTypes, NARGS,
90+
gRange, lRange, RANGE_NDIMS, nullptr, 0);
91+
ASSERT_TRUE(ERef != nullptr);
92+
DPCTLQueue_Wait(QRef);
93+
94+
if (kernelArgTy != DPCTL_FLOAT32_T && kernelArgTy != DPCTL_FLOAT64_T)
95+
ASSERT_TRUE(a_ptr[0] == 20);
96+
else
97+
ASSERT_TRUE(a_ptr[0] == 20.0);
98+
99+
// clean ups
100+
DPCTLEvent_Delete(ERef);
101+
DPCTLKernel_Delete(kernel);
102+
DPCTLfree_with_queue((DPCTLSyclUSMRef)a, QRef);
103+
}
104+
105+
} /* end of anonymous namespace */
106+
107+
/*
108+
// The local_accessor_kernel spv files were generated from the SYCL program
109+
// included in this comment. The program can be compiled using
110+
// `icpx -fsycl local_accessor_kernel.cpp`. After that if the generated
111+
// executable is run with the environment variable `SYCL_DUMP_IMAGES=1`, icpx
112+
// runtime will dump all offload sections of fat binary to the current working
113+
// directory. When tested with DPC++ 2024.0 the kernels are split across two
114+
// separate SPV files. One contains all kernels for integers and FP32
115+
// data type, and another contains the kernel for FP64.
116+
//
117+
// Note that, `SYCL_DUMP_IMAGES=1` will also generate extra SPV files that
118+
// contain the code for built in functions such as indexing and barriers. To
119+
// figure which SPV file contains the kernels, use `spirv-dis` from the
120+
// spirv-tools package to translate the SPV binary format to a human-readable
121+
// textual format.
122+
#include <CL/sycl.hpp>
123+
#include <iostream>
124+
#include <sstream>
125+
126+
template <typename T>
127+
class SyclKernel_SLM
128+
{
129+
private:
130+
T N_;
131+
T *a_ = nullptr;
132+
sycl::local_accessor<T, 1> slm_;
133+
134+
public:
135+
SyclKernel_SLM(T *a, sycl::local_accessor<T, 1> slm)
136+
: a_(a), slm_(slm)
137+
{
138+
}
139+
140+
void operator()(sycl::nd_item<1> it) const
141+
{
142+
int i = it.get_global_id();
143+
int j = it.get_local_id();
144+
slm_[j] = 2;
145+
auto g = it.get_group();
146+
group_barrier(g);
147+
auto temp = 0;
148+
for (auto idx = 0ul; idx < it.get_local_range(0); ++idx)
149+
temp += slm_[idx];
150+
a_[i] = temp * (i + 1);
151+
}
152+
};
153+
154+
template <typename T>
155+
void submit_kernel(sycl::queue q, const unsigned long N, T *a)
156+
{
157+
q.submit([&](auto &h)
158+
{
159+
sycl::local_accessor<T, 1> slm(sycl::range(N/10), h);
160+
h.parallel_for(sycl::nd_range(sycl::range{N}, sycl::range{N/10}),
161+
SyclKernel_SLM<T>(a, slm)); });
162+
}
163+
164+
template <typename T>
165+
void driver(size_t N)
166+
{
167+
sycl::queue q;
168+
auto *a = sycl::malloc_shared<T>(N, q);
169+
submit_kernel(q, N, a);
170+
q.wait();
171+
172+
for (auto i = 0ul; i < 10; ++i)
173+
std::cout << "A[" << i << "] : " << (size_t)a[i] << " " << std::endl;
174+
sycl::free(a, q);
175+
}
176+
177+
int main(int argc, const char **argv)
178+
{
179+
size_t N = 0;
180+
std::cout << "Enter problem size in N:\n";
181+
std::cin >> N;
182+
std::cout << "Executing with N = " << N << std::endl;
183+
184+
driver<int8_t>(N);
185+
driver<uint8_t>(N);
186+
driver<int16_t>(N);
187+
driver<int32_t>(N);
188+
driver<int32_t>(N);
189+
driver<uint32_t>(N);
190+
driver<int64_t>(N);
191+
driver<uint64_t>(N);
192+
driver<float>(N);
193+
driver<double>(N);
194+
195+
return 0;
196+
}
197+
198+
*/
199+
200+
struct TestQueueSubmitWithLocalAccessor : public ::testing::Test
201+
{
202+
std::ifstream spirvFile;
203+
size_t spirvFileSize_;
204+
std::vector<char> spirvBuffer_;
205+
DPCTLSyclQueueRef QRef = nullptr;
206+
DPCTLSyclKernelBundleRef KBRef = nullptr;
207+
208+
TestQueueSubmitWithLocalAccessor()
209+
{
210+
DPCTLSyclDeviceSelectorRef DSRef = nullptr;
211+
DPCTLSyclDeviceRef DRef = nullptr;
212+
213+
spirvFile.open("./local_accessor_kernel_inttys_fp32.spv",
214+
std::ios::binary | std::ios::ate);
215+
spirvFileSize_ = std::filesystem::file_size(
216+
"./local_accessor_kernel_inttys_fp32.spv");
217+
spirvBuffer_.reserve(spirvFileSize_);
218+
spirvFile.seekg(0, std::ios::beg);
219+
spirvFile.read(spirvBuffer_.data(), spirvFileSize_);
220+
221+
DSRef = DPCTLDefaultSelector_Create();
222+
DRef = DPCTLDevice_CreateFromSelector(DSRef);
223+
QRef =
224+
DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY);
225+
auto CRef = DPCTLQueue_GetContext(QRef);
226+
227+
KBRef = DPCTLKernelBundle_CreateFromSpirv(
228+
CRef, DRef, spirvBuffer_.data(), spirvFileSize_, nullptr);
229+
DPCTLDevice_Delete(DRef);
230+
DPCTLDeviceSelector_Delete(DSRef);
231+
}
232+
233+
~TestQueueSubmitWithLocalAccessor()
234+
{
235+
spirvFile.close();
236+
DPCTLQueue_Delete(QRef);
237+
DPCTLKernelBundle_Delete(KBRef);
238+
}
239+
};
240+
241+
struct TestQueueSubmitWithLocalAccessorFP64 : public ::testing::Test
242+
{
243+
std::ifstream spirvFile;
244+
size_t spirvFileSize_;
245+
std::vector<char> spirvBuffer_;
246+
DPCTLSyclQueueRef QRef = nullptr;
247+
DPCTLSyclKernelBundleRef KBRef = nullptr;
248+
249+
TestQueueSubmitWithLocalAccessorFP64()
250+
{
251+
DPCTLSyclDeviceSelectorRef DSRef = nullptr;
252+
DPCTLSyclDeviceRef DRef = nullptr;
253+
254+
spirvFile.open("./local_accessor_kernel_fp64.spv",
255+
std::ios::binary | std::ios::ate);
256+
spirvFileSize_ =
257+
std::filesystem::file_size("./local_accessor_kernel_fp64.spv");
258+
spirvBuffer_.reserve(spirvFileSize_);
259+
spirvFile.seekg(0, std::ios::beg);
260+
spirvFile.read(spirvBuffer_.data(), spirvFileSize_);
261+
DSRef = DPCTLDefaultSelector_Create();
262+
DRef = DPCTLDevice_CreateFromSelector(DSRef);
263+
QRef =
264+
DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY);
265+
auto CRef = DPCTLQueue_GetContext(QRef);
266+
267+
KBRef = DPCTLKernelBundle_CreateFromSpirv(
268+
CRef, DRef, spirvBuffer_.data(), spirvFileSize_, nullptr);
269+
DPCTLDevice_Delete(DRef);
270+
DPCTLDeviceSelector_Delete(DSRef);
271+
}
272+
273+
~TestQueueSubmitWithLocalAccessorFP64()
274+
{
275+
spirvFile.close();
276+
DPCTLQueue_Delete(QRef);
277+
DPCTLKernelBundle_Delete(KBRef);
278+
}
279+
};
280+
281+
TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt8)
282+
{
283+
submit_kernel<int8_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_,
284+
DPCTLKernelArgType::DPCTL_INT8_T,
285+
"_ZTS14SyclKernel_SLMIaE");
286+
}
287+
288+
TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt8)
289+
{
290+
submit_kernel<uint8_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_,
291+
DPCTLKernelArgType::DPCTL_UINT8_T,
292+
"_ZTS14SyclKernel_SLMIhE");
293+
}
294+
295+
TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt16)
296+
{
297+
submit_kernel<int16_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_,
298+
DPCTLKernelArgType::DPCTL_INT16_T,
299+
"_ZTS14SyclKernel_SLMIsE");
300+
}
301+
302+
TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt16)
303+
{
304+
submit_kernel<uint16_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_,
305+
DPCTLKernelArgType::DPCTL_UINT16_T,
306+
"_ZTS14SyclKernel_SLMItE");
307+
}
308+
309+
TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt32)
310+
{
311+
submit_kernel<int32_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_,
312+
DPCTLKernelArgType::DPCTL_INT32_T,
313+
"_ZTS14SyclKernel_SLMIiE");
314+
}
315+
316+
TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt32)
317+
{
318+
submit_kernel<uint32_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_,
319+
DPCTLKernelArgType::DPCTL_UINT32_T,
320+
"_ZTS14SyclKernel_SLMIjE");
321+
}
322+
323+
TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt64)
324+
{
325+
submit_kernel<int64_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_,
326+
DPCTLKernelArgType::DPCTL_INT64_T,
327+
"_ZTS14SyclKernel_SLMIlE");
328+
}
329+
330+
TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt64)
331+
{
332+
submit_kernel<uint64_t>(QRef, KBRef, spirvBuffer_, spirvFileSize_,
333+
DPCTLKernelArgType::DPCTL_UINT64_T,
334+
"_ZTS14SyclKernel_SLMImE");
335+
}
336+
337+
TEST_F(TestQueueSubmitWithLocalAccessor, CheckForFloat)
338+
{
339+
submit_kernel<float>(QRef, KBRef, spirvBuffer_, spirvFileSize_,
340+
DPCTLKernelArgType::DPCTL_FLOAT32_T,
341+
"_ZTS14SyclKernel_SLMIfE");
342+
}
343+
344+
TEST_F(TestQueueSubmitWithLocalAccessorFP64, CheckForDouble)
345+
{
346+
submit_kernel<double>(QRef, KBRef, spirvBuffer_, spirvFileSize_,
347+
DPCTLKernelArgType::DPCTL_FLOAT64_T,
348+
"_ZTS14SyclKernel_SLMIdE");
349+
}
350+
351+
TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUnsupportedArgTy)
352+
{
353+
size_t gRange[] = {SIZE};
354+
size_t lRange[] = {SIZE / 10};
355+
size_t RANGE_NDIMS = 1;
356+
constexpr size_t NARGS = 2;
357+
358+
auto la = MDLocalAccessor{1, DPCTL_UNSUPPORTED_KERNEL_ARG, SIZE / 10, 1, 1};
359+
auto kernel = DPCTLKernelBundle_GetKernel(KBRef, "_ZTS14SyclKernel_SLMImE");
360+
void *args[NARGS] = {unwrap<void>(nullptr), (void *)&la};
361+
DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR,
362+
DPCTL_LOCAL_ACCESSOR};
363+
auto ERef =
364+
DPCTLQueue_SubmitNDRange(kernel, QRef, args, addKernelArgTypes, NARGS,
365+
gRange, lRange, RANGE_NDIMS, nullptr, 0);
366+
367+
ASSERT_TRUE(ERef == nullptr);
368+
}

0 commit comments

Comments
 (0)