Skip to content

Commit 0beaf3f

Browse files
committed
[SYCL] Moved tests introduced in intel/llvm#3255
1 parent 3530fb2 commit 0beaf3f

File tree

2 files changed

+142
-0
lines changed

2 files changed

+142
-0
lines changed

SYCL/SubGroup/sub_group_as.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// Sub-groups are not suported on Host
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
// Execution on CPU and FPGA takes 100000 times longer
5+
// RUNx: %CPU_RUN_PLACEHOLDER %t.out
6+
// RUNx: %ACC_RUN_PLACEHOLDER %t.out
7+
8+
#include <CL/sycl.hpp>
9+
#include <cassert>
10+
#include <cstdint>
11+
#include <cstdio>
12+
#include <cstdlib>
13+
14+
int main(int argc, char *argv[]) {
15+
cl::sycl::queue queue;
16+
printf("Device Name = %s\n",
17+
queue.get_device().get_info<cl::sycl::info::device::name>().c_str());
18+
19+
// Initialize some host memory
20+
constexpr int N = 64;
21+
int host_mem[N];
22+
for (int i = 0; i < N; ++i) {
23+
host_mem[i] = i * 100;
24+
}
25+
26+
// Use the device to transform each value
27+
{
28+
cl::sycl::buffer<int, 1> buf(host_mem, N);
29+
queue.submit([&](cl::sycl::handler &cgh) {
30+
auto global =
31+
buf.get_access<cl::sycl::access::mode::read_write,
32+
cl::sycl::access::target::global_buffer>(cgh);
33+
sycl::accessor<int, 1, sycl::access::mode::read_write,
34+
sycl::access::target::local>
35+
local(N, cgh);
36+
37+
cgh.parallel_for<class test>(
38+
cl::sycl::nd_range<1>(N, 32), [=](cl::sycl::nd_item<1> it) {
39+
cl::sycl::ONEAPI::sub_group sg = it.get_sub_group();
40+
if (!it.get_local_id(0)) {
41+
int end = it.get_global_id(0) + it.get_local_range()[0];
42+
for (int i = it.get_global_id(0); i < end; i++) {
43+
local[i] = i;
44+
}
45+
}
46+
it.barrier();
47+
48+
int i = (it.get_global_id(0) / sg.get_max_local_range()[0]) *
49+
sg.get_max_local_range()[0];
50+
// Global address space
51+
auto x = sg.load(&global[i]);
52+
53+
// Local address space
54+
auto y = sg.load(&local[i]);
55+
56+
sg.store(&global[i], x + y);
57+
});
58+
});
59+
}
60+
61+
// Print results and tidy up
62+
for (int i = 0; i < N; ++i) {
63+
if (i * 101 != host_mem[i]) {
64+
printf("Unexpected result %04d vs %04d\n", i * 101, host_mem[i]);
65+
return 1;
66+
}
67+
}
68+
printf("Success!\n");
69+
return 0;
70+
}

SYCL/SubGroup/sub_group_as_vec.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// Sub-groups are not suported on Host
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
// Execution on CPU and FPGA takes 100000 times longer
5+
// RUNx: %CPU_RUN_PLACEHOLDER %t.out
6+
// RUNx: %ACC_RUN_PLACEHOLDER %t.out
7+
8+
#include <CL/sycl.hpp>
9+
#include <cassert>
10+
#include <cstdint>
11+
#include <cstdio>
12+
#include <cstdlib>
13+
14+
int main(int argc, char *argv[]) {
15+
cl::sycl::queue queue;
16+
printf("Device Name = %s\n",
17+
queue.get_device().get_info<cl::sycl::info::device::name>().c_str());
18+
19+
// Initialize some host memory
20+
constexpr int N = 64;
21+
sycl::vec<int, 2> host_mem[N];
22+
for (int i = 0; i < N; ++i) {
23+
host_mem[i].s0() = i;
24+
host_mem[i].s1() = 0;
25+
}
26+
27+
// Use the device to transform each value
28+
{
29+
cl::sycl::buffer<sycl::vec<int, 2>, 1> buf(host_mem, N);
30+
queue.submit([&](cl::sycl::handler &cgh) {
31+
auto global =
32+
buf.get_access<cl::sycl::access::mode::read_write,
33+
cl::sycl::access::target::global_buffer>(cgh);
34+
sycl::accessor<sycl::vec<int, 2>, 1, sycl::access::mode::read_write,
35+
sycl::access::target::local>
36+
local(N, cgh);
37+
cgh.parallel_for<class test>(
38+
cl::sycl::nd_range<1>(N, 32), [=](cl::sycl::nd_item<1> it) {
39+
cl::sycl::ONEAPI::sub_group sg = it.get_sub_group();
40+
if (!it.get_local_id(0)) {
41+
int end = it.get_global_id(0) + it.get_local_range()[0];
42+
for (int i = it.get_global_id(0); i < end; i++) {
43+
local[i].s0() = 0;
44+
local[i].s1() = i;
45+
}
46+
}
47+
it.barrier();
48+
49+
int i = (it.get_global_id(0) / sg.get_max_local_range()[0]) *
50+
sg.get_max_local_range()[0];
51+
// Global address space
52+
auto x = sg.load(&global[i]);
53+
54+
// Local address space
55+
auto y = sg.load(&local[i]);
56+
57+
sg.store(&global[i], x + y);
58+
});
59+
});
60+
}
61+
62+
// Print results and tidy up
63+
for (int i = 0; i < N; ++i) {
64+
if (i != host_mem[i].s0() || i != host_mem[i].s1()) {
65+
printf("Unexpected result [%02d,%02d] vs [%02d,%02d]\n", i, i,
66+
host_mem[i].s0(), host_mem[i].s1());
67+
return 1;
68+
}
69+
}
70+
printf("Success!\n");
71+
return 0;
72+
}

0 commit comments

Comments
 (0)