|
| 1 | +// RUN: %{build} -o %t.out |
| 2 | +// RUN: %{run} %t.out |
| 3 | + |
| 4 | +// Currently grf_size property can take value 256 (large) on PVC and DG2: |
| 5 | +// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_intel_grf_size.asciidoc |
| 6 | +// REQUIRES: gpu && (gpu-intel-pvc || gpu-intel-dg2) |
| 7 | +// UNSUPPORTED: cuda || hip |
| 8 | + |
| 9 | +// Currently fails because of issue in UR Level Zero adapter. |
| 10 | +// XFAIL: level_zero |
| 11 | + |
| 12 | +// clang-format off |
| 13 | +#include <sycl/sycl.hpp> |
| 14 | +#include <sycl/ext/intel/experimental/grf_size_properties.hpp> |
| 15 | +// clang-format on |
| 16 | + |
| 17 | +using namespace sycl; |
| 18 | + |
| 19 | +// Test that kernel can be submitted with work group size returned by |
| 20 | +// info::kernel_device_specific::work_group_size when large register file is |
| 21 | +// used. |
| 22 | + |
| 23 | +class MyKernel; |
| 24 | +namespace syclex = sycl::ext::oneapi::experimental; |
| 25 | +namespace intelex = sycl::ext::intel::experimental; |
| 26 | + |
| 27 | +__attribute__((noinline)) void f(int *result, nd_item<1> &index) { |
| 28 | + result[index.get_global_id()] = index.get_global_id(); |
| 29 | +} |
| 30 | + |
| 31 | +int main() { |
| 32 | + queue myQueue; |
| 33 | + auto myContext = myQueue.get_context(); |
| 34 | + auto myDev = myQueue.get_device(); |
| 35 | + |
| 36 | + kernel_id kernelId = get_kernel_id<MyKernel>(); |
| 37 | + auto myBundle = |
| 38 | + get_kernel_bundle<bundle_state::executable>(myContext, {kernelId}); |
| 39 | + |
| 40 | + kernel myKernel = myBundle.get_kernel(kernelId); |
| 41 | + size_t maxWgSize = |
| 42 | + myKernel.get_info<info::kernel_device_specific::work_group_size>(myDev); |
| 43 | + |
| 44 | + // Submit kernel with maximum work group size. |
| 45 | + nd_range myRange{range{maxWgSize}, range{maxWgSize}}; |
| 46 | + |
| 47 | + int *result = sycl::malloc_shared<int>(maxWgSize, myQueue); |
| 48 | + syclex::properties kernelProperties{intelex::grf_size<256>}; |
| 49 | + myQueue.submit([&](handler &cgh) { |
| 50 | + cgh.use_kernel_bundle(myBundle); |
| 51 | + cgh.parallel_for<MyKernel>(myRange, kernelProperties, |
| 52 | + ([=](nd_item<1> index) { f(result, index); })); |
| 53 | + }); |
| 54 | + |
| 55 | + myQueue.wait(); |
| 56 | + return 0; |
| 57 | +} |
0 commit comments