diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 1a4bd03d33b7f..052d9bdcfdc77 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -27,6 +27,11 @@ kernel_impl::kernel_impl(RT::PiKernel Kernel, ContextImplPtr Context) // Let the runtime caller handle native kernel retaining in other cases if // it's needed. getPlugin().call(MKernel); + // Enable USM indirect access for interoperability kernels. + // Some PI Plugins (like OpenCL) require this call to enable USM + // For others, PI will turn this into a NOP. + getPlugin().call( + MKernel, PI_USM_INDIRECT_ACCESS, sizeof(pi_bool), &PI_TRUE); } kernel_impl::kernel_impl(RT::PiKernel Kernel, ContextImplPtr ContextImpl, diff --git a/sycl/test/usm/source_kernel_indirect_access.cpp b/sycl/test/usm/source_kernel_indirect_access.cpp new file mode 100644 index 0000000000000..1ce8814ac63ba --- /dev/null +++ b/sycl/test/usm/source_kernel_indirect_access.cpp @@ -0,0 +1,56 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -lOpenCL %s -o %t1.out +// RUN: %CPU_RUN_PLACEHOLDER %t1.out +// RUN: %GPU_RUN_PLACEHOLDER %t1.out +// REQUIRES: opencl + +#include +#include + +using namespace sycl; + +static const char *Src = R"( +kernel void test(global ulong *PSrc, global ulong *PDst) { + global int *Src = (global int *) *PSrc; + global int *Dst = (global int *) *PDst; + int Old = *Src, New = Old + 1; + printf("Read %d from %p; write %d to %p\n", Old, Src, New, Dst); + *Dst = New; +} +)"; + +int main() { + queue Q{}; + + cl_context Ctx = Q.get_context().get(); + cl_program Prog = clCreateProgramWithSource(Ctx, 1, &Src, NULL, NULL); + clBuildProgram(Prog, 0, NULL, NULL, NULL, NULL); + + cl_kernel OclKernel = clCreateKernel(Prog, "test", NULL); + + cl::sycl::kernel SyclKernel(OclKernel, Q.get_context()); + + auto POuter = malloc_shared(1, Q); + auto PInner = malloc_shared(1, Q); + auto QOuter = malloc_shared(1, Q); + auto QInner = malloc_shared(1, Q); + + *PInner = 4; + *POuter = PInner; + *QInner = 0; + *QOuter = QInner; + + Q.submit([&](handler &CGH) { + CGH.set_arg(0, POuter); + CGH.set_arg(1, QOuter); + CGH.parallel_for(cl::sycl::range<1>(1), SyclKernel); + }).wait(); + + assert(*PInner == 4 && "Read value is corrupted"); + assert(*QInner == 5 && "Value value is incorrect"); + + std::cout << "Increment: " << *PInner << " -> " << *QInner << std::endl; + + clReleaseKernel(OclKernel); + clReleaseProgram(Prog); + clReleaseContext(Ctx); +}