From 2911ea78aa52699334d98c1066e20200a6e5000b Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 31 Mar 2021 17:07:13 +0300 Subject: [PATCH 001/122] [SYCL] [DOC] Prepare design-document for assert feature Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 144 +++++++++++++++++++++++ sycl/doc/extensions/Assert/abort.md | 1 + sycl/doc/extensions/Assert/level-zero.md | 19 +++ sycl/doc/extensions/Assert/opencl.md | 22 ++++ 4 files changed, 186 insertions(+) create mode 100644 sycl/doc/Assert.md create mode 100644 sycl/doc/extensions/Assert/abort.md create mode 100644 sycl/doc/extensions/Assert/level-zero.md create mode 100644 sycl/doc/extensions/Assert/opencl.md diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md new file mode 100644 index 0000000000000..d4582d861c5c3 --- /dev/null +++ b/sycl/doc/Assert.md @@ -0,0 +1,144 @@ +# Assert feature + +**IMPORTANT**: This document is a draft. + +During debugging of kernel code user may put assertions here and there. +The expected behaviour of assertion failure at host is application abort. +Our choice for device-side assertions is asynchronous exception in order to +allow for extensibility. + +The user is free to disable assertions by defining `NDEBUG` macro at +compile-time. + + +## Use-case example + +``` +using namespace cl::sycl; +auto ErrorHandler = [] (exception_list Exs) { + for (exception_ptr const& E : Exs) { + try { + std::rethrow_exception(E); + } + catch (event_error const& Ex) { + std::cout << "Exception - " << Ex.what(); // assertion failed + std::abort(); + } + } +}; + +void user_func(item<2> Item) { + assert((Item[0] % 2) && "Nil"); +} + +int main() { + queue Q(ErrorHandler); + Q.submit([&] (handler& CGH) { + CGH.parallel_for(range<2>{N, M}, [=](item<2> It) { + do_smth(); + user_func(It); + do_smth_else(); + }); + }); + Q.wait_and_throw(); + std::cout << "One shouldn't see this message."; + return 0; +} +``` + +In this use-case every work-item with even X dimension will trigger assertion +failure. 
Assertion failure should be reported via asynchronous exceptions. If +asynchronous exception handler is set the failure is reported with +`cl::sycl::event_error` exception. Otherwise, SYCL Runtime should trigger abort. +At least one failed assertion should be reported. + +When multiple kernels are enqueued and both fail at assertion at least single +assertion should be reported. + +## User requirements + +From user's point of view there are the following requirements: + +| # | Title | Description | Importance | +| - | ----- | ----------- | ---------- | +| 1 | Handle assertion failure | Signal about assertion failure via SYCL asynchronous exception | Must have | +| 2 | Print assert message | Assert function should print message to stderr at host | Must have | +| 3 | Stop under debugger | When debugger is attached, break at assertion point | Highly desired | +| 4 | Reliability | Assert failure should be reported regardless of kernel deadlock | Highly desired | + +## Contents of `cl::sycl::event_error` + +`cl::sycl::event_error::what()` should return the same assertion failure message +as is printed at the time being. + +Other than that, interface of `cl::sycl::event_error` should look like: +``` +class event_error : public runtime_error { +public: + event_error() = default; + + event_error(const char *Msg, cl_int Err) + : event_error(string_class(Msg), Err) {} + + event_error(const string_class &Msg, cl_int Err) : runtime_error(Msg, Err) {} + + /// Returns global ID with the dimension provided + int globalId(int Dim) const; + + /// Returns local ID with the dimension provided + int localId(int Dim) const; +}; +``` + +Regardless of whether asynchronous exception handler is set or not, there's an +action to be performed by SYCL Runtime. To achieve this, information about +assert failure should be propagated from device-side to SYCL Runtime. This +should be performed via calls to `clGetEventInfo` for OpenCL backend and +`zeEventQueryStatus` for Level-Zero backend. 
+ +## Terms + + - Device-side Runtime - part of device-code, which is supplied by Device-side + Compiler. + - Low-level Runtime - the backend/runtime, behind DPCPP Runtime. + - Device-side Compiler - compiler which generates device-native bitcode based + on input SPIR-V image. + - Accessor metadata - parts of accessor representation at device-side: pointer, + ranges, offset. + +## How it works? + +For the time being, `assert(expr)` macro ends up in call to +`__devicelib_assert_fail` function. This function is part of [Device library extension](doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst#cl_intel_devicelib_cassert). +Device code already contains call to the function. Currently, a device-binary +is always linked against fallback implementation. +Device-side compiler/linker provides their implementation of `__devicelib_assert_fail` +and prefer this implementation over fallback one. + +If Device-side Runtime supports `__devicelib_assert_fail` then Low-Level Runtime +is responsible for: + - detecting if assert failure took place; + - flushing assert message to `stderr` on host. +When detected, Low-level Runtime reports assert failure to DPCPP Runtime +at synchronization points. + +Refer to [OpenCL](doc/extensions/Assert/opencl.md) and [Level-Zero](doc/extensions/Assert/level-zero.md) +extensions. + +If Device-side Runtime doesn't support `__devicelib_assert_fail` then a buffer +based approach comes in place. The approach doesn't require any support from +Device-side Runtime. Neither it does from Low-level Runtime. + +Within this approach, a dedicated assert buffer is allocated and implicit kernel +argument is introduced. The argument is an accessor with `discard_read_write` +or `discard_write` access mode. Accessor metadata is stored to program scope +variable. This allows to refer to the accessor without modifying each and every +user's function. 
Fallback implementation of `__devicelib_assert_fail` restores +accessor metadata from program scope variable and writes assert information to +the assert buffer. Atomic operations are used in order to not overwrite existing +information. + +Storing and restoring of accessor metadata to/from program scope variable is +performed with help of builtins. Implementations of these builtins are +substituted by frontend. + diff --git a/sycl/doc/extensions/Assert/abort.md b/sycl/doc/extensions/Assert/abort.md new file mode 100644 index 0000000000000..8b137891791fe --- /dev/null +++ b/sycl/doc/extensions/Assert/abort.md @@ -0,0 +1 @@ + diff --git a/sycl/doc/extensions/Assert/level-zero.md b/sycl/doc/extensions/Assert/level-zero.md new file mode 100644 index 0000000000000..265ff5858d83f --- /dev/null +++ b/sycl/doc/extensions/Assert/level-zero.md @@ -0,0 +1,19 @@ +# Overview + +This extension enables detection of assert failure of kernel. + +# New enum value + +`ze_result_t` enumeration should be augmented with `ZE_RESULT_ABORTED` enum +element. This enum value indicated a detected assert failure at device-side. + +# Changed API + +``` +ze_event_handle_t Event; // describes an event of kernel been submitted previously +ze_result Result = zeEventQueryStatus(Event); +``` + +If kernel failed an assertion `zeEventQueryStatus` should return +`ZE_RESULT_ABORTED`. + diff --git a/sycl/doc/extensions/Assert/opencl.md b/sycl/doc/extensions/Assert/opencl.md new file mode 100644 index 0000000000000..50ad0b7db0897 --- /dev/null +++ b/sycl/doc/extensions/Assert/opencl.md @@ -0,0 +1,22 @@ +# Overview + +This extension enables detection of assert failure of kernel. + +# New error code + +`CL_ASSERT_FAILURE` is added to indicate a detected assert failure at +device-side. 
+ +# Changed API + +``` +cl_event Event; // describes an event of kernel been submitted previously +cl_int Result; +size_t ResultSize; + +clGetEventInfo(Event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(Result), &Result, &ResultSize); +``` + +If kernel failed an assertion `clGetEventInfo` should put `CL_ASSERT_FAILURE` +in `Result`. + From b69a1cdf1f99b5ef5c78a9d0db0fc92ddccfd64a Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 31 Mar 2021 17:43:38 +0300 Subject: [PATCH 002/122] Remove redundant file Signed-off-by: Sergey Kanaev --- sycl/doc/extensions/Assert/abort.md | 1 - 1 file changed, 1 deletion(-) delete mode 100644 sycl/doc/extensions/Assert/abort.md diff --git a/sycl/doc/extensions/Assert/abort.md b/sycl/doc/extensions/Assert/abort.md deleted file mode 100644 index 8b137891791fe..0000000000000 --- a/sycl/doc/extensions/Assert/abort.md +++ /dev/null @@ -1 +0,0 @@ - From 15ea88ea0a43595e33245a5852559794a3b52eee Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 1 Apr 2021 13:57:14 +0300 Subject: [PATCH 003/122] Fix typo Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index d4582d861c5c3..3b5cbfbfafe47 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -81,12 +81,6 @@ public: : event_error(string_class(Msg), Err) {} event_error(const string_class &Msg, cl_int Err) : runtime_error(Msg, Err) {} - - /// Returns global ID with the dimension provided - int globalId(int Dim) const; - - /// Returns local ID with the dimension provided - int localId(int Dim) const; }; ``` From ca08fecc0945c673579505bf34eee6abba74b902 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 5 Apr 2021 16:16:43 +0300 Subject: [PATCH 004/122] Address some review comments. Add description of built-ins. 
Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 130 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 119 insertions(+), 11 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 3b5cbfbfafe47..956fd64732cf5 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -55,6 +55,7 @@ At least one failed assertion should be reported. When multiple kernels are enqueued and both fail at assertion at least single assertion should be reported. + ## User requirements From user's point of view there are the following requirements: @@ -66,12 +67,10 @@ From user's point of view there are the following requirements: | 3 | Stop under debugger | When debugger is attached, break at assertion point | Highly desired | | 4 | Reliability | Assert failure should be reported regardless of kernel deadlock | Highly desired | -## Contents of `cl::sycl::event_error` -`cl::sycl::event_error::what()` should return the same assertion failure message -as is printed at the time being. +## Contents of `cl::sycl::event_error` -Other than that, interface of `cl::sycl::event_error` should look like: +Interface of `cl::sycl::event_error` should look like: ``` class event_error : public runtime_error { public: @@ -90,22 +89,28 @@ assert failure should be propagated from device-side to SYCL Runtime. This should be performed via calls to `clGetEventInfo` for OpenCL backend and `zeEventQueryStatus` for Level-Zero backend. + ## Terms - Device-side Runtime - part of device-code, which is supplied by Device-side Compiler. - - Low-level Runtime - the backend/runtime, behind DPCPP Runtime. - - Device-side Compiler - compiler which generates device-native bitcode based - on input SPIR-V image. + - Device-side Compiler - compiler which generates device-native binary image + based on input SPIR-V image. + - Low-level Runtime - the backend/runtime behind DPCPP Runtime. - Accessor metadata - parts of accessor representation at device-side: pointer, ranges, offset. 
+ ## How it works? For the time being, `assert(expr)` macro ends up in call to `__devicelib_assert_fail` function. This function is part of [Device library extension](doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst#cl_intel_devicelib_cassert). Device code already contains call to the function. Currently, a device-binary is always linked against fallback implementation. + + +### Device-specific approach + Device-side compiler/linker provides their implementation of `__devicelib_assert_fail` and prefer this implementation over fallback one. @@ -119,9 +124,12 @@ at synchronization points. Refer to [OpenCL](doc/extensions/Assert/opencl.md) and [Level-Zero](doc/extensions/Assert/level-zero.md) extensions. + +### Device-agnostic approach + If Device-side Runtime doesn't support `__devicelib_assert_fail` then a buffer based approach comes in place. The approach doesn't require any support from -Device-side Runtime. Neither it does from Low-level Runtime. +Device-side Runtime and Compiler. Neither it does from Low-level Runtime. Within this approach, a dedicated assert buffer is allocated and implicit kernel argument is introduced. The argument is an accessor with `discard_read_write` @@ -132,7 +140,107 @@ accessor metadata from program scope variable and writes assert information to the assert buffer. Atomic operations are used in order to not overwrite existing information. -Storing and restoring of accessor metadata to/from program scope variable is -performed with help of builtins. Implementations of these builtins are -substituted by frontend. +Both storing of accessor metadata and writing assert failure is performed with +help of built-ins. Implementations of these builtins are substituted by +frontend. + +#### Built-ins operation + +Accessor is a pointer augmented with offset and two ranges (access range and +memory range). + +There are two built-ins provided by frontend: + * `__store_acc()` - to store accessor metadata into program-scope variable. 
+ * `__store_assert_failure()` - to store flag about assert failure in a buffer + using the metadata stored in program-scope variable. + +The accessor should be stored to program scope variable in global address space +using atomic operations. Motivation for using atomic operations: the program may +contain several kernels and some of them could be running simultaneously on a +single device. + +The `__store_assert_failure()` built-in atomically sets a flag in a buffer. The +buffer is accessed using accessor metadata from program-scope variable. This +built-in returns a boolean value which is `true` if the flag is set by this call +to `__store_assert_failure()` and `false` if the flag was already set. +Motivation for using atomic operation is the same as with `__store_acc()` +builtin. + +The following pseudo-code snippets show how these built-ins are used. +First of all, assume the following code as user's one: +``` +void user_func(int X) { + assert(X && "X is nil"); +} + +int main() { + queue Q(...); + Q.submit([&] (handler& CGH) { + CGH.single_task([=] () { + do_smth(); + user_func(0); + do_smth_else(); + }); + }); + ... +} +``` + +The following LLVM IR pseudo code will be generated for the user's code: +``` +@AssertBufferPtr = global void* null +@AssertBufferAccessRange = ... +@AssertBufferMemoryRange = ... +@AssertBufferOffset = ... + +/// user's code +void user_func(int X) { +if (!(X && "X is nil")) { + __assert_fail(...); + } +} + +users_kernel(...) { + do_smth() + user_func(0); + do_smth_else(); +} + +/// a wrapped user's kernel +kernel(AssertBufferAccessor, OtherArguments...) { + __store_acc(AssertBufferAccessor); + users_kernel(OtherArguments...); +} + +/// __assert_fail belongs to Linux version of devicelib +void __assert_fail(...) { + ... + __devicelib_assert_fail(...); +} + +void __devicelib_assert_fail(Expr, File, Line, GlobalID, LocalID) { + ... + if (__store_assert_info()) + printf("Assertion `%s' failed in %s at line %i. 
GlobalID: %i, LocalID: %i", + Expr, File, Line, GlobalID, LocalID); +} + +/// The following are built-ins provided by frontend +void __store_acc(accessor) { + %1 = accessor.getPtr(); + store void * %1, void * @AssertBufferPtr +} + +bool __store_assert_info(...) { + AssertBAcc = __fetch_acc(); + // fill in data in AsBAcc + volatile int *Ptr = (volatile int *)AssertBAcc.getPtr(); + bool Expected = false; + bool Desired = true; + + return atomic_cas(Ptr, Expected, Desired, SequentialConsistentMemoryOrder); + // or it could be: + // return !atomic_exchange(Ptr, Desired, SequentialConsistentMemoryOrder); +} +``` From 1f8d9a91069806e79c63c8cb96143bb4b7195071 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 5 Apr 2021 18:00:58 +0300 Subject: [PATCH 005/122] Fix links Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 956fd64732cf5..5c4deac3118aa 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -104,7 +104,7 @@ should be performed via calls to `clGetEventInfo` for OpenCL backend and ## How it works? For the time being, `assert(expr)` macro ends up in call to -`__devicelib_assert_fail` function. This function is part of [Device library extension](doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst#cl_intel_devicelib_cassert). +`__devicelib_assert_fail` function. This function is part of [Device library extension](extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst#cl_intel_devicelib_cassert). Device code already contains call to the function. Currently, a device-binary is always linked against fallback implementation. @@ -121,7 +121,7 @@ is responsible for: When detected, Low-level Runtime reports assert failure to DPCPP Runtime at synchronization points. 
-Refer to [OpenCL](doc/extensions/Assert/opencl.md) and [Level-Zero](doc/extensions/Assert/level-zero.md) +Refer to [OpenCL](extensions/Assert/opencl.md) and [Level-Zero](extensions/Assert/level-zero.md) extensions. From 2ee590c861704a559bfb2d1eb7a243b350b48d5e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 5 Apr 2021 18:08:23 +0300 Subject: [PATCH 006/122] Clarify that assertion failure message is printed by DPCPP Runtime Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 5c4deac3118aa..a40ef6cf9af3b 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -50,7 +50,8 @@ In this use-case every work-item with even X dimension will trigger assertion failure. Assertion failure should be reported via asynchronous exceptions. If asynchronous exception handler is set the failure is reported with `cl::sycl::event_error` exception. Otherwise, SYCL Runtime should trigger abort. -At least one failed assertion should be reported. +At least one failed assertion should be reported. The assertion failure message +is printed to `stderr` by SYCL Runtime. When multiple kernels are enqueued and both fail at assertion at least single assertion should be reported. From 77699a24713455836cdffec6a1feab30f4ba7ee3 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 6 Apr 2021 17:31:16 +0300 Subject: [PATCH 007/122] Clarify that fallback assert impl is synchronous Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index a40ef6cf9af3b..ca85893ae4466 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -141,6 +141,9 @@ accessor metadata from program scope variable and writes assert information to the assert buffer. Atomic operations are used in order to not overwrite existing information. 
+DPCPP Runtime checks contents of the assert buffer for assert failure flag after +kernel finishes. + Both storing of accessor metadata and writing assert failure is performed with help of built-ins. Implementations of these builtins are substituted by frontend. From 001a5736e4c3f7e269b889585d1b401879e681a6 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 6 Apr 2021 18:22:30 +0300 Subject: [PATCH 008/122] Fix typo in level-zero ext draft Signed-off-by: Sergey Kanaev --- sycl/doc/extensions/Assert/level-zero.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sycl/doc/extensions/Assert/level-zero.md b/sycl/doc/extensions/Assert/level-zero.md index 265ff5858d83f..f3b8e402db052 100644 --- a/sycl/doc/extensions/Assert/level-zero.md +++ b/sycl/doc/extensions/Assert/level-zero.md @@ -4,8 +4,9 @@ This extension enables detection of assert failure of kernel. # New enum value -`ze_result_t` enumeration should be augmented with `ZE_RESULT_ABORTED` enum -element. This enum value indicated a detected assert failure at device-side. +`ze_result_t` enumeration should be augmented with `ZE_RESULT_ASSERT_FAILED` +enum element. This enum value indicates a detected assert failure at +device-side. # Changed API @@ -15,5 +16,5 @@ ze_result Result = zeEventQueryStatus(Event); ``` If kernel failed an assertion `zeEventQueryStatus` should return -`ZE_RESULT_ABORTED`. +`ZE_RESULT_ASSERT_FAILED`. From 32b647987d3db0072f1bac298b726c8a867467ec Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 7 Apr 2021 18:29:40 +0300 Subject: [PATCH 009/122] Address some review comments. Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 85 ++++++++++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 33 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index ca85893ae4466..d57c55658188e 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -2,19 +2,20 @@ **IMPORTANT**: This document is a draft. 
-During debugging of kernel code user may put assertions here and there. -The expected behaviour of assertion failure at host is application abort. -Our choice for device-side assertions is asynchronous exception in order to -allow for extensibility. - -The user is free to disable assertions by defining `NDEBUG` macro at -compile-time. +Using the standard C++ `assert` API ("assertions") is an important debugging +technique widely used by developers. This document describes the design of +supporting assertions within SYCL device code. +The basic approach we chose is delivering device-side assertions as host-side +asynchronous exceptions, which allows further extensibility, such as better +error handling or potential recovery. +As usual, device-side assertions can be disabled by defining `NDEBUG` macro at +compile time. ## Use-case example ``` -using namespace cl::sycl; +using namespace sycl; auto ErrorHandler = [] (exception_list Exs) { for (exception_ptr const& E : Exs) { try { @@ -49,12 +50,13 @@ int main() { In this use-case every work-item with even X dimension will trigger assertion failure. Assertion failure should be reported via asynchronous exceptions. If asynchronous exception handler is set the failure is reported with -`cl::sycl::event_error` exception. Otherwise, SYCL Runtime should trigger abort. -At least one failed assertion should be reported. The assertion failure message -is printed to `stderr` by SYCL Runtime. +`sycl::event_error` exception. Otherwise, SYCL Runtime should trigger abort. +Even though multiple failures of the same or different assertions can happen in +multiple workitems, implementation is required to deliver only one. The +assertion failure message is printed to `stderr` by SYCL Runtime. -When multiple kernels are enqueued and both fail at assertion at least single -assertion should be reported. +When multiple kernels are enqueued and more than one fail at assertion, at least +single assertion should be reported. 
## User requirements @@ -68,10 +70,14 @@ From user's point of view there are the following requirements: | 3 | Stop under debugger | When debugger is attached, break at assertion point | Highly desired | | 4 | Reliability | Assert failure should be reported regardless of kernel deadlock | Highly desired | +Implementations without enough capabilities to implement fourth requirement are +allowed to realize the fallback approach described below, which does not +guarantee assertion failure delivery to host, but is still useful in many +practical cases. -## Contents of `cl::sycl::event_error` +## Contents of `sycl::event_error` -Interface of `cl::sycl::event_error` should look like: +Interface of `sycl::event_error` should look like: ``` class event_error : public runtime_error { public: @@ -87,50 +93,63 @@ public: Regardless of whether asynchronous exception handler is set or not, there's an action to be performed by SYCL Runtime. To achieve this, information about assert failure should be propagated from device-side to SYCL Runtime. This -should be performed via calls to `clGetEventInfo` for OpenCL backend and -`zeEventQueryStatus` for Level-Zero backend. +should be performed via calls to `piEventGetInfo`. This Plugin Interface call +"lowers" to `clGetEventInfo` for OpenCL backend and `zeEventQueryStatus` for +Level-Zero backend. ## Terms - - Device-side Runtime - part of device-code, which is supplied by Device-side - Compiler. - - Device-side Compiler - compiler which generates device-native binary image + - Device-side Runtime - runtime library supplied by the Native Device Compiler + and running on the device. + - Native Device Compiler - compiler which generates device-native binary image based on input SPIR-V image. - - Low-level Runtime - the backend/runtime behind DPCPP Runtime. + - Low-level Runtime - the backend/runtime behind DPCPP Runtime attached via the + Plugin Interface. 
- Accessor metadata - parts of accessor representation at device-side: pointer, ranges, offset. ## How it works? -For the time being, `assert(expr)` macro ends up in call to -`__devicelib_assert_fail` function. This function is part of [Device library extension](extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst#cl_intel_devicelib_cassert). -Device code already contains call to the function. Currently, a device-binary -is always linked against fallback implementation. +`assert(expr)` macro ends up in call to `__devicelib_assert_fail`. This function +is part of [Device library extension](extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst#cl_intel_devicelib_cassert). + +Implementation of this function is supplied by Native Device Compiler for +safe approach or by DPCPP Compiler for fallback one. +Due to lack of support of online linking in Level-Zero, the application is +linked against fallback implementation of `__devicelib_assert_fail`. Hence, +Native Device Compilers should prefer their implementation instead of the one +provided in incoming SPIR-V/LLVM IR binary. -### Device-specific approach -Device-side compiler/linker provides their implementation of `__devicelib_assert_fail` -and prefer this implementation over fallback one. +### Safe approach -If Device-side Runtime supports `__devicelib_assert_fail` then Low-Level Runtime -is responsible for: +This is the preferred approach and implementations should use it when possible. +It guarantees assertion failure notification delivery to the host regardless of +kernel behavior which hit the assertion. + +The Native Device Compiler is responsible for providing implementation of +`__devicelib_assert_fail` which completely hides details of communication +between the device code and the Low-Level Runtime from the SYCL device compiler +and runtime. The Low-Level Runtime is responsible for: - detecting if assert failure took place; - flushing assert message to `stderr` on host. 
+ When detected, Low-level Runtime reports assert failure to DPCPP Runtime -at synchronization points. +via events objects. Refer to [OpenCL](extensions/Assert/opencl.md) and [Level-Zero](extensions/Assert/level-zero.md) extensions. -### Device-agnostic approach +### Fallback approach If Device-side Runtime doesn't support `__devicelib_assert_fail` then a buffer based approach comes in place. The approach doesn't require any support from -Device-side Runtime and Compiler. Neither it does from Low-level Runtime. +Device-side Runtime and Native Device Compiler. Neither it does from Low-level +Runtime. Within this approach, a dedicated assert buffer is allocated and implicit kernel argument is introduced. The argument is an accessor with `discard_read_write` From b8637c2003cd558578886c3de821a6125eb6fe65 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 8 Apr 2021 16:30:00 +0300 Subject: [PATCH 010/122] Add exception extension Signed-off-by: Sergey Kanaev --- .../SYCL_INTEL_assert_exception.asciidoc | 109 ++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 sycl/doc/extensions/Assert/SYCL_INTEL_assert_exception.asciidoc diff --git a/sycl/doc/extensions/Assert/SYCL_INTEL_assert_exception.asciidoc b/sycl/doc/extensions/Assert/SYCL_INTEL_assert_exception.asciidoc new file mode 100644 index 0000000000000..691548bfa9502 --- /dev/null +++ b/sycl/doc/extensions/Assert/SYCL_INTEL_assert_exception.asciidoc @@ -0,0 +1,109 @@ += SYCL_INTEL_assert_exception + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en + +:blank: pass:[ +] + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. 
+:language: {basebackend@docbook:c++:cpp} + +// This is necessary for asciidoc, but not for asciidoctor +:cpp: C++ + +== Introduction +IMPORTANT: This specification is a draft. + +NOTE: Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are +trademarks of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. +used by permission by Khronos. + +NOTE: This document is better viewed when rendered as html with asciidoctor. +GitHub does not render image icons. + +This document describes an extension that adds the `assert` error code, which +reports an assertion failure in device code during kernel execution. + +== Name Strings + ++SYCL_INTEL_assert_exception+ + +== Notice + +Copyright (c) 2021 Intel Corporation. All rights reserved. + +== Status + +Working Draft + +This is a preview extension specification, intended to provide early access to +a feature for review and community feedback. When the feature matures, this +specification may be released as a formal extension. + +Because the interfaces defined by this specification are not final and are +subject to change they are not intended to be used by shipping software +products. + +== Version + +Built On: {docdate} + +Revision: 1 + +== Dependencies + +This extension is written against the SYCL 2020 specification, Revision 3. + +== Overview + +== Modifications of SYCL 2020 Specification + +=== Change Section 4.13.2 Exception class interface + +Add enum member `assert` to the `errc` enum class: + +[source,c++,`sycl::errc`,linenums] +---- +assert = /* implementation defined */ +---- + +==== Change table 136 Values of `errc` enum + +Add row `assert`: + +[width="40%",frame="topbot",options="header,footer"] +|====================== +|Standard SYCL Error Codes |Description +|`assert` | An assert failure happened in device code during kernel execution +|====================== + +== Issues + +None. 
+ +== Revision History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|======================================== +|Rev|Date|Author|Changes +|1|2021-04-08|Sergey Kanaev|*Initial public working draft* +|======================================== + +//************************************************************************ +//Other formatting suggestions: +// +//* Use *bold* text for host APIs, or [source] syntax highlighting. +//* Use +mono+ text for device APIs, or [source] syntax highlighting. +//* Use +mono+ text for extension names, types, or enum values. +//* Use _italics_ for parameters. +//************************************************************************ From b0cd85ff063a3bb5f85d38aa119f694600f3c476 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 8 Apr 2021 17:20:59 +0300 Subject: [PATCH 011/122] Use error-code instead of distinct exception. Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 36 ++++++++---------------------------- 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index d57c55658188e..058fde5126a76 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -48,12 +48,11 @@ int main() { ``` In this use-case every work-item with even X dimension will trigger assertion -failure. Assertion failure should be reported via asynchronous exceptions. If -asynchronous exception handler is set the failure is reported with -`sycl::event_error` exception. Otherwise, SYCL Runtime should trigger abort. +failure. Assertion failure should be reported via asynchronous exceptions with +[`assert` error code](extensions/Assert/SYCL_INTEL_assert_exception.asciidoc). Even though multiple failures of the same or different assertions can happen in multiple workitems, implementation is required to deliver only one. The -assertion failure message is printed to `stderr` by SYCL Runtime. +assertion failure message is printed to `stderr` by DPCPP Runtime. 
When multiple kernels are enqueued and more than one fail at assertion, at least single assertion should be reported. @@ -75,28 +74,6 @@ allowed to realize the fallback approach described below, which does not guarantee assertion failure delivery to host, but is still useful in many practical cases. -## Contents of `sycl::event_error` - -Interface of `sycl::event_error` should look like: -``` -class event_error : public runtime_error { -public: - event_error() = default; - - event_error(const char *Msg, cl_int Err) - : event_error(string_class(Msg), Err) {} - - event_error(const string_class &Msg, cl_int Err) : runtime_error(Msg, Err) {} -}; -``` - -Regardless of whether asynchronous exception handler is set or not, there's an -action to be performed by SYCL Runtime. To achieve this, information about -assert failure should be propagated from device-side to SYCL Runtime. This -should be performed via calls to `piEventGetInfo`. This Plugin Interface call -"lowers" to `clGetEventInfo` for OpenCL backend and `zeEventQueryStatus` for -Level-Zero backend. - ## Terms @@ -118,7 +95,7 @@ is part of [Device library extension](extensions/C-CXX-StandardLibrary/DeviceLib Implementation of this function is supplied by Native Device Compiler for safe approach or by DPCPP Compiler for fallback one. -Due to lack of support of online linking in Level-Zero, the application is +NB: Due to lack of support of online linking in Level-Zero, the application is linked against fallback implementation of `__devicelib_assert_fail`. Hence, Native Device Compilers should prefer their implementation instead of the one provided in incoming SPIR-V/LLVM IR binary. @@ -138,7 +115,10 @@ and runtime. The Low-Level Runtime is responsible for: - flushing assert message to `stderr` on host. When detected, Low-level Runtime reports assert failure to DPCPP Runtime -via events objects. +via events objects. 
To achieve this, information about assert failure should be +propagated from device-side to SYCL Runtime. This should be performed via calls +to `piEventGetInfo`. This Plugin Interface call "lowers" to `clGetEventInfo` for +OpenCL backend and `zeEventQueryStatus` for Level-Zero backend. Refer to [OpenCL](extensions/Assert/opencl.md) and [Level-Zero](extensions/Assert/level-zero.md) extensions. From 8c036486b0864399d1b59144d03f390cd7972920 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 9 Apr 2021 17:26:46 +0300 Subject: [PATCH 012/122] [SYCL] Add OpenCL extension for assert error code Signed-off-by: Sergey Kanaev --- .../cl_intel_assert_return_code.asciidoc | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 sycl/doc/extensions/Assert/cl_intel_assert_return_code.asciidoc diff --git a/sycl/doc/extensions/Assert/cl_intel_assert_return_code.asciidoc b/sycl/doc/extensions/Assert/cl_intel_assert_return_code.asciidoc new file mode 100644 index 0000000000000..b7eec45d0a26f --- /dev/null +++ b/sycl/doc/extensions/Assert/cl_intel_assert_return_code.asciidoc @@ -0,0 +1,99 @@ +cl_intel_assert_return_code +====================================== + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en + +:blank: pass:[ +] + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + +== Name Strings + ++cl_intel_assert_return_code+ + +== Notice + +Copyright (c) 2021 Intel Corporation. All rights reserved. + +== Status + +Working Draft + +This is a preview extension specification, intended to provide early access to +a feature for review and community feedback. When the feature matures, this +specification may be released as a formal extension. 
+ +Because the interfaces defined by this specification are not final and are +subject to change they are not intended to be used by shipping software +products. + +== Version + +Built On: {docdate} + +Revision: 1 + +== Dependencies + +This extension is written against the OpenCL Specification Version 1.0, Revision 48. + +This extension requires OpenCL 1.0 or later. + +== Overview + +This extension allows OpenCL 1.x and 2.x devices to notify host that assert had +happened. + +== New error code + +[source] +---- +CL_ASSERT_FAILURE +---- + +Negative value of this error code should be set into `param_value` of +`clGetEventInfo` as described in table 5.15 "clGetEventInfo parameter queries" if +assert failure took place in device-code during kernel execution. + +An example: +[source] +---- +cl_event Event; // describes an event of kernel been submitted previously +cl_int Result; +size_t ResultSize; + +clGetEventInfo(Event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(Result), &Result, &ResultSize); +---- + +If kernel failed an assertion `clGetEventInfo` should put `CL_ASSERT_FAILURE` in +`Result`. + +== Issues + +None. + +== Revision History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|======================================== +|Rev|Date|Author|Changes +|1|2021-04-09|Sergey Kanaev|*Initial public working draft* +|======================================== + +//************************************************************************ +//Other formatting suggestions: +// +//* Use *bold* text for host APIs, or [source] syntax highlighting. +//* Use +mono+ text for device APIs, or [source] syntax highlighting. +//* Use +mono+ text for extension names, types, or enum values. +//* Use _italics_ for parameters.
+//************************************************************************ From 121c945bbbe2cff52d4766127de3bbddb9df4d68 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 9 Apr 2021 17:50:17 +0300 Subject: [PATCH 013/122] [SYCL] Add Level-Zero extension for assert error code Signed-off-by: Sergey Kanaev --- .../ze_intel_assert_return_code.asciidoc | 124 ++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 sycl/doc/extensions/Assert/ze_intel_assert_return_code.asciidoc diff --git a/sycl/doc/extensions/Assert/ze_intel_assert_return_code.asciidoc b/sycl/doc/extensions/Assert/ze_intel_assert_return_code.asciidoc new file mode 100644 index 0000000000000..b56937f9ba0d3 --- /dev/null +++ b/sycl/doc/extensions/Assert/ze_intel_assert_return_code.asciidoc @@ -0,0 +1,124 @@ +ze_intel_assert_return_code +====================================== + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en + +:blank: pass:[ +] + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + +== Name Strings + ++ze_intel_assert_return_code+ + +== Notice + +Copyright (c) 2021 Intel Corporation. All rights reserved. + +== Status + +Working Draft + +This is a preview extension specification, intended to provide early access to +a feature for review and community feedback. When the feature matures, this +specification may be released as a formal extension. + +Because the interfaces defined by this specification are not final and are +subject to change they are not intended to be used by shipping software +products. + +== Version + +Built On: {docdate} + +Revision: 1 + +== Dependencies + +This extension is written against the Level-Zero Specification Version 1.1.2. 
+ +== Overview + +This extension allows Level-Zero devices to notify host that assert had +happened. + +== New enumeration value + +`ze_result_t`: + +[source] +---- +ZE_RESULT_ASSERT_FAILED +---- + +This value should be returned by `zeEventQueryStatus` if assert failure took +place in device-code during kernel execution. + +An example: +[source] +---- +ze_event_handle_t Event; // describes an event of kernel been submitted previously +ze_result_t Result = zeEventQueryStatus(Event); +---- + +If kernel failed an assertion `zeEventQueryStatus` should return +`ZE_RESULT_ASSERT_FAILED`. + + +== Modifications to Level-Zero API + +(Add to Section API Documentation / Core API / Common / Common Enums / `ze_result_t`) :: ++ +-- +`ZE_RESULT_ASSERT_FAILED = 0x70000006` + +[Core] Assert failure took place in device-code during kernel execution. +-- + +(Add to section API Documentation / Core API / Event / Event Functions / `zeEventQueryStatus`) :: ++ +-- +Return: + +`ZE_RESULT_ASSERT_FAILED` +-- + +An example: +[source] +---- +cl_event Event; // describes an event of kernel been submitted previously +cl_int Result; +size_t ResultSize; + +clGetEventInfo(Event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(Result), &Result, &ResultSize); +---- + +If kernel failed an assertion `clGetEventInfo` should put `CL_ASSERT_FAILURE` in +`Result`. + +== Issues + +None. + +== Revision History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|======================================== +|Rev|Date|Author|Changes +|1|2021-04-09|Sergey Kanaev|*Initial public working draft* +|======================================== + +//************************************************************************ +//Other formatting suggestions: +// +//* Use *bold* text for host APIs, or [source] syntax highlighting. +//* Use +mono+ text for device APIs, or [source] syntax highlighting. +//* Use +mono+ text for extension names, types, or enum values. +//* Use _italics_ for parameters.
+//************************************************************************ From a4b48849bbbbc290f3cb0168894a250579fe1bc0 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 9 Apr 2021 17:52:28 +0300 Subject: [PATCH 014/122] Remove draft files Signed-off-by: Sergey Kanaev --- sycl/doc/extensions/Assert/level-zero.md | 20 -------------------- sycl/doc/extensions/Assert/opencl.md | 22 ---------------------- 2 files changed, 42 deletions(-) delete mode 100644 sycl/doc/extensions/Assert/level-zero.md delete mode 100644 sycl/doc/extensions/Assert/opencl.md diff --git a/sycl/doc/extensions/Assert/level-zero.md b/sycl/doc/extensions/Assert/level-zero.md deleted file mode 100644 index f3b8e402db052..0000000000000 --- a/sycl/doc/extensions/Assert/level-zero.md +++ /dev/null @@ -1,20 +0,0 @@ -# Overview - -This extension enables detection of assert failure of kernel. - -# New enum value - -`ze_result_t` enumeration should be augmented with `ZE_RESULT_ASSERT_FAILED` -enum element. This enum value indicated a detected assert failure at -device-side. - -# Changed API - -``` -ze_event_handle_t Event; // describes an event of kernel been submitted previously -ze_result Result = zeEventQueryStatus(Event); -``` - -If kernel failed an assertion `zeEventQueryStatus` should return -`ZE_RESULT_ASSERT_FAILED`. - diff --git a/sycl/doc/extensions/Assert/opencl.md b/sycl/doc/extensions/Assert/opencl.md deleted file mode 100644 index 50ad0b7db0897..0000000000000 --- a/sycl/doc/extensions/Assert/opencl.md +++ /dev/null @@ -1,22 +0,0 @@ -# Overview - -This extension enables detection of assert failure of kernel. - -# New error code - -`CL_ASSERT_FAILURE` is added to indicate a detected assert failure at -device-side. 
- -# Changed API - -``` -cl_event Event; // describes an event of kernel been submitted previously -cl_int Result; -size_t ResultSize; - -clGetEventInfo(Event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(Result), &Result, &ResultSize); -``` - -If kernel failed an assertion `clGetEventInfo` should put `CL_ASSERT_FAILURE` -in `Result`. - From c06db5f013f337c5ef76d56d5ade53e7287d273e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 9 Apr 2021 17:55:16 +0300 Subject: [PATCH 015/122] Remove unwanted part Signed-off-by: Sergey Kanaev --- .../Assert/ze_intel_assert_return_code.asciidoc | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/sycl/doc/extensions/Assert/ze_intel_assert_return_code.asciidoc b/sycl/doc/extensions/Assert/ze_intel_assert_return_code.asciidoc index b56937f9ba0d3..85db891ad5bcf 100644 --- a/sycl/doc/extensions/Assert/ze_intel_assert_return_code.asciidoc +++ b/sycl/doc/extensions/Assert/ze_intel_assert_return_code.asciidoc @@ -87,19 +87,6 @@ Return: + `ZE_RESULT_ASSERT_FAILED` -- -An example: -[source] ----- -cl_event Event; // describes an event of kernel been submitted previously -cl_int Result; -size_t ResultSize; - -clGetEventInfo(Event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(Result), &Result, &ResultSize); ----- - -If kernel failed an assertion `clGetEventInfo` should put `CL_ASSERT_FAILURE` in -`Result`. - == Issues None. From a99368bfb0cd82142852296370d4181560897d6f Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 9 Apr 2021 18:00:19 +0300 Subject: [PATCH 016/122] Add limitations on submit to same queue after exception thrown. Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 058fde5126a76..a677b0113aee3 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -100,6 +100,13 @@ linked against fallback implementation of `__devicelib_assert_fail`. 
Hence, Native Device Compilers should prefer their implementation instead of the one provided in incoming SPIR-V/LLVM IR binary. +Limitations for user: + - DPCPP RT, Low-level RT and device state is unknown after throwing of "assert" + asynchronous exception; + - "assert" asynchronous exception might not be recoverable; + - there might not be guarantees on enqueueing commands (kernel, copy, etc.) to + same queue/context: guarantees may vary with device/Low-level RT. + ### Safe approach From 78d7fcbd5ef4765078eaeeaab9a723ec68a7ea7b Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 9 Apr 2021 18:04:21 +0300 Subject: [PATCH 017/122] Add format of assert message Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index a677b0113aee3..ce415ff9eef83 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -92,6 +92,11 @@ practical cases. `assert(expr)` macro ends up in call to `__devicelib_assert_fail`. This function is part of [Device library extension](extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst#cl_intel_devicelib_cassert). +Format of assert failure message, printed to `stderr` is the following: +``` +:: : global id: [,,], local id: [,,] Assertion `` failed. +``` + Implementation of this function is supplied by Native Device Compiler for safe approach or by DPCPP Compiler for fallback one. 
From 6882e95a17823c92e29925d0e70a84b3f47b7ac2 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 9 Apr 2021 18:18:36 +0300 Subject: [PATCH 018/122] Clarify where kernel wrapping takes place Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index ce415ff9eef83..2789e200c37b3 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -159,6 +159,41 @@ Both storing of accessor metadata and writing assert failure is performed with help of built-ins. Implementations of these builtins are substituted by frontend. +User's kernel is executed through a wrapper. Wrapping takes place in DPCPP +Runtime headers in a following manner: + +``` +class handler { + +template parallel_for(KernelFunc, Range) { +#ifndef NDEBUG + // Assert required +  if (!MQueue->get_device()->assert_fail_supported()) { +    using KName2 = class ASSERT_WRAPPER_NAME(KernelName); +     +    auto AssertBufferAcc = MQueue->get_context()->getAssertBufferAccessor(this); + +    parallel_for_impl( +      Range, +      [=](Item) { +        __store_acc(AssertBuffAcc); +        KernelFunc(Item); +      }); +  } else { +#endif + + // (No assert required) OR (Assert supported by device) +     // ordinary enqueue process + +#ifndef NDEBUG + } +#endif +} + +} +``` + + #### Built-ins operation Accessor is a pointer augmented with offset and two ranges (access range and From 32663e03410db9685c2eb6841621d558a9267e3f Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 13 Apr 2021 18:36:35 +0300 Subject: [PATCH 019/122] Changes to SYCL specification Signed-off-by: Sergey Kanaev --- .../SYCL_INTEL_assert_exception.asciidoc | 109 --------------- .../Assert/SYCL_ONEAPI_ASSERT.asciidoc | 131 ++++++++++++++++++ .../DeviceLibExtensions.rst | 2 + 3 files changed, 133 insertions(+), 109 deletions(-) delete mode 100644 sycl/doc/extensions/Assert/SYCL_INTEL_assert_exception.asciidoc create mode 
100644 sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc diff --git a/sycl/doc/extensions/Assert/SYCL_INTEL_assert_exception.asciidoc b/sycl/doc/extensions/Assert/SYCL_INTEL_assert_exception.asciidoc deleted file mode 100644 index 691548bfa9502..0000000000000 --- a/sycl/doc/extensions/Assert/SYCL_INTEL_assert_exception.asciidoc +++ /dev/null @@ -1,109 +0,0 @@ -= SYCL_INTEL_assert_exception - -:source-highlighter: coderay -:coderay-linenums-mode: table - -// This section needs to be after the document title. -:doctype: book -:toc2: -:toc: left -:encoding: utf-8 -:lang: en - -:blank: pass:[ +] - -// Set the default source code type in this document to C++, -// for syntax highlighting purposes. This is needed because -// docbook uses c++ and html5 uses cpp. -:language: {basebackend@docbook:c++:cpp} - -// This is necessary for asciidoc, but not for asciidoctor -:cpp: C++ - -== Introduction -IMPORTANT: This specification is a draft. - -NOTE: Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are -trademarks of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. -used by permission by Khronos. - -NOTE: This document is better viewed when rendered as html with asciidoctor. -GitHub does not render image icons. - -This document describes an extension to rename device-specific kernel queries -to better describe the operations performed. - -== Name Strings - -+SYCL_INTEL_assert_exception+ - -== Notice - -Copyright (c) 2021 Intel Corporation. All rights reserved. - -== Status - -Working Draft - -This is a preview extension specification, intended to provide early access to -a feature for review and community feedback. When the feature matures, this -specification may be released as a formal extension. - -Because the interfaces defined by this specification are not final and are -subject to change they are not intended to be used by shipping software -products. 
- -== Version - -Built On: {docdate} + -Revision: 1 - -== Dependencies - -This extension is written against the SYCL 2020 specification, Revision 3. - -== Overview - -== Modifications of SYCL 2020 Specification - -=== Change Section 4.13.2 Exception class interface - -Add enum member `assert` to the `errc` enum class: - -[source,c++,`sycl::kernel`,linenums] ----- -assert = /* implementation defined */ ----- - -==== Change table 136 Values of `errc` enum - -Add row `assert`: - -[width="40%",frame="topbot",options="header,footer"] -|====================== -|Standard SYCL Error Codes |Description -|`assert` | Assert failure had happened in device code during kernel execution -|====================== - -== Issues - -None. - -== Revision History - -[cols="5,15,15,70"] -[grid="rows"] -[options="header"] -|======================================== -|Rev|Date|Author|Changes -|1|2021-04-08|Sergey Kanaev|*Initial public working draft* -|======================================== - -//************************************************************************ -//Other formatting suggestions: -// -//* Use *bold* text for host APIs, or [source] syntax highlighting. -//* Use +mono+ text for device APIs, or [source] syntax highlighting. -//* Use +mono+ text for extension names, types, or enum values. -//* Use _italics_ for parameters. -//************************************************************************ diff --git a/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc new file mode 100644 index 0000000000000..8b79bf86fa967 --- /dev/null +++ b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc @@ -0,0 +1,131 @@ += SYCL_EXT_ONEAPI_ASSERT + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. 
+:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en + +:blank: pass:[ +] + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + +// This is necessary for asciidoc, but not for asciidoctor +:cpp: C++ + +== Notice +IMPORTANT: This specification is a draft. + +Copyright (c) 2021-2021 Intel Corporation. All rights reserved. + +NOTE: Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are +trademarks of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. +used by permission by Khronos. + +NOTE: This document is better viewed when rendered as html with asciidoctor. +GitHub does not render image icons. + +== Dependencies + +This extension is written against the SYCL 2020 specification, Revision 3. + +== Status + +Working Draft + +This is a preview extension specification, intended to provide early access to +a feature for review and community feedback. When the feature matures, this +specification may be released as a formal extension. + +Because the interfaces defined by this specification are not final and are +subject to change they are not intended to be used by shipping software +products. + +== Introduction +This extension adds the ability for device code to call the C++ `assert()` +macro. The behavior of `assert()` in device code is similar to its behavior in +host code. If the asserted condition is false, a message is printed to `stderr` +and then the program typically aborts. The mechanism for aborting the program is +different, though. Whereas the host version calls `std::abort()`, the device +version causes an asynchronous SYCL `exception` with the +`errc::ext_oneapi_assert` error code to be thrown. The application can catch +this exception like any other asynchronous exception that is thrown from an +executing kernel. 
The numeric value of this enumeration element is defined by +implementation. + + +Catching the asynchronous exception with `sycl::errc::ext_oneapi_assert` error +code means that assert failure had happened in device code during kernel +execution and the assert message is already printed to `stderr`. Format of the +assert message is the following: +``` +:: : global id: [,,], local id: [,,] Assertion `` failed. +``` + +It is unspecified whether a failing `assert()` returns to its caller before the +kernel terminates. If a failing call returns, the device code may need to +continue execution without deadlocking for the assertion message to be printed +or for the exception to be thrown. + +The contents of the exception's `e.what()` string is unspecified. Since the +assertion message is already printed to `stderr` by the time the exception is +thrown, the `e.what()` string may not include the location of the failing +assertion. + +The `assert()` macro is defined in system include headers, not in SYCL headers. +On most systems it is `` and/or `` header files. +The user can disable assertions in device code by defining the `NDEBUG` +preprocessor macro at the point when `` and `/` +are included. + + +== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification section 6.3.3 "Feature test macros". Therefore, an implementation +supporting this extension must predefine the macro `SYCL_EXT_ONEAPI_ASSERT` to +one of the values defined in the table below. Applications can test for the +existence of this macro to determine if the implementation supports this +feature, or applications can test the macro’s value to determine which of the +extension’s APIs the implementation supports. + +[%header,cols="1,5"] +|=== +|Value |Description +|1 |Initial extension version. Base features are supported. +|=== + +== Version + +Built On: {docdate} + +Revision: 1 + +== Issues + +None.
+ +== Revision History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|======================================== +|Rev|Date|Author|Changes +|1|2021-04-08|Sergey Kanaev, Gregory M Lueck |*Initial public working draft* +|======================================== + +//************************************************************************ +//Other formatting suggestions: +// +//* Use *bold* text for host APIs, or [source] syntax highlighting. +//* Use +mono+ text for device APIs, or [source] syntax highlighting. +//* Use +mono+ text for extension names, types, or enum values. +//* Use _italics_ for parameters. +//************************************************************************ diff --git a/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst b/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst index 8b8b98d7a12bb..62ed492a76ac4 100644 --- a/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst +++ b/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst @@ -33,6 +33,8 @@ Example of a message: .. code: foo.cpp:42: void foo(int): global id: [0,0,0], local id: [0,0,0] Assertion `buf[wiID] == 0 && "Invalid value"` failed. +See also: [assert extension](../Assert/SYCL_ONEAPI_ASSERT.asciidoc) + cl_intel_devicelib_math ========================== From 2b84a83dce857993b7dcef396f0ff9a343ec1c9b Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 13 Apr 2021 18:58:59 +0300 Subject: [PATCH 020/122] Elaborate on limitations Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 2789e200c37b3..22d626adcd1f4 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -105,12 +105,12 @@ linked against fallback implementation of `__devicelib_assert_fail`. Hence, Native Device Compilers should prefer their implementation instead of the one provided in incoming SPIR-V/LLVM IR binary. 
-Limitations for user: - - DPCPP RT, Low-level RT and device state is unknown after throwing of "assert" - asynchronous exception; - - "assert" asynchronous exception might not be recoverable; - - there might not be guarantees on enqueueing commands (kernel, copy, etc.) to - same queue/context: guarantees may vary with device/Low-level RT. +Limitations for user after catching the "assert" asynchronous exception: + - When using GPU device and the kernel hangs/crashes the subsequent enqueues + will fail; +When using CPU devices the user can proceed with enqueues to the same +device/queue/context. +DPCPP Runtime remains in valid state after "assert" exception been thrown. ### Safe approach From 423107b3f22ec85fdead940dce4b1b9525fea361 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 14 Apr 2021 10:19:58 +0300 Subject: [PATCH 021/122] Fix link Signed-off-by: Sergey Kanaev --- .../extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst b/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst index 62ed492a76ac4..1c370e57ad89c 100644 --- a/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst +++ b/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst @@ -33,7 +33,8 @@ Example of a message: .. code: foo.cpp:42: void foo(int): global id: [0,0,0], local id: [0,0,0] Assertion `buf[wiID] == 0 && "Invalid value"` failed. -See also: [assert extension](../Assert/SYCL_ONEAPI_ASSERT.asciidoc) +See also: assert_extension_. +.. 
_assert_extension: ../Assert/SYCL_ONEAPI_ASSERT.asciidoc) cl_intel_devicelib_math ========================== From 76115114280afcd1a6fec0afd69171c4656715cc Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 14 Apr 2021 14:46:58 +0300 Subject: [PATCH 022/122] Add sequence describing how DPCPP RT gets to know about assert failure Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 22d626adcd1f4..8a6767af12c15 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -108,12 +108,19 @@ provided in incoming SPIR-V/LLVM IR binary. Limitations for user after catching the "assert" asynchronous exception: - When using GPU device and the kernel hangs/crashes the subsequent enqueues will fail; + When using CPU devices the user can proceed with enqueues to the same device/queue/context. DPCPP Runtime remains in valid state after "assert" exception been thrown. -### Safe approach +### Current violation + +While throwing an asynchronous exception is quite an extensible way, for the +time being DPCPP Runtime merely calls `abort()`. + + +## Safe approach This is the preferred approach and implementations should use it when possible. It guarantees assertion failure notification delivery to the host regardless of @@ -135,6 +142,20 @@ OpenCL backend and `zeEventQueryStatus` for Level-Zero backend. Refer to [OpenCL](extensions/Assert/opencl.md) and [Level-Zero](extensions/Assert/level-zero.md) extensions. +The following sequence of events describes how user code gets notified: + - Device side: + 1. Assert fails in device-code in kernel + // It's not defined if GPU thread stops execution + // Other GPU threads are left untouched + 2. Specialized version of `__devicelib_assert_fail` is called + 3. Device immediately signals to host (Low-Level Runtime) + - Host side: + 1. The assert failure gets detected by Low-Level Runtime + 2. 
Low-Level Runtime sets event status + 3. Upon call to `sycl::queue::wait_and_throw()` or + `sycl::event::wait_and_throw()` DPCPP Runtime checks event status and + throws "assert" exception + ### Fallback approach @@ -155,6 +176,20 @@ information. DPCPP Runtime checks contents of the assert buffer for assert failure flag after kernel finishes. +The following sequence of events describes how user code gets notified: + - Device side: + 1. Assert fails in device-code in kernel + 2. Fallback version of `__devicelib_assert_fail` is called + 3. Assert information is stored into assert buffer + 4. Kernel continues running + - Host side: + 1. Upon call to `sycl::queue::wait_and_throw()` or + `sycl::event::wait_and_throw()` DPCPP Runtime waits until kernel finishes + and checks assert buffer for assert information and throws exception + + +#### Storing accessor metadata and writing assert failure to buffer + Both storing of accessor metadata and writing assert failure is performed with help of built-ins. Implementations of these builtins are substituted by frontend. From a31b808075c2eef9717434863ffc131551bd4c58 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 14 Apr 2021 17:47:38 +0300 Subject: [PATCH 023/122] Add notes on property set usage Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 115 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 8a6767af12c15..e5d0bf2fe76ef 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -188,7 +188,120 @@ The following sequence of events describes how user code gets notified: and checks assert buffer for assert information and throws exception -#### Storing accessor metadata and writing assert failure to buffer +#### Online-linking fallback `__devicelib_assert_fail` + +Online linking against fallback implementation of `__devicelib_assert_fail` is +performed only when assertion is enabled.
+ +In DPCPP headers one can see if assert is enabled with status of `NDEBUG` macro +with `#ifdef`'s. This allows to add implicit buffer argument to kernel +invocation. Here "implicit" means "implicit to the user". + +When in DPCPP Runtime Library this knowledge is obtained from device binary +image descriptor's property sets. + +Each device image is supplied with an array of property sets: +```c++ +struct pi_device_binary_struct { + //... + // Array of property sets + pi_device_binary_property_set PropertySetsBegin; + pi_device_binary_property_set PropertySetsEnd; +}; +``` +Each property set is represented by the following struct: +```c++ +// Named array of properties. +struct _pi_device_binary_property_set_struct { + char *Name; // the name + pi_device_binary_property PropertiesBegin; // array start + pi_device_binary_property PropertiesEnd; // array end +}; +``` +It contains name of property set and array of properties. Each property is +represented by the following struct: +```c++ +struct _pi_device_binary_property_struct { + char *Name; // null-terminated property name + void *ValAddr; // address of property value + uint32_t Type; // _pi_property_type + uint64_t ValSize; // size of property value in bytes +}; +``` + +Whenever `isAssertEnabled` property set is present, this specific device image +was built with `NDEBUG` macro undefined and it requires fallback implementation +of `__devicelib_assert_fail` (i.e. if Device-side Runtime doesn't support it). + +Any properties in `isAssertEnabled` property set are ignored. + +The property set is added to device binary descriptor whenever at least single +translation unit was compiled with assertions enabled i.e. `NDEBUG` undefined. 
+ + +##### Compiling with assert enabled/disabled + +Consider the following two use-case: +```c++ +// impl.cpp +using namespace sycl; +int calculus(int X) { + assert(X && "Invalid value"); + return X * 2; +} + +void enqueueKernel(queue &Q, buffer &B) { + Q.submit([](handler &H) { + auto Acc = B.get_access(H); + H.parallel_for(/* range */, [](item It) { + assert(Acc[It]); + // ... + }); + }); +} + +// main.cpp +// ... +using namespace sycl; + +SYCL_EXTERNAL int calculus(int); +void enqueueKernel(queue&, buffer&); + +void workload() { + queue Q; + buffer B; + + Q.submit([](handler &H) { + auto Acc = B.get_access(H); + H.parallel_for(/* range */, [](item It) { + int X = calculus(0); // should fail assertion + assert(X && "Nil in result"); + Acc[It] = X; + }); + }); + + enqueueKernel(Q, B); + ... +} +``` + +These two files are compiled into a single binary application. There are four +states of definedness of `NDEBUG` macro available: + +| # | `impl.cpp` | `main.cpp` | +| - | ---------- | ---------- | +| 1 | defined | defined | +| 2 | defined | undefined | +| 3 | undefined | defined | +| 4 | undefined | undefined | + +States of definedness of `NDEBUG` macro defines the set of assertions which can +fail. Having assertions enabled in at least one translation unit with device +code requires for `isAssertEnabled` property set being present in device image +descriptor structure. + + +### Storing accessor metadata and writing assert failure to buffer Both storing of accessor metadata and writing assert failure is performed with help of built-ins. 
Implementations of these builtins are substituted by From 257054a5bfd36c6fad3321a70579602bab16ac8e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 14 Apr 2021 17:52:31 +0300 Subject: [PATCH 024/122] Address comments Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 15 +++-- .../Assert/SYCL_ONEAPI_ASSERT.asciidoc | 57 ++++++++++++++++--- 2 files changed, 59 insertions(+), 13 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index e5d0bf2fe76ef..ee5ba258ca9d6 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -14,16 +14,21 @@ compile time. ## Use-case example -``` +```c++ +#include +#include + using namespace sycl; auto ErrorHandler = [] (exception_list Exs) { - for (exception_ptr const& E : Exs) { + for (std::exception_ptr const& E : Exs) { try { std::rethrow_exception(E); } - catch (event_error const& Ex) { - std::cout << “Exception - ” << Ex.what(); // assertion failed - std::abort(); + catch (const exception& Ex) { + if (Ex.code() == errc::ext_oneapi_assert) { + std::cout << “Exception - ” << Ex.what(); // assertion failed + std::abort(); + } } } }; diff --git a/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc index 8b79bf86fa967..c8a64d2262eed 100644 --- a/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc +++ b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc @@ -63,11 +63,12 @@ implementation. Catching the asynchronous exception with `sycl::errc::ext_oneapi_assert` error code means that assert failure had happened in device code during kernel -execution and the assert message is already printed to `stderr`. Format of the -assert message is the following: -``` -:: : global id: [,,], local id: [,,] Assertion `` failed. -``` +execution and the assert message is already printed to `stderr`. 
+The format of the assert message is unspecified, but it will always include the +text of the failing expression, the values of the standard macros `__FILE__` and +`__LINE__`, and the value of the standard variable `__func__`. If the failing +assert comes from an `nd_range` `parallel_for` it will also include the global +ID and the local ID of the failing work item. It is unspecified whether a failing `assert()` returns to its caller before the kernel terminates. If a failing call returns, the device code may need to @@ -82,9 +83,49 @@ assertion. The `assert()` macro is defined in system include headers, not in SYCL headers. On most of systems it is `` and/or `` header files. The user can can disable assertions in device code by defining the `NDEBUG` -preprocessor macro at the point when `` and `/` -are included. - +preprocessor macro prior to including either of `` and +`/`. + +Following is an example use-case: + +#[source] +---- +#include +#include + +using namespace sycl; +auto ErrorHandler = [] (exception_list Exs) { + for (std::exception_ptr const& E : Exs) { + try { + std::rethrow_exception(E); + } + catch (const exception& Ex) { + if (Ex.code() == errc::ext_oneapi_assert) { + std::cout << “Exception - ” << Ex.what(); // assertion failed + std::abort(); + } + } + } +}; + +void user_func(item<2> Item) { + assert( (Item[0] % 2) && “Nil”); +} + +int main() { + queue Q(ErrorHandler); + q.submit([&] (handler& CGH) { + CGH.parallel_for(range<2>{N, M}, [=](item<2> It) { + do_smth(); + user_func(It); + do_smth_else(); + }); + }); + Q.wait_and_throw(); + std::cout << “One shouldn’t see this message.“; + return 0; +} +---- == Feature test macro From 3f501730e1cd89c0760ea3c595200df9d6a34fe1 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 14 Apr 2021 17:55:57 +0300 Subject: [PATCH 025/122] Fix typo and format note Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 11 ++++++----- .../doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc | 2 +- 2 files changed, 7 
insertions(+), 6 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index ee5ba258ca9d6..09042560bf532 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -97,10 +97,11 @@ practical cases. `assert(expr)` macro ends up in call to `__devicelib_assert_fail`. This function is part of [Device library extension](extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst#cl_intel_devicelib_cassert). -Format of assert failure message, printed to `stderr` is the following: -``` -:: : global id: [,,], local id: [,,] Assertion `` failed. -``` +The format of the assert message is unspecified, but it will always include the +text of the failing expression, the values of the standard macros `__FILE__` and +`__LINE__`, and the value of the standard variable `__func__`. If the failing +assert comes from an `nd_range` `parallel_for` it will also include the global +ID and the local ID of the failing work item. Implementation of this function is supplied by Native Device Compiler for safe approach or by DPCPP Compiler for fallback one. @@ -246,7 +247,7 @@ translation unit was compiled with assertions enabled i.e. `NDEBUG` undefined. 
##### Compiling with assert enabled/disabled -Consider the following two use-case: +Consider the following example sources: ```c++ // impl.cpp using namespace sycl; diff --git a/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc index c8a64d2262eed..3ca160ea7d810 100644 --- a/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc +++ b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc @@ -88,7 +88,7 @@ preprocessor macro prior to including either of `` and Following is an example use-case: -#[source] +[source] ---- #include #include From c1326aa2defe52ca960f3f5efed11d6145db4d2f Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 14 Apr 2021 17:59:34 +0300 Subject: [PATCH 026/122] Fix typo Signed-off-by: Sergey Kanaev --- sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc index 3ca160ea7d810..284101fe8bac2 100644 --- a/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc +++ b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc @@ -82,7 +82,7 @@ assertion. The `assert()` macro is defined in system include headers, not in SYCL headers. On most of systems it is `` and/or `` header files. -The user can can disable assertions in device code by defining the `NDEBUG` +The user can disable assertions in device code by defining the `NDEBUG` preprocessor macro prior to including either of `` and `/`. 
From 5095b1a59fb3e695b9da6fd02849c85ea6cb5f3c Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 14 Apr 2021 18:02:27 +0300 Subject: [PATCH 027/122] Add extension to README Signed-off-by: Sergey Kanaev --- sycl/doc/extensions/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/doc/extensions/README.md b/sycl/doc/extensions/README.md index e4b4a7bdb52be..239efe8421a9f 100755 --- a/sycl/doc/extensions/README.md +++ b/sycl/doc/extensions/README.md @@ -37,6 +37,7 @@ DPC++ extensions status: | [Use Pinned Memory Property](UsePinnedMemoryProperty/UsePinnedMemoryPropery.adoc) | Supported | | | [Level-Zero backend specification](LevelZeroBackend/LevelZeroBackend.md) | Supported | | | [ITT annotations support](ITTAnnotations/ITTAnnotations.rst) | Supported | | +| [Assert](Assert/SYCL_ONEAPI_ASSERT.asciidoc) | Proposal | | Legend: From 5078fccf940173d6a90829209a503cf3f986e370 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 14 Apr 2021 18:08:08 +0300 Subject: [PATCH 028/122] Note on how property set gets to be set Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 09042560bf532..993f5b5ac681c 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -244,6 +244,9 @@ Any properties in `isAssertEnabled` property set are ignored. The property set is added to device binary descriptor whenever at least single translation unit was compiled with assertions enabled i.e. `NDEBUG` undefined. +The property set is added by `sycl-post-link` tool depending on module metadata. +Metadata is provided by Clang frontend. Metadata name is `is_assert_enabled`. 
+ ##### Compiling with assert enabled/disabled From 9bcac020d074c9cc09d072485770ac73c44fa013 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 15 Apr 2021 16:18:10 +0300 Subject: [PATCH 029/122] Partially remove mentioning of async exception throw Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 14 +++---- .../Assert/SYCL_ONEAPI_ASSERT.asciidoc | 42 ++++--------------- 2 files changed, 14 insertions(+), 42 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 993f5b5ac681c..148db056d6cd4 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -157,10 +157,8 @@ The following sequence of events describes how user code gets notified: 3. Device immediately signals to host (Low-Level Runtime) - Host side: 1. The assert failure gets detected by Low-Level Runtime - 2. Low-Level Runtime sets event status - 3. Upon call to `sycl::queue::wait_and_throw()` or - `sycl::event::wait_and_throw()` DPCPP Runtime checks event status and - throws "assert" exception + 2. Low-Level Runtime prints assert failure message to `stderr` + 3. Low-Level Runtime calls `abort()` ### Fallback approach @@ -189,9 +187,11 @@ The following sequence of events describes how user code gets notified: 3. Assert information is stored into assert buffer 4. Kernel continues running - Host side: - 1. Upon call to `sycl::queue::wait_and_throw()` or - `sycl::event::wait_and_throw()` DPCPP Runtime waits until kernel finishes - and checks assert buffer for assert information throws exception + 1. A distinct thread is launched no later than the point of enqueue of the of + kernel with assertions + 2. This thread polls the enqueued kernels for finish and checks the assert + buffer for assert data + 3. 
If assert data is present DPCPP Runtime calls `abort()` #### Online-linking fallback `__devicelib_assert_fail` diff --git a/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc index 284101fe8bac2..9b2b28cce2acb 100644 --- a/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc +++ b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc @@ -52,18 +52,8 @@ products. This extension adds the ability for device code to call the C++ `assert()` macro. The behavior of `assert()` in device code is similar to its behavior in host code. If the asserted condition is false, a message is printed to `stderr` -and then the program typically aborts. The mechanism for aborting the program is -different, though. Whereas the host version calls `std::abort()`, the device -version causes an asynchronous SYCL `exception` with the -`errc::ext_oneapi_assert` error code to be thrown. The application can catch -this exception like any other asynchronous exception that is thrown from an -executing kernel. The numeric value of this enumeration element is defined by -implementation. - - -Catching the asynchronous exception with `sycl::errc::ext_oneapi_assert` error -code means that assert failure had happened in device code during kernel -execution and the assert message is already printed to `stderr`. +and then the program aborts with `std::abort()`. + The format of the assert message is unspecified, but it will always include the text of the failing expression, the values of the standard macros `__FILE__` and `__LINE__`, and the value of the standard variable `__func__`. If the failing @@ -73,12 +63,7 @@ ID and the local ID of the failing work item. It is unspecified whether a failing `assert()` returns to its caller before the kernel terminates. If a failing call returns, the device code may need to continue execution without deadlocking for the assertion message to be printed -or for the exception to be thrown. 
- -The contents of the exception's `e.what()` string is unspecified. Since the -assertion message is already printed to `stderr` by the time the exception is -thrown, the `e.what()` string may not include the location of the failing -assertion. +or for `std::abort()` to be called. The `assert()` macro is defined in system include headers, not in SYCL headers. On most of systems it is `` and/or `` header files. @@ -94,34 +79,21 @@ Following is an example use-case: #include using namespace sycl; -auto ErrorHandler = [] (exception_list Exs) { - for (std::exception_ptr const& E : Exs) { - try { - std::rethrow_exception(E); - } - catch (const exception& Ex) { - if (Ex.code() == errc::ext_oneapi_assert) { - std::cout << “Exception - ” << Ex.what(); // assertion failed - std::abort(); - } - } - } -}; void user_func(item<2> Item) { - assert( (Item[0] % 2) && “Nil”); + assert((Item[0] % 2) && “Nil”); } int main() { - queue Q(ErrorHandler); - q.submit([&] (handler& CGH) { + queue Q; + Q.submit([&] (handler& CGH) { CGH.parallel_for(range<2>{N, M}, [=](item<2> It) { do_smth(); user_func(It); do_smth_else(); }); }); - Q.wait_and_throw(); + Q.wait(); std::cout << “One shouldn’t see this message.“; return 0; } From 7ec3ac8676d8f4db10c69f3de6f19ead0591b2ba Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 15 Apr 2021 17:30:11 +0300 Subject: [PATCH 030/122] Add Assert.md to index Signed-off-by: Sergey Kanaev --- sycl/doc/index.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/doc/index.rst b/sycl/doc/index.rst index 8089d12230730..9be7037fbd959 100644 --- a/sycl/doc/index.rst +++ b/sycl/doc/index.rst @@ -32,3 +32,5 @@ Developing oneAPI DPC++ Compiler KernelProgramCache GlobalObjectsInRuntime LinkedAllocations + Assert + From 8cbfde7a82d845bdb1e16d809060e609c16b4ad9 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 15 Apr 2021 17:30:31 +0300 Subject: [PATCH 031/122] Remove the rest of exception throws Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 45 
+++++++++------------------------------------ 1 file changed, 9 insertions(+), 36 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 148db056d6cd4..086cc948d815c 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -5,9 +5,8 @@ Using the standard C++ `assert` API ("assertions") is an important debugging technique widely used by developers. This document describes the design of supporting assertions within SYCL device code. -The basic approach we chose is delivering device-side assertions as host-side -asynchronous exceptions, which allows further extensibility, such as better -error handling or potential recovery. +The basic approach we chose is delivering device-side assertions as call to +`std::abort()` at host-side. As usual, device-side assertions can be disabled by defining `NDEBUG` macro at compile time. @@ -19,42 +18,30 @@ compile time. #include using namespace sycl; -auto ErrorHandler = [] (exception_list Exs) { - for (std::exception_ptr const& E : Exs) { - try { - std::rethrow_exception(E); - } - catch (const exception& Ex) { - if (Ex.code() == errc::ext_oneapi_assert) { - std::cout << “Exception - ” << Ex.what(); // assertion failed - std::abort(); - } - } - } -}; void user_func(item<2> Item) { assert((Item[0] % 2) && “Nil”); } int main() { - queue Q(ErrorHandler); - q.submit([&] (handler& CGH) { + queue Q; + Q.submit([&] (handler& CGH) { CGH.parallel_for(range<2>{N, M}, [=](item<2> It) { do_smth(); user_func(It); do_smth_else(); }); }); - Q.wait_and_throw(); + Q.wait(); std::cout << “One shouldn’t see this message.“; return 0; } ``` In this use-case every work-item with even X dimension will trigger assertion -failure. Assertion failure should be reported via asynchronous exceptions with -[`assert` error code](extensions/Assert/SYCL_INTEL_assert_exception.asciidoc). +failure. Assertion failure should be trigger a call to `std::abort()` at host as +described in +[extension](extensions/Assert/SYCL_INTEL_ASSERT.asciidoc). 
Even though multiple failures of the same or different assertions can happen in multiple workitems, implementation is required to deliver only one. The assertion failure message is printed to `stderr` by DPCPP Runtime. @@ -69,7 +56,7 @@ From user's point of view there are the following requirements: | # | Title | Description | Importance | | - | ----- | ----------- | ---------- | -| 1 | Handle assertion failure | Signal about assertion failure via SYCL asynchronous exception | Must have | +| 1 | Abort DPC++ application | Abort host application when assert function is called and print a message about assertion | Must have | | 2 | Print assert message | Assert function should print message to stderr at host | Must have | | 3 | Stop under debugger | When debugger is attached, break at assertion point | Highly desired | | 4 | Reliability | Assert failure should be reported regardless of kernel deadlock | Highly desired | @@ -111,20 +98,6 @@ linked against fallback implementation of `__devicelib_assert_fail`. Hence, Native Device Compilers should prefer their implementation instead of the one provided in incoming SPIR-V/LLVM IR binary. -Limitations for user after catching the "assert" asynchronous exception: - - When using GPU device and the kernel hangs/crashes the subsequent enqueues - will fail; - -When using CPU devices the user can proceed with enqueues to the same -device/queue/context. -DPCPP Runtime remains in valid state after "assert" exception been thrown. - - -### Current violation - -While throwing an asynchronous exception is quite an extensible way, for the -time being DPCPP Runtime merely calls `abort()`. 
- ## Safe approach From cc085f524deb19885c73fb6d355e2235cbb647d4 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 22 Apr 2021 17:28:22 +0300 Subject: [PATCH 032/122] Address review comments Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 59 ++++++++----------- .../Assert/SYCL_ONEAPI_ASSERT.asciidoc | 8 +-- .../cl_intel_assert_return_code.asciidoc | 4 +- 3 files changed, 32 insertions(+), 39 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 086cc948d815c..ebd70246764bb 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -43,8 +43,9 @@ failure. Assertion failure should be trigger a call to `std::abort()` at host as described in [extension](extensions/Assert/SYCL_INTEL_ASSERT.asciidoc). Even though multiple failures of the same or different assertions can happen in -multiple workitems, implementation is required to deliver only one. The -assertion failure message is printed to `stderr` by DPCPP Runtime. +multiple workitems, implementation is required to deliver at least one +assertion. The assertion failure message is printed to `stderr` by DPCPP +Runtime. When multiple kernels are enqueued and more than one fail at assertion, at least single assertion should be reported. @@ -93,10 +94,9 @@ ID and the local ID of the failing work item. Implementation of this function is supplied by Native Device Compiler for safe approach or by DPCPP Compiler for fallback one. -NB: Due to lack of support of online linking in Level-Zero, the application is -linked against fallback implementation of `__devicelib_assert_fail`. Hence, -Native Device Compilers should prefer their implementation instead of the one -provided in incoming SPIR-V/LLVM IR binary. +In order to distinguish which implementation to use, DPCPP Runtime checks for +`cl_intel_devicelib_cassert` extension. If the extension isn't available, then +fallback implementation is used. ## Safe approach @@ -112,15 +112,6 @@ and runtime. 
The Low-Level Runtime is responsible for: - detecting if assert failure took place; - flushing assert message to `stderr` on host. -When detected, Low-level Runtime reports assert failure to DPCPP Runtime -via events objects. To achieve this, information about assert failure should be -propagated from device-side to SYCL Runtime. This should be performed via calls -to `piEventGetInfo`. This Plugin Interface call "lowers" to `clGetEventInfo` for -OpenCL backend and `zeEventQueryStatus` for Level-Zero backend. - -Refer to [OpenCL](extensions/Assert/opencl.md) and [Level-Zero](extensions/Assert/level-zero.md) -extensions. - The following sequence of events describes how user code gets notified: - Device side: 1. Assert fails in device-code in kernel @@ -142,13 +133,14 @@ Device-side Runtime and Native Device Compiler. Neither it does from Low-level Runtime. Within this approach, a dedicated assert buffer is allocated and implicit kernel -argument is introduced. The argument is an accessor with `discard_read_write` -or `discard_write` access mode. Accessor metadata is stored to program scope -variable. This allows to refer to the accessor without modifying each and every -user's function. Fallback implementation of `__devicelib_assert_fail` restores -accessor metadata from program scope variable and writes assert information to -the assert buffer. Atomic operations are used in order to not overwrite existing -information. +argument is introduced. The argument is an accessor that has either +`access_mode::read_write` or `access_mode::write` access mode and was +constructed with the `property::no_init property`. Accessor metadata is stored +to program scope variable. This allows to refer to the accessor without +modifying each and every user's function. Fallback implementation of +`__devicelib_assert_fail` restores accessor metadata from program scope variable +and writes assert information to the assert buffer. 
Atomic operations are used +in order to not overwrite existing information. DPCPP Runtime checks contents of the assert buffer for assert failure flag after kernel finishes. @@ -160,8 +152,8 @@ The following sequence of events describes how user code gets notified: 3. Assert information is stored into assert buffer 4. Kernel continues running - Host side: - 1. A distinct thread is launched no later than the point of enqueue of the of - kernel with assertions + 1. A distinct thread is launched no later than the point of enqueue of the + first kernel with assertions 2. This thread polls the enqueued kernels for finish and checks the assert buffer for assert data 3. If assert data is present DPCPP Runtime calls `abort()` @@ -170,7 +162,8 @@ The following sequence of events describes how user code gets notified: #### Online-linking fallback `__devicelib_assert_fail` Online linking against fallback implementation of `__devicelib_assert_fail` is -performed only when assertion is enabled. +performed only when assertion is enabled and Device-side Runtime doesn't provide +implementation of `__devicelib_assert_fail`. In DPCPP headers one can see if assert is enabled with status of `NDEBUG` macro with `#ifdef`'s. This allows to add implicit buffer argument to kernel @@ -208,16 +201,16 @@ struct _pi_device_binary_property_struct { }; ``` -Whenever `isAssertEnabled` property set is present, this specific device image -was built with `NDEBUG` macro undefined and it requires fallback implementation -of `__devicelib_assert_fail` (i.e. if Device-side Runtime doesn't support it). - -Any properties in `isAssertEnabled` property set are ignored. +There's no need for a whole new property set so we reuse `SYCL/misc properties` +property set. Whenever `isAssertEnabled` property is present, this specific +device image was built with `NDEBUG` macro undefined and it requires fallback +implementation of `__devicelib_assert_fail` (i.e. if Device-side Runtime doesn't +support it). 
-The property set is added to device binary descriptor whenever at least single +The property is added to device binary descriptor whenever at least single translation unit was compiled with assertions enabled i.e. `NDEBUG` undefined. -The property set is added by `sycl-post-link` tool depending on module metadata. +The property is added by `sycl-post-link` tool depending on module metadata. Metadata is provided by Clang frontend. Metadata name is `is_assert_enabled`. @@ -283,7 +276,7 @@ code requires for `isAssertEnabled` property set being present in device image descriptor structure. -### Storing accessor metadata and writing assert failure to buffer +#### Storing accessor metadata and writing assert failure to buffer Both storing of accessor metadata and writing assert failure is performed with help of built-ins. Implementations of these builtins are substituted by diff --git a/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc index 9b2b28cce2acb..c44d29c72113d 100644 --- a/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc +++ b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc @@ -55,10 +55,10 @@ host code. If the asserted condition is false, a message is printed to `stderr` and then the program aborts with `std::abort()`. The format of the assert message is unspecified, but it will always include the -text of the failing expression, the values of the standard macros `__FILE__` and -`__LINE__`, and the value of the standard variable `__func__`. If the failing -assert comes from an `nd_range` `parallel_for` it will also include the global -ID and the local ID of the failing work item. +text of the failing expression, the values of the standard macros `+__FILE__+` +and `+__LINE__+`, and the value of the standard variable `+__func__+`. If the +failing assert comes from an `nd_range` `parallel_for` it will also include the +global ID and the local ID of the failing work item. 
It is unspecified whether a failing `assert()` returns to its caller before the kernel terminates. If a failing call returns, the device code may need to diff --git a/sycl/doc/extensions/Assert/cl_intel_assert_return_code.asciidoc b/sycl/doc/extensions/Assert/cl_intel_assert_return_code.asciidoc index b7eec45d0a26f..58036b0334b4a 100644 --- a/sycl/doc/extensions/Assert/cl_intel_assert_return_code.asciidoc +++ b/sycl/doc/extensions/Assert/cl_intel_assert_return_code.asciidoc @@ -42,9 +42,9 @@ Revision: 1 == Dependencies -This extension is written against the OpenCL Specification Version 1.0, Revision 48. +This extension is written against the OpenCL Specification Version 1.2, Revision 19. -This extension requires OpenCL 1.0 or later. +This extension requires OpenCL 1.2 or later. == Overview From 8835bf8a1ae5dfc1146e455ae672e3e39c1e3caf Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 6 May 2021 12:05:29 +0300 Subject: [PATCH 033/122] Document program-scope variable approach Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 308 +++++++++++----------- sycl/doc/images/assert-fallback-graph.svg | 3 + 2 files changed, 155 insertions(+), 156 deletions(-) create mode 100644 sycl/doc/images/assert-fallback-graph.svg diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index ebd70246764bb..29073e3e54116 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -76,8 +76,6 @@ practical cases. based on input SPIR-V image. - Low-level Runtime - the backend/runtime behind DPCPP Runtime attached via the Plugin Interface. - - Accessor metadata - parts of accessor representation at device-side: pointer, - ranges, offset. ## How it works? @@ -125,49 +123,73 @@ The following sequence of events describes how user code gets notified: 3. Low-Level Runtime calls `abort()` -### Fallback approach +## Fallback approach -If Device-side Runtime doesn't support `__devicelib_assert_fail` then a buffer -based approach comes in place. 
The approach doesn't require any support from +If Device-side Runtime doesn't support `__devicelib_assert_fail` then a fallback +approach comes in place. The approach doesn't require any support from Device-side Runtime and Native Device Compiler. Neither it does from Low-level Runtime. -Within this approach, a dedicated assert buffer is allocated and implicit kernel -argument is introduced. The argument is an accessor that has either -`access_mode::read_write` or `access_mode::write` access mode and was -constructed with the `property::no_init property`. Accessor metadata is stored -to program scope variable. This allows to refer to the accessor without -modifying each and every user's function. Fallback implementation of -`__devicelib_assert_fail` restores accessor metadata from program scope variable -and writes assert information to the assert buffer. Atomic operations are used -in order to not overwrite existing information. - -DPCPP Runtime checks contents of the assert buffer for assert failure flag after -kernel finishes. +Within this approach, a mutable program scope variable is introduced. This +variable stores a flag which says if an assert failure was encountered. Fallback +implementation of `__devicelib_assert_fail` atomically raises the flag so that +DPCPP Runtime is able to detect assert failure after kernel finishes. The following sequence of events describes how user code gets notified: - Device side: 1. Assert fails in device-code in kernel 2. Fallback version of `__devicelib_assert_fail` is called - 3. Assert information is stored into assert buffer + 3. Assert information is stored into program-scope variable 4. Kernel continues running - Host side: - 1. A distinct thread is launched no later than the point of enqueue of the - first kernel with assertions - 2. This thread polls the enqueued kernels for finish and checks the assert - buffer for assert data - 3. If assert data is present DPCPP Runtime calls `abort()` + 1. 
A copy 'kernel' is enqueued as the one depending on user's kernel to get + the value of assert failure flag. + 2. A host-task is enqueued to check value of assert failure flag. + 3. The host task calls abort whenever assert failure flag is set. + +Illustrating this with an example, let's assume the user enqueues three kernels: + - `Kernel #1` + - `Kernel #2` + - `Kernel #3`, which depends on `Kernel #1` + +The resulting graph will look like this: ![graph](images/assert-fallback-graph.svg) + +### Interface to program scope variable + +Multiple translation units could be compiled/linked into a single device binary +image. All of them should have `extern` declaration of program scope variable +available. Definition of the variable is only available within devicelib in the +same binary image where fallback `__devicelib_assert_fail` resides. + +The variable has the following structure and +declaration: + +```c++ +struct AssertHappened { + int Flag = 0; +}; + +#ifdef __SYCL_DEVICE_ONLY__ +extern SYCL_GLOBAL_VAR AssertHappened AssertHappenedMem; +#endif +``` +Here, `SYCL_GLOBAL_VAR` is a macro which wraps special attribute to allow for +mutable program-scope variable. -#### Online-linking fallback `__devicelib_assert_fail` +The reference to extern variable is resolved within online-linking against +fallback devicelib. + +### Online-linking fallback `__devicelib_assert_fail` Online linking against fallback implementation of `__devicelib_assert_fail` is performed only when assertion is enabled and Device-side Runtime doesn't provide implementation of `__devicelib_assert_fail`. In DPCPP headers one can see if assert is enabled with status of `NDEBUG` macro -with `#ifdef`'s. This allows to add implicit buffer argument to kernel -invocation. Here "implicit" means "implicit to the user". +with `#ifdef`'s. This allows to enqueue a copy kernel and host task. The copy +kernel will copy `AssertHappenedMem` to host and host-task will check the `Flag` +value and `abort()` as needed.
When in DPCPP Runtime Library this knowledge is obtained from device binary image descriptor's property sets. @@ -213,8 +235,62 @@ translation unit was compiled with assertions enabled i.e. `NDEBUG` undefined. The property is added by `sycl-post-link` tool depending on module metadata. Metadata is provided by Clang frontend. Metadata name is `is_assert_enabled`. +Suppose the following example user code: +```c++ +void user_func(int X) { + assert(X && “X is nil”); +} + +int main() { + queue Q(...); + Q.submit([&] (handler& CGH) { + CGH.single_task([=] () { + do_smth(); + user_func(0); + do_smth_else(); + }); + }); + ... +} +``` + +The following LLVM IR pseudo code will be generated after linking against +fallback implementation of devicelib: +``` +@AssertHappenedMem = global AssertHappened + +/// user's code +void user_func(int X) { +if (!(X && “X is nil")) { + __assert_fail(...); + } +} -##### Compiling with assert enabled/disabled +kernel(...) { + do_smth() + user_func(0); + do_smth_else(); +} + +/// __assert_fail belongs to Linux version of devicelib +void __assert_fail(...) { + ... + __devicelib_assert_fail(...); +} + +void __devicelib_assert_fail(Expr, File, Line, GlobalID, LocalID) { + ... + volatile int *Ptr = (volatile int *)AssertHappenedMem.Flag; + int Expected = 0; + int Desired = 1; + + if (atomic_CAS(&AssertHappenedMem.Flag, Expected, Desired)) + printf("Assertion `%s' failed in %s at line %i. GlobalID: %i, LocalID: %i", + Expr, File, Line, GlobalID, LocalID); +} +``` + +#### Compiling with assert enabled/disabled Consider the following example sources: ```c++ @@ -275,145 +351,65 @@ fail. Having assertions enabled in at least one translation unit with device code requires for `isAssertEnabled` property set being present in device image descriptor structure. 
+### Raising assert failure flag and reading it on host -#### Storing accessor metadata and writing assert failure to buffer +Each and every translation unit provided by user should have `extern` +declaration of `AssertHappenedMem` i.e. DPCPP headers includes appropriate file +with [declaration](#prog-scope-var-decl). -Both storing of accessor metadata and writing assert failure is performed with -help of built-ins. Implementations of these builtins are substituted by -frontend. +The definition is only provided within devicelib along with +`__devicelib_assert_fail` function which raises the flag. -User's kernel is executed through a wrapper. Wrapping takes place in DPCPP -Runtime headers in a following manner: +Reading of assert failure flag is performed with the help of auxiliary kernel +which is enqueued as dependent on user's one. The flag state is checked later +in host-task. This is achieved with approximately the following changes: -``` -class handler { +```c++ +#include // contains extern decl of AssertHappenedMem -template parallel_for(KernelFunc, Range) { #ifndef NDEBUG - // Assert required -  if (!MQueue->get_device()->assert_fail_supported()) { -    using KName2 = class ASSERT_WRAPPER_NAME(KernelName); -     -    auto AssertBufferAcc = MQueue->get_context()->getAssertBufferAccessor(this); - -    parallel_for_impl( -      Range, -      [=](Item) { -        __store_acc(AssertBuffAcc); -        KernelFunc(Item); -      }); -  } else { +class AssertFlagCopier; #endif - // (No assert required) OR (Assert supported by device) -     // ordinary enqueue process - +class queue { + template event submit(T CGF) { + event Event = submit_impl(CGF); #ifndef NDEBUG - } + // assert required + if (!get_device()->assert_fail_supported()) { + // __devicelib_assert_fail isn't supported by Device-side Runtime + // Linking against fallback impl of __devicelib_assert_fail is performed + // by program manager class + AssertHappened *AH = new AssertHappened; + buffer *Buffer = 
new buffer{1, AH}; + + // read flag value + event CopierEv = submit_impl([&](handler &CGH) { + CGH.depends_on(Event); + + auto Acc = Buffer->get_access(CGH); + + CGH.single_task([=] { + Acc[0].Flag = atomic_load(&AssertHappenedMem.Flag); + }); + }); + + // check flag state + submit_impl([=](handler &CGH) { + CGH.depends_on(CopierEv); + + CGH.codeplay_host_task([=] { + if (AH->Flag) + abort(); + + free(Buffer); + free(AH); + }); + }); + } #endif -} - -} -``` - - -#### Built-ins operation - -Accessor is a pointer augmented with offset and two ranges (access range and -memory range). - -There are two built-ins provided by frontend: - * `__store_acc()` - to store accessor metadata into program-scope variable. - * `__store_assert_failure()` - to store flag about assert failure in a buffer - using the metadata stored in program-scope variable. - -The accessor should be stored to program scope variable in global address space -using atomic operations. Motivation for using atomic operations: the program may -contain several kernels and some of them could be running simultaneously on a -single device. - -The `__store_assert_failure()` built-in atomically sets a flag in a buffer. The -buffer is accessed using accessor metadata from program-scope variable. This -built-in return a boolean value which is `true` if the flag is set by this call -to `__store_assert_failure()` and `false` if the flag was already set. -Motivation for using atomic operation is the same as with `__store_acc()` -builtin. - -The following pseudo-code snippets shows how these built-ins are used. -First of all, assume the following code as user's one: -``` -void user_func(int X) { - assert(X && “X is nil”); -} - -int main() { - queue Q(...); - Q.submit([&] (handler& CGH) { - CGH.single_task([=] () { - do_smth(); - user_func(0); - do_smth_else(); - }); - }); - ... 
-} -``` - -The following LLVM IR pseudo code will be generated for the user's code: -``` -@AssertBufferPtr = global void* null -@AssertBufferAccessRange = ... -@AssertBufferMemoryRange = ... -@AssertBufferOffset = ... - -/// user's code -void user_func(int X) { -if (!(X && “X is nil")) { - __assert_fail(...); + return Event; } -} - -users_kernel(...) { - do_smth() - user_func(0); - do_smth_else(); -} - -/// a wrapped user's kernel -kernel(AssertBufferAccessor, OtherArguments...) { - __store_acc(AssertBufferAccessor); - users_kernel(OtherArguments...); -} - -/// __assert_fail belongs to Linux version of devicelib -void __assert_fail(...) { - ... - __devicelib_assert_fail(...); -} - -void __devicelib_assert_fail(Expr, File, Line, GlobalID, LocalID) { - ... - if (__store_assert_info()) - printf("Assertion `%s' failed in %s at line %i. GlobalID: %i, LocalID: %i", - Expr, File, Line, GlobalID, LocalID); -} - -/// The following are built-ins provided by frontend -void __store_acc(accessor) { - %1 = accessor.getPtr(); - store void * %1, void * @AssertBufferPtr -} - -bool __store_assert_info(...) { - AssertBAcc = __fetch_acc(); - // fill in data in AsBAcc - volatile int *Ptr = (volatile int *)AssertBAcc.getPtr(); - bool Expected = false; - bool Desired = true; - - return atomic_cas(Ptr, Expected, Desired, SequentialConsistentMemoryOrder); - // or it could be: - // return !atomic_exchange(Ptr, Desired, SequentialConsistentMemoryOrder); -} +}; ``` diff --git a/sycl/doc/images/assert-fallback-graph.svg b/sycl/doc/images/assert-fallback-graph.svg new file mode 100644 index 0000000000000..fadf4a07ba1c0 --- /dev/null +++ b/sycl/doc/images/assert-fallback-graph.svg @@ -0,0 +1,3 @@ + + +
User's kernel #1
User's kernel #1
User's kernel #2
User's kernel #2
User's kernel #3
User's kernel #3
Copy assert failure flag
Copy assert failure...
Copy assert failure flag
Copy assert failure...
Host-task with check for the value of assert failure flag
Host-task with check...
Host-task with check for the value of assert failure flag
Host-task with check...
Copy assert failure flag
Copy assert failure...
Host-task with check for the value of assert failure flag
Host-task with check...
Viewer does not support full SVG 1.1
From ecb8659af546350d6da8f1b19d451c1bb95c3008 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 7 May 2021 17:23:04 +0300 Subject: [PATCH 034/122] Remove L0 and OCL extensions. Signed-off-by: Sergey Kanaev --- .../cl_intel_assert_return_code.asciidoc | 99 ---------------- .../ze_intel_assert_return_code.asciidoc | 111 ------------------ 2 files changed, 210 deletions(-) delete mode 100644 sycl/doc/extensions/Assert/cl_intel_assert_return_code.asciidoc delete mode 100644 sycl/doc/extensions/Assert/ze_intel_assert_return_code.asciidoc diff --git a/sycl/doc/extensions/Assert/cl_intel_assert_return_code.asciidoc b/sycl/doc/extensions/Assert/cl_intel_assert_return_code.asciidoc deleted file mode 100644 index 58036b0334b4a..0000000000000 --- a/sycl/doc/extensions/Assert/cl_intel_assert_return_code.asciidoc +++ /dev/null @@ -1,99 +0,0 @@ -cl_intel_assert_return_code -====================================== - -// This section needs to be after the document title. -:doctype: book -:toc2: -:toc: left -:encoding: utf-8 -:lang: en - -:blank: pass:[ +] - -// Set the default source code type in this document to C++, -// for syntax highlighting purposes. This is needed because -// docbook uses c++ and html5 uses cpp. -:language: {basebackend@docbook:c++:cpp} - -== Name Strings - -+cl_intel_assert_return_code+ - -== Notice - -Copyright (c) 2021 Intel Corporation. All rights reserved. - -== Status - -Working Draft - -This is a preview extension specification, intended to provide early access to -a feature for review and community feedback. When the feature matures, this -specification may be released as a formal extension. - -Because the interfaces defined by this specification are not final and are -subject to change they are not intended to be used by shipping software -products. - -== Version - -Built On: {docdate} + -Revision: 1 - -== Dependencies - -This extension is written against the OpenCL Specification Version 1.2, Revision 19. 
- -This extension requires OpenCL 1.2 or later. - -== Overview - -This extension allows OpenCL 1.x and 2.x devices to notify host that assert had -happened. - -== New error code - -[source] ----- -CL_ASSERT_FAILURE ----- - -Negative value of this error code should be set into `param_value` of -`clGetEventInfo` as described in table 5.15 "clGetEventInfo prameter queries" if -assert failure took place in device-code during kernel execution. - -An example: -[source] ----- -cl_event Event; // describes an event of kernel been submitted previously -cl_int Result; -size_t ResultSize; - -clGetEventInfo(Event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(Result), &Result, &ResultSize); ----- - -If kernel failed an assertion `clGetEventInfo` should put `CL_ASSERT_FAILURE` in -`Result`. - -== Issues - -None. - -== Revision History - -[cols="5,15,15,70"] -[grid="rows"] -[options="header"] -|======================================== -|Rev|Date|Author|Changes -|1|2021-04-09|Sergey Kanaev|*Initial public working draft* -|======================================== - -//************************************************************************ -//Other formatting suggestions: -// -//* Use *bold* text for host APIs, or [source] syntax highlighting. -//* Use +mono+ text for device APIs, or [source] syntax highlighting. -//* Use +mono+ text for extension names, types, or enum values. -//* Use _italics_ for parameters. -//************************************************************************ diff --git a/sycl/doc/extensions/Assert/ze_intel_assert_return_code.asciidoc b/sycl/doc/extensions/Assert/ze_intel_assert_return_code.asciidoc deleted file mode 100644 index 85db891ad5bcf..0000000000000 --- a/sycl/doc/extensions/Assert/ze_intel_assert_return_code.asciidoc +++ /dev/null @@ -1,111 +0,0 @@ -ze_intel_assert_return_code -====================================== - -// This section needs to be after the document title. 
-:doctype: book -:toc2: -:toc: left -:encoding: utf-8 -:lang: en - -:blank: pass:[ +] - -// Set the default source code type in this document to C++, -// for syntax highlighting purposes. This is needed because -// docbook uses c++ and html5 uses cpp. -:language: {basebackend@docbook:c++:cpp} - -== Name Strings - -+ze_intel_assert_return_code+ - -== Notice - -Copyright (c) 2021 Intel Corporation. All rights reserved. - -== Status - -Working Draft - -This is a preview extension specification, intended to provide early access to -a feature for review and community feedback. When the feature matures, this -specification may be released as a formal extension. - -Because the interfaces defined by this specification are not final and are -subject to change they are not intended to be used by shipping software -products. - -== Version - -Built On: {docdate} + -Revision: 1 - -== Dependencies - -This extension is written against the Level-Zero Specification Version 1.1.2. - -== Overview - -This extension allows Level-Zero devices to notify host that assert had -happened. - -== New enumeration value - -`ze_result_t`: + -[source] ----- -ZE_RESULT_ASSERT_FAILED ----- - -This value should be returned by `zeEventQueryStatus` if assert failure took -place in device-code during kernel execution. - -An example: -[source] ----- -ze_event_handle_t Event; // describes an event of kernel been submitted previously -ze_result Result = zeEventQueryStatus(Event); ----- - -If kernel failed an assertion `zeEventQueryStatus` should return -`ZE_RESULT_ASSERT_FAILED`. - - -== Modifications to Level-Zero API - -(Add to Section API Documentation / Core API / Common / Common Enums / `ze_result_t`) :: -+ --- -`ZE_RESULT_ASSERT_FAILED = 0x70000006` + -[Core] Assert failure took place in device-code during kernel execution. --- - -(Add to section API Documentation / Core API / Event / Event Functions / `zeEventQueryStatus`) :: -+ --- -Return: + -`ZE_RESULT_ASSERT_FAILED` --- - -== Issues - -None. 
- -== Revision History - -[cols="5,15,15,70"] -[grid="rows"] -[options="header"] -|======================================== -|Rev|Date|Author|Changes -|1|2021-04-09|Sergey Kanaev|*Initial public working draft* -|======================================== - -//************************************************************************ -//Other formatting suggestions: -// -//* Use *bold* text for host APIs, or [source] syntax highlighting. -//* Use +mono+ text for device APIs, or [source] syntax highlighting. -//* Use +mono+ text for extension names, types, or enum values. -//* Use _italics_ for parameters. -//************************************************************************ From 4c91fa3214ddad4a9131607679d472a9274eccec Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 7 May 2021 16:59:51 +0300 Subject: [PATCH 035/122] [SYCL] Add PoC for using global variable Signed-off-by: Sergey Kanaev --- libdevice/fallback-cassert.cpp | 23 ++++++ sycl/include/CL/sycl/device.hpp | 2 + sycl/include/CL/sycl/handler.hpp | 1 + sycl/include/CL/sycl/queue.hpp | 116 ++++++++++++++++++++++++++++- sycl/source/detail/device_impl.cpp | 4 + sycl/source/detail/device_impl.hpp | 2 + sycl/source/detail/queue_impl.cpp | 5 ++ sycl/source/detail/queue_impl.hpp | 34 ++++++++- sycl/source/device.cpp | 4 + sycl/source/queue.cpp | 16 ++++ 10 files changed, 203 insertions(+), 4 deletions(-) diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 724d4635fb0b5..c8ca4983d9ab2 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -9,10 +9,29 @@ #include "wrapper.h" #ifdef __SPIR__ + +struct AssertHappened { + int Flag = 1; +}; + +#ifndef __SYCL_GLOBAL_VAR__ +#define __SYCL_GLOBAL_VAR__ +#endif + +extern "C" __SYCL_GLOBAL_VAR__ const AssertHappened AssertHappenedMem; // declaration + +__SYCL_GLOBAL_VAR__ const AssertHappened AssertHappenedMem; // definition + static const __attribute__((opencl_constant)) char assert_fmt[] = "%s:%d: %s: global 
id: [%lu,%lu,%lu], local id: [%lu,%lu,%lu] " "Assertion `%s` failed.\n"; +static const __attribute__((opencl_constant)) char flag_output_fmt[] = "Flag = %d\n"; + +DEVICE_EXTERN_C int __devicelib_assert_read(void) { + return AssertHappenedMem.Flag; +} + DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, int32_t line, const char *func, uint64_t gid0, uint64_t gid1, @@ -27,6 +46,10 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, // (func) ? func : "", func, gid0, gid1, gid2, lid0, lid1, lid2, expr); + //AssertHappenedMem.Flag = 1; + + __spirv_ocl_printf(flag_output_fmt, AssertHappenedMem.Flag); + // FIXME: call SPIR-V unreachable instead // volatile int *die = (int *)0x0; // *die = 0xdead; diff --git a/sycl/include/CL/sycl/device.hpp b/sycl/include/CL/sycl/device.hpp index 375311b523cf6..29cdab85fd0b8 100644 --- a/sycl/include/CL/sycl/device.hpp +++ b/sycl/include/CL/sycl/device.hpp @@ -195,6 +195,8 @@ class __SYCL_EXPORT device { /// \return true if the SYCL device has the given feature. 
bool has(aspect Aspect) const; + bool is_assert_fail_supported() const; + private: shared_ptr_class impl; device(shared_ptr_class impl) : impl(impl) {} diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 4804d63c0123f..1483180499e0e 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -2287,6 +2287,7 @@ class __SYCL_EXPORT handler { access::target); friend class ::MockHandler; + friend class detail::queue_impl; template < typename TransformedArgType, int Dims, typename KernelType, diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index a69c7f7c547b5..56ac8da205e73 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -22,6 +22,22 @@ #include +struct AssertHappened { + int Flag = 0; +}; + +#if 0 +#ifndef __SYCL_GLOBAL_VAR__ +#define __SYCL_GLOBAL_VAR__ +#endif + +extern "C" __SYCL_GLOBAL_VAR__ const AssertHappened AssertHappenedMem; +#endif + +#ifdef __SYCL_DEVICE_ONLY__ +SYCL_EXTERNAL __attribute__((weak)) extern "C" int __devicelib_assert_read(); +#endif + // having _TWO_ mid-param #ifdefs makes the functions very difficult to read. 
// Here we simplify the &CodeLoc declaration to be _CODELOCPARAM(&CodeLoc) and // _CODELOCARG(&CodeLoc) Similarly, the KernelFunc param is simplified to be @@ -61,6 +77,8 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { +/*template */ class AssertFlagCopier {}; + // Forward declaration class context; class device; @@ -214,6 +232,56 @@ class __SYCL_EXPORT queue { template typename info::param_traits::return_type get_info() const; +private: +#ifndef NDEBUG + event submitAssertCapture(event &Event, queue *SecondaryQueue, const detail::code_location &CodeLoc) { + _CODELOCARG(&CodeLoc); + + AssertHappened *AH = new AssertHappened; + buffer *Buffer = new buffer{AH, range<1>{1}}; + + event CopierEv, CheckerEv; + auto CopierCGF = [&](handler &CGH) { + CGH.depends_on(Event); + + auto Acc = Buffer->get_access(CGH); + + fprintf(stderr, "About to enqueue copier\n"); + CGH.single_task([Acc] { +#ifdef __SYCL_DEVICE_ONLY__ + Acc[0].Flag = __devicelib_assert_read(); //AssertHappenedMem.Flag; +#endif // __SYCL_DEVICE_ONLY__ + }); + }; + auto CheckerCGF = [&CopierEv, AH, Buffer](handler &CGH) { + CGH.depends_on(CopierEv); + + fprintf(stderr, "About to enqueue checker\n"); + CGH.codeplay_host_task([=] { + fprintf(stderr, "Checker running!\n"); + if (AH->Flag) + abort(); + + delete Buffer; + delete AH; + }); + }; + + if (SecondaryQueue) { + CopierEv = submit_impl(CopierCGF, *SecondaryQueue, CodeLoc); + CheckerEv = submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc); + } else { + CopierEv = submit_impl(CopierCGF, CodeLoc); + CheckerEv = submit_impl(CheckerCGF, CodeLoc); + } + + return CheckerEv; + } +#endif + + bool kernelUsesAssert(const std::string &KernelName) const; + +public: /// Submits a command group function object to the queue, in order to be /// scheduled for execution on the device. 
/// @@ -223,7 +291,25 @@ class __SYCL_EXPORT queue { template event submit(T CGF _CODELOCPARAM(&CodeLoc)) { _CODELOCARG(&CodeLoc); - return submit_impl(CGF, CodeLoc); + event Event; +#ifndef NDEBUG + std::string KernelName; + Event = submit_impl(CGF, KernelName, CodeLoc); +#else + Event = submit_impl(CGF, CodeLoc); +#endif + +#ifndef NDEBUG + // assert required + if (!get_device().is_assert_fail_supported() && kernelUsesAssert(KernelName)) { + // __devicelib_assert_fail isn't supported by Device-side Runtime + // Linking against fallback impl of __devicelib_assert_fail is performed + // by program manager class + submitAssertCapture(Event, /* SecondaryQueue = */ nullptr, CodeLoc); + } +#endif // NDEBUG + + return Event; } /// Submits a command group function object to the queue, in order to be @@ -241,7 +327,26 @@ class __SYCL_EXPORT queue { event submit(T CGF, queue &SecondaryQueue _CODELOCPARAM(&CodeLoc)) { _CODELOCARG(&CodeLoc); - return submit_impl(CGF, SecondaryQueue, CodeLoc); + event Event; + +#ifndef NDEBUG + std::string KernelName; + Event = submit_impl(CGF, KernelName, SecondaryQueue, CodeLoc); +#else + Event = submit_impl(CGF, SecondaryQueue, CodeLoc); +#endif + +#ifndef NDEBUG + // assert required + if (!get_device().is_assert_fail_supported() && kernelUsesAssert(KernelName)) { + // __devicelib_assert_fail isn't supported by Device-side Runtime + // Linking against fallback impl of __devicelib_assert_fail is performed + // by program manager class + submitAssertCapture(Event, &SecondaryQueue, CodeLoc); + } +#endif // NDEBUG + + return Event; } /// Prevents any commands submitted afterward to this queue from executing @@ -753,6 +858,13 @@ class __SYCL_EXPORT queue { event submit_impl(function_class CGH, queue secondQueue, const detail::code_location &CodeLoc); + event submit_impl(function_class CGH, + std::string &KernelName, + const detail::code_location &CodeLoc); + event submit_impl(function_class CGH, queue secondQueue, + std::string &KernelName, + 
const detail::code_location &CodeLoc); + /// parallel_for_impl with a kernel represented as a lambda + range that /// specifies global size only. /// diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 85305d397987d..9dec5847b3c31 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -301,6 +301,10 @@ std::shared_ptr device_impl::getHostDeviceImpl() { return HostImpl; } +bool device_impl::isAssertFailSupported() const { + return false; +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 0e1381f933964..93e1d963faf40 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -222,6 +222,8 @@ class device_impl { /// \return the host device_impl singleton static std::shared_ptr getHostDeviceImpl(); + bool isAssertFailSupported() const; + private: explicit device_impl(pi_native_handle InteropDevice, RT::PiDevice Device, PlatformImplPtr Platform, const plugin &Plugin); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index bcbe28720ac56..e501be272e2d2 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -280,6 +280,11 @@ pi_native_handle queue_impl::getNative() const { return Handle; } +bool queue_impl::kernelUsesAssert(const std::string &KernelName) const { + // TODO check device binary image descriptor for if kernel uses assert + return true; +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index aea55006a6fba..07d56ea4638f9 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -173,7 +173,7 @@ class queue_impl { const shared_ptr_class &SecondQueue, const detail::code_location &Loc) { try { - return submit_impl(CGF, Self, Loc); 
+ return submit_impl(CGF, /*KernelName =*/ nullptr, Self, Loc); } catch (...) { { std::lock_guard Lock(MMutex); @@ -183,6 +183,22 @@ class queue_impl { } } + event submit(const function_class &CGF, + std::string &KernelName, + const shared_ptr_class &Self, + const shared_ptr_class &SecondQueue, + const detail::code_location &Loc) { + try { + return submit_impl(CGF, &KernelName, Self, Loc); + } catch (...) { + { + std::lock_guard Lock(MMutex); + MExceptions.PushBack(std::current_exception()); + } + return SecondQueue->submit(CGF, KernelName, SecondQueue, Loc); + } + } + /// Submits a command group function object to the queue, in order to be /// scheduled for execution on the device. /// @@ -193,7 +209,14 @@ class queue_impl { event submit(const function_class &CGF, const shared_ptr_class &Self, const detail::code_location &Loc) { - return submit_impl(CGF, Self, Loc); + return submit_impl(CGF, /*KernelName =*/ nullptr, Self, Loc); + } + + event submit(const function_class &CGF, + std::string &KernelName, + const shared_ptr_class &Self, + const detail::code_location &Loc) { + return submit_impl(CGF, &KernelName, Self, Loc); } /// Performs a blocking wait for the completion of all enqueued tasks in the @@ -377,6 +400,8 @@ class queue_impl { /// \return a native handle. pi_native_handle getNative() const; + bool kernelUsesAssert(const std::string &KernelName) const; + private: /// Performs command group submission to the queue. /// @@ -385,11 +410,16 @@ class queue_impl { /// \param Loc is the code location of the submit call (default argument) /// \return a SYCL event representing submitted command group. 
event submit_impl(const function_class &CGF, + std::string *KernelName, const shared_ptr_class &Self, const detail::code_location &Loc) { handler Handler(Self, MHostQueue); Handler.saveCodeLoc(Loc); CGF(Handler); + + if (KernelName && Handler.getType() == CG::KERNEL) + *KernelName = Handler.MKernelName; + event Event = Handler.finalize(); addEvent(Event); return Event; diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index e824b5faba039..435d5b3456c6a 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -162,5 +162,9 @@ pi_native_handle device::getNative() const { return impl->getNative(); } bool device::has(aspect Aspect) const { return impl->has(Aspect); } +bool device::is_assert_fail_supported() const { + return impl->isAssertFailSupported(); +} + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index e470a5c845d88..72a43623aede8 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -100,6 +100,18 @@ event queue::submit_impl(function_class CGH, queue SecondQueue, return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc); } +event queue::submit_impl(function_class CGH, + std::string &KernelName, + const detail::code_location &CodeLoc) { + return impl->submit(CGH, KernelName, impl, CodeLoc); +} + +event queue::submit_impl(function_class CGH, queue SecondQueue, + std::string &KernelName, + const detail::code_location &CodeLoc) { + return impl->submit(CGH, KernelName, impl, SecondQueue.impl, CodeLoc); +} + void queue::wait_proxy(const detail::code_location &CodeLoc) { impl->wait(CodeLoc); } @@ -143,5 +155,9 @@ backend queue::get_backend() const noexcept { return getImplBackend(impl); } pi_native_handle queue::getNative() const { return impl->getNative(); } +bool queue::kernelUsesAssert(const std::string &KernelName) const { + return impl->kernelUsesAssert(KernelName); +} + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) From 
07debdb03835d1c3f6c70656c18c7e74a91478a7 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 11 May 2021 21:42:00 +0300 Subject: [PATCH 036/122] Address comments Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 29073e3e54116..31b86c8d7caa4 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -223,17 +223,20 @@ struct _pi_device_binary_property_struct { }; ``` -There's no need for a whole new property set so we reuse `SYCL/misc properties` -property set. Whenever `isAssertEnabled` property is present, this specific -device image was built with `NDEBUG` macro undefined and it requires fallback -implementation of `__devicelib_assert_fail` (i.e. if Device-side Runtime doesn't -support it). +A distinct property set `SYCL/assert used` is added. In this set a single +with the name of the kernel is added whenever the kernel uses assert. Use of +assert is detected through call to `__devicelib_assert_fail` function after +linking device binary image with wrapper device library (the `libsycl-crt` +library). -The property is added to device binary descriptor whenever at least single -translation unit was compiled with assertions enabled i.e. `NDEBUG` undefined. +The property set and the underlying properties are added by `sycl-post-link` +tool with help of building callgraph for each and every kernel in device binary +image. -The property is added by `sycl-post-link` tool depending on module metadata. -Metadata is provided by Clang frontend. Metadata name is `is_assert_enabled`. +The added property is used for: + - deciding if online-linking against fallback devicelib is required; + - if there's a need to enqueue program scope variable copier kernel and checker + host-task. Suppose the following example user code: ```c++ @@ -353,9 +356,13 @@ descriptor structure. 
### Raising assert failure flag and reading it on host -Each and every translation unit provided by user should have `extern` -declaration of `AssertHappenedMem` i.e. DPCPP headers includes appropriate file -with [declaration](#prog-scope-var-decl). +Each and every translation unit provided by user should have declaration of +assert flag read function: +```c++ +int __devicelib_assert_read(void); +``` +Also, the [AssertHappened](#prog-scope-var-decl) structure type should be +available for the copier kernel. The definition is only provided within devicelib along with `__devicelib_assert_fail` function which raises the flag. @@ -375,8 +382,9 @@ class queue { template event submit(T CGF) { event Event = submit_impl(CGF); #ifndef NDEBUG + std::string KernelName = /* get kernel name from calls to parallel_for, etc. */; // assert required - if (!get_device()->assert_fail_supported()) { + if (!get_device()->assert_fail_supported() && isAssertUsed(KernelName)) { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is performed // by program manager class From 995e4d8b58f801fa79dc2757d332be4541fe6f9a Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 12 May 2021 10:15:16 +0300 Subject: [PATCH 037/122] Fix typo Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 31b86c8d7caa4..55f68f9abb6ca 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -223,7 +223,7 @@ struct _pi_device_binary_property_struct { }; ``` -A distinct property set `SYCL/assert used` is added. In this set a single +A distinct property set `SYCL/assert used` is added. In this set a property with the name of the kernel is added whenever the kernel uses assert. 
Use of assert is detected through call to `__devicelib_assert_fail` function after linking device binary image with wrapper device library (the `libsycl-crt` From b57ac48f17d20127b6a87ce53f12008f6013a2c3 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 12 May 2021 10:41:50 +0300 Subject: [PATCH 038/122] Fix typo Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 55f68f9abb6ca..4e205338e2f57 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -350,14 +350,12 @@ states of definedness of `NDEBUG` macro available: | 4 | undefined | undefined | States of definedness of `NDEBUG` macro defines the set of assertions which can -fail. Having assertions enabled in at least one translation unit with device -code requires for `isAssertEnabled` property set being present in device image -descriptor structure. +fail. ### Raising assert failure flag and reading it on host Each and every translation unit provided by user should have declaration of -assert flag read function: +assert flag read function available: ```c++ int __devicelib_assert_read(void); ``` From d2f13ff0495991efedefe843ea801a94f78cbcec Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 17 May 2021 13:37:42 +0300 Subject: [PATCH 039/122] Address review comments Signed-off-by: Sergey Kanaev Co-authored-by: kbobrovs --- sycl/doc/Assert.md | 95 +++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 51 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 4e205338e2f57..dc9126cc073d7 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -39,13 +39,13 @@ int main() { ``` In this use-case every work-item with even X dimension will trigger assertion -failure. Assertion failure should be trigger a call to `std::abort()` at host as +failure. 
Assertion failure should trigger a call to `std::abort()` at host as described in [extension](extensions/Assert/SYCL_INTEL_ASSERT.asciidoc). Even though multiple failures of the same or different assertions can happen in multiple workitems, implementation is required to deliver at least one assertion. The assertion failure message is printed to `stderr` by DPCPP -Runtime. +Runtime or underlying backend. When multiple kernels are enqueued and more than one fail at assertion, at least single assertion should be reported. @@ -93,7 +93,7 @@ Implementation of this function is supplied by Native Device Compiler for safe approach or by DPCPP Compiler for fallback one. In order to distinguish which implementation to use, DPCPP Runtime checks for -`cl_intel_devicelib_cassert` extension. If the extension isn't available, then +`PI_INTEL_DEVICELIB_CASSERT` extension. If the extension isn't available, then fallback implementation is used. @@ -101,7 +101,9 @@ fallback implementation is used. This is the preferred approach and implementations should use it when possible. It guarantees assertion failure notification delivery to the host regardless of -kernel behavior which hit the assertion. +kernel behavior which hit the assertion. If backend supports the safe approach, +it must report this capability to DPCPP Runtime via the +`PI_INTEL_DEVICELIB_CASSERT` extension query. The Native Device Compiler is responsible for providing implementation of `__devicelib_assert_fail` which completely hides details of communication @@ -125,10 +127,10 @@ The following sequence of events describes how user code gets notified: ## Fallback approach -If Device-side Runtime doesn't support `__devicelib_assert_fail` then a fallback -approach comes in place. The approach doesn't require any support from -Device-side Runtime and Native Device Compiler. Neither it does from Low-level -Runtime. 
+If Device-side Runtime doesn't support `__devicelib_assert_fail` (as reported +via `PI_INTEL_DEVICELIB_CASSERT` extension query) then a fallback approach comes +in place. The approach doesn't require any support from Device-side Runtime and +Native Device Compiler. Neither it does from Low-level Runtime. Within this approach, a mutable program scope variable is introduced. This variable stores a flag which says if an assert failure was encountered. Fallback @@ -147,10 +149,15 @@ The following sequence of events describes how user code gets notified: 2. A host-task is enqueued to check value of assert failure flag. 3. The host task calls abort whenever assert failure flag is set. +DPCPP Runtime will automatically check if assertions are enabled in the kernel +being run, and won't enqueue the auxiliary kernels if assertions are not +enabled. So there is no host-side runtime overhead when assertions are not +enabled. + Illustrating this with an example, lets assume the user enqueues three kernels: - - `Kernel #1` - - `Kernel #2` - - `Kernel #3`, which depends on `Kernel #1` + - `Kernel #1`, uses assert + - `Kernel #2`, uses assert + - `Kernel #3`, uses assert and depends on `Kernel #1` The resulting graph will look like this: ![graph](images/assert-fallback-graph.svg) @@ -165,9 +172,15 @@ same binary image where fallback `__devicelib_assert_fail` resides. declaration: ```c++ +namespace cl { +namespace sycl { +namespace detail { struct AssertHappened { int Flag = 0; }; +} +} +} #ifdef __SYCL_DEVICE_ONLY__ extern SYCL_GLOBAL_VAR AssertHappened AssertHappenedMem; @@ -189,49 +202,29 @@ implementation of `__devicelib_assert_fail`. In DPCPP headers one can see if assert is enabled with status of `NDEBUG` macro with `#ifdef`'s. This allows to enqueue a copy kernel and host task. The copy kernel will copy `AssertHappenedMem` to host and host-task will check the `Flag` -value and `abort()` as needed. +value and `abort()` as needed. 
The kernel and host task are enqueued when +`NDEBUG` macro isn't defined. When in DPCPP Runtime Library this knowledge is obtained from device binary image descriptor's property sets. -Each device image is supplied with an array of property sets: -```c++ -struct pi_device_binary_struct { - //... - // Array of property sets - pi_device_binary_property_set PropertySetsBegin; - pi_device_binary_property_set PropertySetsEnd; -}; -``` -Each property set is represented by the following struct: -```c++ -// Named array of properties. -struct _pi_device_binary_property_set_struct { - char *Name; // the name - pi_device_binary_property PropertiesBegin; // array start - pi_device_binary_property PropertiesEnd; // array end -}; -``` -It contains name of property set and array of properties. Each property is -represented by the following struct: -```c++ -struct _pi_device_binary_property_struct { - char *Name; // null-terminated property name - void *ValAddr; // address of property value - uint32_t Type; // _pi_property_type - uint64_t ValSize; // size of property value in bytes -}; -``` +Each device image is supplied with an array of property sets. For description +of property sets see `struct pi_device_binary_struct` in +[`pi.h`](https://github.com/intel/llvm/blob/sycl/sycl/include/CL/sycl/detail/pi.h#L692) A distinct property set `SYCL/assert used` is added. In this set a property -with the name of the kernel is added whenever the kernel uses assert. Use of -assert is detected through call to `__devicelib_assert_fail` function after -linking device binary image with wrapper device library (the `libsycl-crt` -library). - -The property set and the underlying properties are added by `sycl-post-link` -tool with help of building callgraph for each and every kernel in device binary -image. +with the name of the kernel is added whenever the kernel uses assert. 
The use of +assert is detected by a specific LLVM IR pass invoked by the `sycl-post-link` +tool which runs on linked device code, i.e. after linking with the `libsycl-crt` +library which defines the assert function. The pass builds complete call graph +for a kernel, and sees if there's a call to `__devicelib_assert_fail` anywhere +in the graph. If found, `sycl-post-link` adds the property for the kernel. + +The same is done for all indirect callable functions (marked with specific +attribute) found in the linked device code. Those are functions whose pointers +can be taken and passed around in device code. If a callgraph for any such +function has a call to `__devicelib_assert_fail`, then all kernels in the module +are conservatively marked as using asserts. The added property is used for: - deciding if online-linking against fallback devicelib is required; @@ -340,7 +333,7 @@ void workload() { ``` These two files are compiled into a single binary application. There are four -states of definedness of `NDEBUG` macro available: +states of definition of `NDEBUG` macro available: | # | `impl.cpp` | `main.cpp` | | - | ---------- | ---------- | @@ -349,12 +342,12 @@ states of definedness of `NDEBUG` macro available: | 3 | undefined | defined | | 4 | undefined | undefined | -States of definedness of `NDEBUG` macro defines the set of assertions which can +States of definition of `NDEBUG` macro defines the set of assertions which can fail. 
### Raising assert failure flag and reading it on host -Each and every translation unit provided by user should have declaration of +All translation units provided by the user should have a declaration of the assert flag read function available: ```c++ int __devicelib_assert_read(void); From 6281bc52eb11582ec6a9768f80f5a053cf676368 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 19 May 2021 16:35:10 +0300 Subject: [PATCH 040/122] Switch to __devicelib_assert_read Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index dc9126cc073d7..69ceaa0b40c3e 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -363,10 +363,11 @@ which is enqueued as dependent on user's one. The flag state is checked later in host-task. This is achieved with approximately the following changes: ```c++ -#include // contains extern decl of AssertHappenedMem - #ifndef NDEBUG class AssertFlagCopier; +#ifdef __SYCL_DEVICE_ONLY__ +int __devicelib_assert_read(void); +#endif #endif class queue { @@ -389,7 +390,9 @@ class queue { auto Acc = Buffer->get_access(CGH); CGH.single_task([=] { - Acc[0].Flag = atomic_load(&AssertHappenedMem.Flag); +#ifdef __SYCL_DEVICE_ONLY__ + Acc[0].Flag = __devicelib_assert_read(); +#endif }); }); From a5461f3d5dd86e1d740c40d6cc7cd8d4d922b385 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 19 May 2021 16:35:43 +0300 Subject: [PATCH 041/122] Remove use of NDEBUG from suggested changes Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 69ceaa0b40c3e..56c384749b3a0 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -363,17 +363,14 @@ which is enqueued as dependent on user's one. The flag state is checked later in host-task. 
This is achieved with approximately the following changes: ```c++ -#ifndef NDEBUG class AssertFlagCopier; #ifdef __SYCL_DEVICE_ONLY__ int __devicelib_assert_read(void); #endif -#endif class queue { template event submit(T CGF) { event Event = submit_impl(CGF); -#ifndef NDEBUG std::string KernelName = /* get kernel name from calls to parallel_for, etc. */; // assert required if (!get_device()->assert_fail_supported() && isAssertUsed(KernelName)) { @@ -409,7 +406,6 @@ class queue { }); }); } -#endif return Event; } }; From 32a32f46e58fa106c683ec21056a46dfc05d4c1d Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 19 May 2021 17:32:33 +0300 Subject: [PATCH 042/122] Reorder text to increase readability Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 56c384749b3a0..1a397674f49cf 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -200,13 +200,8 @@ performed only when assertion is enabled and Device-side Runtime doesn't provide implementation of `__devicelib_assert_fail`. In DPCPP headers one can see if assert is enabled with status of `NDEBUG` macro -with `#ifdef`'s. This allows to enqueue a copy kernel and host task. The copy -kernel will copy `AssertHappenedMem` to host and host-task will check the `Flag` -value and `abort()` as needed. The kernel and host task are enqueued when -`NDEBUG` macro isn't defined. - -When in DPCPP Runtime Library this knowledge is obtained from device binary -image descriptor's property sets. +with `#ifdef`'s. When in DPCPP Runtime Library this knowledge is obtained from +device binary image descriptor's property sets. Each device image is supplied with an array of property sets. For description of property sets see `struct pi_device_binary_struct` in @@ -347,6 +342,19 @@ fail. 
### Raising assert failure flag and reading it on host +In DPCPP headers one can see if assert is enabled with status of `NDEBUG` macro +with `#ifdef`'s. Though, in order to support the multi translation unit use-case +it's not allowed to rely on definition of `NDEBUG` macro. + +*Note: Multi translation unit use-case here is the one with `SYCL_EXTERNAL` +function compiled with assertions enabled and used in a kernel but the kernel +is compiled with assertions disabled.* + +There're two commands used for reading assert failure flag: copy kernel and +checker host task. The copy kernel will copy `AssertHappenedMem` to host and +host-task will check the `Flag` value and `abort()` as needed. The kernel and +host task are enqueued when `NDEBUG` macro isn't defined. + All translation units provided by the user should have a declaration of the assert flag read function available: ```c++ int __devicelib_assert_read(void); From eb49adb5bf811bc32f534a38d89d6e48d576eda2 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 20 May 2021 10:58:23 +0300 Subject: [PATCH 043/122] Put variable into namespace Signed-off-by: Sergey Kanaev --- libdevice/fallback-cassert.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index c8ca4983d9ab2..d702e0d5651fe 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -18,9 +18,13 @@ struct AssertHappened { #define __SYCL_GLOBAL_VAR__ #endif -extern "C" __SYCL_GLOBAL_VAR__ const AssertHappened AssertHappenedMem; // declaration +namespace cl { namespace sycl { namespace detail { +extern __SYCL_GLOBAL_VAR__ const AssertHappened AssertHappenedMem; // declaration +}}} +namespace cl { namespace sycl { namespace detail { __SYCL_GLOBAL_VAR__ const AssertHappened AssertHappenedMem; // definition +}}} static const __attribute__((opencl_constant)) char assert_fmt[] = "%s:%d: %s: global id: [%lu,%lu,%lu], local id: [%lu,%lu,%lu] " @@ -29,7 +33,8 @@ static const 
__attribute__((opencl_constant)) char assert_fmt[] = static const __attribute__((opencl_constant)) char flag_output_fmt[] = "Flag = %d\n"; DEVICE_EXTERN_C int __devicelib_assert_read(void) { - return AssertHappenedMem.Flag; + volatile int *Ptr = (int *)(&cl::sycl::detail::AssertHappenedMem.Flag); + return *Ptr; } DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, @@ -46,9 +51,10 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, // (func) ? func : "", func, gid0, gid1, gid2, lid0, lid1, lid2, expr); - //AssertHappenedMem.Flag = 1; + //cl::sycl::detail::AssertHappenedMem.Flag = 1; + volatile int *Ptr = (int *)(&cl::sycl::detail::AssertHappenedMem.Flag); - __spirv_ocl_printf(flag_output_fmt, AssertHappenedMem.Flag); + __spirv_ocl_printf(flag_output_fmt, *Ptr); // FIXME: call SPIR-V unreachable instead // volatile int *die = (int *)0x0; From 641d07178f240f1b3f4d63e9f1836095f274eeae Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 20 May 2021 11:58:27 +0300 Subject: [PATCH 044/122] Address review comment Signed-off-by: Sergey Kanaev Co-authored-by: kbobrovs --- sycl/doc/Assert.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 1a397674f49cf..972352450c45f 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -353,7 +353,9 @@ is compiled with assertions disabled.* There're two commands used for reading assert failure flag: copy kernel and checker host task. The copy kernel will copy `AssertHappenedMem` to host and host-task will check the `Flag` value and `abort()` as needed. The kernel and -host task are enqueued when `NDEBUG` macro isn't defined. +host task are enqueued together with a kernel only when the corresponding device +binary image for this kernel tells that it may use (maybe indirectly) the +`assert` in its code. 
All translation units provided by the user should have a declaration of the assert flag read function available: From dc058a9fe3c1deb7e51fe5cc1363f0cc489c1f9d Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 27 May 2021 13:23:09 +0300 Subject: [PATCH 045/122] Address review comments Co-authored-by: bader Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 10 +++++----- sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc | 4 +++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 972352450c45f..12b074c258665 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -38,17 +38,17 @@ int main() { } ``` -In this use-case every work-item with even X dimension will trigger assertion -failure. Assertion failure should trigger a call to `std::abort()` at host as -described in +In this use-case every work-item with even index along 0 dimension will trigger +assertion failure. Assertion failure should trigger a call to `std::abort()` at +host as described in [extension](extensions/Assert/SYCL_INTEL_ASSERT.asciidoc). Even though multiple failures of the same or different assertions can happen in -multiple workitems, implementation is required to deliver at least one +multiple work-items, implementation is required to deliver at least one assertion. The assertion failure message is printed to `stderr` by DPCPP Runtime or underlying backend. When multiple kernels are enqueued and more than one fail at assertion, at least -single assertion should be reported. +one assertion should be reported. ## User requirements diff --git a/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc index c44d29c72113d..b2ab21ea04561 100644 --- a/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc +++ b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc @@ -21,9 +21,10 @@ :cpp: C++ == Notice + IMPORTANT: This specification is a draft. 
-Copyright (c) 2021-2021 Intel Corporation. All rights reserved. +Copyright (c) 2021 Intel Corporation. All rights reserved. NOTE: Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. @@ -49,6 +50,7 @@ subject to change they are not intended to be used by shipping software products. == Introduction + This extension adds the ability for device code to call the C++ `assert()` macro. The behavior of `assert()` in device code is similar to its behavior in host code. If the asserted condition is false, a message is printed to `stderr` From 16fd8f0e5ab18626cb5860ce1229e047a6816cf1 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 27 May 2021 17:56:33 +0300 Subject: [PATCH 046/122] Add aspect Signed-off-by: Sergey Kanaev --- .../Assert/SYCL_ONEAPI_ASSERT.asciidoc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc index b2ab21ea04561..dffe35bab9958 100644 --- a/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc +++ b/sycl/doc/extensions/Assert/SYCL_ONEAPI_ASSERT.asciidoc @@ -117,6 +117,22 @@ extension’s APIs the implementation supports. |1 |Initial extension version. Base features are supported. |=== +== Extension to `enum class aspect` + +[source] +---- +namespace sycl { +enum class aspect { + ext_oneapi_native_assert +} +} +---- + +If device has the `ext_oneapi_native_assert` aspect, then its Device-Side +Runtime is capable of native support of `assert`. That is, safe implementation +is used. If device doesn't have the aspect, then fallback implementation is +used. 
+ == Version Built On: {docdate} + From 7fce0411a5920241259a6b675dbce8aff2e0a73a Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 27 May 2021 20:50:44 +0300 Subject: [PATCH 047/122] Worked on implementation Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/aspects.hpp | 3 +- sycl/include/CL/sycl/device.hpp | 4 +- sycl/include/CL/sycl/queue.hpp | 68 +++++++++++++----------------- sycl/source/detail/device_impl.cpp | 10 ++++- sycl/source/detail/queue_impl.cpp | 14 +++++- sycl/source/detail/queue_impl.hpp | 26 +++++++++--- sycl/source/device.cpp | 4 -- sycl/source/queue.cpp | 12 ++++-- 8 files changed, 82 insertions(+), 59 deletions(-) diff --git a/sycl/include/CL/sycl/aspects.hpp b/sycl/include/CL/sycl/aspects.hpp index 34d02856639aa..8ffc3c2b6d45e 100644 --- a/sycl/include/CL/sycl/aspects.hpp +++ b/sycl/include/CL/sycl/aspects.hpp @@ -38,7 +38,8 @@ enum class aspect { ext_intel_gpu_subslices_per_slice = 22, ext_intel_gpu_eu_count_per_subslice = 23, ext_intel_max_mem_bandwidth = 24, - ext_intel_mem_channel = 25 + ext_intel_mem_channel = 25, + ext_oneapi_native_assert = 26, }; } // namespace sycl diff --git a/sycl/include/CL/sycl/device.hpp b/sycl/include/CL/sycl/device.hpp index 29cdab85fd0b8..62d180bf388fc 100644 --- a/sycl/include/CL/sycl/device.hpp +++ b/sycl/include/CL/sycl/device.hpp @@ -195,7 +195,9 @@ class __SYCL_EXPORT device { /// \return true if the SYCL device has the given feature. 
bool has(aspect Aspect) const; - bool is_assert_fail_supported() const; + bool is_assert_fail_supported() const { + return has(aspect::ext_oneapi_native_assert); + } private: shared_ptr_class impl; diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 56ac8da205e73..853a9abeb5859 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include #include #include @@ -22,22 +23,6 @@ #include -struct AssertHappened { - int Flag = 0; -}; - -#if 0 -#ifndef __SYCL_GLOBAL_VAR__ -#define __SYCL_GLOBAL_VAR__ -#endif - -extern "C" __SYCL_GLOBAL_VAR__ const AssertHappened AssertHappenedMem; -#endif - -#ifdef __SYCL_DEVICE_ONLY__ -SYCL_EXTERNAL __attribute__((weak)) extern "C" int __devicelib_assert_read(); -#endif - // having _TWO_ mid-param #ifdefs makes the functions very difficult to read. // Here we simplify the &CodeLoc declaration to be _CODELOCPARAM(&CodeLoc) and // _CODELOCARG(&CodeLoc) Similarly, the KernelFunc param is simplified to be @@ -233,12 +218,21 @@ class __SYCL_EXPORT queue { typename info::param_traits::return_type get_info() const; private: -#ifndef NDEBUG - event submitAssertCapture(event &Event, queue *SecondaryQueue, const detail::code_location &CodeLoc) { + /** + * Submit copy task for assert failure flag and host-task to check the flag + * \param Event kernel's event to depend on i.e. 
the event represents the + * kernel to check for assertion failure + * \param SecondaryQueue secondary queue for submit process, null if not used + * \returns host tasks event + */ + event submitAssertCapture(event &Event, queue *SecondaryQueue, + const detail::code_location &CodeLoc) { _CODELOCARG(&CodeLoc); - AssertHappened *AH = new AssertHappened; - buffer *Buffer = new buffer{AH, range<1>{1}}; + using AHBufT = buffer; + + detail::AssertHappened *AH = new detail::AssertHappened; + AHBufT *Buffer = new AHBufT{AH, range<1>{1}}; event CopierEv, CheckerEv; auto CopierCGF = [&](handler &CGH) { @@ -250,6 +244,8 @@ class __SYCL_EXPORT queue { CGH.single_task([Acc] { #ifdef __SYCL_DEVICE_ONLY__ Acc[0].Flag = __devicelib_assert_read(); //AssertHappenedMem.Flag; +#else + (void)Acc; #endif // __SYCL_DEVICE_ONLY__ }); }; @@ -277,9 +273,10 @@ class __SYCL_EXPORT queue { return CheckerEv; } -#endif - bool kernelUsesAssert(const std::string &KernelName) const; + // Check if kernel with the name provided in KernelName and which is being + // enqueued and can be waited on by Event uses assert + bool kernelUsesAssert(event &Event, const std::string &KernelName) const; public: /// Submits a command group function object to the queue, in order to be @@ -292,22 +289,18 @@ class __SYCL_EXPORT queue { _CODELOCARG(&CodeLoc); event Event; -#ifndef NDEBUG std::string KernelName; - Event = submit_impl(CGF, KernelName, CodeLoc); -#else - Event = submit_impl(CGF, CodeLoc); -#endif + bool IsKernel = false; + Event = submit_impl(CGF, KernelName, IsKernel, CodeLoc); -#ifndef NDEBUG // assert required - if (!get_device().is_assert_fail_supported() && kernelUsesAssert(KernelName)) { + if (IsKernel && !get_device().is_assert_fail_supported() && + kernelUsesAssert(KernelName)) { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is performed // by program manager class submitAssertCapture(Event, /* SecondaryQueue = */ 
nullptr, CodeLoc); } -#endif // NDEBUG return Event; } @@ -328,23 +321,18 @@ class __SYCL_EXPORT queue { _CODELOCARG(&CodeLoc); event Event; - -#ifndef NDEBUG std::string KernelName; - Event = submit_impl(CGF, KernelName, SecondaryQueue, CodeLoc); -#else - Event = submit_impl(CGF, SecondaryQueue, CodeLoc); -#endif + bool IsKernel = false; + Event = submit_impl(CGF, KernelName, IsKernel, SecondaryQueue, CodeLoc); -#ifndef NDEBUG // assert required - if (!get_device().is_assert_fail_supported() && kernelUsesAssert(KernelName)) { + if (IsKernel && !get_device().is_assert_fail_supported() && + kernelUsesAssert(KernelName)) { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is performed // by program manager class submitAssertCapture(Event, &SecondaryQueue, CodeLoc); } -#endif // NDEBUG return Event; } @@ -860,9 +848,11 @@ class __SYCL_EXPORT queue { event submit_impl(function_class CGH, std::string &KernelName, + bool &IsKernel, const detail::code_location &CodeLoc); event submit_impl(function_class CGH, queue secondQueue, std::string &KernelName, + bool &IsKernel, const detail::code_location &CodeLoc); /// parallel_for_impl with a kernel represented as a lambda + range that diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 9dec5847b3c31..6abbe8bfbf74f 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -287,6 +287,8 @@ bool device_impl::has(aspect Aspect) const { case aspect::ext_intel_max_mem_bandwidth: // currently not supported return false; + case aspect::ext_oneapi_native_assert: + rerutn isAssertFailSupported(); default: throw runtime_error("This device aspect has not been implemented yet.", @@ -302,7 +304,13 @@ std::shared_ptr device_impl::getHostDeviceImpl() { } bool device_impl::isAssertFailSupported() const { - return false; + plugin &Plugin = getPlugin(); + + // assume CUDA supports native asserts by 
default + if (Plugin.getBackend() == backend::cuda) + return true; + + return has_extension("cl_intel_devicelib_cassert"); } } // namespace detail diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 58bb694443f4d..66bd102af270a 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -287,8 +287,18 @@ pi_native_handle queue_impl::getNative() const { return Handle; } -bool queue_impl::kernelUsesAssert(const std::string &KernelName) const { - // TODO check device binary image descriptor for if kernel uses assert +bool queue_impl::kernelUsesAssert(event &Event, + const std::string &KernelName) const { + Scheduler &Sched = Scheduler::getInstance(); + std::shared_lock Lock(Sched.MGraphLock); + + EventImplPtr &EventPtr = detail::getSyclObjImpl(Event); + + Command *Cmd = EventPtr->getCommand(); + + // TODO get device binary image out of command in the way its performed in + // ExecCGCommand::enqueueImp @ CGTYPE::KERNEL + return true; } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 07d56ea4638f9..c379a422f9360 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -173,7 +173,8 @@ class queue_impl { const shared_ptr_class &SecondQueue, const detail::code_location &Loc) { try { - return submit_impl(CGF, /*KernelName =*/ nullptr, Self, Loc); + bool Dummy; + return submit_impl(CGF, /*KernelName =*/ nullptr, Dummy, Self, Loc); } catch (...) { { std::lock_guard Lock(MMutex); @@ -185,17 +186,18 @@ class queue_impl { event submit(const function_class &CGF, std::string &KernelName, + bool &IsKernel const shared_ptr_class &Self, const shared_ptr_class &SecondQueue, const detail::code_location &Loc) { try { - return submit_impl(CGF, &KernelName, Self, Loc); + return submit_impl(CGF, &KernelName, IsKernel, Self, Loc); } catch (...) 
{ { std::lock_guard Lock(MMutex); MExceptions.PushBack(std::current_exception()); } - return SecondQueue->submit(CGF, KernelName, SecondQueue, Loc); + return SecondQueue->submit(CGF, KernelName, IsKernel, SecondQueue, Loc); } } @@ -209,14 +211,16 @@ class queue_impl { event submit(const function_class &CGF, const shared_ptr_class &Self, const detail::code_location &Loc) { - return submit_impl(CGF, /*KernelName =*/ nullptr, Self, Loc); + bool Dummy; + return submit_impl(CGF, /*KernelName =*/ nullptr, Dummy, Self, Loc); } event submit(const function_class &CGF, std::string &KernelName, + bool &IsKernel, const shared_ptr_class &Self, const detail::code_location &Loc) { - return submit_impl(CGF, &KernelName, Self, Loc); + return submit_impl(CGF, &KernelName, IsKernel, Self, Loc); } /// Performs a blocking wait for the completion of all enqueued tasks in the @@ -400,25 +404,33 @@ class queue_impl { /// \return a native handle. pi_native_handle getNative() const; - bool kernelUsesAssert(const std::string &KernelName) const; + bool kernelUsesAssert(event &Event, const std::string &KernelName) const; private: /// Performs command group submission to the queue. /// /// \param CGF is a function object containing command group. + /// \param[out] KernelName the name of kernel being submitted + /// \param[out] IsKernel set to true if kernel was submitted /// \param Self is a pointer to this queue. /// \param Loc is the code location of the submit call (default argument) /// \return a SYCL event representing submitted command group. + /// + /// KernelName is null if the caller doesn't want the kernel name. The object + /// is modified if and only if there was a kernel submit. 
event submit_impl(const function_class &CGF, std::string *KernelName, + bool &IsKernel, const shared_ptr_class &Self, const detail::code_location &Loc) { handler Handler(Self, MHostQueue); Handler.saveCodeLoc(Loc); CGF(Handler); - if (KernelName && Handler.getType() == CG::KERNEL) + IsKernel = Handler.getType() == CG::KERNEL; + if (KernelName && IsKernel) { *KernelName = Handler.MKernelName; + } event Event = Handler.finalize(); addEvent(Event); diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 435d5b3456c6a..e824b5faba039 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -162,9 +162,5 @@ pi_native_handle device::getNative() const { return impl->getNative(); } bool device::has(aspect Aspect) const { return impl->has(Aspect); } -bool device::is_assert_fail_supported() const { - return impl->isAssertFailSupported(); -} - } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 72a43623aede8..bb48c85e6f3c3 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -102,14 +102,17 @@ event queue::submit_impl(function_class CGH, queue SecondQueue, event queue::submit_impl(function_class CGH, std::string &KernelName, + bool &IsKernel, const detail::code_location &CodeLoc) { - return impl->submit(CGH, KernelName, impl, CodeLoc); + return impl->submit(CGH, KernelName, IsKernel, impl, CodeLoc); } event queue::submit_impl(function_class CGH, queue SecondQueue, std::string &KernelName, + bool &IsKernel, const detail::code_location &CodeLoc) { - return impl->submit(CGH, KernelName, impl, SecondQueue.impl, CodeLoc); + return impl->submit(CGH, KernelName, IsKernel, impl, SecondQueue.impl, + CodeLoc); } void queue::wait_proxy(const detail::code_location &CodeLoc) { @@ -155,8 +158,9 @@ backend queue::get_backend() const noexcept { return getImplBackend(impl); } pi_native_handle queue::getNative() const { return impl->getNative(); } -bool queue::kernelUsesAssert(const std::string 
&KernelName) const { - return impl->kernelUsesAssert(KernelName); +bool queue::kernelUsesAssert(event &Event, + const std::string &KernelName) const { + return impl->kernelUsesAssert(Event, KernelName); } } // namespace sycl From 3f3e1f52bda27b4c7f8c19863180c00fd045c44a Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 31 May 2021 14:29:56 +0300 Subject: [PATCH 048/122] Worked on implementation Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 16 ++++++++++--- sycl/source/detail/device_impl.cpp | 4 ++-- sycl/source/detail/queue_impl.cpp | 11 +-------- sycl/source/detail/queue_impl.hpp | 2 +- sycl/source/detail/scheduler/scheduler.cpp | 26 ++++++++++++++++++++++ sycl/source/detail/scheduler/scheduler.hpp | 4 +++- 6 files changed, 46 insertions(+), 17 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 853a9abeb5859..325d3292d57b6 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -229,7 +229,7 @@ class __SYCL_EXPORT queue { const detail::code_location &CodeLoc) { _CODELOCARG(&CodeLoc); - using AHBufT = buffer; + using AHBufT = buffer; detail::AssertHappened *AH = new detail::AssertHappened; AHBufT *Buffer = new AHBufT{AH, range<1>{1}}; @@ -289,18 +289,23 @@ class __SYCL_EXPORT queue { _CODELOCARG(&CodeLoc); event Event; + +#ifndef SYCL_DISABLE_FALLBACK_ASSERT std::string KernelName; bool IsKernel = false; Event = submit_impl(CGF, KernelName, IsKernel, CodeLoc); // assert required if (IsKernel && !get_device().is_assert_fail_supported() && - kernelUsesAssert(KernelName)) { + kernelUsesAssert(Event, KernelName)) { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is performed // by program manager class submitAssertCapture(Event, /* SecondaryQueue = */ nullptr, CodeLoc); } +#else + Event = submit_impl(CGF, CodeLoc); +#endif return Event; } @@ -321,18 +326,23 @@ class __SYCL_EXPORT queue { 
_CODELOCARG(&CodeLoc); event Event; + +#ifndef SYCL_DISABLE_FALLBACK_ASSERT std::string KernelName; bool IsKernel = false; Event = submit_impl(CGF, KernelName, IsKernel, SecondaryQueue, CodeLoc); // assert required if (IsKernel && !get_device().is_assert_fail_supported() && - kernelUsesAssert(KernelName)) { + kernelUsesAssert(Event, KernelName)) { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is performed // by program manager class submitAssertCapture(Event, &SecondaryQueue, CodeLoc); } +#else + Event = submit_impl(CGF, SecondaryQueue, CodeLoc); +#endif return Event; } diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 6abbe8bfbf74f..4e96451e1910b 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -288,7 +288,7 @@ bool device_impl::has(aspect Aspect) const { // currently not supported return false; case aspect::ext_oneapi_native_assert: - rerutn isAssertFailSupported(); + return isAssertFailSupported(); default: throw runtime_error("This device aspect has not been implemented yet.", @@ -304,7 +304,7 @@ std::shared_ptr device_impl::getHostDeviceImpl() { } bool device_impl::isAssertFailSupported() const { - plugin &Plugin = getPlugin(); + const plugin &Plugin = getPlugin(); // assume CUDA supports native asserts by default if (Plugin.getBackend() == backend::cuda) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 66bd102af270a..a6b6f46599553 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -290,16 +290,7 @@ pi_native_handle queue_impl::getNative() const { bool queue_impl::kernelUsesAssert(event &Event, const std::string &KernelName) const { Scheduler &Sched = Scheduler::getInstance(); - std::shared_lock Lock(Sched.MGraphLock); - - EventImplPtr &EventPtr = detail::getSyclObjImpl(Event); - - Command *Cmd = EventPtr->getCommand(); - 
- // TODO get device binary image out of command in the way its performed in - // ExecCGCommand::enqueueImp @ CGTYPE::KERNEL - - return true; + return Sched.kernelUsesAssert(Event, KernelName); } } // namespace detail diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index c379a422f9360..d075c67bb90f2 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -186,7 +186,7 @@ class queue_impl { event submit(const function_class &CGF, std::string &KernelName, - bool &IsKernel + bool &IsKernel, const shared_ptr_class &Self, const shared_ptr_class &SecondQueue, const detail::code_location &Loc) { diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 995b6e2a13ac8..414d3a187e78d 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -362,6 +362,32 @@ MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { return Req->MSYCLMemObj->MRecord.get(); } +bool +Scheduler::kernelUsesAssert(event &Event, const std::string &KernelName) const { + std::shared_lock Lock(MGraphLock); + + EventImplPtr EventPtr = detail::getSyclObjImpl(Event); + + Command *_Cmd = static_cast(EventPtr->getCommand()); + + assert((_Cmd->getType() == Command::RUN_CG) && + "Only RUN_CG command can use asserts"); + + ExecCGCommand *Cmd = static_cast(_Cmd); + CG &_CG = Cmd->getCG(); + + assert((_CG.getType() == CG::CGTYPE::KERNEL) && + "Only kernel can use asserts"); + + CGExecKernel &CmdGroup = static_cast(_CG); + (void)CmdGroup; + + // TODO get device binary image out of command in the way its performed in + // ExecCGCommand::enqueueImp @ CGTYPE::KERNEL + + return false; +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 917fc0e1a3ee3..ef213cbcf578b 100644 --- 
a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -446,6 +446,8 @@ class Scheduler { static MemObjRecord *getMemObjRecord(const Requirement *const Req); + bool kernelUsesAssert(event &Event, const std::string &KernelName) const; + Scheduler(); ~Scheduler(); @@ -726,7 +728,7 @@ class Scheduler { GraphBuilder MGraphBuilder; // TODO: after switching to C++17, change std::shared_timed_mutex to // std::shared_mutex - std::shared_timed_mutex MGraphLock; + mutable std::shared_timed_mutex MGraphLock; QueueImplPtr DefaultHostQueue; From 1490e918f1c111073e40bee552d63471c3025068 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 31 May 2021 18:10:14 +0300 Subject: [PATCH 049/122] Worked on implementation Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/pi.h | 1 + sycl/include/CL/sycl/detail/pi.hpp | 4 +++ sycl/include/CL/sycl/queue.hpp | 20 +++++-------- sycl/source/detail/pi.cpp | 1 + sycl/source/detail/queue_impl.cpp | 33 ++++++++++++++++++++-- sycl/source/detail/queue_impl.hpp | 19 ++++--------- sycl/source/detail/scheduler/scheduler.cpp | 26 ----------------- sycl/source/detail/scheduler/scheduler.hpp | 3 +- sycl/source/queue.cpp | 17 ++++------- 9 files changed, 56 insertions(+), 68 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index e308f5e8f63e2..e1281b9417357 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -683,6 +683,7 @@ static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4; #define __SYCL_PI_PROPERTY_SET_KERNEL_PARAM_OPT_INFO "SYCL/kernel param opt" /// PropertySetRegistry::SYCL_MISC_PROP defined in PropertySetIO.h #define __SYCL_PI_PROPERTY_SET_SYCL_MISC_PROP "SYCL/misc properties" +#define __SYCL_PI_PROPERTY_SET_SYCL_ASSERT_USED "SYCL/assert used" /// This struct is a record of the device binary information. 
If the Kind field /// denotes a portable binary type (SPIR-V or LLVM IR), the DeviceTargetSpec diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp index e06ae106e65e7..36644329abf81 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -330,6 +330,9 @@ class DeviceBinaryImage { const PropertyRange &getKernelParamOptInfo() const { return KernelParamOptInfo; } + const PropertyRange &getAssertUsed() const { + return AssertUsed; + } virtual ~DeviceBinaryImage() {} protected: @@ -341,6 +344,7 @@ class DeviceBinaryImage { DeviceBinaryImage::PropertyRange SpecConstIDMap; DeviceBinaryImage::PropertyRange DeviceLibReqMask; DeviceBinaryImage::PropertyRange KernelParamOptInfo; + DeviceBinaryImage::PropertyRange AssertUsed; }; /// Tries to determine the device binary image foramat. Returns diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 325d3292d57b6..31c317dd4595d 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -276,7 +276,7 @@ class __SYCL_EXPORT queue { // Check if kernel with the name provided in KernelName and which is being // enqueued and can be waited on by Event uses assert - bool kernelUsesAssert(event &Event, const std::string &KernelName) const; + bool kernelUsesAssert(event &Event) const; public: /// Submits a command group function object to the queue, in order to be @@ -291,13 +291,12 @@ class __SYCL_EXPORT queue { event Event; #ifndef SYCL_DISABLE_FALLBACK_ASSERT - std::string KernelName; bool IsKernel = false; - Event = submit_impl(CGF, KernelName, IsKernel, CodeLoc); + Event = submit_impl(CGF, IsKernel, CodeLoc); // assert required if (IsKernel && !get_device().is_assert_fail_supported() && - kernelUsesAssert(Event, KernelName)) { + kernelUsesAssert(Event)) { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is performed // by program manager 
class @@ -328,13 +327,12 @@ class __SYCL_EXPORT queue { event Event; #ifndef SYCL_DISABLE_FALLBACK_ASSERT - std::string KernelName; bool IsKernel = false; - Event = submit_impl(CGF, KernelName, IsKernel, SecondaryQueue, CodeLoc); + Event = submit_impl(CGF, IsKernel, SecondaryQueue, CodeLoc); // assert required if (IsKernel && !get_device().is_assert_fail_supported() && - kernelUsesAssert(Event, KernelName)) { + kernelUsesAssert(Event)) { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is performed // by program manager class @@ -857,13 +855,9 @@ class __SYCL_EXPORT queue { const detail::code_location &CodeLoc); event submit_impl(function_class CGH, - std::string &KernelName, - bool &IsKernel, - const detail::code_location &CodeLoc); + bool &IsKernel, const detail::code_location &CodeLoc); event submit_impl(function_class CGH, queue secondQueue, - std::string &KernelName, - bool &IsKernel, - const detail::code_location &CodeLoc); + bool &IsKernel, const detail::code_location &CodeLoc); /// parallel_for_impl with a kernel represented as a lambda + range that /// specifies global size only. 
diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index f5a393d2c0d82..4bfccbf6ff513 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -617,6 +617,7 @@ void DeviceBinaryImage::init(pi_device_binary Bin) { SpecConstIDMap.init(Bin, __SYCL_PI_PROPERTY_SET_SPEC_CONST_MAP); DeviceLibReqMask.init(Bin, __SYCL_PI_PROPERTY_SET_DEVICELIB_REQ_MASK); KernelParamOptInfo.init(Bin, __SYCL_PI_PROPERTY_SET_KERNEL_PARAM_OPT_INFO); + AssertUsed.init(Bin, __SYCL_PI_PROPERTY_SET_SYCL_ASSERT_USED); } } // namespace pi diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index a6b6f46599553..f682688890a2a 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -287,10 +287,37 @@ pi_native_handle queue_impl::getNative() const { return Handle; } -bool queue_impl::kernelUsesAssert(event &Event, - const std::string &KernelName) const { +bool queue_impl::kernelUsesAssert(event &Event) const { Scheduler &Sched = Scheduler::getInstance(); - return Sched.kernelUsesAssert(Event, KernelName); + std::shared_lock Lock(Sched.MGraphLock); + + EventImplPtr EventPtr = detail::getSyclObjImpl(Event); + + Command *_Cmd = static_cast(EventPtr->getCommand()); + + assert((_Cmd->getType() == Command::RUN_CG) && + "Only RUN_CG command can use asserts"); + + ExecCGCommand *Cmd = static_cast(_Cmd); + CG &_CG = Cmd->getCG(); + + assert((_CG.getType() == CG::CGTYPE::KERNEL) && + "Only kernel can use asserts"); + + CGExecKernel &CmdGroup = static_cast(_CG); + + RTDeviceBinaryImage &BinImg = ProgramManager::getInstance().getDeviceImage( + CmdGroup.MOSModuleHandle, CmdGroup.MKernelName, get_context(), + get_device()); + + const pi::DeviceBinaryImage::PropertyRange &AssertUsedRange = + Img->getAssertUsed(); + if (AssertUsedRange.isAvailable()) + for (const auto &Prop : AssertUsedRange) + if (Prop->Name == CmdGroup.MKernelName) + return true; + + return false; } } // namespace detail diff --git 
a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index d075c67bb90f2..3fe4602497125 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -174,7 +174,7 @@ class queue_impl { const detail::code_location &Loc) { try { bool Dummy; - return submit_impl(CGF, /*KernelName =*/ nullptr, Dummy, Self, Loc); + return submit_impl(CGF, Dummy, Self, Loc); } catch (...) { { std::lock_guard Lock(MMutex); @@ -185,19 +185,18 @@ class queue_impl { } event submit(const function_class &CGF, - std::string &KernelName, bool &IsKernel, const shared_ptr_class &Self, const shared_ptr_class &SecondQueue, const detail::code_location &Loc) { try { - return submit_impl(CGF, &KernelName, IsKernel, Self, Loc); + return submit_impl(CGF, IsKernel, Self, Loc); } catch (...) { { std::lock_guard Lock(MMutex); MExceptions.PushBack(std::current_exception()); } - return SecondQueue->submit(CGF, KernelName, IsKernel, SecondQueue, Loc); + return SecondQueue->submit(CGF, IsKernel, SecondQueue, Loc); } } @@ -212,15 +211,14 @@ class queue_impl { const shared_ptr_class &Self, const detail::code_location &Loc) { bool Dummy; - return submit_impl(CGF, /*KernelName =*/ nullptr, Dummy, Self, Loc); + return submit_impl(CGF, Dummy, Self, Loc); } event submit(const function_class &CGF, - std::string &KernelName, bool &IsKernel, const shared_ptr_class &Self, const detail::code_location &Loc) { - return submit_impl(CGF, &KernelName, IsKernel, Self, Loc); + return submit_impl(CGF, IsKernel, Self, Loc); } /// Performs a blocking wait for the completion of all enqueued tasks in the @@ -404,13 +402,12 @@ class queue_impl { /// \return a native handle. pi_native_handle getNative() const; - bool kernelUsesAssert(event &Event, const std::string &KernelName) const; + bool kernelUsesAssert(event &Event) const; private: /// Performs command group submission to the queue. /// /// \param CGF is a function object containing command group. 
- /// \param[out] KernelName the name of kernel being submit /// \param[out] IsKernel set to true if kernel was submit /// \param Self is a pointer to this queue. /// \param Loc is the code location of the submit call (default argument) @@ -419,7 +416,6 @@ class queue_impl { /// KernelName is null if the caller doesn't want the kernel name. The object /// is modified if and only if there was a kernel submit. event submit_impl(const function_class &CGF, - std::string *KernelName, bool &IsKernel, const shared_ptr_class &Self, const detail::code_location &Loc) { @@ -428,9 +424,6 @@ class queue_impl { CGF(Handler); IsKernel = Handler.getType() == CG::KERNEL; - if (KernelName && IsKernel) { - *KernelName = Handler.MKernelName; - } event Event = Handler.finalize(); addEvent(Event); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 414d3a187e78d..995b6e2a13ac8 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -362,32 +362,6 @@ MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { return Req->MSYCLMemObj->MRecord.get(); } -bool -Scheduler::kernelUsesAssert(event &Event, const std::string &KernelName) const { - std::shared_lock Lock(MGraphLock); - - EventImplPtr EventPtr = detail::getSyclObjImpl(Event); - - Command *_Cmd = static_cast(EventPtr->getCommand()); - - assert((_Cmd->getType() == Command::RUN_CG) && - "Only RUN_CG command can use asserts"); - - ExecCGCommand *Cmd = static_cast(_Cmd); - CG &_CG = Cmd->getCG(); - - assert((_CG.getType() == CG::CGTYPE::KERNEL) && - "Only kernel can use asserts"); - - CGExecKernel &CmdGroup = static_cast(_CG); - (void)CmdGroup; - - // TODO get device binary image out of command in the way its performed in - // ExecCGCommand::enqueueImp @ CGTYPE::KERNEL - - return false; -} - } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/scheduler/scheduler.hpp 
b/sycl/source/detail/scheduler/scheduler.hpp index ef213cbcf578b..82ee222e1bc06 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -446,8 +446,6 @@ class Scheduler { static MemObjRecord *getMemObjRecord(const Requirement *const Req); - bool kernelUsesAssert(event &Event, const std::string &KernelName) const; - Scheduler(); ~Scheduler(); @@ -734,6 +732,7 @@ class Scheduler { friend class Command; friend class DispatchHostTask; + friend class queue_impl; /// Stream buffers structure. /// diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index bb48c85e6f3c3..3b27775279f90 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -101,17 +101,13 @@ event queue::submit_impl(function_class CGH, queue SecondQueue, } event queue::submit_impl(function_class CGH, - std::string &KernelName, - bool &IsKernel, - const detail::code_location &CodeLoc) { - return impl->submit(CGH, KernelName, IsKernel, impl, CodeLoc); + bool &IsKernel, const detail::code_location &CodeLoc) { + return impl->submit(CGH, IsKernel, impl, CodeLoc); } event queue::submit_impl(function_class CGH, queue SecondQueue, - std::string &KernelName, - bool &IsKernel, - const detail::code_location &CodeLoc) { - return impl->submit(CGH, KernelName, IsKernel, impl, SecondQueue.impl, + bool &IsKernel, const detail::code_location &CodeLoc) { + return impl->submit(CGH, IsKernel, impl, SecondQueue.impl, CodeLoc); } @@ -158,9 +154,8 @@ backend queue::get_backend() const noexcept { return getImplBackend(impl); } pi_native_handle queue::getNative() const { return impl->getNative(); } -bool queue::kernelUsesAssert(event &Event, - const std::string &KernelName) const { - return impl->kernelUsesAssert(Event, KernelName); +bool queue::kernelUsesAssert(event &Event) const { + return impl->kernelUsesAssert(Event); } } // namespace sycl From 44004d4b6e4a5b21382f91384396cab86f9bd932 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 1 Jun 2021 11:45:37 
+0300 Subject: [PATCH 050/122] Worked on implementation Signed-off-by: Sergey Kanaev --- sycl/source/detail/queue_impl.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index f682688890a2a..6eb5e8a8c389a 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -291,6 +291,10 @@ bool queue_impl::kernelUsesAssert(event &Event) const { Scheduler &Sched = Scheduler::getInstance(); std::shared_lock Lock(Sched.MGraphLock); + // FIXME remove unwanted lines after sycl-post-link tool changes +#ifndef __SYCL_POST_LINK_TOOL_ADDS_ASSERT_USED_PROPERTY_SET + return true; +#else EventImplPtr EventPtr = detail::getSyclObjImpl(Event); Command *_Cmd = static_cast(EventPtr->getCommand()); @@ -311,13 +315,14 @@ bool queue_impl::kernelUsesAssert(event &Event) const { get_device()); const pi::DeviceBinaryImage::PropertyRange &AssertUsedRange = - Img->getAssertUsed(); + BinImg.getAssertUsed(); if (AssertUsedRange.isAvailable()) for (const auto &Prop : AssertUsedRange) if (Prop->Name == CmdGroup.MKernelName) return true; return false; +#endif } } // namespace detail From fc979817d53c4f3f6efe99039494a65a84e485c7 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 1 Jun 2021 13:11:10 +0300 Subject: [PATCH 051/122] Fix code style issues. 
Code clean-up Signed-off-by: Sergey Kanaev --- libdevice/fallback-cassert.cpp | 31 +++++++++++++++----------- sycl/include/CL/sycl/detail/pi.hpp | 4 +--- sycl/include/CL/sycl/queue.hpp | 22 +++++++++++++------ sycl/source/detail/queue_impl.hpp | 35 +++++------------------------- sycl/source/queue.cpp | 14 ++++++------ 5 files changed, 46 insertions(+), 60 deletions(-) diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index d702e0d5651fe..d79898f9b3ed1 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -11,27 +11,34 @@ #ifdef __SPIR__ struct AssertHappened { - int Flag = 1; + int Flag = 0; }; #ifndef __SYCL_GLOBAL_VAR__ -#define __SYCL_GLOBAL_VAR__ +#define __SYCL_GLOBAL_VAR__ /*__attribute__((sycl_global_var))*/ #endif -namespace cl { namespace sycl { namespace detail { -extern __SYCL_GLOBAL_VAR__ const AssertHappened AssertHappenedMem; // declaration -}}} +namespace cl { +namespace sycl { +namespace detail { +extern __SYCL_GLOBAL_VAR__ const + AssertHappened AssertHappenedMem; // declaration +} // namespace detail +} // namespace sycl +} // namespace cl -namespace cl { namespace sycl { namespace detail { +namespace cl { +namespace sycl { +namespace detail { __SYCL_GLOBAL_VAR__ const AssertHappened AssertHappenedMem; // definition -}}} +} // namespace detail +} // namespace sycl +} // namespace cl static const __attribute__((opencl_constant)) char assert_fmt[] = "%s:%d: %s: global id: [%lu,%lu,%lu], local id: [%lu,%lu,%lu] " "Assertion `%s` failed.\n"; -static const __attribute__((opencl_constant)) char flag_output_fmt[] = "Flag = %d\n"; - DEVICE_EXTERN_C int __devicelib_assert_read(void) { volatile int *Ptr = (int *)(&cl::sycl::detail::AssertHappenedMem.Flag); return *Ptr; @@ -51,10 +58,8 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, // (func) ? 
func : "", func, gid0, gid1, gid2, lid0, lid1, lid2, expr); - //cl::sycl::detail::AssertHappenedMem.Flag = 1; - volatile int *Ptr = (int *)(&cl::sycl::detail::AssertHappenedMem.Flag); - - __spirv_ocl_printf(flag_output_fmt, *Ptr); + // FIXME uncomment the following line after clang changes + // cl::sycl::detail::AssertHappenedMem.Flag = 1; // FIXME: call SPIR-V unreachable instead // volatile int *die = (int *)0x0; diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp index 36644329abf81..b6ae054ecabd2 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -330,9 +330,7 @@ class DeviceBinaryImage { const PropertyRange &getKernelParamOptInfo() const { return KernelParamOptInfo; } - const PropertyRange &getAssertUsed() const { - return AssertUsed; - } + const PropertyRange &getAssertUsed() const { return AssertUsed; } virtual ~DeviceBinaryImage() {} protected: diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 31c317dd4595d..3a12d53892c5d 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -240,10 +240,9 @@ class __SYCL_EXPORT queue { auto Acc = Buffer->get_access(CGH); - fprintf(stderr, "About to enqueue copier\n"); CGH.single_task([Acc] { #ifdef __SYCL_DEVICE_ONLY__ - Acc[0].Flag = __devicelib_assert_read(); //AssertHappenedMem.Flag; + Acc[0].Flag = __devicelib_assert_read(); #else (void)Acc; #endif // __SYCL_DEVICE_ONLY__ @@ -252,11 +251,9 @@ class __SYCL_EXPORT queue { auto CheckerCGF = [&CopierEv, AH, Buffer](handler &CGH) { CGH.depends_on(CopierEv); - fprintf(stderr, "About to enqueue checker\n"); CGH.codeplay_host_task([=] { - fprintf(stderr, "Checker running!\n"); if (AH->Flag) - abort(); + abort(); // no need to release memory as it's abort anyway delete Buffer; delete AH; @@ -854,8 +851,19 @@ class __SYCL_EXPORT queue { event submit_impl(function_class CGH, queue secondQueue, const detail::code_location &CodeLoc); - event 
submit_impl(function_class CGH, - bool &IsKernel, const detail::code_location &CodeLoc); + /// A template-free version of submit. + /// \param CGH command group function/handler + /// \param[out] IsKernel set by callee to \c true if CGH represents a kernel + /// submit + /// \param CodeLoc code location + event submit_impl(function_class CGH, bool &IsKernel, + const detail::code_location &CodeLoc); + /// A template-free version of submit. + /// \param CGH command group function/handler + /// \param secondQueue fallback queue + /// \param[out] IsKernel set by callee to \c true if CGH represents a kernel + /// submit + /// \param CodeLoc code location event submit_impl(function_class CGH, queue secondQueue, bool &IsKernel, const detail::code_location &CodeLoc); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 3fe4602497125..ae8ced132b2f6 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -168,24 +168,7 @@ class queue_impl { /// \param Loc is the code location of the submit call (default argument) /// \return a SYCL event object, which corresponds to the queue the command /// group is being enqueued on. - event submit(const function_class &CGF, - const shared_ptr_class &Self, - const shared_ptr_class &SecondQueue, - const detail::code_location &Loc) { - try { - bool Dummy; - return submit_impl(CGF, Dummy, Self, Loc); - } catch (...) { - { - std::lock_guard Lock(MMutex); - MExceptions.PushBack(std::current_exception()); - } - return SecondQueue->submit(CGF, SecondQueue, Loc); - } - } - - event submit(const function_class &CGF, - bool &IsKernel, + event submit(const function_class &CGF, bool *IsKernel, const shared_ptr_class &Self, const shared_ptr_class &SecondQueue, const detail::code_location &Loc) { @@ -207,15 +190,7 @@ class queue_impl { /// \param Self is a shared_ptr to this queue. 
/// \param Loc is the code location of the submit call (default argument) /// \return a SYCL event object for the submitted command group. - event submit(const function_class &CGF, - const shared_ptr_class &Self, - const detail::code_location &Loc) { - bool Dummy; - return submit_impl(CGF, Dummy, Self, Loc); - } - - event submit(const function_class &CGF, - bool &IsKernel, + event submit(const function_class &CGF, bool *IsKernel, const shared_ptr_class &Self, const detail::code_location &Loc) { return submit_impl(CGF, IsKernel, Self, Loc); @@ -415,15 +390,15 @@ class queue_impl { /// /// KernelName is null if the caller doesn't want the kernel name. The object /// is modified if and only if there was a kernel submit. - event submit_impl(const function_class &CGF, - bool &IsKernel, + event submit_impl(const function_class &CGF, bool *IsKernel, const shared_ptr_class &Self, const detail::code_location &Loc) { handler Handler(Self, MHostQueue); Handler.saveCodeLoc(Loc); CGF(Handler); - IsKernel = Handler.getType() == CG::KERNEL; + if (IsKernel) + *IsKernel = Handler.getType() == CG::KERNEL; event Event = Handler.finalize(); addEvent(Event); diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 3b27775279f90..b49b0aed9cc06 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -92,23 +92,23 @@ event queue::mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice) { event queue::submit_impl(function_class CGH, const detail::code_location &CodeLoc) { - return impl->submit(CGH, impl, CodeLoc); + return impl->submit(CGH, /* IsKernel */ nullptr, impl, CodeLoc); } event queue::submit_impl(function_class CGH, queue SecondQueue, const detail::code_location &CodeLoc) { - return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc); + return impl->submit(CGH, /* IsKernel */ nullptr, impl, SecondQueue.impl, + CodeLoc); } -event queue::submit_impl(function_class CGH, - bool &IsKernel, const detail::code_location &CodeLoc) { - return impl->submit(CGH, 
IsKernel, impl, CodeLoc); +event queue::submit_impl(function_class CGH, bool &IsKernel, + const detail::code_location &CodeLoc) { + return impl->submit(CGH, &IsKernel, impl, CodeLoc); } event queue::submit_impl(function_class CGH, queue SecondQueue, bool &IsKernel, const detail::code_location &CodeLoc) { - return impl->submit(CGH, IsKernel, impl, SecondQueue.impl, - CodeLoc); + return impl->submit(CGH, &IsKernel, impl, SecondQueue.impl, CodeLoc); } void queue::wait_proxy(const detail::code_location &CodeLoc) { From d2038be1f76c9015782abe3f95c20ca04fe3c8f0 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 1 Jun 2021 13:24:06 +0300 Subject: [PATCH 052/122] Add doc Signed-off-by: Sergey Kanaev --- sycl/doc/PreprocessorMacros.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/doc/PreprocessorMacros.md b/sycl/doc/PreprocessorMacros.md index bf4543082acbd..cdd213090c10c 100644 --- a/sycl/doc/PreprocessorMacros.md +++ b/sycl/doc/PreprocessorMacros.md @@ -33,6 +33,10 @@ SYCL 2020. Disables all deprecation warnings in SYCL runtime headers, including SYCL 1.2.1 deprecations. +### `SYCL_DISABLE_FALLBACK_ASSERT` + +Forces fallback assert feature implementation disable + ### Version macros - `__LIBSYCL_MAJOR_VERSION` is set to SYCL runtime library major version. 
From 490b697265f4e951ba59ae0c5b19ce89cbec6cfc Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 1 Jun 2021 13:27:44 +0300 Subject: [PATCH 053/122] Fix code style issue Signed-off-by: Sergey Kanaev --- libdevice/fallback-cassert.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index d79898f9b3ed1..26fddb4e6a1d7 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -21,8 +21,8 @@ struct AssertHappened { namespace cl { namespace sycl { namespace detail { -extern __SYCL_GLOBAL_VAR__ const - AssertHappened AssertHappenedMem; // declaration +extern __SYCL_GLOBAL_VAR__ const AssertHappened + AssertHappenedMem; // declaration } // namespace detail } // namespace sycl } // namespace cl From 0d05368de143f0bbce08f2acafb2c084eb11bf22 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 1 Jun 2021 13:28:14 +0300 Subject: [PATCH 054/122] Add missing file Signed-off-by: Sergey Kanaev --- .../CL/sycl/detail/assert_happened.hpp | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 sycl/include/CL/sycl/detail/assert_happened.hpp diff --git a/sycl/include/CL/sycl/detail/assert_happened.hpp b/sycl/include/CL/sycl/detail/assert_happened.hpp new file mode 100644 index 0000000000000..a03cf78f74964 --- /dev/null +++ b/sycl/include/CL/sycl/detail/assert_happened.hpp @@ -0,0 +1,26 @@ +//==------- assert_happened.hpp - Assert signalling structure --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +#ifdef __SYCL_DEVICE_ONLY__ +// Reads Flag of AssertHappened on device +SYCL_EXTERNAL __attribute__((weak)) extern "C" int __devicelib_assert_read(); +#endif + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { +struct AssertHappened { + int Flag = 0; // set to non-zero upon assert failure +}; +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) From 8e64d96f8c984ec38879b97048d7841046ccd08d Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 1 Jun 2021 14:56:16 +0300 Subject: [PATCH 055/122] Elaborate on preprocessor macro function Signed-off-by: Sergey Kanaev --- sycl/doc/PreprocessorMacros.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sycl/doc/PreprocessorMacros.md b/sycl/doc/PreprocessorMacros.md index cdd213090c10c..7ad83b0b091f8 100644 --- a/sycl/doc/PreprocessorMacros.md +++ b/sycl/doc/PreprocessorMacros.md @@ -35,7 +35,10 @@ Disables all deprecation warnings in SYCL runtime headers, including SYCL 1.2.1 ### `SYCL_DISABLE_FALLBACK_ASSERT` -Forces fallback assert feature implementation disable +Forces fallback assert feature implementation disable i.e. the *copier kernel* +and *checker host-task* are not enqueued. Also, DPCPP RT won't perform check if +user's kernel uses `__devicelib_assert_fail`. Refer to [the document](Assert.md) +for function behind *copier kernel* and *checker host-task*. 
### Version macros From e1a7f45bcdd70104c7189e0d4a836faa6fb8ec47 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 1 Jun 2021 15:45:13 +0300 Subject: [PATCH 056/122] Add missing symbols Signed-off-by: Sergey Kanaev --- sycl/test/abi/sycl_symbols_linux.dump | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 011113631ec29..d33de4a87f153 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3635,7 +3635,9 @@ _ZN2cl4sycl5eventC2Ev _ZN2cl4sycl5queue10mem_adviseEPKvm14_pi_mem_advice _ZN2cl4sycl5queue10wait_proxyERKNS0_6detail13code_locationE _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE +_ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEERbRKNS0_6detail13code_locationE _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationE +_ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEES1_RbRKNS0_6detail13code_locationE _ZN2cl4sycl5queue18throw_asynchronousEv _ZN2cl4sycl5queue20wait_and_throw_proxyERKNS0_6detail13code_locationE _ZN2cl4sycl5queue6memcpyEPvPKvm @@ -3935,6 +3937,7 @@ _ZNK2cl4sycl5queue11get_contextEv _ZNK2cl4sycl5queue11is_in_orderEv _ZNK2cl4sycl5queue12get_propertyINS0_8property5queue16enable_profilingEEET_v _ZNK2cl4sycl5queue12has_propertyINS0_8property5queue16enable_profilingEEEbv +_ZNK2cl4sycl5queue16kernelUsesAssertERNS0_5eventE _ZNK2cl4sycl5queue3getEv _ZNK2cl4sycl5queue7is_hostEv _ZNK2cl4sycl5queue8get_infoILNS0_4info5queueE4240EEENS3_12param_traitsIS4_XT_EE11return_typeEv From 8a9effde9a6c2d54fcc479889229055303cf27ad Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 1 Jun 2021 16:10:50 +0300 Subject: [PATCH 057/122] Fix runtime issues Signed-off-by: Sergey Kanaev --- sycl/source/detail/device_impl.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/source/detail/device_impl.cpp 
b/sycl/source/detail/device_impl.cpp index 4e96451e1910b..060c8c1d1f87d 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -304,6 +304,10 @@ std::shared_ptr device_impl::getHostDeviceImpl() { } bool device_impl::isAssertFailSupported() const { + // assert is sort of natively supported by host + if (MIsHostDevice) + return true; + const plugin &Plugin = getPlugin(); // assume CUDA supports native asserts by default From 2d2018b84eb8b43ca9f547755520400ffc4090fb Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 1 Jun 2021 16:14:26 +0300 Subject: [PATCH 058/122] Report CUDA device as of no support for assert Signed-off-by: Sergey Kanaev --- sycl/source/detail/device_impl.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 060c8c1d1f87d..0b55bddfe8297 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -310,9 +310,11 @@ bool device_impl::isAssertFailSupported() const { const plugin &Plugin = getPlugin(); - // assume CUDA supports native asserts by default + // Return false for CUDA even though, it has native support for assert. + // Change to return true when devicelib issue is fixed. 
+ // See https://github.com/intel/llvm/issues/3385 if (Plugin.getBackend() == backend::cuda) - return true; + return false; return has_extension("cl_intel_devicelib_cassert"); } From 5e3647ab4b561bc8c8d46d4a967077c6ea67cf40 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 2 Jun 2021 15:27:21 +0300 Subject: [PATCH 059/122] Disable fallback assert in unit-tests Signed-off-by: Sergey Kanaev --- sycl/cmake/modules/AddSYCLUnitTest.cmake | 1 + sycl/include/CL/sycl/queue.hpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/sycl/cmake/modules/AddSYCLUnitTest.cmake b/sycl/cmake/modules/AddSYCLUnitTest.cmake index 2d50567d2ee0b..703727f53dbe5 100644 --- a/sycl/cmake/modules/AddSYCLUnitTest.cmake +++ b/sycl/cmake/modules/AddSYCLUnitTest.cmake @@ -66,6 +66,7 @@ macro(add_sycl_unittest_with_device test_dirname link_variant) -DGTEST_LANG_CXX11=1 -DGTEST_HAS_TR1_TUPLE=0 -D__SYCL_BUILD_SYCL_DLL + -DSYCL_DISABLE_FALLBACK_ASSERT -I${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include -I${LLVM_MAIN_SRC_DIR}/utils/unittest/googlemock/include -I${LLVM_BINARY_DIR}/include diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 3a12d53892c5d..0d4b58c88e8af 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -218,6 +218,7 @@ class __SYCL_EXPORT queue { typename info::param_traits::return_type get_info() const; private: +#ifndef SYCL_DISABLE_FALLBACK_ASSERT /** * Submit copy task for assert failure flag and host-task to check the flag * \param Event kernel's event to depend on i.e. 
the event represents the @@ -270,6 +271,7 @@ class __SYCL_EXPORT queue { return CheckerEv; } +#endif // Check if kernel with the name provided in KernelName and which is being // enqueued and can be waited on by Event uses assert From 4d204f847ddd7b54a0db5e4c4e51a974c822cb84 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 2 Jun 2021 15:30:10 +0300 Subject: [PATCH 060/122] Address some review comments Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 2 +- sycl/source/detail/scheduler/scheduler.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 0d4b58c88e8af..1da8bd663aa60 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -62,7 +62,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -/*template */ class AssertFlagCopier {}; +class AssertFlagCopier; // Forward declaration class context; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 82ee222e1bc06..5e52e9bdde1fa 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -726,7 +726,7 @@ class Scheduler { GraphBuilder MGraphBuilder; // TODO: after switching to C++17, change std::shared_timed_mutex to // std::shared_mutex - mutable std::shared_timed_mutex MGraphLock; + std::shared_timed_mutex MGraphLock; QueueImplPtr DefaultHostQueue; From 6be5ac519152027719612814b11d10420a753b6f Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 2 Jun 2021 15:30:56 +0300 Subject: [PATCH 061/122] Add FIXME comments in devicelib Signed-off-by: Sergey Kanaev --- libdevice/fallback-cassert.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 26fddb4e6a1d7..19c682cd07095 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -15,12 +15,14 @@ struct AssertHappened { }; #ifndef 
__SYCL_GLOBAL_VAR__ +// FIXME uncomment after CFE changes #define __SYCL_GLOBAL_VAR__ /*__attribute__((sycl_global_var))*/ #endif namespace cl { namespace sycl { namespace detail { +// FIXME remove const after CFE changes extern __SYCL_GLOBAL_VAR__ const AssertHappened AssertHappenedMem; // declaration } // namespace detail From 6de4d5c8fb4858d377f4931cb4d2a445960d3e1e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 2 Jun 2021 16:05:17 +0300 Subject: [PATCH 062/122] Move AssertHappenedMem out of namespace Signed-off-by: Sergey Kanaev --- libdevice/fallback-cassert.cpp | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 19c682cd07095..fea5ad1d0e88f 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -19,30 +19,18 @@ struct AssertHappened { #define __SYCL_GLOBAL_VAR__ /*__attribute__((sycl_global_var))*/ #endif -namespace cl { -namespace sycl { -namespace detail { // FIXME remove const after CFE changes extern __SYCL_GLOBAL_VAR__ const AssertHappened - AssertHappenedMem; // declaration -} // namespace detail -} // namespace sycl -} // namespace cl + __SYCL_AssertHappenedMem; // declaration -namespace cl { -namespace sycl { -namespace detail { __SYCL_GLOBAL_VAR__ const AssertHappened AssertHappenedMem; // definition -} // namespace detail -} // namespace sycl -} // namespace cl static const __attribute__((opencl_constant)) char assert_fmt[] = "%s:%d: %s: global id: [%lu,%lu,%lu], local id: [%lu,%lu,%lu] " "Assertion `%s` failed.\n"; DEVICE_EXTERN_C int __devicelib_assert_read(void) { - volatile int *Ptr = (int *)(&cl::sycl::detail::AssertHappenedMem.Flag); + volatile int *Ptr = (int *)(&__SYCL_AssertHappenedMem.Flag); return *Ptr; } @@ -61,7 +49,7 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, func, gid0, gid1, gid2, lid0, lid1, lid2, expr); // FIXME uncomment the following line after clang 
changes - // cl::sycl::detail::AssertHappenedMem.Flag = 1; + // __SYCL_AssertHappenedMem.Flag = 1; // FIXME: call SPIR-V unreachable instead // volatile int *die = (int *)0x0; From e39e15f81b2c7758b64fdeab5c7a4d36b917d346 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 2 Jun 2021 17:04:21 +0300 Subject: [PATCH 063/122] Updated comment Signed-off-by: Sergey Kanaev --- libdevice/fallback-cassert.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index fea5ad1d0e88f..47d41b1d12761 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -48,8 +48,16 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, // (func) ? func : "", func, gid0, gid1, gid2, lid0, lid1, lid2, expr); - // FIXME uncomment the following line after clang changes - // __SYCL_AssertHappenedMem.Flag = 1; + // FIXME uncomment the following lines after clang changes + //{ + // int Expected = 0; + // int Desired = 1; + // __spirv_AtomicCompareExchange( + // &__SYCL_AssertHappenedMem.Flag, /*__spv::Scope::Device*/ 1, + // /*__spv::MemorySemanticsMask::SequentiallyConsistent*/ 0x10, + // /*__spv::MemorySemanticsMask::SequentiallyConsistent*/ 0x10, + // Desired, Expected); + //} // FIXME: call SPIR-V unreachable instead // volatile int *die = (int *)0x0; From 60a21213119a68d3d6e61b78e3a337462ae87390 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 2 Jun 2021 17:04:43 +0300 Subject: [PATCH 064/122] Update document --- sycl/doc/Assert.md | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 12b074c258665..3acef1b94fc9c 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -172,18 +172,12 @@ same binary image where fallback `__devicelib_assert_fail` resides. 
declaration: ```c++ -namespace cl { -namespace sycl { -namespace detail { -struct AssertHappened { +struct __SYCL_AssertHappened { int Flag = 0; }; -} -} -} #ifdef __SYCL_DEVICE_ONLY__ -extern SYCL_GLOBAL_VAR AssertHappened AssertHappenedMem; +extern SYCL_GLOBAL_VAR __SYCL_AssertHappened __SYCL_AssertHappenedMem; #endif ``` From 436e8a6648cd9c1344bd53a18d19fdf41338a78a Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 2 Jun 2021 17:11:59 +0300 Subject: [PATCH 065/122] Update document --- .../C-CXX-StandardLibrary/DeviceLibExtensions.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst b/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst index b411710b773bd..0b68b496fb2f4 100644 --- a/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst +++ b/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst @@ -14,6 +14,7 @@ cl_intel_devicelib_cassert __generic const char *func, size_t gid0, size_t gid1, size_t gid2, size_t lid0, size_t lid1, size_t lid2); + Semantic: the function is called when an assertion expression `expr` is false, and it indicates that a program does not execute as expected. @@ -21,6 +22,8 @@ The function should print a message containing the information provided in the arguments. In addition to that, the function is free to terminate the current kernel invocation. +Fallback implementation of the function raises a flag to be read later by `__devicelib_assert_read`. + Arguments: - `expr` is a string representation of the assertion condition @@ -33,6 +36,13 @@ Example of a message: .. code: foo.cpp:42: void foo(int): global id: [0,0,0], local id: [0,0,0] Assertion `buf[wiID] == 0 && "Invalid value"` failed. +.. code: + int __devicelib_assert_read(); + +Semantic: +the function is called to read assert failure flag raised by `__devicelib_assert_fail`. +The function is only used in fallback implementation. + See also: assert_extension_. 
.. _assert_extension: ../Assert/SYCL_ONEAPI_ASSERT.asciidoc) From 7c2ea4b7ec91be3b8ba7fc71c2058bce158f6c3f Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 3 Jun 2021 16:45:26 +0300 Subject: [PATCH 066/122] [PI] Get to know if extension is present w/ variable ext name for backends Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/pi.def | 3 +++ sycl/include/CL/sycl/detail/pi.h | 21 +++++++++++++++++++++ sycl/include/CL/sycl/queue.hpp | 4 ++-- sycl/plugins/cuda/pi_cuda.cpp | 22 ++++++++++++++++++++++ sycl/plugins/level_zero/pi_level_zero.cpp | 20 ++++++++++++++++++++ sycl/plugins/opencl/pi_opencl.cpp | 23 +++++++++++++++++++++++ sycl/source/detail/device_impl.cpp | 14 +++++++++++++- 7 files changed, 104 insertions(+), 3 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.def b/sycl/include/CL/sycl/detail/pi.def index 57b92264a57bc..f42582b45637a 100644 --- a/sycl/include/CL/sycl/detail/pi.def +++ b/sycl/include/CL/sycl/detail/pi.def @@ -130,4 +130,7 @@ _PI_API(piextKernelSetArgMemObj) _PI_API(piextKernelSetArgSampler) _PI_API(piTearDown) +// Extension names +_PI_API(piextGetExtensionName) + #undef _PI_API diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index e1281b9417357..8655e72374619 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -508,6 +508,10 @@ typedef enum { PI_PROFILING_INFO_COMMAND_END = CL_PROFILING_COMMAND_END } _pi_profiling_info; +typedef enum { + PI_INTEL_DEVICELIB_CASSERT = 0, +} _pi_extension_number; + // NOTE: this is made 64-bit to match the size of cl_mem_flags to // make the translation to OpenCL transparent. // TODO: populate @@ -572,6 +576,7 @@ using pi_program_build_info = _pi_program_build_info; using pi_program_build_status = _pi_program_build_status; using pi_kernel_info = _pi_kernel_info; using pi_profiling_info = _pi_profiling_info; +using pi_extension_number = _pi_extension_number; // For compatibility with OpenCL define this not as enum. 
using pi_device_partition_property = intptr_t; @@ -1640,6 +1645,20 @@ __SYCL_EXPORT pi_result piextUSMGetMemAllocInfo( /// \param PluginParameter placeholder for future use, currenly not used. __SYCL_EXPORT pi_result piTearDown(void *PluginParameter); + +/// API to get extension name for those extension whose name varies with backend +/// \param ExtNumber number of extension \sa pi_extension_number +/// \param Size[out] length of returned extension name +/// \param Value[out] memory location where to put extension name +/// \return \c PI_SUCCESS if this extension name is available, +/// \c PI_INVALID_VALUE if the extension name isn't available for this +/// plugin +/// +/// Either of Size or Value isn't set if it's nil. +/// Value returned in Size can't be zero. +__SYCL_EXPORT pi_result piextGetExtensionName( + pi_extension_number ExtNumber, size_t *Size, char *Value); + struct _pi_plugin { // PI version supported by host passed to the plugin. The Plugin // checks and writes the appropriate Function Pointers in @@ -1658,6 +1677,8 @@ struct _pi_plugin { } PiFunctionTable; }; +extern const char PI_INTEL_DEVICELIB_CASSERT_EXTENSION_NAME[]; + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 1da8bd663aa60..1e62875eff9c9 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -294,7 +294,7 @@ class __SYCL_EXPORT queue { Event = submit_impl(CGF, IsKernel, CodeLoc); // assert required - if (IsKernel && !get_device().is_assert_fail_supported() && + if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) && kernelUsesAssert(Event)) { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is performed @@ -330,7 +330,7 @@ class __SYCL_EXPORT queue { Event = submit_impl(CGF, IsKernel, SecondaryQueue, CodeLoc); // assert required - if (IsKernel && 
!get_device().is_assert_fail_supported() && + if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) && kernelUsesAssert(Event)) { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is performed diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 9384a0ac10c5a..ae49e40be531b 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -4763,9 +4763,31 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextKernelSetArgSampler, cuda_piextKernelSetArgSampler) _PI_CL(piTearDown, cuda_piTearDown) + _PI_CL(piextGetExtensionName, cuda_piextGetExtensionName); + #undef _PI_CL return PI_SUCCESS; } +pi_result piextGetExtensionName(pi_extension_number ExtNumber, size_t *Size, + char *Value) { + pi_result Result = PI_SUCCESS; + // TODO switch to map/unordered_map when have enough number of extensions + switch (ExtNumber) { + case PI_INTEL_DEVICELIB_CASSERT: { + static const std::string Name = "N/A"; + if (Size) + *Size = Name.length(); + if (Value) + std::memcpy(Value, Name.data(), Name.length()); + break; + } + default: + Result = PI_INVALID_VALUE; + } + + return Result; +} + } // extern "C" diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 7a19cc133d892..c34ab3f55e13e 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -6502,4 +6502,24 @@ pi_result piTearDown(void *PluginParameter) { return PI_SUCCESS; } +pi_result piextGetExtensionName(pi_extension_number ExtNumber, size_t *Size, + char *Value) { + pi_result Result = PI_SUCCESS; + // TODO switch to map/unordered_map when have enough number of extensions + switch (ExtNumber) { + case PI_INTEL_DEVICELIB_CASSERT: { + static const std::string Name = "N/A"; + if (Size) + *Size = Name.length(); + if (Value) + std::memcpy(Value, Name.data(), Name.length()); + break; + } + default: 
+ Result = PI_INVALID_VALUE; + } + + return Result; +} + } // extern "C" diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 484c7cf77f1b3..66c7e62603672 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1206,6 +1206,27 @@ pi_result piTearDown(void *PluginParameter) { return PI_SUCCESS; } +pi_result piextGetExtensionName(pi_extension_number ExtNumber, size_t *Size, + char *Value) { + pi_result Result = PI_SUCCESS; + // TODO switch to map/unordered_map when have enough number of extensions + switch (ExtNumber) { + case PI_INTEL_DEVICELIB_CASSERT: { + static const std::string Name = "cl_intel_devicelib_cassert"; + if (Size) + *Size = Name.length(); + if (Value) + std::memcpy(Value, Name.data(), Name.length()); + break; + } + default: + Result = PI_INVALID_VALUE; + } + + return Result; +} + + pi_result piPluginInit(pi_plugin *PluginInit) { int CompareVersions = strcmp(PluginInit->PiVersion, SupportedVersion); if (CompareVersions < 0) { @@ -1337,6 +1358,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextKernelSetArgSampler, piextKernelSetArgSampler) _PI_CL(piTearDown, piTearDown) + _PI_CL(piextGetExtensionName, piextGetExtensionName); + #undef _PI_CL return PI_SUCCESS; diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 0b55bddfe8297..7875bebe2ce6f 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -316,7 +316,19 @@ bool device_impl::isAssertFailSupported() const { if (Plugin.getBackend() == backend::cuda) return false; - return has_extension("cl_intel_devicelib_cassert"); + size_t ExtNameSize = 0; + Plugin.call(PI_INTEL_DEVICELIB_CASSERT, + &ExtNameSize, nullptr); + + assert(ExtNameSize && "Size can't be zero"); + + //std::vector ExtName{ExtNameSize + 1} + std::unique_ptr ExtName{new char[ExtNameSize + 1]}; + ExtName[ExtNameSize] = '\0'; + Plugin.call(PI_INTEL_DEVICELIB_CASSERT, + nullptr, 
ExtName.get()); + + return has_extension(ExtName.get()); } } // namespace detail From acce48cc5f87435fb49e816c596107319a2c752b Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 3 Jun 2021 16:46:04 +0300 Subject: [PATCH 067/122] Add atomic operations Signed-off-by: Sergey Kanaev --- libdevice/atomic.hpp | 67 ++++++++++++++++++++++++++++++++++ libdevice/fallback-cassert.cpp | 14 +++---- 2 files changed, 73 insertions(+), 8 deletions(-) create mode 100644 libdevice/atomic.hpp diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp new file mode 100644 index 0000000000000..6a991f7910289 --- /dev/null +++ b/libdevice/atomic.hpp @@ -0,0 +1,67 @@ +//==-------------- atomic.cpp - support of atomic operations ---------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "device.h" + +#define __SYCL_GLOBAL__ __attribute__((opencl_global)) + +namespace __spv { +struct Scope { + + enum Flag : uint32_t { + CrossDevice = 0, + Device = 1, + Workgroup = 2, + Subgroup = 3, + Invocation = 4, + }; + + constexpr Scope(Flag flag) : flag_value(flag) {} + + constexpr operator uint32_t() const { return flag_value; } + + Flag flag_value; +}; + +struct MemorySemanticsMask { + + enum Flag : uint32_t { + None = 0x0, + Acquire = 0x2, + Release = 0x4, + AcquireRelease = 0x8, + SequentiallyConsistent = 0x10, + UniformMemory = 0x40, + SubgroupMemory = 0x80, + WorkgroupMemory = 0x100, + CrossWorkgroupMemory = 0x200, + AtomicCounterMemory = 0x400, + ImageMemory = 0x800, + }; + + constexpr MemorySemanticsMask(Flag flag) : flag_value(flag) {} + + constexpr operator uint32_t() const { return flag_value; } + + Flag flag_value; +}; +} // namespace __spv + +extern DEVICE_EXTERNAL int __spirv_AtomicCompareExchange( + int 
__SYCL_GLOBAL__ *Ptr, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag SMO, + __spv::MemorySemanticsMask::Flag FMO, int E, int D); + +inline void CAS(__SYCL_GLOBAL__ int *Ptr, int Expected, int Desired) { + __spirv_AtomicCompareExchange( + Ptr, __spv::Scope::Device, + __spv::MemorySemanticsMask::SequentiallyConsistent, + __spv::MemorySemanticsMask::SequentiallyConsistent, + Desired, Expected); +} + diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 47d41b1d12761..825dfc1d83593 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "wrapper.h" +#include "atomic.hpp" #ifdef __SPIR__ @@ -20,10 +21,11 @@ struct AssertHappened { #endif // FIXME remove const after CFE changes -extern __SYCL_GLOBAL_VAR__ const AssertHappened - __SYCL_AssertHappenedMem; // declaration +// declaration +extern __SYCL_GLOBAL_VAR__ __SYCL_GLOBAL__ const AssertHappened __SYCL_AssertHappenedMem; -__SYCL_GLOBAL_VAR__ const AssertHappened AssertHappenedMem; // definition +// definition +__SYCL_GLOBAL_VAR__ __SYCL_GLOBAL__ const AssertHappened __SYCL_AssertHappenedMem; static const __attribute__((opencl_constant)) char assert_fmt[] = "%s:%d: %s: global id: [%lu,%lu,%lu], local id: [%lu,%lu,%lu] " @@ -52,11 +54,7 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, //{ // int Expected = 0; // int Desired = 1; - // __spirv_AtomicCompareExchange( - // &__SYCL_AssertHappenedMem.Flag, /*__spv::Scope::Device*/ 1, - // /*__spv::MemorySemanticsMask::SequentiallyConsistent*/ 0x10, - // /*__spv::MemorySemanticsMask::SequentiallyConsistent*/ 0x10, - // Desired, Expected); + // CAS(&__SYCL_AssertHappenedMem.Flag, Desired, Expected); //} // FIXME: call SPIR-V unreachable instead From a64d9661e82c32d84490b1f4f726d96bcdc555f1 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 3 Jun 2021 16:49:06 +0300 Subject: 
[PATCH 068/122] Updated document Signed-off-by: Sergey Kanaev --- .../C-CXX-StandardLibrary/DeviceLibExtensions.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst b/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst index 0b68b496fb2f4..d925e41b43276 100644 --- a/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst +++ b/sycl/doc/extensions/C-CXX-StandardLibrary/DeviceLibExtensions.rst @@ -23,6 +23,7 @@ provided in the arguments. In addition to that, the function is free to terminate the current kernel invocation. Fallback implementation of the function raises a flag to be read later by `__devicelib_assert_read`. +The flag remains raised until the program finishes. Arguments: @@ -40,8 +41,11 @@ Example of a message: int __devicelib_assert_read(); Semantic: -the function is called to read assert failure flag raised by `__devicelib_assert_fail`. +the function is called to read assert failure flag raised by +`__devicelib_assert_fail`. The function is only used in fallback implementation. +Invoking `__devicelib_assert_read` after a kernel doesn't imply the kernel has +assertion failed. See also: assert_extension_. .. 
_assert_extension: ../Assert/SYCL_ONEAPI_ASSERT.asciidoc) From 26c1c9cfc3844181e889e318d3b9d6f9ea0cfe3f Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 3 Jun 2021 16:56:30 +0300 Subject: [PATCH 069/122] Clang format fix Signed-off-by: Sergey Kanaev --- libdevice/atomic.hpp | 11 ++++++----- libdevice/fallback-cassert.cpp | 8 +++++--- sycl/include/CL/sycl/detail/pi.h | 5 ++--- sycl/plugins/cuda/pi_cuda.cpp | 20 ++++++++++---------- sycl/plugins/level_zero/pi_level_zero.cpp | 20 ++++++++++---------- sycl/plugins/opencl/pi_opencl.cpp | 20 ++++++++++---------- sycl/source/detail/device_impl.cpp | 1 - 7 files changed, 43 insertions(+), 42 deletions(-) diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp index 6a991f7910289..2a8cdcb7db83f 100644 --- a/libdevice/atomic.hpp +++ b/libdevice/atomic.hpp @@ -53,15 +53,16 @@ struct MemorySemanticsMask { }; } // namespace __spv -extern DEVICE_EXTERNAL int __spirv_AtomicCompareExchange( - int __SYCL_GLOBAL__ *Ptr, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag SMO, - __spv::MemorySemanticsMask::Flag FMO, int E, int D); +extern DEVICE_EXTERNAL int + __spirv_AtomicCompareExchange(int __SYCL_GLOBAL__ *, __spv::Scope::Flag, + __spv::MemorySemanticsMask::Flag, + __spv::MemorySemanticsMask::Flag, + int, int); inline void CAS(__SYCL_GLOBAL__ int *Ptr, int Expected, int Desired) { __spirv_AtomicCompareExchange( Ptr, __spv::Scope::Device, __spv::MemorySemanticsMask::SequentiallyConsistent, - __spv::MemorySemanticsMask::SequentiallyConsistent, - Desired, Expected); + __spv::MemorySemanticsMask::SequentiallyConsistent, Desired, Expected); } diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 825dfc1d83593..e00f11bf5ef94 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "wrapper.h" #include "atomic.hpp" +#include "wrapper.h" #ifdef __SPIR__ @@ -22,10 
+22,12 @@ struct AssertHappened { // FIXME remove const after CFE changes // declaration -extern __SYCL_GLOBAL_VAR__ __SYCL_GLOBAL__ const AssertHappened __SYCL_AssertHappenedMem; +extern __SYCL_GLOBAL_VAR__ __SYCL_GLOBAL__ const AssertHappened + __SYCL_AssertHappenedMem; // definition -__SYCL_GLOBAL_VAR__ __SYCL_GLOBAL__ const AssertHappened __SYCL_AssertHappenedMem; +__SYCL_GLOBAL_VAR__ __SYCL_GLOBAL__ const AssertHappened + __SYCL_AssertHappenedMem; static const __attribute__((opencl_constant)) char assert_fmt[] = "%s:%d: %s: global id: [%lu,%lu,%lu], local id: [%lu,%lu,%lu] " diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 8655e72374619..c92b5b8aae5d7 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -1645,7 +1645,6 @@ __SYCL_EXPORT pi_result piextUSMGetMemAllocInfo( /// \param PluginParameter placeholder for future use, currenly not used. __SYCL_EXPORT pi_result piTearDown(void *PluginParameter); - /// API to get extension name for those extension whose name varies with backend /// \param ExtNumber number of extension \sa pi_extension_number /// \param Size[out] length of returned extension name @@ -1656,8 +1655,8 @@ __SYCL_EXPORT pi_result piTearDown(void *PluginParameter); /// /// Either of Size or Value isn't set if it's nil. /// Value returned in Size can't be zero. -__SYCL_EXPORT pi_result piextGetExtensionName( - pi_extension_number ExtNumber, size_t *Size, char *Value); +__SYCL_EXPORT pi_result piextGetExtensionName(pi_extension_number ExtNumber, + size_t *Size, char *Value); struct _pi_plugin { // PI version supported by host passed to the plugin. 
The Plugin diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index ae49e40be531b..ceeaeff0ce79a 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -4775,16 +4775,16 @@ pi_result piextGetExtensionName(pi_extension_number ExtNumber, size_t *Size, pi_result Result = PI_SUCCESS; // TODO switch to map/unordered_map when have enough number of extensions switch (ExtNumber) { - case PI_INTEL_DEVICELIB_CASSERT: { - static const std::string Name = "N/A"; - if (Size) - *Size = Name.length(); - if (Value) - std::memcpy(Value, Name.data(), Name.length()); - break; - } - default: - Result = PI_INVALID_VALUE; + case PI_INTEL_DEVICELIB_CASSERT: { + static const std::string Name = "N/A"; + if (Size) + *Size = Name.length(); + if (Value) + std::memcpy(Value, Name.data(), Name.length()); + break; + } + default: + Result = PI_INVALID_VALUE; } return Result; diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index c34ab3f55e13e..2bb749fd2bb23 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -6507,16 +6507,16 @@ pi_result piextGetExtensionName(pi_extension_number ExtNumber, size_t *Size, pi_result Result = PI_SUCCESS; // TODO switch to map/unordered_map when have enough number of extensions switch (ExtNumber) { - case PI_INTEL_DEVICELIB_CASSERT: { - static const std::string Name = "N/A"; - if (Size) - *Size = Name.length(); - if (Value) - std::memcpy(Value, Name.data(), Name.length()); - break; - } - default: - Result = PI_INVALID_VALUE; + case PI_INTEL_DEVICELIB_CASSERT: { + static const std::string Name = "N/A"; + if (Size) + *Size = Name.length(); + if (Value) + std::memcpy(Value, Name.data(), Name.length()); + break; + } + default: + Result = PI_INVALID_VALUE; } return Result; diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 66c7e62603672..77fbdb3e18a28 100644 --- 
a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1211,16 +1211,16 @@ pi_result piextGetExtensionName(pi_extension_number ExtNumber, size_t *Size, pi_result Result = PI_SUCCESS; // TODO switch to map/unordered_map when have enough number of extensions switch (ExtNumber) { - case PI_INTEL_DEVICELIB_CASSERT: { - static const std::string Name = "cl_intel_devicelib_cassert"; - if (Size) - *Size = Name.length(); - if (Value) - std::memcpy(Value, Name.data(), Name.length()); - break; - } - default: - Result = PI_INVALID_VALUE; + case PI_INTEL_DEVICELIB_CASSERT: { + static const std::string Name = "cl_intel_devicelib_cassert"; + if (Size) + *Size = Name.length(); + if (Value) + std::memcpy(Value, Name.data(), Name.length()); + break; + } + default: + Result = PI_INVALID_VALUE; } return Result; diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 7875bebe2ce6f..b6bd5b93f747b 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -322,7 +322,6 @@ bool device_impl::isAssertFailSupported() const { assert(ExtNameSize && "Size can't be zero"); - //std::vector ExtName{ExtNameSize + 1} std::unique_ptr ExtName{new char[ExtNameSize + 1]}; ExtName[ExtNameSize] = '\0'; Plugin.call(PI_INTEL_DEVICELIB_CASSERT, From a3028716042f6b29cbef366be348eb4974273ffb Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 3 Jun 2021 16:59:06 +0300 Subject: [PATCH 070/122] Add missing change Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/device.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sycl/include/CL/sycl/device.hpp b/sycl/include/CL/sycl/device.hpp index 62d180bf388fc..375311b523cf6 100644 --- a/sycl/include/CL/sycl/device.hpp +++ b/sycl/include/CL/sycl/device.hpp @@ -195,10 +195,6 @@ class __SYCL_EXPORT device { /// \return true if the SYCL device has the given feature. 
bool has(aspect Aspect) const; - bool is_assert_fail_supported() const { - return has(aspect::ext_oneapi_native_assert); - } - private: shared_ptr_class impl; device(shared_ptr_class impl) : impl(impl) {} From 02840641518f308951504fd0521bcfb09276f303 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 3 Jun 2021 17:11:34 +0300 Subject: [PATCH 071/122] Fix style issues Signed-off-by: Sergey Kanaev --- libdevice/atomic.hpp | 9 ++++----- sycl/plugins/opencl/pi_opencl.cpp | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp index 2a8cdcb7db83f..585893ed95899 100644 --- a/libdevice/atomic.hpp +++ b/libdevice/atomic.hpp @@ -54,10 +54,10 @@ struct MemorySemanticsMask { } // namespace __spv extern DEVICE_EXTERNAL int - __spirv_AtomicCompareExchange(int __SYCL_GLOBAL__ *, __spv::Scope::Flag, - __spv::MemorySemanticsMask::Flag, - __spv::MemorySemanticsMask::Flag, - int, int); +__spirv_AtomicCompareExchange(int __SYCL_GLOBAL__ *, __spv::Scope::Flag, + __spv::MemorySemanticsMask::Flag, + __spv::MemorySemanticsMask::Flag, + int, int); inline void CAS(__SYCL_GLOBAL__ int *Ptr, int Expected, int Desired) { __spirv_AtomicCompareExchange( @@ -65,4 +65,3 @@ inline void CAS(__SYCL_GLOBAL__ int *Ptr, int Expected, int Desired) { __spv::MemorySemanticsMask::SequentiallyConsistent, __spv::MemorySemanticsMask::SequentiallyConsistent, Desired, Expected); } - diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 77fbdb3e18a28..069053f14b2f8 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1226,7 +1226,6 @@ pi_result piextGetExtensionName(pi_extension_number ExtNumber, size_t *Size, return Result; } - pi_result piPluginInit(pi_plugin *PluginInit) { int CompareVersions = strcmp(PluginInit->PiVersion, SupportedVersion); if (CompareVersions < 0) { From 89b93a9336154bb4fc274f9e8c37ec438c8c798a Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 4 Jun 
2021 11:17:17 +0300 Subject: [PATCH 072/122] Fix style issue and typo Signed-off-by: Sergey Kanaev --- libdevice/atomic.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp index 585893ed95899..bad86b6a625a1 100644 --- a/libdevice/atomic.hpp +++ b/libdevice/atomic.hpp @@ -1,4 +1,4 @@ -//==-------------- atomic.cpp - support of atomic operations ---------------==// +//==-------------- atomic.hpp - support of atomic operations ---------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -56,8 +56,7 @@ struct MemorySemanticsMask { extern DEVICE_EXTERNAL int __spirv_AtomicCompareExchange(int __SYCL_GLOBAL__ *, __spv::Scope::Flag, __spv::MemorySemanticsMask::Flag, - __spv::MemorySemanticsMask::Flag, - int, int); + __spv::MemorySemanticsMask::Flag, int, int); inline void CAS(__SYCL_GLOBAL__ int *Ptr, int Expected, int Desired) { __spirv_AtomicCompareExchange( From 30237e8963098a3ca5acbf3554b90c77b03a3ad7 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 4 Jun 2021 12:53:08 +0300 Subject: [PATCH 073/122] Fix build issues Signed-off-by: Sergey Kanaev --- libdevice/atomic.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp index bad86b6a625a1..a5332028e9dbf 100644 --- a/libdevice/atomic.hpp +++ b/libdevice/atomic.hpp @@ -7,8 +7,12 @@ //===----------------------------------------------------------------------===// #pragma once +#include + #include "device.h" +#ifdef __SPIR__ + #define __SYCL_GLOBAL__ __attribute__((opencl_global)) namespace __spv { @@ -64,3 +68,4 @@ inline void CAS(__SYCL_GLOBAL__ int *Ptr, int Expected, int Desired) { __spv::MemorySemanticsMask::SequentiallyConsistent, __spv::MemorySemanticsMask::SequentiallyConsistent, Desired, Expected); } +#endif // __SPIR__ From 7bac294a2f8b7b8a32ea6482be3119d4bc7ce454 Mon Sep 17 00:00:00 2001 
From: Sergey Kanaev Date: Fri, 4 Jun 2021 14:39:01 +0300 Subject: [PATCH 074/122] Fix ABI tests Signed-off-by: Sergey Kanaev --- sycl/test/abi/pi_level_zero_symbol_check.dump | 1 + sycl/test/abi/pi_opencl_symbol_check.dump | 1 + 2 files changed, 2 insertions(+) diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index efbc073eab79f..970710fe4245d 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -88,6 +88,7 @@ piextDeviceSelectBinary piextEventCreateWithNativeHandle piextEventGetNativeHandle piextGetDeviceFunctionPointer +piextGetExtensionName piextKernelCreateWithNativeHandle piextKernelGetNativeHandle piextKernelSetArgMemObj diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index 23f7a3992bf60..d3d2dabfb2aa0 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -34,6 +34,7 @@ piextDeviceGetNativeHandle piextDeviceSelectBinary piextEventCreateWithNativeHandle piextGetDeviceFunctionPointer +piextGetExtensionName piextKernelCreateWithNativeHandle piextKernelGetNativeHandle piextKernelSetArgMemObj From 3e70b011ef68fd62021a2fcfd95a674e78469f58 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 7 Jun 2021 18:11:21 +0300 Subject: [PATCH 075/122] Rename AssertFlagCopier => AssertInfoCopier Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 018b009d9da70..b6f51d4c3b2b7 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -62,7 +62,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -class AssertFlagCopier; +class AssertInfoCopier; // Forward declaration class context; @@ -241,7 +241,7 @@ class __SYCL_EXPORT queue { auto Acc = Buffer->get_access(CGH); - 
CGH.single_task([Acc] { + CGH.single_task([Acc] { #ifdef __SYCL_DEVICE_ONLY__ Acc[0].Flag = __devicelib_assert_read(); #else From 04b39ab28391c59acda49b7c91817c5e12ca80a8 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 7 Jun 2021 18:14:55 +0300 Subject: [PATCH 076/122] Move buffer delete to distinct host task Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index b6f51d4c3b2b7..03d94da6c5139 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -235,7 +235,7 @@ class __SYCL_EXPORT queue { detail::AssertHappened *AH = new detail::AssertHappened; AHBufT *Buffer = new AHBufT{AH, range<1>{1}}; - event CopierEv, CheckerEv; + event CopierEv, CheckerEv, PostCheckerEv; auto CopierCGF = [&](handler &CGH) { CGH.depends_on(Event); @@ -255,7 +255,13 @@ class __SYCL_EXPORT queue { CGH.codeplay_host_task([=] { if (AH->Flag) abort(); // no need to release memory as it's abort anyway + }); + }; + // Release memory in distinct host-task so that any dependency is eliminated + auto PostCheckerCGF = [&CheckerEv, AH, Buffer](handler &CGH) { + CGH.depends_on(CheckerEv); + CGH.codeplay_host_task([=] { delete Buffer; delete AH; }); @@ -264,9 +270,11 @@ class __SYCL_EXPORT queue { if (SecondaryQueue) { CopierEv = submit_impl(CopierCGF, *SecondaryQueue, CodeLoc); CheckerEv = submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc); + PostCheckerEv = submit_impl(PostCheckerCGF, *SecondaryQueue, CodeLoc); } else { CopierEv = submit_impl(CopierCGF, CodeLoc); CheckerEv = submit_impl(CheckerCGF, CodeLoc); + PostCheckerEv = submit_impl(PostCheckerCGF, CodeLoc); } return CheckerEv; From acb8662e493c7a3064fa0c4922ee00bd2c48839d Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 8 Jun 2021 11:07:21 +0300 Subject: [PATCH 077/122] Fix cuda build Signed-off-by: Sergey Kanaev --- sycl/plugins/cuda/pi_cuda.cpp | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index ceeaeff0ce79a..fca3481d991ba 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -4770,8 +4770,8 @@ pi_result piPluginInit(pi_plugin *PluginInit) { return PI_SUCCESS; } -pi_result piextGetExtensionName(pi_extension_number ExtNumber, size_t *Size, - char *Value) { +pi_result cuda_piextGetExtensionName(pi_extension_number ExtNumber, + size_t *Size, char *Value) { pi_result Result = PI_SUCCESS; // TODO switch to map/unordered_map when have enough number of extensions switch (ExtNumber) { From 07b43a7682a5b24318e0d57e0831e7ef9d13b53e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 8 Jun 2021 11:28:14 +0300 Subject: [PATCH 078/122] Eliminate 'unused capture' warning Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 03d94da6c5139..d75ab31e91214 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -249,7 +249,7 @@ class __SYCL_EXPORT queue { #endif // __SYCL_DEVICE_ONLY__ }); }; - auto CheckerCGF = [&CopierEv, AH, Buffer](handler &CGH) { + auto CheckerCGF = [&CopierEv, AH](handler &CGH) { CGH.depends_on(CopierEv); CGH.codeplay_host_task([=] { From 22e53761fcace3a480257372881ae8b288c15a5e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 8 Jun 2021 12:59:05 +0300 Subject: [PATCH 079/122] Fix failing unit tests Signed-off-by: Sergey Kanaev --- sycl/cmake/modules/AddSYCLUnitTest.cmake | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sycl/cmake/modules/AddSYCLUnitTest.cmake b/sycl/cmake/modules/AddSYCLUnitTest.cmake index 703727f53dbe5..7276927573ba9 100644 --- a/sycl/cmake/modules/AddSYCLUnitTest.cmake +++ b/sycl/cmake/modules/AddSYCLUnitTest.cmake @@ -17,10 +17,14 @@ macro(add_sycl_unittest 
test_dirname link_variant) if ("${link_variant}" MATCHES "SHARED") set(SYCL_LINK_LIBS ${sycl_so_target}) add_unittest(SYCLUnitTests ${test_dirname} ${ARGN}) + target_compile_definitions(${test_dirname} + PRIVATE SYCL_DISABLE_FALLBACK_ASSERT) else() add_unittest(SYCLUnitTests ${test_dirname} $ ${ARGN}) - target_compile_definitions(${test_dirname} PRIVATE __SYCL_BUILD_SYCL_DLL) + target_compile_definitions(${test_dirname} + PRIVATE __SYCL_BUILD_SYCL_DLL + SYCL_DISABLE_FALLBACK_ASSERT) get_target_property(SYCL_LINK_LIBS ${sycl_so_target} LINK_LIBRARIES) endif() From 966756b69178e07df5c1cfac924e6b3d6cec3bae Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 8 Jun 2021 13:28:25 +0300 Subject: [PATCH 080/122] Rename CAS and make it static inline Signed-off-by: Sergey Kanaev --- libdevice/atomic.hpp | 4 +++- libdevice/fallback-cassert.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp index a5332028e9dbf..64fcdd75aabc5 100644 --- a/libdevice/atomic.hpp +++ b/libdevice/atomic.hpp @@ -62,7 +62,9 @@ __spirv_AtomicCompareExchange(int __SYCL_GLOBAL__ *, __spv::Scope::Flag, __spv::MemorySemanticsMask::Flag, __spv::MemorySemanticsMask::Flag, int, int); -inline void CAS(__SYCL_GLOBAL__ int *Ptr, int Expected, int Desired) { +/// Atomically set the value in *Ptr with Desired if and only if it is Expected +static inline void +CompareAndSet(__SYCL_GLOBAL__ int *Ptr, int Expected, int Desired) { __spirv_AtomicCompareExchange( Ptr, __spv::Scope::Device, __spv::MemorySemanticsMask::SequentiallyConsistent, diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index e00f11bf5ef94..0d5351afda4b5 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -56,7 +56,7 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, //{ // int Expected = 0; // int Desired = 1; - // CAS(&__SYCL_AssertHappenedMem.Flag, Desired, Expected); + // 
CompareAndSet(&__SYCL_AssertHappenedMem.Flag, Desired, Expected); //} // FIXME: call SPIR-V unreachable instead From 415b35e53ae60684aad93fa0e8f3de75af771a20 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 8 Jun 2021 14:20:04 +0300 Subject: [PATCH 081/122] Fix style issue. Signed-off-by: Sergey Kanaev --- libdevice/atomic.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp index 64fcdd75aabc5..b72b3aad0b811 100644 --- a/libdevice/atomic.hpp +++ b/libdevice/atomic.hpp @@ -63,8 +63,8 @@ __spirv_AtomicCompareExchange(int __SYCL_GLOBAL__ *, __spv::Scope::Flag, __spv::MemorySemanticsMask::Flag, int, int); /// Atomically set the value in *Ptr with Desired if and only if it is Expected -static inline void -CompareAndSet(__SYCL_GLOBAL__ int *Ptr, int Expected, int Desired) { +static inline void CompareAndSet(__SYCL_GLOBAL__ int *Ptr, int Expected, + int Desired) { __spirv_AtomicCompareExchange( Ptr, __spv::Scope::Device, __spv::MemorySemanticsMask::SequentiallyConsistent, From c96534da87d22df9f0bc21454eccc02a829fe101 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 8 Jun 2021 15:34:21 +0300 Subject: [PATCH 082/122] Fix cuda build Signed-off-by: Sergey Kanaev --- sycl/plugins/cuda/pi_cuda.cpp | 40 +++++++++++++++++------------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index fca3481d991ba..e10b50f1cb056 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -4623,6 +4623,26 @@ pi_result cuda_piextUSMGetMemAllocInfo(pi_context context, const void *ptr, // pi_level_zero.cpp for reference) Currently this is just a NOOP. 
pi_result cuda_piTearDown(void *) { return PI_SUCCESS; } +pi_result cuda_piextGetExtensionName(pi_extension_number ExtNumber, + size_t *Size, char *Value) { + pi_result Result = PI_SUCCESS; + // TODO switch to map/unordered_map when have enough number of extensions + switch (ExtNumber) { + case PI_INTEL_DEVICELIB_CASSERT: { + static const std::string Name = "N/A"; + if (Size) + *Size = Name.length(); + if (Value) + std::memcpy(Value, Name.data(), Name.length()); + break; + } + default: + Result = PI_INVALID_VALUE; + } + + return Result; +} + const char SupportedVersion[] = _PI_H_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -4770,24 +4790,4 @@ pi_result piPluginInit(pi_plugin *PluginInit) { return PI_SUCCESS; } -pi_result cuda_piextGetExtensionName(pi_extension_number ExtNumber, - size_t *Size, char *Value) { - pi_result Result = PI_SUCCESS; - // TODO switch to map/unordered_map when have enough number of extensions - switch (ExtNumber) { - case PI_INTEL_DEVICELIB_CASSERT: { - static const std::string Name = "N/A"; - if (Size) - *Size = Name.length(); - if (Value) - std::memcpy(Value, Name.data(), Name.length()); - break; - } - default: - Result = PI_INVALID_VALUE; - } - - return Result; -} - } // extern "C" From 2365eb450af3127ee7afb4ca9b701cadfb68da0c Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 9 Jun 2021 10:39:31 +0300 Subject: [PATCH 083/122] Disable fallback assert in L0 dynamic batch test Signed-off-by: Sergey Kanaev --- .../test/on-device/plugins/level_zero_dynamic_batch_test.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/test/on-device/plugins/level_zero_dynamic_batch_test.cpp b/sycl/test/on-device/plugins/level_zero_dynamic_batch_test.cpp index bc07034e05110..3dc8c3c7da102 100644 --- a/sycl/test/on-device/plugins/level_zero_dynamic_batch_test.cpp +++ b/sycl/test/on-device/plugins/level_zero_dynamic_batch_test.cpp @@ -1,6 +1,7 @@ // REQUIRES: gpu, level_zero - -// RUN: %clangxx -fsycl 
-fsycl-targets=%sycl_triple %s -o %t.out +// Disable fallback assert as it employs host-task which calls event::wait and +// causes to execute open command lists +// RUN: %clangxx -DSYCL_DISABLE_FALLBACK_ASSERT -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // Check that dynamic batching raises/lowers batch size // RUN: env SYCL_PI_TRACE=2 ZE_DEBUG=1 %GPU_RUN_PLACEHOLDER %t.out 2>&1 | FileCheck --check-prefixes=CKALL,CKDYN %s From 0368946dd5e0def2b42bcde375c77132831252f6 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 10 Jun 2021 19:01:58 +0300 Subject: [PATCH 084/122] Uncomment lines after FE changes Signed-off-by: Sergey Kanaev --- libdevice/fallback-cassert.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 0d5351afda4b5..7448fcac21acd 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -16,11 +16,9 @@ struct AssertHappened { }; #ifndef __SYCL_GLOBAL_VAR__ -// FIXME uncomment after CFE changes -#define __SYCL_GLOBAL_VAR__ /*__attribute__((sycl_global_var))*/ +#define __SYCL_GLOBAL_VAR__ __attribute__((sycl_global_var)) #endif -// FIXME remove const after CFE changes // declaration extern __SYCL_GLOBAL_VAR__ __SYCL_GLOBAL__ const AssertHappened __SYCL_AssertHappenedMem; @@ -52,12 +50,11 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, // (func) ? 
func : "", func, gid0, gid1, gid2, lid0, lid1, lid2, expr); - // FIXME uncomment the following lines after clang changes - //{ - // int Expected = 0; - // int Desired = 1; - // CompareAndSet(&__SYCL_AssertHappenedMem.Flag, Desired, Expected); - //} + { + int Expected = 0; + int Desired = 1; + CompareAndSet(&__SYCL_AssertHappenedMem.Flag, Desired, Expected); + } // FIXME: call SPIR-V unreachable instead // volatile int *die = (int *)0x0; From dedf6825d2e45bed02568fdb5d06c6f9ea7b7050 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 11 Jun 2021 17:04:11 +0300 Subject: [PATCH 085/122] Workaround FE restriction Signed-off-by: Sergey Kanaev --- libdevice/assert-happened.hpp | 14 ++++++++++++++ libdevice/fallback-cassert.cpp | 16 ++-------------- 2 files changed, 16 insertions(+), 14 deletions(-) create mode 100644 libdevice/assert-happened.hpp diff --git a/libdevice/assert-happened.hpp b/libdevice/assert-happened.hpp new file mode 100644 index 0000000000000..5bd3e9903e609 --- /dev/null +++ b/libdevice/assert-happened.hpp @@ -0,0 +1,14 @@ +#pragma once + +struct AssertHappened { + int Flag = 0; +}; + +#ifndef __SYCL_GLOBAL_VAR__ +#define __SYCL_GLOBAL_VAR__ __attribute__((sycl_global_var)) +#endif + +// declaration +extern __SYCL_GLOBAL_VAR__ __SYCL_GLOBAL__ const AssertHappened + __SYCL_AssertHappenedMem; + diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 7448fcac21acd..f84804158620e 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -8,24 +8,12 @@ #include "atomic.hpp" #include "wrapper.h" +#include "include/assert-happened.hpp" #ifdef __SPIR__ -struct AssertHappened { - int Flag = 0; -}; - -#ifndef __SYCL_GLOBAL_VAR__ -#define __SYCL_GLOBAL_VAR__ __attribute__((sycl_global_var)) -#endif - -// declaration -extern __SYCL_GLOBAL_VAR__ __SYCL_GLOBAL__ const AssertHappened - __SYCL_AssertHappenedMem; - // definition -__SYCL_GLOBAL_VAR__ __SYCL_GLOBAL__ const AssertHappened - __SYCL_AssertHappenedMem; 
+__SYCL_GLOBAL__ AssertHappened __SYCL_AssertHappenedMem; static const __attribute__((opencl_constant)) char assert_fmt[] = "%s:%d: %s: global id: [%lu,%lu,%lu], local id: [%lu,%lu,%lu] " From 1144198ff58457006334695f0cda56394358250d Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 11 Jun 2021 17:06:36 +0300 Subject: [PATCH 086/122] Remove unwanted changes Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/pi.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 75c0c7b1435e5..edd0d291538c6 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -1685,8 +1685,6 @@ struct _pi_plugin { } PiFunctionTable; }; -extern const char PI_INTEL_DEVICELIB_CASSERT_EXTENSION_NAME[]; - #ifdef __cplusplus } // extern "C" #endif // __cplusplus From 954a24c839e4ce5aa186082410576a91e1eaa942 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 11 Jun 2021 17:08:44 +0300 Subject: [PATCH 087/122] Remove unwanted comment Signed-off-by: Sergey Kanaev --- sycl/source/detail/queue_impl.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index ae8ced132b2f6..38725f83bbbc7 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -387,9 +387,6 @@ class queue_impl { /// \param Self is a pointer to this queue. /// \param Loc is the code location of the submit call (default argument) /// \return a SYCL event representing submitted command group. - /// - /// KernelName is null if the caller doesn't want the kernel name. The object - /// is modified if and only if there was a kernel submit. 
event submit_impl(const function_class &CGF, bool *IsKernel, const shared_ptr_class &Self, const detail::code_location &Loc) { From 0088a7d8c404d7b111d6065c50a666f688bad704 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 11 Jun 2021 18:31:08 +0300 Subject: [PATCH 088/122] Add missing file Signed-off-by: Sergey Kanaev --- libdevice/include/assert-happened.hpp | 29 +++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 libdevice/include/assert-happened.hpp diff --git a/libdevice/include/assert-happened.hpp b/libdevice/include/assert-happened.hpp new file mode 100644 index 0000000000000..0e15d6adf0366 --- /dev/null +++ b/libdevice/include/assert-happened.hpp @@ -0,0 +1,29 @@ +//==-- assert-happened.hpp - Structure and declaration for assert support --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +// Treat this header as system one to workaround frontend's restriction +#pragma clang system_header + +#ifdef __SPIR__ + +struct AssertHappened { + int Flag = 0; +}; + +#ifndef __SYCL_GLOBAL_VAR__ +#define __SYCL_GLOBAL_VAR__ __attribute__((sycl_global_var)) +#endif + +#define __SYCL_GLOBAL__ __attribute__((opencl_global)) + +// declaration +extern __SYCL_GLOBAL_VAR__ __SYCL_GLOBAL__ AssertHappened + __SYCL_AssertHappenedMem; + +#endif From 0994acc68cf6aafdaff7ee3c42687d74c23f127c Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 11 Jun 2021 18:47:13 +0300 Subject: [PATCH 089/122] Fix style issues Signed-off-by: Sergey Kanaev --- libdevice/assert-happened.hpp | 14 -------------- libdevice/fallback-cassert.cpp | 2 +- 2 files changed, 1 insertion(+), 15 deletions(-) delete mode 100644 libdevice/assert-happened.hpp diff --git a/libdevice/assert-happened.hpp 
b/libdevice/assert-happened.hpp deleted file mode 100644 index 5bd3e9903e609..0000000000000 --- a/libdevice/assert-happened.hpp +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -struct AssertHappened { - int Flag = 0; -}; - -#ifndef __SYCL_GLOBAL_VAR__ -#define __SYCL_GLOBAL_VAR__ __attribute__((sycl_global_var)) -#endif - -// declaration -extern __SYCL_GLOBAL_VAR__ __SYCL_GLOBAL__ const AssertHappened - __SYCL_AssertHappenedMem; - diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index f84804158620e..2b79c50ba62cb 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "atomic.hpp" -#include "wrapper.h" #include "include/assert-happened.hpp" +#include "wrapper.h" #ifdef __SPIR__ From 1c429d885000bb18df4e3e93506ba1001f9d2451 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 11 Jun 2021 19:34:26 +0300 Subject: [PATCH 090/122] Update PI Signed-off-by: Sergey Kanaev --- sycl/plugins/cuda/pi_cuda.cpp | 1 + sycl/plugins/esimd_cpu/pi_esimd_cpu.cpp | 21 +++++++++++++++++++++ sycl/plugins/level_zero/pi_level_zero.cpp | 1 + 3 files changed, 23 insertions(+) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index e10b50f1cb056..f266e2e7cb1a4 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -4629,6 +4629,7 @@ pi_result cuda_piextGetExtensionName(pi_extension_number ExtNumber, // TODO switch to map/unordered_map when have enough number of extensions switch (ExtNumber) { case PI_INTEL_DEVICELIB_CASSERT: { + // FIXME set name after backend support static const std::string Name = "N/A"; if (Size) *Size = Name.length(); diff --git a/sycl/plugins/esimd_cpu/pi_esimd_cpu.cpp b/sycl/plugins/esimd_cpu/pi_esimd_cpu.cpp index 63fc720f49eee..9a9fe2e8f8f57 100644 --- a/sycl/plugins/esimd_cpu/pi_esimd_cpu.cpp +++ b/sycl/plugins/esimd_cpu/pi_esimd_cpu.cpp @@ -1232,6 +1232,27 
@@ pi_result piTearDown(void *) { return PI_SUCCESS; } +pi_result piextGetExtensionName(pi_extension_number ExtNumber, size_t *Size, + char *Value) { + pi_result Result = PI_SUCCESS; + // TODO switch to map/unordered_map when have enough number of extensions + switch (ExtNumber) { + case PI_INTEL_DEVICELIB_CASSERT: { + // FIXME set name after backend support + static const std::string Name = "N/A"; + if (Size) + *Size = Name.length(); + if (Value) + std::memcpy(Value, Name.data(), Name.length()); + break; + } + default: + Result = PI_INVALID_VALUE; + } + + return Result; +} + pi_result piPluginInit(pi_plugin *PluginInit) { assert(PluginInit); size_t PluginVersionSize = sizeof(PluginInit->PluginVersion); diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 31739427d4e0b..5eb6c5531d862 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -6515,6 +6515,7 @@ pi_result piextGetExtensionName(pi_extension_number ExtNumber, size_t *Size, // TODO switch to map/unordered_map when have enough number of extensions switch (ExtNumber) { case PI_INTEL_DEVICELIB_CASSERT: { + // FIXME set name after backend support static const std::string Name = "N/A"; if (Size) *Size = Name.length(); From d264d16ef48d3af044714bc1e360f0835759bc2e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 11 Jun 2021 20:57:53 +0300 Subject: [PATCH 091/122] Copy assert message elements Signed-off-by: Sergey Kanaev --- libdevice/atomic.hpp | 28 ++++++- libdevice/fallback-cassert.cpp | 75 +++++++++++++++++-- libdevice/include/assert-happened.hpp | 13 ++++ .../CL/sycl/detail/assert_happened.hpp | 16 +++- sycl/include/CL/sycl/queue.hpp | 18 ++++- 5 files changed, 138 insertions(+), 12 deletions(-) diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp index b72b3aad0b811..56b1fe3655893 100644 --- a/libdevice/atomic.hpp +++ b/libdevice/atomic.hpp @@ -62,12 +62,34 @@ __spirv_AtomicCompareExchange(int 
__SYCL_GLOBAL__ *, __spv::Scope::Flag, __spv::MemorySemanticsMask::Flag, __spv::MemorySemanticsMask::Flag, int, int); +extern DEVICE_EXTERNAL int +__spirv_AtomicLoad(const int __SYCL_GLOBAL__ *, __spv::Scope::Flag, + __spv::MemorySemanticsMask::Flag); + +extern DEVICE_EXTERNAL int +__spirv_AtomicStore(int __SYCL_GLOBAL__ *, __spv::Scope::Flag, + __spv::MemorySemanticsMask::Flag, int); + /// Atomically set the value in *Ptr with Desired if and only if it is Expected -static inline void CompareAndSet(__SYCL_GLOBAL__ int *Ptr, int Expected, - int Desired) { - __spirv_AtomicCompareExchange( +/// Return the which already was in *Ptr +static inline int CompareAndSet(__SYCL_GLOBAL__ int *Ptr, int Desired, + int Expected) { + return __spirv_AtomicCompareExchange( Ptr, __spv::Scope::Device, __spv::MemorySemanticsMask::SequentiallyConsistent, __spv::MemorySemanticsMask::SequentiallyConsistent, Desired, Expected); } + +static inline int Load(__SYCL_GLOBAL__ int *Ptr) { + return __spirv_AtomicLoad( + Ptr, __spv::Scope::Device, + __spv::MemorySemanticsMask::SequentiallyConsistent); +} + +static inline int Store(__SYCL_GLOBAL__ int *Ptr, int V) { + return __spirv_AtomicStore( + Ptr, __spv::Scope::Device, + __spv::MemorySemanticsMask::SequentiallyConsistent, V); +} + #endif // __SPIR__ diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 2b79c50ba62cb..00344620adb46 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -12,6 +12,10 @@ #ifdef __SPIR__ +#define ASSERT_NONE 0 +#define ASSERT_START 1 +#define ASSERT_FINISH 2 + // definition __SYCL_GLOBAL__ AssertHappened __SYCL_AssertHappenedMem; @@ -19,9 +23,21 @@ static const __attribute__((opencl_constant)) char assert_fmt[] = "%s:%d: %s: global id: [%lu,%lu,%lu], local id: [%lu,%lu,%lu] " "Assertion `%s` failed.\n"; -DEVICE_EXTERN_C int __devicelib_assert_read(void) { - volatile int *Ptr = (int *)(&__SYCL_AssertHappenedMem.Flag); - return *Ptr; +DEVICE_EXTERN_C void 
__devicelib_assert_read(void *_Dst) { + AssertHappened *Dst = (AssertHappened *)_Dst; + int Flag = Load(&__SYCL_AssertHappenedMem.Flag); + + if (ASSERT_NONE == Flag) { + Dst->Flag = Flag; + return; + } +/* + if (Flag != ASSERT_FINISH) + while (ASSERT_START == Load(&__SYCL_AssertHappenedMem.Flag)) + ; +*/ + + *Dst = __SYCL_AssertHappenedMem; } DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, @@ -39,9 +55,56 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, func, gid0, gid1, gid2, lid0, lid1, lid2, expr); { - int Expected = 0; - int Desired = 1; - CompareAndSet(&__SYCL_AssertHappenedMem.Flag, Desired, Expected); + int Expected = ASSERT_NONE; + int Desired = ASSERT_START; + + if (CompareAndSet(&__SYCL_AssertHappenedMem.Flag, Desired, Expected) == + Expected) { + __SYCL_AssertHappenedMem.Line = line; + __SYCL_AssertHappenedMem.GID0 = gid0; + __SYCL_AssertHappenedMem.GID1 = gid1; + __SYCL_AssertHappenedMem.GID2 = gid2; + __SYCL_AssertHappenedMem.LID0 = lid0; + __SYCL_AssertHappenedMem.LID1 = lid1; + __SYCL_AssertHappenedMem.LID2 = lid2; + + int ExprLength = 0; + int FileLength = 0; + int FuncLength = 0; + + if (expr) + for (const char *C = expr; *C != '\0'; ++C, ++ExprLength); + if (file) + for (const char *C = file; *C != '\0'; ++C, ++FileLength); + if (func) + for (const char *C = func; *C != '\0'; ++C, ++FuncLength); + + int MaxExprIdx = sizeof(__SYCL_AssertHappenedMem.Expr) - 1; + int MaxFileIdx = sizeof(__SYCL_AssertHappenedMem.File) - 1; + int MaxFuncIdx = sizeof(__SYCL_AssertHappenedMem.Func) - 1; + + if (ExprLength < MaxExprIdx) + MaxExprIdx = ExprLength; + if (FileLength < MaxFileIdx) + MaxFileIdx = FileLength; + if (FuncLength < MaxFuncIdx) + MaxFuncIdx = FuncLength; + + for (int Idx = 0; Idx < MaxExprIdx; ++Idx) + __SYCL_AssertHappenedMem.Expr[Idx] = expr[Idx]; + __SYCL_AssertHappenedMem.Expr[MaxExprIdx] = '\0'; + + for (int Idx = 0; Idx < MaxFileIdx; ++Idx) + __SYCL_AssertHappenedMem.File[Idx] 
= file[Idx]; + __SYCL_AssertHappenedMem.File[MaxFileIdx] = '\0'; + + for (int Idx = 0; Idx < MaxFuncIdx; ++Idx) + __SYCL_AssertHappenedMem.Func[Idx] = func[Idx]; + __SYCL_AssertHappenedMem.Func[MaxFuncIdx] = '\0'; + + // Show we've done copying + Store(&__SYCL_AssertHappenedMem.Flag, ASSERT_FINISH); + } } // FIXME: call SPIR-V unreachable instead diff --git a/libdevice/include/assert-happened.hpp b/libdevice/include/assert-happened.hpp index 0e15d6adf0366..d92957276cd72 100644 --- a/libdevice/include/assert-happened.hpp +++ b/libdevice/include/assert-happened.hpp @@ -14,6 +14,19 @@ struct AssertHappened { int Flag = 0; + char Expr[256 + 1]; + char File[256 + 1]; + char Func[128 + 1]; + + int32_t Line; + + uint64_t GID0; + uint64_t GID1; + uint64_t GID2; + + uint64_t LID0; + uint64_t LID1; + uint64_t LID2; }; #ifndef __SYCL_GLOBAL_VAR__ diff --git a/sycl/include/CL/sycl/detail/assert_happened.hpp b/sycl/include/CL/sycl/detail/assert_happened.hpp index a03cf78f74964..bf58506ca5966 100644 --- a/sycl/include/CL/sycl/detail/assert_happened.hpp +++ b/sycl/include/CL/sycl/detail/assert_happened.hpp @@ -12,7 +12,8 @@ #ifdef __SYCL_DEVICE_ONLY__ // Reads Flag of AssertHappened on device -SYCL_EXTERNAL __attribute__((weak)) extern "C" int __devicelib_assert_read(); +SYCL_EXTERNAL __attribute__((weak)) extern "C" +void __devicelib_assert_read(void *); #endif __SYCL_INLINE_NAMESPACE(cl) { @@ -20,6 +21,19 @@ namespace sycl { namespace detail { struct AssertHappened { int Flag = 0; // set to non-zero upon assert failure + char Expr[256 + 1]; + char File[256 + 1]; + char Func[128 + 1]; + + int32_t Line; + + uint64_t GID0; + uint64_t GID1; + uint64_t GID2; + + uint64_t LID0; + uint64_t LID1; + uint64_t LID2; }; } // namespace detail } // namespace sycl diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index d75ab31e91214..f0034257e7aff 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -243,7 +243,8 @@ class __SYCL_EXPORT 
queue { CGH.single_task([Acc] { #ifdef __SYCL_DEVICE_ONLY__ - Acc[0].Flag = __devicelib_assert_read(); + //Acc[0].Flag = + __devicelib_assert_read(&Acc[0]); #else (void)Acc; #endif // __SYCL_DEVICE_ONLY__ @@ -253,8 +254,21 @@ class __SYCL_EXPORT queue { CGH.depends_on(CopierEv); CGH.codeplay_host_task([=] { - if (AH->Flag) + assert(AH->Flag != 1 && "Invalid value"); + + if (AH->Flag) { + const char *Expr = AH->Expr[0] ? AH->Expr : ""; + const char *File = AH->File[0] ? AH->File : ""; + const char *Func = AH->Func[0] ? AH->Func : ""; + + fprintf(stderr, + "%s:%d: %s: global id: [%lu, %lu, %lu], " + "local id: [%lu,%lu,%lu] " + "Assertion `%s` failed", + File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0, + AH->LID1, AH->LID2, Expr); abort(); // no need to release memory as it's abort anyway + } }); }; // Release memory in distinct host-task so that any dependency is eliminated From 293123a210b06af40e188ca372fd5282f7a92c57 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Tue, 15 Jun 2021 18:22:35 +0300 Subject: [PATCH 092/122] Fix warning as error for msvc: 'fprintf' : format string '%lu' requires an argument of type 'unsigned long', but variadic argument 4 has type 'uint64_t' --- sycl/include/CL/sycl/queue.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index f0034257e7aff..08befc4467db0 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -21,6 +21,7 @@ #include #include +#include #include // having _TWO_ mid-param #ifdefs makes the functions very difficult to read. @@ -262,8 +263,8 @@ class __SYCL_EXPORT queue { const char *Func = AH->Func[0] ? 
AH->Func : ""; fprintf(stderr, - "%s:%d: %s: global id: [%lu, %lu, %lu], " - "local id: [%lu,%lu,%lu] " + "%s:%d: %s: global id: [%" PRIu64 ", %" PRIu64 ", %" PRIu64 + "], local id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64 "] " "Assertion `%s` failed", File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0, AH->LID1, AH->LID2, Expr); From 51a8c9c4f7ec81468f0312f575c08e87855049c1 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Thu, 17 Jun 2021 19:44:12 +0300 Subject: [PATCH 093/122] Fix "_GLOBAL__sub_I_fallback_cassert.cpp is undefined" error on Linux --- libdevice/include/assert-happened.hpp | 20 +++++++++---------- .../CL/sycl/detail/assert_happened.hpp | 20 +++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/libdevice/include/assert-happened.hpp b/libdevice/include/assert-happened.hpp index d92957276cd72..a4d53f6401240 100644 --- a/libdevice/include/assert-happened.hpp +++ b/libdevice/include/assert-happened.hpp @@ -14,19 +14,19 @@ struct AssertHappened { int Flag = 0; - char Expr[256 + 1]; - char File[256 + 1]; - char Func[128 + 1]; + char Expr[256 + 1] = ""; + char File[256 + 1] = ""; + char Func[128 + 1] = ""; - int32_t Line; + int32_t Line = 0; - uint64_t GID0; - uint64_t GID1; - uint64_t GID2; + uint64_t GID0 = 0; + uint64_t GID1 = 0; + uint64_t GID2 = 0; - uint64_t LID0; - uint64_t LID1; - uint64_t LID2; + uint64_t LID0 = 0; + uint64_t LID1 = 0; + uint64_t LID2 = 0; }; #ifndef __SYCL_GLOBAL_VAR__ diff --git a/sycl/include/CL/sycl/detail/assert_happened.hpp b/sycl/include/CL/sycl/detail/assert_happened.hpp index bf58506ca5966..fe236e05bb62d 100644 --- a/sycl/include/CL/sycl/detail/assert_happened.hpp +++ b/sycl/include/CL/sycl/detail/assert_happened.hpp @@ -21,19 +21,19 @@ namespace sycl { namespace detail { struct AssertHappened { int Flag = 0; // set to non-zero upon assert failure - char Expr[256 + 1]; - char File[256 + 1]; - char Func[128 + 1]; + char Expr[256 + 1] = ""; + char File[256 + 1] = ""; + char Func[128 + 
1] = ""; - int32_t Line; + int32_t Line = 0; - uint64_t GID0; - uint64_t GID1; - uint64_t GID2; + uint64_t GID0 = 0; + uint64_t GID1 = 0; + uint64_t GID2 = 0; - uint64_t LID0; - uint64_t LID1; - uint64_t LID2; + uint64_t LID0 = 0; + uint64_t LID1 = 0; + uint64_t LID2 = 0; }; } // namespace detail } // namespace sycl From 88af34097d8ae0b667d45f6958a168e56f781e88 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Fri, 18 Jun 2021 12:42:26 +0300 Subject: [PATCH 094/122] Temporarily enable CUDA support + fix clang-format --- libdevice/atomic.hpp | 26 +++++++++---------- .../CL/sycl/detail/assert_happened.hpp | 4 +-- sycl/include/CL/sycl/queue.hpp | 1 - sycl/source/detail/device_impl.cpp | 5 +--- 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp index 56b1fe3655893..4a0b3a3030911 100644 --- a/libdevice/atomic.hpp +++ b/libdevice/atomic.hpp @@ -62,18 +62,19 @@ __spirv_AtomicCompareExchange(int __SYCL_GLOBAL__ *, __spv::Scope::Flag, __spv::MemorySemanticsMask::Flag, __spv::MemorySemanticsMask::Flag, int, int); -extern DEVICE_EXTERNAL int -__spirv_AtomicLoad(const int __SYCL_GLOBAL__ *, __spv::Scope::Flag, - __spv::MemorySemanticsMask::Flag); +extern DEVICE_EXTERNAL int __spirv_AtomicLoad(const int __SYCL_GLOBAL__ *, + __spv::Scope::Flag, + __spv::MemorySemanticsMask::Flag); -extern DEVICE_EXTERNAL int -__spirv_AtomicStore(int __SYCL_GLOBAL__ *, __spv::Scope::Flag, - __spv::MemorySemanticsMask::Flag, int); +extern DEVICE_EXTERNAL int __spirv_AtomicStore(int __SYCL_GLOBAL__ *, + __spv::Scope::Flag, + __spv::MemorySemanticsMask::Flag, + int); /// Atomically set the value in *Ptr with Desired if and only if it is Expected /// Return the which already was in *Ptr static inline int CompareAndSet(__SYCL_GLOBAL__ int *Ptr, int Desired, - int Expected) { + int Expected) { return __spirv_AtomicCompareExchange( Ptr, __spv::Scope::Device, __spv::MemorySemanticsMask::SequentiallyConsistent, @@ -81,15 +82,14 @@ static inline 
int CompareAndSet(__SYCL_GLOBAL__ int *Ptr, int Desired, } static inline int Load(__SYCL_GLOBAL__ int *Ptr) { - return __spirv_AtomicLoad( - Ptr, __spv::Scope::Device, - __spv::MemorySemanticsMask::SequentiallyConsistent); + return __spirv_AtomicLoad(Ptr, __spv::Scope::Device, + __spv::MemorySemanticsMask::SequentiallyConsistent); } static inline int Store(__SYCL_GLOBAL__ int *Ptr, int V) { - return __spirv_AtomicStore( - Ptr, __spv::Scope::Device, - __spv::MemorySemanticsMask::SequentiallyConsistent, V); + return __spirv_AtomicStore(Ptr, __spv::Scope::Device, + __spv::MemorySemanticsMask::SequentiallyConsistent, + V); } #endif // __SPIR__ diff --git a/sycl/include/CL/sycl/detail/assert_happened.hpp b/sycl/include/CL/sycl/detail/assert_happened.hpp index fe236e05bb62d..02ef02694cab2 100644 --- a/sycl/include/CL/sycl/detail/assert_happened.hpp +++ b/sycl/include/CL/sycl/detail/assert_happened.hpp @@ -12,8 +12,8 @@ #ifdef __SYCL_DEVICE_ONLY__ // Reads Flag of AssertHappened on device -SYCL_EXTERNAL __attribute__((weak)) extern "C" -void __devicelib_assert_read(void *); +SYCL_EXTERNAL __attribute__((weak)) extern "C" void +__devicelib_assert_read(void *); #endif __SYCL_INLINE_NAMESPACE(cl) { diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 08befc4467db0..4a42f3cdc3678 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -244,7 +244,6 @@ class __SYCL_EXPORT queue { CGH.single_task([Acc] { #ifdef __SYCL_DEVICE_ONLY__ - //Acc[0].Flag = __devicelib_assert_read(&Acc[0]); #else (void)Acc; diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index b6bd5b93f747b..8f68a0a14da28 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -310,11 +310,8 @@ bool device_impl::isAssertFailSupported() const { const plugin &Plugin = getPlugin(); - // Return false for CUDA even though, it has native support for assert. 
- // Change to return true when devicelib issue is fixed. - // See https://github.com/intel/llvm/issues/3385 if (Plugin.getBackend() == backend::cuda) - return false; + return true; size_t ExtNameSize = 0; Plugin.call(PI_INTEL_DEVICELIB_CASSERT, From 17378c37d298dcf1b636ed4059737b993715aa18 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Fri, 18 Jun 2021 19:49:19 +0300 Subject: [PATCH 095/122] More fixes --- libdevice/fallback-cassert.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 00344620adb46..58857664cd009 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -31,11 +31,10 @@ DEVICE_EXTERN_C void __devicelib_assert_read(void *_Dst) { Dst->Flag = Flag; return; } -/* + if (Flag != ASSERT_FINISH) while (ASSERT_START == Load(&__SYCL_AssertHappenedMem.Flag)) ; -*/ *Dst = __SYCL_AssertHappenedMem; } @@ -59,7 +58,7 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, int Desired = ASSERT_START; if (CompareAndSet(&__SYCL_AssertHappenedMem.Flag, Desired, Expected) == - Expected) { + Expected) { __SYCL_AssertHappenedMem.Line = line; __SYCL_AssertHappenedMem.GID0 = gid0; __SYCL_AssertHappenedMem.GID1 = gid1; @@ -73,11 +72,14 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, int FuncLength = 0; if (expr) - for (const char *C = expr; *C != '\0'; ++C, ++ExprLength); + for (const char *C = expr; *C != '\0'; ++C, ++ExprLength) + ; if (file) - for (const char *C = file; *C != '\0'; ++C, ++FileLength); + for (const char *C = file; *C != '\0'; ++C, ++FileLength) + ; if (func) - for (const char *C = func; *C != '\0'; ++C, ++FuncLength); + for (const char *C = func; *C != '\0'; ++C, ++FuncLength) + ; int MaxExprIdx = sizeof(__SYCL_AssertHappenedMem.Expr) - 1; int MaxFileIdx = sizeof(__SYCL_AssertHappenedMem.File) - 1; From a68f6d4f77119c6c18ead23360da2d4512188434 Mon Sep 17 
00:00:00 2001 From: Sergey Kanaev Date: Mon, 21 Jun 2021 13:16:31 +0300 Subject: [PATCH 096/122] Fix checker host-task Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 4a42f3cdc3678..292f6d29fe331 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -233,8 +233,7 @@ class __SYCL_EXPORT queue { using AHBufT = buffer; - detail::AssertHappened *AH = new detail::AssertHappened; - AHBufT *Buffer = new AHBufT{AH, range<1>{1}}; + AHBufT *Buffer = new AHBufT{range<1>{1}}; event CopierEv, CheckerEv, PostCheckerEv; auto CopierCGF = [&](handler &CGH) { @@ -250,10 +249,13 @@ class __SYCL_EXPORT queue { #endif // __SYCL_DEVICE_ONLY__ }); }; - auto CheckerCGF = [&CopierEv, AH](handler &CGH) { + auto CheckerCGF = [&CopierEv, Buffer](handler &CGH) { CGH.depends_on(CopierEv); + auto Acc = Buffer->get_access(CGH); + CGH.codeplay_host_task([=] { + const detail::AssertHappened *AH = &Acc[0]; assert(AH->Flag != 1 && "Invalid value"); if (AH->Flag) { @@ -272,12 +274,11 @@ class __SYCL_EXPORT queue { }); }; // Release memory in distinct host-task so that any dependency is eliminated - auto PostCheckerCGF = [&CheckerEv, AH, Buffer](handler &CGH) { + auto PostCheckerCGF = [&CheckerEv, Buffer](handler &CGH) { CGH.depends_on(CheckerEv); CGH.codeplay_host_task([=] { delete Buffer; - delete AH; }); }; From 2a4ecb341b61625cabbc88865b833511671761df Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 21 Jun 2021 13:26:14 +0300 Subject: [PATCH 097/122] Fix assert message Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 292f6d29fe331..12bf53604ebaa 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -220,6 +220,8 @@ 
class __SYCL_EXPORT queue { private: #ifndef SYCL_DISABLE_FALLBACK_ASSERT +#define __SYCL_ASSERT_START 1 + /** * Submit copy task for assert failure flag and host-task to check the flag * \param Event kernel's event to depend on i.e. the event represents the @@ -256,7 +258,8 @@ class __SYCL_EXPORT queue { CGH.codeplay_host_task([=] { const detail::AssertHappened *AH = &Acc[0]; - assert(AH->Flag != 1 && "Invalid value"); + + assert(AH->Flag != __SYCL_ASSERT_START && "Invalid value"); if (AH->Flag) { const char *Expr = AH->Expr[0] ? AH->Expr : ""; @@ -266,7 +269,7 @@ class __SYCL_EXPORT queue { fprintf(stderr, "%s:%d: %s: global id: [%" PRIu64 ", %" PRIu64 ", %" PRIu64 "], local id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64 "] " - "Assertion `%s` failed", + "Assertion `%s` failed.\n", File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0, AH->LID1, AH->LID2, Expr); abort(); // no need to release memory as it's abort anyway @@ -294,6 +297,7 @@ class __SYCL_EXPORT queue { return CheckerEv; } +#undef __SYCL_ASSERT_START #endif // Check if kernel with the name provided in KernelName and which is being From 6b501bba3244c1dcecd446ea8ab13e9619714030 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 21 Jun 2021 13:52:02 +0300 Subject: [PATCH 098/122] Fix style issue Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 12bf53604ebaa..a7f16dc3d2dfb 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -253,8 +253,10 @@ class __SYCL_EXPORT queue { }; auto CheckerCGF = [&CopierEv, Buffer](handler &CGH) { CGH.depends_on(CopierEv); + using mode = access::mode; + using target = access::target; - auto Acc = Buffer->get_access(CGH); + auto Acc = Buffer->get_access(CGH); CGH.codeplay_host_task([=] { const detail::AssertHappened *AH = &Acc[0]; From d6fbb259687843fd85327372155c3b10d544e297 Mon Sep 17 
00:00:00 2001 From: Sergey Kanaev Date: Mon, 21 Jun 2021 14:34:14 +0300 Subject: [PATCH 099/122] Remove call to spirv printf Signed-off-by: Sergey Kanaev --- libdevice/fallback-cassert.cpp | 115 +++++++++++++++------------------ 1 file changed, 52 insertions(+), 63 deletions(-) diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 58857664cd009..fa53b52abbee3 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -44,69 +44,58 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, uint64_t gid0, uint64_t gid1, uint64_t gid2, uint64_t lid0, uint64_t lid1, uint64_t lid2) { - // intX_t types are used instead of `int' and `long' because the format string - // is defined in terms of *device* types (OpenCL types): %d matches a 32 bit - // integer, %lu matches a 64 bit unsigned integer. Host `int' and - // `long' types may be different, so we cannot use them. - __spirv_ocl_printf(assert_fmt, file, (int32_t)line, - // WORKAROUND: IGC does not handle this well - // (func) ? 
func : "", - func, gid0, gid1, gid2, lid0, lid1, lid2, expr); - - { - int Expected = ASSERT_NONE; - int Desired = ASSERT_START; - - if (CompareAndSet(&__SYCL_AssertHappenedMem.Flag, Desired, Expected) == - Expected) { - __SYCL_AssertHappenedMem.Line = line; - __SYCL_AssertHappenedMem.GID0 = gid0; - __SYCL_AssertHappenedMem.GID1 = gid1; - __SYCL_AssertHappenedMem.GID2 = gid2; - __SYCL_AssertHappenedMem.LID0 = lid0; - __SYCL_AssertHappenedMem.LID1 = lid1; - __SYCL_AssertHappenedMem.LID2 = lid2; - - int ExprLength = 0; - int FileLength = 0; - int FuncLength = 0; - - if (expr) - for (const char *C = expr; *C != '\0'; ++C, ++ExprLength) - ; - if (file) - for (const char *C = file; *C != '\0'; ++C, ++FileLength) - ; - if (func) - for (const char *C = func; *C != '\0'; ++C, ++FuncLength) - ; - - int MaxExprIdx = sizeof(__SYCL_AssertHappenedMem.Expr) - 1; - int MaxFileIdx = sizeof(__SYCL_AssertHappenedMem.File) - 1; - int MaxFuncIdx = sizeof(__SYCL_AssertHappenedMem.Func) - 1; - - if (ExprLength < MaxExprIdx) - MaxExprIdx = ExprLength; - if (FileLength < MaxFileIdx) - MaxFileIdx = FileLength; - if (FuncLength < MaxFuncIdx) - MaxFuncIdx = FuncLength; - - for (int Idx = 0; Idx < MaxExprIdx; ++Idx) - __SYCL_AssertHappenedMem.Expr[Idx] = expr[Idx]; - __SYCL_AssertHappenedMem.Expr[MaxExprIdx] = '\0'; - - for (int Idx = 0; Idx < MaxFileIdx; ++Idx) - __SYCL_AssertHappenedMem.File[Idx] = file[Idx]; - __SYCL_AssertHappenedMem.File[MaxFileIdx] = '\0'; - - for (int Idx = 0; Idx < MaxFuncIdx; ++Idx) - __SYCL_AssertHappenedMem.Func[Idx] = func[Idx]; - __SYCL_AssertHappenedMem.Func[MaxFuncIdx] = '\0'; - - // Show we've done copying - Store(&__SYCL_AssertHappenedMem.Flag, ASSERT_FINISH); - } + int Expected = ASSERT_NONE; + int Desired = ASSERT_START; + + if (CompareAndSet(&__SYCL_AssertHappenedMem.Flag, Desired, Expected) == + Expected) { + __SYCL_AssertHappenedMem.Line = line; + __SYCL_AssertHappenedMem.GID0 = gid0; + __SYCL_AssertHappenedMem.GID1 = gid1; + __SYCL_AssertHappenedMem.GID2 
= gid2; + __SYCL_AssertHappenedMem.LID0 = lid0; + __SYCL_AssertHappenedMem.LID1 = lid1; + __SYCL_AssertHappenedMem.LID2 = lid2; + + int ExprLength = 0; + int FileLength = 0; + int FuncLength = 0; + + if (expr) + for (const char *C = expr; *C != '\0'; ++C, ++ExprLength) + ; + if (file) + for (const char *C = file; *C != '\0'; ++C, ++FileLength) + ; + if (func) + for (const char *C = func; *C != '\0'; ++C, ++FuncLength) + ; + + int MaxExprIdx = sizeof(__SYCL_AssertHappenedMem.Expr) - 1; + int MaxFileIdx = sizeof(__SYCL_AssertHappenedMem.File) - 1; + int MaxFuncIdx = sizeof(__SYCL_AssertHappenedMem.Func) - 1; + + if (ExprLength < MaxExprIdx) + MaxExprIdx = ExprLength; + if (FileLength < MaxFileIdx) + MaxFileIdx = FileLength; + if (FuncLength < MaxFuncIdx) + MaxFuncIdx = FuncLength; + + for (int Idx = 0; Idx < MaxExprIdx; ++Idx) + __SYCL_AssertHappenedMem.Expr[Idx] = expr[Idx]; + __SYCL_AssertHappenedMem.Expr[MaxExprIdx] = '\0'; + + for (int Idx = 0; Idx < MaxFileIdx; ++Idx) + __SYCL_AssertHappenedMem.File[Idx] = file[Idx]; + __SYCL_AssertHappenedMem.File[MaxFileIdx] = '\0'; + + for (int Idx = 0; Idx < MaxFuncIdx; ++Idx) + __SYCL_AssertHappenedMem.Func[Idx] = func[Idx]; + __SYCL_AssertHappenedMem.Func[MaxFuncIdx] = '\0'; + + // Show we've done copying + Store(&__SYCL_AssertHappenedMem.Flag, ASSERT_FINISH); } // FIXME: call SPIR-V unreachable instead From 52d854cea900ca94f6c42bdc0ea751fda5f0a050 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 21 Jun 2021 15:11:14 +0300 Subject: [PATCH 100/122] Use proper naming in libdevice Signed-off-by: Sergey Kanaev --- libdevice/atomic.hpp | 8 ++++---- libdevice/fallback-cassert.cpp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp index 4a0b3a3030911..671cc4b1690eb 100644 --- a/libdevice/atomic.hpp +++ b/libdevice/atomic.hpp @@ -73,20 +73,20 @@ extern DEVICE_EXTERNAL int __spirv_AtomicStore(int __SYCL_GLOBAL__ *, /// Atomically set the value in *Ptr with 
Desired if and only if it is Expected /// Return the which already was in *Ptr -static inline int CompareAndSet(__SYCL_GLOBAL__ int *Ptr, int Desired, - int Expected) { +static inline int atomicCompareAndSet(__SYCL_GLOBAL__ int *Ptr, int Desired, + int Expected) { return __spirv_AtomicCompareExchange( Ptr, __spv::Scope::Device, __spv::MemorySemanticsMask::SequentiallyConsistent, __spv::MemorySemanticsMask::SequentiallyConsistent, Desired, Expected); } -static inline int Load(__SYCL_GLOBAL__ int *Ptr) { +static inline int atomicLoad(__SYCL_GLOBAL__ int *Ptr) { return __spirv_AtomicLoad(Ptr, __spv::Scope::Device, __spv::MemorySemanticsMask::SequentiallyConsistent); } -static inline int Store(__SYCL_GLOBAL__ int *Ptr, int V) { +static inline int atomicStore(__SYCL_GLOBAL__ int *Ptr, int V) { return __spirv_AtomicStore(Ptr, __spv::Scope::Device, __spv::MemorySemanticsMask::SequentiallyConsistent, V); diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index fa53b52abbee3..fc42ac64c748a 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -25,7 +25,7 @@ static const __attribute__((opencl_constant)) char assert_fmt[] = DEVICE_EXTERN_C void __devicelib_assert_read(void *_Dst) { AssertHappened *Dst = (AssertHappened *)_Dst; - int Flag = Load(&__SYCL_AssertHappenedMem.Flag); + int Flag = atomicLoad(&__SYCL_AssertHappenedMem.Flag); if (ASSERT_NONE == Flag) { Dst->Flag = Flag; @@ -33,7 +33,7 @@ DEVICE_EXTERN_C void __devicelib_assert_read(void *_Dst) { } if (Flag != ASSERT_FINISH) - while (ASSERT_START == Load(&__SYCL_AssertHappenedMem.Flag)) + while (ASSERT_START == atomicLoad(&__SYCL_AssertHappenedMem.Flag)) ; *Dst = __SYCL_AssertHappenedMem; @@ -47,7 +47,7 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, int Expected = ASSERT_NONE; int Desired = ASSERT_START; - if (CompareAndSet(&__SYCL_AssertHappenedMem.Flag, Desired, Expected) == + if 
(atomicCompareAndSet(&__SYCL_AssertHappenedMem.Flag, Desired, Expected) == Expected) { __SYCL_AssertHappenedMem.Line = line; __SYCL_AssertHappenedMem.GID0 = gid0; @@ -95,7 +95,7 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, __SYCL_AssertHappenedMem.Func[MaxFuncIdx] = '\0'; // Show we've done copying - Store(&__SYCL_AssertHappenedMem.Flag, ASSERT_FINISH); + atomicStore(&__SYCL_AssertHappenedMem.Flag, ASSERT_FINISH); } // FIXME: call SPIR-V unreachable instead From 0b66b0e2ba2c670d6e629b23268ca13cb970316c Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 21 Jun 2021 15:12:33 +0300 Subject: [PATCH 101/122] Fix comment Signed-off-by: Sergey Kanaev --- libdevice/atomic.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp index 671cc4b1690eb..bc1188a3844f9 100644 --- a/libdevice/atomic.hpp +++ b/libdevice/atomic.hpp @@ -72,7 +72,7 @@ extern DEVICE_EXTERNAL int __spirv_AtomicStore(int __SYCL_GLOBAL__ *, int); /// Atomically set the value in *Ptr with Desired if and only if it is Expected -/// Return the which already was in *Ptr +/// Return the value which already was in *Ptr static inline int atomicCompareAndSet(__SYCL_GLOBAL__ int *Ptr, int Desired, int Expected) { return __spirv_AtomicCompareExchange( From e7ef209e57f6b890f3c57d3addd08342c8342ba6 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 22 Jun 2021 11:56:35 +0300 Subject: [PATCH 102/122] Enable querying device binary property set Signed-off-by: Sergey Kanaev --- sycl/doc/PreprocessorMacros.md | 8 ++++++++ sycl/include/CL/sycl/queue.hpp | 8 +++++++- sycl/source/detail/queue_impl.cpp | 5 ----- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/sycl/doc/PreprocessorMacros.md b/sycl/doc/PreprocessorMacros.md index 6c54547805279..4826eb46354c5 100644 --- a/sycl/doc/PreprocessorMacros.md +++ b/sycl/doc/PreprocessorMacros.md @@ -40,6 +40,14 @@ and *checker host-task* are not enqueued. 
Also, DPCPP RT won't perform check if user's kernel uses `__devicelib_assert_fail`. Refer to [the document](Assert.md) for function behind *copier kernel* and *checker host-task*. +### `SYCL_ENFORCE_FALLBACK_ASSERT` + +The macro has effect only when `SYCL_DISABLE_FALLBACK_ASSERT` isn't defined. +This macro is used for making DPCPP RT "think" that every kernel in application +is using assert feature. If the macro isn't set, DPCPP RT detects if kernel is +using assert feature via device image property set as described in +[the document](Assert.md). + ### Version macros - `__LIBSYCL_MAJOR_VERSION` is set to SYCL runtime library major version. diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index a7f16dc3d2dfb..675216e87d847 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -322,9 +322,15 @@ class __SYCL_EXPORT queue { bool IsKernel = false; Event = submit_impl(CGF, IsKernel, CodeLoc); + bool KernelUsesAssert = true; + +#ifndef SYCL_ENFORCE_FALLBACK_ASSERT + KernelUsesAssert = kernelUsesAssert(Event); +#endif + // assert required if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) && - kernelUsesAssert(Event)) { + KernelUsesAssert) { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is performed // by program manager class diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 6eb5e8a8c389a..92d80f9bfb2c7 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -291,10 +291,6 @@ bool queue_impl::kernelUsesAssert(event &Event) const { Scheduler &Sched = Scheduler::getInstance(); std::shared_lock Lock(Sched.MGraphLock); - // FIXME remove unwanted lines after sycl-post-link tool changes -#ifndef __SYCL_POST_LINK_TOOL_ADDS_ASSERT_USED_PROPERTY_SET - return true; -#else EventImplPtr EventPtr = detail::getSyclObjImpl(Event); Command *_Cmd = 
static_cast(EventPtr->getCommand()); @@ -322,7 +318,6 @@ bool queue_impl::kernelUsesAssert(event &Event) const { return true; return false; -#endif } } // namespace detail From 81a1dc24b89a68e0824fb61f6bb995db09196a9e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 22 Jun 2021 12:48:15 +0300 Subject: [PATCH 103/122] Guard builds for CUDA target Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 675216e87d847..2a4107409b476 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -219,7 +219,8 @@ class __SYCL_EXPORT queue { typename info::param_traits::return_type get_info() const; private: -#ifndef SYCL_DISABLE_FALLBACK_ASSERT +// FIXME remove __NVPTX__ condition once devicelib supports CUDA +#if !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) #define __SYCL_ASSERT_START 1 /** @@ -300,7 +301,7 @@ class __SYCL_EXPORT queue { return CheckerEv; } #undef __SYCL_ASSERT_START -#endif +#endif // !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) // Check if kernel with the name provided in KernelName and which is being // enqueued and can be waited on by Event uses assert @@ -318,7 +319,7 @@ class __SYCL_EXPORT queue { event Event; -#ifndef SYCL_DISABLE_FALLBACK_ASSERT +#if !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) bool IsKernel = false; Event = submit_impl(CGF, IsKernel, CodeLoc); @@ -338,7 +339,7 @@ class __SYCL_EXPORT queue { } #else Event = submit_impl(CGF, CodeLoc); -#endif +#endif // !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) return Event; } @@ -360,7 +361,7 @@ class __SYCL_EXPORT queue { event Event; -#ifndef SYCL_DISABLE_FALLBACK_ASSERT +#if !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) bool IsKernel = false; Event = submit_impl(CGF, IsKernel, SecondaryQueue, CodeLoc); @@ -374,7 +375,7 @@ 
class __SYCL_EXPORT queue { } #else Event = submit_impl(CGF, SecondaryQueue, CodeLoc); -#endif +#endif // !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) return Event; } From 34489698411a26e948c6634638d515dcb153a886 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 22 Jun 2021 12:48:44 +0300 Subject: [PATCH 104/122] Fix style issues Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 2a4107409b476..dddd5a3c19cab 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -283,9 +283,7 @@ class __SYCL_EXPORT queue { auto PostCheckerCGF = [&CheckerEv, Buffer](handler &CGH) { CGH.depends_on(CheckerEv); - CGH.codeplay_host_task([=] { - delete Buffer; - }); + CGH.codeplay_host_task([=] { delete Buffer; }); }; if (SecondaryQueue) { From c8ec4960fdea5422c0b49f0809dbd25a5ba31546 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 22 Jun 2021 12:49:59 +0300 Subject: [PATCH 105/122] Revert "Enable querying device binary property set" This reverts commit e7ef209e57f6b890f3c57d3addd08342c8342ba6. Signed-off-by: Sergey Kanaev --- sycl/doc/PreprocessorMacros.md | 8 -------- sycl/include/CL/sycl/queue.hpp | 8 +------- sycl/source/detail/queue_impl.cpp | 5 +++++ 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/sycl/doc/PreprocessorMacros.md b/sycl/doc/PreprocessorMacros.md index 4826eb46354c5..6c54547805279 100644 --- a/sycl/doc/PreprocessorMacros.md +++ b/sycl/doc/PreprocessorMacros.md @@ -40,14 +40,6 @@ and *checker host-task* are not enqueued. Also, DPCPP RT won't perform check if user's kernel uses `__devicelib_assert_fail`. Refer to [the document](Assert.md) for function behind *copier kernel* and *checker host-task*. -### `SYCL_ENFORCE_FALLBACK_ASSERT` - -The macro has effect only when `SYCL_DISABLE_FALLBACK_ASSERT` isn't defined. 
-This macro is used for making DPCPP RT "think" that every kernel in application -is using assert feature. If the macro isn't set, DPCPP RT detects if kernel is -using assert feature via device image property set as described in -[the document](Assert.md). - ### Version macros - `__LIBSYCL_MAJOR_VERSION` is set to SYCL runtime library major version. diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index dddd5a3c19cab..90a11c4614316 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -321,15 +321,9 @@ class __SYCL_EXPORT queue { bool IsKernel = false; Event = submit_impl(CGF, IsKernel, CodeLoc); - bool KernelUsesAssert = true; - -#ifndef SYCL_ENFORCE_FALLBACK_ASSERT - KernelUsesAssert = kernelUsesAssert(Event); -#endif - // assert required if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) && - KernelUsesAssert) { + kernelUsesAssert(Event)) { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is performed // by program manager class diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 92d80f9bfb2c7..6eb5e8a8c389a 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -291,6 +291,10 @@ bool queue_impl::kernelUsesAssert(event &Event) const { Scheduler &Sched = Scheduler::getInstance(); std::shared_lock Lock(Sched.MGraphLock); + // FIXME remove unwanted lines after sycl-post-link tool changes +#ifndef __SYCL_POST_LINK_TOOL_ADDS_ASSERT_USED_PROPERTY_SET + return true; +#else EventImplPtr EventPtr = detail::getSyclObjImpl(Event); Command *_Cmd = static_cast(EventPtr->getCommand()); @@ -318,6 +322,7 @@ bool queue_impl::kernelUsesAssert(event &Event) const { return true; return false; +#endif } } // namespace detail From eb4bdc7f2b514c4844c136935fa16cd3bcdd0497 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 23 Jun 2021 22:37:51 +0300 Subject: 
[PATCH 106/122] Overcome msvc behaviour Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 179 +++++++++++++++++---------------- 1 file changed, 95 insertions(+), 84 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 90a11c4614316..86746ddf5770f 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -68,8 +68,13 @@ class AssertInfoCopier; // Forward declaration class context; class device; +class queue; + namespace detail { class queue_impl; +static +event submitAssertCapture(queue &, event &, queue *, + const detail::code_location &); } /// Encapsulates a single SYCL queue which schedules kernels on a SYCL device. @@ -219,88 +224,6 @@ class __SYCL_EXPORT queue { typename info::param_traits::return_type get_info() const; private: -// FIXME remove __NVPTX__ condition once devicelib supports CUDA -#if !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) -#define __SYCL_ASSERT_START 1 - - /** - * Submit copy task for assert failure flag and host-task to check the flag - * \param Event kernel's event to depend on i.e. 
the event represents the - * kernel to check for assertion failure - * \param SecondaryQueue secondary queue for submit process, null if not used - * \returns host tasks event - */ - event submitAssertCapture(event &Event, queue *SecondaryQueue, - const detail::code_location &CodeLoc) { - _CODELOCARG(&CodeLoc); - - using AHBufT = buffer; - - AHBufT *Buffer = new AHBufT{range<1>{1}}; - - event CopierEv, CheckerEv, PostCheckerEv; - auto CopierCGF = [&](handler &CGH) { - CGH.depends_on(Event); - - auto Acc = Buffer->get_access(CGH); - - CGH.single_task([Acc] { -#ifdef __SYCL_DEVICE_ONLY__ - __devicelib_assert_read(&Acc[0]); -#else - (void)Acc; -#endif // __SYCL_DEVICE_ONLY__ - }); - }; - auto CheckerCGF = [&CopierEv, Buffer](handler &CGH) { - CGH.depends_on(CopierEv); - using mode = access::mode; - using target = access::target; - - auto Acc = Buffer->get_access(CGH); - - CGH.codeplay_host_task([=] { - const detail::AssertHappened *AH = &Acc[0]; - - assert(AH->Flag != __SYCL_ASSERT_START && "Invalid value"); - - if (AH->Flag) { - const char *Expr = AH->Expr[0] ? AH->Expr : ""; - const char *File = AH->File[0] ? AH->File : ""; - const char *Func = AH->Func[0] ? 
AH->Func : ""; - - fprintf(stderr, - "%s:%d: %s: global id: [%" PRIu64 ", %" PRIu64 ", %" PRIu64 - "], local id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64 "] " - "Assertion `%s` failed.\n", - File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0, - AH->LID1, AH->LID2, Expr); - abort(); // no need to release memory as it's abort anyway - } - }); - }; - // Release memory in distinct host-task so that any dependency is eliminated - auto PostCheckerCGF = [&CheckerEv, Buffer](handler &CGH) { - CGH.depends_on(CheckerEv); - - CGH.codeplay_host_task([=] { delete Buffer; }); - }; - - if (SecondaryQueue) { - CopierEv = submit_impl(CopierCGF, *SecondaryQueue, CodeLoc); - CheckerEv = submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc); - PostCheckerEv = submit_impl(PostCheckerCGF, *SecondaryQueue, CodeLoc); - } else { - CopierEv = submit_impl(CopierCGF, CodeLoc); - CheckerEv = submit_impl(CheckerCGF, CodeLoc); - PostCheckerEv = submit_impl(PostCheckerCGF, CodeLoc); - } - - return CheckerEv; - } -#undef __SYCL_ASSERT_START -#endif // !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) - // Check if kernel with the name provided in KernelName and which is being // enqueued and can be waited on by Event uses assert bool kernelUsesAssert(event &Event) const; @@ -327,7 +250,8 @@ class __SYCL_EXPORT queue { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is performed // by program manager class - submitAssertCapture(Event, /* SecondaryQueue = */ nullptr, CodeLoc); + submitAssertCapture(*this, Event, /* SecondaryQueue = */ nullptr, + CodeLoc); } #else Event = submit_impl(CGF, CodeLoc); @@ -363,7 +287,7 @@ class __SYCL_EXPORT queue { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is performed // by program manager class - submitAssertCapture(Event, &SecondaryQueue, CodeLoc); + submitAssertCapture(*this, Event, 
&SecondaryQueue, CodeLoc); } #else Event = submit_impl(CGF, SecondaryQueue, CodeLoc); @@ -874,6 +798,9 @@ class __SYCL_EXPORT queue { template friend T detail::createSyclObjFromImpl(decltype(T::impl) ImplObj); + friend event detail::submitAssertCapture(queue &, event &, queue *, + const detail::code_location &); + /// A template-free version of submit. event submit_impl(function_class CGH, const detail::code_location &CodeLoc); @@ -961,6 +888,90 @@ class __SYCL_EXPORT queue { } }; +namespace detail { +// FIXME remove __NVPTX__ condition once devicelib supports CUDA +#if !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) +#define __SYCL_ASSERT_START 1 +/** + * Submit copy task for assert failure flag and host-task to check the flag + * \param Event kernel's event to depend on i.e. the event represents the + * kernel to check for assertion failure + * \param SecondaryQueue secondary queue for submit process, null if not used + * \returns host tasks event + * This method doesn't belong to queue class to overcome msvc behaviour due to + * which it gets compiled and exported without any integration header and, thus, + * with no proper KernelInfo instance. 
+ */ +event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue, + const detail::code_location &CodeLoc) { + using AHBufT = buffer; + + AHBufT *Buffer = new AHBufT{range<1>{1}}; + + event CopierEv, CheckerEv, PostCheckerEv; + auto CopierCGF = [&](handler &CGH) { + CGH.depends_on(Event); + + auto Acc = Buffer->get_access(CGH); + + CGH.single_task([Acc] { +#ifdef __SYCL_DEVICE_ONLY__ + __devicelib_assert_read(&Acc[0]); +#else + (void)Acc; +#endif // __SYCL_DEVICE_ONLY__ + }); + }; + auto CheckerCGF = [&CopierEv, Buffer](handler &CGH) { + CGH.depends_on(CopierEv); + using mode = access::mode; + using target = access::target; + + auto Acc = Buffer->get_access(CGH); + + CGH.codeplay_host_task([=] { + const detail::AssertHappened *AH = &Acc[0]; + + assert(AH->Flag != __SYCL_ASSERT_START && "Invalid value"); + + if (AH->Flag) { + const char *Expr = AH->Expr[0] ? AH->Expr : ""; + const char *File = AH->File[0] ? AH->File : ""; + const char *Func = AH->Func[0] ? AH->Func : ""; + + fprintf(stderr, + "%s:%d: %s: global id: [%" PRIu64 ", %" PRIu64 ", %" PRIu64 + "], local id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64 "] " + "Assertion `%s` failed.\n", + File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0, + AH->LID1, AH->LID2, Expr); + abort(); // no need to release memory as it's abort anyway + } + }); + }; + // Release memory in distinct host-task so that any dependency is eliminated + auto PostCheckerCGF = [&CheckerEv, Buffer](handler &CGH) { + CGH.depends_on(CheckerEv); + + CGH.codeplay_host_task([=] { delete Buffer; }); + }; + + if (SecondaryQueue) { + CopierEv = Self.submit_impl(CopierCGF, *SecondaryQueue, CodeLoc); + CheckerEv = Self.submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc); + PostCheckerEv = Self.submit_impl(PostCheckerCGF, *SecondaryQueue, CodeLoc); + } else { + CopierEv = Self.submit_impl(CopierCGF, CodeLoc); + CheckerEv = Self.submit_impl(CheckerCGF, CodeLoc); + PostCheckerEv = Self.submit_impl(PostCheckerCGF, CodeLoc); + } + + return 
CheckerEv; +} +#undef __SYCL_ASSERT_START +#endif // !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) +} // namespace detail + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) From 6cb0a4374ff28b11cdfbab34e0d8520bb7431539 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 23 Jun 2021 22:46:38 +0300 Subject: [PATCH 107/122] Fix message style Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 86746ddf5770f..c013620ce75a8 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -940,7 +940,7 @@ event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue, const char *Func = AH->Func[0] ? AH->Func : ""; fprintf(stderr, - "%s:%d: %s: global id: [%" PRIu64 ", %" PRIu64 ", %" PRIu64 + "%s:%d: %s: global id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64 "], local id: [%" PRIu64 ",%" PRIu64 ",%" PRIu64 "] " "Assertion `%s` failed.\n", File, AH->Line, Func, AH->GID0, AH->GID1, AH->GID2, AH->LID0, From 9f9421b1f35d95a8f9ec2304fd84a3a81bfcb70a Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 23 Jun 2021 23:01:54 +0300 Subject: [PATCH 108/122] Fix style issue Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index c013620ce75a8..e0bf903822252 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -72,9 +72,8 @@ class queue; namespace detail { class queue_impl; -static -event submitAssertCapture(queue &, event &, queue *, - const detail::code_location &); +static event submitAssertCapture(queue &, event &, queue *, + const detail::code_location &); } /// Encapsulates a single SYCL queue which schedules kernels on a SYCL device. 
From 9cc989a85176f40c69e800e3399ed45f0891fe86 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 24 Jun 2021 12:37:15 +0300 Subject: [PATCH 109/122] Fix build issue Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index e0bf903822252..52daed3579ee2 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -60,6 +60,14 @@ #define _KERNELFUNCPARAM(a) const KernelType &a #endif +// Helper macro to identify if fallback assert is needed +// FIXME remove __NVPTX__ condition once devicelib supports CUDA +#if !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) +#define __SYCL_USE_FALLBACK_ASSERT 1 +#else +#define __SYCL_USE_FALLBACK_ASSERT 0 +#endif + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -72,8 +80,10 @@ class queue; namespace detail { class queue_impl; +#if __SYCL_USE_FALLBACK_ASSERT static event submitAssertCapture(queue &, event &, queue *, const detail::code_location &); +#endif } /// Encapsulates a single SYCL queue which schedules kernels on a SYCL device. @@ -239,7 +249,7 @@ class __SYCL_EXPORT queue { event Event; -#if !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) +#if __SYCL_USE_FALLBACK_ASSERT bool IsKernel = false; Event = submit_impl(CGF, IsKernel, CodeLoc); @@ -276,7 +286,7 @@ class __SYCL_EXPORT queue { event Event; -#if !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) +#if __SYCL_USE_FALLBACK_ASSERT bool IsKernel = false; Event = submit_impl(CGF, IsKernel, SecondaryQueue, CodeLoc); @@ -797,8 +807,10 @@ class __SYCL_EXPORT queue { template friend T detail::createSyclObjFromImpl(decltype(T::impl) ImplObj); +#if __SYCL_USE_FALLBACK_ASSERT friend event detail::submitAssertCapture(queue &, event &, queue *, const detail::code_location &); +#endif /// A template-free version of submit. 
event submit_impl(function_class CGH, @@ -888,8 +900,7 @@ class __SYCL_EXPORT queue { }; namespace detail { -// FIXME remove __NVPTX__ condition once devicelib supports CUDA -#if !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) +#if __SYCL_USE_FALLBACK_ASSERT #define __SYCL_ASSERT_START 1 /** * Submit copy task for assert failure flag and host-task to check the flag @@ -983,3 +994,5 @@ template <> struct hash { } }; } // namespace std + +#undef __SYCL_USE_FALLBACK_ASSERT From 7eec0905e12bfa7c990b1d6e06e3a2feaf1c2eba Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 24 Jun 2021 18:51:42 +0300 Subject: [PATCH 110/122] Overcome msvc behaviour. pt2 Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 52daed3579ee2..863457df151b8 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -908,6 +908,7 @@ namespace detail { * kernel to check for assertion failure * \param SecondaryQueue secondary queue for submit process, null if not used * \returns host tasks event + * * This method doesn't belong to queue class to overcome msvc behaviour due to * which it gets compiled and exported without any integration header and, thus, * with no proper KernelInfo instance. @@ -942,7 +943,14 @@ event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue, CGH.codeplay_host_task([=] { const detail::AssertHappened *AH = &Acc[0]; - assert(AH->Flag != __SYCL_ASSERT_START && "Invalid value"); + // Don't use assert here as msvc will insert reference to __imp__wassert + // which won't be properly resolved in separate compile use-case +#ifndef NDEBUG + if (AH->Flag == __SYCL_ASSERT_START) + throw sycl::runtime_error( + "Internal Error. Invalid value in assert description.", + PI_INVALID_VALUE); +#endif if (AH->Flag) { const char *Expr = AH->Expr[0] ? 
AH->Expr : ""; From 66c816c844d03e1bf5e23a39db8c5b1c95f576c5 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 24 Jun 2021 19:02:16 +0300 Subject: [PATCH 111/122] Move AssertInfoCopier kernel name class to detail namespace Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 863457df151b8..78ae47217a79c 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -71,8 +71,6 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -class AssertInfoCopier; - // Forward declaration class context; class device; @@ -81,6 +79,7 @@ class queue; namespace detail { class queue_impl; #if __SYCL_USE_FALLBACK_ASSERT +class AssertInfoCopier; static event submitAssertCapture(queue &, event &, queue *, const detail::code_location &); #endif From b9c39bb44b73a4f9d75a1dfbc58438fd57f05acb Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 24 Jun 2021 19:10:06 +0300 Subject: [PATCH 112/122] Add structure layout description Signed-off-by: Sergey Kanaev --- sycl/doc/Assert.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/sycl/doc/Assert.md b/sycl/doc/Assert.md index 3acef1b94fc9c..75e69a528b1c5 100644 --- a/sycl/doc/Assert.md +++ b/sycl/doc/Assert.md @@ -174,6 +174,19 @@ declaration: ```c++ struct __SYCL_AssertHappened { int Flag = 0; + char Expr[256 + 1] = ""; + char File[256 + 1] = ""; + char Func[128 + 1] = ""; + + int32_t Line = 0; + + uint64_t GID0 = 0; + uint64_t GID1 = 0; + uint64_t GID2 = 0; + + uint64_t LID0 = 0; + uint64_t LID1 = 0; + uint64_t LID2 = 0; }; #ifdef __SYCL_DEVICE_ONLY__ @@ -187,6 +200,28 @@ mutable program-scope variable. The reference to extern variable is resolved within online-linking against fallback devicelib. +#### Description of fields + +The value stored in `Flag` denotes whether an assert failure happened at all.
There are two valid +values at host: + +| Value | Meaning | +| ----- | ------- | +| 0 | No assert failure detected | +| 2 | Assert failure detected and reported within this instance of struct | + +At device-side, there's another valid value: 1, which means that assert failure +has been detected and the structure is being filled in at the moment. This value is for +device-side only and should never be reported to host. If it is reported to host, the +atomic operation has malfunctioned. + +`Expr`, `File`, `Func`, `Line` describe the assert message itself and +contain the expression, file name, function name, line in the file where assert +failure happened, respectively. + +`GID*` and `LID*` fields describe the global and local ID respectively of a +work-item in which the assert failed. + ### Online-linking fallback `__devicelib_assert_fail` Online linking against fallback implementation of `__devicelib_assert_fail` is From f797224ee27beca163910c0ccd4f514e1b2c41cb Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 25 Jun 2021 10:54:13 +0300 Subject: [PATCH 113/122] Eliminate ABI break Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/detail/pi.hpp | 9 +++++++-- sycl/source/detail/pi.cpp | 1 - 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp index f86eb43697375..984fd0f6b6a99 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -336,7 +336,13 @@ class DeviceBinaryImage { const PropertyRange &getKernelParamOptInfo() const { return KernelParamOptInfo; } - const PropertyRange &getAssertUsed() const { return AssertUsed; } + const PropertyRange getAssertUsed() const { + // We can't have this variable as a class member, since it would break + // the ABI backwards compatibility.
+ PropertyRange AssertUsed; + AssertUsed.init(Bin, __SYCL_PI_PROPERTY_SET_SYCL_ASSERT_USED); + return AssertUsed; + } virtual ~DeviceBinaryImage() {} protected: @@ -348,7 +354,6 @@ class DeviceBinaryImage { DeviceBinaryImage::PropertyRange SpecConstIDMap; DeviceBinaryImage::PropertyRange DeviceLibReqMask; DeviceBinaryImage::PropertyRange KernelParamOptInfo; - DeviceBinaryImage::PropertyRange AssertUsed; }; /// Tries to determine the device binary image foramat. Returns diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index 2e4a1bb476f3b..fc7ede340d616 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -633,7 +633,6 @@ void DeviceBinaryImage::init(pi_device_binary Bin) { SpecConstIDMap.init(Bin, __SYCL_PI_PROPERTY_SET_SPEC_CONST_MAP); DeviceLibReqMask.init(Bin, __SYCL_PI_PROPERTY_SET_DEVICELIB_REQ_MASK); KernelParamOptInfo.init(Bin, __SYCL_PI_PROPERTY_SET_KERNEL_PARAM_OPT_INFO); - AssertUsed.init(Bin, __SYCL_PI_PROPERTY_SET_SYCL_ASSERT_USED); } } // namespace pi From 901fe8062ebe4b8a2b6bbf190c1d3b01d9d171c5 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 25 Jun 2021 13:18:16 +0300 Subject: [PATCH 114/122] Store assert usage info in the event instead of runtime fetching. 
Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/event.hpp | 4 +++ sycl/include/CL/sycl/queue.hpp | 38 ++++++++++++-------------- sycl/source/detail/event_impl.hpp | 11 ++++++++ sycl/source/detail/queue_impl.cpp | 27 +++---------------- sycl/source/detail/queue_impl.hpp | 44 ++++++++++++++++++++++--------- sycl/source/event.cpp | 12 +++++++++ sycl/source/queue.cpp | 21 +++++++-------- 7 files changed, 88 insertions(+), 69 deletions(-) diff --git a/sycl/include/CL/sycl/event.hpp b/sycl/include/CL/sycl/event.hpp index 41dc3bb557597..c874b34c93080 100644 --- a/sycl/include/CL/sycl/event.hpp +++ b/sycl/include/CL/sycl/event.hpp @@ -134,6 +134,10 @@ class __SYCL_EXPORT event { getNative()); } + void storeAdditionalInfo(bool IsKernel, bool KernelUsesAssert); + bool enqueuedIsKernel() const; + bool enqueuedKernelUsesAssert() const; + private: event(shared_ptr_class EventImpl); diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 78ae47217a79c..bcfcd19d4902a 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -231,11 +231,6 @@ class __SYCL_EXPORT queue { template typename info::param_traits::return_type get_info() const; -private: - // Check if kernel with the name provided in KernelName and which is being - // enqueued and can be waited on by Event uses assert - bool kernelUsesAssert(event &Event) const; - public: /// Submits a command group function object to the queue, in order to be /// scheduled for execution on the device. 
@@ -249,12 +244,12 @@ class __SYCL_EXPORT queue { event Event; #if __SYCL_USE_FALLBACK_ASSERT - bool IsKernel = false; - Event = submit_impl(CGF, IsKernel, CodeLoc); + Event = submit_impl_and_store_info(CGF, CodeLoc); // assert required - if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) && - kernelUsesAssert(Event)) { + if (Event.enqueuedIsKernel() && + !get_device().has(aspect::ext_oneapi_native_assert) && + Event.enqueuedKernelUsesAssert()) { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is performed // by program manager class @@ -286,12 +281,12 @@ class __SYCL_EXPORT queue { event Event; #if __SYCL_USE_FALLBACK_ASSERT - bool IsKernel = false; - Event = submit_impl(CGF, IsKernel, SecondaryQueue, CodeLoc); + Event = submit_impl_and_store_info(CGF, SecondaryQueue, CodeLoc); // assert required - if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) && - kernelUsesAssert(Event)) { + if (Event.enqueuedIsKernel() && + !get_device().has(aspect::ext_oneapi_native_assert) && + Event.enqueuedKernelUsesAssert()) { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is performed // by program manager class @@ -820,19 +815,20 @@ class __SYCL_EXPORT queue { /// A template-free version of submit. /// \param CGH command group function/handler - /// \param[out] IsKernel set by callee to \c true if CGH represents a kernel - /// submit /// \param CodeLoc code location - event submit_impl(function_class CGH, bool &IsKernel, - const detail::code_location &CodeLoc); + /// + /// This method stores additional information within event_impl class instance + event submit_impl_and_store_info(function_class CGH, + const detail::code_location &CodeLoc); /// A template-free version of submit. 
/// \param CGH command group function/handler /// \param secondQueue fallback queue - /// \param[out] IsKernel set by callee to \c true if CGH represents a kernel - /// submit /// \param CodeLoc code location - event submit_impl(function_class CGH, queue secondQueue, - bool &IsKernel, const detail::code_location &CodeLoc); + /// + /// This method stores additional information within event_impl class instance + event submit_impl_and_store_info(function_class CGH, + queue secondQueue, + const detail::code_location &CodeLoc); /// parallel_for_impl with a kernel represented as a lambda + range that /// specifies global size only. diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 81e9e9b626356..0c51c9c01dfd0 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -155,6 +155,14 @@ class event_impl { /// \return a native handle. pi_native_handle getNative() const; + void storeAdditionalInfo(bool IsKernel, bool KernelUsesAssert) { + MEnqueuedIsKernel = IsKernel; + MEnqueuedKernelUsesAssert = KernelUsesAssert; + } + + bool enqueuedIsKernel() const { return MEnqueuedIsKernel; } + bool enqueuedKernelUsesAssert() const { return MEnqueuedKernelUsesAssert; } + private: // When instrumentation is enabled emits trace event for event wait begin and // returns the telemetry event generated for the wait @@ -177,6 +185,9 @@ class event_impl { // backend's representation (e.g. alloca). Used values are listed in // HostEventState enum. 
std::atomic MState; + + bool MEnqueuedIsKernel = false; + bool MEnqueuedKernelUsesAssert = false; }; } // namespace detail diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 6eb5e8a8c389a..4421af334ae6c 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -287,38 +287,19 @@ pi_native_handle queue_impl::getNative() const { return Handle; } -bool queue_impl::kernelUsesAssert(event &Event) const { - Scheduler &Sched = Scheduler::getInstance(); - std::shared_lock Lock(Sched.MGraphLock); - - // FIXME remove unwanted lines after sycl-post-link tool changes +bool queue_impl::kernelUsesAssert(const std::string &KernelName, + OSModuleHandle Handle) const { #ifndef __SYCL_POST_LINK_TOOL_ADDS_ASSERT_USED_PROPERTY_SET return true; #else - EventImplPtr EventPtr = detail::getSyclObjImpl(Event); - - Command *_Cmd = static_cast(EventPtr->getCommand()); - - assert((_Cmd->getType() == Command::RUN_CG) && - "Only RUN_CG command can use asserts"); - - ExecCGCommand *Cmd = static_cast(_Cmd); - CG &_CG = Cmd->getCG(); - - assert((_CG.getType() == CG::CGTYPE::KERNEL) && - "Only kernel can use asserts"); - - CGExecKernel &CmdGroup = static_cast(_CG); - RTDeviceBinaryImage &BinImg = ProgramManager::getInstance().getDeviceImage( - CmdGroup.MOSModuleHandle, CmdGroup.MKernelName, get_context(), - get_device()); + Handle, KernelName, get_context(), get_device()); const pi::DeviceBinaryImage::PropertyRange &AssertUsedRange = BinImg.getAssertUsed(); if (AssertUsedRange.isAvailable()) for (const auto &Prop : AssertUsedRange) - if (Prop->Name == CmdGroup.MKernelName) + if (Prop->Name == KernelName) return true; return false; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 38725f83bbbc7..6c416ce090aef 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -166,20 +166,22 @@ class queue_impl { /// \param Self is a shared_ptr to this queue. 
/// \param SecondQueue is a shared_ptr to the secondary queue. /// \param Loc is the code location of the submit call (default argument) + /// \param StoreAdditionalInfo makes additional info be stored in event_impl /// \return a SYCL event object, which corresponds to the queue the command /// group is being enqueued on. - event submit(const function_class &CGF, bool *IsKernel, + event submit(const function_class &CGF, const shared_ptr_class &Self, const shared_ptr_class &SecondQueue, - const detail::code_location &Loc) { + const detail::code_location &Loc, + bool StoreAdditionalInfo = false) { try { - return submit_impl(CGF, IsKernel, Self, Loc); + return submit_impl(CGF, Self, Loc, StoreAdditionalInfo); } catch (...) { { std::lock_guard Lock(MMutex); MExceptions.PushBack(std::current_exception()); } - return SecondQueue->submit(CGF, IsKernel, SecondQueue, Loc); + return SecondQueue->submit(CGF, SecondQueue, Loc, StoreAdditionalInfo); } } @@ -189,11 +191,13 @@ class queue_impl { /// \param CGF is a function object containing command group. /// \param Self is a shared_ptr to this queue. /// \param Loc is the code location of the submit call (default argument) + /// \param StoreAdditionalInfo makes additional info be stored in event_impl /// \return a SYCL event object for the submitted command group. - event submit(const function_class &CGF, bool *IsKernel, + event submit(const function_class &CGF, const shared_ptr_class &Self, - const detail::code_location &Loc) { - return submit_impl(CGF, IsKernel, Self, Loc); + const detail::code_location &Loc, + bool StoreAdditionalInfo = false) { + return submit_impl(CGF, Self, Loc, StoreAdditionalInfo); } /// Performs a blocking wait for the completion of all enqueued tasks in the @@ -377,7 +381,8 @@ class queue_impl { /// \return a native handle. 
pi_native_handle getNative() const; - bool kernelUsesAssert(event &Event) const; + bool kernelUsesAssert(const std::string &KernelName, + OSModuleHandle Handle) const; private: /// Performs command group submission to the queue. @@ -387,17 +392,30 @@ class queue_impl { /// \param Self is a pointer to this queue. /// \param Loc is the code location of the submit call (default argument) /// \return a SYCL event representing submitted command group. - event submit_impl(const function_class &CGF, bool *IsKernel, + event submit_impl(const function_class &CGF, const shared_ptr_class &Self, - const detail::code_location &Loc) { + const detail::code_location &Loc, + bool StoreAdditionalInfo) { handler Handler(Self, MHostQueue); Handler.saveCodeLoc(Loc); CGF(Handler); - if (IsKernel) - *IsKernel = Handler.getType() == CG::KERNEL; + event Event; + + if (StoreAdditionalInfo) { + bool IsKernel = Handler.getType() == CG::KERNEL; + bool KernelUsesAssert = false; + if (IsKernel) + KernelUsesAssert = kernelUsesAssert( + Handler.MKernelName, Handler.MOSModuleHandle); + + Event = Handler.finalize(); + + detail::getSyclObjImpl(Event)->storeAdditionalInfo( + IsKernel, KernelUsesAssert); + } else + Event = Handler.finalize(); - event Event = Handler.finalize(); addEvent(Event); return Event; } diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index c8b94a5644b13..1660851de765f 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -90,5 +90,17 @@ backend event::get_backend() const noexcept { return getImplBackend(impl); } pi_native_handle event::getNative() const { return impl->getNative(); } +void event::storeAdditionalInfo(bool IsKernel, bool KernelUsesAssert) { + impl->storeAdditionalInfo(IsKernel, KernelUsesAssert); +} + +bool event::enqueuedIsKernel() const { + return impl->enqueuedIsKernel(); +} + +bool event::enqueuedKernelUsesAssert() const { + return impl->enqueuedKernelUsesAssert(); +} + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git 
a/sycl/source/queue.cpp b/sycl/source/queue.cpp index b49b0aed9cc06..efc9577b10f43 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -92,23 +92,24 @@ event queue::mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice) { event queue::submit_impl(function_class CGH, const detail::code_location &CodeLoc) { - return impl->submit(CGH, /* IsKernel */ nullptr, impl, CodeLoc); + return impl->submit(CGH, impl, CodeLoc); } event queue::submit_impl(function_class CGH, queue SecondQueue, const detail::code_location &CodeLoc) { - return impl->submit(CGH, /* IsKernel */ nullptr, impl, SecondQueue.impl, + return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc); } -event queue::submit_impl(function_class CGH, bool &IsKernel, - const detail::code_location &CodeLoc) { - return impl->submit(CGH, &IsKernel, impl, CodeLoc); +event queue::submit_impl_and_store_info(function_class CGH, + const detail::code_location &CodeLoc) { + return impl->submit(CGH, impl, CodeLoc, true); } -event queue::submit_impl(function_class CGH, queue SecondQueue, - bool &IsKernel, const detail::code_location &CodeLoc) { - return impl->submit(CGH, &IsKernel, impl, SecondQueue.impl, CodeLoc); +event queue::submit_impl_and_store_info(function_class CGH, + queue SecondQueue, + const detail::code_location &CodeLoc) { + return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, true); } void queue::wait_proxy(const detail::code_location &CodeLoc) { @@ -154,9 +155,5 @@ backend queue::get_backend() const noexcept { return getImplBackend(impl); } pi_native_handle queue::getNative() const { return impl->getNative(); } -bool queue::kernelUsesAssert(event &Event) const { - return impl->kernelUsesAssert(Event); -} - } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) From fcc53ea653e0fc2d72c176061375e66eb00efbfc Mon Sep 17 00:00:00 2001 From: Viktoria Maksimova Date: Thu, 24 Jun 2021 20:05:12 +0300 Subject: [PATCH 115/122] [sycl-post-link] Fix call graph traversal for assert property generation 
This patch fixes early exit on call graph traversal. Now we do not mark all functions above as "definitely does not call assert" as we can be sure it's true only for a reached leaf. --- .../tools/sycl-post-link/assert-property-2.ll | 54 +++++++++++++++++++ llvm/tools/sycl-post-link/sycl-post-link.cpp | 14 ++--- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/llvm/test/tools/sycl-post-link/assert-property-2.ll b/llvm/test/tools/sycl-post-link/assert-property-2.ll index 8b707c1a4ef4c..cb1e53d129c75 100644 --- a/llvm/test/tools/sycl-post-link/assert-property-2.ll +++ b/llvm/test/tools/sycl-post-link/assert-property-2.ll @@ -38,9 +38,26 @@ ; void G() { common3(); } ; void H() { common3(); } ; +; void no_assert_func() { +; return; +; } +; void common4() { +; assert_func(); +; no_assert_func(); +; } +; void J() { +; common4(); +; } +; ; int main() { ; queue Q; ; Q.submit([&] (handler& CGH) { +; CGH.parallel_for(range<1>{1}, [=](id<1> i) { +; J(); +; }); +; CGH.parallel_for(range<1>{1}, [=](id<1> i) { +; common4(); +; }); ; CGH.parallel_for(range<1>{1}, [=](id<1> i) { ; A_excl(); ; B_incl(); @@ -90,6 +107,43 @@ target triple = "spir64_x86_64-unknown-unknown-sycldevice" ; CHECK: [SYCL/assert used] +; Function Attrs: convergent noinline norecurse optnone mustprogress +define dso_local spir_func void @_Z1Jv() #3 { +entry: + call spir_func void @_Z7common4v() + ret void +} + +; Function Attrs: convergent noinline norecurse optnone mustprogress +define dso_local spir_func void @_Z7common4v() #3 { +entry: + call spir_func void @_Z11assert_funcv() + call spir_func void @_Z14no_assert_funcv() + ret void +} + +; CHECK: _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_E7Kernel9 +; Function Attrs: convergent noinline norecurse mustprogress +define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_E7Kernel9() #0 { +entry: + call spir_func void @_Z1Jv() + ret void +} + +; CHECK: _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_E8Kernel10 +; Function 
Attrs: convergent noinline norecurse optnone mustprogress +define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_E8Kernel10() #0 { +entry: + call spir_func void @_Z7common4v() + ret void +} + +; Function Attrs: convergent noinline norecurse nounwind optnone mustprogress +define dso_local spir_func void @_Z14no_assert_funcv() #2 { +entry: + ret void +} + ; Function Attrs: convergent norecurse nounwind mustprogress define dso_local spir_func void @_Z6B_inclv() local_unnamed_addr { entry: diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 9a5ea2da3bf0b..e1ec9a094a39b 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -316,10 +316,11 @@ static bool hasAssertInFunctionCallGraph(llvm::Function *Func) { // Return if we've already discovered if there are asserts in the // function call graph. - if (hasAssertionInCallGraphMap.count(CF)) { + auto HasAssert = hasAssertionInCallGraphMap.find(CF); + if (HasAssert != hasAssertionInCallGraphMap.end()) { // If we know, that this function does not contain assert, we still // should investigate another instructions in the function. - if (!hasAssertionInCallGraphMap[CF]) + if (!HasAssert->second) continue; return true; @@ -343,11 +344,10 @@ static bool hasAssertInFunctionCallGraph(llvm::Function *Func) { } } - if (IsLeaf) { - // Mark the above functions as ones that definetely do not call assert. - for (auto *It : FuncCallStack) - hasAssertionInCallGraphMap[It] = false; - FuncCallStack.clear(); + if (IsLeaf && !FuncCallStack.empty()) { + // Mark the leaf function as one that definetely does not call assert. 
+ hasAssertionInCallGraphMap[FuncCallStack.back()] = false; + FuncCallStack.pop_back(); } } return false; From 0b011d31dc165e7f0ed87833e4d3227b715cbc32 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 25 Jun 2021 14:11:59 +0300 Subject: [PATCH 116/122] Use postprocessor lambda instead of storing additional data in event Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/event.hpp | 5 --- sycl/include/CL/sycl/queue.hpp | 68 ++++++++++++++++++------------- sycl/source/detail/event_impl.hpp | 11 ----- sycl/source/detail/queue_impl.hpp | 19 +++++---- sycl/source/event.cpp | 13 ------ sycl/source/queue.cpp | 22 ++++++---- 6 files changed, 63 insertions(+), 75 deletions(-) diff --git a/sycl/include/CL/sycl/event.hpp b/sycl/include/CL/sycl/event.hpp index c874b34c93080..0a9ca3aba820e 100644 --- a/sycl/include/CL/sycl/event.hpp +++ b/sycl/include/CL/sycl/event.hpp @@ -133,11 +133,6 @@ class __SYCL_EXPORT event { return reinterpret_cast::type>( getNative()); } - - void storeAdditionalInfo(bool IsKernel, bool KernelUsesAssert); - bool enqueuedIsKernel() const; - bool enqueuedKernelUsesAssert() const; - private: event(shared_ptr_class EventImpl); diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index bcfcd19d4902a..106ff84535306 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -244,18 +244,18 @@ class __SYCL_EXPORT queue { event Event; #if __SYCL_USE_FALLBACK_ASSERT - Event = submit_impl_and_store_info(CGF, CodeLoc); - - // assert required - if (Event.enqueuedIsKernel() && - !get_device().has(aspect::ext_oneapi_native_assert) && - Event.enqueuedKernelUsesAssert()) { - // __devicelib_assert_fail isn't supported by Device-side Runtime - // Linking against fallback impl of __devicelib_assert_fail is performed - // by program manager class - submitAssertCapture(*this, Event, /* SecondaryQueue = */ nullptr, - CodeLoc); - } + auto PostProcess = [this, &CodeLoc]( + bool IsKernel, bool KernelUsesAssert, event 
&E) { + if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) && + KernelUsesAssert) { + // __devicelib_assert_fail isn't supported by Device-side Runtime + // Linking against fallback impl of __devicelib_assert_fail is performed + // by program manager class + submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr, CodeLoc); + } + }; + + Event = submit_impl_and_postprocess(CGF, CodeLoc, PostProcess); #else Event = submit_impl(CGF, CodeLoc); #endif // !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) @@ -281,17 +281,19 @@ class __SYCL_EXPORT queue { event Event; #if __SYCL_USE_FALLBACK_ASSERT - Event = submit_impl_and_store_info(CGF, SecondaryQueue, CodeLoc); - - // assert required - if (Event.enqueuedIsKernel() && - !get_device().has(aspect::ext_oneapi_native_assert) && - Event.enqueuedKernelUsesAssert()) { - // __devicelib_assert_fail isn't supported by Device-side Runtime - // Linking against fallback impl of __devicelib_assert_fail is performed - // by program manager class - submitAssertCapture(*this, Event, &SecondaryQueue, CodeLoc); - } + auto PostProcess = [this, &SecondaryQueue, &CodeLoc]( + bool IsKernel, bool KernelUsesAssert, event &E) { + if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) && + KernelUsesAssert) { + // __devicelib_assert_fail isn't supported by Device-side Runtime + // Linking against fallback impl of __devicelib_assert_fail is performed + // by program manager class + submitAssertCapture(*this, E, &SecondaryQueue, CodeLoc); + } + }; + + Event = + submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc, PostProcess); #else Event = submit_impl(CGF, SecondaryQueue, CodeLoc); #endif // !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) @@ -813,22 +815,32 @@ class __SYCL_EXPORT queue { event submit_impl(function_class CGH, queue secondQueue, const detail::code_location &CodeLoc); + // Function to postprocess submitted command + // Arguments: + // bool IsKernel -
true if the submitted command was kernel, false otherwise + // bool KernelUsesAssert - true if submitted kernel uses assert, only + // meaningful when IsKernel is true + // event &Event - event after which post processing should be executed + using SubmitPostProcessF = std::function; + /// A template-free version of submit. /// \param CGH command group function/handler /// \param CodeLoc code location /// /// This method stores additional information within event_impl class instance - event submit_impl_and_store_info(function_class CGH, - const detail::code_location &CodeLoc); + event submit_impl_and_postprocess(function_class CGH, + const detail::code_location &CodeLoc, + const SubmitPostProcessF &PostProcess); /// A template-free version of submit. /// \param CGH command group function/handler /// \param secondQueue fallback queue /// \param CodeLoc code location /// /// This method stores additional information within event_impl class instance - event submit_impl_and_store_info(function_class CGH, - queue secondQueue, - const detail::code_location &CodeLoc); + event submit_impl_and_postprocess(function_class CGH, + queue secondQueue, + const detail::code_location &CodeLoc, + const SubmitPostProcessF &PostProcess); /// parallel_for_impl with a kernel represented as a lambda + range that /// specifies global size only. diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 0c51c9c01dfd0..81e9e9b626356 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -155,14 +155,6 @@ class event_impl { /// \return a native handle. 
pi_native_handle getNative() const; - void storeAdditionalInfo(bool IsKernel, bool KernelUsesAssert) { - MEnqueuedIsKernel = IsKernel; - MEnqueuedKernelUsesAssert = KernelUsesAssert; - } - - bool enqueuedIsKernel() const { return MEnqueuedIsKernel; } - bool enqueuedKernelUsesAssert() const { return MEnqueuedKernelUsesAssert; } - private: // When instrumentation is enabled emits trace event for event wait begin and // returns the telemetry event generated for the wait @@ -185,9 +177,6 @@ class event_impl { // backend's representation (e.g. alloca). Used values are listed in // HostEventState enum. std::atomic MState; - - bool MEnqueuedIsKernel = false; - bool MEnqueuedKernelUsesAssert = false; }; } // namespace detail diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 6c416ce090aef..a4e90968e4b47 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -156,6 +156,8 @@ class queue_impl { template typename info::param_traits::return_type get_info() const; + using SubmitPostProcessF = std::function; + /// Submits a command group function object to the queue, in order to be /// scheduled for execution on the device. /// @@ -173,15 +175,15 @@ class queue_impl { const shared_ptr_class &Self, const shared_ptr_class &SecondQueue, const detail::code_location &Loc, - bool StoreAdditionalInfo = false) { + const SubmitPostProcessF *PostProcess = nullptr) { try { - return submit_impl(CGF, Self, Loc, StoreAdditionalInfo); + return submit_impl(CGF, Self, Loc, PostProcess); } catch (...) 
{ { std::lock_guard Lock(MMutex); MExceptions.PushBack(std::current_exception()); } - return SecondQueue->submit(CGF, SecondQueue, Loc, StoreAdditionalInfo); + return SecondQueue->submit(CGF, SecondQueue, Loc, PostProcess); } } @@ -196,8 +198,8 @@ class queue_impl { event submit(const function_class &CGF, const shared_ptr_class &Self, const detail::code_location &Loc, - bool StoreAdditionalInfo = false) { - return submit_impl(CGF, Self, Loc, StoreAdditionalInfo); + const SubmitPostProcessF *PostProcess = nullptr) { + return submit_impl(CGF, Self, Loc, PostProcess); } /// Performs a blocking wait for the completion of all enqueued tasks in the @@ -395,14 +397,14 @@ class queue_impl { event submit_impl(const function_class &CGF, const shared_ptr_class &Self, const detail::code_location &Loc, - bool StoreAdditionalInfo) { + const SubmitPostProcessF *PostProcess) { handler Handler(Self, MHostQueue); Handler.saveCodeLoc(Loc); CGF(Handler); event Event; - if (StoreAdditionalInfo) { + if (PostProcess) { bool IsKernel = Handler.getType() == CG::KERNEL; bool KernelUsesAssert = false; if (IsKernel) @@ -411,8 +413,7 @@ class queue_impl { Event = Handler.finalize(); - detail::getSyclObjImpl(Event)->storeAdditionalInfo( - IsKernel, KernelUsesAssert); + (*PostProcess)(IsKernel, KernelUsesAssert, Event); } else Event = Handler.finalize(); diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index 1660851de765f..2c73af98b2c38 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -89,18 +89,5 @@ event::event(shared_ptr_class event_impl) backend event::get_backend() const noexcept { return getImplBackend(impl); } pi_native_handle event::getNative() const { return impl->getNative(); } - -void event::storeAdditionalInfo(bool IsKernel, bool KernelUsesAssert) { - impl->storeAdditionalInfo(IsKernel, KernelUsesAssert); -} - -bool event::enqueuedIsKernel() const { - return impl->enqueuedIsKernel(); -} - -bool event::enqueuedKernelUsesAssert() const { - return 
impl->enqueuedKernelUsesAssert(); -} - } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index efc9577b10f43..57a527b6875b8 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -101,15 +101,19 @@ event queue::submit_impl(function_class CGH, queue SecondQueue, CodeLoc); } -event queue::submit_impl_and_store_info(function_class CGH, - const detail::code_location &CodeLoc) { - return impl->submit(CGH, impl, CodeLoc, true); -} - -event queue::submit_impl_and_store_info(function_class CGH, - queue SecondQueue, - const detail::code_location &CodeLoc) { - return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, true); +event +queue::submit_impl_and_postprocess(function_class CGH, + const detail::code_location &CodeLoc, + const SubmitPostProcessF &PostProcess) { + return impl->submit(CGH, impl, CodeLoc, &PostProcess); +} + +event +queue::submit_impl_and_postprocess(function_class CGH, + queue SecondQueue, + const detail::code_location &CodeLoc, + const SubmitPostProcessF &PostProcess) { + return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, &PostProcess); } void queue::wait_proxy(const detail::code_location &CodeLoc) { From 2b2b897c50c204a78bd611d172cadd34cff63d69 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 25 Jun 2021 14:21:12 +0300 Subject: [PATCH 117/122] Resolve style issue Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 6 +++--- sycl/source/detail/queue_impl.hpp | 4 ++-- sycl/source/queue.cpp | 19 ++++++++----------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 106ff84535306..b0733990ac0fd 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -244,8 +244,8 @@ class __SYCL_EXPORT queue { event Event; #if __SYCL_USE_FALLBACK_ASSERT - auto PostProcess = [this, &CodeLoc]( - bool IsKernel, bool KernelUsesAssert, event &E) { + auto PostProcess = 
[this, &CodeLoc](bool IsKernel, bool KernelUsesAssert, + event &E) { if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) && KernelUsesAssert) { // __devicelib_assert_fail isn't supported by Device-side Runtime @@ -282,7 +282,7 @@ class __SYCL_EXPORT queue { #if __SYCL_USE_FALLBACK_ASSERT auto PostProcess = [this, &SecondaryQueue, &CodeLoc]( - bool IsKernel, bool KernelUsesAssert, event &E) { + bool IsKernel, bool KernelUsesAssert, event &E) { if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) && KernelUsesAssert) { // __devicelib_assert_fail isn't supported by Device-side Runtime diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index a4e90968e4b47..c9615918f4829 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -408,8 +408,8 @@ class queue_impl { bool IsKernel = Handler.getType() == CG::KERNEL; bool KernelUsesAssert = false; if (IsKernel) - KernelUsesAssert = kernelUsesAssert( - Handler.MKernelName, Handler.MOSModuleHandle); + KernelUsesAssert = + kernelUsesAssert(Handler.MKernelName, Handler.MOSModuleHandle); Event = Handler.finalize(); diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 57a527b6875b8..d4154a27a532a 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -97,22 +97,19 @@ event queue::submit_impl(function_class CGH, event queue::submit_impl(function_class CGH, queue SecondQueue, const detail::code_location &CodeLoc) { - return impl->submit(CGH, impl, SecondQueue.impl, - CodeLoc); + return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc); } -event -queue::submit_impl_and_postprocess(function_class CGH, - const detail::code_location &CodeLoc, - const SubmitPostProcessF &PostProcess) { +event queue::submit_impl_and_postprocess( + function_class CGH, const detail::code_location &CodeLoc, + const SubmitPostProcessF &PostProcess) { return impl->submit(CGH, impl, CodeLoc, &PostProcess); } -event 
-queue::submit_impl_and_postprocess(function_class CGH, - queue SecondQueue, - const detail::code_location &CodeLoc, - const SubmitPostProcessF &PostProcess) { +event queue::submit_impl_and_postprocess( + function_class CGH, queue SecondQueue, + const detail::code_location &CodeLoc, + const SubmitPostProcessF &PostProcess) { return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, &PostProcess); } From 269539d091847f065332951cb87406e870a1d943 Mon Sep 17 00:00:00 2001 From: Viktoria Maksimova Date: Fri, 25 Jun 2021 14:31:42 +0300 Subject: [PATCH 118/122] minor fix --- llvm/tools/sycl-post-link/sycl-post-link.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index e1ec9a094a39b..e7a4651ba7eab 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -347,7 +347,7 @@ static bool hasAssertInFunctionCallGraph(llvm::Function *Func) { if (IsLeaf && !FuncCallStack.empty()) { // Mark the leaf function as one that definetely does not call assert. 
hasAssertionInCallGraphMap[FuncCallStack.back()] = false; - FuncCallStack.pop_back(); + FuncCallStack.clear(); } } return false; From a02420d70654d15974769a59504e63ef26d6b71b Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 25 Jun 2021 16:46:12 +0300 Subject: [PATCH 119/122] Reuse assert info buffer in per-queue manner to eliminate deadlock situation Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 18 ++++++------------ sycl/source/detail/queue_impl.cpp | 1 - sycl/source/detail/queue_impl.hpp | 16 ++++++++++++++-- sycl/source/queue.cpp | 3 +++ 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index b0733990ac0fd..d9e56576bc84b 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -904,6 +904,8 @@ class __SYCL_EXPORT queue { }, CodeLoc); } + + buffer &getAssertHappenedBuffer(); }; namespace detail { @@ -924,13 +926,13 @@ event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue, const detail::code_location &CodeLoc) { using AHBufT = buffer; - AHBufT *Buffer = new AHBufT{range<1>{1}}; + AHBufT &Buffer = Self.getAssertHappenedBuffer(); event CopierEv, CheckerEv, PostCheckerEv; auto CopierCGF = [&](handler &CGH) { CGH.depends_on(Event); - auto Acc = Buffer->get_access(CGH); + auto Acc = Buffer.get_access(CGH); CGH.single_task([Acc] { #ifdef __SYCL_DEVICE_ONLY__ @@ -940,12 +942,12 @@ event submitAssertCapture(queue &Self, event &Event, queue *SecondaryQueue, #endif // __SYCL_DEVICE_ONLY__ }); }; - auto CheckerCGF = [&CopierEv, Buffer](handler &CGH) { + auto CheckerCGF = [&CopierEv, &Buffer](handler &CGH) { CGH.depends_on(CopierEv); using mode = access::mode; using target = access::target; - auto Acc = Buffer->get_access(CGH); + auto Acc = Buffer.get_access(CGH); CGH.codeplay_host_task([=] { const detail::AssertHappened *AH = &Acc[0]; @@ -974,21 +976,13 @@ event submitAssertCapture(queue &Self, event &Event, queue 
*SecondaryQueue, } }); }; - // Release memory in distinct host-task so that any dependency is eliminated - auto PostCheckerCGF = [&CheckerEv, Buffer](handler &CGH) { - CGH.depends_on(CheckerEv); - - CGH.codeplay_host_task([=] { delete Buffer; }); - }; if (SecondaryQueue) { CopierEv = Self.submit_impl(CopierCGF, *SecondaryQueue, CodeLoc); CheckerEv = Self.submit_impl(CheckerCGF, *SecondaryQueue, CodeLoc); - PostCheckerEv = Self.submit_impl(PostCheckerCGF, *SecondaryQueue, CodeLoc); } else { CopierEv = Self.submit_impl(CopierCGF, CodeLoc); CheckerEv = Self.submit_impl(CheckerCGF, CodeLoc); - PostCheckerEv = Self.submit_impl(PostCheckerCGF, CodeLoc); } return CheckerEv; diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 4421af334ae6c..73273fb9a6eb3 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -305,7 +305,6 @@ bool queue_impl::kernelUsesAssert(const std::string &KernelName, return false; #endif } - } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index c9615918f4829..04a6e40c5bdba 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include #include #include @@ -78,7 +79,8 @@ class queue_impl { queue_impl(const DeviceImplPtr &Device, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList) : MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler), - MPropList(PropList), MHostQueue(MDevice->is_host()) { + MPropList(PropList), MHostQueue(MDevice->is_host()), + MAssertHappenedBuffer(range<1>{1}) { if (!Context->hasDevice(Device)) throw cl::sycl::invalid_parameter_error( "Queue cannot be constructed with the given context and device " @@ -101,7 +103,8 @@ class queue_impl { /// \param AsyncHandler is a SYCL asynchronous exception handler. 
queue_impl(RT::PiQueue PiQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler) - : MContext(Context), MAsyncHandler(AsyncHandler), MHostQueue(false) { + : MContext(Context), MAsyncHandler(AsyncHandler), MHostQueue(false), + MAssertHappenedBuffer(range<1>{1}) { MQueues.push_back(pi::cast(PiQueue)); @@ -386,6 +389,12 @@ class queue_impl { bool kernelUsesAssert(const std::string &KernelName, OSModuleHandle Handle) const; + void asynchronouslyDeleteBuffer(buffer *B); + + buffer &getAssertHappenedBuffer() { + return MAssertHappenedBuffer; + } + private: /// Performs command group submission to the queue. /// @@ -473,6 +482,9 @@ class queue_impl { // Thread pool for host task and event callbacks execution. // The thread pool is instantiated upon the very first call to getThreadPool() std::unique_ptr MHostTaskThreadPool; + + // Buffer to store assert failure descriptor + buffer MAssertHappenedBuffer; }; } // namespace detail diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index d4154a27a532a..85055200d4271 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -156,5 +156,8 @@ backend queue::get_backend() const noexcept { return getImplBackend(impl); } pi_native_handle queue::getNative() const { return impl->getNative(); } +buffer &queue::getAssertHappenedBuffer() { + return impl->getAssertHappenedBuffer(); +} } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) From bd5405df0e2b18d8bb6cb650a5e67c1cf1cba57c Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 25 Jun 2021 18:16:54 +0300 Subject: [PATCH 120/122] Only append copier kernel and host-task after kernels which use assert Signed-off-by: Sergey Kanaev --- sycl/source/detail/queue_impl.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 73273fb9a6eb3..6e94f71efb49a 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -289,9 +289,6 @@ pi_native_handle 
queue_impl::getNative() const { bool queue_impl::kernelUsesAssert(const std::string &KernelName, OSModuleHandle Handle) const { -#ifndef __SYCL_POST_LINK_TOOL_ADDS_ASSERT_USED_PROPERTY_SET - return true; -#else RTDeviceBinaryImage &BinImg = ProgramManager::getInstance().getDeviceImage( Handle, KernelName, get_context(), get_device()); @@ -303,7 +300,6 @@ bool queue_impl::kernelUsesAssert(const std::string &KernelName, return true; return false; -#endif } } // namespace detail } // namespace sycl From 8b0b8fdcb0c80842caeccdb5b1d2141c01aaac28 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 28 Jun 2021 10:49:29 +0300 Subject: [PATCH 121/122] Fix symbols dump test Signed-off-by: Sergey Kanaev --- sycl/test/abi/sycl_symbols_linux.dump | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index bf93d1df69dce..14d0543e3687a 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3635,11 +3635,12 @@ _ZN2cl4sycl5eventC2Ev _ZN2cl4sycl5queue10mem_adviseEPKvm14_pi_mem_advice _ZN2cl4sycl5queue10wait_proxyERKNS0_6detail13code_locationE _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE -_ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEERbRKNS0_6detail13code_locationE _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationE -_ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEES1_RbRKNS0_6detail13code_locationE _ZN2cl4sycl5queue18throw_asynchronousEv _ZN2cl4sycl5queue20wait_and_throw_proxyERKNS0_6detail13code_locationE +_ZN2cl4sycl5queue23getAssertHappenedBufferEv +_ZN2cl4sycl5queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE +_ZN2cl4sycl5queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE 
_ZN2cl4sycl5queue6memcpyEPvPKvm _ZN2cl4sycl5queue6memsetEPvim _ZN2cl4sycl5queueC1EP17_cl_command_queueRKNS0_7contextERKSt8functionIFvNS0_14exception_listEEE @@ -3941,7 +3942,6 @@ _ZNK2cl4sycl5queue11get_contextEv _ZNK2cl4sycl5queue11is_in_orderEv _ZNK2cl4sycl5queue12get_propertyINS0_8property5queue16enable_profilingEEET_v _ZNK2cl4sycl5queue12has_propertyINS0_8property5queue16enable_profilingEEEbv -_ZNK2cl4sycl5queue16kernelUsesAssertERNS0_5eventE _ZNK2cl4sycl5queue3getEv _ZNK2cl4sycl5queue7is_hostEv _ZNK2cl4sycl5queue8get_infoILNS0_4info5queueE4240EEENS3_12param_traitsIS4_XT_EE11return_typeEv From c8d753c5b36ae36eecb6aca55c8224cb60395771 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 28 Jun 2021 12:12:24 +0300 Subject: [PATCH 122/122] Don't emit assert post-processing for host Signed-off-by: Sergey Kanaev --- sycl/include/CL/sycl/queue.hpp | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index d9e56576bc84b..2c7c3bd59c21a 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -244,21 +244,25 @@ class __SYCL_EXPORT queue { event Event; #if __SYCL_USE_FALLBACK_ASSERT - auto PostProcess = [this, &CodeLoc](bool IsKernel, bool KernelUsesAssert, - event &E) { - if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) && - KernelUsesAssert) { - // __devicelib_assert_fail isn't supported by Device-side Runtime - // Linking against fallback impl of __devicelib_assert_fail is performed - // by program manager class - submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr, CodeLoc); - } - }; - - Event = submit_impl_and_postprocess(CGF, CodeLoc, PostProcess); -#else - Event = submit_impl(CGF, CodeLoc); + if (!is_host()) { + auto PostProcess = [this, &CodeLoc](bool IsKernel, bool KernelUsesAssert, + event &E) { + if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) && + KernelUsesAssert) { + // 
__devicelib_assert_fail isn't supported by Device-side Runtime + // Linking against fallback impl of __devicelib_assert_fail is + // performed by program manager class + submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr, + CodeLoc); + } + }; + + Event = submit_impl_and_postprocess(CGF, CodeLoc, PostProcess); + } else #endif // !defined(SYCL_DISABLE_FALLBACK_ASSERT) && !defined(__NVPTX__) + { + Event = submit_impl(CGF, CodeLoc); + } return Event; }