Skip to content

Commit b94f23a

Browse files
author
sergei
authored
[SYCL] Cache 'assert supported' flag for device (#4505)
This should eliminate part of the host-side overhead when running kernels in a loop, especially small ones. Another part of the improvement is in #4516. Signed-off-by: Sergey Kanaev <[email protected]>
1 parent 8bc48c8 commit b94f23a

File tree

7 files changed

+22
-9
lines changed

7 files changed

+22
-9
lines changed

sycl/include/CL/sycl/queue.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,10 @@ class __SYCL_EXPORT queue {
231231
template <info::queue param>
232232
typename info::param_traits<info::queue, param>::return_type get_info() const;
233233

234+
// A shorthand for `get_device().has()' which is expected to be a bit quicker
235+
// than the long version
236+
bool device_has(aspect Aspect) const;
237+
234238
public:
235239
/// Submits a command group function object to the queue, in order to be
236240
/// scheduled for execution on the device.
@@ -247,7 +251,7 @@ class __SYCL_EXPORT queue {
247251
if (!is_host()) {
248252
auto PostProcess = [this, &CodeLoc](bool IsKernel, bool KernelUsesAssert,
249253
event &E) {
250-
if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) &&
254+
if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
251255
KernelUsesAssert) {
252256
// __devicelib_assert_fail isn't supported by Device-side Runtime
253257
// Linking against fallback impl of __devicelib_assert_fail is
@@ -287,7 +291,7 @@ class __SYCL_EXPORT queue {
287291
#if __SYCL_USE_FALLBACK_ASSERT
288292
auto PostProcess = [this, &SecondaryQueue, &CodeLoc](
289293
bool IsKernel, bool KernelUsesAssert, event &E) {
290-
if (IsKernel && !get_device().has(aspect::ext_oneapi_native_assert) &&
294+
if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
291295
KernelUsesAssert) {
292296
// __devicelib_assert_fail isn't supported by Device-side Runtime
293297
// Linking against fallback impl of __devicelib_assert_fail is performed

sycl/source/detail/device_impl.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ namespace sycl {
1717
namespace detail {
1818

1919
device_impl::device_impl()
20-
: MIsHostDevice(true), MPlatform(platform_impl::getHostPlatformImpl()) {}
20+
: MIsHostDevice(true), MPlatform(platform_impl::getHostPlatformImpl()),
21+
// assert is natively supported by host
22+
MIsAssertFailSupported(true) {}
2123

2224
device_impl::device_impl(pi_native_handle InteropDeviceHandle,
2325
const plugin &Plugin)
@@ -70,6 +72,9 @@ device_impl::device_impl(pi_native_handle InteropDeviceHandle,
7072
Platform = platform_impl::getPlatformFromPiDevice(MDevice, Plugin);
7173
}
7274
MPlatform = Platform;
75+
76+
MIsAssertFailSupported =
77+
has_extension(PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT);
7378
}
7479

7580
device_impl::~device_impl() {
@@ -334,11 +339,7 @@ std::shared_ptr<device_impl> device_impl::getHostDeviceImpl() {
334339
}
335340

336341
bool device_impl::isAssertFailSupported() const {
337-
// assert is sort of natively supported by host
338-
if (MIsHostDevice)
339-
return true;
340-
341-
return has_extension(PI_DEVICE_INFO_EXTENSION_DEVICELIB_ASSERT);
342+
return MIsAssertFailSupported;
342343
}
343344

344345
} // namespace detail

sycl/source/detail/device_impl.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ class device_impl {
232232
bool MIsRootDevice = false;
233233
bool MIsHostDevice;
234234
PlatformImplPtr MPlatform;
235+
bool MIsAssertFailSupported = false;
235236
}; // class device_impl
236237

237238
} // namespace detail

sycl/source/detail/device_info.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ template <info::device param> struct get_device_info<platform, param> {
126126
// for string return type in other specializations.
127127
template <info::device param> struct get_device_info_string {
128128
static std::string get(RT::PiDevice dev, const plugin &Plugin) {
129-
size_t resultSize;
129+
size_t resultSize = 0;
130130
Plugin.call<PiApiKind::piDeviceGetInfo>(
131131
dev, pi::cast<RT::PiDeviceInfo>(param), 0, nullptr, &resultSize);
132132
if (resultSize == 0) {

sycl/source/queue.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,5 +191,10 @@ pi_native_handle queue::getNative() const { return impl->getNative(); }
191191
buffer<detail::AssertHappened, 1> &queue::getAssertHappenedBuffer() {
192192
return impl->getAssertHappenedBuffer();
193193
}
194+
195+
bool queue::device_has(aspect Aspect) const {
196+
// avoid creating sycl object from impl
197+
return impl->getDeviceImplPtr()->has(Aspect);
198+
}
194199
} // namespace sycl
195200
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3666,6 +3666,7 @@ _ZN2cl4sycl5eventC1Ev
36663666
_ZN2cl4sycl5eventC2EP9_cl_eventRKNS0_7contextE
36673667
_ZN2cl4sycl5eventC2ESt10shared_ptrINS0_6detail10event_implEE
36683668
_ZN2cl4sycl5eventC2Ev
3669+
_ZNK2cl4sycl5queue10device_hasENS0_6aspectE
36693670
_ZN2cl4sycl5queue10mem_adviseEPKvm14_pi_mem_advice
36703671
_ZN2cl4sycl5queue10mem_adviseEPKvmi
36713672
_ZN2cl4sycl5queue10mem_adviseEPKvmiNS0_5eventE

sycl/test/abi/sycl_symbols_windows.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
??$get_info@$0BAIA@@context@sycl@cl@@QEBAIXZ
111111
??$get_info@$0BAIB@@context@sycl@cl@@QEBA?AV?$vector@Vdevice@sycl@cl@@V?$allocator@Vdevice@sycl@cl@@@std@@@std@@XZ
112112
??$get_info@$0BAIE@@context@sycl@cl@@QEBA?AVplatform@12@XZ
113+
?device_has@queue@sycl@cl@@QEBA_NW4aspect@23@@Z
113114
??$get_info@$0BAJA@@queue@sycl@cl@@QEBA?AVcontext@12@XZ
114115
??$get_info@$0BAJB@@queue@sycl@cl@@QEBA?AVdevice@12@XZ
115116
??$get_info@$0BAJC@@queue@sycl@cl@@QEBAIXZ

0 commit comments

Comments
 (0)