From 623786954bcc2a3e7b6e12425b7ee248b8fc5670 Mon Sep 17 00:00:00 2001 From: Michael Kinsner Date: Mon, 2 Aug 2021 00:34:26 -0300 Subject: [PATCH 01/13] [SYCL][Doc] Add device global extension spec Signed-off-by: Michael Kinsner --- .../SYCL_INTEL_device_global.asciidoc | 790 ++++++++++++++++++ sycl/doc/extensions/README.md | 1 + 2 files changed, 791 insertions(+) create mode 100755 sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc diff --git a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc new file mode 100755 index 0000000000000..b7986f47af047 --- /dev/null +++ b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc @@ -0,0 +1,790 @@ += SYCL_EXT_ONEAPI_DEVICE_GLOBAL + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en + +:blank: pass:[ +] + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + +// This is necessary for asciidoc, but not for asciidoctor +:cpp: C++ + +== Introduction +In OpenCL 2.0 and later, a user is able to allocate program +scope memory which can be accessed like a {cpp} global variable by any kernel in +an OpenCL program (`cl_program`). When a program is shared between multiple devices, +each device receives its own unique instance of the program scope memory allocation. + +This extension introduces device scoped memory allocations into SYCL that can be accessed +within a kernel using syntax similar to {cpp} global variables, but that have unique +instances per `sycl::device`. Mechanisms are provided for the host program to enqueue +copies to or from the allocations on a specific device. Restrictions are +placed on the types of data that can be stored within `device_global` allocations, particularly +around copyability and constructors/destructors. + +== Notice + +Copyright (c) 2021 Intel Corporation. All rights reserved. + +NOTE: Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are +trademarks of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. +used by permission by Khronos. + +== Status + +Working Draft + +This is a preview extension specification, intended to provide early access to +a feature for review and community feedback. When the feature matures, this +specification may be released as a formal extension. + +Because the interfaces defined by this specification are not final and are +subject to change they are not intended to be used by shipping software +products. + +== Version + +Built On: 2021-08-01 + +Revision: 2 + +== Contact + +Artem Radzikhovskyy, Intel (artem 'dot' radzikhovskyy 'at' intel 'dot' com) + +== Contributors + +Artem Radzikhovskyy, Intel + +Michael Kinsner, Intel + +Jessica Davies, Intel + +Joe Garvey, Intel + +Mohammad Fawaz, Intel + +Tommy Hoffner, Intel + +John Pennycook, Intel + +Greg Lueck, Intel + +Roland Schulz, Intel + +== Dependencies + +This extension is written against the SYCL 2020 specification, revision 3. + +It also depends on the `sycl::ext::oneapi::property_list` extension. + +== Overview + +[NOTE] +==== +In this document, we use `device_global` to indicate the proposed `sycl::ext::oneapi::device_global`. +==== + +The purpose of this document is to clearly describe and specify `device_global` and related +concepts, types, and mechanisms, and to give examples and context for their usage. + +=== Motivation + +Device scope memory allocations can provide an efficient mechanism for communication +between multiple invocations of a kernel, or between kernels executing on a device. +There are additional benefits and optimization opportunities when a device compiler +has visibility into the allocation size (static sizing) and uses of the allocation. + +Syntax allowing direct use of an allocation (without passing pointers or parameters +through function call boundaries) can also lead to syntax simplification in some +important use cases. + +=== Examples + +Two example `device_global` can be declared at namespace scope, as follows: + +[source,c++] +---- +struct MyClass { + bool flag; +}; + +using namespace sycl::ext::oneapi; + +device_global dm1; +static device_global dm2; +---- + +`dm1` creates an allocation on each `sycl::device` that contains an object of type `MyClass`. +`dm2` has internal linkage (due to `static`), and creates allocations containing an array +of four `int` per device. + +Uses of `dm1` and `dm2` in device functions are syntactically similar to uses of global variables +in {cpp} (access directly through the namespace scope identifier), and `device_global` has +reference wrapper-like semantics on a device. Of note, because {cpp} doesn't allow for +overloading of the "dot operator", a `get()` member of `device_global` allows a reference +to be extracted, to which the usual dot operator may be applied as in: + +[source,c++] +---- +sycl::queue Q; +Q.submit([&](sycl::handler& h) { + h.single_task([=]() { + int x = 5; + if (dm1.get().flag) + x = dm2[0]; + }); +}); +---- + +For both `dm1` and `dm2`, the `MyClass` and `int[4]` allocations on each device are zero-initialized before any non-initialization accesses occur. + +== Proposal + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification, Section 6.3.3 "Feature test macros". Therefore, an +implementation supporting this extension must predefine the macro +`SYCL_EXT_ONEAPI_DEVICE_GLOBAL` to one of the values defined in the table below. +Applications can test for the existence of this macro to determine if the +implementation supports this feature, or applications can test the macro's +value to determine which of the extension's features +that the implementation supports. + +[%header,cols="1,5"] +|=== +|Value |Description +|1 |Initial extension version +|=== + +=== Representation of device globals + +`device_global` provides a mechanism to allocate device scope memory - memory which has unique underlying storage (of type _T_) for each `sycl::device` object. If multiple device objects are present then each device object receives its own unique underlying allocation. All kernels that reference the same `device_global` entity (either directly or via a pointer to its underlying object of type _T_) share the same allocation of that object when those kernels run on the same device. + +`device_global` allocations are in the global address space, as are any underlying allocations of type `T` which are implicitly allocated on each device as a result of a `device_global` object. It is undefined behavior if the host program directly accesses a `device_global` or any address obtained from a `device_global` member function, and similarly it is undefined behavior if a `device_global` or address obtained on a device from a `device_global` member function is accessed on a different device. There is no mechanism to obtain addresses of or directly access a device's `device_global` allocation within the host program. + +A `device_global` on a given device maintains its state (address of the allocation and data within the allocation) even after the application changes the value of a specialization constant via `handler::set_specialization_constant()`. Additionally, a `device_global` maintains its state even when it is referenced from a kernel in a different `kernel_bundle`. + +[source,c++] +---- +namespace sycl::ext::oneapi { +template > +class device_global { + ... +---- + +`device_global` is a class template, parameterized by the type of the underlying allocation _T_, and a list of properties _propertyListT_. The type of the allocation _T_ also encodes the size of the allocation for potentially multidimensional array types. + +_T_ is restricted to types that have a trivial destructor and a trivial default constructor in this revision of the specification (the constructor restriction may be partially relaxed in a future revision). _propertyListT_ enables properties to be associated with a `device_global`. + +Since _T_ is restricted to types with trivial default constructors in this version of the specification, there are no non-default `device_global` constructors, and therefore no initialization values may be specified for the content of a `device_global` allocation on a device. + +The allocation of type _T_ is zero-initialized on each device prior to the first access to that `device_global` variable. + +`device_global` may only be declared with static storage duration at namespace scope or class scope. If a `device_global` is declared with any other storage duration or scope, the program is ill-formed. + +The example below creates two global namespace scope `device_global` objects named `dm1` and `dm2`. `dm1` contains one object of type `MyClass` on each device, and the `device_global` object has external linkage. `dm2` contains an array of four integers on each device, and the `device_global` object has internal linkage. In both cases, the `MyClass` and `int[4]` allocations on each device are zero-initialized before any non-initialization accesses occur. + +[source,c++] +---- +using namespace sycl::ext::oneapi; + +device_global dm1; +static device_global dm2; + +int main () { + sycl::queue Q; + Q.submit([&](sycl::handler& h) { + h.single_task([=]() { + int x = 5; + if (dm1.get().flag) + x = dm2[0]; + }); + }); +} +---- + +Properties may be specified for a `device_global` to provide semantic modification or optimization hint information to the compiler. Specific properties are defined in other extensions, but example uses of a property (with a "no copy" attribute described by another extension) are: + +[source,c++] +---- +device_global> dm1; +device_global> dm2; +---- + +[NOTE] +==== + +On a device, `device_global` has similar semantics to a reference wrapper. The dot operator (`operator.`) cannot be overloaded, so a `get()` member is provided to allow a reference to be extracted directly when needed. Some operators are declared in `device_global` that must be members (e.g. `operator[]` and `+operator->+`). Note that other operators can be overloaded by specific `T` as free functions, which will be selected through implicit conversion to `T` in device functions. + +==== + + +The section below and the table following describe the constructors, member functions and factory methods for `device_global`. + +[source,c++] +---- +namespace sycl::ext::oneapi { + +template > +class device_global { +public: + using element_type = std::remove_extent_t; + + static_assert(std::is_trivially_default_constructible_v, + "Type T must be trivially default constructable (until C++20 " + "consteval is supported and enabled)"); + + static_assert(std::is_trivially_destructible_v, + "Type T must be trivially destructible."); + + // Only default construction is allowed. The underlying memory allocations + // of type T on devices will be zero-initialized before any non-initialization + // accesses occur. + device_global(); + + device_global(const device_global &) = delete; + device_global(const device_global &&) = delete; + device_global &operator=(const device_global &) = delete; + device_global &operator=(const device_global &&) = delete; + + template + multi_ptr get_multi_ptr() noexcept; + template + multi_ptr get_multi_ptr() const noexcept; + + // Access the underlying data + operator T&() noexcept; + operator const T&() const noexcept; + + T& get() noexcept; + const T& get() const noexcept; + + // Enable assignments from underlying type + device_global& operator=(const T&) noexcept; + + // Available if the operator[] is valid for objects of type T + using subscript_return_t = std::remove_reference_t()[std::ptrdiff_t{}])>; + subscript_return_t& operator[]( std::ptrdiff_t idx ) noexcept; + const subscript_return_t& operator[]( std::ptrdiff_t idx ) const noexcept; + + // Available if the operator-> is valid for objects of type T + T& operator->() noexcept; + const T& operator->() const noexcept; + + // Note that there is no need for "device_global" to define member functions for + // operators like "++", comparison, etc. Instead, the type "T" need only define + // these operators as non-member functions. Because there is an implicit conversion + // from "device_global" to "T&", the operations can be applied to objects of type + // "device_global". + + template + static constexpr bool has_property(); + + // The return type is an unspecified internal class used to represent + // instances of propertyT + template + static constexpr auto get_property(); +}; + +} // namespace sycl::ext::oneapi +---- + +[frame="topbot",options="header"] +|=== +|Functions |Description + +// --- ROW BREAK --- +a| +[source,c++] +---- +device_global(); +---- +| +Constructs a `device_global` object, and implicit storage for `T` in the global address space on each device that may access it. + +The storage on each device for `T` is zero-initialized. + +`T` must be trivially default constructable and trivially destructible. + +// --- ROW BREAK --- +a| +[source,c++] +---- +template +multi_ptr get_multi_ptr() noexcept; +template +multi_ptr get_multi_ptr() const noexcept; + +---- +| +Available only in device functions. + +Returns a `multi_ptr` to the underlying `T` on the device. It is undefined behavior to dereference the returned pointer or any address derived from the pointer on a different device or on the host. + +// --- ROW BREAK --- +a| +[source,c++] +---- +operator T&() noexcept; +operator const T&() const noexcept; +---- +| +Available only in device functions. + +Implicit conversion to a reference to the underlying `T` on the device. It is undefined behavior to access the reference or any address derived from it on a different device or on the host. + +// --- ROW BREAK --- +a| +[source,c++] +---- +T& get() noexcept; +const T& get() const noexcept; +---- +| +Available only in device functions. + +Returns a reference to the underlying `T` on the device. It is undefined behavior to access the reference or any address derived from it on a different device or on the host. + +// --- ROW BREAK --- +a| +[source,c++] +---- +device_global& operator=(const T&) noexcept; +---- +| +Available only in device functions. + +Enables assignment of type `T` to the underlying allocation on the device. + +// --- ROW BREAK --- +a| +[source,c++] +---- +element_type& operator[]( std::ptrdiff_t idx ) noexcept; +const element_type& operator[]( std::ptrdiff_t idx ) const noexcept; +---- +| +Available only in device functions. + +Available only when the underlying `T` defines an `operator[]`. + +Indexes into the underlying `T`. It is undefined behavior if _idx_ is negative. + +// --- ROW BREAK --- +a| +[source,c++] +---- +T& operator->() noexcept; +const T& operator->() const noexcept; +---- +| +Available only in device functions. + +Available only when `+operator->+` is valid for objects of type `T`. + +Provides member access through `T` that is a pointer or a class which defines `+operator->+`. + +// --- ROW BREAK --- +a| +[source,c++] +---- +template +static constexpr bool has_property(); +---- +| Returns true if the `property_listT` contains the property specified by `propertyT`. Returns false if it does not. +Available only if `sycl::is_property_of_v` is true. + +// --- ROW BREAK --- +a| +[source,c++] +---- +template +static constexpr auto get_property(); +---- +| Returns an object of the class used to represent the value of property `propertyT`. +Must produce a compiler diagnostic if `property_listT` does not contain a `propertyT` property. +Available only if `sycl::is_property_of_v` is true. + +|=== + +=== Relax language restrictions for SYCL device functions + +SYCL 2020 restrictions must be relaxed to allow `device_global` to be used within +device functions without being `const` or `constexpr` and without being zero-initialized +or constant-initialized. This is achieved by adding `device_global` exceptions to the +following point in Section 5.4 "Language restrictions for device functions". The modified restriction is: + +* Variables with static storage duration that are odr-used inside a device function, must be +`const` or `constexpr` and zero-initialized or constant-initialized, except if the variable is +of type `device_global` in which case it can be odr-used inside a device function without being +`const`/`constexpr` or zero-/constant-initialized. +** Amongst other things, this restriction makes it illegal for a device function to access a +global variable that isn’t `const` or `constexpr` unless the variable is of type `device_global`. + + +=== Add new copy and memcpy members to the queue class + +Add the following functions to the `sycl::queue` interface described in Section 4.6.5.1 of +the SYCL 2020 specification. + +```c++ +namespace sycl { +class queue { +public: + // Copy to device_global + template + event copy(const std::remove_all_extents_t *src, + device_global& dest, + size_t count = sizeof(T) / sizeof(std::remove_all_extents_t), + size_t startIndex = 0); + + template + event copy(const std::remove_all_extents_t *src, + device_global& dest, + size_t count, size_t startIndex, event depEvent); + + template + event copy(const std::remove_all_extents_t *src, + device_global& dest, + size_t count, size_t startIndex, + const std::vector &depEvents); + + // Copy from device_global + template + event copy(const device_global& src, + std::remove_all_extents_t *dest, + size_t count = sizeof(T) / sizeof(std::remove_all_extents_t), + size_t startIndex = 0); + + template + event copy(const device_global& src, + std::remove_all_extents_t *dest, + size_t count, size_t startIndex, event depEvent); + + template + event copy(const device_global& src, + std::remove_all_extents_t *dest, + size_t count,size_t startIndex, const std::vector &depEvents); + + // memcpy to device_global + template + event memcpy(device_global& dest, + const void *src, size_t numBytes = sizeof(T), size_t offset = 0); + + template + event memcpy(device_global& dest, + const void *src, size_t numBytes, + size_t offset, event depEvent); + + template + event memcpy(device_global& dest, + const void *src, size_t numBytes, + size_t offset, const std::vector &depEvents); + + // memcpy from device_global + template + event memcpy(void *dest, + const device_global& src, + size_t numBytes = sizeof(T), size_t offset = 0); + + template + event memcpy(void *dest, + const device_global& src, size_t numBytes, + size_t offset, event depEvent); + + template + event memcpy(void *dest, + const device_global& src, size_t numBytes, + size_t offset, const std::vector &depEvents); +}; +} // namespace sycl +``` + + +Add the following function descriptions to the `sycl::queue` interface description table +in Section 4.6.5.1 of the SYCL 2020 specification. + +-- +[options="header"] +|==== +| Function Definition | Function type +a| +[source, c++] +---- +template +event copy(const std::remove_all_extents_t *src, + device_global& dest, + size_t count = sizeof(T) / sizeof(std::remove_all_extents_t), + size_t startIndex = 0); +---- +| Explicit copy + +a| +[source, c++] +---- +template +event copy(const std::remove_all_extents_t *src, + device_global& dest, + size_t count, size_t startIndex, event depEvent); +---- +| Explicit copy + +a| +[source, c++] +---- +template +event copy(const std::remove_all_extents_t *src, + device_global& dest, + size_t count, size_t startIndex, const std::vector &depEvents); +---- +| Explicit copy + +a| +[source, c++] +---- +template +event copy(const device_global& src, + std::remove_all_extents_t *dest, + size_t count = sizeof(T) / sizeof(std::remove_all_extents_t), + size_t startIndex = 0); +---- +| Explicit copy + +a| +[source, c++] +---- +template +event copy(const device_global& src, + std::remove_all_extents_t *dest, + size_t count, size_t startIndex, event depEvent); +---- +| Explicit copy + +a| +[source, c++] +---- +template +event copy(const device_global& src, + std::remove_all_extents_t *dest, + size_t count, size_t startIndex, const std::vector &depEvents); +---- +| Explicit copy + +a| +[source, c++] +---- +template +event memcpy(device_global& dest, + const void *src, size_t numBytes = sizeof(T), size_t offset = 0); +---- +| Explicit copy + +a| +[source, c++] +---- +template +event memcpy(device_global& dest, + const void *src, size_t numBytes, + size_t offset, event depEvent); +---- +| Explicit copy + +a| +[source, c++] +---- +template +event memcpy(device_global& dest, + const void *src, size_t numBytes, + size_t offset, const std::vector &depEvents); +---- +| Explicit copy + +a| +[source, c++] +---- +template +event memcpy(void *dest, + const device_global& src, + size_t numBytes = sizeof(T), size_t offset = 0); +---- +| Explicit copy + +a| +[source, c++] +---- +template +event memcpy(void *dest, + const device_global& src, size_t numBytes, + size_t offset, event depEvent); +---- +| Explicit copy + +a| +[source, c++] +---- +template +event memcpy(void *dest, + const device_global& src, size_t numBytes, + size_t offset, const std::vector &depEvents); +---- +| Explicit copy +|==== +-- + + +=== Add new copy and memcpy members to the handler class + +Add the following functions to the `sycl::handler` interface described in Section 4.9.4.3 of +the SYCL 2020 specification. + +Add to Table 130, "Member functions of the handler class". + +-- +[options="header"] +|==== +| Member Function | Description +a| +[source, c++] +---- +template +void copy(const std::remove_all_extents_t *src, + device_global& dest, + size_t count = sizeof(T) / sizeof(std::remove_all_extents_t), + size_t startIndex = 0); +---- +| `T` must be device copyable. + +Copies _count_ elements of type `std::remove_all_extents_t` from the pointer _src_ to the `device_global` _dest_, starting at _startIndex_ elements of _dest_. _src_ may be either a host or USM pointer. +a| +[source, c++] +---- +template +void copy(const device_global& src, + std::remove_all_extents_t *dest, + size_t count = sizeof(T) / sizeof(std::remove_all_extents_t), + size_t startIndex = 0); +---- +| `T` must be device copyable. + +Copies _count_ elements of type `std::remove_all_extents_t` from the `device_global` _src_ to the pointer _dest_, starting at _startIndex_ elements of _src_. _dest_ may be either a host or USM pointer. + +a| +[source, c++] +---- +template +void memcpy(device_global& dest, + const void *src, size_t numBytes = sizeof(T), size_t offset = 0); +---- +|`T` must be device copyable. + +Copies _count_ bytes from the pointer _src_ to the `device_global` _dest_, starting at _offset_ bytes. _src_ may be either a host or USM pointer. + +a| +[source, c++] +---- +template +void memcpy(void *dest, + const device_global& src, + size_t numBytes = sizeof(T), size_t offset = 0); +---- +|`T` must be device copyable. + +Copies _count_ bytes from the `device_global` _src_ to the pointer _dest_, starting at _offset_ bytes. _dest_ may be either a host or USM pointer. +|==== +-- + + +== Non-normative: Future anticipated changes, not enabled within this version of the specification + +In a future version of this extension, it is expected that when {cpp}20 support is available and enabled, the `consteval` keyword will be used to enable compile-time constant initialization of the device allocations backing `device_global`. This will simplify some coding patterns, compared with the current zero-initialization requirement. + +A sketch of the anticipated constructor interface is: + +[source,c++] +---- +namespace sycl::ext::oneapi { + +template > +class device_global { +public: + using element_type = std::remove_extent_t; + + static_assert(std::is_trivially_destructible_v, + "Type T must be trivially destructible."); + + #ifdef __cpp_consteval + device_global(); + + // device_global initializes underlying T with the args argument + template + consteval explicit device_global(Args&&... args); + #else + static_assert(std::is_trivially_default_constructible_v, + "Type T must be trivially default constructable (until C++20 " + "consteval is supported and enabled)"); + + device_global(); + #endif // __cpp_consteval + +---- + +The example below creates two global namespace scope `device_global` objects named `dm1` and `dm2`. `dm1` is default constructed with external linkage, while `dm2` is initialized and has internal linkage. + +[source,c++] +---- +using namespace sycl; +using namespace sycl::ext::oneapi; + +device_global dm1; +static device_global dm2{1, 3, 5, 7}; // Requires C++20 to be enabled + +int main () { + sycl::queue Q; + Q.submit([&](sycl::handler& h) { + h.single_task([=]() { + int x = 5; + if (dm1.get().flag) + x = dm2[0]; + }); + }); +} +---- + +== Non-normative: Implementation hints + +`device_global` prioritizes usability over simplicity of implementation, and therefore adds requirements such as (1) that contents and addresses of the allocation on each device remain stable across changes to specialization constant values, and (2) that the allocation be accessible across `device_image` on the same device. These requirements mean that the semantics of `device_global` do not match the semantics of SPIR-V module scope variables, and therefore may not be implementable exclusively using the SPIR-V feature in existing SPIR-V consuming implementations. + +Also note that there are no restrictions on passing (and subsequent dereferencing) of pointers obtained on a device from a `device_global`, between kernels on a device, including through storage to memory. + +== Issues + +1) Can `sycl::atomic_ref` be used with `device_global`? + +*Resolved*: Yes, but only on the device side. There is no visibility/communication across devices because each device receives a unique allocation of type _T_ underlying the `device_global`. There is no way for an `atomic_ref` to the to be created in host code because there is no way to extract a pointer or reference in host code (only copy/memcpy). + +2) Should we restrict `device_global` to static storage duration, and if so how? + +*Resolved*: Yes, through similar language as `specialization_id`. Moreover restricted to namespace scope, because it is expensive to implement function scope statics. This could change if a compelling use case arises that needs function scope static support. + +3) Should the returned `multi_ptr` default to decorated or an undecorated? + +*Resolved*: No default - follow convention on this set by multi_ptr + +4) Is a mechanism needed that can mark device accesses as read only, while allowing for host write access? + +*Resolved*: No known compelling use cases at this point. + +5) Are there important use cases that require arbitrary destructors to be supported by `device_global`? + +*Resolved*: No important cases known at this time. May loosen restriction in the future. + + +== Revision History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|======================================== +|Rev|Date|Author|Changes +|1|2021-06-11|Artem Radzikhovskyy|*Initial review version* +|2|2021-08-01|Mike Kinsner|Restrict to trivial default constructors for first release, change from pointer to reference semantics, swap order of arguments in `copy` functions, update and clarify wording, remove factory functions. +|======================================== diff --git a/sycl/doc/extensions/README.md b/sycl/doc/extensions/README.md index 1cebff11302f8..6b4be7f6140d7 100755 --- a/sycl/doc/extensions/README.md +++ b/sycl/doc/extensions/README.md @@ -44,6 +44,7 @@ DPC++ extensions status: | [Assert](Assert/SYCL_ONEAPI_ASSERT.asciidoc) | Proposal | | | [Matrix](Matrix/dpcpp-joint-matrix.asciidoc) | Partially supported(AMX AOT) | Not supported: dynamic-extent, wg and wi scopes, layouts other than packed| | [SYCL_INTEL_free_function_queries](FreeFunctionQueries/SYCL_INTEL_free_function_queries.asciidoc) | Supported (experimental) | | +| [SYCL_EXT_ONEAPI_DEVICE_GLOBAL](DeviceGlobal/SYCL_INTEL_device_global.asciidoc) | Proposal | | Legend: From a301564e705d0884bf44cb1d849cf30036cbc97e Mon Sep 17 00:00:00 2001 From: Mike Kinsner Date: Mon, 2 Aug 2021 13:19:56 -0300 Subject: [PATCH 02/13] Update sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc Co-authored-by: John Pennycook --- .../extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc index b7986f47af047..17c5d749a6f63 100755 --- a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc +++ b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc @@ -763,7 +763,7 @@ Also note that there are no restrictions on passing (and subsequent dereferencin == Issues 1) Can `sycl::atomic_ref` be used with `device_global`? + -*Resolved*: Yes, but only on the device side. There is no visibility/communication across devices because each device receives a unique allocation of type _T_ underlying the `device_global`. There is no way for an `atomic_ref` to the to be created in host code because there is no way to extract a pointer or reference in host code (only copy/memcpy). +*Resolved*: Yes, but only on the device side. There is no visibility/communication across devices because each device receives a unique allocation of type _T_ underlying the `device_global`. There is no way for an `atomic_ref` associated with the allocation to be created in host code because there is no way to extract a pointer or reference in host code (only copy/memcpy). 2) Should we restrict `device_global` to static storage duration, and if so how? + *Resolved*: Yes, through similar language as `specialization_id`. Moreover restricted to namespace scope, because it is expensive to implement function scope statics. This could change if a compelling use case arises that needs function scope static support. From 05ef1115dcd88aa0f7b2bb8324b34551a727993e Mon Sep 17 00:00:00 2001 From: Michael Kinsner Date: Tue, 3 Aug 2021 09:40:15 -0300 Subject: [PATCH 03/13] Apply suggestions from @Pennycook --- .../SYCL_INTEL_device_global.asciidoc | 129 +++++++++--------- 1 file changed, 67 insertions(+), 62 deletions(-) diff --git a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc index 17c5d749a6f63..d1fe213e12a66 100755 --- a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc +++ b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc @@ -78,7 +78,7 @@ Roland Schulz, Intel This extension is written against the SYCL 2020 specification, revision 3. -It also depends on the `sycl::ext::oneapi::property_list` extension. +It also depends on the `SYCL_EXT_ONEAPI_PROPERTY_LIST` extension. == Overview @@ -171,14 +171,14 @@ A `device_global` on a given device maintains its state (address of the allocati [source,c++] ---- namespace sycl::ext::oneapi { -template > +template > class device_global { ... ---- -`device_global` is a class template, parameterized by the type of the underlying allocation _T_, and a list of properties _propertyListT_. The type of the allocation _T_ also encodes the size of the allocation for potentially multidimensional array types. +`device_global` is a class template, parameterized by the type of the underlying allocation _T_, and a list of properties _PropertyListT_. The type of the allocation _T_ also encodes the size of the allocation for potentially multidimensional array types. -_T_ is restricted to types that have a trivial destructor and a trivial default constructor in this revision of the specification (the constructor restriction may be partially relaxed in a future revision). _propertyListT_ enables properties to be associated with a `device_global`. +_T_ is restricted to types that have a trivial destructor and a trivial default constructor in this revision of the specification (the constructor restriction may be partially relaxed in a future revision). _PropertyListT_ enables properties to be associated with a `device_global`. Since _T_ is restricted to types with trivial default constructors in this version of the specification, there are no non-default `device_global` constructors, and therefore no initialization values may be specified for the content of a `device_global` allocation on a device. @@ -229,7 +229,7 @@ The section below and the table following describe the constructors, member func ---- namespace sycl::ext::oneapi { -template > +template > class device_global { public: using element_type = std::remove_extent_t; @@ -431,74 +431,79 @@ global variable that isn’t `const` or `constexpr` unless the variable is of ty Add the following functions to the `sycl::queue` interface described in Section 4.6.5.1 of the SYCL 2020 specification. +[NOTE] +==== +A pointer to the allocation within a `device_global` may not be obtained by the host program (can only be extracted in device functions because allocations are per device), so pointer arithmetic can therefore not be used in the host program to define `copy`/`memcpy' offsets into data. `startIndex` and `offset` arguments are provided in these interfaces to allow offsetting without pointer arithmetic. +==== + ```c++ namespace sycl { class queue { public: // Copy to device_global - template + template event copy(const std::remove_all_extents_t *src, - device_global& dest, + device_global& dest, size_t count = sizeof(T) / sizeof(std::remove_all_extents_t), size_t startIndex = 0); - template + template event copy(const std::remove_all_extents_t *src, - device_global& dest, + device_global& dest, size_t count, size_t startIndex, event depEvent); - template + template event copy(const std::remove_all_extents_t *src, - device_global& dest, + device_global& dest, size_t count, size_t startIndex, const std::vector &depEvents); // Copy from device_global - template - event copy(const device_global& src, + template + event copy(const device_global& src, std::remove_all_extents_t *dest, size_t count = sizeof(T) / sizeof(std::remove_all_extents_t), size_t startIndex = 0); - template - event copy(const device_global& src, + template + event copy(const device_global& src, std::remove_all_extents_t *dest, size_t count, size_t startIndex, event depEvent); - template - event copy(const device_global& src, + template + event copy(const device_global& src, std::remove_all_extents_t *dest, size_t count,size_t startIndex, const std::vector &depEvents); // memcpy to device_global - template - event memcpy(device_global& dest, + template + event memcpy(device_global& dest, const void *src, size_t numBytes = sizeof(T), size_t offset = 0); - template - event memcpy(device_global& dest, + template + event memcpy(device_global& dest, const void *src, size_t numBytes, size_t offset, event depEvent); - template - event memcpy(device_global& dest, + template + event memcpy(device_global& dest, const void *src, size_t numBytes, size_t offset, const std::vector &depEvents); // memcpy from device_global - template + template event memcpy(void *dest, - const device_global& src, + const device_global& src, size_t numBytes = sizeof(T), size_t offset = 0); - template + template event memcpy(void *dest, - const device_global& src, size_t numBytes, + const device_global& src, size_t numBytes, size_t offset, event depEvent); - template + template event memcpy(void *dest, - const device_global& src, size_t numBytes, + const device_global& src, size_t numBytes, size_t offset, const std::vector &depEvents); }; } // namespace sycl @@ -515,9 +520,9 @@ in Section 4.6.5.1 of the SYCL 2020 specification. a| [source, c++] ---- -template +template event copy(const std::remove_all_extents_t *src, - device_global& dest, + device_global& dest, size_t count = sizeof(T) / sizeof(std::remove_all_extents_t), size_t startIndex = 0); ---- @@ -526,9 +531,9 @@ event copy(const std::remove_all_extents_t *src, a| [source, c++] ---- -template +template event copy(const std::remove_all_extents_t *src, - device_global& dest, + device_global& dest, size_t count, size_t startIndex, event depEvent); ---- | Explicit copy @@ -536,9 +541,9 @@ event copy(const std::remove_all_extents_t *src, a| [source, c++] ---- -template +template event copy(const std::remove_all_extents_t *src, - device_global& dest, + device_global& dest, size_t count, size_t startIndex, const std::vector &depEvents); ---- | Explicit copy @@ -546,8 +551,8 @@ event copy(const std::remove_all_extents_t *src, a| [source, c++] ---- -template -event copy(const device_global& src, +template +event copy(const device_global& src, std::remove_all_extents_t *dest, size_t count = sizeof(T) / sizeof(std::remove_all_extents_t), size_t startIndex = 0); @@ -557,8 +562,8 @@ event copy(const device_global& src, a| [source, c++] ---- -template -event copy(const device_global& src, +template +event copy(const device_global& src, std::remove_all_extents_t *dest, size_t count, size_t startIndex, event depEvent); ---- @@ -567,8 +572,8 @@ event copy(const device_global& src, a| [source, c++] ---- -template -event copy(const device_global& src, +template +event copy(const device_global& src, std::remove_all_extents_t *dest, size_t count, size_t startIndex, const std::vector &depEvents); ---- @@ -577,8 +582,8 @@ event copy(const device_global& src, a| [source, c++] ---- -template -event memcpy(device_global& dest, +template +event memcpy(device_global& dest, const void *src, size_t numBytes = sizeof(T), size_t offset = 0); ---- | Explicit copy @@ -586,8 +591,8 @@ event memcpy(device_global& dest, a| [source, c++] ---- -template -event memcpy(device_global& dest, +template +event memcpy(device_global& dest, const void *src, size_t numBytes, size_t offset, event depEvent); ---- @@ -596,8 +601,8 @@ event memcpy(device_global& dest, a| [source, c++] ---- -template -event memcpy(device_global& dest, +template +event memcpy(device_global& dest, const void *src, size_t numBytes, size_t offset, const std::vector &depEvents); ---- @@ -606,9 +611,9 @@ event memcpy(device_global& dest, a| [source, c++] ---- -template +template event memcpy(void *dest, - const device_global& src, + const device_global& src, size_t numBytes = sizeof(T), size_t offset = 0); ---- | Explicit copy @@ -616,9 +621,9 @@ event memcpy(void *dest, a| [source, c++] ---- -template +template event memcpy(void *dest, - const device_global& src, size_t numBytes, + const device_global& src, size_t numBytes, size_t offset, event depEvent); ---- | Explicit copy @@ -626,9 +631,9 @@ event memcpy(void *dest, a| [source, c++] ---- -template +template event memcpy(void *dest, - const device_global& src, size_t numBytes, + const device_global& src, size_t numBytes, size_t offset, const std::vector &depEvents); ---- | Explicit copy @@ -650,9 +655,9 @@ Add to Table 130, "Member functions of the handler class". a| [source, c++] ---- -template +template void copy(const std::remove_all_extents_t *src, - device_global& dest, + device_global& dest, size_t count = sizeof(T) / sizeof(std::remove_all_extents_t), size_t startIndex = 0); ---- @@ -662,8 +667,8 @@ Copies _count_ elements of type `std::remove_all_extents_t` from the pointer a| [source, c++] ---- -template -void copy(const device_global& src, +template +void copy(const device_global& src, std::remove_all_extents_t *dest, size_t count = sizeof(T) / sizeof(std::remove_all_extents_t), size_t startIndex = 0); @@ -675,8 +680,8 @@ Copies _count_ elements of type `std::remove_all_extents_t` from the `device_ a| [source, c++] ---- -template -void memcpy(device_global& dest, +template +void memcpy(device_global& dest, const void *src, size_t numBytes = sizeof(T), size_t offset = 0); ---- |`T` must be device copyable. @@ -686,9 +691,9 @@ Copies _count_ bytes from the pointer _src_ to the `device_global` _dest_, start a| [source, c++] ---- -template +template void memcpy(void *dest, - const device_global& src, + const device_global& src, size_t numBytes = sizeof(T), size_t offset = 0); ---- |`T` must be device copyable. @@ -708,7 +713,7 @@ A sketch of the anticipated constructor interface is: ---- namespace sycl::ext::oneapi { -template > +template > class device_global { public: using element_type = std::remove_extent_t; From fc9fbae5ac4b32a2355bc552ae0148e24884649f Mon Sep 17 00:00:00 2001 From: Michael Kinsner Date: Tue, 3 Aug 2021 09:49:38 -0300 Subject: [PATCH 04/13] Fix formatting in note --- .../extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc index d1fe213e12a66..0b34726757389 100755 --- a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc +++ b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc @@ -433,7 +433,7 @@ the SYCL 2020 specification. [NOTE] ==== -A pointer to the allocation within a `device_global` may not be obtained by the host program (can only be extracted in device functions because allocations are per device), so pointer arithmetic can therefore not be used in the host program to define `copy`/`memcpy' offsets into data. `startIndex` and `offset` arguments are provided in these interfaces to allow offsetting without pointer arithmetic. +A pointer to the allocation within a `device_global` may not be obtained by the host program (can only be extracted in device functions because allocations are per device), so pointer arithmetic can therefore not be used in the host program to define `copy`/`memcpy` offsets into data. `startIndex` and `offset` arguments are provided in these interfaces to allow offsetting without pointer arithmetic. ==== ```c++ From f4d0feb89a529c6ee56ce32ef591c5d95330d45a Mon Sep 17 00:00:00 2001 From: Michael Kinsner Date: Mon, 9 Aug 2021 09:46:48 -0300 Subject: [PATCH 05/13] Incorporate suggestions from @artemrad --- .../SYCL_INTEL_device_global.asciidoc | 23 +------------------ 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc index 0b34726757389..397c5cf5e1d28 100755 --- a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc +++ b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc @@ -186,27 +186,6 @@ The allocation of type _T_ is zero-initialized on each device prior to the first `device_global` may only be declared with static storage duration at namespace scope or class scope. If a `device_global` is declared with any other storage duration or scope, the program is ill-formed. -The example below creates two global namespace scope `device_global` objects named `dm1` and `dm2`. `dm1` contains one object of type `MyClass` on each device, and the `device_global` object has external linkage. `dm2` contains an array of four integers on each device, and the `device_global` object has internal linkage. In both cases, the `MyClass` and `int[4]` allocations on each device are zero-initialized before any non-initialization accesses occur. - -[source,c++] ----- -using namespace sycl::ext::oneapi; - -device_global dm1; -static device_global dm2; - -int main () { - sycl::queue Q; - Q.submit([&](sycl::handler& h) { - h.single_task([=]() { - int x = 5; - if (dm1.get().flag) - x = dm2[0]; - }); - }); -} ----- - Properties may be specified for a `device_global` to provide semantic modification or optimization hint information to the compiler. Specific properties are defined in other extensions, but example uses of a property (with a "no copy" attribute described by another extension) are: [source,c++] @@ -763,7 +742,7 @@ int main () { `device_global` prioritizes usability over simplicity of implementation, and therefore adds requirements such as (1) that contents and addresses of the allocation on each device remain stable across changes to specialization constant values, and (2) that the allocation be accessible across `device_image` on the same device. These requirements mean that the semantics of `device_global` do not match the semantics of SPIR-V module scope variables, and therefore may not be implementable exclusively using the SPIR-V feature in existing SPIR-V consuming implementations. -Also note that there are no restrictions on passing (and subsequent dereferencing) of pointers obtained on a device from a `device_global`, between kernels on a device, including through storage to memory. +Also note that there are no restrictions on passing (and subsequent dereferencing) of pointers obtained on a device from a `device_global`, between kernels on the same device, including through storage to memory. == Issues From 86abbad5592a01b9af2b9d55830aa47943a005f3 Mon Sep 17 00:00:00 2001 From: Mike Kinsner Date: Mon, 9 Aug 2021 09:50:31 -0300 Subject: [PATCH 06/13] Update sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc Co-authored-by: Ronan Keryell --- .../extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc index 397c5cf5e1d28..5fcbb409bf9b5 100755 --- a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc +++ b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc @@ -131,7 +131,7 @@ to be extracted, to which the usual dot operator may be applied as in: ---- sycl::queue Q; Q.submit([&](sycl::handler& h) { - h.single_task([=]() { + h.single_task([=] { int x = 5; if (dm1.get().flag) x = dm2[0]; From 08d79115ca21de621675169b8f84a2cbfc090c5d Mon Sep 17 00:00:00 2001 From: Mike Kinsner Date: Mon, 9 Aug 2021 10:59:03 -0300 Subject: [PATCH 07/13] Update sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc Co-authored-by: Ronan Keryell --- .../extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc index 5fcbb409bf9b5..0beb16a76b360 100755 --- a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc +++ b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc @@ -729,7 +729,7 @@ static device_global dm2{1, 3, 5, 7}; // Requires C++20 to be enabled int main () { sycl::queue Q; Q.submit([&](sycl::handler& h) { - h.single_task([=]() { + h.single_task([=] { int x = 5; if (dm1.get().flag) x = dm2[0]; From 5b0484ba0af214ea1d066eae8a3c1b0d6955d5af Mon Sep 17 00:00:00 2001 From: Mike Kinsner Date: Tue, 17 Aug 2021 13:25:24 -0300 Subject: [PATCH 08/13] Update sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc Co-authored-by: Greg Lueck --- .../extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc index 0beb16a76b360..b36df28b552a0 100755 --- a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc +++ b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc @@ -103,7 +103,7 @@ important use cases. === Examples -Two example `device_global` can be declared at namespace scope, as follows: +Two example `device_global` variables can be declared at namespace scope, as follows: [source,c++] ---- From 4faf98ba4cbf5621599e4239ce96109de1c75815 Mon Sep 17 00:00:00 2001 From: Mike Kinsner Date: Tue, 17 Aug 2021 13:25:39 -0300 Subject: [PATCH 09/13] Update sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc Co-authored-by: Greg Lueck --- .../extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc index b36df28b552a0..4b7741d2e0a59 100755 --- a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc +++ b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc @@ -164,7 +164,7 @@ that the implementation supports. `device_global` provides a mechanism to allocate device scope memory - memory which has unique underlying storage (of type _T_) for each `sycl::device` object. If multiple device objects are present then each device object receives its own unique underlying allocation. All kernels that reference the same `device_global` entity (either directly or via a pointer to its underlying object of type _T_) share the same allocation of that object when those kernels run on the same device. -`device_global` allocations are in the global address space, as are any underlying allocations of type `T` which are implicitly allocated on each device as a result of a `device_global` object. It is undefined behavior if the host program directly accesses a `device_global` or any address obtained from a `device_global` member function, and similarly it is undefined behavior if a `device_global` or address obtained on a device from a `device_global` member function is accessed on a different device. There is no mechanism to obtain addresses of or directly access a device's `device_global` allocation within the host program. +`device_global` allocations are in the global address space, as are any underlying allocations of type `T` which are implicitly allocated on each device as a result of a `device_global` object. It is undefined behavior if the host program directly accesses a `device_global` or any address obtained from a `device_global` member function, and similarly it is undefined behavior if a `device_global` or address obtained on one device from a `device_global` member function is accessed on a different device. There is no mechanism to obtain addresses of or directly access a device's `device_global` allocation within the host program. A `device_global` on a given device maintains its state (address of the allocation and data within the allocation) even after the application changes the value of a specialization constant via `handler::set_specialization_constant()`. Additionally, a `device_global` maintains its state even when it is referenced from a kernel in a different `kernel_bundle`. From 9ecd53416328b64dd638073f6778acb15a3db646 Mon Sep 17 00:00:00 2001 From: Mike Kinsner Date: Tue, 17 Aug 2021 13:25:57 -0300 Subject: [PATCH 10/13] Update sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc Co-authored-by: Greg Lueck --- .../extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc index 4b7741d2e0a59..fd7a27b26a161 100755 --- a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc +++ b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc @@ -374,7 +374,7 @@ a| template static constexpr bool has_property(); ---- -| Returns true if the `property_listT` contains the property specified by `propertyT`. Returns false if it does not. +| Returns true if the `PropertyListT` contains the property specified by `propertyT`. Returns false if it does not. Available only if `sycl::is_property_of_v` is true. // --- ROW BREAK --- From 3d7c2877e9ec55067bbf65ad061a5d182c3aa6f8 Mon Sep 17 00:00:00 2001 From: Mike Kinsner Date: Tue, 17 Aug 2021 13:26:43 -0300 Subject: [PATCH 11/13] Update sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc Co-authored-by: Greg Lueck --- .../extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc index fd7a27b26a161..14439620635eb 100755 --- a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc +++ b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc @@ -385,7 +385,7 @@ template static constexpr auto get_property(); ---- | Returns an object of the class used to represent the value of property `propertyT`. -Must produce a compiler diagnostic if `property_listT` does not contain a `propertyT` property. +Must produce a compiler diagnostic if `PropertyListT` does not contain a `propertyT` property. Available only if `sycl::is_property_of_v` is true. |=== From 82122bf770881fa25da78316a9049e369ca34e41 Mon Sep 17 00:00:00 2001 From: Mike Kinsner Date: Tue, 17 Aug 2021 13:27:06 -0300 Subject: [PATCH 12/13] Update sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc Co-authored-by: Greg Lueck --- .../extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc index 14439620635eb..e0377dc115589 100755 --- a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc +++ b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc @@ -434,8 +434,8 @@ public: template event copy(const std::remove_all_extents_t *src, device_global& dest, - size_t count, size_t startIndex, - const std::vector &depEvents); + size_t count, size_t startIndex, + const std::vector &depEvents); // Copy from device_global template From 3e04b444abe7b8600880790d41c976cdad7d351c Mon Sep 17 00:00:00 2001 From: Greg Lueck Date: Fri, 24 Sep 2021 10:07:36 -0400 Subject: [PATCH 13/13] Address my remaining review comments @mkinsner told me he agrees with these comments and asked me to make them myself. --- .../SYCL_INTEL_device_global.asciidoc | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc index e0377dc115589..3ed82fa748218 100755 --- a/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc +++ b/sycl/doc/extensions/DeviceGlobal/SYCL_INTEL_device_global.asciidoc @@ -210,6 +210,9 @@ namespace sycl::ext::oneapi { template > class device_global { + using subscript_return_t = + std::remove_reference_t()[std::ptrdiff_t{}])>; + public: using element_type = std::remove_extent_t; @@ -231,9 +234,12 @@ public: device_global &operator=(const device_global &&) = delete; template - multi_ptr get_multi_ptr() noexcept; + multi_ptr + get_multi_ptr() noexcept; + template - multi_ptr get_multi_ptr() const noexcept; + multi_ptr + get_multi_ptr() const noexcept; // Access the underlying data operator T&() noexcept; @@ -246,7 +252,6 @@ public: device_global& operator=(const T&) noexcept; // Available if the operator[] is valid for objects of type T - using subscript_return_t = std::remove_reference_t()[std::ptrdiff_t{}])>; subscript_return_t& operator[]( std::ptrdiff_t idx ) noexcept; const subscript_return_t& operator[]( std::ptrdiff_t idx ) const noexcept; @@ -266,7 +271,7 @@ public: // The return type is an unspecified internal class used to represent // instances of propertyT template - static constexpr auto get_property(); + static constexpr /*unspecified*/ get_property(); }; } // namespace sycl::ext::oneapi @@ -294,9 +299,12 @@ a| [source,c++] ---- template -multi_ptr get_multi_ptr() noexcept; +multi_ptr + get_multi_ptr() noexcept; + template -multi_ptr get_multi_ptr() const noexcept; +multi_ptr + get_multi_ptr() const noexcept; ---- | @@ -402,7 +410,7 @@ following point in Section 5.4 "Language restrictions for device functions". Th of type `device_global` in which case it can be odr-used inside a device function without being `const`/`constexpr` or zero-/constant-initialized. ** Amongst other things, this restriction makes it illegal for a device function to access a -global variable that isn’t `const` or `constexpr` unless the variable is of type `device_global`. +global variable that isn't `const` or `constexpr` unless the variable is of type `device_global`. === Add new copy and memcpy members to the queue class