Skip to content

Commit b762e48

Browse files
committed
For pulling in PR intel#7567
2 parents bf5ba59 + 75302c5 commit b762e48

File tree

19 files changed

+534
-75
lines changed

19 files changed

+534
-75
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9556,7 +9556,8 @@ void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA,
95569556
",+SPV_INTEL_bfloat16_conversion"
95579557
",+SPV_INTEL_joint_matrix"
95589558
",+SPV_INTEL_hw_thread_queries"
9559-
",+SPV_KHR_uniform_group_instructions";
9559+
",+SPV_KHR_uniform_group_instructions"
9560+
",+SPV_INTEL_masked_gather_scatter";
95609561
TranslatorArgs.push_back(TCArgs.MakeArgString(ExtArg));
95619562
}
95629563
for (auto I : Inputs) {

clang/test/Driver/sycl-spirv-ext.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@
4848
// CHECK-DEFAULT-SAME:,+SPV_INTEL_bfloat16_conversion
4949
// CHECK-DEFAULT-SAME:,+SPV_INTEL_joint_matrix
5050
// CHECK-DEFAULT-SAME:,+SPV_INTEL_hw_thread_queries
51-
// CHECK-DEFAULT-SAME:,+SPV_KHR_uniform_group_instructions"
51+
// CHECK-DEFAULT-SAME:,+SPV_KHR_uniform_group_instructions
52+
// CHECK-DEFAULT-SAME:,+SPV_INTEL_masked_gather_scatter"
5253
// CHECK-FPGA-HW: llvm-spirv{{.*}}"-spirv-ext=-all
5354
// CHECK-FPGA-HW-SAME:,+SPV_EXT_shader_atomic_float_add
5455
// CHECK-FPGA-HW-SAME:,+SPV_EXT_shader_atomic_float_min_max
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
= sycl_ext_oneapi_queue_priority
2+
3+
:source-highlighter: coderay
4+
:coderay-linenums-mode: table
5+
6+
// This section needs to be after the document title.
7+
:doctype: book
8+
:toc2:
9+
:toc: left
10+
:encoding: utf-8
11+
:lang: en
12+
:dpcpp: pass:[DPC++]
13+
14+
// Set the default source code type in this document to C++,
15+
// for syntax highlighting purposes. This is needed because
16+
// docbook uses c++ and html5 uses cpp.
17+
:language: {basebackend@docbook:c++:cpp}
18+
19+
20+
== Notice
21+
22+
[%hardbreaks]
23+
Copyright (C) 2022-2022 Intel Corporation. All rights reserved.
24+
25+
Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks
26+
of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by
27+
permission by Khronos.
28+
29+
== Contact
30+
31+
To report problems with this extension, please open a new issue at:
32+
33+
https://github.com/intel/llvm/issues
34+
35+
36+
== Dependencies
37+
38+
This extension is written against the SYCL 2020 revision 6 specification. All
39+
references below to the "core SYCL specification" or to section numbers in the
40+
SYCL specification refer to that revision.
41+
42+
== Status
43+
44+
This extension is implemented and fully supported by {dpcpp}.
45+
[NOTE]
46+
====
47+
Although {dpcpp} supports this extension on all backends, it is currently used
48+
only on Level Zero. Other backends ignore the properties defined in this specification.
49+
====
50+
51+
== Overview
52+
53+
Introduce SYCL queue properties specifying the desired priority of a queue.
54+
These priorities are a hint and may be ignored if not supported by
55+
underlying backends.
56+
57+
== Specification
58+
59+
=== Feature test macro
60+
61+
This extension provides a feature-test macro as described in the core SYCL
62+
specification. An implementation supporting this extension must predefine
63+
the macro `SYCL_EXT_ONEAPI_QUEUE_PRIORITY` to one of the values defined
64+
in the table below. Applications can test for the existence of this macro
65+
to determine if the implementation supports this feature, or applications
66+
can test the macro's value to determine which of the extension's features
67+
the implementation supports.
68+
69+
[%header,cols="1,5"]
70+
|===
71+
|Value
72+
|Description
73+
74+
|1
75+
|Initial version of this extension.
76+
|===
77+
78+
=== API of the extension
79+
80+
This extension adds support for new properties for SYCL queue constructors
81+
taking properties list:
82+
83+
```c++
84+
namespace sycl::ext::oneapi::property::queue {
85+
86+
class priority_normal {
87+
public:
88+
priority_normal() = default;
89+
};
90+
class priority_low {
91+
public:
92+
priority_low() = default;
93+
};
94+
class priority_high {
95+
public:
96+
priority_high() = default;
97+
};
98+
99+
} // namespace
100+
```
101+
The new properties hint the SYCL runtime that the queue gets the specified
102+
priority for execution if supported by underlying target runtimes. These
103+
properties are hints and may safely be ignored by an implementation.
104+
105+
It is illegal to specify multiple differrent priority hints for the same queue.
106+
Doing so causes the `queue` constructor to throw a synchronous `exception` with
107+
the `errc::invalid` error code.

sycl/include/sycl/detail/pi.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@
5656
// 11.16 Add PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE and
5757
// PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH as an extension for
5858
// piDeviceGetInfo.
59+
// 11.17 Added new PI_EXT_ONEAPI_QUEUE_PRIORITY_LOW and
60+
// PI_EXT_ONEAPI_QUEUE_PRIORITY_HIGH queue properties.
5961

6062
#define _PI_H_VERSION_MAJOR 11
6163
#define _PI_H_VERSION_MINOR 16
@@ -580,6 +582,8 @@ constexpr pi_queue_properties PI_QUEUE_PROFILING_ENABLE = (1 << 1);
580582
constexpr pi_queue_properties PI_QUEUE_ON_DEVICE = (1 << 2);
581583
constexpr pi_queue_properties PI_QUEUE_ON_DEVICE_DEFAULT = (1 << 3);
582584
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS = (1 << 4);
585+
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_PRIORITY_LOW = (1 << 5);
586+
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_PRIORITY_HIGH = (1 << 6);
583587

584588
using pi_result = _pi_result;
585589
using pi_platform_info = _pi_platform_info;

sycl/include/sycl/detail/properties_traits.def

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,16 @@ __SYCL_PARAM_TRAITS_SPEC(sycl::property::buffer::context_bound)
44
__SYCL_PARAM_TRAITS_SPEC(sycl::property::image::use_host_ptr)
55
__SYCL_PARAM_TRAITS_SPEC(sycl::property::image::use_mutex)
66
__SYCL_PARAM_TRAITS_SPEC(sycl::property::image::context_bound)
7-
__SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::buffer::use_pinned_host_memory)
7+
__SYCL_PARAM_TRAITS_SPEC(
8+
sycl::ext::oneapi::property::buffer::use_pinned_host_memory)
89
__SYCL_PARAM_TRAITS_SPEC(sycl::property::noinit)
910
__SYCL_PARAM_TRAITS_SPEC(sycl::property::no_init)
10-
__SYCL_PARAM_TRAITS_SPEC(sycl::property::context::cuda::use_primary_context) // Deprecated
11-
__SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::cuda::property::context::use_primary_context)
11+
__SYCL_PARAM_TRAITS_SPEC(
12+
sycl::property::context::cuda::use_primary_context) // Deprecated
13+
__SYCL_PARAM_TRAITS_SPEC(
14+
sycl::ext::oneapi::cuda::property::context::use_primary_context)
1215
__SYCL_PARAM_TRAITS_SPEC(sycl::property::queue::in_order)
1316
__SYCL_PARAM_TRAITS_SPEC(sycl::property::reduction::initialize_to_identity)
17+
__SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_low)
18+
__SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_high)
19+
__SYCL_PARAM_TRAITS_SPEC(sycl::ext::oneapi::property::queue::priority_normal)

sycl/include/sycl/detail/property_helper.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,11 @@ enum DataLessPropKind {
4040
FusionNoBarrier = 13,
4141
FusionEnable = 14,
4242
FusionForce = 15,
43+
QueuePriorityNormal = 16,
44+
QueuePriorityLow = 17,
45+
QueuePriorityHigh = 18,
4346
// Indicates the last known dataless property.
44-
LastKnownDataLessPropKind = 15,
47+
LastKnownDataLessPropKind = 18,
4548
// Exceeding 32 may cause ABI breaking change on some of OSes.
4649
DataLessPropKindSize = 32
4750
};

sycl/include/sycl/ext/oneapi/bfloat16.hpp

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,21 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
2525
namespace ext {
2626
namespace oneapi {
2727

28+
class bfloat16;
29+
30+
namespace detail {
31+
using Bfloat16StorageT = uint16_t;
32+
Bfloat16StorageT bfloat16ToBits(const bfloat16 &Value);
33+
bfloat16 bitsToBfloat16(const Bfloat16StorageT Value);
34+
} // namespace detail
35+
2836
class bfloat16 {
29-
using storage_t = uint16_t;
30-
storage_t value;
37+
detail::Bfloat16StorageT value;
38+
39+
friend inline detail::Bfloat16StorageT
40+
detail::bfloat16ToBits(const bfloat16 &Value);
41+
friend inline bfloat16
42+
detail::bitsToBfloat16(const detail::Bfloat16StorageT Value);
3143

3244
public:
3345
bfloat16() = default;
@@ -36,7 +48,7 @@ class bfloat16 {
3648

3749
private:
3850
// Explicit conversion functions
39-
static storage_t from_float(const float &a) {
51+
static detail::Bfloat16StorageT from_float(const float &a) {
4052
#if defined(__SYCL_DEVICE_ONLY__)
4153
#if defined(__NVPTX__)
4254
#if (__CUDA_ARCH__ >= 800)
@@ -72,7 +84,7 @@ class bfloat16 {
7284
#endif
7385
}
7486

75-
static float to_float(const storage_t &a) {
87+
static float to_float(const detail::Bfloat16StorageT &a) {
7688
#if defined(__SYCL_DEVICE_ONLY__) && defined(__SPIR__)
7789
return __devicelib_ConvertBF16ToFINTEL(a);
7890
#else
@@ -85,12 +97,6 @@ class bfloat16 {
8597
#endif
8698
}
8799

88-
static bfloat16 from_bits(const storage_t &a) {
89-
bfloat16 res;
90-
res.value = a;
91-
return res;
92-
}
93-
94100
public:
95101
// Implicit conversion from float to bfloat16
96102
bfloat16(const float &a) { value = from_float(a); }
@@ -122,7 +128,7 @@ class bfloat16 {
122128
#if defined(__SYCL_DEVICE_ONLY__)
123129
#if defined(__NVPTX__)
124130
#if (__CUDA_ARCH__ >= 800)
125-
return from_bits(__nvvm_neg_bf16(lhs.value));
131+
return detail::bitsToBfloat16(__nvvm_neg_bf16(lhs.value));
126132
#else
127133
return -to_float(lhs.value);
128134
#endif
@@ -203,6 +209,23 @@ class bfloat16 {
203209
// for floating-point types.
204210
};
205211

212+
namespace detail {
213+
214+
// Helper function for getting the internal representation of a bfloat16.
215+
inline Bfloat16StorageT bfloat16ToBits(const bfloat16 &Value) {
216+
return Value.value;
217+
}
218+
219+
// Helper function for creating a float16 from a value with the same type as the
220+
// internal representation.
221+
inline bfloat16 bitsToBfloat16(const Bfloat16StorageT Value) {
222+
bfloat16 res;
223+
res.value = Value;
224+
return res;
225+
}
226+
227+
} // namespace detail
228+
206229
} // namespace oneapi
207230
} // namespace ext
208231

sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,13 @@ interop_handle::get_native_context<backend::ext_oneapi_cuda>() const {
6969
template <>
7070
inline device make_device<backend::ext_oneapi_cuda>(
7171
const backend_input_t<backend::ext_oneapi_cuda, device> &BackendObject) {
72+
auto devs = device::get_devices(info::device_type::gpu);
73+
for (auto &dev : devs) {
74+
if (dev.get_backend() == backend::ext_oneapi_cuda &&
75+
BackendObject == get_native<backend::ext_oneapi_cuda>(dev)) {
76+
return dev;
77+
}
78+
}
7279
pi_native_handle NativeHandle = static_cast<pi_native_handle>(BackendObject);
7380
return ext::oneapi::cuda::make_device(NativeHandle);
7481
}

0 commit comments

Comments
 (0)