Skip to content

Commit 904b634

Browse files
Work-around issue with sub_group::load, sub_group::store functions
These functions are from the oneAPI sycl_ext_oneapi_group_load_store extension (see https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_group_load_store.asciidoc ). The current implementation of these primitives seems to require a certain pointer alignment. This PR adds a bool template parameter to the contig functors for all elementwise functions, and elementwise_common_impl checks the alignment of all pointers. If any pointer is not aligned to the expected boundary, a generic implementation is used instead of the one using sg.load/sg.store.
1 parent 0b63d4f commit 904b634

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+682
-241
lines changed

dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,15 @@ template <typename argT, typename resT> struct AbsFunctor
132132
template <typename argT,
133133
typename resT = argT,
134134
unsigned int vec_sz = 4,
135-
unsigned int n_vecs = 2>
136-
using AbsContigFunctor = elementwise_common::
137-
UnaryContigFunctor<argT, resT, AbsFunctor<argT, resT>, vec_sz, n_vecs>;
135+
unsigned int n_vecs = 2,
136+
bool enable_sg_loadstore = true>
137+
using AbsContigFunctor =
138+
elementwise_common::UnaryContigFunctor<argT,
139+
resT,
140+
AbsFunctor<argT, resT>,
141+
vec_sz,
142+
n_vecs,
143+
enable_sg_loadstore>;
138144

139145
template <typename T> struct AbsOutputType
140146
{

dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,15 @@ template <typename argT, typename resT> struct AcosFunctor
145145
template <typename argTy,
146146
typename resTy = argTy,
147147
unsigned int vec_sz = 4,
148-
unsigned int n_vecs = 2>
149-
using AcosContigFunctor = elementwise_common::
150-
UnaryContigFunctor<argTy, resTy, AcosFunctor<argTy, resTy>, vec_sz, n_vecs>;
148+
unsigned int n_vecs = 2,
149+
bool enable_sg_loadstore = true>
150+
using AcosContigFunctor =
151+
elementwise_common::UnaryContigFunctor<argTy,
152+
resTy,
153+
AcosFunctor<argTy, resTy>,
154+
vec_sz,
155+
n_vecs,
156+
enable_sg_loadstore>;
151157

152158
template <typename argTy, typename resTy, typename IndexerT>
153159
using AcosStridedFunctor = elementwise_common::

dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,13 +167,15 @@ template <typename argT, typename resT> struct AcoshFunctor
167167
template <typename argTy,
168168
typename resTy = argTy,
169169
unsigned int vec_sz = 4,
170-
unsigned int n_vecs = 2>
170+
unsigned int n_vecs = 2,
171+
bool enable_sg_loadstore = true>
171172
using AcoshContigFunctor =
172173
elementwise_common::UnaryContigFunctor<argTy,
173174
resTy,
174175
AcoshFunctor<argTy, resTy>,
175176
vec_sz,
176-
n_vecs>;
177+
n_vecs,
178+
enable_sg_loadstore>;
177179

178180
template <typename argTy, typename resTy, typename IndexerT>
179181
using AcoshStridedFunctor = elementwise_common::

dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,14 +123,16 @@ template <typename argT1,
123123
typename argT2,
124124
typename resT,
125125
unsigned int vec_sz = 4,
126-
unsigned int n_vecs = 2>
126+
unsigned int n_vecs = 2,
127+
bool enable_sg_loadstore = true>
127128
using AddContigFunctor =
128129
elementwise_common::BinaryContigFunctor<argT1,
129130
argT2,
130131
resT,
131132
AddFunctor<argT1, argT2, resT>,
132133
vec_sz,
133-
n_vecs>;
134+
n_vecs,
135+
enable_sg_loadstore>;
134136

135137
template <typename argT1, typename argT2, typename resT, typename IndexerT>
136138
using AddStridedFunctor =
@@ -425,13 +427,15 @@ template <typename argT, typename resT> struct AddInplaceFunctor
425427
template <typename argT,
426428
typename resT,
427429
unsigned int vec_sz = 4,
428-
unsigned int n_vecs = 2>
430+
unsigned int n_vecs = 2,
431+
bool enable_sg_loadstore = true>
429432
using AddInplaceContigFunctor = elementwise_common::BinaryInplaceContigFunctor<
430433
argT,
431434
resT,
432435
AddInplaceFunctor<argT, resT>,
433436
vec_sz,
434-
n_vecs>;
437+
n_vecs,
438+
enable_sg_loadstore>;
435439

436440
template <typename argT, typename resT, typename IndexerT>
437441
using AddInplaceStridedFunctor =
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
//
2+
// Data Parallel Control (dpctl)
3+
//
4+
// Copyright 2020-2023 Intel Corporation
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
//
18+
19+
#pragma once
20+
21+
#include <cstddef>
22+
#include <cstdint>
23+
24+
namespace dpctl
25+
{
26+
namespace tensor
27+
{
28+
namespace kernels
29+
{
30+
namespace alignment_utils
31+
{
32+
33+
// Alignment value (64) shared by the alignment helpers in this namespace.
// NOTE(review): presumably a byte count meant to be passed as the `alignment`
// template argument of is_aligned; its users are not visible here - confirm.
static constexpr size_t required_alignment = 64;
34+
35+
// Reports whether the address held by `p` is a whole multiple of `alignment`.
// `p` is converted to std::uintptr_t with reinterpret_cast and reduced modulo
// `alignment`; the function returns true exactly when the remainder is zero.
// `alignment` is a compile-time template argument and must be non-zero,
// because the check is a plain `%` with no guard.
template <std::uintptr_t alignment, typename Ptr> bool is_aligned(Ptr p)
36+
{
37+
return !(reinterpret_cast<std::uintptr_t>(p) % alignment);
38+
}
39+
40+
// Forward declaration only; no definition appears in this file.
// NOTE(review): judging by the name, presumably wraps an existing kernel-name
// type so that the variant built without sub-group load/store gets a distinct
// name - confirm at the use site.
template <typename KernelName> class disabled_sg_loadstore_wrapper_krn;
41+
42+
} // end of namespace alignment_utils
43+
} // end of namespace kernels
44+
} // end of namespace tensor
45+
} // end of namespace dpctl

dpctl/tensor/libtensor/include/kernels/elementwise_functions/angle.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,13 +80,15 @@ template <typename argT, typename resT> struct AngleFunctor
8080
// Contiguous-data functor for angle(): an alias of
// elementwise_common::UnaryContigFunctor instantiated with AngleFunctor.
// vec_sz, n_vecs and enable_sg_loadstore are forwarded unchanged.
// enable_sg_loadstore defaults to true; per the commit description it selects
// the sub-group load/store implementation over the generic one (the logic
// that consumes it lives in UnaryContigFunctor and is not visible here).
// The parameter was previously misspelled `enable_sg_loadstire`; it is now
// spelled like the same parameter on every other *ContigFunctor alias.
template <typename argTy,
8181
typename resTy = argTy,
8282
unsigned int vec_sz = 4,
83-
unsigned int n_vecs = 2>
83+
unsigned int n_vecs = 2,
84+
bool enable_sg_loadstore = true>
8485
using AngleContigFunctor =
8586
elementwise_common::UnaryContigFunctor<argTy,
8687
resTy,
8788
AngleFunctor<argTy, resTy>,
8889
vec_sz,
89-
n_vecs>;
90+
n_vecs,
91+
enable_sg_loadstore>;
9092

9193
template <typename argTy, typename resTy, typename IndexerT>
9294
using AngleStridedFunctor = elementwise_common::

dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -169,9 +169,15 @@ template <typename argT, typename resT> struct AsinFunctor
169169
template <typename argTy,
170170
typename resTy = argTy,
171171
unsigned int vec_sz = 4,
172-
unsigned int n_vecs = 2>
173-
using AsinContigFunctor = elementwise_common::
174-
UnaryContigFunctor<argTy, resTy, AsinFunctor<argTy, resTy>, vec_sz, n_vecs>;
172+
unsigned int n_vecs = 2,
173+
bool enable_sg_loadstore = true>
174+
using AsinContigFunctor =
175+
elementwise_common::UnaryContigFunctor<argTy,
176+
resTy,
177+
AsinFunctor<argTy, resTy>,
178+
vec_sz,
179+
n_vecs,
180+
enable_sg_loadstore>;
175181

176182
template <typename argTy, typename resTy, typename IndexerT>
177183
using AsinStridedFunctor = elementwise_common::

dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,13 +143,15 @@ template <typename argT, typename resT> struct AsinhFunctor
143143
template <typename argTy,
144144
typename resTy = argTy,
145145
unsigned int vec_sz = 4,
146-
unsigned int n_vecs = 2>
146+
unsigned int n_vecs = 2,
147+
bool enable_sg_loadstore = true>
147148
using AsinhContigFunctor =
148149
elementwise_common::UnaryContigFunctor<argTy,
149150
resTy,
150151
AsinhFunctor<argTy, resTy>,
151152
vec_sz,
152-
n_vecs>;
153+
n_vecs,
154+
enable_sg_loadstore>;
153155

154156
template <typename argTy, typename resTy, typename IndexerT>
155157
using AsinhStridedFunctor = elementwise_common::

dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,15 @@ template <typename argT, typename resT> struct AtanFunctor
145145
template <typename argTy,
146146
typename resTy = argTy,
147147
unsigned int vec_sz = 4,
148-
unsigned int n_vecs = 2>
149-
using AtanContigFunctor = elementwise_common::
150-
UnaryContigFunctor<argTy, resTy, AtanFunctor<argTy, resTy>, vec_sz, n_vecs>;
148+
unsigned int n_vecs = 2,
149+
bool enable_sg_loadstore = true>
150+
using AtanContigFunctor =
151+
elementwise_common::UnaryContigFunctor<argTy,
152+
resTy,
153+
AtanFunctor<argTy, resTy>,
154+
vec_sz,
155+
n_vecs,
156+
enable_sg_loadstore>;
151157

152158
template <typename argTy, typename resTy, typename IndexerT>
153159
using AtanStridedFunctor = elementwise_common::

dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan2.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,16 @@ template <typename argT1,
7070
typename argT2,
7171
typename resT,
7272
unsigned int vec_sz = 4,
73-
unsigned int n_vecs = 2>
73+
unsigned int n_vecs = 2,
74+
bool enable_sg_loadstore = true>
7475
using Atan2ContigFunctor =
7576
elementwise_common::BinaryContigFunctor<argT1,
7677
argT2,
7778
resT,
7879
Atan2Functor<argT1, argT2, resT>,
7980
vec_sz,
80-
n_vecs>;
81+
n_vecs,
82+
enable_sg_loadstore>;
8183

8284
template <typename argT1, typename argT2, typename resT, typename IndexerT>
8385
using Atan2StridedFunctor =

0 commit comments

Comments
 (0)