Skip to content

Commit 819a94a

Browse files
[SYCL] removing std::memcpy from sycl headers (#11436)
removing std::memcpy from sycl headers, continuation of work originally done in #1117.
1 parent 341e989 commit 819a94a

File tree

4 files changed

+27
-26
lines changed

4 files changed

+27
-26
lines changed

sycl/include/sycl/detail/spirv.hpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
#include <sycl/ext/oneapi/experimental/non_uniform_groups.hpp> // for IdToMaskPosition
1414

15-
#include <cstring> // for std::memcpy
15+
#include <sycl/detail/memcpy.hpp> // sycl::detail::memcpy
1616

1717
namespace sycl {
1818
inline namespace _V1 {
@@ -370,9 +370,9 @@ EnableIfGenericBroadcast<T, IdT> GroupBroadcast(Group g, T x, IdT local_id) {
370370
char *ResultBytes = reinterpret_cast<char *>(&Result);
371371
auto BroadcastBytes = [=](size_t Offset, size_t Size) {
372372
uint64_t BroadcastX, BroadcastResult;
373-
std::memcpy(&BroadcastX, XBytes + Offset, Size);
373+
detail::memcpy(&BroadcastX, XBytes + Offset, Size);
374374
BroadcastResult = GroupBroadcast(g, BroadcastX, local_id);
375-
std::memcpy(ResultBytes + Offset, &BroadcastResult, Size);
375+
detail::memcpy(ResultBytes + Offset, &BroadcastResult, Size);
376376
};
377377
GenericCall<T>(BroadcastBytes);
378378
return Result;
@@ -424,9 +424,9 @@ EnableIfGenericBroadcast<T> GroupBroadcast(Group g, T x,
424424
char *ResultBytes = reinterpret_cast<char *>(&Result);
425425
auto BroadcastBytes = [=](size_t Offset, size_t Size) {
426426
uint64_t BroadcastX, BroadcastResult;
427-
std::memcpy(&BroadcastX, XBytes + Offset, Size);
427+
detail::memcpy(&BroadcastX, XBytes + Offset, Size);
428428
BroadcastResult = GroupBroadcast(g, BroadcastX, local_id);
429-
std::memcpy(ResultBytes + Offset, &BroadcastResult, Size);
429+
detail::memcpy(ResultBytes + Offset, &BroadcastResult, Size);
430430
};
431431
GenericCall<T>(BroadcastBytes);
432432
return Result;
@@ -957,9 +957,9 @@ EnableIfGenericShuffle<T> SubgroupShuffle(T x, id<1> local_id) {
957957
char *ResultBytes = reinterpret_cast<char *>(&Result);
958958
auto ShuffleBytes = [=](size_t Offset, size_t Size) {
959959
ShuffleChunkT ShuffleX, ShuffleResult;
960-
std::memcpy(&ShuffleX, XBytes + Offset, Size);
960+
detail::memcpy(&ShuffleX, XBytes + Offset, Size);
961961
ShuffleResult = SubgroupShuffle(ShuffleX, local_id);
962-
std::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
962+
detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
963963
};
964964
GenericCall<T>(ShuffleBytes);
965965
return Result;
@@ -972,9 +972,9 @@ EnableIfGenericShuffle<T> SubgroupShuffleXor(T x, id<1> local_id) {
972972
char *ResultBytes = reinterpret_cast<char *>(&Result);
973973
auto ShuffleBytes = [=](size_t Offset, size_t Size) {
974974
ShuffleChunkT ShuffleX, ShuffleResult;
975-
std::memcpy(&ShuffleX, XBytes + Offset, Size);
975+
detail::memcpy(&ShuffleX, XBytes + Offset, Size);
976976
ShuffleResult = SubgroupShuffleXor(ShuffleX, local_id);
977-
std::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
977+
detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
978978
};
979979
GenericCall<T>(ShuffleBytes);
980980
return Result;
@@ -987,9 +987,9 @@ EnableIfGenericShuffle<T> SubgroupShuffleDown(T x, uint32_t delta) {
987987
char *ResultBytes = reinterpret_cast<char *>(&Result);
988988
auto ShuffleBytes = [=](size_t Offset, size_t Size) {
989989
ShuffleChunkT ShuffleX, ShuffleResult;
990-
std::memcpy(&ShuffleX, XBytes + Offset, Size);
990+
detail::memcpy(&ShuffleX, XBytes + Offset, Size);
991991
ShuffleResult = SubgroupShuffleDown(ShuffleX, delta);
992-
std::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
992+
detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
993993
};
994994
GenericCall<T>(ShuffleBytes);
995995
return Result;
@@ -1002,9 +1002,9 @@ EnableIfGenericShuffle<T> SubgroupShuffleUp(T x, uint32_t delta) {
10021002
char *ResultBytes = reinterpret_cast<char *>(&Result);
10031003
auto ShuffleBytes = [=](size_t Offset, size_t Size) {
10041004
ShuffleChunkT ShuffleX, ShuffleResult;
1005-
std::memcpy(&ShuffleX, XBytes + Offset, Size);
1005+
detail::memcpy(&ShuffleX, XBytes + Offset, Size);
10061006
ShuffleResult = SubgroupShuffleUp(ShuffleX, delta);
1007-
std::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
1007+
detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
10081008
};
10091009
GenericCall<T>(ShuffleBytes);
10101010
return Result;

sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,11 @@
99
#pragma once
1010

1111
#include <sycl/builtins.hpp> // for ceil, cos, exp, exp10, exp2
12+
#include <sycl/detail/memcpy.hpp> // sycl::detail::memcpy
1213
#include <sycl/ext/oneapi/bfloat16.hpp> // for bfloat16, bfloat16ToBits
1314
#include <sycl/marray.hpp> // for marray
1415

15-
#include <cstring> // for size_t, memcpy
16+
#include <cstring> // for size_t
1617
#include <stdint.h> // for uint32_t
1718
#include <type_traits> // for enable_if_t, is_same
1819

@@ -24,7 +25,7 @@ namespace detail {
2425
template <size_t N>
2526
uint32_t to_uint32_t(sycl::marray<bfloat16, N> x, size_t start) {
2627
uint32_t res;
27-
std::memcpy(&res, &x[start], sizeof(uint32_t));
28+
sycl::detail::memcpy(&res, &x[start], sizeof(uint32_t));
2829
return res;
2930
}
3031
} // namespace detail
@@ -71,7 +72,7 @@ sycl::marray<bfloat16, N> fabs(sycl::marray<bfloat16, N> x) {
7172
(__SYCL_CUDA_ARCH__ >= 800)
7273
for (size_t i = 0; i < N / 2; i++) {
7374
auto partial_res = __clc_fabs(detail::to_uint32_t(x, i * 2));
74-
std::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t));
75+
sycl::detail::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t));
7576
}
7677

7778
if (N % 2) {
@@ -126,7 +127,7 @@ sycl::marray<bfloat16, N> fmin(sycl::marray<bfloat16, N> x,
126127
for (size_t i = 0; i < N / 2; i++) {
127128
auto partial_res = __clc_fmin(detail::to_uint32_t(x, i * 2),
128129
detail::to_uint32_t(y, i * 2));
129-
std::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t));
130+
sycl::detail::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t));
130131
}
131132

132133
if (N % 2) {
@@ -182,7 +183,7 @@ sycl::marray<bfloat16, N> fmax(sycl::marray<bfloat16, N> x,
182183
for (size_t i = 0; i < N / 2; i++) {
183184
auto partial_res = __clc_fmax(detail::to_uint32_t(x, i * 2),
184185
detail::to_uint32_t(y, i * 2));
185-
std::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t));
186+
sycl::detail::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t));
186187
}
187188

188189
if (N % 2) {
@@ -226,7 +227,7 @@ sycl::marray<bfloat16, N> fma(sycl::marray<bfloat16, N> x,
226227
auto partial_res =
227228
__clc_fma(detail::to_uint32_t(x, i * 2), detail::to_uint32_t(y, i * 2),
228229
detail::to_uint32_t(z, i * 2));
229-
std::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t));
230+
sycl::detail::memcpy(&res[i * 2], &partial_res, sizeof(uint32_t));
230231
}
231232

232233
if (N % 2) {

sycl/include/sycl/ext/oneapi/experimental/builtins.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,11 @@
1313
#include <sycl/detail/builtins.hpp> // for __invoke_exp2, __invo...
1414
#include <sycl/detail/defines_elementary.hpp> // for __SYCL_ALWAYS_INLINE
1515
#include <sycl/detail/generic_type_traits.hpp> // for is_svgenfloath, is_sv...
16+
#include <sycl/detail/memcpy.hpp> // detail::memcpy
1617
#include <sycl/marray.hpp> // for marray
1718
#include <sycl/types.hpp> // for vec
1819

19-
#include <cstring> // for memcpy, size_t
20+
#include <cstring> // for size_t
2021
#include <stdio.h> // for printf
2122
#include <type_traits> // for enable_if_t
2223

@@ -122,7 +123,7 @@ inline __SYCL_ALWAYS_INLINE
122123
auto partial_res = __sycl_std::__invoke_tanh<sycl::vec<T, 2>>(
123124
sycl::detail::to_vec2(x, i * 2));
124125
#endif
125-
std::memcpy(&res[i * 2], &partial_res, sizeof(vec<T, 2>));
126+
sycl::detail::memcpy(&res[i * 2], &partial_res, sizeof(vec<T, 2>));
126127
}
127128
if (N % 2) {
128129
#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)
@@ -165,7 +166,7 @@ exp2(sycl::marray<half, N> x) __NOEXC {
165166
auto partial_res = __sycl_std::__invoke_exp2<sycl::vec<half, 2>>(
166167
sycl::detail::to_vec2(x, i * 2));
167168
#endif
168-
std::memcpy(&res[i * 2], &partial_res, sizeof(vec<half, 2>));
169+
sycl::detail::memcpy(&res[i * 2], &partial_res, sizeof(vec<half, 2>));
169170
}
170171
if (N % 2) {
171172
#if defined(__SYCL_DEVICE_ONLY__) && defined(__NVPTX__)

sycl/include/sycl/ext/oneapi/sub_group_mask.hpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#pragma once
99

1010
#include <sycl/detail/helpers.hpp> // for Builder
11+
#include <sycl/detail/memcpy.hpp> // detail::memcpy
1112
#include <sycl/exception.hpp> // for errc, exception
1213
#include <sycl/feature_test.hpp> // for SYCL_EXT_ONEAPI_SUB_GROUP_MASK
1314
#include <sycl/id.hpp> // for id
@@ -16,7 +17,6 @@
1617

1718
#include <assert.h> // for assert
1819
#include <climits> // for CHAR_BIT
19-
#include <cstring> // for memcpy
2020
#include <stddef.h> // for size_t
2121
#include <stdint.h> // for uint32_t
2222
#include <system_error> // for error_code
@@ -110,9 +110,8 @@ struct sub_group_mask {
110110
size_t RemainingBytes = sizeof(Bits) - BytesCopied;
111111
size_t BytesToCopy =
112112
RemainingBytes < sizeof(T) ? RemainingBytes : sizeof(T);
113-
// TODO: memcpy is not guaranteed to work in kernels. Find alternative.
114-
std::memcpy(reinterpret_cast<char *>(&Bits) + BytesCopied, &val[I],
115-
BytesToCopy);
113+
sycl::detail::memcpy(reinterpret_cast<char *>(&Bits) + BytesCopied,
114+
&val[I], BytesToCopy);
116115
BytesCopied += BytesToCopy;
117116
}
118117
}

0 commit comments

Comments
 (0)