Skip to content

Commit 6dfa250

Browse files
committed
Fix bugs in brev, byte_perm
Signed-off-by: jinge90 <[email protected]>
1 parent c47c071 commit 6dfa250

File tree

2 files changed: 9 additions and 3 deletions.

2 files changed: 9 additions and 3 deletions.

libdevice/device_imf.hpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -419,6 +419,8 @@ static inline _iml_half __trunc(_iml_half x) {
419419
}
420420

421421
static inline int __clz(int x) {
422+
if (x == 0)
423+
return 32;
422424
uint32_t xi32 = x;
423425
#if defined(__LIBDEVICE_HOST_IMPL__)
424426
return __builtin_clz(xi32);
@@ -428,6 +430,8 @@ static inline int __clz(int x) {
428430
}
429431

430432
static inline int __clzll(long long int x) {
433+
if (x == 0)
434+
return 64;
431435
uint64_t xi64 = x;
432436
#if defined(__LIBDEVICE_HOST_IMPL__)
433437
return __builtin_clzll(xi64);

libdevice/imf_utils/integer_misc.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,23 +14,25 @@ DEVICE_EXTERN_C_INLINE
1414
unsigned int __devicelib_imf_brev(unsigned int x) {
1515
unsigned int res = 0;
1616
size_t bit_count = 8 * sizeof(unsigned int);
17-
for (size_t idx = 0; idx < bit_count; ++idx) {
17+
for (size_t idx = 0; idx < bit_count - 1; ++idx) {
1818
res |= x & 0x1;
1919
res <<= 1;
2020
x >>= 1;
2121
}
22+
res |= x & 0x1;
2223
return res;
2324
}
2425

2526
DEVICE_EXTERN_C_INLINE
2627
unsigned long int __devicelib_imf_brevll(unsigned long long int x) {
2728
unsigned long long int res = 0;
2829
size_t bit_count = 8 * sizeof(unsigned long long int);
29-
for (size_t idx = 0; idx < bit_count; ++idx) {
30+
for (size_t idx = 0; idx < bit_count - 1; ++idx) {
3031
res |= x & 0x1;
3132
res <<= 1;
3233
x >>= 1;
3334
}
35+
res |= x & 0x1;
3436
return res;
3537
}
3638

@@ -69,7 +71,7 @@ unsigned int __devicelib_imf_byte_perm(unsigned int x, unsigned int y,
6971
if (select_idx < 4)
7072
buf[idx] = __get_bytes_by_index<unsigned int, uint8_t>(x, select_idx);
7173
else
72-
buf[idx] = __get_bytes_by_index<unsigned int, uint8_t>(y, select_idx - 3);
74+
buf[idx] = __get_bytes_by_index<unsigned int, uint8_t>(y, select_idx - 4);
7375
s >>= 4;
7476
}
7577
return __assemble_integral_value<unsigned int, uint8_t, 4>(buf);

0 commit comments

Comments
 (0)