Skip to content

Commit be435f8

Browse files
committed
Added new test cases and updated existing ones to reflect the effect of the DAG combine.
1 parent b1fdd89 commit be435f8

13 files changed, with 4212 additions and 2980 deletions.

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll

Lines changed: 30 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -82,10 +82,10 @@ define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) {
8282
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
8383
; CHECK-NEXT: s_mov_b32 s4, 0x800000
8484
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
85-
; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000
85+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
8686
; CHECK-NEXT: v_cvt_i32_f32_e32 v1, v1
87-
; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
88-
; CHECK-NEXT: v_mul_f32_e64 v3, |v0|, v3
87+
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3
88+
; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
8989
; CHECK-NEXT: v_log_f32_e32 v3, v3
9090
; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1
9191
; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
@@ -98,10 +98,10 @@ define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) {
9898
; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
9999
; CHECK-NEXT: v_fma_f32 v2, v2, v4, v3
100100
; CHECK-NEXT: v_exp_f32_e32 v2, v2
101-
; CHECK-NEXT: v_mov_b32_e32 v3, 0x1f800000
102-
; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
101+
; CHECK-NEXT: v_not_b32_e32 v3, 63
102+
; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
103103
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v1
104-
; CHECK-NEXT: v_mul_f32_e32 v2, v2, v3
104+
; CHECK-NEXT: v_ldexp_f32 v2, v2, v3
105105
; CHECK-NEXT: v_and_or_b32 v0, v1, v0, v2
106106
; CHECK-NEXT: s_setpc_b64 s[30:31]
107107
%y = sitofp i32 %y.i to float
@@ -228,9 +228,9 @@ define float @test_powr_fast_f32(float %x, float %y) {
228228
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229229
; CHECK-NEXT: s_mov_b32 s4, 0x800000
230230
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
231-
; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000
232-
; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
233-
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v3
231+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
232+
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3
233+
; CHECK-NEXT: v_ldexp_f32 v0, v0, v3
234234
; CHECK-NEXT: v_log_f32_e32 v0, v0
235235
; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
236236
; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -242,9 +242,9 @@ define float @test_powr_fast_f32(float %x, float %y) {
242242
; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc
243243
; CHECK-NEXT: v_fma_f32 v0, v1, v0, v2
244244
; CHECK-NEXT: v_exp_f32_e32 v0, v0
245-
; CHECK-NEXT: v_mov_b32_e32 v1, 0x1f800000
246-
; CHECK-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
247-
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
245+
; CHECK-NEXT: v_not_b32_e32 v1, 63
246+
; CHECK-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
247+
; CHECK-NEXT: v_ldexp_f32 v0, v0, v1
248248
; CHECK-NEXT: s_setpc_b64 s[30:31]
249249
%powr = tail call fast float @_Z4powrff(float %x, float %y)
250250
ret float %powr
@@ -368,9 +368,9 @@ define float @test_pown_fast_f32(float %x, i32 %y) {
368368
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369369
; CHECK-NEXT: s_mov_b32 s4, 0x800000
370370
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
371-
; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000
372-
; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
373-
; CHECK-NEXT: v_mul_f32_e64 v3, |v0|, v3
371+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
372+
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3
373+
; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
374374
; CHECK-NEXT: v_log_f32_e32 v3, v3
375375
; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1
376376
; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
@@ -383,10 +383,10 @@ define float @test_pown_fast_f32(float %x, i32 %y) {
383383
; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
384384
; CHECK-NEXT: v_fma_f32 v2, v2, v4, v3
385385
; CHECK-NEXT: v_exp_f32_e32 v2, v2
386-
; CHECK-NEXT: v_mov_b32_e32 v3, 0x1f800000
387-
; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
386+
; CHECK-NEXT: v_not_b32_e32 v3, 63
387+
; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
388388
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v1
389-
; CHECK-NEXT: v_mul_f32_e32 v2, v2, v3
389+
; CHECK-NEXT: v_ldexp_f32 v2, v2, v3
390390
; CHECK-NEXT: v_and_or_b32 v0, v1, v0, v2
391391
; CHECK-NEXT: s_setpc_b64 s[30:31]
392392
%call = tail call fast float @_Z4pownfi(float %x, i32 %y)
@@ -511,9 +511,9 @@ define float @test_pown_fast_f32_known_even(float %x, i32 %y.arg) {
511511
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512512
; CHECK-NEXT: s_mov_b32 s4, 0x800000
513513
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
514-
; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000
515-
; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
516-
; CHECK-NEXT: v_mul_f32_e64 v0, |v0|, v3
514+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
515+
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3
516+
; CHECK-NEXT: v_ldexp_f32 v0, |v0|, v3
517517
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 1, v1
518518
; CHECK-NEXT: v_log_f32_e32 v0, v0
519519
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
@@ -527,9 +527,9 @@ define float @test_pown_fast_f32_known_even(float %x, i32 %y.arg) {
527527
; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc
528528
; CHECK-NEXT: v_fma_f32 v0, v0, v1, v2
529529
; CHECK-NEXT: v_exp_f32_e32 v0, v0
530-
; CHECK-NEXT: v_mov_b32_e32 v1, 0x1f800000
531-
; CHECK-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
532-
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
530+
; CHECK-NEXT: v_not_b32_e32 v1, 63
531+
; CHECK-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
532+
; CHECK-NEXT: v_ldexp_f32 v0, v0, v1
533533
; CHECK-NEXT: s_setpc_b64 s[30:31]
534534
%y = shl i32 %y.arg, 1
535535
%call = tail call fast float @_Z4pownfi(float %x, i32 %y)
@@ -651,9 +651,9 @@ define float @test_pown_fast_f32_known_odd(float %x, i32 %y.arg) {
651651
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
652652
; CHECK-NEXT: s_mov_b32 s4, 0x800000
653653
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
654-
; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000
655-
; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
656-
; CHECK-NEXT: v_mul_f32_e64 v3, |v0|, v3
654+
; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
655+
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3
656+
; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
657657
; CHECK-NEXT: v_or_b32_e32 v1, 1, v1
658658
; CHECK-NEXT: v_log_f32_e32 v3, v3
659659
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
@@ -667,10 +667,10 @@ define float @test_pown_fast_f32_known_odd(float %x, i32 %y.arg) {
667667
; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
668668
; CHECK-NEXT: v_fma_f32 v1, v2, v1, v3
669669
; CHECK-NEXT: v_exp_f32_e32 v1, v1
670-
; CHECK-NEXT: v_mov_b32_e32 v2, 0x1f800000
671-
; CHECK-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
670+
; CHECK-NEXT: v_not_b32_e32 v2, 63
671+
; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
672672
; CHECK-NEXT: s_brev_b32 s4, -2
673-
; CHECK-NEXT: v_mul_f32_e32 v1, v1, v2
673+
; CHECK-NEXT: v_ldexp_f32 v1, v1, v2
674674
; CHECK-NEXT: v_bfi_b32 v0, s4, v1, v0
675675
; CHECK-NEXT: s_setpc_b64 s[30:31]
676676
%y = or i32 %y.arg, 1

0 commit comments

Comments
 (0)