From 878141adab9e1e23e458e88f66f336c32c60148a Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Fri, 27 Sep 2024 15:32:57 +0100 Subject: [PATCH] [LLVM][InstCombine][SVE] fcvtnt(a,all_active,b) != fcvtnt(undef,all_active,b) The "narrowing top" convert instructions leave the bottom half of active elements untouched and thus the first parameter of their associated intrinsic remains live even when there are no inactive lanes. --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 9 +++++---- .../AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll | 8 ++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index da0798ebf7957..4be236a7827f9 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2156,11 +2156,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, case Intrinsic::aarch64_sve_fcvt_f64f32: case Intrinsic::aarch64_sve_fcvtlt_f32f16: case Intrinsic::aarch64_sve_fcvtlt_f64f32: - case Intrinsic::aarch64_sve_fcvtnt_bf16f32: - case Intrinsic::aarch64_sve_fcvtnt_f16f32: - case Intrinsic::aarch64_sve_fcvtnt_f32f64: case Intrinsic::aarch64_sve_fcvtx_f32f64: - case Intrinsic::aarch64_sve_fcvtxnt_f32f64: case Intrinsic::aarch64_sve_fcvtzs: case Intrinsic::aarch64_sve_fcvtzs_i32f16: case Intrinsic::aarch64_sve_fcvtzs_i32f64: @@ -2182,6 +2178,11 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, case Intrinsic::aarch64_sve_ucvtf_f32i64: case Intrinsic::aarch64_sve_ucvtf_f64i32: return instCombineSVEAllOrNoActiveUnary(IC, II); + case Intrinsic::aarch64_sve_fcvtnt_bf16f32: + case Intrinsic::aarch64_sve_fcvtnt_f16f32: + case Intrinsic::aarch64_sve_fcvtnt_f32f64: + case Intrinsic::aarch64_sve_fcvtxnt_f32f64: + return instCombineSVENoActiveReplace(IC, II, true); case Intrinsic::aarch64_sve_st1_scatter: case Intrinsic::aarch64_sve_st1_scatter_scalar_offset: 
case Intrinsic::aarch64_sve_st1_scatter_sxtw: diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll index 374a985191768..04550156be30b 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll @@ -138,7 +138,7 @@ define @test_fcvtnt_bf16_f32( %a, @test_fcvtnt_bf16_f32( ; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { ; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtnt.bf16f32( undef, [[PG]], [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtnt.bf16f32( [[A]], [[PG]], [[B]]) ; CHECK-NEXT: ret [[OUT]] ; %pg = tail call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) @@ -150,7 +150,7 @@ define @test_fcvtnt_f16_f32( %a, @test_fcvtnt_f16_f32( ; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { ; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtnt.f16f32( undef, [[PG]], [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtnt.f16f32( [[A]], [[PG]], [[B]]) ; CHECK-NEXT: ret [[OUT]] ; %pg = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) @@ -162,7 +162,7 @@ define @test_fcvtnt_f32_f64( %a, @test_fcvtnt_f32_f64( ; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { ; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtnt.f32f64( undef, [[PG]], [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtnt.f32f64( [[A]], [[PG]], [[B]]) ; CHECK-NEXT: ret [[OUT]] ; %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) @@ -186,7 +186,7 @@ define @test_fcvtxnt_f32_f64( %a, @test_fcvtxnt_f32_f64( ; CHECK-SAME: [[A:%.*]], [[B:%.*]]) { ; CHECK-NEXT: [[PG:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; 
CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtxnt.f32f64( undef, [[PG]], [[B]]) +; CHECK-NEXT: [[OUT:%.*]] = call @llvm.aarch64.sve.fcvtxnt.f32f64( [[A]], [[PG]], [[B]]) ; CHECK-NEXT: ret [[OUT]] ; %pg = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)