[AArch64] Add selection patterns for the zeroing-predicate forms of FCVT and BFCVT.

Summary of the change carried by this patch:
  * AArch64SVEInstrInfo.td: FCVT_ZPzZ now also passes the intrinsic name
    prefix "int_aarch64_sve_fcvt"; BFCVT_ZPzZ_StoH becomes a defm of the new
    sve_fp_z2op_p_zd_bfcvt multiclass and is tied to
    int_aarch64_sve_fcvt_bf16f32_v2.
  * SVEInstrFormats.td: new SVE_3_Op_UndefZero_Pat class. It matches a
    three-operand node whose first operand is SVEDup0Undef and emits the
    instruction with only the remaining two operands. The FCVT and BFCVT
    multiclasses instantiate it once per conversion.
  * New test zeroing-forms-fcvt-bfcvt.ll: expects the merging (/m) encodings
    for +sve / +sme and the zeroing (/z) encodings for +sve2p2 / +sme2p2.

NOTE(review): this patch text was recovered from a copy in which line breaks
had been collapsed and every "<identifier ...>" span (TableGen template
parameter lists, !cast<...> types, IR <vscale x N x T> types) had been
stripped. Those spans are reconstructed from the visible call sites, pattern
operand types and intrinsic names. Leading indentation, column alignment, the
hunk section-header text, and the context lines of the "@@ -576,6 +576,11 @@"
hunk could not be recovered from the damaged text and are presumed -- TODO
confirm against the tree (or regenerate with git) before applying.

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index fb0eb7a80c6d7..6be5eaf221d17 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4265,7 +4265,7 @@ defm TBLQ_ZZZ : sve2p1_tblq<"tblq", int_aarch64_sve_tblq>;
 //===----------------------------------------------------------------------===//
 let Predicates = [HasSVE2p2orSME2p2] in {
   // SVE Floating-point convert precision, zeroing predicate
-  defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt">;
+  defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt", "int_aarch64_sve_fcvt">;
 
   // SVE2p2 floating-point convert precision down (placing odd), zeroing predicate
   defm FCVTNT_ZPzZ : sve_fp_fcvtntz<"fcvtnt">;
@@ -4279,7 +4279,7 @@ let Predicates = [HasSVE2p2orSME2p2] in {
   // SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate
   def BFCVTNT_ZPzZ : sve_fp_fcvt2z<0b1010, "bfcvtnt", ZPR16, ZPR32>;
   // Placing corresponding
-  def BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd<0b1001010, "bfcvt", ZPR32, ZPR16>;
+  defm BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>;
 
   // Floating-point convert to integer, zeroing predicate
   defm FCVTZS_ZPzZ : sve_fp_z2op_p_zd_d<0b0, "fcvtzs">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index faaaca3f28d75..1d32f2ab75852 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -576,6 +576,11 @@ multiclass SVE_3_Op_Undef_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
             (inst $Op1, $Op2, $Op3)>;
 }
 
+class SVE_3_Op_UndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+                             ValueType vt2, ValueType vt3, Instruction inst>
+  : Pat<(vtd (op (vt1 (SVEDup0Undef)), vt2:$Op1, vt3:$Op2)),
+        (inst $Op1, $Op2)>;
+
 class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
                    ValueType vt2, ValueType vt3, ValueType vt4,
                    Instruction inst>
@@ -3273,6 +3278,12 @@ multiclass sve_fp_z2op_p_zd_frint<bits<2> opc, string asm> {
   def _D : sve_fp_z2op_p_zd<{ 0b0010, opc{1}, 1, opc{0} }, asm, ZPR64, ZPR64>;
 }
 
+multiclass sve_fp_z2op_p_zd_bfcvt<string asm, SDPatternOperator op> {
+  def NAME : sve_fp_z2op_p_zd<0b1001010, asm, ZPR32, ZPR16>;
+
+  def : SVE_3_Op_UndefZero_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+}
+
 multiclass sve_fp_z2op_p_zd_d<bit U, string asm> {
   def _HtoH : sve_fp_z2op_p_zd<{ 0b011101, U }, asm, ZPR16, ZPR16>;
   def _HtoS : sve_fp_z2op_p_zd<{ 0b011110, U }, asm, ZPR16, ZPR32>;
@@ -3299,13 +3310,20 @@ multiclass sve_fp_z2op_p_zd_d_flogb<string asm> {
   def _D : sve_fp_z2op_p_zd<0b0011011, asm, ZPR64, ZPR64>;
 }
 
-multiclass sve_fp_z2op_p_zd_b_0<string asm> {
+multiclass sve_fp_z2op_p_zd_b_0<string asm, string op> {
   def _StoH : sve_fp_z2op_p_zd<0b1001000, asm, ZPR32, ZPR16>;
   def _HtoS : sve_fp_z2op_p_zd<0b1001001, asm, ZPR16, ZPR32>;
   def _DtoH : sve_fp_z2op_p_zd<0b1101000, asm, ZPR64, ZPR16>;
   def _HtoD : sve_fp_z2op_p_zd<0b1101001, asm, ZPR16, ZPR64>;
   def _DtoS : sve_fp_z2op_p_zd<0b1101010, asm, ZPR64, ZPR32>;
   def _StoD : sve_fp_z2op_p_zd<0b1101011, asm, ZPR32, ZPR64>;
+
+  def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
+  def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f64), nxv8f16, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoH)>;
+  def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+  def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+  def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f16), nxv2f64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
+  def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll
new file mode 100644
index 0000000000000..cf9ac49ca7b23
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll
@@ -0,0 +1,330 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+sve,+bf16 < %s | FileCheck %s
+; RUN: llc -mattr=+sve2p2,+bf16 < %s | FileCheck %s -check-prefix CHECK-2p2
+
+; RUN: llc -mattr=+sme,+bf16 -force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sme2p2,+bf16 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2
+
+target triple = "aarch64-linux"
+
+define <vscale x 8 x half> @test_svcvt_f16_f32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvt_f16_f32_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt z0.h, p0/m, z0.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_f32_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.h, p0/z, z0.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_f32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvt_f16_f32_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_f32_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_f32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvt_f16_f32_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    fcvt z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_f32_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x bfloat> @test_svcvt_bf16_f32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvt_bf16_f32_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bfcvt z0.h, p0/m, z0.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_bf16_f32_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    bfcvt z0.h, p0/z, z0.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 8 x bfloat> %0
+}
+
+define <vscale x 8 x bfloat> @test_svcvt_bf16_f32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvt_bf16_f32_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bfcvt z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_bf16_f32_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    bfcvt z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 8 x bfloat> %0
+}
+
+define <vscale x 8 x bfloat> @test_svcvt_bf16_f32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvt_bf16_f32_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    bfcvt z0.h, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_bf16_f32_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    bfcvt z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 8 x bfloat> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_f64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcvt_f16_f64_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt z0.h, p0/m, z0.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_f64_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.h, p0/z, z0.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_f64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcvt_f16_f64_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt z0.h, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_f64_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_f64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcvt_f16_f64_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    fcvt z0.h, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_f64_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+  ret <vscale x 8 x half> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_f64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcvt_f32_f64_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_f64_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.s, p0/z, z0.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_f64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcvt_f32_f64_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_f64_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_f64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcvt_f32_f64_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.s, #0 // =0x0
+; CHECK-NEXT:    fcvt z0.s, p0/m, z1.d
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_f64_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_f16_x_1(<vscale x 4 x i1> %pg, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcvt_f32_f16_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_f16_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.s, p0/z, z0.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_f16_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcvt_f32_f16_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt z0.s, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_f16_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.s, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_f16_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcvt_f32_f16_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.s, #0 // =0x0
+; CHECK-NEXT:    fcvt z0.s, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_f16_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.s, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+  ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_f16_x_1(<vscale x 2 x i1> %pg, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcvt_f64_f16_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt z0.d, p0/m, z0.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_f16_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.d, p0/z, z0.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_f16_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcvt_f64_f16_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt z0.d, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_f16_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.d, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_f16_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcvt_f64_f16_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    fcvt z0.d, p0/m, z1.h
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_f16_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.d, p0/z, z1.h
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_f32_x_1(<vscale x 2 x i1> %pg, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvt_f64_f32_x_1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt z0.d, p0/m, z0.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_f32_x_1:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.d, p0/z, z0.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_f32_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvt_f64_f32_x_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_f32_x_2:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_f32_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvt_f64_f32_z:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    fcvt z0.d, p0/m, z1.s
+; CHECK-NEXT:    ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_f32_z:
+; CHECK-2p2:       // %bb.0: // %entry
+; CHECK-2p2-NEXT:    fcvt z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 2 x double> %0
+}