From c71c5dd6030234aa95642447c134a1b69ff439c6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 16 Aug 2024 18:13:40 +0400 Subject: [PATCH 1/8] AMDGPU: Update live intervals in convertToThreeAddress Fixes #98741 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 37 ++++-- .../test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir | 117 ++++++++++++++++-- 2 files changed, 134 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 90e11df500bc9..e401d3275905e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4058,14 +4058,32 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() || !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) { MachineInstr *DefMI; - const auto killDef = [&]() -> void { + const auto killDef = [&](SlotIndex NewIdx) -> void { const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); // The only user is the instruction which will be killed. Register DefReg = DefMI->getOperand(0).getReg(); + + if (LIS) { + LiveInterval &DefLI = LIS->getInterval(DefReg); + LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(NewIdx); + + if (OldSeg->end == NewIdx.getRegSlot()) { + DefLI.removeSegment(OldSeg->start, NewIdx.getRegSlot(), true); + + for (auto &SR : DefLI.subranges()) { + LiveRange::Segment *OldSegSR = SR.getSegmentContaining(NewIdx); + SR.removeSegment(OldSegSR->start, NewIdx.getRegSlot(), true); + } + + DefLI.removeEmptySubRanges(); + } + } + if (!MRI.hasOneNonDBGUse(DefReg)) return; // We cannot just remove the DefMI here, calling pass will crash. DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF)); + DefMI->getOperand(0).setIsDead(true); for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I) DefMI->removeOperand(I); if (LV) @@ -4087,9 +4105,10 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, .addImm(Imm) .setMIFlags(MI.getFlags()); updateLiveVariables(LV, MI, *MIB); + SlotIndex NewIdx; if (LIS) - LIS->ReplaceMachineInstrInMaps(MI, *MIB); - killDef(); + NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB); + killDef(NewIdx); return MIB; } } @@ -4107,9 +4126,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, .add(*Src2) .setMIFlags(MI.getFlags()); updateLiveVariables(LV, MI, *MIB); + + SlotIndex NewIdx; if (LIS) - LIS->ReplaceMachineInstrInMaps(MI, *MIB); - killDef(); + NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB); + killDef(NewIdx); return MIB; } } @@ -4129,10 +4150,12 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, .add(*Src2) .setMIFlags(MI.getFlags()); updateLiveVariables(LV, MI, *MIB); + + SlotIndex NewIdx; if (LIS) - LIS->ReplaceMachineInstrInMaps(MI, *MIB); + NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB); if (DefMI) - killDef(); + killDef(NewIdx); return MIB; } } diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir index 1768e39d1a06c..afb36041d7f4b 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir @@ -1,29 +1,120 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GFX10 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 %s --passes=two-address-instruction -verify-each -o - | FileCheck --check-prefixes=GFX10 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s # GFX10-LABEL: name: test_fmamk_reg_imm_f16 -# GFX10: %2:vgpr_32 = IMPLICIT_DEF +# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF # GFX10-NOT: V_MOV_B32 # GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_fmamk_reg_imm_f16 -registers: - - { id: 0, class: vreg_64 } - - { id: 1, class: vgpr_32 } - - { id: 2, class: vgpr_32 } - - { id: 3, class: vgpr_32 } body: | bb.0: - %0 = IMPLICIT_DEF - %1 = COPY %0.sub1 - %2 = V_MOV_B32_e32 1078523331, implicit $exec - %3 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec + %0:vreg_64 = IMPLICIT_DEF + %1:vgpr_32 = COPY %0.sub1 + %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec + %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec + +... + +# GFX10-LABEL: name: test_fmamk_reg_imm_f16__imm_is_subreg +# GFX10: %0:vreg_64 = IMPLICIT_DEF +# GFX10: %1:vgpr_32 = COPY %0.sub1 +# GFX10: dead undef %2.sub0:vreg_64 = IMPLICIT_DEF +# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +--- +name: test_fmamk_reg_imm_f16__imm_is_subreg +body: | + bb.0: + + %0:vreg_64 = IMPLICIT_DEF + %1:vgpr_32 = COPY %0.sub1 + undef %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec + %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2.sub0, killed %1, implicit $mode, implicit $exec + +... + +# GFX10-LABEL: name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined +# GFX10: %0:vreg_64 = IMPLICIT_DEF +# GFX10: %1:vgpr_32 = COPY %0.sub1 +# GFX10: undef %2.sub1:vreg_64 = V_MOV_B32_e32 9999, implicit $exec +# GFX10: %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec +# GFX10: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e +--- +name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined +body: | + bb.0: + + %0:vreg_64 = IMPLICIT_DEF + %1:vgpr_32 = COPY %0.sub1 + undef %2.sub1 = V_MOV_B32_e32 9999, implicit $exec + %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec + %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2.sub0, killed %1, implicit $mode, implicit $exec + +... + +# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_before_mac +# GFX10: %0:vreg_64 = IMPLICIT_DEF +# GFX10: %1:vgpr_32 = COPY %0.sub1 +# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec +# GFX10: S_NOP 0, implicit %2 +# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +--- +name: test_fmamk_reg_imm_f16__use_imm_before_mac +body: | + bb.0: + + %0:vreg_64 = IMPLICIT_DEF + %1:vgpr_32 = COPY %0.sub1 + %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec + S_NOP 0, implicit %2 + %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec + +... + +# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_after_mac +# GFX10: %0:vreg_64 = IMPLICIT_DEF +# GFX10: %1:vgpr_32 = COPY %0.sub1 +# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec +# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +--- +name: test_fmamk_reg_imm_f16__use_imm_after_mac +body: | + bb.0: + + %0:vreg_64 = IMPLICIT_DEF + %1:vgpr_32 = COPY %0.sub1 + %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec + %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec + S_NOP 0, implicit %2 + +... + +# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_before_after_mac +# GFX10: %0:vreg_64 = IMPLICIT_DEF +# GFX10: %1:vgpr_32 = COPY %0.sub1 +# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec +# GFX10: S_NOP 0, implicit %2 +# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10: S_NOP 0, implicit %2 + +--- +name: test_fmamk_reg_imm_f16__use_imm_before_after_mac +body: | + bb.0: + + %0:vreg_64 = IMPLICIT_DEF + %1:vgpr_32 = COPY %0.sub1 + %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec + S_NOP 0, implicit %2 + %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec + S_NOP 0, implicit %2 ... # GFX10-LABEL: name: test_fmamk_imm_reg_f16 -# GFX10: %2:vgpr_32 = IMPLICIT_DEF +# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF # GFX10-NOT: V_MOV_B32 # GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- From 15573e49b115ff7197b6ab80de1c1b819c9bc674 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 19 Aug 2024 17:25:26 +0400 Subject: [PATCH 2/8] Comments --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index e401d3275905e..c5b320699a1d0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4066,13 +4066,15 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, if (LIS) { LiveInterval &DefLI = LIS->getInterval(DefReg); LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(NewIdx); + assert(OldSeg && "segment not found for instruction in LiveInterval"); if (OldSeg->end == NewIdx.getRegSlot()) { - DefLI.removeSegment(OldSeg->start, NewIdx.getRegSlot(), true); + DefLI.removeSegment(*OldSeg, true); for (auto &SR : DefLI.subranges()) { LiveRange::Segment *OldSegSR = SR.getSegmentContaining(NewIdx); - SR.removeSegment(OldSegSR->start, NewIdx.getRegSlot(), true); + if (OldSegSR->end == NewIdx.getRegSlot()) + SR.removeSegment(*OldSegSR, true); } DefLI.removeEmptySubRanges(); From a332148d28f8f6f422df94e18b0449dd91573ed3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 19 Aug 2024 17:28:27 +0400 Subject: [PATCH 3/8] Rename variable --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index c5b320699a1d0..ab22a7b311c2c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4058,22 +4058,22 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() || !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) { MachineInstr *DefMI; - const auto killDef = [&](SlotIndex NewIdx) -> void { + const auto killDef = [&](SlotIndex OldDefIdx) -> void { const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); // The only user is the instruction which will be killed. Register DefReg = DefMI->getOperand(0).getReg(); if (LIS) { LiveInterval &DefLI = LIS->getInterval(DefReg); - LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(NewIdx); + LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(OldDefIdx); assert(OldSeg && "segment not found for instruction in LiveInterval"); - if (OldSeg->end == NewIdx.getRegSlot()) { + if (OldSeg->end == OldDefIdx.getRegSlot()) { DefLI.removeSegment(*OldSeg, true); for (auto &SR : DefLI.subranges()) { - LiveRange::Segment *OldSegSR = SR.getSegmentContaining(NewIdx); - if (OldSegSR->end == NewIdx.getRegSlot()) + LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldDefIdx); + if (OldSegSR->end == OldDefIdx.getRegSlot()) SR.removeSegment(*OldSegSR, true); } From 270f7806331e3e7cf435f20407f28775da39345c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 4 Sep 2024 15:16:32 +0400 Subject: [PATCH 4/8] Rename variable --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index ab22a7b311c2c..bff43336b017d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4058,22 +4058,22 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() || !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) { MachineInstr *DefMI; - const auto killDef = [&](SlotIndex OldDefIdx) -> void { + const auto killDef = [&](SlotIndex OldUseIdx) -> void { const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); // The only user is the instruction which will be killed. Register DefReg = DefMI->getOperand(0).getReg(); if (LIS) { LiveInterval &DefLI = LIS->getInterval(DefReg); - LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(OldDefIdx); + LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(OldUseIdx); assert(OldSeg && "segment not found for instruction in LiveInterval"); - if (OldSeg->end == OldDefIdx.getRegSlot()) { + if (OldSeg->end == OldUseIdx.getRegSlot()) { DefLI.removeSegment(*OldSeg, true); for (auto &SR : DefLI.subranges()) { - LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldDefIdx); - if (OldSegSR->end == OldDefIdx.getRegSlot()) + LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldUseIdx); + if (OldSegSR->end == OldUseIdx.getRegSlot()) SR.removeSegment(*OldSegSR, true); } From 1fc765d473ecad1c93b414233b1b431bfb901d9f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 4 Sep 2024 16:41:02 +0400 Subject: [PATCH 5/8] Remove less --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 11 ++++++++--- llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir | 3 ++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index bff43336b017d..3f11ffca76f3f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4069,12 +4069,17 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, assert(OldSeg && "segment not found for instruction in LiveInterval"); if (OldSeg->end == OldUseIdx.getRegSlot()) { - DefLI.removeSegment(*OldSeg, true); + // We only want to leave the dead def. + DefLI.removeSegment(OldSeg->start.getDeadSlot(), OldUseIdx.getRegSlot(), + true); for (auto &SR : DefLI.subranges()) { LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldUseIdx); - if (OldSegSR->end == OldUseIdx.getRegSlot()) - SR.removeSegment(*OldSegSR, true); + if (OldSegSR->end == OldUseIdx.getRegSlot()) { + // We only want to leave the dead def. + SR.removeSegment(OldSegSR->start.getDeadSlot(), + OldUseIdx.getRegSlot(), true); + } } DefLI.removeEmptySubRanges(); diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir index afb36041d7f4b..ccb2f5f6a1888 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir @@ -1,6 +1,6 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=livevars,liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s # GFX10-LABEL: name: test_fmamk_reg_imm_f16 # GFX10: dead %2:vgpr_32 = IMPLICIT_DEF @@ -8,6 +8,7 @@ # GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_fmamk_reg_imm_f16 +tracksRegLiveness: true body: | bb.0: From 7d7a5e68db28c1f524966e5943941d10fff07e42 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 4 Sep 2024 16:42:35 +0400 Subject: [PATCH 6/8] Add tracksRegLiveness to every function --- llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir index ccb2f5f6a1888..bf11ed874afd0 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir @@ -26,6 +26,7 @@ body: | # GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_fmamk_reg_imm_f16__imm_is_subreg +tracksRegLiveness: true body: | bb.0: @@ -44,9 +45,9 @@ body: | # GFX10: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e --- name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined +tracksRegLiveness: true body: | bb.0: - %0:vreg_64 = IMPLICIT_DEF %1:vgpr_32 = COPY %0.sub1 undef %2.sub1 = V_MOV_B32_e32 9999, implicit $exec @@ -63,6 +64,7 @@ body: | # GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_fmamk_reg_imm_f16__use_imm_before_mac +tracksRegLiveness: true body: | bb.0: @@ -81,6 +83,7 @@ body: | # GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_fmamk_reg_imm_f16__use_imm_after_mac +tracksRegLiveness: true body: | bb.0: @@ -102,6 +105,7 @@ body: | --- name: test_fmamk_reg_imm_f16__use_imm_before_after_mac +tracksRegLiveness: true body: | bb.0: @@ -120,6 +124,7 @@ body: | # GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_fmamk_imm_reg_f16 +tracksRegLiveness: true registers: - { id: 0, class: vreg_64 } - { id: 1, class: vgpr_32 } @@ -141,6 +146,7 @@ body: | # GFX10: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec --- name: test_fmaak_f16 +tracksRegLiveness: true registers: - { id: 0, class: vreg_64 } - { id: 1, class: vgpr_32 } From 61b61f2b4b03c2ba85b2c02a7c916a3845d5935e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 4 Sep 2024 18:05:01 +0400 Subject: [PATCH 7/8] Hackily use shrinkToUses --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 51 +++++++++---------- .../test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir | 34 ++++++++----- 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 3f11ffca76f3f..de68ff2d02c2a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4059,42 +4059,37 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) { MachineInstr *DefMI; const auto killDef = [&](SlotIndex OldUseIdx) -> void { - const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); // The only user is the instruction which will be killed. Register DefReg = DefMI->getOperand(0).getReg(); + if (MRI.hasOneNonDBGUse(DefReg)) { + // We cannot just remove the DefMI here, calling pass will crash. + DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF)); + DefMI->getOperand(0).setIsDead(true); + for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I) + DefMI->removeOperand(I); + if (LV) + LV->getVarInfo(DefReg).AliveBlocks.clear(); + } + if (LIS) { LiveInterval &DefLI = LIS->getInterval(DefReg); - LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(OldUseIdx); - assert(OldSeg && "segment not found for instruction in LiveInterval"); - - if (OldSeg->end == OldUseIdx.getRegSlot()) { - // We only want to leave the dead def. - DefLI.removeSegment(OldSeg->start.getDeadSlot(), OldUseIdx.getRegSlot(), - true); - - for (auto &SR : DefLI.subranges()) { - LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldUseIdx); - if (OldSegSR->end == OldUseIdx.getRegSlot()) { - // We only want to leave the dead def. - SR.removeSegment(OldSegSR->start.getDeadSlot(), - OldUseIdx.getRegSlot(), true); - } - } - DefLI.removeEmptySubRanges(); + // We cannot delete the original instruction here, so hack out the use + // in the original instruction with a dummy register so we can use + // shrinkToUses to deal with any multi-use edge cases. Other targets do + // not have the complexity of deleting a use to consider here. + Register DummyReg = MRI.cloneVirtualRegister(DefReg); + for (MachineOperand &MIOp : MI.uses()) { + if (MIOp.isReg() && MIOp.getReg() == DefReg) { + MIOp.setIsUndef(true); + MIOp.setReg(DummyReg); + } } - } - if (!MRI.hasOneNonDBGUse(DefReg)) - return; - // We cannot just remove the DefMI here, calling pass will crash. - DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF)); - DefMI->getOperand(0).setIsDead(true); - for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I) - DefMI->removeOperand(I); - if (LV) - LV->getVarInfo(DefReg).AliveBlocks.clear(); + LIS->shrinkToUses(&DefLI); + } }; int64_t Imm; diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir index bf11ed874afd0..f814dd335d20c 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir @@ -1,11 +1,13 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=livevars,liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10,GFX10-NOLIS %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10,GFX10-NOLIS %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10,GFX10-LIS %s + # GFX10-LABEL: name: test_fmamk_reg_imm_f16 # GFX10: dead %2:vgpr_32 = IMPLICIT_DEF # GFX10-NOT: V_MOV_B32 -# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-NOLIS: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-LIS: V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec --- name: test_fmamk_reg_imm_f16 tracksRegLiveness: true @@ -23,7 +25,8 @@ body: | # GFX10: %0:vreg_64 = IMPLICIT_DEF # GFX10: %1:vgpr_32 = COPY %0.sub1 # GFX10: dead undef %2.sub0:vreg_64 = IMPLICIT_DEF -# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec --- name: test_fmamk_reg_imm_f16__imm_is_subreg tracksRegLiveness: true @@ -42,7 +45,8 @@ body: | # GFX10: %1:vgpr_32 = COPY %0.sub1 # GFX10: undef %2.sub1:vreg_64 = V_MOV_B32_e32 9999, implicit $exec # GFX10: %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec -# GFX10: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e +# GFX10-NOLIS: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e +# GFX10-LIS: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0.sub0, 0, %2.sub0, 0, %1, 0, 0, 0, implicit $mode, implicit $e --- name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined tracksRegLiveness: true @@ -61,7 +65,8 @@ body: | # GFX10: %1:vgpr_32 = COPY %0.sub1 # GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec # GFX10: S_NOP 0, implicit %2 -# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec --- name: test_fmamk_reg_imm_f16__use_imm_before_mac tracksRegLiveness: true @@ -80,7 +85,8 @@ body: | # GFX10: %0:vreg_64 = IMPLICIT_DEF # GFX10: %1:vgpr_32 = COPY %0.sub1 # GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec -# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec --- name: test_fmamk_reg_imm_f16__use_imm_after_mac tracksRegLiveness: true @@ -100,7 +106,8 @@ body: | # GFX10: %1:vgpr_32 = COPY %0.sub1 # GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec # GFX10: S_NOP 0, implicit %2 -# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec # GFX10: S_NOP 0, implicit %2 --- @@ -121,7 +128,8 @@ body: | # GFX10-LABEL: name: test_fmamk_imm_reg_f16 # GFX10: dead %2:vgpr_32 = IMPLICIT_DEF # GFX10-NOT: V_MOV_B32 -# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-NOLIS: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-LIS: V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec --- name: test_fmamk_imm_reg_f16 tracksRegLiveness: true @@ -143,7 +151,8 @@ body: | # GFX10-LABEL: name: test_fmaak_f16 # GFX10: %1:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec # GFX10-NOT: V_MOV_B32 -# GFX10: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec +# GFX10-NOLIS: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec +# GFX10-LIS: V_FMAAK_F16 %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec --- name: test_fmaak_f16 tracksRegLiveness: true @@ -163,7 +172,8 @@ body: | # GFX10-LABEL: name: test_fmaak_inline_literal_f16 # GFX10: %1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec # GFX10-NOT: V_MOV_B32 -# GFX10: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec +# GFX10-NOLIS: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec +# GFX10-LIS: %2:vgpr_32 = V_FMAAK_F16 16384, %0, 49664, implicit $mode, implicit $exec --- name: test_fmaak_inline_literal_f16 From 7a564772ca3f85e3ec3299fbcec8d40f2286be21 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 6 Sep 2024 11:16:58 +0400 Subject: [PATCH 8/8] Remove unused index argument --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index de68ff2d02c2a..c6f28af1e5e73 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4058,7 +4058,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() || !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) { MachineInstr *DefMI; - const auto killDef = [&](SlotIndex OldUseIdx) -> void { + const auto killDef = [&]() -> void { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); // The only user is the instruction which will be killed. Register DefReg = DefMI->getOperand(0).getReg(); @@ -4107,10 +4107,9 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, .addImm(Imm) .setMIFlags(MI.getFlags()); updateLiveVariables(LV, MI, *MIB); - SlotIndex NewIdx; if (LIS) - NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB); - killDef(NewIdx); + LIS->ReplaceMachineInstrInMaps(MI, *MIB); + killDef(); return MIB; } } @@ -4129,10 +4128,9 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, .setMIFlags(MI.getFlags()); updateLiveVariables(LV, MI, *MIB); - SlotIndex NewIdx; if (LIS) - NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB); - killDef(NewIdx); + LIS->ReplaceMachineInstrInMaps(MI, *MIB); + killDef(); return MIB; } } @@ -4153,11 +4151,10 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, .setMIFlags(MI.getFlags()); updateLiveVariables(LV, MI, *MIB); - SlotIndex NewIdx; if (LIS) - NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB); + LIS->ReplaceMachineInstrInMaps(MI, *MIB); if (DefMI) - killDef(NewIdx); + killDef(); return MIB; } }