From 885dcd85bfab2e1f149a52d32837525e0f3f03b9 Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Mon, 31 Mar 2025 17:35:32 +0000 Subject: [PATCH 1/3] [AMDGPU] Fix SIFoldOperandsImpl::tryFoldZeroHighBits when src1 is not a register operand. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 2 +- .../AMDGPU/fold-zero-high-bits-skips-non-reg.mir | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index cc15dd7cb495c..46bd5d8044c45 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1453,7 +1453,7 @@ bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const { return false; std::optional Src0Imm = getImmOrMaterializedImm(MI.getOperand(1)); - if (!Src0Imm || *Src0Imm != 0xffff) + if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg()) return false; Register Src1 = MI.getOperand(2).getReg(); diff --git a/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir b/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir new file mode 100644 index 0000000000000..f0b0d1b7948dd --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir @@ -0,0 +1,15 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1101 -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s +--- +name: test_tryFoldZeroHighBits_skips_nonreg +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: test_tryFoldZeroHighBits_skips_nonreg + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = 
V_AND_B32_e64 65535, 0, implicit $exec + %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %1:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1 + %2:vgpr_32 = V_AND_B32_e64 65535, %1.sub0, implicit $exec + From 40838d89609890eb9c0481c8692a456916c43dff Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Tue, 1 Apr 2025 07:06:06 +0000 Subject: [PATCH 2/3] Address review feedback. --- .../CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir b/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir index f0b0d1b7948dd..68e2f5ae23dce 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1101 -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1101 -run-pass si-fold-operands %s -o - | FileCheck %s --- name: test_tryFoldZeroHighBits_skips_nonreg tracksRegLiveness: true @@ -9,7 +9,11 @@ body: | ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1 ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 65535, 0, implicit $exec + ; CHECK-NEXT: S_NOP 0, implicit [[V_AND_B32_e64_]] %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %1:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1 %2:vgpr_32 = V_AND_B32_e64 65535, %1.sub0, implicit $exec + S_NOP 0, implicit %2 +... 
+ From 9b01a203c106c82b1cac33e503e674cfb9a71b58 Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Tue, 1 Apr 2025 07:11:48 +0000 Subject: [PATCH 3/3] removed extra lines --- llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir b/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir index 68e2f5ae23dce..b1aa88969c5bb 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-zero-high-bits-skips-non-reg.mir @@ -15,5 +15,3 @@ body: | %2:vgpr_32 = V_AND_B32_e64 65535, %1.sub0, implicit $exec S_NOP 0, implicit %2 ... - -