diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index ceab6c9dcca34..7dace11d208a0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5931,11 +5931,15 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, if (!MO) MO = &MI.getOperand(OpIdx); - const MachineOperand *UsedLiteral = nullptr; + const bool IsInlineConst = !MO->isReg() && isInlineConstant(*MO, OpInfo); - int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode()); - int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0; - if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) { + if (isVALU(MI) && !IsInlineConst && usesConstantBus(MRI, *MO, OpInfo)) { + const MachineOperand *UsedLiteral = nullptr; + + int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode()); + int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0; + + // TODO: Be more permissive with frame indexes. if (!MO->isReg() && !isInlineConstant(*MO, OpInfo)) { if (!LiteralLimit--) return false; @@ -5974,9 +5978,19 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, return false; } } - } else if (ST.hasNoF16PseudoScalarTransInlineConstants() && !MO->isReg() && - isF16PseudoScalarTrans(MI.getOpcode()) && - isInlineConstant(*MO, OpInfo)) { + } else if (!IsInlineConst && !MO->isReg() && isSALU(MI)) { + // There can be at most one literal operand, but it can be repeated. + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + if (i == OpIdx) + continue; + const MachineOperand &Op = MI.getOperand(i); + if (!Op.isReg() && !Op.isFI() && + !isInlineConstant(Op, InstDesc.operands()[i]) && + !Op.isIdenticalTo(*MO)) + return false; + } + } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() && + isF16PseudoScalarTrans(MI.getOpcode())) { return false; } diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir index 08693ec9db1d4..2492eb2982aac 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir @@ -133,7 +133,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1234567890, 1056964608, implicit $mode + ; CHECK-NEXT: %noninlinable:sreg_32 = S_MOV_B32 1234567890 + ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 %noninlinable, [[COPY]], 1056964608, implicit $mode ; CHECK-NEXT: $sgpr0 = COPY %fma %0:sreg_32 = COPY $sgpr0 %inlinable:sreg_32 = S_MOV_B32 1056964608 @@ -152,7 +153,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1234567890, 1056964608, implicit $mode + ; CHECK-NEXT: %noninlinable:sreg_32 = S_MOV_B32 1234567890 + ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 [[COPY]], %noninlinable, 1056964608, implicit $mode ; CHECK-NEXT: $sgpr0 = COPY %fma %0:sreg_32 = COPY $sgpr0 %inlinable:sreg_32 = S_MOV_B32 1056964608 @@ -210,7 +212,8 @@ body: | ; CHECK: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 [[COPY]], 1056964608, 1234567890, implicit $mode + ; CHECK-NEXT: %noninlinable:sreg_32 = S_MOV_B32 1234567890 + ; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1056964608, %noninlinable, implicit $mode ; CHECK-NEXT: $sgpr0 = COPY %fma %0:sreg_32 = COPY $sgpr0 %inlinable:sreg_32 = S_MOV_B32 1056964608 diff --git a/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir b/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir index 5f985605c082d..c8afb89aa272a 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir @@ -69,3 +69,202 @@ body: | %0:sreg_32 = S_MOV_B32 63 %1:sreg_32 = S_ADD_I32 %stack.0, %0, implicit-def $scc ... + +# GCN-LABEL: name: test_no_fold_literal_already_inline_lhs{{$}} +# GCN: %0:sreg_32 = S_MOV_B32 80 +# GCN-NEXT: %1:sreg_32 = S_ADD_I32 70, %0 +--- +name: test_no_fold_literal_already_inline_lhs +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 80 + %1:sreg_32 = S_ADD_I32 70, %0, implicit-def $scc +... + +# GCN-LABEL: name: test_no_fold_literal_already_inline_rhs{{$}} +# GCN: %0:sreg_32 = S_MOV_B32 80 +# GCN-NEXT: %1:sreg_32 = S_ADD_I32 %0, 70 +--- +name: test_no_fold_literal_already_inline_rhs +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 80 + %1:sreg_32 = S_ADD_I32 %0, 70, implicit-def $scc +... + +# GCN-LABEL: name: test_fold_literal_inlineimm_lhs{{$}} +# GCN: %1:sreg_32 = S_ADD_I32 64, 80 +--- +name: test_fold_literal_inlineimm_lhs +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 80 + %1:sreg_32 = S_ADD_I32 64, %0, implicit-def $scc +... + +# GCN-LABEL: name: test_fold_literal_inlineimm_rhs{{$}} +# GCN: %1:sreg_32 = S_ADD_I32 80, 64 +--- +name: test_fold_literal_inlineimm_rhs +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 80 + %1:sreg_32 = S_ADD_I32 %0, 64, implicit-def $scc +... + +# GCN-LABEL: name: test_fold_same_literal_2x{{$}} +# GCN: %2:sreg_32 = S_ADD_I32 70, %1 +--- +name: test_fold_same_literal_2x +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 70 + %1:sreg_32 = S_MOV_B32 70 + %2:sreg_32 = S_ADD_I32 %0, %1, implicit-def $scc +... + +# GCN-LABEL: name: test_fold_same_literal_lhs{{$}} +# GCN: %1:sreg_32 = S_ADD_I32 70, %0 +--- +name: test_fold_same_literal_lhs +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 70 + %1:sreg_32 = S_ADD_I32 70, %0, implicit-def $scc +... + +# GCN-LABEL: name: test_fold_same_literal_rhs{{$}} +# GCN: %1:sreg_32 = S_ADD_I32 %0, 70 +--- +name: test_fold_same_literal_rhs +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 70 + %1:sreg_32 = S_ADD_I32 %0, 70, implicit-def $scc +... + + +# GCN-LABEL: name: test_s_cselect_b32_2x_literal_fold{{$}} +# GCN: %2:sreg_32 = S_CSELECT_B32 70, %1, implicit $scc +--- +name: test_s_cselect_b32_2x_literal_fold +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 70 + %1:sreg_32 = S_MOV_B32 80 + $scc = IMPLICIT_DEF + %2:sreg_32 = S_CSELECT_B32 %0, %1, implicit $scc +... + +# GCN-LABEL: name: test_s_cselect_b32_fold_literal_literal_lhs{{$}} +# GCN: %1:sreg_32 = S_CSELECT_B32 70, %0, implicit $scc +--- +name: test_s_cselect_b32_fold_literal_literal_lhs +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 80 + $scc = IMPLICIT_DEF + %1:sreg_32 = S_CSELECT_B32 70, %0, implicit $scc +... + +# GCN-LABEL: name: test_s_cselect_b32_fold_literal_literal_rhs{{$}} +# GCN: %1:sreg_32 = S_CSELECT_B32 %0, 70, implicit $scc +--- +name: test_s_cselect_b32_fold_literal_literal_rhs +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 80 + $scc = IMPLICIT_DEF + %1:sreg_32 = S_CSELECT_B32 %0, 70, implicit $scc +... + +# GCN-LABEL: name: test_s_cselect_b32_fold_literal_inlineimm_lhs{{$}} +# GCN: %1:sreg_32 = S_CSELECT_B32 64, 80, implicit $scc +--- +name: test_s_cselect_b32_fold_literal_inlineimm_lhs +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 80 + $scc = IMPLICIT_DEF + %1:sreg_32 = S_CSELECT_B32 64, %0, implicit $scc +... + +# GCN-LABEL: name: test_s_cselect_b32_fold_literal_inlineimm_rhs{{$}} +# GCN: %1:sreg_32 = S_CSELECT_B32 80, 64, implicit $scc +--- +name: test_s_cselect_b32_fold_literal_inlineimm_rhs +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 80 + $scc = IMPLICIT_DEF + %1:sreg_32 = S_CSELECT_B32 %0, 64, implicit $scc +... + +# GCN-LABEL: name: test_s_cmp_b32_2x_literal_fold{{$}} +# GCN: S_CMP_EQ_U32 70, %1, implicit-def $scc +--- +name: test_s_cmp_b32_2x_literal_fold +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 70 + %1:sreg_32 = S_MOV_B32 80 + $scc = IMPLICIT_DEF + S_CMP_EQ_U32 %0, %1, implicit-def $scc +... + +# GCN-LABEL: name: test_s_cmp_b32_literal_literal_lhs{{$}} +# GCN: S_CMP_EQ_U32 70, %0, implicit-def $scc +--- +name: test_s_cmp_b32_literal_literal_lhs +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 80 + S_CMP_EQ_U32 70, %0, implicit-def $scc +... + +# GCN-LABEL: name: test_s_cmp_b32_literal_literal_rhs{{$}} +# GCN: S_CMP_EQ_U32 %0, 70, implicit-def $scc +--- +name: test_s_cmp_b32_literal_literal_rhs +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 80 + S_CMP_EQ_U32 %0, 70, implicit-def $scc +... + +# GCN-LABEL: name: test_s_cmp_b32_literal_inlineimm_lhs{{$}} +# GCN: S_CMP_EQ_U32 64, 80, implicit-def $scc +--- +name: test_s_cmp_b32_literal_inlineimm_lhs +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 80 + S_CMP_EQ_U32 64, %0, implicit-def $scc +... + +# GCN-LABEL: name: test_s_cmp_b32_literal_inlineimm_rhs{{$}} +# GCN: S_CMP_EQ_U32 80, 64, implicit-def $scc +--- +name: test_s_cmp_b32_literal_inlineimm_rhs +tracksRegLiveness: true +body: | + bb.0: + %0:sreg_32 = S_MOV_B32 80 + S_CMP_EQ_U32 %0, 64, implicit-def $scc +...