diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index f5346c8805733..6c6d38462484a 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -573,14 +573,6 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (TailBB.isSuccessor(&TailBB)) return false; - // Duplicating a BB which has both multiple predecessors and successors will - // result in a complex CFG and also may cause huge amount of PHI nodes. If we - // want to remove this limitation, we have to address - // https://github.com/llvm/llvm-project/issues/78578. - if (TailBB.pred_size() > TailDupPredSize && - TailBB.succ_size() > TailDupSuccSize) - return false; - // Set the limit on the cost to duplicate. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. @@ -618,6 +610,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; + unsigned NumPhis = 0; for (MachineInstr &MI : TailBB) { // Non-duplicable things shouldn't be tail-duplicated. // CFI instructions are marked as non-duplicable, because Darwin compact @@ -661,6 +654,20 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (InstrCount > MaxDuplicateCount) return false; + NumPhis += MI.isPHI(); + } + + // Duplicating a BB which has both multiple predecessors and successors + // may cause a huge number of PHI nodes. If we want to remove this limitation, + // we have to address https://github.com/llvm/llvm-project/issues/78578. + if (TailBB.pred_size() > TailDupPredSize && + TailBB.succ_size() > TailDupSuccSize) { + // If TailBB or any of its successors contains a phi, we may have to add a + // large number of additional phis with additional incoming values. 
+ if (NumPhis != 0 || any_of(TailBB.successors(), [](MachineBasicBlock *MBB) { + return any_of(*MBB, [](MachineInstr &MI) { return MI.isPHI(); }); + })) + return false; } // Check if any of the successors of TailBB has a PHI node in which the diff --git a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir index 1d17672e2c6bd..2f1ff76fda76c 100644 --- a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir +++ b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir @@ -538,43 +538,47 @@ body: | ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.2: - ; LIMIT-NEXT: successors: %bb.7(0x80000000) + ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg - ; LIMIT-NEXT: JMP_1 %bb.7 + ; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.3: - ; LIMIT-NEXT: successors: %bb.7(0x80000000) + ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg - ; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags - ; LIMIT-NEXT: JMP_1 %bb.7 + ; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead 
$eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.4: - ; LIMIT-NEXT: successors: %bb.7(0x80000000) + ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg - ; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags - ; LIMIT-NEXT: JMP_1 %bb.7 + ; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.5: - ; LIMIT-NEXT: successors: %bb.7(0x80000000) + ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg - ; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags - ; LIMIT-NEXT: JMP_1 %bb.7 + ; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags + ; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.6: ; 
LIMIT-NEXT: successors: ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: bb.7: - ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) - ; LIMIT-NEXT: {{ $}} - ; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags - ; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri4]], 7, implicit-def dead $eflags - ; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri1]], %subreg.sub_32bit - ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg - ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.9: ; LIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg ; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[MOV32rm4]] :: (store (s32)) @@ -583,23 +587,23 @@ body: | ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.10: ; LIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg - ; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags - ; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri5]] :: (store (s32)) - ; LIMIT-NEXT: $eax = COPY [[SHR32ri5]] + ; LIMIT-NEXT: [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri8]] :: (store (s32)) + ; LIMIT-NEXT: $eax = COPY [[SHR32ri8]] ; LIMIT-NEXT: RET 0, $eax ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.11: ; LIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg - ; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags - ; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri6]] :: (store (s32)) - ; LIMIT-NEXT: $eax = COPY [[SHR32ri6]] + ; LIMIT-NEXT: [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri9]] :: (store (s32)) + ; LIMIT-NEXT: $eax = COPY [[SHR32ri9]] ; LIMIT-NEXT: RET 0, 
$eax ; LIMIT-NEXT: {{ $}} ; LIMIT-NEXT: bb.12: ; LIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg - ; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags - ; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri7]] :: (store (s32)) - ; LIMIT-NEXT: $eax = COPY [[SHR32ri7]] + ; LIMIT-NEXT: [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags + ; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri10]] :: (store (s32)) + ; LIMIT-NEXT: $eax = COPY [[SHR32ri10]] ; LIMIT-NEXT: RET 0, $eax ; ; NOLIMIT-LABEL: name: foo_no_phis