-
Notifications
You must be signed in to change notification settings - Fork 14.5k
Closed
Labels
Description
In revision https://reviews.llvm.org/D157462, branch-weight updating was added to the LoopRotate pass.
However, consider running the pass with `bin/opt -passes=loop-rotate test.ll` on the following test case, whose loop branch has a 1:0 weight ratio:
declare void @llvm.experimental.deoptimize.isVoid(...)
define void @test(ptr addrspace(1) %0, i32 %limit) {
br label %loop
loop: ; preds = %guarded, %1
%iv = phi i32 [ %iv.next, %guarded ], [ 0, %1 ]
%gc = icmp slt i32 %iv, %limit
br i1 %gc, label %guarded, label %exit, !prof !0
guarded: ; preds = %loop
%iv.next = add i32 %iv, 1
%load = load atomic ptr addrspace(1), ptr addrspace(1) %0 unordered, align 8
%ec = icmp eq ptr addrspace(1) %load, null
br i1 %ec, label %deopt, label %loop
deopt: ; preds = %guarded
call void (...) @llvm.experimental.deoptimize.isVoid(i32 11) [ "deopt"(i32 0) ]
ret void
exit: ; preds = %loop
ret void
}
!0 = !{!"branch_weights", i32 1, i32 0}
The resulting branch weights look wrong (they are printed as negative i32 values, i.e. very large unsigned 32-bit weights):
define void @test(ptr addrspace(1) %0, i32 %limit) {
%gc1 = icmp slt i32 0, %limit
br i1 %gc1, label %guarded.lr.ph, label %exit, !prof !0
guarded.lr.ph: ; preds = %1
br label %guarded
loop: ; preds = %guarded
%iv = phi i32 [ %iv.next, %guarded ]
%gc = icmp slt i32 %iv, %limit
br i1 %gc, label %guarded, label %loop.exit_crit_edge, !prof !1
guarded: ; preds = %guarded.lr.ph, %loop
%iv2 = phi i32 [ 0, %guarded.lr.ph ], [ %iv, %loop ]
%iv.next = add i32 %iv2, 1
%load = load atomic ptr addrspace(1), ptr addrspace(1) %0 unordered, align 8
%ec = icmp eq ptr addrspace(1) %load, null
br i1 %ec, label %deopt, label %loop
deopt: ; preds = %guarded
call void (...) @llvm.experimental.deoptimize.isVoid(i32 11) [ "deopt"(i32 0) ]
ret void
loop.exit_crit_edge: ; preds = %loop
br label %exit
exit: ; preds = %loop.exit_crit_edge, %1
ret void
}
!0 = !{!"branch_weights", i32 -1, i32 1}
!1 = !{!"branch_weights", i32 -2147483647, i32 -1}
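The reason seems to be that `OrigLoopExitWeight` is not actually scaled up when it is 0: shifting 0 left still yields 0, so the scaling loop below only stops once the overflow guard triggers on `OrigLoopBackedgeWeight`: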
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp#L314
// Scale up counts if necessary so we can match `ZeroTripCountWeights` for
// the `ExitWeight0`:`ExitWeight1` (aka `x0`:`x1` ratio`) ratio.
while (OrigLoopExitWeight < ZeroTripCountWeights[1] + ExitWeight0) {
// ... but don't overflow.
uint32_t const HighBit = uint32_t{1} << (sizeof(uint32_t) * 8 - 1);
if ((OrigLoopBackedgeWeight & HighBit) != 0 ||
(OrigLoopExitWeight & HighBit) != 0)
break;
OrigLoopBackedgeWeight <<= 1;
OrigLoopExitWeight <<= 1;
}
The subsequent computation then overflows (the unsigned subtraction wraps around, since `OrigLoopExitWeight` is still 0):
uint32_t ExitWeight1 = OrigLoopExitWeight - ExitWeight0;