Skip to content

[LoopRotate] Incorrect branch weights updating #66675

@aleks-tmb

Description

@aleks-tmb

In the revision https://reviews.llvm.org/D157462 branch weights updating was added to the LoopRotate.
But let's run the pass bin/opt -passes=loop-rotate test.ll on with the test case with 1:0 weights ratio:

declare void @llvm.experimental.deoptimize.isVoid(...)

define void @test(ptr addrspace(1) %0, i32 %limit) {
  br label %loop

loop:                                             ; preds = %guarded, %1
  %iv = phi i32 [ %iv.next, %guarded ], [ 0, %1 ]
  %gc = icmp slt i32 %iv, %limit
  br i1 %gc, label %guarded, label %exit, !prof !0

guarded:                                          ; preds = %loop
  %iv.next = add i32 %iv, 1
  %load = load atomic ptr addrspace(1), ptr addrspace(1) %0 unordered, align 8
  %ec = icmp eq ptr addrspace(1) %load, null
  br i1 %ec, label %deopt, label %loop

deopt:                                            ; preds = %guarded
  call void (...) @llvm.experimental.deoptimize.isVoid(i32 11) [ "deopt"(i32 0) ]
  ret void

exit:                                             ; preds = %loop
  ret void
}

!0 = !{!"branch_weights", i32 1, i32 0}

Resulting branch weigths looks weird:

define void @test(ptr addrspace(1) %0, i32 %limit) {
  %gc1 = icmp slt i32 0, %limit
  br i1 %gc1, label %guarded.lr.ph, label %exit, !prof !0

guarded.lr.ph:                                    ; preds = %1
  br label %guarded

loop:                                             ; preds = %guarded
  %iv = phi i32 [ %iv.next, %guarded ]
  %gc = icmp slt i32 %iv, %limit
  br i1 %gc, label %guarded, label %loop.exit_crit_edge, !prof !1

guarded:                                          ; preds = %guarded.lr.ph, %loop
  %iv2 = phi i32 [ 0, %guarded.lr.ph ], [ %iv, %loop ]
  %iv.next = add i32 %iv2, 1
  %load = load atomic ptr addrspace(1), ptr addrspace(1) %0 unordered, align 8
  %ec = icmp eq ptr addrspace(1) %load, null
  br i1 %ec, label %deopt, label %loop

deopt:                                            ; preds = %guarded
  call void (...) @llvm.experimental.deoptimize.isVoid(i32 11) [ "deopt"(i32 0) ]
  ret void

loop.exit_crit_edge:                              ; preds = %loop
  br label %exit

exit:                                             ; preds = %loop.exit_crit_edge, %1
  ret void
}

!0 = !{!"branch_weights", i32 -1, i32 1}
!1 = !{!"branch_weights", i32 -2147483647, i32 -1}

It seems the reason is that we don't scale OrigLoopExitWeight when it's 0:
https://github.com/llvm/llvm-project/blob/main/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp#L314

      // Scale up counts if necessary so we can match `ZeroTripCountWeights` for
      // the `ExitWeight0`:`ExitWeight1` (aka `x0`:`x1` ratio`) ratio.
      while (OrigLoopExitWeight < ZeroTripCountWeights[1] + ExitWeight0) {
        // ... but don't overflow.
        uint32_t const HighBit = uint32_t{1} << (sizeof(uint32_t) * 8 - 1);
        if ((OrigLoopBackedgeWeight & HighBit) != 0 ||
            (OrigLoopExitWeight & HighBit) != 0)
          break;
        OrigLoopBackedgeWeight <<= 1;
        OrigLoopExitWeight <<= 1;
      }

and the subsequent computation overflowing:
uint32_t ExitWeight1 = OrigLoopExitWeight - ExitWeight0;

Metadata

Metadata

Assignees

Labels

PGOProfile Guided Optimizationsloopoptim

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions