diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 37a356c43e29a..f2744b5d66a26 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -888,6 +888,17 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return false; } // end of PHI handling + // or-disjoint instruction which is predicated maybe disjoint because of a + // dominating condition. Avoid vectorizing in such cases, since dropping + // the disjoint (to avoid poison) changes the meaning of the instruction + // which was originally an add. + // TODO: We should check the terminating condition of all + // dominating blocks in the loop to see if Op0 is used for their + // terminating condition. + if (auto *Op = dyn_cast(&I)) + if (Op->isDisjoint() && blockNeedsPredication(BB)) + return false; + // We handle calls that: // * Are debug info intrinsics. // * Have a mapping to an IR intrinsic. diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll new file mode 100644 index 0000000000000..20a6732e340b7 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@global = external global ptr addrspace(1), align 8 + +; PR 81872 explains the issue. + +; If we vectorize, we have a miscompile where array IV and thereby value stored in (arr[99], +; arr[98]) is calculated incorrectly since disjoint or was only disjoint because +; of dominating conditions. Dropping the disjoint to avoid poison still changes +; the behaviour since now the or is not longer equivalent to the add. +; Function Attrs: uwtable +define void @test(ptr addrspace(1) noundef align 8 dereferenceable_or_null(16) %arg1) #0 { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr addrspace(1) noundef align 8 dereferenceable_or_null(16) [[ARG1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: bb5: +; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 16 +; CHECK-NEXT: br label [[BB8:%.*]] +; CHECK: bb6: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[PHI9:%.*]], 1 +; CHECK-NEXT: [[ICMP7:%.*]] = icmp sgt i32 [[PHI9]], -2 +; CHECK-NEXT: br i1 [[ICMP7]], label [[BB10:%.*]], label [[BB8]] +; CHECK: bb8: +; CHECK-NEXT: [[PHI9]] = phi i32 [ -60516, [[BB5:%.*]] ], [ [[ADD]], [[BB6:%.*]] ] +; CHECK-NEXT: br label [[BB15:%.*]] +; CHECK: bb10: +; CHECK-NEXT: [[GETELEMENTPTR11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 96 +; CHECK-NEXT: [[LOAD12:%.*]] = load i64, ptr addrspace(1) [[GETELEMENTPTR11]], align 8, !noundef [[META0:![0-9]+]] +; CHECK-NEXT: [[LOAD13:%.*]] = load ptr addrspace(1), ptr @global, align 8, !invariant.load [[META0]], !nonnull [[META0]], !dereferenceable_or_null !1, !align [[META2:![0-9]+]] +; CHECK-NEXT: [[GETELEMENTPTR14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[LOAD13]], i64 848 +; CHECK-NEXT: store i64 [[LOAD12]], ptr addrspace(1) [[GETELEMENTPTR14]], align 8 +; CHECK-NEXT: ret void +; CHECK: bb15: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 99, [[BB8]] ], [ [[IV_NEXT:%.*]], [[BB20:%.*]] ] +; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 1 +; CHECK-NEXT: [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0 +; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[BB20]], !prof [[PROF3:![0-9]+]] +; CHECK: bb18: +; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1 +; CHECK-NEXT: [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[GETELEMENTPTR]], i64 [[OR]] +; CHECK-NEXT: store i64 1, ptr addrspace(1) [[GETELEMENTPTR19]], align 8 +; CHECK-NEXT: br label [[BB20]] +; CHECK: bb20: +; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; CHECK-NEXT: [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90 +; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[BB15]], !prof [[PROF4:![0-9]+]] +; +bb5: + %getelementptr = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 16 + br label %bb8 + +bb6: ; preds = %bb20 + %add = add nsw i32 %phi9, 1 + %icmp7 = icmp sgt i32 %phi9, -2 + br i1 %icmp7, label %bb10, label %bb8 + +bb8: ; preds = %bb6, %bb5 + %phi9 = phi i32 [ -60516, %bb5 ], [ %add, %bb6 ] + br label %bb15 + +bb10: ; preds = %bb6 + %getelementptr11 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 96 + %load12 = load i64, ptr addrspace(1) %getelementptr11, align 8, !noundef !4 + %load13 = load ptr addrspace(1), ptr @global, align 8, !invariant.load !4, !nonnull !4, !dereferenceable_or_null !16, !align !17 + %getelementptr14 = getelementptr inbounds i8, ptr addrspace(1) %load13, i64 848 + store i64 %load12, ptr addrspace(1) %getelementptr14, align 8 + ret void + +bb15: ; preds = %bb20, %bb8 + %iv = phi i64 [ 99, %bb8 ], [ %iv.next, %bb20 ] + %and = and i64 %iv, 1 + %icmp17 = icmp eq i64 %and, 0 + br i1 %icmp17, label %bb18, label %bb20, !prof !21 + +bb18: ; preds = %bb15 + %or = or disjoint i64 %iv, 1 + %getelementptr19 = getelementptr inbounds i64, ptr addrspace(1) %getelementptr, i64 %or + store i64 1, ptr addrspace(1) %getelementptr19, align 8 + br label %bb20 + +bb20: ; preds = %bb18, %bb15 + %iv.next = add nsw i64 %iv, -1 + %icmp22 = icmp eq i64 %iv.next, 90 + br i1 %icmp22, label %bb6, label %bb15, !prof !22 +} + +attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" } + +!4 = !{} +!10 = !{i32 1} +!16 = !{i64 864} +!17 = !{i64 8} +!21 = !{!"branch_weights", i32 1, i32 1} +!22 = !{!"branch_weights", i32 1, i32 95} +;. +; CHECK: [[META0]] = !{} +; CHECK: [[META2]] = !{i64 8} +; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 1} +; CHECK: [[PROF4]] = !{!"branch_weights", i32 1, i32 95} +;.