From 9d9ad9ff3d807ecfce5f4ed392a5c506b115816c Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 14 Jan 2025 18:40:10 +0800 Subject: [PATCH 1/3] Precommit test --- ...-force-tail-with-evl-reverse-load-store.ll | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll index f323231445aad..fa62117f4912f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -249,3 +249,120 @@ loopend: ret void } +define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d) { +; IF-EVL-LABEL: @multiple_reverse_vector_pointer( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 +; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 16 +; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 1024, [[N_VEC]] +; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) +; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1024, [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0 +; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP7]] +; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 +; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP9]] +; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP9]] +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP10]] +; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP11]] +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP6]]) +; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv16i8( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP6]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[B:%.*]], [[VP_REVERSE]] +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv16i8.nxv16p0( align 1 [[TMP14]], splat (i1 true), i32 [[TMP6]]) +; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP7]] +; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP4]] +; IF-EVL-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP4]] +; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP17]] +; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 [[TMP18]] +; IF-EVL-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv16i8( [[WIDE_MASKED_GATHER]], splat (i1 true), i32 [[TMP6]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_REVERSE1]], ptr align 1 [[TMP20]], splat (i1 true), i32 [[TMP6]]) +; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[TMP7]] +; IF-EVL-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64 +; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 0, [[TMP22]] +; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 1, [[TMP22]] +; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP21]], i64 [[TMP23]] +; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP25]], i64 [[TMP24]] +; IF-EVL-NEXT: [[VP_REVERSE2:%.*]] = call @llvm.experimental.vp.reverse.nxv16i8( [[WIDE_MASKED_GATHER]], splat (i1 true), i32 [[TMP6]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_REVERSE2]], ptr align 1 [[TMP26]], splat (i1 true), i32 [[TMP6]]) +; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP6]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP27]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] +; IF-EVL-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 1024, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: br label [[LOOP:%.*]] +; IF-EVL: loop: +; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; IF-EVL-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] +; IF-EVL-NEXT: [[X:%.*]] = load i8, ptr [[GEP_A]], align 1 +; IF-EVL-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i8 [[X]] +; IF-EVL-NEXT: [[Y:%.*]] = load i8, ptr [[GEP_B]], align 1 +; IF-EVL-NEXT: [[GEP_C:%.*]] = getelementptr i8, ptr [[C]], i64 [[IV]] +; IF-EVL-NEXT: store i8 [[Y]], ptr [[GEP_C]], align 1 +; IF-EVL-NEXT: [[GEP_D:%.*]] = getelementptr i8, ptr [[D]], i64 [[IV]] +; IF-EVL-NEXT: store i8 [[Y]], ptr [[GEP_D]], align 1 +; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1 +; IF-EVL-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; IF-EVL-NEXT: br i1 [[CMP_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL: exit: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: @multiple_reverse_vector_pointer( +; NO-VP-NEXT: entry: +; NO-VP-NEXT: br label [[LOOP:%.*]] +; NO-VP: loop: +; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; NO-VP-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[IV]] +; NO-VP-NEXT: [[X:%.*]] = load i8, ptr [[GEP_A]], align 1 +; NO-VP-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B:%.*]], i8 [[X]] +; NO-VP-NEXT: [[Y:%.*]] = load i8, ptr [[GEP_B]], align 1 +; NO-VP-NEXT: [[GEP_C:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[IV]] +; NO-VP-NEXT: store i8 [[Y]], ptr [[GEP_C]], align 1 +; NO-VP-NEXT: [[GEP_D:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[IV]] +; NO-VP-NEXT: store i8 [[Y]], ptr [[GEP_D]], align 1 +; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1 +; NO-VP-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; NO-VP-NEXT: br i1 [[CMP_NOT]], label [[EXIT:%.*]], label [[LOOP]] +; NO-VP: exit: +; NO-VP-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1024, %entry ], [ %iv.next, %loop ] + + %gep.a = getelementptr i8, ptr %a, i64 %iv + %x = load i8, ptr %gep.a + + %gep.b = getelementptr i8, ptr %b, i8 %x + %y = load i8, ptr %gep.b + + %gep.c = getelementptr i8, ptr %c, i64 %iv + store i8 %y, ptr %gep.c + + %gep.d = getelementptr i8, ptr %d, i64 %iv + store i8 %y, ptr %gep.d + + %iv.next = add i64 %iv, -1 + %cmp.not = icmp eq i64 %iv, 0 + br i1 %cmp.not, label %exit, label %loop + +exit: + ret void +} From 595f7391847a7bc2ce51ea6b46f0782b23d8e988 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 14 Jan 2025 18:41:35 +0800 Subject: [PATCH 2/3] [VPlan] Fix mutating whilst iterating over users iterator in EVL transform This fixes a miscompilation extracted from 525.x264_r, where we were failing to update the runtime VF of a VPReverseVectorPointerRecipe. We were replacing a user of VF whilst iterating over the users() iterator, which meant that we ended up missing some recipes. This fixes it by storing it into a SmallVector first before iterating. (make_early_inc_range doesn't work here either) --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 +- .../vectorize-force-tail-with-evl-reverse-load-store.ll | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 545d277d7aa01..33f6dc94b9e7a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1603,7 +1603,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { LLVMContext &Ctx = CanonicalIVType->getContext(); VPValue *AllOneMask = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx)); - for (VPUser *U : Plan.getVF().users()) { + for (VPUser *U : SmallVector(Plan.getVF().users())) { if (auto *R = dyn_cast(U)) R->setOperand(1, &EVL); } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll index fa62117f4912f..c50e2adac8d7d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -282,8 +282,9 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[B:%.*]], [[VP_REVERSE]] ; IF-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv16i8.nxv16p0( align 1 [[TMP14]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP7]] -; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP4]] -; IF-EVL-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP4]] +; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP6]] to i64 +; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP16]] +; IF-EVL-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP16]] ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP17]] ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 [[TMP18]] ; IF-EVL-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv16i8( [[WIDE_MASKED_GATHER]], splat (i1 true), i32 [[TMP6]]) From 19bd16ae980bf5c0e1ab1ef67c25d143a023a479 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 14 Jan 2025 22:16:44 +0800 Subject: [PATCH 3/3] Use to_vector, link to GitHub issue --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 +- .../RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 33f6dc94b9e7a..a04ad1b370537 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1603,7 +1603,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { LLVMContext &Ctx = CanonicalIVType->getContext(); VPValue *AllOneMask = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx)); - for (VPUser *U : SmallVector(Plan.getVF().users())) { + for (VPUser *U : to_vector(Plan.getVF().users())) { if (auto *R = dyn_cast(U)) R->setOperand(1, &EVL); } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll index c50e2adac8d7d..5b579b0749c67 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -249,6 +249,9 @@ loopend: ret void } +; From a miscompile originally reported at +; https://github.com/llvm/llvm-project/issues/122681 + define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d) { ; IF-EVL-LABEL: @multiple_reverse_vector_pointer( ; IF-EVL-NEXT: entry: