diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp
index e80c6f7c0f49d..b0d3a0e21480f 100644
--- a/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -23,6 +23,7 @@
 #include "llvm/IR/AttributeMask.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ModRef.h"
@@ -265,6 +266,121 @@ static bool runIPSCCP(
     }
   }
 
+  // If a function has exactly one call site and that call passes a caller
+  // alloca whose only other uses are simple constant stores (directly, or
+  // through a single-use constant-index GEP) that execute before the call,
+  // recreate the alloca and those stores inside the callee. For now, rely on
+  // argpromotion to clean up the dead arguments left in the callee.
+  for (Function &F : M) {
+    if (!F.hasOneUse() || !canTrackArgumentsInterprocedurally(&F))
+      continue;
+
+    // The single use must be a direct call of F, not F passed as an operand.
+    auto *CI = dyn_cast<CallInst>(*F.user_begin());
+    if (!CI || CI->getCalledFunction() != &F)
+      continue;
+
+    // A store can be replayed in the callee only if it is simple (neither
+    // volatile nor atomic), writes a constant, and is guaranteed to execute
+    // before the call.
+    auto IsMovableStore = [CI](const StoreInst *SI) {
+      return SI && SI->isSimple() && isa<Constant>(SI->getValueOperand()) &&
+             SI->getParent() == CI->getParent() && SI->comesBefore(CI);
+    };
+
+    // Collects into InitStores every store that initializes AI ahead of the
+    // call. Returns false if AI has a use that cannot be replayed in the
+    // callee, or if there is nothing to move.
+    auto CollectInitStores = [&](AllocaInst *AI,
+                                 SmallVectorImpl<StoreInst *> &InitStores) {
+      unsigned NumCallUses = 0;
+      for (User *U : AI->users()) {
+        if (U == CI) {
+          // If the alloca is passed more than once the callee may rely on the
+          // arguments aliasing, so it cannot be split into separate allocas.
+          if (++NumCallUses > 1)
+            return false;
+          continue;
+        }
+
+        auto *SI = dyn_cast<StoreInst>(U);
+        if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+          // Check the use count before touching the user list, and require
+          // constant indices so the GEP can be rebuilt in another function.
+          if (!GEP->hasOneUse() || !GEP->hasAllConstantIndices())
+            return false;
+          SI = dyn_cast<StoreInst>(GEP->user_back());
+        }
+        if (!IsMovableStore(SI))
+          return false;
+        InitStores.push_back(SI);
+      }
+      return !InitStores.empty();
+    };
+
+    for (Use &Arg : CI->args()) {
+      auto *AI = dyn_cast<AllocaInst>(Arg.get());
+      // Array allocations are skipped: the element count is not replayed.
+      if (!AI || AI->isArrayAllocation())
+        continue;
+
+      // Variadic operands have no formal argument to rewrite.
+      unsigned ArgNo = CI->getArgOperandNo(&Arg);
+      if (ArgNo >= F.arg_size())
+        continue;
+
+      // If the callee may capture the pointer (e.g. return it), the caller can
+      // still observe the memory after the call, so the storage has to stay in
+      // the caller. A byval argument is already a callee-private copy.
+      if (!CI->isByValArgument(ArgNo) && !CI->doesNotCapture(ArgNo))
+        continue;
+
+      SmallVector<StoreInst *> InitStores;
+      if (!CollectInitStores(AI, InitStores))
+        continue;
+
+      // Use-list order is unrelated to program order; replay the stores in
+      // program order so overlapping stores keep their final value.
+      llvm::sort(InitStores, [](const StoreInst *L, const StoreInst *R) {
+        return L->comesBefore(R);
+      });
+
+      // Recreate the alloca at the top of the callee and redirect the formal
+      // argument to it.
+      IRBuilder<> B(&F.getEntryBlock().front());
+      AllocaInst *NewAI =
+          B.CreateAlloca(AI->getAllocatedType(), AI->getAddressSpace(),
+                         /*ArraySize=*/nullptr, AI->getName());
+      NewAI->setAlignment(AI->getAlign());
+      F.getArg(ArgNo)->replaceAllUsesWith(NewAI);
+
+      for (StoreInst *SI : InitStores) {
+        auto *GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand());
+
+        // Replay the store, and the GEP feeding it if any, in the callee.
+        Value *Ptr = NewAI;
+        if (GEP) {
+          SmallVector<Value *> Indices(GEP->indices());
+          auto *NewGEP = cast<GetElementPtrInst>(
+              B.CreateGEP(GEP->getSourceElementType(), NewAI, Indices));
+          NewGEP->setNoWrapFlags(GEP->getNoWrapFlags());
+          Ptr = NewGEP;
+        }
+        B.CreateAlignedStore(SI->getValueOperand(), Ptr, SI->getAlign());
+
+        // Erase the originals from the caller; SI was the GEP's only user.
+        SI->eraseFromParent();
+        if (GEP)
+          GEP->eraseFromParent();
+      }
+      MadeChanges = true;
+
+      // TODO:
+      // - delete dead params here instead of relying on argpromotion
+      // - remove the now-unused alloca from the caller
+    }
+  }
+
   // If we inferred constant or undef return values for a function, we replaced
   // all call uses with the inferred value. This means we don't need to bother
   // actually returning anything from the function. Replace all return
diff --git a/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion.ll b/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion.ll
index c33fcfbe6ed97..67e2d4e5f8a87 100644
--- a/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion.ll
+++ b/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion.ll
@@ -9,10 +9,8 @@
 
 define internal void @f(ptr byval(%struct.ss) align 8 %b, ptr byval(i32) align 4 %X) noinline nounwind {
 ; CHECK-LABEL: define {{[^@]+}}@f
-; CHECK-SAME: (i32 [[B_0:%.*]]){{[^#]*}} #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TEMP:%.*]] = add i32 [[B_0]], 1
-; CHECK-NEXT:    store i32 [[TEMP]], ptr [[DUMMY]], align 4
+; CHECK-NEXT:    store i32 2, ptr [[DUMMY]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -27,7 +25,7 @@ define i32 @test(ptr %X) {
 ; CHECK-LABEL: define {{[^@]+}}@test
 ; CHECK-SAME: (ptr {{[^%]*}} [[X:%.*]]){{[^#]*}} #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    tail call {{.*}}void @f(i32 1)
+; CHECK-NEXT:    tail call {{.*}}void @f()
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
diff --git a/llvm/test/Transforms/SCCP/push_stores.ll b/llvm/test/Transforms/SCCP/push_stores.ll
new file mode 100644
index 0000000000000..a5265c7f85cff
--- /dev/null
+++ b/llvm/test/Transforms/SCCP/push_stores.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=ipsccp,argpromotion < %s | FileCheck %s
+
+@sudoku0 = internal global [9 x [9 x i32]] zeroinitializer
+@sudoku1 = internal global [9 x [9 x i32]] zeroinitializer
+
+declare void @callee2(ptr nocapture nonnull readonly %0)
+
+define internal i64 @callee(ptr nocapture readonly %0, ptr nocapture readonly %1) local_unnamed_addr {
+; CHECK-LABEL: define internal i64 @callee() local_unnamed_addr {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca { ptr, i64 }, align 8
+; CHECK-NEXT:    store ptr @sudoku1, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
+; CHECK-NEXT:    store i64 4, ptr [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = alloca { ptr, i64 }, align 8
+; CHECK-NEXT:    store ptr @sudoku0, ptr [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    store i64 4, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[MEGASTRUCT:%.*]] = alloca { ptr, ptr, { ptr, i64 }, { ptr, i64 } }, align 8
+; CHECK-NEXT:    [[ALLOCA0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[ALLOCA1:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
+; CHECK-NEXT:    [[GEP_0_1:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    [[TMP6:%.*]] = load i64, ptr [[GEP_0_1]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[TMP3]], align 8
+; CHECK-NEXT:    [[GEP_1_1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
+; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr [[GEP_1_1]], align 8
+; CHECK-NEXT:    store ptr [[ALLOCA0]], ptr [[MEGASTRUCT]], align 8
+; CHECK-NEXT:    [[MEGASTRUCT_GEP_1:%.*]] = getelementptr inbounds i8, ptr [[MEGASTRUCT]], i64 8
+; CHECK-NEXT:    store ptr [[ALLOCA1]], ptr [[MEGASTRUCT_GEP_1]], align 8
+; CHECK-NEXT:    [[MEGASTRUCT_GEP_2:%.*]] = getelementptr inbounds i8, ptr [[MEGASTRUCT]], i64 16
+; CHECK-NEXT:    store ptr [[TMP5]], ptr [[MEGASTRUCT_GEP_2]], align 8
+; CHECK-NEXT:    [[MEGASTRUCT_GEP_3:%.*]] = getelementptr inbounds i8, ptr [[MEGASTRUCT]], i64 24
+; CHECK-NEXT:    store i64 [[TMP6]], ptr [[MEGASTRUCT_GEP_3]], align 8
+; CHECK-NEXT:    [[MEGASTRUCT_GEP_4:%.*]] = getelementptr inbounds i8, ptr [[MEGASTRUCT]], i64 32
+; CHECK-NEXT:    store ptr [[TMP7]], ptr [[MEGASTRUCT_GEP_4]], align 8
+; CHECK-NEXT:    [[MEGASTRUCT_GEP_5:%.*]] = getelementptr inbounds i8, ptr [[MEGASTRUCT]], i64 40
+; CHECK-NEXT:    store i64 [[TMP8]], ptr [[MEGASTRUCT_GEP_5]], align 8
+; CHECK-NEXT:    call fastcc void @callee2(ptr [[MEGASTRUCT]])
+; CHECK-NEXT:    ret i64 poison
+;
+  %megastruct = alloca { ptr, ptr, { ptr, i64 }, { ptr, i64 } }
+
+  %alloca0 = alloca i32, align 4
+  %alloca1 = alloca i32, align 4
+
+  %3 = load ptr, ptr %0, align 8
+  %gep.0.1 = getelementptr inbounds i8, ptr %0, i64 8
+  %4 = load i64, ptr %gep.0.1, align 8
+
+  %5 = load ptr, ptr %0, align 8
+  %gep.1.1 = getelementptr inbounds i8, ptr %1, i64 8
+  %6 = load i64, ptr %gep.1.1, align 8
+
+  store ptr %alloca0, ptr %megastruct, align 8
+  %megastruct.gep.1 = getelementptr inbounds i8, ptr %megastruct, i64 8
+  store ptr %alloca1, ptr %megastruct.gep.1, align 8
+
+  %megastruct.gep.2 = getelementptr inbounds i8, ptr %megastruct, i64 16
+  store ptr %3, ptr %megastruct.gep.2, align 8
+
+  %megastruct.gep.3 = getelementptr inbounds i8, ptr %megastruct, i64 24
+  store i64 %4, ptr %megastruct.gep.3, align 8
+
+  %megastruct.gep.4 = getelementptr inbounds i8, ptr %megastruct, i64 32
+  store ptr %5, ptr %megastruct.gep.4, align 8
+
+  %megastruct.gep.5 = getelementptr inbounds i8, ptr %megastruct, i64 40
+  store i64 %6, ptr %megastruct.gep.5, align 8
+
+  call fastcc void @callee2(ptr %megastruct)
+  ret i64 1
+}
+
+define i64 @caller() local_unnamed_addr {
+; CHECK-LABEL: define i64 @caller() local_unnamed_addr {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @callee()
+; CHECK-NEXT:    ret i64 1
+;
+  %1 = alloca { ptr, i64 }, align 8
+  %2 = alloca { ptr, i64 }, align 8
+
+  store ptr @sudoku0, ptr %1, align 8
+  %.gep0 = getelementptr inbounds i8, ptr %1, i64 8
+  store i64 4, ptr %.gep0, align 8
+
+  store ptr @sudoku1, ptr %2, align 8
+  %.gep1 = getelementptr inbounds i8, ptr %2, i64 8
+  store i64 4, ptr %.gep1, align 8
+
+  %p = call i64 @callee(ptr nonnull %1, ptr nonnull %2)
+  ret i64 %p
+}
+
+define i64 @m() local_unnamed_addr {
+; CHECK-LABEL: define i64 @m() local_unnamed_addr {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @caller()
+; CHECK-NEXT:    ret i64 1
+;
+  %1 = call i64 @caller()
+  ret i64 %1
+}