From 795bd074752cd5e854b630344669038f02fbaae9 Mon Sep 17 00:00:00 2001
From: Zarko Todorovski
Date: Fri, 24 May 2024 16:06:36 -0400
Subject: [PATCH 1/3] [PowerPC][AIX] Support ByVals with greater alignment than pointer size

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 45 ++++++++++-------
 .../PowerPC/aix-cc-byval-limitation3.ll       |  2 +-
 .../PowerPC/aix-vector-byval-callee.ll        | 50 +++++++++++++++++++
 llvm/test/CodeGen/PowerPC/aix-vector-byval.ll | 49 ++++++++++++++++++
 4 files changed, 128 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-vector-byval.ll

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8450ce9e0e3b3..0b628ea2b3f6a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6855,7 +6855,8 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
   const PPCSubtarget &Subtarget = static_cast(
       State.getMachineFunction().getSubtarget());
   const bool IsPPC64 = Subtarget.isPPC64();
-  const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
+  const Align PtrAlign(PtrSize);
+  const Align StackAlign(16);
   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
 
   if (ValVT == MVT::f128)
@@ -6876,12 +6877,16 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
     PPC::V6, PPC::V7, PPC::V8, PPC::V9,
     PPC::V10, PPC::V11, PPC::V12, PPC::V13};
 
+  const ArrayRef GPRs = IsPPC64 ? GPR_64 : GPR_32;
+
   if (ArgFlags.isByVal()) {
+    const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
     if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
       report_fatal_error("Pass-by-value arguments with alignment greater than "
-                         "register width are not supported.");
+                         "16 are not supported.");
 
     const unsigned ByValSize = ArgFlags.getByValSize();
+    const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
 
     // An empty aggregate parameter takes up no storage and no registers,
     // but needs a MemLoc for a stack slot for the formal arguments side.
@@ -6891,11 +6896,23 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
       return false;
     }
 
-    const unsigned StackSize = alignTo(ByValSize, PtrAlign);
-    unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
-    for (const unsigned E = Offset + StackSize; Offset < E;
-         Offset += PtrAlign.value()) {
-      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
+    // Shadow allocate any registers that are not properly aligned.
+    unsigned NextReg = State.getFirstUnallocated(GPRs);
+    while (NextReg != GPRs.size() &&
+           !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
+      // Shadow allocate the next register since its alignment is not strict enough.
+      unsigned Reg = State.AllocateReg(GPRs);
+      // Allocate the stack space shadowed by said register.
+      State.AllocateStack(PtrSize, PtrAlign);
+      assert(Reg && "Allocating register unexpectedly failed.");
+      (void)Reg;
+      NextReg = State.getFirstUnallocated(GPRs);
+    }
+
+    const unsigned StackSize = alignTo(ByValSize, ObjAlign);
+    unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
+    for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
+      if (unsigned Reg = State.AllocateReg(GPRs))
         State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
       else {
         State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
@@ -6917,12 +6934,12 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
     [[fallthrough]];
   case MVT::i1:
   case MVT::i32: {
-    const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
+    const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
     // AIX integer arguments are always passed in register width.
     if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
       LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
                                   : CCValAssign::LocInfo::ZExt;
-    if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
+    if (unsigned Reg = State.AllocateReg(GPRs))
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
     else
       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
@@ -6942,8 +6959,8 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
     State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
 
     // Reserve and initialize GPRs or initialize the PSA as required.
-    for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
-      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+    for (unsigned I = 0; I < StoreSize; I += PtrSize) {
+      if (unsigned Reg = State.AllocateReg(GPRs)) {
         assert(FReg && "An FPR should be available when a GPR is reserved.");
         if (State.isVarArg()) {
           // Successfully reserved GPRs are only initialized for vararg calls.
@@ -6995,9 +7012,6 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
     return false;
   }
 
-  const unsigned PtrSize = IsPPC64 ? 8 : 4;
-  ArrayRef GPRs = IsPPC64 ? GPR_64 : GPR_32;
-
   unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
   // Burn any underaligned registers and their shadowed stack space until
   // we reach the required alignment.
@@ -7347,9 +7361,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
       const MCPhysReg ArgReg = VA.getLocReg();
       const PPCFrameLowering *FL = Subtarget.getFrameLowering();
 
-      if (Flags.getNonZeroByValAlign() > PtrByteSize)
-        report_fatal_error("Over aligned byvals not supported yet.");
-
       const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
       const int FI = MF.getFrameInfo().CreateFixedObject(
           StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
index bb0231dbf5417..d4d9d67061fcb 100644
--- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
@@ -13,4 +13,4 @@ entry:
 
 declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, ptr byval(%struct.S) align 32)
 
-; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than register width are not supported.
+; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than 16 are not supported.
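In words, the scheme the new CC_AIX code above implements is: when a by-value aggregate needs more alignment than the pointer size (up to the 16-byte stack alignment), first shadow-allocate any GPRs whose parameter-save-area slots are not sufficiently aligned, then allocate the aggregate itself at its full alignment, register by register while GPRs last. The sketch below is a small, self-contained model of that layout for the 64-bit case, not the actual CCState/CC_AIX code; the 48-byte linkage-area size, the x3-x10 numbering, and the printf trace are assumptions made for illustration. Under those assumptions it reproduces the layout the new 64BIT test below checks: the i32 lands in x3, x4 is skipped because it shadows offset 56, and the 16-byte vector travels in x5/x6 at parameter-save-area offset 64.

```cpp
#include <cstdio>

// Stand-alone model of the AIX 64-bit parameter save area (PSA) layout for
// one i32 followed by a 16-byte, 16-byte-aligned by-value aggregate.
int main() {
  const unsigned PtrSize = 8;       // assumption: 64-bit mode
  const unsigned LinkageArea = 48;  // assumption: 6-doubleword linkage area
  const unsigned NumGPRs = 8;       // x3..x10 shadow the first 8 doublewords of the PSA

  unsigned NextGPR = 0;             // index into x3..x10
  unsigned Offset = LinkageArea;    // running PSA offset

  // PSA offset shadowed by the next unallocated GPR.
  auto ShadowedOffset = [&] { return LinkageArea + NextGPR * PtrSize; };

  // Argument 1: i32 %i goes in x3 and reserves one pointer-sized slot.
  std::printf("i32 in x%u at offset %u\n", 3 + NextGPR++, Offset);
  Offset += PtrSize;

  // Argument 2: byval %struct.vec_struct, size 16, align 16.
  const unsigned ByValSize = 16, ByValAlign = 16;

  // Shadow-allocate GPRs whose PSA slot is not 16-byte aligned (here: x4 at 56).
  while (NextGPR < NumGPRs && ShadowedOffset() % ByValAlign != 0) {
    std::printf("x%u shadow-allocated (shadows offset %u)\n", 3 + NextGPR++, Offset);
    Offset += PtrSize;
  }

  // The aggregate itself: consecutive GPRs while they last, the stack otherwise.
  for (unsigned O = 0; O < ByValSize; O += PtrSize) {
    if (NextGPR < NumGPRs)
      std::printf("byval bytes [%u,%u) in x%u\n", Offset, Offset + PtrSize, 3 + NextGPR++);
    else
      std::printf("byval bytes [%u,%u) on the stack\n", Offset, Offset + PtrSize);
    Offset += PtrSize;
  }
  return 0;
}
```

Running the model prints the same register and offset assignment the 64BIT MIR checks in the new tests expect.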
diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll b/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll new file mode 100644 index 0000000000000..3491779b1002c --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll @@ -0,0 +1,50 @@ +TE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \ +; RUN: -mattr=-altivec -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefix=32BIT %s + +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \ +; RUN: -mattr=-altivec -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefix=64BIT %s + +%struct.vec_struct = type { <4 x i32> } + +; Function Attrs: norecurse nounwind readonly +define i32 @vec_struct_test(i32 %i, %struct.vec_struct* nocapture readonly byval(%struct.vec_struct) align 16 %vs) { + ; 32BIT-LABEL: name: vec_struct_test + + ; 32BIT: fixedStack: + ; 32BIT: - { id: 0, type: default, offset: 32, size: 16, alignment: 16, stack-id: default, + ; 32BIT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true, + ; 32BIT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + + ; 32BIT: bb.0.entry: + ; 32BIT: liveins: $r3, $r5, $r6, $r7, $r8 + ; 32BIT: renamable $r3 = nsw ADD4 renamable $r5, killed renamable $r3 + ; 32BIT: STW killed renamable $r7, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8, align 8) + ; 32BIT: STW killed renamable $r6, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4) + ; 32BIT: STW killed renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16) + ; 32BIT: STW killed renamable $r8, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12) + ; 32BIT: BLR implicit $lr, implicit $rm, implicit $r3 + + + ; 64BIT-LABEL: name: vec_struct_test + ; 64BIT: fixedStack: + ; 64BIT: - { id: 0, type: default, offset: 64, size: 16, alignment: 16, stack-id: default, + ; 64BIT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true, + ; 64BIT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + + ; 64BIT: bb.0.entry: + ; 64BIT: liveins: $x3, $x5, $x6 + ; 64BIT: STD killed renamable $x5, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16) + ; 64BIT: renamable $r4 = LWZ 0, %fixed-stack.0 :: (dereferenceable load (s32) from %ir.vsi1, align 16) + ; 64BIT: renamable $r3 = nsw ADD4 killed renamable $r4, renamable $r3, implicit killed $x3, implicit-def $x3 + ; 64BIT: STD killed renamable $x6, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8) + ; 64BIT: BLR8 implicit $lr8, implicit $rm, implicit $x3 +entry: + %vsi = getelementptr inbounds %struct.vec_struct, %struct.vec_struct* %vs, i32 0, i32 0 + %0 = load <4 x i32>, <4 x i32>* %vsi, align 16 + %vecext = extractelement <4 x i32> %0, i32 0 + %add = add nsw i32 %vecext, %i + ret i32 %add +} diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll new file mode 100644 index 0000000000000..9e4139cdf0c6c --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \ +; RUN: -mattr=-altivec -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefix=32BIT %s + +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp 
-mcpu=pwr7 \ +; RUN: -mattr=-altivec -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefix=64BIT %s + +%struct.B = type { <8 x i16>, i32, i32, [8 x i8] } + +; Function Attrs: nounwind +define i32 @caller() { + ; 32BIT-LABEL: name: caller + ; 32BIT: bb.0.entry: + ; 32BIT: renamable $r3 = LWZ 28, %stack.0.vs :: (load (s32) from unknown-address + 4) + ; 32BIT: STW killed renamable $r3, 60, $r1 :: (store (s32) into unknown-address + 4, basealign 16) + ; 32BIT: renamable $r3 = LWZ 24, %stack.0.vs :: (load (s32) from %stack.0.vs + 24, align 8, basealign 16) + ; 32BIT: STW killed renamable $r3, 56, $r1 :: (store (s32), align 16) + ; 32BIT: ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT: renamable $r10 = LWZ 20, %stack.0.vs :: (load (s32) from %stack.0.vs + 20) + ; 32BIT: renamable $r9 = LWZ 16, %stack.0.vs :: (load (s32) from %stack.0.vs + 16, align 16) + ; 32BIT: renamable $r8 = LWZ 12, %stack.0.vs :: (load (s32) from %stack.0.vs + 12) + ; 32BIT: renamable $r7 = LWZ 8, %stack.0.vs :: (load (s32) from %stack.0.vs + 8, align 8) + ; 32BIT: renamable $r6 = LWZ 4, %stack.0.vs :: (load (s32) from %stack.0.vs + 4) + ; 32BIT: renamable $r5 = LWZ 0, %stack.0.vs :: (load (s32) from %stack.0.vs, align 16) + ; 32BIT: $r3 = LI 0 + ; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1, implicit-def $r3 + ; 32BIT: ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT: BLR implicit $lr, implicit $rm, implicit $r3 + + ; 64BIT-LABEL: name: caller + ; 64BIT: bb.0.entry: + ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT: renamable $x8 = LD 24, %stack.0.vs :: (load (s64) from %stack.0.vs + 24) + ; 64BIT: renamable $x7 = LD 16, %stack.0.vs :: (load (s64) from %stack.0.vs + 16, align 16) + ; 64BIT: renamable $x6 = LD 8, %stack.0.vs :: (load (s64) from %stack.0.vs + 8) + ; 64BIT: renamable $x5 = LD 0, %stack.0.vs :: (load (s64) from %stack.0.vs, align 16) + ; 64BIT: $x3 = LI8 0 + ; 64BIT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x2, implicit-def $r1, implicit-def $x3 + ; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT: BLR8 implicit $lr8, implicit $rm, implicit $x3 + entry: + %vs = alloca %struct.B, align 16 + %call = tail call i32 @vec_struct_test(i32 0, %struct.B* nonnull byval(%struct.B) align 16 %vs) + ret i32 %call +} + +declare i32 @vec_struct_test(i32, %struct.B* byval(%struct.B) align 16) From 7f91bbedec4fe39ff619d6bc05307eb0e5dcee53 Mon Sep 17 00:00:00 2001 From: Zarko Todorovski Date: Sat, 25 May 2024 16:47:42 -0400 Subject: [PATCH 2/3] Fix typos and update tests --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 +- .../PowerPC/aix-vector-byval-callee.ll | 44 +++++++--------- llvm/test/CodeGen/PowerPC/aix-vector-byval.ll | 50 +++++++++---------- 3 files changed, 44 insertions(+), 53 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 0b628ea2b3f6a..6f273bc9a9d9d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6855,6 +6855,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, const PPCSubtarget &Subtarget = static_cast( State.getMachineFunction().getSubtarget()); const bool IsPPC64 = Subtarget.isPPC64(); + const unsigned PtrSize = 
IsPPC64 ? 8 : 4; const Align PtrAlign(PtrSize); const Align StackAlign(16); const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; @@ -6881,7 +6882,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, if (ArgFlags.isByVal()) { const Align ByValAlign(ArgFlags.getNonZeroByValAlign()); - if (ArgFlags.getNonZeroByValAlign() > PtrAlign) + if (ByValAlign > StackAlign) report_fatal_error("Pass-by-value arguments with alignment greater than " "16 are not supported."); diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll b/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll index 3491779b1002c..002ee0e47f14f 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll @@ -1,4 +1,4 @@ -TE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \ ; RUN: -mattr=-altivec -verify-machineinstrs < %s | \ ; RUN: FileCheck --check-prefix=32BIT %s @@ -12,35 +12,25 @@ TE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; Function Attrs: norecurse nounwind readonly define i32 @vec_struct_test(i32 %i, %struct.vec_struct* nocapture readonly byval(%struct.vec_struct) align 16 %vs) { ; 32BIT-LABEL: name: vec_struct_test - - ; 32BIT: fixedStack: - ; 32BIT: - { id: 0, type: default, offset: 32, size: 16, alignment: 16, stack-id: default, - ; 32BIT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true, - ; 32BIT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - ; 32BIT: bb.0.entry: - ; 32BIT: liveins: $r3, $r5, $r6, $r7, $r8 - ; 32BIT: renamable $r3 = nsw ADD4 renamable $r5, killed renamable $r3 - ; 32BIT: STW killed renamable $r7, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8, align 8) - ; 32BIT: STW killed renamable $r6, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4) - ; 32BIT: STW killed renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16) - ; 32BIT: STW killed renamable $r8, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12) - ; 32BIT: BLR implicit $lr, implicit $rm, implicit $r3 - - + ; 32BIT-NEXT: liveins: $r3, $r5, $r6, $r7, $r8 + ; 32BIT-NEXT: {{ $}} + ; 32BIT-NEXT: STW killed renamable $r7, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8, align 8) + ; 32BIT-NEXT: STW killed renamable $r6, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4) + ; 32BIT-NEXT: STW renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16) + ; 32BIT-NEXT: STW killed renamable $r8, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12) + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r5, killed renamable $r3 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 + ; ; 64BIT-LABEL: name: vec_struct_test - ; 64BIT: fixedStack: - ; 64BIT: - { id: 0, type: default, offset: 64, size: 16, alignment: 16, stack-id: default, - ; 64BIT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true, - ; 64BIT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - ; 64BIT: bb.0.entry: - ; 64BIT: liveins: $x3, $x5, $x6 - ; 64BIT: STD killed renamable $x5, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16) - ; 64BIT: renamable $r4 = LWZ 0, %fixed-stack.0 :: (dereferenceable load (s32) from %ir.vsi1, 
align 16) - ; 64BIT: renamable $r3 = nsw ADD4 killed renamable $r4, renamable $r3, implicit killed $x3, implicit-def $x3 - ; 64BIT: STD killed renamable $x6, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8) - ; 64BIT: BLR8 implicit $lr8, implicit $rm, implicit $x3 + ; 64BIT-NEXT: liveins: $x3, $x5, $x6 + ; 64BIT-NEXT: {{ $}} + ; 64BIT-NEXT: STD renamable $x5, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16) + ; 64BIT-NEXT: STD killed renamable $x6, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8) + ; 64BIT-NEXT: renamable $x4 = RLDICL killed renamable $x5, 32, 32 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 renamable $r4, renamable $r3, implicit killed $x3, implicit killed $x4, implicit-def $x3 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 entry: %vsi = getelementptr inbounds %struct.vec_struct, %struct.vec_struct* %vs, i32 0, i32 0 %0 = load <4 x i32>, <4 x i32>* %vsi, align 16 diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll index 9e4139cdf0c6c..5c383e0323cb0 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll @@ -13,33 +13,33 @@ define i32 @caller() { ; 32BIT-LABEL: name: caller ; 32BIT: bb.0.entry: - ; 32BIT: renamable $r3 = LWZ 28, %stack.0.vs :: (load (s32) from unknown-address + 4) - ; 32BIT: STW killed renamable $r3, 60, $r1 :: (store (s32) into unknown-address + 4, basealign 16) - ; 32BIT: renamable $r3 = LWZ 24, %stack.0.vs :: (load (s32) from %stack.0.vs + 24, align 8, basealign 16) - ; 32BIT: STW killed renamable $r3, 56, $r1 :: (store (s32), align 16) - ; 32BIT: ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1 - ; 32BIT: renamable $r10 = LWZ 20, %stack.0.vs :: (load (s32) from %stack.0.vs + 20) - ; 32BIT: renamable $r9 = LWZ 16, %stack.0.vs :: (load (s32) from %stack.0.vs + 16, align 16) - ; 32BIT: renamable $r8 = LWZ 12, %stack.0.vs :: (load (s32) from %stack.0.vs + 12) - ; 32BIT: renamable $r7 = LWZ 8, %stack.0.vs :: (load (s32) from %stack.0.vs + 8, align 8) - ; 32BIT: renamable $r6 = LWZ 4, %stack.0.vs :: (load (s32) from %stack.0.vs + 4) - ; 32BIT: renamable $r5 = LWZ 0, %stack.0.vs :: (load (s32) from %stack.0.vs, align 16) - ; 32BIT: $r3 = LI 0 - ; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1, implicit-def $r3 - ; 32BIT: ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1 - ; 32BIT: BLR implicit $lr, implicit $rm, implicit $r3 - + ; 32BIT-NEXT: renamable $r3 = LWZ 28, %stack.0.vs :: (load (s32) from unknown-address + 4) + ; 32BIT-NEXT: STW killed renamable $r3, 60, $r1 :: (store (s32) into unknown-address + 4, basealign 16) + ; 32BIT-NEXT: renamable $r3 = LWZ 24, %stack.0.vs :: (load (s32) from %stack.0.vs + 24, align 8, basealign 16) + ; 32BIT-NEXT: STW killed renamable $r3, 56, $r1 :: (store (s32), align 16) + ; 32BIT-NEXT: ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: renamable $r10 = LWZ 20, %stack.0.vs :: (load (s32) from %stack.0.vs + 20) + ; 32BIT-NEXT: renamable $r9 = LWZ 16, %stack.0.vs :: (load (s32) from %stack.0.vs + 16, align 16) + ; 32BIT-NEXT: renamable $r8 = LWZ 12, %stack.0.vs :: (load (s32) from %stack.0.vs + 12) + ; 32BIT-NEXT: renamable $r7 = LWZ 8, %stack.0.vs :: (load (s32) from %stack.0.vs + 8, align 8) + ; 32BIT-NEXT: renamable $r6 = LWZ 4, %stack.0.vs :: (load (s32) from %stack.0.vs + 4) + 
; 32BIT-NEXT: renamable $r5 = LWZ 0, %stack.0.vs :: (load (s32) from %stack.0.vs, align 16) + ; 32BIT-NEXT: $r3 = LI 0 + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1, implicit-def $r3 + ; 32BIT-NEXT: ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 + ; ; 64BIT-LABEL: name: caller ; 64BIT: bb.0.entry: - ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 - ; 64BIT: renamable $x8 = LD 24, %stack.0.vs :: (load (s64) from %stack.0.vs + 24) - ; 64BIT: renamable $x7 = LD 16, %stack.0.vs :: (load (s64) from %stack.0.vs + 16, align 16) - ; 64BIT: renamable $x6 = LD 8, %stack.0.vs :: (load (s64) from %stack.0.vs + 8) - ; 64BIT: renamable $x5 = LD 0, %stack.0.vs :: (load (s64) from %stack.0.vs, align 16) - ; 64BIT: $x3 = LI8 0 - ; 64BIT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x2, implicit-def $r1, implicit-def $x3 - ; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - ; 64BIT: BLR8 implicit $lr8, implicit $rm, implicit $x3 + ; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: renamable $x8 = LD 24, %stack.0.vs :: (load (s64) from %stack.0.vs + 24) + ; 64BIT-NEXT: renamable $x7 = LD 16, %stack.0.vs :: (load (s64) from %stack.0.vs + 16, align 16) + ; 64BIT-NEXT: renamable $x6 = LD 8, %stack.0.vs :: (load (s64) from %stack.0.vs + 8) + ; 64BIT-NEXT: renamable $x5 = LD 0, %stack.0.vs :: (load (s64) from %stack.0.vs, align 16) + ; 64BIT-NEXT: $x3 = LI8 0 + ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x2, implicit-def $r1, implicit-def $x3 + ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 entry: %vs = alloca %struct.B, align 16 %call = tail call i32 @vec_struct_test(i32 0, %struct.B* nonnull byval(%struct.B) align 16 %vs) From e8a736811b667f2ea5276109b1cdbd7dc15341c3 Mon Sep 17 00:00:00 2001 From: Zarko Todorovski Date: Tue, 28 May 2024 13:36:56 -0400 Subject: [PATCH 3/3] Fix tests so they use opaque pointers --- llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll | 6 +++--- llvm/test/CodeGen/PowerPC/aix-vector-byval.ll | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll b/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll index 002ee0e47f14f..80c26471d8cdb 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll @@ -10,7 +10,7 @@ %struct.vec_struct = type { <4 x i32> } ; Function Attrs: norecurse nounwind readonly -define i32 @vec_struct_test(i32 %i, %struct.vec_struct* nocapture readonly byval(%struct.vec_struct) align 16 %vs) { +define i32 @vec_struct_test(i32 %i, ptr nocapture readonly byval(%struct.vec_struct) align 16 %vs) { ; 32BIT-LABEL: name: vec_struct_test ; 32BIT: bb.0.entry: ; 32BIT-NEXT: liveins: $r3, $r5, $r6, $r7, $r8 @@ -32,8 +32,8 @@ define i32 @vec_struct_test(i32 %i, %struct.vec_struct* nocapture readonly byval ; 64BIT-NEXT: renamable $r3 = nsw ADD4 renamable $r4, renamable $r3, implicit killed $x3, implicit killed $x4, implicit-def $x3 ; 64BIT-NEXT: BLR8 
implicit $lr8, implicit $rm, implicit $x3
 entry:
-  %vsi = getelementptr inbounds %struct.vec_struct, %struct.vec_struct* %vs, i32 0, i32 0
-  %0 = load <4 x i32>, <4 x i32>* %vsi, align 16
+  %vsi = getelementptr inbounds i8, ptr %vs, i32 0
+  %0 = load <4 x i32>, ptr %vsi, align 16
   %vecext = extractelement <4 x i32> %0, i32 0
   %add = add nsw i32 %vecext, %i
   ret i32 %add
 }
diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
index 5c383e0323cb0..8fcac7b6a5921 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
@@ -42,8 +42,8 @@ define i32 @caller() {
   ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3
 entry:
   %vs = alloca %struct.B, align 16
-  %call = tail call i32 @vec_struct_test(i32 0, %struct.B* nonnull byval(%struct.B) align 16 %vs)
+  %call = tail call i32 @vec_struct_test(i32 0, ptr nonnull byval(%struct.B) align 16 %vs)
   ret i32 %call
 }
 
-declare i32 @vec_struct_test(i32, %struct.B* byval(%struct.B) align 16)
+declare i32 @vec_struct_test(i32, ptr byval(%struct.B) align 16)
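For the 32-bit caller in aix-vector-byval.ll, the same rule explains the split between registers and explicit stack stores that the 32BIT checks expect: %struct.B is 32 bytes with 16-byte alignment, the leading i32 occupies r3, r4 is skipped because its shadowed offset (28, assuming the usual 24-byte linkage area) is not 16-byte aligned, the first 24 bytes of the struct ride in r5-r10, and the last 8 bytes are stored at offsets 56 and 60 off $r1. The sketch below is an illustrative stand-alone model of that walk, not the CC_AIX implementation; the linkage-area size and register numbering are assumptions of the model.

```cpp
#include <cstdio>

// Stand-alone model of the AIX 32-bit parameter save area (PSA) layout for the
// caller in aix-vector-byval.ll: i32 0 followed by the 32-byte %struct.B.
int main() {
  const unsigned PtrSize = 4;       // assumption: 32-bit mode
  const unsigned LinkageArea = 24;  // assumption: 6-word linkage area
  const unsigned NumGPRs = 8;       // r3..r10 shadow the first 8 words of the PSA

  unsigned NextGPR = 0;             // index into r3..r10
  unsigned Offset = LinkageArea;    // running PSA offset

  // PSA offset shadowed by the next unallocated GPR.
  auto ShadowedOffset = [&] { return LinkageArea + NextGPR * PtrSize; };

  // Argument 1: i32 0 goes in r3.
  std::printf("i32 in r%u at offset %u\n", 3 + NextGPR++, Offset);
  Offset += PtrSize;

  // Argument 2: byval %struct.B, size 32, align 16.
  const unsigned ByValSize = 32, ByValAlign = 16;

  // Skip GPRs whose PSA slot is not 16-byte aligned (here: r4 at offset 28).
  while (NextGPR < NumGPRs && ShadowedOffset() % ByValAlign != 0) {
    std::printf("r%u skipped (shadows offset %u)\n", 3 + NextGPR++, Offset);
    Offset += PtrSize;
  }

  // Word by word: r5..r10 take the first 24 bytes, the remainder goes to the
  // stack, which is why the 32BIT checks expect STWs to offsets 56 and 60.
  for (unsigned O = 0; O < ByValSize; O += PtrSize) {
    if (NextGPR < NumGPRs)
      std::printf("bytes [%u,%u) in r%u\n", Offset, Offset + PtrSize, 3 + NextGPR++);
    else
      std::printf("bytes [%u,%u) stored to the stack\n", Offset, Offset + PtrSize);
    Offset += PtrSize;
  }
  return 0;
}
```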